Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

source: imaps-frontend/node_modules/regexpu-core/rewrite-pattern.js

main

Last change on this file was 79a0317, checked in by stefan toskovski <stefantoska84@…>, 6 months ago
F4 Finalna Verzija
Property mode set to `100644`
File size: 30.6 KB

Line
1	'use strict';
2
3	const generate = require('regjsgen').generate;
4	const parse = require('regjsparser').parse;
5	const regenerate = require('regenerate');
6	const unicodeMatchProperty = require('unicode-match-property-ecmascript');
7	const unicodeMatchPropertyValue = require('unicode-match-property-value-ecmascript');
8	const iuMappings = require('./data/iu-mappings.js');
9	const iBMPMappings = require('./data/i-bmp-mappings.js');
10	const iuFoldings = require('./data/iu-foldings.js');
11	const ESCAPE_SETS = require('./data/character-class-escape-sets.js');
12	const { UNICODE_SET, UNICODE_IV_SET } = require('./data/all-characters.js');
13
/**
 * Maps every element of `array` through `callback` and flattens the results
 * one level: an array result contributes each of its elements, any other
 * result is appended as-is.
 */
function flatMap(array, callback) {
	const flattened = [];
	array.forEach(element => {
		const mapped = callback(element);
		const pieces = Array.isArray(mapped) ? mapped : [mapped];
		for (let i = 0; i < pieces.length; i++) {
			flattened.push(pieces[i]);
		}
	});
	return flattened;
}
26
/**
 * Reports whether the last entry of the set's `data` array is an astral
 * code point (>= 0x10000). An empty `data` array yields `false`.
 */
function regenerateContainsAstral(regenerateData) {
	const { data } = regenerateData;
	if (data.length === 0) {
		return false;
	}
	const lastEntry = data[data.length - 1];
	return lastEntry >= 0x10000;
}
31
// Characters that carry syntactic meaning inside a pattern.
// https://tc39.es/ecma262/#prod-SyntaxCharacter
const SYNTAX_CHARS = /[\\^$.*+?()[\]{}|]/g;

// Every code point above the Basic Multilingual Plane.
const ASTRAL_SET = regenerate().addRange(0x10000, 0x10FFFF);

// `LineTerminator`s (https://mths.be/es6#sec-line-terminators):
// <LF> Line Feed, <CR> Carriage Return, <LS> Line Separator,
// <PS> Paragraph Separator.
const NEWLINE_SET = regenerate().add(0x000A, 0x000D, 0x2028, 0x2029);

// The code points matched by `/./u`: all Unicode code points except the
// line terminators. https://mths.be/es6#sec-atom
const DOT_SET_UNICODE = UNICODE_SET.clone().remove(NEWLINE_SET);
49
/**
 * Looks up the code point set for a character class escape, choosing the
 * `ESCAPE_SETS` table from the mode arguments:
 * non-unicode -> REGULAR; unicode -> UNICODE; unicode + ignoreCase ->
 * UNICODE_IGNORE_CASE, or UNICODESET_IGNORE_CASE when `shouldApplySCF` is set.
 */
const getCharacterClassEscapeSet = (character, unicode, ignoreCase, shouldApplySCF) => {
	let table;
	if (!unicode) {
		table = ESCAPE_SETS.REGULAR;
	} else if (!ignoreCase) {
		table = ESCAPE_SETS.UNICODE;
	} else if (shouldApplySCF) {
		table = ESCAPE_SETS.UNICODESET_IGNORE_CASE;
	} else {
		table = ESCAPE_SETS.UNICODE_IGNORE_CASE;
	}
	return table.get(character);
};
64
/**
 * Returns the set a `.` should match in Unicode mode: the full Unicode set
 * when `dotAll` is truthy, otherwise the set without line terminators.
 */
const getUnicodeDotSet = (dotAll) => {
	if (dotAll) {
		return UNICODE_SET;
	}
	return DOT_SET_UNICODE;
};
68
/**
 * Loads the data module for a Unicode property from
 * `regenerate-unicode-properties`. With a `value` the module path is
 * `<property>/<value>`; without one it is `Binary_Property/<property>`.
 * Any failure while loading is reported as a "Failed to recognize" error.
 */
const getUnicodePropertyValueSet = (property, value) => {
	const segments = value ? [property, value] : ['Binary_Property', property];
	const modulePath = `regenerate-unicode-properties/${ segments.join('/') }.js`;
	try {
		return require(modulePath);
	} catch (exception) {
		const message = `Failed to recognize value \`${ value }\` for property ` +
			`\`${ property }\`.`;
		throw new Error(message);
	}
};
82
/**
 * Resolves a `\p{...}` body that has no `=`. Candidates are tried in order
 * and the first one that does not throw wins:
 *   1. a `General_Category` value (`unicodeMatchPropertyValue` throws on
 *      invalid values),
 *   2. a property of strings,
 *   3. a binary property of single code points (`unicodeMatchProperty`
 *      throws on invalid properties; this error is not caught).
 */
const handleLoneUnicodePropertyNameOrValue = (value) => {
	const fallibleLookups = [
		() => {
			const property = 'General_Category';
			const category = unicodeMatchPropertyValue(property, value);
			return getUnicodePropertyValueSet(property, category);
		},
		() => getUnicodePropertyValueSet('Property_of_Strings', value),
	];
	for (const lookup of fallibleLookups) {
		try {
			return lookup();
		} catch (exception) {
			// Not this kind of name; fall through to the next candidate.
		}
	}
	const binaryProperty = unicodeMatchProperty(value);
	return getUnicodePropertyValueSet(binaryProperty);
};
101
/**
 * Resolves the body of a `\p{...}` / `\P{...}` escape into
 * `{ characters, strings }`.
 *
 * `value` is either a lone name or `Property=Value`. For a negated escape the
 * result is the complement of the characters relative to `UNICODE_IV_SET`
 * (when `isUnicodeSetIgnoreCase`) or `UNICODE_SET`, and negating a property
 * that has strings throws. For a positive escape the characters are cloned and
 * the strings are returned with syntax characters backslash-escaped.
 */
const getUnicodePropertyEscapeSet = (value, isNegative, isUnicodeSetIgnoreCase) => {
	// `split` never yields `undefined` elements, so a missing second part
	// means the pattern had no `=`.
	const [firstPart, secondPart] = value.split('=');

	let set;
	if (secondPart === undefined) {
		set = handleLoneUnicodePropertyNameOrValue(firstPart);
	} else {
		// The pattern consists of two parts, i.e. `Property=Value`.
		const property = unicodeMatchProperty(firstPart);
		const propertyValue = unicodeMatchPropertyValue(property, secondPart);
		set = getUnicodePropertyValueSet(property, propertyValue);
	}

	if (!isNegative) {
		const characters = set.characters.clone();
		// Strings like *️⃣ must be escaped so they can be safely used in unions.
		const strings = set.strings
			? new Set(set.strings.map(str => str.replace(SYNTAX_CHARS, '\\$&')))
			: new Set();
		return { characters, strings };
	}

	if (set.strings) {
		throw new Error('Cannot negate Unicode property of strings');
	}
	const universe = isUnicodeSetIgnoreCase ? UNICODE_IV_SET : UNICODE_SET;
	return {
		characters: universe.clone().remove(set.characters),
		strings: new Set()
	};
};
131
/**
 * Builds character-class data for a Unicode property escape.
 *
 * The property's characters are optionally passed through
 * `simpleCaseFolding` (when `shouldApplySCF`), then extended with the case
 * equivalents of each member when `configGetCaseEqFlags()` is non-zero.
 * Any strings of the property are stored in `longStrings`.
 */
const getUnicodePropertyEscapeCharacterClassData = (property, isNegative, isUnicodeSetIgnoreCase, shouldApplySCF) => {
	const set = getUnicodePropertyEscapeSet(property, isNegative, isUnicodeSetIgnoreCase);
	const data = getCharacterClassEmptyData();

	let singleChars = set.characters;
	if (shouldApplySCF) {
		const folded = set.characters.toArray().map(ch => simpleCaseFolding(ch));
		singleChars = regenerate(folded);
	}

	const caseEqFlags = configGetCaseEqFlags();
	if (caseEqFlags) {
		// Iterate over a snapshot; the set itself is extended inside the loop.
		singleChars.toArray().forEach(codepoint => {
			const equivalents = getCaseEquivalents(codepoint, caseEqFlags);
			if (equivalents) {
				singleChars.add(equivalents);
			}
		});
	}
	data.singleChars = singleChars;

	const hasStrings = set.strings.size > 0;
	if (hasStrings) {
		data.longStrings = set.strings;
		data.maybeIncludesStrings = true;
	}
	return data;
};
152
// Bit flags selecting which case-equivalence tables to consult.
const CASE_EQ_FLAG_NONE = 0b00;
const CASE_EQ_FLAG_BMP = 0b01;
const CASE_EQ_FLAG_UNICODE = 0b10;

/**
 * Derives the CASE_EQ_FLAG_* bitmask from the current `config`.
 *
 * - Inline modifier `i` is `true`: BMP equivalents are needed when modifiers
 *   are being transformed, plus Unicode equivalents if the `u` or `v` flag is
 *   present; nothing otherwise.
 * - Inline modifier `i` is `undefined`: Unicode equivalents are needed when
 *   the unicode flag is being transformed and the `i` flag is present.
 * - Any other value of the inline modifier: no equivalents.
 */
function configGetCaseEqFlags() {
	const inlineIgnoreCase = config.modifiersData.i;

	if (inlineIgnoreCase === true) {
		if (!config.transform.modifiers) {
			return CASE_EQ_FLAG_NONE;
		}
		const isUnicodeMode = config.flags.unicode || config.flags.unicodeSets;
		return isUnicodeMode
			? CASE_EQ_FLAG_BMP | CASE_EQ_FLAG_UNICODE
			: CASE_EQ_FLAG_BMP;
	}

	if (inlineIgnoreCase === undefined && config.transform.unicodeFlag && config.flags.ignoreCase) {
		return CASE_EQ_FLAG_UNICODE;
	}

	return CASE_EQ_FLAG_NONE;
}
173
// For every code point in [min, max], add its case-equivalent code points
// (as reported by `getCaseEquivalents`) to this set. The first code point is
// always visited, even when `min > max`.
regenerate.prototype.iuAddRange = function(min, max, caseEqFlags) {
	let codePoint = min;
	do {
		const equivalents = getCaseEquivalents(codePoint, caseEqFlags);
		if (equivalents) {
			this.add(equivalents);
		}
	} while (++codePoint <= max);
	return this;
};

// Counterpart of `iuAddRange`: removes the case-equivalent code points of
// every code point in [min, max] from this set.
regenerate.prototype.iuRemoveRange = function(min, max, caseEqFlags) {
	let codePoint = min;
	do {
		const equivalents = getCaseEquivalents(codePoint, caseEqFlags);
		if (equivalents) {
			this.remove(equivalents);
		}
	} while (++codePoint <= max);
	return this;
};
196
/**
 * Re-parses `pattern` and copies the resulting node's properties onto `item`
 * (in place). Character classes, groups and values are used directly; any
 * other node type is first wrapped in a non-capturing group.
 */
const update = (item, pattern) => {
	const parserFlags = config.useUnicodeFlag ? 'u' : '';
	const parsed = parse(pattern, parserFlags, {
		lookbehind: true,
		namedGroups: true,
		unicodePropertyEscape: true,
		unicodeSet: true,
		modifiers: true,
	});
	const usableAsIs = parsed.type === 'characterClass' ||
		parsed.type === 'group' ||
		parsed.type === 'value';
	const replacement = usableAsIs ? parsed : wrap(parsed, pattern);
	Object.assign(item, replacement);
};
217
/**
 * Wraps `tree` in a non-capturing group node whose `raw` text is
 * `(?:<pattern>)`.
 */
const wrap = (tree, pattern) => ({
	type: 'group',
	behavior: 'ignore',
	body: [tree],
	raw: `(?:${ pattern })`
});
227
/**
 * Given any codepoint ch, returns false or an array of characters,
 * such that for every c in the array,
 * c != ch and Canonicalize(~, c) == Canonicalize(~, ch)
 *
 * where Canonicalize is defined in
 * https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch
 *
 * The returned array is always freshly allocated: entries stored in
 * `iuMappings` are never handed out or modified.
 *
 * @param {number} codePoint input code point
 * @param {number} flags bitwise flags composed of CASE_EQ_FLAG_*
 * @returns false | number[]
 */
const getCaseEquivalents = (codePoint, flags) => {
	if (flags === CASE_EQ_FLAG_NONE) {
		return false;
	}
	const unicodeMapping = (flags & CASE_EQ_FLAG_UNICODE) ? iuMappings.get(codePoint) : undefined;
	// `iuMappings` stores either a single number or an array. Copy it into a
	// new array: the BMP pass below pushes onto `result`, and pushing onto the
	// stored array would corrupt the shared mapping table.
	let result = unicodeMapping ? [].concat(unicodeMapping) : [];
	if (flags & CASE_EQ_FLAG_BMP) {
		// Iterate over a snapshot, since `result` grows inside the loop.
		for (const cp of [codePoint].concat(result)) {
			// Fast path for ASCII characters
			if (cp >= 0x41 && cp <= 0x5a) {
				result.push(cp + 0x20);
			} else if (cp >= 0x61 && cp <= 0x7a) {
				result.push(cp - 0x20);
			} else {
				result = result.concat(iBMPMappings.get(cp) || []);
			}
		}
	}
	return result.length == 0 ? false : result;
};
259
// Maps a code point to its simple case folding: ASCII uppercase letters map
// to lowercase, other ASCII is unchanged, and everything else is looked up in
// `iuFoldings` (falling back to the code point itself).
// https://tc39.es/ecma262/#sec-maybesimplecasefolding
const simpleCaseFolding = (codePoint) => {
	const isAscii = codePoint <= 0x7F;
	if (!isAscii) {
		return iuFoldings.get(codePoint) || codePoint;
	}
	// Fast path for ASCII characters
	const isAsciiUppercase = codePoint >= 0x41 && codePoint <= 0x5A;
	return isAsciiUppercase ? codePoint + 0x20 : codePoint;
};
271
/**
 * Builds the callbacks that fold one operand of a character class into an
 * accumulator `data` object (see `getCharacterClassEmptyData`).
 *
 * Every handler exposes the same five callbacks, one per operand shape:
 *   - `single(data, cp)`                        a single code point
 *   - `regSet(data, set)`                       a regenerate set
 *   - `range(data, start, end)`                 an inclusive code point range
 *   - `iuRange(data, start, end, caseEqFlags)`  the case equivalents of a range
 *   - `nested(data, nestedData)`                data computed for a nested class
 *
 * @param {string} action one of 'union', 'union-negative', 'intersection'
 *   or 'subtraction'
 * @returns {object} the handler for `action`
 * @throws {Error} when `action` is not one of the supported actions
 */
const buildHandler = (action) => {
	switch (action) {
		case 'union':
			// Operands are simply added to the accumulator.
			return {
				single: (data, cp) => {
					data.singleChars.add(cp);
				},
				regSet: (data, set2) => {
					data.singleChars.add(set2);
				},
				range: (data, start, end) => {
					data.singleChars.addRange(start, end);
				},
				iuRange: (data, start, end, caseEqFlags) => {
					data.singleChars.iuAddRange(start, end, caseEqFlags);
				},
				nested: (data, nestedData) => {
					data.singleChars.add(nestedData.singleChars);
					for (const str of nestedData.longStrings) data.longStrings.add(str);
					if (nestedData.maybeIncludesStrings) data.maybeIncludesStrings = true;
				}
			};
		case 'union-negative': {
			// The complement of the operand (relative to UNICODE_SET) is united
			// with what has been accumulated so far.
			const regSet = (data, set2) => {
				data.singleChars = UNICODE_SET.clone().remove(set2).add(data.singleChars);
			};
			return {
				single: (data, cp) => {
					const unicode = UNICODE_SET.clone();
					data.singleChars = data.singleChars.contains(cp) ? unicode : unicode.remove(cp);
				},
				regSet: regSet,
				range: (data, start, end) => {
					data.singleChars = UNICODE_SET.clone().removeRange(start, end).add(data.singleChars);
				},
				iuRange: (data, start, end, caseEqFlags) => {
					data.singleChars = UNICODE_SET.clone().iuRemoveRange(start, end, caseEqFlags).add(data.singleChars);
				},
				nested: (data, nestedData) => {
					regSet(data, nestedData.singleChars);
					if (nestedData.maybeIncludesStrings) throw new Error('ASSERTION ERROR');
				}
			};
		}
		case 'intersection': {
			// The first operand (`data.first`) seeds the accumulator; later
			// operands are intersected with it. Operands that cannot contain
			// strings also clear any accumulated strings.
			const regSet = (data, set2) => {
				if (data.first) data.singleChars = set2;
				else data.singleChars.intersection(set2);
			};
			return {
				single: (data, cp) => {
					data.singleChars = (data.first || data.singleChars.contains(cp)) ? regenerate(cp) : regenerate();
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				regSet: (data, set) => {
					regSet(data, set);
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				range: (data, start, end) => {
					if (data.first) data.singleChars.addRange(start, end);
					else data.singleChars.intersection(regenerate().addRange(start, end));
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				iuRange: (data, start, end, caseEqFlags) => {
					if (data.first) data.singleChars.iuAddRange(start, end, caseEqFlags);
					else data.singleChars.intersection(regenerate().iuAddRange(start, end, caseEqFlags));
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				nested: (data, nestedData) => {
					regSet(data, nestedData.singleChars);

					if (data.first) {
						data.longStrings = nestedData.longStrings;
						data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
					} else {
						// Keep only the strings present on both sides.
						for (const str of data.longStrings) {
							if (!nestedData.longStrings.has(str)) data.longStrings.delete(str);
						}
						if (!nestedData.maybeIncludesStrings) data.maybeIncludesStrings = false;
					}
				}
			};
		}
		case 'subtraction': {
			// The first operand (`data.first`) seeds the accumulator; later
			// operands are removed from it.
			const regSet = (data, set2) => {
				if (data.first) data.singleChars.add(set2);
				else data.singleChars.remove(set2);
			};
			return {
				single: (data, cp) => {
					if (data.first) data.singleChars.add(cp);
					else data.singleChars.remove(cp);
				},
				regSet: regSet,
				range: (data, start, end) => {
					if (data.first) data.singleChars.addRange(start, end);
					else data.singleChars.removeRange(start, end);
				},
				iuRange: (data, start, end, caseEqFlags) => {
					if (data.first) data.singleChars.iuAddRange(start, end, caseEqFlags);
					else data.singleChars.iuRemoveRange(start, end, caseEqFlags);
				},
				nested: (data, nestedData) => {
					regSet(data, nestedData.singleChars);

					if (data.first) {
						data.longStrings = nestedData.longStrings;
						data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
					} else {
						// Drop the strings that also occur in the subtrahend.
						for (const str of data.longStrings) {
							if (nestedData.longStrings.has(str)) data.longStrings.delete(str);
						}
					}
				}
			};
		}
		// The `default` clause is only here as a safeguard; it should never be
		// reached. Code coverage tools should ignore it.
		/* node:coverage ignore next */
		default:
			// Report the offending `action`; no character class item is in
			// scope here.
			throw new Error(`Unknown set action: ${ action }`);
	}
};
399
/**
 * Creates a fresh accumulator for character class computation: an empty
 * regenerate set, an empty string set, `first` set to true, and `transformed`
 * initialised from `config.transform.unicodeFlag`.
 */
const getCharacterClassEmptyData = () => {
	const emptyData = {
		transformed: config.transform.unicodeFlag,
		singleChars: regenerate(),
		longStrings: new Set(),
		hasEmptyString: false,
		first: true,
		maybeIncludesStrings: false
	};
	return emptyData;
};
408
/**
 * Returns a new array holding `codePoint` followed by its case equivalents
 * (if `getCaseEquivalents` reports any).
 */
const concatCaseEquivalents = (codePoint, caseEqFlags) => {
	const caseEquivalents = getCaseEquivalents(codePoint, caseEqFlags);
	return caseEquivalents ? [codePoint].concat(caseEquivalents) : [codePoint];
};
416
/**
 * Computes character-class data for a `\q{...}` class-strings item.
 *
 * Single-character strings go into `singleChars` (with their case
 * equivalents). Every other string, including the empty one, is serialised
 * character by character into a pattern fragment stored in `longStrings`.
 */
const computeClassStrings = (classStrings, regenerateOptions, caseEqFlags, shouldApplySCF) => {
	const data = getCharacterClassEmptyData();

	const fold = (codePoint) => (shouldApplySCF ? simpleCaseFolding(codePoint) : codePoint);

	// Serialises one character of a multi-character string.
	const stringifyCharacter = (ch) => {
		const codePoint = fold(ch.codePoint);
		if (caseEqFlags) {
			const alternatives = regenerate(concatCaseEquivalents(codePoint, caseEqFlags));
			return alternatives.toString(regenerateOptions);
		}
		// Only go through regenerate when folding actually changed the
		// character; otherwise keep the parser's node text.
		return codePoint !== ch.codePoint
			? regenerate(codePoint).toString(regenerateOptions)
			: generate(ch);
	};

	for (const string of classStrings.strings) {
		const characters = string.characters;
		if (characters.length === 1) {
			const codePoint = fold(characters[0].codePoint);
			for (const cp of concatCaseEquivalents(codePoint, caseEqFlags)) {
				data.singleChars.add(cp);
			}
			continue;
		}

		const stringifiedString = characters.reduce(
			(soFar, ch) => soFar + stringifyCharacter(ch),
			''
		);
		data.longStrings.add(stringifiedString);
		data.maybeIncludesStrings = true;
	}

	return data;
};
452
/**
 * Folds every item of a character class node into a data object holding its
 * single code points (`singleChars`), its multi-character strings
 * (`longStrings`) and whether the class needs rewriting (`transformed`).
 *
 * The class `kind` picks the handlers applied to positive and to negated
 * nested classes. For intersection and subtraction classes simple case
 * folding is switched on when the class is in ignore-case mode.
 *
 * @throws {SyntaxError} if a negated class may contain strings
 * @throws {Error} on an unknown class kind or item type
 */
const computeCharacterClass = (characterClassItem, regenerateOptions, shouldApplySCF) => {
	const data = getCharacterClassEmptyData();
	const caseEqFlags = configGetCaseEqFlags();
	const classKind = characterClassItem.kind;

	let handlePositive;
	let handleNegative;
	if (classKind === 'union') {
		handlePositive = buildHandler('union');
		handleNegative = buildHandler('union-negative');
	} else if (classKind === 'intersection' || classKind === 'subtraction') {
		// A negated operand of an intersection behaves like a subtraction,
		// and vice versa.
		handlePositive = buildHandler(classKind);
		handleNegative = buildHandler(classKind === 'intersection' ? 'subtraction' : 'intersection');
		if (config.transform.unicodeSetsFlag) {
			data.transformed = true;
		}
		if (config.isIgnoreCaseMode) {
			shouldApplySCF = true;
		}
	} else {
		// Only here as a safeguard; it should never be reached.
		/* node:coverage ignore next */
		throw new Error(`Unknown character class kind: ${ characterClassItem.kind }`);
	}

	for (const item of characterClassItem.body) {
		switch (item.type) {
			case 'value': {
				const foldedCodePoint = shouldApplySCF ? simpleCaseFolding(item.codePoint) : item.codePoint;
				const equivalents = concatCaseEquivalents(foldedCodePoint, caseEqFlags);
				handlePositive.regSet(data, regenerate(equivalents));
				if (equivalents.length > 1) {
					data.transformed = true;
				}
				break;
			}
			case 'characterClassRange': {
				const min = item.min.codePoint;
				const max = item.max.codePoint;
				if (shouldApplySCF) {
					const folded = [];
					for (let cp = min; cp <= max; cp++) {
						folded.push(simpleCaseFolding(cp));
					}
					handlePositive.regSet(data, regenerate(folded));
				} else {
					handlePositive.range(data, min, max);
				}
				if (caseEqFlags) {
					// If shouldApplySCF is true, it is still ok to call iuRange because
					// the set [min, max] shares the same case equivalents with scf([min, max])
					handlePositive.iuRange(data, min, max, caseEqFlags);
					data.transformed = true;
				}
				break;
			}
			case 'characterClassEscape': {
				const escapeSet = getCharacterClassEscapeSet(
					item.value,
					config.flags.unicode || config.flags.unicodeSets,
					config.flags.ignoreCase,
					shouldApplySCF
				);
				handlePositive.regSet(data, escapeSet);
				break;
			}
			case 'unicodePropertyEscape': {
				const propertyData = getUnicodePropertyEscapeCharacterClassData(
					item.value,
					item.negative,
					config.flags.unicodeSets && config.isIgnoreCaseMode,
					shouldApplySCF
				);
				handlePositive.nested(data, propertyData);
				data.transformed =
					data.transformed ||
					config.transform.unicodePropertyEscapes ||
					(config.transform.unicodeSetsFlag && (propertyData.maybeIncludesStrings || characterClassItem.kind !== "union" || item.negative));
				break;
			}
			case 'characterClass': {
				const nestedHandler = item.negative ? handleNegative : handlePositive;
				const nestedResult = computeCharacterClass(item, regenerateOptions, shouldApplySCF);
				nestedHandler.nested(data, nestedResult);
				data.transformed = true;
				break;
			}
			case 'classStrings': {
				const stringsData = computeClassStrings(item, regenerateOptions, caseEqFlags, shouldApplySCF);
				handlePositive.nested(data, stringsData);
				data.transformed = true;
				break;
			}
			// The `default` clause is only here as a safeguard; it should never be
			// reached. Code coverage tools should ignore it.
			/* node:coverage ignore next */
			default:
				throw new Error(`Unknown term type: ${ item.type }`);
		}

		// Handlers treat the first operand specially (see `buildHandler`).
		data.first = false;
	}

	if (characterClassItem.negative && data.maybeIncludesStrings) {
		throw new SyntaxError('Cannot negate set containing strings');
	}

	return data;
};
565
/**
 * Rewrites a character class node in place when its computed data is marked
 * as `transformed`; otherwise the node is returned untouched.
 *
 * Positive classes become an alternation of their long strings (longest
 * first) and the serialised code point set. Negated classes are emitted as a
 * negated class, a complemented set or a negative lookahead, depending on the
 * active flags.
 */
const processCharacterClass = (
	characterClassItem,
	regenerateOptions,
	computed = computeCharacterClass(characterClassItem, regenerateOptions)
) => {
	const { singleChars, transformed, longStrings } = computed;
	if (!transformed) {
		return characterClassItem;
	}

	// If single chars already contains some astral character, regenerate (bmpOnly: true) will create valid regex strings
	const bmpOnly = regenerateContainsAstral(singleChars);
	const setStr = singleChars.toString(Object.assign({}, regenerateOptions, { bmpOnly: bmpOnly }));

	if (!characterClassItem.negative) {
		const hasEmptyString = longStrings.has('');
		const pieces = Array.from(longStrings).sort((a, b) => b.length - a.length);

		// Insert the set after the long strings, but before a trailing
		// empty string if there is one.
		if (setStr !== '[]' || longStrings.size === 0) {
			pieces.splice(pieces.length - (hasEmptyString ? 1 : 0), 0, setStr);
		}

		update(characterClassItem, pieces.join('|'));
		return characterClassItem;
	}

	if (config.useUnicodeFlag) {
		const inner = setStr[0] === '[' ? setStr.slice(1, -1) : setStr;
		update(characterClassItem, `[^${inner}]`);
		return characterClassItem;
	}

	const isUnicodeMode = config.flags.unicode || config.flags.unicodeSets;
	if (!isUnicodeMode) {
		update(characterClassItem, `(?!${setStr})[^]`);
		return characterClassItem;
	}

	if (!config.flags.ignoreCase) {
		// Generate negative set directly when case folding is not involved.
		const negativeSet = UNICODE_SET.clone().remove(singleChars);
		update(characterClassItem, negativeSet.toString(regenerateOptions));
		return characterClassItem;
	}

	const astralCharsSet = singleChars.clone().intersection(ASTRAL_SET);
	// Assumption: singleChars do not contain lone surrogates.
	// Regex like /[^\ud800]/u is not supported
	const surrogateOrBMPSetStr = singleChars
		.clone()
		.remove(astralCharsSet)
		.addRange(0xd800, 0xdfff)
		.toString({ bmpOnly: true });
	// Don't generate negative lookahead for astral characters
	// because the case folding is not working anyway as we break
	// code points into surrogate pairs.
	const astralNegativeSetStr = ASTRAL_SET
		.clone()
		.remove(astralCharsSet)
		.toString(regenerateOptions);
	// The transform here does not support lone surrogates.
	update(
		characterClassItem,
		`(?!${surrogateOrBMPSetStr})[^]|${astralNegativeSetStr}`
	);
	return characterClassItem;
};
626
/**
 * Throws if any named reference recorded in `groups.unmatchedReferences`
 * is still unresolved.
 */
const assertNoUnmatchedReferences = (groups) => {
	const unmatchedReferencesNames = Object.keys(groups.unmatchedReferences);
	if (unmatchedReferencesNames.length === 0) {
		return;
	}
	throw new Error(`Unknown group names: ${unmatchedReferencesNames}`);
};
633
/**
 * Processes a group carrying inline modifiers, e.g. `(?i-s:...)`.
 *
 * The modifiers are applied to `config.modifiersData` while the group body is
 * processed, after which a snapshot of the previous values is put back. When
 * modifiers are being transformed the group is turned into a plain
 * non-capturing group.
 */
const processModifiers = (item, regenerateOptions, groups) => {
	const { enabling, disabling } = item.modifierFlags;

	const savedModifiers = Object.assign({}, config.modifiersData);

	for (const flag of enabling) {
		config.modifiersData[flag] = true;
	}
	for (const flag of disabling) {
		config.modifiersData[flag] = false;
	}

	if (config.transform.modifiers) {
		delete item.modifierFlags;
		item.behavior = 'ignore';
	}

	item.body = item.body.map(term => processTerm(term, regenerateOptions, groups));

	config.modifiersData = savedModifiers;

	return item;
};
660
/**
 * Recursively rewrites one AST node according to the active `config`.
 *
 * Most nodes are updated in place and returned. Named references may be
 * replaced by a new node, and groups with inline modifiers are delegated to
 * `processModifiers`. `groups` collects group indexes, names, name conflicts
 * and still-unmatched references while the tree is walked.
 *
 * @throws {Error} on duplicate group names, on properties of strings used
 *   without the `v` flag, and on unknown node types
 */
const processTerm = (item, regenerateOptions, groups) => {
	const processChild = (term) => processTerm(term, regenerateOptions, groups);

	switch (item.type) {
		case 'dot': {
			if (config.transform.unicodeFlag) {
				const dotSet = getUnicodeDotSet(config.isDotAllMode);
				update(item, dotSet.toString(regenerateOptions));
				break;
			}
			// An inline `s` modifier, when present, takes precedence over the flag.
			const rewriteAsDotAll = config.modifiersData.s != null
				? config.modifiersData.s && config.transform.modifiers
				: config.transform.dotAllFlag;
			if (rewriteAsDotAll) {
				// TODO: consider changing this at the regenerate level.
				update(item, '[^]');
			}
			break;
		}
		case 'characterClass':
			item = processCharacterClass(item, regenerateOptions);
			break;
		case 'unicodePropertyEscape': {
			const escapeData = getUnicodePropertyEscapeCharacterClassData(item.value, item.negative, config.flags.unicodeSets && config.isIgnoreCaseMode);
			if (!escapeData.maybeIncludesStrings) {
				if (config.transform.unicodePropertyEscapes || configGetCaseEqFlags()) {
					update(
						item,
						escapeData.singleChars.toString(regenerateOptions)
					);
				}
				break;
			}
			if (!config.flags.unicodeSets) {
				throw new Error(
					'Properties of strings are only supported when using the unicodeSets (v) flag.'
				);
			}
			if (config.transform.unicodeSetsFlag) {
				escapeData.transformed = true;
				item = processCharacterClass(item, regenerateOptions, escapeData);
			}
			break;
		}
		case 'characterClassEscape':
			if (config.transform.unicodeFlag) {
				const escapeSet = getCharacterClassEscapeSet(
					item.value,
					/* config.transform.unicodeFlag implies config.flags.unicode */ true,
					config.flags.ignoreCase
				);
				update(item, escapeSet.toString(regenerateOptions));
			}
			break;
		case 'group': {
			if (item.behavior == 'normal') {
				groups.lastIndex++;
			}
			if (item.name) {
				const groupName = item.name.value;

				if (groups.namesConflicts[groupName]) {
					throw new Error(
						`Group '${ groupName }' has already been defined in this context.`
					);
				}
				groups.namesConflicts[groupName] = true;

				if (config.transform.namedGroups) {
					delete item.name;
				}

				const groupIndex = groups.lastIndex;
				if (!groups.names[groupName]) {
					groups.names[groupName] = [];
				}
				groups.names[groupName].push(groupIndex);

				if (groups.onNamedGroup) {
					groups.onNamedGroup.call(null, groupName, groupIndex);
				}

				if (groups.unmatchedReferences[groupName]) {
					delete groups.unmatchedReferences[groupName];
				}
			}
			if (item.modifierFlags) {
				return processModifiers(item, regenerateOptions, groups);
			}
			item.body = item.body.map(term => processChild(term));
			break;
		}
		case 'quantifier':
			item.body = item.body.map(term => processChild(term));
			break;
		case 'disjunction': {
			// Each alternative gets its own conflict scope inheriting from the
			// outer one, so the same name may appear in different alternatives.
			const outerNamesConflicts = groups.namesConflicts;
			item.body = item.body.map(term => {
				groups.namesConflicts = Object.create(outerNamesConflicts);
				return processChild(term);
			});
			break;
		}
		case 'alternative':
			item.body = flatMap(item.body, term => {
				const processed = processChild(term);
				// Alternatives cannot contain alternatives; flatten them.
				return processed.type === 'alternative' ? processed.body : processed;
			});
			break;
		case 'value': {
			const codePoint = item.codePoint;
			const caseEqFlags = configGetCaseEqFlags();
			const equivalents = concatCaseEquivalents(codePoint, caseEqFlags);
			const isPrintableAsciiSymbol = item.kind === "symbol" && codePoint >= 0x20 && codePoint <= 0x7E;
			if (equivalents.length === 1 && isPrintableAsciiSymbol) {
				// skip regenerate when it is a printable ASCII symbol
				break;
			}
			update(item, regenerate(equivalents).toString(regenerateOptions));
			break;
		}
		case 'reference': {
			if (!item.name) {
				break;
			}
			const referenceName = item.name.value;
			const indexes = groups.names[referenceName];
			if (!indexes) {
				groups.unmatchedReferences[referenceName] = true;
			}

			if (!config.transform.namedGroups) {
				break;
			}

			if (!indexes) {
				// This named reference comes before the group where it’s defined,
				// so it’s always an empty match.
				return {
					'type': 'group',
					'behavior': 'ignore',
					'body': [],
					'raw': '(?:)',
				};
			}

			const numberedReferences = indexes.map(index => ({
				'type': 'reference',
				'matchIndex': index,
				'raw': '\\' + index,
			}));
			if (numberedReferences.length === 1) {
				return numberedReferences[0];
			}
			return {
				'type': 'alternative',
				'body': numberedReferences,
				'raw': numberedReferences.map(term => term.raw).join(''),
			};
		}
		case 'anchor':
			if (config.modifiersData.m && config.transform.modifiers) {
				if (item.kind == 'start') {
					update(item, `(?:^|(?<=${NEWLINE_SET.toString()}))`);
				} else if (item.kind == 'end') {
					update(item, `(?:$|(?=${NEWLINE_SET.toString()}))`);
				}
			}
			break;
		case 'empty':
			// Nothing to do here.
			break;
		// The `default` clause is only here as a safeguard; it should never be
		// reached. Code coverage tools should ignore it.
		/* node:coverage ignore next */
		default:
			throw new Error(`Unknown term type: ${ item.type }`);
	}
	return item;
};
829
// Module-wide state describing the pattern currently being rewritten:
// `flags` mirrors the regex flags, `transform` records which features must be
// rewritten, and `modifiersData` holds the inline modifiers in effect
// (`undefined` when no inline modifier applies).
const config = {
	flags: {
		ignoreCase: false,
		unicode: false,
		unicodeSets: false,
		dotAll: false,
		multiline: false,
	},
	transform: {
		dotAllFlag: false,
		unicodeFlag: false,
		unicodeSetsFlag: false,
		unicodePropertyEscapes: false,
		namedGroups: false,
		modifiers: false,
	},
	modifiersData: {
		i: undefined,
		s: undefined,
		m: undefined,
	},
	// True when the pattern has the `u` or `v` flag and the unicode flag is
	// not being transformed away.
	get useUnicodeFlag() {
		const isUnicodeMode = this.flags.unicode || this.flags.unicodeSets;
		return isUnicodeMode && !this.transform.unicodeFlag;
	},
	// The inline `s` modifier, if defined, overrides the `s` flag.
	get isDotAllMode() {
		const inlineDotAll = this.modifiersData.s;
		if (inlineDotAll !== undefined) {
			return inlineDotAll;
		}
		return this.flags.dotAll;
	},
	// The inline `i` modifier, if defined, overrides the `i` flag.
	get isIgnoreCaseMode() {
		const inlineIgnoreCase = this.modifiersData.i;
		if (inlineIgnoreCase !== undefined) {
			return inlineIgnoreCase;
		}
		return this.flags.ignoreCase;
	}
};
861
/**
 * Validates the user-supplied options object, throwing on the first key that
 * is unknown or carries an unsupported value. A falsy `options` is accepted.
 */
const validateOptions = (options) => {
	if (!options) return;

	const transformOnlyKeys = [
		'dotAllFlag',
		'unicodeFlag',
		'unicodePropertyEscapes',
		'unicodeSetsFlag',
		'namedGroups',
	];
	const callbackKeys = ['onNamedGroup', 'onNewFlags'];

	Object.keys(options).forEach(key => {
		const value = options[key];
		const isDisabled = value == null || value === false;

		if (transformOnlyKeys.includes(key)) {
			if (!isDisabled && value !== 'transform') {
				throw new Error(`.${key} must be false (default) or 'transform'.`);
			}
		} else if (key === 'modifiers') {
			// todo: remove modifiers: 'parse' in regexpu-core v7
			if (!isDisabled && value !== 'parse' && value !== 'transform') {
				throw new Error(`.${key} must be false (default), 'parse' or 'transform'.`);
			}
		} else if (callbackKeys.includes(key)) {
			if (value != null && typeof value !== 'function') {
				throw new Error(`.${key} must be a function.`);
			}
		} else {
			throw new Error(`.${key} is not a valid regexpu-core option.`);
		}
	});
};
894
// Whether `flags` is truthy and includes `flag`; a falsy `flags` yields false.
const hasFlag = (flags, flag) => {
	if (!flags) {
		return false;
	}
	return flags.includes(flag);
};
// Whether option `name` is set to 'transform'; falsy `options` yields false.
const transform = (options, name) => {
	if (!options) {
		return false;
	}
	return options[name] === 'transform';
};
897
/**
 * Rewrites a regular expression pattern so that the features selected in
 * `options` no longer rely on the corresponding syntax or flags.
 *
 * The call resets and repopulates the module-level `config`, parses the
 * pattern, rewrites the tree with `processTerm` and serialises it again.
 *
 * @param {string} pattern the regular expression source
 * @param {string} [flags] the regular expression flags; may be omitted
 * @param {object} [options] see `validateOptions` for the accepted keys;
 *   `onNamedGroup(name, index)` is called for each named group and
 *   `onNewFlags(flags)` receives the flags to use with the rewritten pattern
 * @returns {string} the rewritten pattern
 * @throws {Error} on invalid options or on references to unknown group names
 */
const rewritePattern = (pattern, flags, options) => {
	validateOptions(options);

	config.flags.unicode = hasFlag(flags, 'u');
	config.flags.unicodeSets = hasFlag(flags, 'v');
	config.flags.ignoreCase = hasFlag(flags, 'i');
	config.flags.dotAll = hasFlag(flags, 's');
	config.flags.multiline = hasFlag(flags, 'm');

	config.transform.dotAllFlag = config.flags.dotAll && transform(options, 'dotAllFlag');
	config.transform.unicodeFlag = (config.flags.unicode || config.flags.unicodeSets) && transform(options, 'unicodeFlag');
	config.transform.unicodeSetsFlag = config.flags.unicodeSets && transform(options, 'unicodeSetsFlag');

	// unicodeFlag: 'transform' implies unicodePropertyEscapes: 'transform'
	config.transform.unicodePropertyEscapes = (config.flags.unicode || config.flags.unicodeSets) && (
		transform(options, 'unicodeFlag') || transform(options, 'unicodePropertyEscapes')
	);
	config.transform.namedGroups = transform(options, 'namedGroups');
	config.transform.modifiers = transform(options, 'modifiers');

	config.modifiersData.i = undefined;
	config.modifiersData.s = undefined;
	config.modifiersData.m = undefined;

	const regjsparserFeatures = {
		// Enable every stable RegExp feature by default
		'modifiers': true,
		'unicodePropertyEscape': true,
		'unicodeSet': true,
		'namedGroups': true,
		'lookbehind': true,
	};

	const regenerateOptions = {
		'hasUnicodeFlag': config.useUnicodeFlag,
		'bmpOnly': !config.flags.unicode && !config.flags.unicodeSets
	};

	const groups = {
		'onNamedGroup': options && options.onNamedGroup,
		'lastIndex': 0,
		'names': Object.create(null), // { [name]: Array<index> }
		'namesConflicts': Object.create(null), // { [name]: true }
		'unmatchedReferences': Object.create(null) // { [name]: true }
	};

	const tree = parse(pattern, flags, regjsparserFeatures);

	if (config.transform.modifiers) {
		if (/\(\?[a-z]*-[a-z]+:/.test(pattern)) {
			// the pattern _likely_ contain inline disabled modifiers
			// we need to traverse to make sure that they are actually modifiers and to collect them
			const allDisabledModifiers = Object.create(null);
			const itemStack = [tree];
			let node;
			while (node = itemStack.pop(), node != undefined) {
				if (Array.isArray(node)) {
					Array.prototype.push.apply(itemStack, node);
				} else if (typeof node == 'object' && node != null) {
					for (const key of Object.keys(node)) {
						const value = node[key];
						if (key == 'modifierFlags') {
							for (const flag of value.disabling) {
								allDisabledModifiers[flag] = true;
							}
						} else if (typeof value == 'object' && value != null) {
							itemStack.push(value);
						}
					}
				}
			}
			// A modifier that is disabled somewhere must be tracked explicitly
			// from the start, seeded with the value of the global flag.
			if (allDisabledModifiers.i) {
				config.modifiersData.i = config.flags.ignoreCase;
			}
			if (allDisabledModifiers.m) {
				config.modifiersData.m = config.flags.multiline;
			}
			if (allDisabledModifiers.s) {
				config.modifiersData.s = config.flags.dotAll;
			}
		}
	}

	// Note: `processTerm` mutates `tree` and `groups`.
	processTerm(tree, regenerateOptions, groups);
	assertNoUnmatchedReferences(groups);

	const onNewFlags = options && options.onNewFlags;
	if (onNewFlags) {
		// `flags` may be omitted (see `hasFlag`); treat that as "no flags"
		// instead of failing on `undefined.split`.
		const originalFlags = flags || '';
		let newFlags = originalFlags.split('').filter((flag) => !config.modifiersData[flag]).join('');
		if (config.transform.unicodeSetsFlag) {
			newFlags = newFlags.replace('v', 'u');
		}
		if (config.transform.unicodeFlag) {
			newFlags = newFlags.replace('u', '');
		}
		if (config.transform.dotAllFlag) {
			newFlags = newFlags.replace('s', '');
		}
		onNewFlags(newFlags);
	}

	return generate(tree);
};
1002
1003	module.exports = rewritePattern;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: