Context Navigation

← Previous Revision
Next Revision →
Normal
Revision Log

source: imaps-frontend/node_modules/regexpu-core/rewrite-pattern.js

main

Last change on this file was 79a0317, checked in by stefan toskovski <stefantoska84@…>, 6 months ago
F4 Finalna Verzija
Property mode set to `100644`
File size: 30.6 KB

Rev	Line
[79a0317]	1	'use strict';
	2
	3	const generate = require('regjsgen').generate;
	4	const parse = require('regjsparser').parse;
	5	const regenerate = require('regenerate');
	6	const unicodeMatchProperty = require('unicode-match-property-ecmascript');
	7	const unicodeMatchPropertyValue = require('unicode-match-property-value-ecmascript');
	8	const iuMappings = require('./data/iu-mappings.js');
	9	const iBMPMappings = require('./data/i-bmp-mappings.js');
	10	const iuFoldings = require('./data/iu-foldings.js');
	11	const ESCAPE_SETS = require('./data/character-class-escape-sets.js');
	12	const { UNICODE_SET, UNICODE_IV_SET } = require('./data/all-characters.js');
	13
	14	function flatMap(array, callback) {
	15	const result = [];
	16	array.forEach(item => {
	17	const res = callback(item);
	18	if (Array.isArray(res)) {
	19	result.push.apply(result, res);
	20	} else {
	21	result.push(res);
	22	}
	23	});
	24	return result;
	25	}
	26
	27	function regenerateContainsAstral(regenerateData) {
	28	const data = regenerateData.data;
	29	return data.length >= 1 && data[data.length - 1] >= 0x10000;
	30	}
	31
	32	// https://tc39.es/ecma262/#prod-SyntaxCharacter
	33	const SYNTAX_CHARS = /[\\^$.*+?()[\]{}\|]/g;
	34
	35	const ASTRAL_SET = regenerate().addRange(0x10000, 0x10FFFF);
	36
	37	const NEWLINE_SET = regenerate().add(
	38	// `LineTerminator`s (https://mths.be/es6#sec-line-terminators):
	39	0x000A, // Line Feed <LF>
	40	0x000D, // Carriage Return <CR>
	41	0x2028, // Line Separator <LS>
	42	0x2029 // Paragraph Separator <PS>
	43	);
	44
	45	// Prepare a Regenerate set containing all code points that are supposed to be
	46	// matched by `/./u`. https://mths.be/es6#sec-atom
	47	const DOT_SET_UNICODE = UNICODE_SET.clone() // all Unicode code points
	48	.remove(NEWLINE_SET);
	49
	50	const getCharacterClassEscapeSet = (character, unicode, ignoreCase, shouldApplySCF) => {
	51	if (unicode) {
	52	if (ignoreCase) {
	53	const result = ESCAPE_SETS.UNICODE_IGNORE_CASE.get(character);
	54	if (shouldApplySCF) {
	55	return ESCAPE_SETS.UNICODESET_IGNORE_CASE.get(character);
	56	} else {
	57	return result;
	58	}
	59	}
	60	return ESCAPE_SETS.UNICODE.get(character);
	61	}
	62	return ESCAPE_SETS.REGULAR.get(character);
	63	};
	64
	65	const getUnicodeDotSet = (dotAll) => {
	66	return dotAll ? UNICODE_SET : DOT_SET_UNICODE;
	67	};
	68
	69	const getUnicodePropertyValueSet = (property, value) => {
	70	const path = value ?
	71	`${ property }/${ value }` :
	72	`Binary_Property/${ property }`;
	73	try {
	74	return require(`regenerate-unicode-properties/${ path }.js`);
	75	} catch (exception) {
	76	throw new Error(
	77	`Failed to recognize value \`${ value }\` for property ` +
	78	`\`${ property }\`.`
	79	);
	80	}
	81	};
	82
	83	const handleLoneUnicodePropertyNameOrValue = (value) => {
	84	// It could be a `General_Category` value or a binary property.
	85	// Note: `unicodeMatchPropertyValue` throws on invalid values.
	86	try {
	87	const property = 'General_Category';
	88	const category = unicodeMatchPropertyValue(property, value);
	89	return getUnicodePropertyValueSet(property, category);
	90	} catch (exception) {}
	91	// It’s not a `General_Category` value, so check if it’s a property
	92	// of strings.
	93	try {
	94	return getUnicodePropertyValueSet('Property_of_Strings', value);
	95	} catch (exception) {}
	96	// Lastly, check if it’s a binary property of single code points.
	97	// Note: `unicodeMatchProperty` throws on invalid properties.
	98	const property = unicodeMatchProperty(value);
	99	return getUnicodePropertyValueSet(property);
	100	};
	101
	102	const getUnicodePropertyEscapeSet = (value, isNegative, isUnicodeSetIgnoreCase) => {
	103	const parts = value.split('=');
	104	const firstPart = parts[0];
	105	let set;
	106	if (parts.length == 1) {
	107	set = handleLoneUnicodePropertyNameOrValue(firstPart);
	108	} else {
	109	// The pattern consists of two parts, i.e. `Property=Value`.
	110	const property = unicodeMatchProperty(firstPart);
	111	const value = unicodeMatchPropertyValue(property, parts[1]);
	112	set = getUnicodePropertyValueSet(property, value);
	113	}
	114	if (isNegative) {
	115	if (set.strings) {
	116	throw new Error('Cannot negate Unicode property of strings');
	117	}
	118	return {
	119	characters: (isUnicodeSetIgnoreCase ? UNICODE_IV_SET : UNICODE_SET).clone().remove(set.characters),
	120	strings: new Set()
	121	};
	122	}
	123	return {
	124	characters: set.characters.clone(),
	125	strings: set.strings
	126	// We need to escape strings like *️⃣ to make sure that they can be safely used in unions.
	127	? new Set(set.strings.map(str => str.replace(SYNTAX_CHARS, '\\$&')))
	128	: new Set()
	129	};
	130	};
	131
	132	const getUnicodePropertyEscapeCharacterClassData = (property, isNegative, isUnicodeSetIgnoreCase, shouldApplySCF) => {
	133	const set = getUnicodePropertyEscapeSet(property, isNegative, isUnicodeSetIgnoreCase);
	134	const data = getCharacterClassEmptyData();
	135	const singleChars = shouldApplySCF ? regenerate(set.characters.toArray().map(ch => simpleCaseFolding(ch))) : set.characters;
	136	const caseEqFlags = configGetCaseEqFlags();
	137	if (caseEqFlags) {
	138	for (const codepoint of singleChars.toArray()) {
	139	const list = getCaseEquivalents(codepoint, caseEqFlags);
	140	if (list) {
	141	singleChars.add(list);
	142	}
	143	}
	144	}
	145	data.singleChars = singleChars;
	146	if (set.strings.size > 0) {
	147	data.longStrings = set.strings;
	148	data.maybeIncludesStrings = true;
	149	}
	150	return data;
	151	};
	152
	153	const CASE_EQ_FLAG_NONE = 0b00;
	154	const CASE_EQ_FLAG_BMP = 0b01;
	155	const CASE_EQ_FLAG_UNICODE = 0b10;
	156
	157	function configGetCaseEqFlags() {
	158	let flags = CASE_EQ_FLAG_NONE;
	159	if (config.modifiersData.i === true) {
	160	if (config.transform.modifiers) {
	161	flags \|= CASE_EQ_FLAG_BMP;
	162	if (config.flags.unicode \|\| config.flags.unicodeSets) {
	163	flags \|= CASE_EQ_FLAG_UNICODE;
	164	}
	165	}
	166	} else if (config.modifiersData.i === undefined) {
	167	if (config.transform.unicodeFlag && config.flags.ignoreCase) {
	168	flags \|= CASE_EQ_FLAG_UNICODE;
	169	}
	170	}
	171	return flags;
	172	}
	173
	174	// Given a range of code points, add any case-equivalent code points in that range
	175	// to a set.
	176	regenerate.prototype.iuAddRange = function(min, max, caseEqFlags) {
	177	const $this = this;
	178	do {
	179	const list = getCaseEquivalents(min, caseEqFlags);
	180	if (list) {
	181	$this.add(list);
	182	}
	183	} while (++min <= max);
	184	return $this;
	185	};
	186	regenerate.prototype.iuRemoveRange = function(min, max, caseEqFlags) {
	187	const $this = this;
	188	do {
	189	const list = getCaseEquivalents(min, caseEqFlags);
	190	if (list) {
	191	$this.remove(list);
	192	}
	193	} while (++min <= max);
	194	return $this;
	195	};
	196
	197	const update = (item, pattern) => {
	198	let tree = parse(pattern, config.useUnicodeFlag ? 'u' : '', {
	199	lookbehind: true,
	200	namedGroups: true,
	201	unicodePropertyEscape: true,
	202	unicodeSet: true,
	203	modifiers: true,
	204	});
	205	switch (tree.type) {
	206	case 'characterClass':
	207	case 'group':
	208	case 'value':
	209	// No wrapping needed.
	210	break;
	211	default:
	212	// Wrap the pattern in a non-capturing group.
	213	tree = wrap(tree, pattern);
	214	}
	215	Object.assign(item, tree);
	216	};
	217
	218	const wrap = (tree, pattern) => {
	219	// Wrap the pattern in a non-capturing group.
	220	return {
	221	'type': 'group',
	222	'behavior': 'ignore',
	223	'body': [tree],
	224	'raw': `(?:${ pattern })`
	225	};
	226	};
	227
	228	/**
	229	* Given any codepoint ch, returns false or an array of characters,
	230	* such that for every c in the array,
	231	* c != ch and Canonicalize(~, c) == Canonicalize(~, ch)
	232	*
	233	* where Canonicalize is defined in
	234	* https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch
	235	* @param {number} codePoint input code point
	236	* @param {number} flags bitwise flags composed of CASE_EQ_FLAG_*
	237	* @returns false \| number[]
	238	*/
	239	const getCaseEquivalents = (codePoint, flags) => {
	240	if (flags === CASE_EQ_FLAG_NONE) {
	241	return false;
	242	}
	243	let result = ((flags & CASE_EQ_FLAG_UNICODE) ? iuMappings.get(codePoint) : undefined) \|\| [];
	244	if (typeof result === "number") result = [result];
	245	if (flags & CASE_EQ_FLAG_BMP) {
	246	for (const cp of [codePoint].concat(result)) {
	247	// Fast path for ASCII characters
	248	if (cp >= 0x41 && cp <= 0x5a) {
	249	result.push(cp + 0x20);
	250	} else if (cp >= 0x61 && cp <= 0x7a) {
	251	result.push(cp - 0x20);
	252	} else {
	253	result = result.concat(iBMPMappings.get(cp) \|\| []);
	254	}
	255	}
	256	}
	257	return result.length == 0 ? false : result;
	258	};
	259
	260	// https://tc39.es/ecma262/#sec-maybesimplecasefolding
	261	const simpleCaseFolding = (codePoint) => {
	262	// Fast path for ASCII characters
	263	if (codePoint <= 0x7F) {
	264	if (codePoint >= 0x41 && codePoint <= 0x5A) {
	265	return codePoint + 0x20;
	266	}
	267	return codePoint;
	268	}
	269	return iuFoldings.get(codePoint) \|\| codePoint;
	270	}
	271
	272	const buildHandler = (action) => {
	273	switch (action) {
	274	case 'union':
	275	return {
	276	single: (data, cp) => {
	277	data.singleChars.add(cp);
	278	},
	279	regSet: (data, set2) => {
	280	data.singleChars.add(set2);
	281	},
	282	range: (data, start, end) => {
	283	data.singleChars.addRange(start, end);
	284	},
	285	iuRange: (data, start, end, caseEqFlags) => {
	286	data.singleChars.iuAddRange(start, end, caseEqFlags);
	287	},
	288	nested: (data, nestedData) => {
	289	data.singleChars.add(nestedData.singleChars);
	290	for (const str of nestedData.longStrings) data.longStrings.add(str);
	291	if (nestedData.maybeIncludesStrings) data.maybeIncludesStrings = true;
	292	}
	293	};
	294	case 'union-negative': {
	295	const regSet = (data, set2) => {
	296	data.singleChars = UNICODE_SET.clone().remove(set2).add(data.singleChars);
	297	};
	298	return {
	299	single: (data, cp) => {
	300	const unicode = UNICODE_SET.clone();
	301	data.singleChars = data.singleChars.contains(cp) ? unicode : unicode.remove(cp);
	302	},
	303	regSet: regSet,
	304	range: (data, start, end) => {
	305	data.singleChars = UNICODE_SET.clone().removeRange(start, end).add(data.singleChars);
	306	},
	307	iuRange: (data, start, end, caseEqFlags) => {
	308	data.singleChars = UNICODE_SET.clone().iuRemoveRange(start, end, caseEqFlags).add(data.singleChars);
	309	},
	310	nested: (data, nestedData) => {
	311	regSet(data, nestedData.singleChars);
	312	if (nestedData.maybeIncludesStrings) throw new Error('ASSERTION ERROR');
	313	}
	314	};
	315	}
	316	case 'intersection': {
	317	const regSet = (data, set2) => {
	318	if (data.first) data.singleChars = set2;
	319	else data.singleChars.intersection(set2);
	320	};
	321	return {
	322	single: (data, cp) => {
	323	data.singleChars = data.first \|\| data.singleChars.contains(cp) ? regenerate(cp) : regenerate();
	324	data.longStrings.clear();
	325	data.maybeIncludesStrings = false;
	326	},
	327	regSet: (data, set) => {
	328	regSet(data, set);
	329	data.longStrings.clear();
	330	data.maybeIncludesStrings = false;
	331	},
	332	range: (data, start, end) => {
	333	if (data.first) data.singleChars.addRange(start, end);
	334	else data.singleChars.intersection(regenerate().addRange(start, end));
	335	data.longStrings.clear();
	336	data.maybeIncludesStrings = false;
	337	},
	338	iuRange: (data, start, end, caseEqFlags) => {
	339	if (data.first) data.singleChars.iuAddRange(start, end, caseEqFlags);
	340	else data.singleChars.intersection(regenerate().iuAddRange(start, end, caseEqFlags));
	341	data.longStrings.clear();
	342	data.maybeIncludesStrings = false;
	343	},
	344	nested: (data, nestedData) => {
	345	regSet(data, nestedData.singleChars);
	346
	347	if (data.first) {
	348	data.longStrings = nestedData.longStrings;
	349	data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
	350	} else {
	351	for (const str of data.longStrings) {
	352	if (!nestedData.longStrings.has(str)) data.longStrings.delete(str);
	353	}
	354	if (!nestedData.maybeIncludesStrings) data.maybeIncludesStrings = false;
	355	}
	356	}
	357	};
	358	}
	359	case 'subtraction': {
	360	const regSet = (data, set2) => {
	361	if (data.first) data.singleChars.add(set2);
	362	else data.singleChars.remove(set2);
	363	};
	364	return {
	365	single: (data, cp) => {
	366	if (data.first) data.singleChars.add(cp);
	367	else data.singleChars.remove(cp);
	368	},
	369	regSet: regSet,
	370	range: (data, start, end) => {
	371	if (data.first) data.singleChars.addRange(start, end);
	372	else data.singleChars.removeRange(start, end);
	373	},
	374	iuRange: (data, start, end, caseEqFlags) => {
	375	if (data.first) data.singleChars.iuAddRange(start, end, caseEqFlags);
	376	else data.singleChars.iuRemoveRange(start, end, caseEqFlags);
	377	},
	378	nested: (data, nestedData) => {
	379	regSet(data, nestedData.singleChars);
	380
	381	if (data.first) {
	382	data.longStrings = nestedData.longStrings;
	383	data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
	384	} else {
	385	for (const str of data.longStrings) {
	386	if (nestedData.longStrings.has(str)) data.longStrings.delete(str);
	387	}
	388	}
	389	}
	390	};
	391	}
	392	// The `default` clause is only here as a safeguard; it should never be
	393	// reached. Code coverage tools should ignore it.
	394	/* node:coverage ignore next */
	395	default:
	396	throw new Error(`Unknown set action: ${ characterClassItem.kind }`);
	397	}
	398	};
	399
	400	const getCharacterClassEmptyData = () => ({
	401	transformed: config.transform.unicodeFlag,
	402	singleChars: regenerate(),
	403	longStrings: new Set(),
	404	hasEmptyString: false,
	405	first: true,
	406	maybeIncludesStrings: false
	407	});
	408
	409	const concatCaseEquivalents = (codePoint, caseEqFlags) => {
	410	const caseEquivalents = getCaseEquivalents(codePoint, caseEqFlags);
	411	if (caseEquivalents) {
	412	return [codePoint, ...caseEquivalents];
	413	}
	414	return [codePoint];
	415	};
	416
	417	const computeClassStrings = (classStrings, regenerateOptions, caseEqFlags, shouldApplySCF) => {
	418	let data = getCharacterClassEmptyData();
	419
	420	for (const string of classStrings.strings) {
	421	if (string.characters.length === 1) {
	422	const codePoint = shouldApplySCF ? simpleCaseFolding(string.characters[0].codePoint) : string.characters[0].codePoint
	423	concatCaseEquivalents(codePoint, caseEqFlags).forEach((cp) => {
	424	data.singleChars.add(cp);
	425	});
	426	} else {
	427	let stringifiedString = '';
	428	if (caseEqFlags) {
	429	for (const ch of string.characters) {
	430	const codePoint = shouldApplySCF ? simpleCaseFolding(ch.codePoint) : ch.codePoint;
	431	const set = regenerate(concatCaseEquivalents(codePoint, caseEqFlags));
	432	stringifiedString += set.toString(regenerateOptions);
	433	}
	434	} else {
	435	for (const ch of string.characters) {
	436	const codePoint = shouldApplySCF ? simpleCaseFolding(ch.codePoint) : ch.codePoint;
	437	if (codePoint !== ch.codePoint) {
	438	stringifiedString += regenerate(codePoint).toString(regenerateOptions);
	439	} else {
	440	stringifiedString += generate(ch);
	441	}
	442	}
	443	}
	444
	445	data.longStrings.add(stringifiedString);
	446	data.maybeIncludesStrings = true;
	447	}
	448	}
	449
	450	return data;
	451	}
	452
	453	const computeCharacterClass = (characterClassItem, regenerateOptions, shouldApplySCF) => {
	454	let data = getCharacterClassEmptyData();
	455
	456	let handlePositive;
	457	let handleNegative;
	458
	459	let caseEqFlags = configGetCaseEqFlags();
	460
	461	switch (characterClassItem.kind) {
	462	case 'union':
	463	handlePositive = buildHandler('union');
	464	handleNegative = buildHandler('union-negative');
	465	break;
	466	case 'intersection':
	467	handlePositive = buildHandler('intersection');
	468	handleNegative = buildHandler('subtraction');
	469	if (config.transform.unicodeSetsFlag) data.transformed = true;
	470	if (config.isIgnoreCaseMode) {
	471	shouldApplySCF = true;
	472	}
	473	break;
	474	case 'subtraction':
	475	handlePositive = buildHandler('subtraction');
	476	handleNegative = buildHandler('intersection');
	477	if (config.transform.unicodeSetsFlag) data.transformed = true;
	478	if (config.isIgnoreCaseMode) {
	479	shouldApplySCF = true;
	480	}
	481	break;
	482	// The `default` clause is only here as a safeguard; it should never be
	483	// reached. Code coverage tools should ignore it.
	484	/* node:coverage ignore next */
	485	default:
	486	throw new Error(`Unknown character class kind: ${ characterClassItem.kind }`);
	487	}
	488
	489	for (const item of characterClassItem.body) {
	490	switch (item.type) {
	491	case 'value':
	492	const codePoint = shouldApplySCF ? simpleCaseFolding(item.codePoint) : item.codePoint;
	493	const list = concatCaseEquivalents(codePoint, caseEqFlags);
	494	handlePositive.regSet(data, regenerate(list));
	495	if (list.length > 1) {
	496	data.transformed = true;
	497	}
	498	break;
	499	case 'characterClassRange':
	500	const min = item.min.codePoint;
	501	const max = item.max.codePoint;
	502	if (shouldApplySCF) {
	503	let list = [];
	504	for (let cp = min; cp <= max; cp++) {
	505	list.push(simpleCaseFolding(cp));
	506	}
	507	handlePositive.regSet(data, regenerate(list));
	508	} else {
	509	handlePositive.range(data, min, max);
	510	}
	511	if (caseEqFlags) {
	512	// If shouldApplySCF is true, it is still ok to call iuRange because
	513	// the set [min, max] shares the same case equivalents with scf([min, max])
	514	handlePositive.iuRange(data, min, max, caseEqFlags);
	515	data.transformed = true;
	516	}
	517	break;
	518	case 'characterClassEscape':
	519	handlePositive.regSet(data, getCharacterClassEscapeSet(
	520	item.value,
	521	config.flags.unicode \|\| config.flags.unicodeSets,
	522	config.flags.ignoreCase,
	523	shouldApplySCF
	524	));
	525	break;
	526	case 'unicodePropertyEscape':
	527	const nestedData = getUnicodePropertyEscapeCharacterClassData(
	528	item.value,
	529	item.negative,
	530	config.flags.unicodeSets && config.isIgnoreCaseMode,
	531	shouldApplySCF
	532	);
	533	handlePositive.nested(data, nestedData);
	534	data.transformed =
	535	data.transformed \|\|
	536	config.transform.unicodePropertyEscapes \|\|
	537	(config.transform.unicodeSetsFlag && (nestedData.maybeIncludesStrings \|\| characterClassItem.kind !== "union" \|\| item.negative));
	538	break;
	539	case 'characterClass':
	540	const handler = item.negative ? handleNegative : handlePositive;
	541	const res = computeCharacterClass(item, regenerateOptions, shouldApplySCF);
	542	handler.nested(data, res);
	543	data.transformed = true;
	544	break;
	545	case 'classStrings':
	546	handlePositive.nested(data, computeClassStrings(item, regenerateOptions, caseEqFlags, shouldApplySCF));
	547	data.transformed = true;
	548	break;
	549	// The `default` clause is only here as a safeguard; it should never be
	550	// reached. Code coverage tools should ignore it.
	551	/* node:coverage ignore next */
	552	default:
	553	throw new Error(`Unknown term type: ${ item.type }`);
	554	}
	555
	556	data.first = false;
	557	}
	558
	559	if (characterClassItem.negative && data.maybeIncludesStrings) {
	560	throw new SyntaxError('Cannot negate set containing strings');
	561	}
	562
	563	return data;
	564	}
	565
	566	const processCharacterClass = (
	567	characterClassItem,
	568	regenerateOptions,
	569	computed = computeCharacterClass(characterClassItem, regenerateOptions)
	570	) => {
	571	const negative = characterClassItem.negative;
	572	const { singleChars, transformed, longStrings } = computed;
	573	if (transformed) {
	574	// If single chars already contains some astral character, regenerate (bmpOnly: true) will create valid regex strings
	575	const bmpOnly = regenerateContainsAstral(singleChars);
	576	const setStr = singleChars.toString(Object.assign({}, regenerateOptions, { bmpOnly: bmpOnly }));
	577
	578	if (negative) {
	579	if (config.useUnicodeFlag) {
	580	update(characterClassItem, `[^${setStr[0] === '[' ? setStr.slice(1, -1) : setStr}]`)
	581	} else {
	582	if (config.flags.unicode \|\| config.flags.unicodeSets) {
	583	if (config.flags.ignoreCase) {
	584	const astralCharsSet = singleChars.clone().intersection(ASTRAL_SET);
	585	// Assumption: singleChars do not contain lone surrogates.
	586	// Regex like /[^\ud800]/u is not supported
	587	const surrogateOrBMPSetStr = singleChars
	588	.clone()
	589	.remove(astralCharsSet)
	590	.addRange(0xd800, 0xdfff)
	591	.toString({ bmpOnly: true });
	592	// Don't generate negative lookahead for astral characters
	593	// because the case folding is not working anyway as we break
	594	// code points into surrogate pairs.
	595	const astralNegativeSetStr = ASTRAL_SET
	596	.clone()
	597	.remove(astralCharsSet)
	598	.toString(regenerateOptions);
	599	// The transform here does not support lone surrogates.
	600	update(
	601	characterClassItem,
	602	`(?!${surrogateOrBMPSetStr})[^]\|${astralNegativeSetStr}`
	603	);
	604	} else {
	605	// Generate negative set directly when case folding is not involved.
	606	const negativeSet = UNICODE_SET.clone().remove(singleChars);
	607	update(characterClassItem, negativeSet.toString(regenerateOptions));
	608	}
	609	} else {
	610	update(characterClassItem, `(?!${setStr})[^]`);
	611	}
	612	}
	613	} else {
	614	const hasEmptyString = longStrings.has('');
	615	const pieces = Array.from(longStrings).sort((a, b) => b.length - a.length);
	616
	617	if (setStr !== '[]' \|\| longStrings.size === 0) {
	618	pieces.splice(pieces.length - (hasEmptyString ? 1 : 0), 0, setStr);
	619	}
	620
	621	update(characterClassItem, pieces.join('\|'));
	622	}
	623	}
	624	return characterClassItem;
	625	};
	626
	627	const assertNoUnmatchedReferences = (groups) => {
	628	const unmatchedReferencesNames = Object.keys(groups.unmatchedReferences);
	629	if (unmatchedReferencesNames.length > 0) {
	630	throw new Error(`Unknown group names: ${unmatchedReferencesNames}`);
	631	}
	632	};
	633
	634	const processModifiers = (item, regenerateOptions, groups) => {
	635	const enabling = item.modifierFlags.enabling;
	636	const disabling = item.modifierFlags.disabling;
	637
	638	const oldData = Object.assign({}, config.modifiersData);
	639
	640	for (const flag of enabling) {
	641	config.modifiersData[flag] = true;
	642	}
	643	for (const flag of disabling) {
	644	config.modifiersData[flag] = false;
	645	}
	646
	647	if (config.transform.modifiers) {
	648	delete item.modifierFlags;
	649	item.behavior = 'ignore';
	650	}
	651
	652	item.body = item.body.map(term => {
	653	return processTerm(term, regenerateOptions, groups);
	654	});
	655
	656	config.modifiersData = oldData;
	657
	658	return item;
	659	}
	660
	661	const processTerm = (item, regenerateOptions, groups) => {
	662	switch (item.type) {
	663	case 'dot':
	664	if (config.transform.unicodeFlag) {
	665	update(
	666	item,
	667	getUnicodeDotSet(config.isDotAllMode).toString(regenerateOptions)
	668	);
	669	} else if ((config.modifiersData.s != null ? config.modifiersData.s && config.transform.modifiers : config.transform.dotAllFlag)) {
	670	// TODO: consider changing this at the regenerate level.
	671	update(item, '[^]');
	672	}
	673	break;
	674	case 'characterClass':
	675	item = processCharacterClass(item, regenerateOptions);
	676	break;
	677	case 'unicodePropertyEscape':
	678	const data = getUnicodePropertyEscapeCharacterClassData(item.value, item.negative, config.flags.unicodeSets && config.isIgnoreCaseMode);
	679	if (data.maybeIncludesStrings) {
	680	if (!config.flags.unicodeSets) {
	681	throw new Error(
	682	'Properties of strings are only supported when using the unicodeSets (v) flag.'
	683	);
	684	}
	685	if (config.transform.unicodeSetsFlag) {
	686	data.transformed = true;
	687	item = processCharacterClass(item, regenerateOptions, data);
	688	}
	689	} else if (config.transform.unicodePropertyEscapes \|\| configGetCaseEqFlags()) {
	690	update(
	691	item,
	692	data.singleChars.toString(regenerateOptions)
	693	);
	694	}
	695	break;
	696	case 'characterClassEscape':
	697	if (config.transform.unicodeFlag) {
	698	update(
	699	item,
	700	getCharacterClassEscapeSet(
	701	item.value,
	702	/* config.transform.unicodeFlag implies config.flags.unicode */ true,
	703	config.flags.ignoreCase
	704	).toString(regenerateOptions)
	705	);
	706	}
	707	break;
	708	case 'group':
	709	if (item.behavior == 'normal') {
	710	groups.lastIndex++;
	711	}
	712	if (item.name) {
	713	const name = item.name.value;
	714
	715	if (groups.namesConflicts[name]) {
	716	throw new Error(
	717	`Group '${ name }' has already been defined in this context.`
	718	);
	719	}
	720	groups.namesConflicts[name] = true;
	721
	722	if (config.transform.namedGroups) {
	723	delete item.name;
	724	}
	725
	726	const index = groups.lastIndex;
	727	if (!groups.names[name]) {
	728	groups.names[name] = [];
	729	}
	730	groups.names[name].push(index);
	731
	732	if (groups.onNamedGroup) {
	733	groups.onNamedGroup.call(null, name, index);
	734	}
	735
	736	if (groups.unmatchedReferences[name]) {
	737	delete groups.unmatchedReferences[name];
	738	}
	739	}
	740	if (item.modifierFlags) {
	741	return processModifiers(item, regenerateOptions, groups);
	742	}
	743	/* falls through */
	744	case 'quantifier':
	745	item.body = item.body.map(term => {
	746	return processTerm(term, regenerateOptions, groups);
	747	});
	748	break;
	749	case 'disjunction':
	750	const outerNamesConflicts = groups.namesConflicts;
	751	item.body = item.body.map(term => {
	752	groups.namesConflicts = Object.create(outerNamesConflicts);
	753	return processTerm(term, regenerateOptions, groups);
	754	});
	755	break;
	756	case 'alternative':
	757	item.body = flatMap(item.body, term => {
	758	const res = processTerm(term, regenerateOptions, groups);
	759	// Alternatives cannot contain alternatives; flatten them.
	760	return res.type === 'alternative' ? res.body : res;
	761	});
	762	break;
	763	case 'value':
	764	const codePoint = item.codePoint;
	765	const caseEqFlags = configGetCaseEqFlags();
	766	const list = concatCaseEquivalents(codePoint, caseEqFlags);
	767	if (list.length === 1 && item.kind === "symbol" && codePoint >= 0x20 && codePoint <= 0x7E) {
	768	// skip regenerate when it is a printable ASCII symbol
	769	break;
	770	}
	771	const set = regenerate(list);
	772	update(item, set.toString(regenerateOptions));
	773	break;
	774	case 'reference':
	775	if (item.name) {
	776	const name = item.name.value;
	777	const indexes = groups.names[name];
	778	if (!indexes) {
	779	groups.unmatchedReferences[name] = true;
	780	}
	781
	782	if (config.transform.namedGroups) {
	783	if (indexes) {
	784	const body = indexes.map(index => ({
	785	'type': 'reference',
	786	'matchIndex': index,
	787	'raw': '\\' + index,
	788	}));
	789	if (body.length === 1) {
	790	return body[0];
	791	}
	792	return {
	793	'type': 'alternative',
	794	'body': body,
	795	'raw': body.map(term => term.raw).join(''),
	796	};
	797	}
	798
	799	// This named reference comes before the group where it’s defined,
	800	// so it’s always an empty match.
	801	return {
	802	'type': 'group',
	803	'behavior': 'ignore',
	804	'body': [],
	805	'raw': '(?:)',
	806	};
	807	}
	808	}
	809	break;
	810	case 'anchor':
	811	if (config.modifiersData.m && config.transform.modifiers) {
	812	if (item.kind == 'start') {
	813	update(item, `(?:^\|(?<=${NEWLINE_SET.toString()}))`);
	814	} else if (item.kind == 'end') {
	815	update(item, `(?:$\|(?=${NEWLINE_SET.toString()}))`);
	816	}
	817	}
	818	case 'empty':
	819	// Nothing to do here.
	820	break;
	821	// The `default` clause is only here as a safeguard; it should never be
	822	// reached. Code coverage tools should ignore it.
	823	/* node:coverage ignore next */
	824	default:
	825	throw new Error(`Unknown term type: ${ item.type }`);
	826	}
	827	return item;
	828	};
	829
	830	const config = {
	831	'flags': {
	832	'ignoreCase': false,
	833	'unicode': false,
	834	'unicodeSets': false,
	835	'dotAll': false,
	836	'multiline': false,
	837	},
	838	'transform': {
	839	'dotAllFlag': false,
	840	'unicodeFlag': false,
	841	'unicodeSetsFlag': false,
	842	'unicodePropertyEscapes': false,
	843	'namedGroups': false,
	844	'modifiers': false,
	845	},
	846	'modifiersData': {
	847	'i': undefined,
	848	's': undefined,
	849	'm': undefined,
	850	},
	851	get useUnicodeFlag() {
	852	return (this.flags.unicode \|\| this.flags.unicodeSets) && !this.transform.unicodeFlag;
	853	},
	854	get isDotAllMode() {
	855	return (this.modifiersData.s !== undefined ? this.modifiersData.s : this.flags.dotAll);
	856	},
	857	get isIgnoreCaseMode() {
	858	return (this.modifiersData.i !== undefined ? this.modifiersData.i : this.flags.ignoreCase);
	859	}
	860	};
	861
	862	const validateOptions = (options) => {
	863	if (!options) return;
	864
	865	for (const key of Object.keys(options)) {
	866	const value = options[key];
	867	switch (key) {
	868	case 'dotAllFlag':
	869	case 'unicodeFlag':
	870	case 'unicodePropertyEscapes':
	871	case 'unicodeSetsFlag':
	872	case 'namedGroups':
	873	if (value != null && value !== false && value !== 'transform') {
	874	throw new Error(`.${key} must be false (default) or 'transform'.`);
	875	}
	876	break;
	877	// todo: remove modifiers: 'parse' in regexpu-core v7
	878	case 'modifiers':
	879	if (value != null && value !== false && value !== 'parse' && value !== 'transform') {
	880	throw new Error(`.${key} must be false (default), 'parse' or 'transform'.`);
	881	}
	882	break;
	883	case 'onNamedGroup':
	884	case 'onNewFlags':
	885	if (value != null && typeof value !== 'function') {
	886	throw new Error(`.${key} must be a function.`);
	887	}
	888	break;
	889	default:
	890	throw new Error(`.${key} is not a valid regexpu-core option.`);
	891	}
	892	}
	893	};
	894
	895	const hasFlag = (flags, flag) => flags ? flags.includes(flag) : false;
	896	const transform = (options, name) => options ? options[name] === 'transform' : false;
	897
	898	const rewritePattern = (pattern, flags, options) => {
	899	validateOptions(options);
	900
	901	config.flags.unicode = hasFlag(flags, 'u');
	902	config.flags.unicodeSets = hasFlag(flags, 'v');
	903	config.flags.ignoreCase = hasFlag(flags, 'i');
	904	config.flags.dotAll = hasFlag(flags, 's');
	905	config.flags.multiline = hasFlag(flags, 'm');
	906
	907	config.transform.dotAllFlag = config.flags.dotAll && transform(options, 'dotAllFlag');
	908	config.transform.unicodeFlag = (config.flags.unicode \|\| config.flags.unicodeSets) && transform(options, 'unicodeFlag');
	909	config.transform.unicodeSetsFlag = config.flags.unicodeSets && transform(options, 'unicodeSetsFlag');
	910
	911	// unicodeFlag: 'transform' implies unicodePropertyEscapes: 'transform'
	912	config.transform.unicodePropertyEscapes = (config.flags.unicode \|\| config.flags.unicodeSets) && (
	913	transform(options, 'unicodeFlag') \|\| transform(options, 'unicodePropertyEscapes')
	914	);
	915	config.transform.namedGroups = transform(options, 'namedGroups');
	916	config.transform.modifiers = transform(options, 'modifiers');
	917
	918	config.modifiersData.i = undefined;
	919	config.modifiersData.s = undefined;
	920	config.modifiersData.m = undefined;
	921
	922	const regjsparserFeatures = {
	923	// Enable every stable RegExp feature by default
	924	'modifiers': true,
	925	'unicodePropertyEscape': true,
	926	'unicodeSet': true,
	927	'namedGroups': true,
	928	'lookbehind': true,
	929	};
	930
	931	const regenerateOptions = {
	932	'hasUnicodeFlag': config.useUnicodeFlag,
	933	'bmpOnly': !config.flags.unicode && !config.flags.unicodeSets
	934	};
	935
	936	const groups = {
	937	'onNamedGroup': options && options.onNamedGroup,
	938	'lastIndex': 0,
	939	'names': Object.create(null), // { [name]: Array<index> }
	940	'namesConflicts': Object.create(null), // { [name]: true }
	941	'unmatchedReferences': Object.create(null) // { [name]: true }
	942	};
	943
	944	const tree = parse(pattern, flags, regjsparserFeatures);
	945
	946	if (config.transform.modifiers) {
	947	if (/\(\?[a-z]*-[a-z]+:/.test(pattern)) {
	948	// the pattern _likely_ contain inline disabled modifiers
	949	// we need to traverse to make sure that they are actually modifiers and to collect them
	950	const allDisabledModifiers = Object.create(null)
	951	const itemStack = [tree];
	952	let node;
	953	while (node = itemStack.pop(), node != undefined) {
	954	if (Array.isArray(node)) {
	955	Array.prototype.push.apply(itemStack, node);
	956	} else if (typeof node == 'object' && node != null) {
	957	for (const key of Object.keys(node)) {
	958	const value = node[key];
	959	if (key == 'modifierFlags') {
	960	for (const flag of value.disabling) {
	961	allDisabledModifiers[flag] = true;
	962	}
	963	} else if (typeof value == 'object' && value != null) {
	964	itemStack.push(value);
	965	}
	966	}
	967	}
	968	}
	969	if (allDisabledModifiers.i) {
	970	config.modifiersData.i = config.flags.ignoreCase;
	971	}
	972	if (allDisabledModifiers.m) {
	973	config.modifiersData.m = config.flags.multiline;
	974	}
	975	if (allDisabledModifiers.s) {
	976	config.modifiersData.s = config.flags.dotAll;
	977	}
	978	}
	979	}
	980
	981	// Note: `processTerm` mutates `tree` and `groups`.
	982	processTerm(tree, regenerateOptions, groups);
	983	assertNoUnmatchedReferences(groups);
	984
	985	const onNewFlags = options && options.onNewFlags;
	986	if (onNewFlags) {
	987	let newFlags = flags.split('').filter((flag) => !config.modifiersData[flag]).join('');
	988	if (config.transform.unicodeSetsFlag) {
	989	newFlags = newFlags.replace('v', 'u');
	990	}
	991	if (config.transform.unicodeFlag) {
	992	newFlags = newFlags.replace('u', '');
	993	}
	994	if (config.transform.dotAllFlag) {
	995	newFlags = newFlags.replace('s', '');
	996	}
	997	onNewFlags(newFlags);
	998	}
	999
	1000	return generate(tree);
	1001	};
	1002
	1003	module.exports = rewritePattern;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: