Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: trip-planner-front/node_modules/regexpu-core/rewrite-pattern.js@ 76712b2

Last change on this file since 76712b2 was 6a3a178, checked in by Ema <ema_spirova@…>, 3 years ago
initial commit
Property mode set to `100644`
File size: 9.9 KB

Line
1	'use strict';
2
3	const generate = require('regjsgen').generate;
4	const parse = require('regjsparser').parse;
5	const regenerate = require('regenerate');
6	const unicodeMatchProperty = require('unicode-match-property-ecmascript');
7	const unicodeMatchPropertyValue = require('unicode-match-property-value-ecmascript');
8	const iuMappings = require('./data/iu-mappings.js');
9	const ESCAPE_SETS = require('./data/character-class-escape-sets.js');
10
// Every Unicode code point (U+0000..U+10FFFF). Serves as the universe when a
// negated set has to be expressed as "everything except …".
const UNICODE_SET = regenerate().addRange(0x0, 0x10FFFF);

// Only the Basic Multilingual Plane (U+0000..U+FFFF): without the `u` flag,
// the range stops at 0xFFFF. https://mths.be/es6#sec-pattern-semantics
// NOTE(review): not referenced anywhere else in the visible part of this file.
const BMP_SET = regenerate().addRange(0x0, 0xFFFF);

// The code points `/./u` is supposed to match (https://mths.be/es6#sec-atom):
// all of Unicode minus the four `LineTerminator`s
// (https://mths.be/es6#sec-line-terminators).
const DOT_SET_UNICODE = UNICODE_SET.clone().remove(
	0x000A, // Line Feed <LF>
	0x000D, // Carriage Return <CR>
	0x2028, // Line Separator <LS>
	0x2029 // Paragraph Separator <PS>
);
28
// Looks up the precomputed set for a character class escape in `ESCAPE_SETS`.
//   character  - key passed to the table's `.get()` (callers pass the
//                escape's `item.value`).
//   unicode    - truthy selects one of the `UNICODE*` tables; falsy selects
//                `REGULAR`.
//   ignoreCase - only consulted when `unicode` is truthy; selects
//                `UNICODE_IGNORE_CASE` over `UNICODE`.
// Returns whatever the chosen table holds for `character` (`undefined` when
// the key is missing).
const getCharacterClassEscapeSet = (character, unicode, ignoreCase) => {
	if (!unicode) {
		return ESCAPE_SETS.REGULAR.get(character);
	}
	const table = ignoreCase ?
		ESCAPE_SETS.UNICODE_IGNORE_CASE :
		ESCAPE_SETS.UNICODE;
	return table.get(character);
};
38
// Picks the set that `.` stands for when the pattern has the `u` flag:
// every code point when `dotAll` is truthy, otherwise every code point except
// the line terminators. The shared module-level set is returned as-is (not a
// clone), so callers must not mutate it.
const getUnicodeDotSet = (dotAll) => {
	if (dotAll) {
		return UNICODE_SET;
	}
	return DOT_SET_UNICODE;
};
42
// Loads the data module for a Unicode property from the
// `regenerate-unicode-properties` package.
//   property - property name; used as a directory when `value` is given,
//              otherwise as a file name under `Binary_Property/`.
//   value    - optional property value; falsy means "binary property".
// Returns whatever the required module exports.
// Throws an `Error` naming the property/value when the module cannot be
// loaded (any failure inside `require` is reported this way).
const getUnicodePropertyValueSet = (property, value) => {
	let path;
	if (value) {
		path = `${ property }/${ value }`;
	} else {
		path = `Binary_Property/${ property }`;
	}
	try {
		return require(`regenerate-unicode-properties/${ path }.js`);
	} catch (exception) {
		const message =
			`Failed to recognize value \`${ value }\` for property \`${ property }\`.`;
		throw new Error(message);
	}
};
56
// Resolves the single-token form of a property escape, i.e. `\p{Something}`
// without an `=`. The token is first tried as a `General_Category` value and,
// failing that, as a binary property name.
// Returns the set produced by `getUnicodePropertyValueSet`.
// Throws whatever `unicodeMatchProperty` / `getUnicodePropertyValueSet` throw
// when the token is not a binary property either.
const handleLoneUnicodePropertyNameOrValue = (value) => {
	const generalCategory = 'General_Category';
	try {
		// Note: `unicodeMatchPropertyValue` throws on invalid values.
		return getUnicodePropertyValueSet(
			generalCategory,
			unicodeMatchPropertyValue(generalCategory, value)
		);
	} catch (exception) {
		// Deliberately ignored: any failure on the `General_Category` route
		// simply means the binary-property route below gets its turn.
	}
	// Note: `unicodeMatchProperty` throws on invalid properties.
	return getUnicodePropertyValueSet(unicodeMatchProperty(value));
};
70
// Builds the set for a Unicode property escape.
//   value      - the text between the braces: either a lone name/value or
//                `Property=Value`.
//   isNegative - truthy for the negated form; the result is then every code
//                point minus the property's set.
// Always returns a fresh set (a clone, or a clone of `UNICODE_SET` with the
// property's members removed), so the loaded data is never handed out
// directly. Errors from the lookup helpers propagate unchanged.
const getUnicodePropertyEscapeSet = (value, isNegative) => {
	const parts = value.split('=');
	const [firstPart, secondPart] = parts;
	let set;
	if (parts.length === 1) {
		set = handleLoneUnicodePropertyNameOrValue(firstPart);
	} else {
		// The pattern consists of two parts, i.e. `Property=Value`.
		const canonicalProperty = unicodeMatchProperty(firstPart);
		const canonicalValue = unicodeMatchPropertyValue(canonicalProperty, secondPart);
		set = getUnicodePropertyValueSet(canonicalProperty, canonicalValue);
	}
	return isNegative ? UNICODE_SET.clone().remove(set) : set.clone();
};
88
// Adds, for every code point in `min..max` (inclusive), its case-folded
// counterpart (as reported by `caseFold`) to this set. The range itself is
// not added; callers add it separately. `min` is always examined, even when
// `max` is smaller. Returns the set to allow chaining.
// NOTE(review): this is installed on the shared `regenerate` prototype, so it
// becomes visible on every regenerate set in the process.
regenerate.prototype.iuAddRange = function(min, max) {
	let codePoint = min;
	do {
		const counterpart = caseFold(codePoint);
		if (counterpart) {
			this.add(counterpart);
		}
	} while (++codePoint <= max);
	return this;
};
101
// Replaces an AST node in place with the parse result of `pattern`.
//   item    - the node to overwrite; it is mutated via `Object.assign`, so
//             properties the new tree does not define are left as they were.
//   pattern - regular expression source to parse (with the `u` flag when
//             `config.useUnicodeFlag` is set).
// A character class, group or single value is copied over as-is; any other
// top-level node type is first wrapped in a non-capturing group.
const update = (item, pattern) => {
	const parserFlags = config.useUnicodeFlag ? 'u' : '';
	const parsed = parse(pattern, parserFlags);
	const standsAlone =
		parsed.type === 'characterClass' ||
		parsed.type === 'group' ||
		parsed.type === 'value';
	// Wrap the pattern in a non-capturing group unless no wrapping is needed.
	const replacement = standsAlone ? parsed : wrap(parsed, pattern);
	Object.assign(item, replacement);
};
116
// Builds a non-capturing group node (`behavior: 'ignore'`) whose only child is
// `tree` and whose raw source is `pattern` wrapped in `(?:…)`. Neither
// argument is modified.
const wrap = (tree, pattern) => ({
	type: 'group',
	behavior: 'ignore',
	body: [tree],
	raw: `(?:${ pattern })`
});
126
// Looks up the case-folded counterpart of a code point in `iuMappings`.
// Returns the mapped value, or `false` when the table has no (truthy) entry
// for `codePoint`, so callers can test the result with a plain `if`.
const caseFold = (codePoint) => {
	return iuMappings.get(codePoint) || false;
};
130
// Rewrites a character class node into an equivalent, explicitly enumerated
// pattern.
//   characterClassItem - the `characterClass` AST node; it is overwritten in
//                        place (through `update`) and also returned.
//   regenerateOptions  - passed straight to the set's `toString`.
// Every member of the class is collected into one regenerate set. When
// `config.ignoreCase` and `config.unicode` are set but the output will not
// keep the `u` flag (`config.useUnicodeFlag` falsy), case-folded counterparts
// are added by hand. A negated class is emitted as a negative lookahead on
// the collected set followed by `[\s\S]`.
// Throws on a member type it does not know.
const processCharacterClass = (characterClassItem, regenerateOptions) => {
	const foldCaseManually =
		config.ignoreCase && config.unicode && !config.useUnicodeFlag;
	const members = regenerate();
	for (const member of characterClassItem.body) {
		switch (member.type) {
			case 'value': {
				members.add(member.codePoint);
				if (foldCaseManually) {
					const counterpart = caseFold(member.codePoint);
					if (counterpart) {
						members.add(counterpart);
					}
				}
				break;
			}
			case 'characterClassRange': {
				const from = member.min.codePoint;
				const to = member.max.codePoint;
				members.addRange(from, to);
				if (foldCaseManually) {
					members.iuAddRange(from, to);
				}
				break;
			}
			case 'characterClassEscape': {
				const escapeSet = getCharacterClassEscapeSet(
					member.value,
					config.unicode,
					config.ignoreCase
				);
				members.add(escapeSet);
				break;
			}
			case 'unicodePropertyEscape': {
				members.add(getUnicodePropertyEscapeSet(member.value, member.negative));
				break;
			}
			// The `default` clause is only here as a safeguard; it should never be
			// reached. Code coverage tools should ignore it.
			/* istanbul ignore next */
			default:
				throw new Error(`Unknown term type: ${ member.type }`);
		}
	}
	const source = members.toString(regenerateOptions);
	update(
		characterClassItem,
		characterClassItem.negative ? `(?!${source})[\\s\\S]` : source
	);
	return characterClassItem;
};
176
// Turns a named back-reference node into a numbered one, in place: the node
// gets `matchIndex` set to `index` and loses its `name` property.
const updateNamedReference = (item, index) => {
	item.matchIndex = index;
	delete item.name;
};
181
// Verifies that no named back-reference is still waiting for its group.
//   groups - bookkeeping object; only `groups.unmatchedReferences` (an object
//            keyed by group name) is read.
// Throws an `Error` listing the leftover names; returns nothing otherwise.
const assertNoUnmatchedReferences = (groups) => {
	const pendingNames = Object.keys(groups.unmatchedReferences);
	if (pendingNames.length === 0) {
		return;
	}
	throw new Error(`Unknown group names: ${pendingNames}`);
};
188
// Recursively rewrites one AST node (and its children) according to the
// module-level `config`.
//   item              - the node to process; it is mutated in place.
//   regenerateOptions - passed to every regenerate `toString` call.
//   groups            - bookkeeping shared across the whole pattern:
//                       `lastIndex` (capturing groups seen so far), `names`
//                       (name -> index), `unmatchedReferences` (name -> nodes
//                       waiting for their group) and an optional
//                       `onNamedGroup(name, index)` callback.
// Returns the processed node.
// Throws on a duplicate group name (when `config.namedGroup` is set) and on a
// node type it does not know.
const processTerm = (item, regenerateOptions, groups) => {
	switch (item.type) {
		case 'dot':
			if (config.useDotAllFlag) {
				// Leave `.` untouched. NOTE(review): presumably the caller keeps
				// the `s` flag on the output in this mode; confirm.
				break;
			} else if (config.unicode) {
				update(
					item,
					getUnicodeDotSet(config.dotAll).toString(regenerateOptions)
				);
			} else if (config.dotAll) {
				// TODO: consider changing this at the regenerate level.
				update(item, '[\\s\\S]');
			}
			break;
		case 'characterClass':
			item = processCharacterClass(item, regenerateOptions);
			break;
		case 'unicodePropertyEscape':
			if (config.unicodePropertyEscape) {
				update(
					item,
					getUnicodePropertyEscapeSet(item.value, item.negative)
						.toString(regenerateOptions)
				);
			}
			break;
		case 'characterClassEscape':
			update(
				item,
				getCharacterClassEscapeSet(
					item.value,
					config.unicode,
					config.ignoreCase
				).toString(regenerateOptions)
			);
			break;
		case 'group':
			if (item.behavior === 'normal') {
				// Capturing group: it takes the next index.
				groups.lastIndex++;
			}
			if (item.name && config.namedGroup) {
				const name = item.name.value;

				if (groups.names[name]) {
					throw new Error(
						`Multiple groups with the same name (${ name }) are not allowed.`
					);
				}

				const index = groups.lastIndex;
				// Dropping the name turns the node into a plain capturing group.
				delete item.name;

				groups.names[name] = index;
				if (groups.onNamedGroup) {
					groups.onNamedGroup.call(null, name, index);
				}

				// Resolve references that appeared before this group.
				if (groups.unmatchedReferences[name]) {
					groups.unmatchedReferences[name].forEach(reference => {
						updateNamedReference(reference, index);
					});
					delete groups.unmatchedReferences[name];
				}
			}
			/* falls through */
		case 'alternative':
		case 'disjunction':
		case 'quantifier':
			item.body = item.body.map(term => {
				return processTerm(term, regenerateOptions, groups);
			});
			break;
		case 'value': {
			const codePoint = item.codePoint;
			const set = regenerate(codePoint);
			if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) {
				const folded = caseFold(codePoint);
				if (folded) {
					set.add(folded);
				}
			}
			update(item, set.toString(regenerateOptions));
			break;
		}
		case 'reference':
			// Named references are only rewritten when named groups are. With
			// `config.namedGroup` off no group name is ever registered, so
			// tracking the reference would make every `\k<name>` end up being
			// reported as an unknown group; leave the node untouched instead.
			if (item.name && config.namedGroup) {
				const name = item.name.value;
				const index = groups.names[name];
				if (index) {
					updateNamedReference(item, index);
					break;
				}

				if (!groups.unmatchedReferences[name]) {
					groups.unmatchedReferences[name] = [];
				}
				// Keep track of references used before the corresponding group.
				groups.unmatchedReferences[name].push(item);
			}
			break;
		case 'anchor':
		case 'empty':
			// Nothing to do here.
			break;
		// The `default` clause is only here as a safeguard; it should never be
		// reached. Code coverage tools should ignore it.
		/* istanbul ignore next */
		default:
			throw new Error(`Unknown term type: ${ item.type }`);
	}
	return item;
};
302
// Module-level settings read by the rewriting helpers in this file. Every
// field is overwritten by `rewritePattern` on each call, so the values below
// are only the state before the first call.
const config = {
	ignoreCase: false,
	unicode: false,
	dotAll: false,
	useDotAllFlag: false,
	useUnicodeFlag: false,
	unicodePropertyEscape: false,
	namedGroup: false
};
// Entry point: rewrites a regular expression source string.
//   pattern - the regular expression source.
//   flags   - optional flag string; `u`, `i` and `s` are inspected here and
//             the whole string is forwarded to the parser.
//   options - optional object; the keys read are `lookbehind`, `dotAllFlag`,
//             `namedGroup`, `useDotAllFlag`, `useUnicodeFlag`,
//             `unicodePropertyEscape` and `onNamedGroup`.
// Returns the source string generated from the rewritten tree.
// Throws when `dotAllFlag` and `useDotAllFlag` are both set, and propagates
// errors from parsing, processing and the unmatched-reference check.
// Note: the shared module-level `config` is overwritten on every call.
const rewritePattern = (pattern, flags, options) => {
	const hasFlag = (flag) => flags && flags.includes(flag);
	const readOption = (key) => options && options[key];

	config.unicode = hasFlag('u');
	const regjsparserFeatures = {
		unicodePropertyEscape: config.unicode,
		namedGroups: true,
		lookbehind: readOption('lookbehind')
	};
	config.ignoreCase = hasFlag('i');
	const supportDotAllFlag = readOption('dotAllFlag');
	config.dotAll = supportDotAllFlag && hasFlag('s');
	config.namedGroup = readOption('namedGroup');
	config.useDotAllFlag = readOption('useDotAllFlag');
	config.useUnicodeFlag = readOption('useUnicodeFlag');
	config.unicodePropertyEscape = readOption('unicodePropertyEscape');
	if (supportDotAllFlag && config.useDotAllFlag) {
		throw new Error('`useDotAllFlag` and `dotAllFlag` cannot both be true!');
	}

	const regenerateOptions = {
		hasUnicodeFlag: config.useUnicodeFlag,
		bmpOnly: !config.unicode
	};
	const groups = {
		onNamedGroup: readOption('onNamedGroup'),
		lastIndex: 0,
		names: Object.create(null), // { [name]: index }
		unmatchedReferences: Object.create(null) // { [name]: Array<reference> }
	};

	const tree = parse(pattern, flags, regjsparserFeatures);
	// Note: `processTerm` mutates `tree` and `groups`.
	processTerm(tree, regenerateOptions, groups);
	assertNoUnmatchedReferences(groups);
	return generate(tree);
};
345
346	module.exports = rewritePattern;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: