[79a0317] | 1 | /*!
|
---|
| 2 | * regjsgen 0.8.0
|
---|
| 3 | * Copyright 2014-2023 Benjamin Tan <https://ofcr.se/>
|
---|
| 4 | * Available under the MIT license <https://github.com/bnjmnt4n/regjsgen/blob/main/LICENSE-MIT.txt>
|
---|
| 5 | */
|
---|
| 6 | ;(function() {
|
---|
| 7 | 'use strict';
|
---|
| 8 |
|
---|
// Lookup table of the `typeof` results that denote the language type `Object`.
var objectTypes = {
  'function': true,
  'object': true
};

// Reference to the global object: `window` when it exists as a truthy
// object/function, otherwise whatever `this` is bound to here.
var root = this;
if (objectTypes[typeof window] && window) {
  root = window;
}

// Detect the free variable `exports`; a value carrying a truthy `nodeType`
// is rejected. `typeof` is applied directly so an undeclared name never throws.
var freeExports =
  objectTypes[typeof exports] &&
  exports &&
  !exports.nodeType &&
  exports;

// Detect the free variable `module`, with the same `nodeType` rejection.
var hasFreeModule =
  objectTypes[typeof module] &&
  module &&
  !module.nodeType;

// Detect the free variable `global` from Node.js or Browserified code. It
// replaces `root` only when it refers back to itself through one of its
// `global`, `window` or `self` properties.
var freeGlobal = freeExports && hasFreeModule && typeof global == 'object' && global;
if (freeGlobal) {
  var isSelfReferential =
    freeGlobal.global === freeGlobal ||
    freeGlobal.window === freeGlobal ||
    freeGlobal.self === freeGlobal;

  if (isSelfReferential) {
    root = freeGlobal;
  }
}

// Shared reference used for own-property checks.
var hasOwnProperty = Object.prototype.hasOwnProperty;
|
---|
| 32 |
|
---|
| 33 | /*--------------------------------------------------------------------------*/
|
---|
| 34 |
|
---|
// Converts a single code point (read from the first argument and coerced with
// `Number`) into a string.
// Based on https://mths.be/fromcodepoint by @mathias.
//
// Returns one UTF-16 code unit for values up to 0xFFFF and a surrogate pair
// for anything above that.
// Throws a `RangeError` when the value is not a finite integer in the range
// 0..0x10FFFF.
function fromCodePoint() {
  var value = Number(arguments[0]);

  var isValidCodePoint =
    isFinite(value) &&            // rules out `NaN` and both infinities
    value >= 0 &&
    value <= 0x10FFFF &&
    Math.floor(value) == value;   // must be an integer

  if (!isValidCodePoint) {
    throw RangeError('Invalid code point: ' + value);
  }

  if (value > 0xFFFF) {
    // Astral code point: split it into surrogate halves.
    // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
    var offset = value - 0x10000;
    var lead = 0xD800 + (offset >> 10);
    var trail = 0xDC00 + (offset & 0x3FF);
    return String.fromCharCode(lead, trail);
  }

  // BMP code point: a single code unit is enough.
  return String.fromCharCode(value);
}
|
---|
| 61 |
|
---|
| 62 | /*--------------------------------------------------------------------------*/
|
---|
| 63 |
|
---|
// Cache of compiled matchers, keyed by the `|`-separated type list they test.
var assertTypeRegexMap = {};

// Ensures that a node has one of the permitted types.
//
// `expected` is either a single type name or several names joined by `|`.
// A single name is compared directly; a list is compiled once into an
// anchored regular expression and reused from the cache on later calls.
// Returns nothing on success and throws an `Error` on a mismatch.
function assertType(type, expected) {
  var isSingleType = expected.indexOf('|') == -1;

  if (isSingleType) {
    if (type != expected) {
      throw Error('Invalid node type: ' + type + '; expected type: ' + expected);
    }
    return;
  }

  var matcher;
  if (hasOwnProperty.call(assertTypeRegexMap, expected)) {
    matcher = assertTypeRegexMap[expected];
  } else {
    matcher = RegExp('^(?:' + expected + ')$');
    assertTypeRegexMap[expected] = matcher;
  }

  if (!matcher.test(type)) {
    // The message interpolates the compiled matcher, not the original list.
    throw Error('Invalid node type: ' + type + '; expected types: ' + matcher);
  }
}
|
---|
| 85 |
|
---|
| 86 | /*--------------------------------------------------------------------------*/
|
---|
| 87 |
|
---|
// Generates a regular expression string from an AST node by dispatching on
// `node.type` to the matching entry of `generators`.
// Throws an `Error` when `generators` has no own entry for that type.
function generate(node) {
  var type = node.type;

  if (!hasOwnProperty.call(generators, type)) {
    throw Error('Invalid node type: ' + type);
  }

  return generators[type](node);
}
|
---|
| 98 |
|
---|
// Builds a string by concatenating the output of `generator` for each entry
// of `terms`, inserting `separator` (when given) between consecutive entries.
function generateSequence(generator, terms, /* optional */ separator) {
  var output = '';

  for (var index = 0, count = terms.length; index < count; index++) {
    var current = terms[index];
    var following = terms[index + 1];

    if (separator && index > 0) {
      output += separator;
    }

    // A `\0` null escape directly followed by a digit symbol (code points
    // 48-57) must not be treated as a backreference, so it is emitted in the
    // three-digit form instead of going through `generator`.
    var isNullBeforeDigit =
      index + 1 < count &&
      current.type == 'value' &&
      current.kind == 'null' &&
      following.type == 'value' &&
      following.kind == 'symbol' &&
      following.codePoint >= 48 &&
      following.codePoint <= 57;

    output += isNullBeforeDigit ? '\\000' : generator(current);
  }

  return output;
}
|
---|
| 131 |
|
---|
| 132 | /*--------------------------------------------------------------------------*/
|
---|
| 133 |
|
---|
// Generates an `alternative` node by concatenating the output of
// `generateTerm` for every entry of `node.body`.
function generateAlternative(node) {
  assertType(node.type, 'alternative');

  var terms = node.body;
  return generateSequence(generateTerm, terms);
}
|
---|
| 139 |
|
---|
// Generates an `anchor` node. `node.kind` selects the emitted token:
// 'start' -> `^`, 'end' -> `$`, 'boundary' -> `\b`, 'not-boundary' -> `\B`.
// Throws an `Error` for any other kind.
function generateAnchor(node) {
  assertType(node.type, 'anchor');

  var kind = node.kind;

  if (kind === 'start') {
    return '^';
  }
  if (kind === 'end') {
    return '$';
  }
  if (kind === 'boundary') {
    return '\\b';
  }
  if (kind === 'not-boundary') {
    return '\\B';
  }

  throw Error('Invalid assertion');
}
|
---|
| 156 |
|
---|
// `|`-separated list of the node types accepted as an atom.
var atomType = [
  'anchor',
  'characterClass',
  'characterClassEscape',
  'dot',
  'group',
  'reference',
  'unicodePropertyEscape',
  'value'
].join('|');

// Generates an atom: checks the node type against `atomType`, then hands the
// node to `generate`.
function generateAtom(node) {
  var type = node.type;
  assertType(type, atomType);

  return generate(node);
}
|
---|
| 164 |
|
---|
// Generates a `characterClass` node as `[...]`, or `[^...]` when
// `node.negative` is truthy. Members are joined with `&&` for the
// 'intersection' kind, `--` for the 'subtraction' kind, and nothing otherwise.
function generateCharacterClass(node) {
  assertType(node.type, 'characterClass');

  var kind = node.kind;
  var separator = '';
  if (kind === 'intersection') {
    separator = '&&';
  } else if (kind === 'subtraction') {
    separator = '--';
  }

  var opening = node.negative ? '[^' : '[';
  var members = generateSequence(generateClassAtom, node.body, separator);

  return opening + members + ']';
}
|
---|
| 176 |
|
---|
// Generates a `characterClassEscape` node: a backslash followed by
// `node.value`.
function generateCharacterClassEscape(node) {
  assertType(node.type, 'characterClassEscape');

  var escapeName = node.value;
  return '\\' + escapeName;
}
|
---|
| 182 |
|
---|
// Generates a `characterClassRange` node as `<min>-<max>`.
// Throws an `Error` when either endpoint is itself a `characterClassRange`.
function generateCharacterClassRange(node) {
  assertType(node.type, 'characterClassRange');

  var lower = node.min;
  var upper = node.max;

  var hasNestedRange =
    lower.type == 'characterClassRange' ||
    upper.type == 'characterClassRange';

  if (hasNestedRange) {
    throw Error('Invalid character class range');
  }

  var start = generateClassAtom(lower);
  var end = generateClassAtom(upper);

  return start + '-' + end;
}
|
---|
| 195 |
|
---|
// Generates a member of a character class: checks the node type against the
// list of types permitted inside a class, then hands the node to `generate`.
function generateClassAtom(node) {
  var allowedTypes = 'anchor|characterClass|characterClassEscape|characterClassRange|dot|value|unicodePropertyEscape|classStrings';
  assertType(node.type, allowedTypes);

  return generate(node);
}
|
---|
| 201 |
|
---|
// Generates a `classStrings` node as `\q{...}`, with the entries of
// `node.strings` separated by `|`.
function generateClassStrings(node) {
  assertType(node.type, 'classStrings');

  var alternatives = generateSequence(generateClassString, node.strings, '|');
  return '\\q{' + alternatives + '}';
}
|
---|
| 207 |
|
---|
// Generates a `classString` node by concatenating the output of `generate`
// for every entry of `node.characters`.
function generateClassString(node) {
  assertType(node.type, 'classString');

  var characters = node.characters;
  return generateSequence(generate, characters);
}
|
---|
| 213 |
|
---|
// Generates a `disjunction` node: the entries of `node.body`, each passed
// through `generate`, separated by `|`.
function generateDisjunction(node) {
  assertType(node.type, 'disjunction');

  var alternatives = node.body;
  return generateSequence(generate, alternatives, '|');
}
|
---|
| 219 |
|
---|
| 220 |
|
---|
// Generates a `dot` node, which is always the single character `.`.
function generateDot(node) {
  var type = node.type;
  assertType(type, 'dot');

  return '.';
}
|
---|
| 226 |
|
---|
// Generates a `group` node as `(<prefix><body>)`.
//
// The prefix depends on `node.behavior`:
//   'normal'             -> `?<name>` when `node.name` is set, otherwise none
//   'ignore'             -> `?:`, or `?<enabling>-<disabling>:` when
//                           `node.modifierFlags` is present
//   'lookahead'          -> `?=`
//   'negativeLookahead'  -> `?!`
//   'lookbehind'         -> `?<=`
//   'negativeLookbehind' -> `?<!`
// The body is every entry of `node.body` passed through `generate`.
// Throws an `Error` naming the offending value for any other behavior.
function generateGroup(node) {
  assertType(node.type, 'group');

  var result = '';

  switch (node.behavior) {
    case 'normal':
      if (node.name) {
        result += '?<' + generateIdentifier(node.name) + '>';
      }
      break;
    case 'ignore':
      if (node.modifierFlags) {
        result += '?';
        if (node.modifierFlags.enabling) result += node.modifierFlags.enabling;
        if (node.modifierFlags.disabling) result += "-" + node.modifierFlags.disabling;
        result += ':';
      } else {
        result += '?:';
      }
      break;
    case 'lookahead':
      result += '?=';
      break;
    case 'negativeLookahead':
      result += '?!';
      break;
    case 'lookbehind':
      result += '?<=';
      break;
    case 'negativeLookbehind':
      result += '?<!';
      break;
    default:
      // Report the property the switch actually inspected (`behavior`); the
      // previous `node.behaviour` lookup always yielded `undefined`.
      throw Error('Invalid behaviour: ' + node.behavior);
  }

  result += generateSequence(generate, node.body);

  return '(' + result + ')';
}
|
---|
| 268 |
|
---|
// Generates an `identifier` node, which is emitted as its `value` unchanged.
function generateIdentifier(node) {
  assertType(node.type, 'identifier');

  var name = node.value;
  return name;
}
|
---|
| 274 |
|
---|
// Generates a `quantifier` node: the atom in `node.body[0]` followed by the
// shortest spelling of its repetition count.
//
//   no max, min 0 -> `*`          min equals max -> `{min}`
//   no max, min 1 -> `+`          min 0, max 1   -> `?`
//   no max, other -> `{min,}`     anything else  -> `{min,max}`
//
// A trailing `?` is appended when `node.greedy` is falsy.
function generateQuantifier(node) {
  assertType(node.type, 'quantifier');

  var min = node.min;
  var max = node.max;
  var count;

  if (max == null) {
    count = min == 0 ? '*' : (min == 1 ? '+' : '{' + min + ',}');
  } else if (min == max) {
    count = '{' + min + '}';
  } else if (min == 0 && max == 1) {
    count = '?';
  } else {
    count = '{' + min + ',' + max + '}';
  }

  var lazySuffix = node.greedy ? '' : '?';
  var quantifier = count + lazySuffix;

  return generateAtom(node.body[0]) + quantifier;
}
|
---|
| 304 |
|
---|
// Generates a `reference` node: `\<matchIndex>` when `node.matchIndex` is
// truthy, otherwise `\k<name>` when `node.name` is truthy.
// Throws an `Error` when the node carries neither.
function generateReference(node) {
  assertType(node.type, 'reference');

  var index = node.matchIndex;
  if (index) {
    return '\\' + index;
  }

  var name = node.name;
  if (!name) {
    throw new Error('Unknown reference type');
  }

  return '\\k<' + generateIdentifier(name) + '>';
}
|
---|
| 317 |
|
---|
// Generates a term: checks that the node type is one of the atom types,
// `empty` or `quantifier`, then hands the node to `generate`.
function generateTerm(node) {
  var allowedTypes = atomType + '|empty|quantifier';
  assertType(node.type, allowedTypes);

  return generate(node);
}
|
---|
| 323 |
|
---|
// Generates a `unicodePropertyEscape` node as `\p{value}`, or `\P{value}`
// when `node.negative` is truthy.
function generateUnicodePropertyEscape(node) {
  assertType(node.type, 'unicodePropertyEscape');

  var letter = node.negative ? 'P' : 'p';
  return '\\' + letter + '{' + node.value + '}';
}
|
---|
| 329 |
|
---|
// Generates a `value` node. `node.kind` selects how `node.codePoint` is
// spelled:
//   'controlLetter'          -> `\c` + the character at codePoint + 64
//   'hexadecimalEscape'      -> `\x` + two upper-case hex digits
//   'identifier'             -> `\` + the character itself
//   'null'                   -> `\` + the code point number
//   'octal'                  -> `\` + three octal digits
//   'singleEscape'           -> `\b`, `\t`, `\n`, `\v`, `\f`, `\r` or `\-`
//   'symbol'                 -> the character itself
//   'unicodeEscape'          -> `\u` + four upper-case hex digits
//   'unicodeCodePointEscape' -> `\u{` + upper-case hex digits + `}`
// Throws an `Error` when `codePoint` is not a number, when a 'singleEscape'
// code point has no escape, or when the kind is not listed above.
function generateValue(node) {
  assertType(node.type, 'value');

  var kind = node.kind;
  var codePoint = node.codePoint;

  if (typeof codePoint != 'number') {
    throw new Error('Invalid code point: ' + codePoint);
  }

  if (kind === 'controlLetter') {
    return '\\c' + fromCodePoint(codePoint + 64);
  }

  if (kind === 'hexadecimalEscape') {
    var hexByte = codePoint.toString(16).toUpperCase();
    return '\\x' + ('00' + hexByte).slice(-2);
  }

  if (kind === 'identifier') {
    return '\\' + fromCodePoint(codePoint);
  }

  if (kind === 'null') {
    return '\\' + codePoint;
  }

  if (kind === 'octal') {
    var octalDigits = codePoint.toString(8);
    return '\\' + ('000' + octalDigits).slice(-3);
  }

  if (kind === 'singleEscape') {
    // Code points that have a dedicated single-character escape.
    var singleEscapes = {
      0x0008: '\\b',
      0x0009: '\\t',
      0x000A: '\\n',
      0x000B: '\\v',
      0x000C: '\\f',
      0x000D: '\\r',
      0x002D: '\\-'
    };
    var escaped = singleEscapes[codePoint];

    if (typeof escaped != 'string') {
      throw Error('Invalid code point: ' + codePoint);
    }
    return escaped;
  }

  if (kind === 'symbol') {
    return fromCodePoint(codePoint);
  }

  if (kind === 'unicodeEscape') {
    var hexUnit = codePoint.toString(16).toUpperCase();
    return '\\u' + ('0000' + hexUnit).slice(-4);
  }

  if (kind === 'unicodeCodePointEscape') {
    return '\\u{' + codePoint.toString(16).toUpperCase() + '}';
  }

  throw Error('Unsupported node kind: ' + kind);
}
|
---|
| 380 |
|
---|
| 381 | /*--------------------------------------------------------------------------*/
|
---|
| 382 |
|
---|
// Dispatch table consulted by `generate`: maps each supported node type to
// the function that produces its string.
var generators = {
  alternative: generateAlternative,
  anchor: generateAnchor,
  characterClass: generateCharacterClass,
  characterClassEscape: generateCharacterClassEscape,
  characterClassRange: generateCharacterClassRange,
  classStrings: generateClassStrings,
  disjunction: generateDisjunction,
  dot: generateDot,
  group: generateGroup,
  quantifier: generateQuantifier,
  reference: generateReference,
  unicodePropertyEscape: generateUnicodePropertyEscape,
  value: generateValue
};
|
---|
| 399 |
|
---|
| 400 | /*--------------------------------------------------------------------------*/
|
---|
| 401 |
|
---|
// The public API object: it exposes `generate` and nothing else.
var regjsgen = {};
regjsgen['generate'] = generate;

// Some AMD build optimizers, like r.js, check for condition patterns like the
// following, so the test is kept in exactly this form.
if (typeof define == 'function' && typeof define.amd == 'object' && define.amd) {
  // Register an anonymous module so it can be aliased through path mapping,
  // and additionally expose the API on the global object.
  define(function() {
    return regjsgen;
  });

  root.regjsgen = regjsgen;
}
// `exports` is checked only after `define`, in case a build optimizer adds an
// `exports` object.
else if (freeExports && hasFreeModule) {
  // CommonJS: attach `generate` directly to the exports object.
  freeExports.generate = generate;
}
else {
  // No module system detected: expose the API on the global object.
  root.regjsgen = regjsgen;
}
|
---|
| 425 | }.call(this));
|
---|