1 | const types = require('./types');
|
---|
2 | const sets = require('./sets');
|
---|
3 |
|
---|
4 |
|
---|
// Lookup string for `\cX` control escapes: strToChars uses the index of X in
// this string as the resulting char code (`@` -> 0, `A` -> 1, ... `Z` -> 26).
// NOTE(review): `?` sits at index 32, so `\c?` decodes to a space rather than
// DEL (127) — confirm this is intended.
const CTRL = '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^ ?';
// Char codes for the single-letter escapes `\0 \t \n \v \f \r`, keyed by the
// letter that follows the backslash.
const SLSH = { '0': 0, 't': 9, 'n': 10, 'v': 11, 'f': 12, 'r': 13 };
|
---|
7 |
|
---|
8 | /**
|
---|
9 | * Finds character representations in str and convert all to
|
---|
10 | * their respective characters
|
---|
11 | *
|
---|
12 | * @param {String} str
|
---|
13 | * @return {String}
|
---|
14 | */
|
---|
15 | exports.strToChars = function(str) {
|
---|
16 | /* jshint maxlen: false */
|
---|
17 | var chars_regex = /(\[\\b\])|(\\)?\\(?:u([A-F0-9]{4})|x([A-F0-9]{2})|(0?[0-7]{2})|c([@A-Z[\\\]^?])|([0tnvfr]))/g;
|
---|
18 | str = str.replace(chars_regex, function(s, b, lbs, a16, b16, c8, dctrl, eslsh) {
|
---|
19 | if (lbs) {
|
---|
20 | return s;
|
---|
21 | }
|
---|
22 |
|
---|
23 | var code = b ? 8 :
|
---|
24 | a16 ? parseInt(a16, 16) :
|
---|
25 | b16 ? parseInt(b16, 16) :
|
---|
26 | c8 ? parseInt(c8, 8) :
|
---|
27 | dctrl ? CTRL.indexOf(dctrl) :
|
---|
28 | SLSH[eslsh];
|
---|
29 |
|
---|
30 | var c = String.fromCharCode(code);
|
---|
31 |
|
---|
32 | // Escape special regex characters.
|
---|
33 | if (/[[\]{}^$.|?*+()]/.test(c)) {
|
---|
34 | c = '\\' + c;
|
---|
35 | }
|
---|
36 |
|
---|
37 | return c;
|
---|
38 | });
|
---|
39 |
|
---|
40 | return str;
|
---|
41 | };
|
---|
42 |
|
---|
43 |
|
---|
44 | /**
|
---|
45 | * turns class into tokens
|
---|
46 | * reads str until it encounters a ] not preceeded by a \
|
---|
47 | *
|
---|
48 | * @param {String} str
|
---|
49 | * @param {String} regexpStr
|
---|
50 | * @return {Array.<Array.<Object>, Number>}
|
---|
51 | */
|
---|
52 | exports.tokenizeClass = (str, regexpStr) => {
|
---|
53 | /* jshint maxlen: false */
|
---|
54 | var tokens = [];
|
---|
55 | var regexp = /\\(?:(w)|(d)|(s)|(W)|(D)|(S))|((?:(?:\\)(.)|([^\]\\]))-(?:\\)?([^\]]))|(\])|(?:\\)?([^])/g;
|
---|
56 | var rs, c;
|
---|
57 |
|
---|
58 |
|
---|
59 | while ((rs = regexp.exec(str)) != null) {
|
---|
60 | if (rs[1]) {
|
---|
61 | tokens.push(sets.words());
|
---|
62 |
|
---|
63 | } else if (rs[2]) {
|
---|
64 | tokens.push(sets.ints());
|
---|
65 |
|
---|
66 | } else if (rs[3]) {
|
---|
67 | tokens.push(sets.whitespace());
|
---|
68 |
|
---|
69 | } else if (rs[4]) {
|
---|
70 | tokens.push(sets.notWords());
|
---|
71 |
|
---|
72 | } else if (rs[5]) {
|
---|
73 | tokens.push(sets.notInts());
|
---|
74 |
|
---|
75 | } else if (rs[6]) {
|
---|
76 | tokens.push(sets.notWhitespace());
|
---|
77 |
|
---|
78 | } else if (rs[7]) {
|
---|
79 | tokens.push({
|
---|
80 | type: types.RANGE,
|
---|
81 | from: (rs[8] || rs[9]).charCodeAt(0),
|
---|
82 | to: rs[10].charCodeAt(0),
|
---|
83 | });
|
---|
84 |
|
---|
85 | } else if ((c = rs[12])) {
|
---|
86 | tokens.push({
|
---|
87 | type: types.CHAR,
|
---|
88 | value: c.charCodeAt(0),
|
---|
89 | });
|
---|
90 |
|
---|
91 | } else {
|
---|
92 | return [tokens, regexp.lastIndex];
|
---|
93 | }
|
---|
94 | }
|
---|
95 |
|
---|
96 | exports.error(regexpStr, 'Unterminated character class');
|
---|
97 | };
|
---|
98 |
|
---|
99 |
|
---|
100 | /**
|
---|
101 | * Shortcut to throw errors.
|
---|
102 | *
|
---|
103 | * @param {String} regexp
|
---|
104 | * @param {String} msg
|
---|
105 | */
|
---|
106 | exports.error = (regexp, msg) => {
|
---|
107 | throw new SyntaxError('Invalid regular expression: /' + regexp + '/: ' + msg);
|
---|
108 | };
|
---|