1 | var util = require('./util');
|
---|
2 | var types = require('./types');
|
---|
3 | var sets = require('./sets');
|
---|
4 | var positions = require('./positions');
|
---|
5 |
|
---|
6 |
|
---|
7 | module.exports = function(regexpStr) {
|
---|
8 | var i = 0, l, c,
|
---|
9 | start = { type: types.ROOT, stack: []},
|
---|
10 |
|
---|
11 | // Keep track of last clause/group and stack.
|
---|
12 | lastGroup = start,
|
---|
13 | last = start.stack,
|
---|
14 | groupStack = [];
|
---|
15 |
|
---|
16 |
|
---|
17 | var repeatErr = function(i) {
|
---|
18 | util.error(regexpStr, 'Nothing to repeat at column ' + (i - 1));
|
---|
19 | };
|
---|
20 |
|
---|
21 | // Decode a few escaped characters.
|
---|
22 | var str = util.strToChars(regexpStr);
|
---|
23 | l = str.length;
|
---|
24 |
|
---|
25 | // Iterate through each character in string.
|
---|
26 | while (i < l) {
|
---|
27 | c = str[i++];
|
---|
28 |
|
---|
29 | switch (c) {
|
---|
30 | // Handle escaped characters, inclues a few sets.
|
---|
31 | case '\\':
|
---|
32 | c = str[i++];
|
---|
33 |
|
---|
34 | switch (c) {
|
---|
35 | case 'b':
|
---|
36 | last.push(positions.wordBoundary());
|
---|
37 | break;
|
---|
38 |
|
---|
39 | case 'B':
|
---|
40 | last.push(positions.nonWordBoundary());
|
---|
41 | break;
|
---|
42 |
|
---|
43 | case 'w':
|
---|
44 | last.push(sets.words());
|
---|
45 | break;
|
---|
46 |
|
---|
47 | case 'W':
|
---|
48 | last.push(sets.notWords());
|
---|
49 | break;
|
---|
50 |
|
---|
51 | case 'd':
|
---|
52 | last.push(sets.ints());
|
---|
53 | break;
|
---|
54 |
|
---|
55 | case 'D':
|
---|
56 | last.push(sets.notInts());
|
---|
57 | break;
|
---|
58 |
|
---|
59 | case 's':
|
---|
60 | last.push(sets.whitespace());
|
---|
61 | break;
|
---|
62 |
|
---|
63 | case 'S':
|
---|
64 | last.push(sets.notWhitespace());
|
---|
65 | break;
|
---|
66 |
|
---|
67 | default:
|
---|
68 | // Check if c is integer.
|
---|
69 | // In which case it's a reference.
|
---|
70 | if (/\d/.test(c)) {
|
---|
71 | last.push({ type: types.REFERENCE, value: parseInt(c, 10) });
|
---|
72 |
|
---|
73 | // Escaped character.
|
---|
74 | } else {
|
---|
75 | last.push({ type: types.CHAR, value: c.charCodeAt(0) });
|
---|
76 | }
|
---|
77 | }
|
---|
78 |
|
---|
79 | break;
|
---|
80 |
|
---|
81 |
|
---|
82 | // Positionals.
|
---|
83 | case '^':
|
---|
84 | last.push(positions.begin());
|
---|
85 | break;
|
---|
86 |
|
---|
87 | case '$':
|
---|
88 | last.push(positions.end());
|
---|
89 | break;
|
---|
90 |
|
---|
91 |
|
---|
92 | // Handle custom sets.
|
---|
93 | case '[':
|
---|
94 | // Check if this class is 'anti' i.e. [^abc].
|
---|
95 | var not;
|
---|
96 | if (str[i] === '^') {
|
---|
97 | not = true;
|
---|
98 | i++;
|
---|
99 | } else {
|
---|
100 | not = false;
|
---|
101 | }
|
---|
102 |
|
---|
103 | // Get all the characters in class.
|
---|
104 | var classTokens = util.tokenizeClass(str.slice(i), regexpStr);
|
---|
105 |
|
---|
106 | // Increase index by length of class.
|
---|
107 | i += classTokens[1];
|
---|
108 | last.push({
|
---|
109 | type: types.SET,
|
---|
110 | set: classTokens[0],
|
---|
111 | not: not,
|
---|
112 | });
|
---|
113 |
|
---|
114 | break;
|
---|
115 |
|
---|
116 |
|
---|
117 | // Class of any character except \n.
|
---|
118 | case '.':
|
---|
119 | last.push(sets.anyChar());
|
---|
120 | break;
|
---|
121 |
|
---|
122 |
|
---|
123 | // Push group onto stack.
|
---|
124 | case '(':
|
---|
125 | // Create group.
|
---|
126 | var group = {
|
---|
127 | type: types.GROUP,
|
---|
128 | stack: [],
|
---|
129 | remember: true,
|
---|
130 | };
|
---|
131 |
|
---|
132 | c = str[i];
|
---|
133 |
|
---|
134 | // If if this is a special kind of group.
|
---|
135 | if (c === '?') {
|
---|
136 | c = str[i + 1];
|
---|
137 | i += 2;
|
---|
138 |
|
---|
139 | // Match if followed by.
|
---|
140 | if (c === '=') {
|
---|
141 | group.followedBy = true;
|
---|
142 |
|
---|
143 | // Match if not followed by.
|
---|
144 | } else if (c === '!') {
|
---|
145 | group.notFollowedBy = true;
|
---|
146 |
|
---|
147 | } else if (c !== ':') {
|
---|
148 | util.error(regexpStr,
|
---|
149 | 'Invalid group, character \'' + c +
|
---|
150 | '\' after \'?\' at column ' + (i - 1));
|
---|
151 | }
|
---|
152 |
|
---|
153 | group.remember = false;
|
---|
154 | }
|
---|
155 |
|
---|
156 | // Insert subgroup into current group stack.
|
---|
157 | last.push(group);
|
---|
158 |
|
---|
159 | // Remember the current group for when the group closes.
|
---|
160 | groupStack.push(lastGroup);
|
---|
161 |
|
---|
162 | // Make this new group the current group.
|
---|
163 | lastGroup = group;
|
---|
164 | last = group.stack;
|
---|
165 | break;
|
---|
166 |
|
---|
167 |
|
---|
168 | // Pop group out of stack.
|
---|
169 | case ')':
|
---|
170 | if (groupStack.length === 0) {
|
---|
171 | util.error(regexpStr, 'Unmatched ) at column ' + (i - 1));
|
---|
172 | }
|
---|
173 | lastGroup = groupStack.pop();
|
---|
174 |
|
---|
175 | // Check if this group has a PIPE.
|
---|
176 | // To get back the correct last stack.
|
---|
177 | last = lastGroup.options ?
|
---|
178 | lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack;
|
---|
179 | break;
|
---|
180 |
|
---|
181 |
|
---|
182 | // Use pipe character to give more choices.
|
---|
183 | case '|':
|
---|
184 | // Create array where options are if this is the first PIPE
|
---|
185 | // in this clause.
|
---|
186 | if (!lastGroup.options) {
|
---|
187 | lastGroup.options = [lastGroup.stack];
|
---|
188 | delete lastGroup.stack;
|
---|
189 | }
|
---|
190 |
|
---|
191 | // Create a new stack and add to options for rest of clause.
|
---|
192 | var stack = [];
|
---|
193 | lastGroup.options.push(stack);
|
---|
194 | last = stack;
|
---|
195 | break;
|
---|
196 |
|
---|
197 |
|
---|
198 | // Repetition.
|
---|
199 | // For every repetition, remove last element from last stack
|
---|
200 | // then insert back a RANGE object.
|
---|
201 | // This design is chosen because there could be more than
|
---|
202 | // one repetition symbols in a regex i.e. `a?+{2,3}`.
|
---|
203 | case '{':
|
---|
204 | var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;
|
---|
205 | if (rs !== null) {
|
---|
206 | if (last.length === 0) {
|
---|
207 | repeatErr(i);
|
---|
208 | }
|
---|
209 | min = parseInt(rs[1], 10);
|
---|
210 | max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;
|
---|
211 | i += rs[0].length;
|
---|
212 |
|
---|
213 | last.push({
|
---|
214 | type: types.REPETITION,
|
---|
215 | min: min,
|
---|
216 | max: max,
|
---|
217 | value: last.pop(),
|
---|
218 | });
|
---|
219 | } else {
|
---|
220 | last.push({
|
---|
221 | type: types.CHAR,
|
---|
222 | value: 123,
|
---|
223 | });
|
---|
224 | }
|
---|
225 | break;
|
---|
226 |
|
---|
227 | case '?':
|
---|
228 | if (last.length === 0) {
|
---|
229 | repeatErr(i);
|
---|
230 | }
|
---|
231 | last.push({
|
---|
232 | type: types.REPETITION,
|
---|
233 | min: 0,
|
---|
234 | max: 1,
|
---|
235 | value: last.pop(),
|
---|
236 | });
|
---|
237 | break;
|
---|
238 |
|
---|
239 | case '+':
|
---|
240 | if (last.length === 0) {
|
---|
241 | repeatErr(i);
|
---|
242 | }
|
---|
243 | last.push({
|
---|
244 | type: types.REPETITION,
|
---|
245 | min: 1,
|
---|
246 | max: Infinity,
|
---|
247 | value: last.pop(),
|
---|
248 | });
|
---|
249 | break;
|
---|
250 |
|
---|
251 | case '*':
|
---|
252 | if (last.length === 0) {
|
---|
253 | repeatErr(i);
|
---|
254 | }
|
---|
255 | last.push({
|
---|
256 | type: types.REPETITION,
|
---|
257 | min: 0,
|
---|
258 | max: Infinity,
|
---|
259 | value: last.pop(),
|
---|
260 | });
|
---|
261 | break;
|
---|
262 |
|
---|
263 |
|
---|
264 | // Default is a character that is not `\[](){}?+*^$`.
|
---|
265 | default:
|
---|
266 | last.push({
|
---|
267 | type: types.CHAR,
|
---|
268 | value: c.charCodeAt(0),
|
---|
269 | });
|
---|
270 | }
|
---|
271 |
|
---|
272 | }
|
---|
273 |
|
---|
274 | // Check if any groups have not been closed.
|
---|
275 | if (groupStack.length !== 0) {
|
---|
276 | util.error(regexpStr, 'Unterminated group');
|
---|
277 | }
|
---|
278 |
|
---|
279 | return start;
|
---|
280 | };
|
---|
281 |
|
---|
282 | module.exports.types = types;
|
---|