[6a3a178] | 1 | var util = require('./util');
|
---|
| 2 | var types = require('./types');
|
---|
| 3 | var sets = require('./sets');
|
---|
| 4 | var positions = require('./positions');
|
---|
| 5 |
|
---|
| 6 |
|
---|
| 7 | module.exports = function(regexpStr) {
|
---|
| 8 | var i = 0, l, c,
|
---|
| 9 | start = { type: types.ROOT, stack: []},
|
---|
| 10 |
|
---|
| 11 | // Keep track of last clause/group and stack.
|
---|
| 12 | lastGroup = start,
|
---|
| 13 | last = start.stack,
|
---|
| 14 | groupStack = [];
|
---|
| 15 |
|
---|
| 16 |
|
---|
| 17 | var repeatErr = function(i) {
|
---|
| 18 | util.error(regexpStr, 'Nothing to repeat at column ' + (i - 1));
|
---|
| 19 | };
|
---|
| 20 |
|
---|
| 21 | // Decode a few escaped characters.
|
---|
| 22 | var str = util.strToChars(regexpStr);
|
---|
| 23 | l = str.length;
|
---|
| 24 |
|
---|
| 25 | // Iterate through each character in string.
|
---|
| 26 | while (i < l) {
|
---|
| 27 | c = str[i++];
|
---|
| 28 |
|
---|
| 29 | switch (c) {
|
---|
| 30 | // Handle escaped characters, inclues a few sets.
|
---|
| 31 | case '\\':
|
---|
| 32 | c = str[i++];
|
---|
| 33 |
|
---|
| 34 | switch (c) {
|
---|
| 35 | case 'b':
|
---|
| 36 | last.push(positions.wordBoundary());
|
---|
| 37 | break;
|
---|
| 38 |
|
---|
| 39 | case 'B':
|
---|
| 40 | last.push(positions.nonWordBoundary());
|
---|
| 41 | break;
|
---|
| 42 |
|
---|
| 43 | case 'w':
|
---|
| 44 | last.push(sets.words());
|
---|
| 45 | break;
|
---|
| 46 |
|
---|
| 47 | case 'W':
|
---|
| 48 | last.push(sets.notWords());
|
---|
| 49 | break;
|
---|
| 50 |
|
---|
| 51 | case 'd':
|
---|
| 52 | last.push(sets.ints());
|
---|
| 53 | break;
|
---|
| 54 |
|
---|
| 55 | case 'D':
|
---|
| 56 | last.push(sets.notInts());
|
---|
| 57 | break;
|
---|
| 58 |
|
---|
| 59 | case 's':
|
---|
| 60 | last.push(sets.whitespace());
|
---|
| 61 | break;
|
---|
| 62 |
|
---|
| 63 | case 'S':
|
---|
| 64 | last.push(sets.notWhitespace());
|
---|
| 65 | break;
|
---|
| 66 |
|
---|
| 67 | default:
|
---|
| 68 | // Check if c is integer.
|
---|
| 69 | // In which case it's a reference.
|
---|
| 70 | if (/\d/.test(c)) {
|
---|
| 71 | last.push({ type: types.REFERENCE, value: parseInt(c, 10) });
|
---|
| 72 |
|
---|
| 73 | // Escaped character.
|
---|
| 74 | } else {
|
---|
| 75 | last.push({ type: types.CHAR, value: c.charCodeAt(0) });
|
---|
| 76 | }
|
---|
| 77 | }
|
---|
| 78 |
|
---|
| 79 | break;
|
---|
| 80 |
|
---|
| 81 |
|
---|
| 82 | // Positionals.
|
---|
| 83 | case '^':
|
---|
| 84 | last.push(positions.begin());
|
---|
| 85 | break;
|
---|
| 86 |
|
---|
| 87 | case '$':
|
---|
| 88 | last.push(positions.end());
|
---|
| 89 | break;
|
---|
| 90 |
|
---|
| 91 |
|
---|
| 92 | // Handle custom sets.
|
---|
| 93 | case '[':
|
---|
| 94 | // Check if this class is 'anti' i.e. [^abc].
|
---|
| 95 | var not;
|
---|
| 96 | if (str[i] === '^') {
|
---|
| 97 | not = true;
|
---|
| 98 | i++;
|
---|
| 99 | } else {
|
---|
| 100 | not = false;
|
---|
| 101 | }
|
---|
| 102 |
|
---|
| 103 | // Get all the characters in class.
|
---|
| 104 | var classTokens = util.tokenizeClass(str.slice(i), regexpStr);
|
---|
| 105 |
|
---|
| 106 | // Increase index by length of class.
|
---|
| 107 | i += classTokens[1];
|
---|
| 108 | last.push({
|
---|
| 109 | type: types.SET,
|
---|
| 110 | set: classTokens[0],
|
---|
| 111 | not: not,
|
---|
| 112 | });
|
---|
| 113 |
|
---|
| 114 | break;
|
---|
| 115 |
|
---|
| 116 |
|
---|
| 117 | // Class of any character except \n.
|
---|
| 118 | case '.':
|
---|
| 119 | last.push(sets.anyChar());
|
---|
| 120 | break;
|
---|
| 121 |
|
---|
| 122 |
|
---|
| 123 | // Push group onto stack.
|
---|
| 124 | case '(':
|
---|
| 125 | // Create group.
|
---|
| 126 | var group = {
|
---|
| 127 | type: types.GROUP,
|
---|
| 128 | stack: [],
|
---|
| 129 | remember: true,
|
---|
| 130 | };
|
---|
| 131 |
|
---|
| 132 | c = str[i];
|
---|
| 133 |
|
---|
| 134 | // If if this is a special kind of group.
|
---|
| 135 | if (c === '?') {
|
---|
| 136 | c = str[i + 1];
|
---|
| 137 | i += 2;
|
---|
| 138 |
|
---|
| 139 | // Match if followed by.
|
---|
| 140 | if (c === '=') {
|
---|
| 141 | group.followedBy = true;
|
---|
| 142 |
|
---|
| 143 | // Match if not followed by.
|
---|
| 144 | } else if (c === '!') {
|
---|
| 145 | group.notFollowedBy = true;
|
---|
| 146 |
|
---|
| 147 | } else if (c !== ':') {
|
---|
| 148 | util.error(regexpStr,
|
---|
| 149 | 'Invalid group, character \'' + c +
|
---|
| 150 | '\' after \'?\' at column ' + (i - 1));
|
---|
| 151 | }
|
---|
| 152 |
|
---|
| 153 | group.remember = false;
|
---|
| 154 | }
|
---|
| 155 |
|
---|
| 156 | // Insert subgroup into current group stack.
|
---|
| 157 | last.push(group);
|
---|
| 158 |
|
---|
| 159 | // Remember the current group for when the group closes.
|
---|
| 160 | groupStack.push(lastGroup);
|
---|
| 161 |
|
---|
| 162 | // Make this new group the current group.
|
---|
| 163 | lastGroup = group;
|
---|
| 164 | last = group.stack;
|
---|
| 165 | break;
|
---|
| 166 |
|
---|
| 167 |
|
---|
| 168 | // Pop group out of stack.
|
---|
| 169 | case ')':
|
---|
| 170 | if (groupStack.length === 0) {
|
---|
| 171 | util.error(regexpStr, 'Unmatched ) at column ' + (i - 1));
|
---|
| 172 | }
|
---|
| 173 | lastGroup = groupStack.pop();
|
---|
| 174 |
|
---|
| 175 | // Check if this group has a PIPE.
|
---|
| 176 | // To get back the correct last stack.
|
---|
| 177 | last = lastGroup.options ?
|
---|
| 178 | lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack;
|
---|
| 179 | break;
|
---|
| 180 |
|
---|
| 181 |
|
---|
| 182 | // Use pipe character to give more choices.
|
---|
| 183 | case '|':
|
---|
| 184 | // Create array where options are if this is the first PIPE
|
---|
| 185 | // in this clause.
|
---|
| 186 | if (!lastGroup.options) {
|
---|
| 187 | lastGroup.options = [lastGroup.stack];
|
---|
| 188 | delete lastGroup.stack;
|
---|
| 189 | }
|
---|
| 190 |
|
---|
| 191 | // Create a new stack and add to options for rest of clause.
|
---|
| 192 | var stack = [];
|
---|
| 193 | lastGroup.options.push(stack);
|
---|
| 194 | last = stack;
|
---|
| 195 | break;
|
---|
| 196 |
|
---|
| 197 |
|
---|
| 198 | // Repetition.
|
---|
| 199 | // For every repetition, remove last element from last stack
|
---|
| 200 | // then insert back a RANGE object.
|
---|
| 201 | // This design is chosen because there could be more than
|
---|
| 202 | // one repetition symbols in a regex i.e. `a?+{2,3}`.
|
---|
| 203 | case '{':
|
---|
| 204 | var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;
|
---|
| 205 | if (rs !== null) {
|
---|
| 206 | if (last.length === 0) {
|
---|
| 207 | repeatErr(i);
|
---|
| 208 | }
|
---|
| 209 | min = parseInt(rs[1], 10);
|
---|
| 210 | max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;
|
---|
| 211 | i += rs[0].length;
|
---|
| 212 |
|
---|
| 213 | last.push({
|
---|
| 214 | type: types.REPETITION,
|
---|
| 215 | min: min,
|
---|
| 216 | max: max,
|
---|
| 217 | value: last.pop(),
|
---|
| 218 | });
|
---|
| 219 | } else {
|
---|
| 220 | last.push({
|
---|
| 221 | type: types.CHAR,
|
---|
| 222 | value: 123,
|
---|
| 223 | });
|
---|
| 224 | }
|
---|
| 225 | break;
|
---|
| 226 |
|
---|
| 227 | case '?':
|
---|
| 228 | if (last.length === 0) {
|
---|
| 229 | repeatErr(i);
|
---|
| 230 | }
|
---|
| 231 | last.push({
|
---|
| 232 | type: types.REPETITION,
|
---|
| 233 | min: 0,
|
---|
| 234 | max: 1,
|
---|
| 235 | value: last.pop(),
|
---|
| 236 | });
|
---|
| 237 | break;
|
---|
| 238 |
|
---|
| 239 | case '+':
|
---|
| 240 | if (last.length === 0) {
|
---|
| 241 | repeatErr(i);
|
---|
| 242 | }
|
---|
| 243 | last.push({
|
---|
| 244 | type: types.REPETITION,
|
---|
| 245 | min: 1,
|
---|
| 246 | max: Infinity,
|
---|
| 247 | value: last.pop(),
|
---|
| 248 | });
|
---|
| 249 | break;
|
---|
| 250 |
|
---|
| 251 | case '*':
|
---|
| 252 | if (last.length === 0) {
|
---|
| 253 | repeatErr(i);
|
---|
| 254 | }
|
---|
| 255 | last.push({
|
---|
| 256 | type: types.REPETITION,
|
---|
| 257 | min: 0,
|
---|
| 258 | max: Infinity,
|
---|
| 259 | value: last.pop(),
|
---|
| 260 | });
|
---|
| 261 | break;
|
---|
| 262 |
|
---|
| 263 |
|
---|
| 264 | // Default is a character that is not `\[](){}?+*^$`.
|
---|
| 265 | default:
|
---|
| 266 | last.push({
|
---|
| 267 | type: types.CHAR,
|
---|
| 268 | value: c.charCodeAt(0),
|
---|
| 269 | });
|
---|
| 270 | }
|
---|
| 271 |
|
---|
| 272 | }
|
---|
| 273 |
|
---|
| 274 | // Check if any groups have not been closed.
|
---|
| 275 | if (groupStack.length !== 0) {
|
---|
| 276 | util.error(regexpStr, 'Unterminated group');
|
---|
| 277 | }
|
---|
| 278 |
|
---|
| 279 | return start;
|
---|
| 280 | };
|
---|
| 281 |
|
---|
| 282 | module.exports.types = types;
|
---|