/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
5 | 'use strict';
|
---|
/**
 * Creates a JSON scanner on the given text.
 * If ignoreTrivia is set, whitespaces or comments are ignored.
 *
 * Token kinds (numeric values returned by `scan()` / `getToken()`):
 *   1 OpenBraceToken, 2 CloseBraceToken, 3 OpenBracketToken, 4 CloseBracketToken,
 *   5 CommaToken, 6 ColonToken, 7 NullKeyword, 8 TrueKeyword, 9 FalseKeyword,
 *   10 StringLiteral, 11 NumericLiteral, 12 LineCommentTrivia, 13 BlockCommentTrivia,
 *   14 LineBreakTrivia, 15 Trivia, 16 Unknown, 17 EOF.
 *
 * Scan error codes (returned by `getTokenError()`):
 *   0 None, 1 UnexpectedEndOfComment, 2 UnexpectedEndOfString, 3 UnexpectedEndOfNumber,
 *   4 InvalidUnicode, 5 InvalidEscapeCharacter, 6 InvalidCharacter.
 *
 * @param {string} text The text to scan.
 * @param {boolean} [ignoreTrivia=false] When true, `scan()` skips token kinds 12..15
 *     (comments, line breaks and whitespace).
 * @returns {object} A scanner exposing `setPosition`, `getPosition`, `scan`, `getToken`,
 *     `getTokenValue`, `getTokenOffset`, `getTokenLength`, `getTokenStartLine`,
 *     `getTokenStartCharacter` and `getTokenError`.
 */
export function createScanner(text, ignoreTrivia = false) {
    const len = text.length;
    let pos = 0;
    let value = '';
    let tokenOffset = 0;
    let token = 16 /* Unknown */;
    // Number of line breaks consumed so far.
    let lineNumber = 0;
    // Value of lineNumber at the moment the current token started.
    let tokenStartLine = 0;
    // Offset just past the most recently consumed line break.
    let tokenLineStartOffset = 0;
    // Snapshot of tokenLineStartOffset taken when the current token started.
    let prevTokenLineStartOffset = 0;
    let scanError = 0 /* None */;

    /**
     * Reads hexadecimal digits starting at `pos` and returns their numeric value.
     * With `exact` set, at most `count` digits are read; otherwise reading continues
     * while hex digits follow. Returns -1 when fewer than `count` digits were found.
     */
    function scanHexDigits(count, exact) {
        let digits = 0;
        let hexValue = 0;
        while (digits < count || !exact) {
            const ch = text.charCodeAt(pos);
            if (ch >= 48 /* _0 */ && ch <= 57 /* _9 */) {
                hexValue = hexValue * 16 + ch - 48 /* _0 */;
            }
            else if (ch >= 65 /* A */ && ch <= 70 /* F */) {
                hexValue = hexValue * 16 + ch - 65 /* A */ + 10;
            }
            else if (ch >= 97 /* a */ && ch <= 102 /* f */) {
                hexValue = hexValue * 16 + ch - 97 /* a */ + 10;
            }
            else {
                // Also reached at end of text, where charCodeAt yields NaN.
                break;
            }
            pos++;
            digits++;
        }
        return digits < count ? -1 : hexValue;
    }

    /**
     * Moves the scanner to `newPosition` and clears the current token state.
     * Only pos, value, tokenOffset, token and scanError are reset; the line
     * tracking counters are left as they were.
     */
    function setPosition(newPosition) {
        pos = newPosition;
        value = '';
        tokenOffset = 0;
        token = 16 /* Unknown */;
        scanError = 0 /* None */;
    }

    /** Advances `pos` past a run of decimal digits. */
    function skipDigits() {
        while (pos < len && isDigit(text.charCodeAt(pos))) {
            pos++;
        }
    }

    /**
     * Scans the unsigned part of a number (integer, optional fraction, optional
     * exponent) starting at `pos` and returns the scanned text. Sets scanError to
     * UnexpectedEndOfNumber when a '.' or an exponent marker is not followed by a digit.
     */
    function scanNumber() {
        const start = pos;
        if (text.charCodeAt(pos) === 48 /* _0 */) {
            // A leading zero stands alone; no further integer digits are consumed.
            pos++;
        }
        else {
            pos++;
            skipDigits();
        }
        if (pos < len && text.charCodeAt(pos) === 46 /* dot */) {
            pos++;
            if (pos < len && isDigit(text.charCodeAt(pos))) {
                pos++;
                skipDigits();
            }
            else {
                scanError = 3 /* UnexpectedEndOfNumber */;
                return text.substring(start, pos);
            }
        }
        let end = pos;
        if (pos < len && (text.charCodeAt(pos) === 69 /* E */ || text.charCodeAt(pos) === 101 /* e */)) {
            pos++;
            // Parenthesised so the bounds check guards both sign alternatives.
            if (pos < len && (text.charCodeAt(pos) === 43 /* plus */ || text.charCodeAt(pos) === 45 /* minus */)) {
                pos++;
            }
            if (pos < len && isDigit(text.charCodeAt(pos))) {
                pos++;
                skipDigits();
                end = pos;
            }
            else {
                // The returned text stops before the exponent marker, but pos stays advanced.
                scanError = 3 /* UnexpectedEndOfNumber */;
            }
        }
        return text.substring(start, end);
    }

    /**
     * Scans a string body; `pos` must be just past the opening double quote.
     * Returns the decoded contents (escape sequences resolved) and leaves `pos`
     * after the closing quote, or at the point where the string was cut short.
     */
    function scanString() {
        let result = '';
        let start = pos;
        while (true) {
            if (pos >= len) {
                result += text.substring(start, pos);
                scanError = 2 /* UnexpectedEndOfString */;
                break;
            }
            const ch = text.charCodeAt(pos);
            if (ch === 34 /* doubleQuote */) {
                result += text.substring(start, pos);
                pos++;
                break;
            }
            if (ch === 92 /* backslash */) {
                result += text.substring(start, pos);
                pos++;
                if (pos >= len) {
                    scanError = 2 /* UnexpectedEndOfString */;
                    break;
                }
                const escaped = text.charCodeAt(pos++);
                switch (escaped) {
                    case 34 /* doubleQuote */:
                        result += '\"';
                        break;
                    case 92 /* backslash */:
                        result += '\\';
                        break;
                    case 47 /* slash */:
                        result += '/';
                        break;
                    case 98 /* b */:
                        result += '\b';
                        break;
                    case 102 /* f */:
                        result += '\f';
                        break;
                    case 110 /* n */:
                        result += '\n';
                        break;
                    case 114 /* r */:
                        result += '\r';
                        break;
                    case 116 /* t */:
                        result += '\t';
                        break;
                    case 117 /* u */: {
                        const codeUnit = scanHexDigits(4, true);
                        if (codeUnit >= 0) {
                            result += String.fromCharCode(codeUnit);
                        }
                        else {
                            scanError = 4 /* InvalidUnicode */;
                        }
                        break;
                    }
                    default:
                        scanError = 5 /* InvalidEscapeCharacter */;
                }
                start = pos;
                continue;
            }
            if (ch >= 0 && ch <= 0x1f) {
                if (isLineBreak(ch)) {
                    // A raw line break terminates the string; it is not consumed here.
                    result += text.substring(start, pos);
                    scanError = 2 /* UnexpectedEndOfString */;
                    break;
                }
                else {
                    scanError = 6 /* InvalidCharacter */;
                    // mark as error but continue with string
                }
            }
            pos++;
        }
        return result;
    }

    /** Scans the next token (trivia included) and returns its kind. */
    function scanNext() {
        value = '';
        scanError = 0 /* None */;
        tokenOffset = pos;
        tokenStartLine = lineNumber;
        prevTokenLineStartOffset = tokenLineStartOffset;
        if (pos >= len) {
            // at the end
            tokenOffset = len;
            return token = 17 /* EOF */;
        }
        let code = text.charCodeAt(pos);
        // trivia: whitespace
        if (isWhiteSpace(code)) {
            do {
                pos++;
                value += String.fromCharCode(code);
                code = text.charCodeAt(pos);
            } while (isWhiteSpace(code));
            return token = 15 /* Trivia */;
        }
        // trivia: newlines
        if (isLineBreak(code)) {
            pos++;
            value += String.fromCharCode(code);
            // Treat CR LF as a single line break.
            if (code === 13 /* carriageReturn */ && text.charCodeAt(pos) === 10 /* lineFeed */) {
                pos++;
                value += '\n';
            }
            lineNumber++;
            tokenLineStartOffset = pos;
            return token = 14 /* LineBreakTrivia */;
        }
        switch (code) {
            // tokens: []{}:,
            case 123 /* openBrace */:
                pos++;
                return token = 1 /* OpenBraceToken */;
            case 125 /* closeBrace */:
                pos++;
                return token = 2 /* CloseBraceToken */;
            case 91 /* openBracket */:
                pos++;
                return token = 3 /* OpenBracketToken */;
            case 93 /* closeBracket */:
                pos++;
                return token = 4 /* CloseBracketToken */;
            case 58 /* colon */:
                pos++;
                return token = 6 /* ColonToken */;
            case 44 /* comma */:
                pos++;
                return token = 5 /* CommaToken */;
            // strings
            case 34 /* doubleQuote */:
                pos++;
                value = scanString();
                return token = 10 /* StringLiteral */;
            // comments
            case 47 /* slash */: {
                // pos still points at the leading slash, so the comment text starts
                // exactly at the token offset (not one character before it).
                const start = pos;
                // Single-line comment
                if (text.charCodeAt(pos + 1) === 47 /* slash */) {
                    pos += 2;
                    while (pos < len) {
                        if (isLineBreak(text.charCodeAt(pos))) {
                            break;
                        }
                        pos++;
                    }
                    value = text.substring(start, pos);
                    return token = 12 /* LineCommentTrivia */;
                }
                // Multi-line comment
                if (text.charCodeAt(pos + 1) === 42 /* asterisk */) {
                    pos += 2;
                    const safeLength = len - 1; // For lookahead.
                    let commentClosed = false;
                    while (pos < safeLength) {
                        const ch = text.charCodeAt(pos);
                        if (ch === 42 /* asterisk */ && text.charCodeAt(pos + 1) === 47 /* slash */) {
                            pos += 2;
                            commentClosed = true;
                            break;
                        }
                        pos++;
                        if (isLineBreak(ch)) {
                            if (ch === 13 /* carriageReturn */ && text.charCodeAt(pos) === 10 /* lineFeed */) {
                                pos++;
                            }
                            lineNumber++;
                            tokenLineStartOffset = pos;
                        }
                    }
                    if (!commentClosed) {
                        // An unterminated comment extends to the end of the text;
                        // never move past it.
                        pos = len;
                        scanError = 1 /* UnexpectedEndOfComment */;
                    }
                    value = text.substring(start, pos);
                    return token = 13 /* BlockCommentTrivia */;
                }
                // just a single slash
                value += String.fromCharCode(code);
                pos++;
                return token = 16 /* Unknown */;
            }
            // numbers
            case 45 /* minus */:
                value += String.fromCharCode(code);
                pos++;
                if (pos === len || !isDigit(text.charCodeAt(pos))) {
                    return token = 16 /* Unknown */;
                }
            // found a minus, followed by a number so
            // we fall through to proceed with scanning
            // numbers
            case 48 /* _0 */:
            case 49 /* _1 */:
            case 50 /* _2 */:
            case 51 /* _3 */:
            case 52 /* _4 */:
            case 53 /* _5 */:
            case 54 /* _6 */:
            case 55 /* _7 */:
            case 56 /* _8 */:
            case 57 /* _9 */:
                value += scanNumber();
                return token = 11 /* NumericLiteral */;
            // literals and unknown symbols
            default:
                // is a literal? Read the full word.
                while (pos < len && isUnknownContentCharacter(code)) {
                    pos++;
                    code = text.charCodeAt(pos);
                }
                if (tokenOffset !== pos) {
                    value = text.substring(tokenOffset, pos);
                    // keywords: true, false, null
                    switch (value) {
                        case 'true': return token = 8 /* TrueKeyword */;
                        case 'false': return token = 9 /* FalseKeyword */;
                        case 'null': return token = 7 /* NullKeyword */;
                    }
                    return token = 16 /* Unknown */;
                }
                // Nothing was consumed: emit the single character as Unknown.
                value += String.fromCharCode(code);
                pos++;
                return token = 16 /* Unknown */;
        }
    }

    /**
     * Returns false for whitespace, line breaks and the JSON punctuation
     * characters `}`, `]`, `{`, `[`, `"`, `:`, `,`, `/`; true for anything else.
     * Used to delimit bare words such as keywords.
     */
    function isUnknownContentCharacter(code) {
        if (isWhiteSpace(code) || isLineBreak(code)) {
            return false;
        }
        switch (code) {
            case 125 /* closeBrace */:
            case 93 /* closeBracket */:
            case 123 /* openBrace */:
            case 91 /* openBracket */:
            case 34 /* doubleQuote */:
            case 58 /* colon */:
            case 44 /* comma */:
            case 47 /* slash */:
                return false;
        }
        return true;
    }

    /** Scans tokens until one that is not a comment, line break or whitespace is found. */
    function scanNextNonTrivia() {
        let result;
        do {
            result = scanNext();
        } while (result >= 12 /* LineCommentTrivia */ && result <= 15 /* Trivia */);
        return result;
    }

    return {
        setPosition: setPosition,
        getPosition: function () { return pos; },
        scan: ignoreTrivia ? scanNextNonTrivia : scanNext,
        getToken: function () { return token; },
        getTokenValue: function () { return value; },
        getTokenOffset: function () { return tokenOffset; },
        getTokenLength: function () { return pos - tokenOffset; },
        // Number of line breaks consumed before the current token started.
        getTokenStartLine: function () { return tokenStartLine; },
        // Token offset relative to the line start recorded when the token began.
        getTokenStartCharacter: function () { return tokenOffset - prevTokenLineStartOffset; },
        getTokenError: function () { return scanError; },
    };
}
/**
 * Tells whether a UTF-16 code unit is treated as (non line-breaking) whitespace
 * by the scanner.
 *
 * @param {number} ch Character code to test.
 * @returns {boolean} True for the whitespace code units listed below.
 */
function isWhiteSpace(ch) {
    switch (ch) {
        case 32: /* space */
        case 9: /* tab */
        case 11: /* verticalTab */
        case 12: /* formFeed */
        case 160: /* nonBreakingSpace */
        case 5760: /* ogham */
        case 8239: /* narrowNoBreakSpace */
        case 8287: /* mathematicalSpace */
        case 12288: /* ideographicSpace */
        case 65279: /* byteOrderMark */
            return true;
        default:
            // Contiguous range from enQuad (8192) through zeroWidthSpace (8203).
            return ch >= 8192 && ch <= 8203;
    }
}
/**
 * Tells whether a UTF-16 code unit is a line terminator: line feed, carriage
 * return, line separator or paragraph separator.
 *
 * @param {number} ch Character code to test.
 * @returns {boolean} True when `ch` is one of the four line-break code units.
 */
function isLineBreak(ch) {
    switch (ch) {
        case 10: /* lineFeed */
        case 13: /* carriageReturn */
        case 8232: /* lineSeparator */
        case 8233: /* paragraphSeparator */
            return true;
        default:
            return false;
    }
}
/**
 * Tells whether a UTF-16 code unit is an ASCII decimal digit ('0' through '9').
 *
 * @param {number} ch Character code to test.
 * @returns {boolean} True when `ch` lies in the range 48..57 inclusive.
 */
function isDigit(ch) {
    const zero = 48; /* _0 */
    const nine = 57; /* _9 */
    return zero <= ch && ch <= nine;
}