source: node_modules/yaml/dist/parse/lexer.js

main
Last change on this file was d24f17c, checked in by Aleksandar Panovski <apano77@…>, 15 months ago

Initial commit

  • Property mode set to 100644
File size: 22.9 KB
Line 
1'use strict';
2
3var cst = require('./cst.js');
4
/*
START -> stream

stream
  directive -> line-end -> stream
  indent + line-end -> stream
  [else] -> line-start

line-end
  comment -> line-end
  newline -> .
  input-end -> END

line-start
  doc-start -> doc
  doc-end -> stream
  [else] -> indent -> block-start

block-start
  seq-item-start -> block-start
  explicit-key-start -> block-start
  map-value-start -> block-start
  [else] -> doc

doc
  line-end -> line-start
  spaces -> doc
  anchor -> doc
  tag -> doc
  flow-start -> flow -> doc
  flow-end -> error -> doc
  seq-item-start -> error -> doc
  explicit-key-start -> error -> doc
  map-value-start -> doc
  alias -> doc
  quote-start -> quoted-scalar -> doc
  block-scalar-header -> line-end -> block-scalar(min) -> line-start
  [else] -> plain-scalar(false, min) -> doc

flow
  line-end -> flow
  spaces -> flow
  anchor -> flow
  tag -> flow
  flow-start -> flow -> flow
  flow-end -> .
  seq-item-start -> error -> flow
  explicit-key-start -> flow
  map-value-start -> flow
  alias -> flow
  quote-start -> quoted-scalar -> flow
  comma -> flow
  [else] -> plain-scalar(true, 0) -> flow

quoted-scalar
  quote-end -> .
  [else] -> quoted-scalar

block-scalar(min)
  newline + peek(indent < min) -> .
  [else] -> block-scalar(min)

plain-scalar(is-flow, min)
  scalar-end(is-flow) -> .
  peek(newline + (indent < min)) -> .
  [else] -> plain-scalar(is-flow, min)
*/
/**
 * Tests whether `ch` counts as "empty" for the lexer: either there is no
 * character at all (`undefined`, i.e. reading past the end of the buffer) or
 * it is a space, line feed, carriage return or tab.
 *
 * Comparison is strict, so `null` and the empty string are NOT empty.
 *
 * @param {string | undefined} ch - A single character, or `undefined`.
 * @returns {boolean} `true` for `undefined`, `' '`, `'\n'`, `'\r'`, `'\t'`.
 */
function isEmpty(ch) {
  switch (ch) {
    case undefined:
    case ' ':
    case '\n':
    case '\r':
    case '\t':
      return true;
    default:
      return false;
  }
}
// Character lookup tables used by the lexer. They are kept as arrays of
// single-character strings because the lexer tests membership with
// `Array.prototype.includes`.

/** Characters accepted as hexadecimal digits after `%` in a tag. */
const hexDigits = '0123456789ABCDEFabcdef'.split('');
/** Characters accepted verbatim in a tag (percent-escapes are handled separately). */
const tagChars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-#;/?:@&=+$_.!~*'()".split('');
/** Characters that terminate a plain scalar inside a flow collection. */
const invalidFlowScalarChars = ',[]{}'.split('');
/** Characters that terminate an anchor or alias name. */
const invalidAnchorChars = ' ,[]{}\n\r\t'.split('');
/**
 * Tests whether `ch` ends an anchor/alias name: `true` when `ch` is falsy
 * (end of buffer) or is one of `invalidAnchorChars`.
 *
 * @param {string | undefined} ch - A single character, or `undefined`.
 * @returns {boolean}
 */
const isNotAnchorChar = (ch) => !ch || invalidAnchorChars.includes(ch);
/**
 * Splits an input string into lexical tokens, i.e. smaller strings that are
 * easily identifiable by `tokens.tokenType()`.
 *
 * Lexing starts always in a "stream" context. Incomplete input may be buffered
 * until a complete token can be emitted.
 *
 * In addition to slices of the original input, the following control characters
 * may also be emitted:
 *
 * - `\x02` (Start of Text): A document starts with the next token
 * - `\x18` (Cancel): Unexpected end of flow-mode (indicates an error)
 * - `\x1f` (Unit Separator): Next token is a scalar value
 * - `\u{FEFF}` (Byte order mark): Emitted separately outside documents
 *
 * Every `parse*` method is a generator that yields tokens and returns the name
 * of the next state (see `parseNext`), or `null` (via `setNext`) when more
 * input is required before lexing can continue. Every `push*` method is a
 * generator that yields at most one token, advances `pos` past it and returns
 * the number of characters consumed.
 */
class Lexer {
  constructor() {
    /**
     * Flag indicating whether the end of the current buffer marks the end of
     * all input
     */
    this.atEnd = false;
    /**
     * Explicit indent set in block scalar header, as an offset from the current
     * minimum indent, so e.g. set to 1 from a header `|2+`. Set to -1 if not
     * explicitly set.
     */
    this.blockScalarIndent = -1;
    /**
     * Block scalars that include a + (keep) chomping indicator in their header
     * include trailing empty lines, which are otherwise excluded from the
     * scalar's contents.
     */
    this.blockScalarKeep = false;
    /** Current input */
    this.buffer = '';
    /**
     * Flag noting whether the map value indicator : can immediately follow this
     * node within a flow context.
     */
    this.flowKey = false;
    /** Count of surrounding flow collection levels. */
    this.flowLevel = 0;
    /**
     * Minimum level of indentation required for next lines to be parsed as a
     * part of the current scalar value.
     */
    this.indentNext = 0;
    /** Indentation level of the current line. */
    this.indentValue = 0;
    /** Position of the next \n character. */
    this.lineEndPos = null;
    /** Stores the state of the lexer if reaching the end of incomplete input */
    this.next = null;
    /** A pointer to `buffer`; the current position of the lexer. */
    this.pos = 0;
  }

  /**
   * Generate YAML tokens from the `source` string. If `incomplete`,
   * a part of the last line may be left as a buffer for the next call.
   *
   * @param {string} source - Input to append to any buffered remainder.
   * @param {boolean} [incomplete=false] - Whether more input will follow.
   * @returns A generator of lexical tokens
   */
  *lex(source, incomplete = false) {
    if (source) {
      this.buffer = this.buffer ? this.buffer + source : source;
      // The cached line end may be stale once the buffer has grown.
      this.lineEndPos = null;
    }
    this.atEnd = !incomplete;
    // Resume from the state saved by setNext(), if any.
    let next = this.next ?? 'stream';
    while (next && (incomplete || this.hasChars(1)))
      next = yield* this.parseNext(next);
  }

  /**
   * Checks whether only spaces/tabs remain before the end of the current
   * line: end of buffer, a `#`, `\n`, or `\r\n`.
   *
   * @returns {boolean}
   */
  atLineEnd() {
    let i = this.pos;
    let ch = this.buffer[i];
    while (ch === ' ' || ch === '\t')
      ch = this.buffer[++i];
    if (!ch || ch === '#' || ch === '\n')
      return true;
    if (ch === '\r')
      return this.buffer[i + 1] === '\n';
    return false;
  }

  /**
   * @param {number} n - Offset relative to the current position.
   * @returns {string | undefined} The character at `pos + n`, if any.
   */
  charAt(n) {
    return this.buffer[this.pos + n];
  }

  /**
   * Decides whether the line starting at `offset` continues the current
   * scalar.
   *
   * @param {number} offset - Buffer index of the first character of the line.
   * @returns {number} A buffer index from which scanning may continue, or -1
   *   if the line does not belong to the scalar.
   */
  continueScalar(offset) {
    let ch = this.buffer[offset];
    if (this.indentNext > 0) {
      let indent = 0;
      while (ch === ' ')
        ch = this.buffer[++indent + offset];
      if (ch === '\r') {
        const next = this.buffer[indent + offset + 1];
        // A lone trailing \r in incomplete input may still turn out to be \r\n.
        if (next === '\n' || (!next && !this.atEnd))
          return offset + indent + 1;
      }
      // Blank lines, sufficiently indented lines and a not-yet-complete
      // buffer all count as continuation.
      return ch === '\n' || indent >= this.indentNext || (!ch && !this.atEnd)
        ? offset + indent
        : -1;
    }
    if (ch === '-' || ch === '.') {
      // At zero indent, a document start/end marker terminates the scalar.
      const dt = this.buffer.slice(offset, offset + 3);
      if ((dt === '---' || dt === '...') && isEmpty(this.buffer[offset + 3]))
        return -1;
    }
    return offset;
  }

  /**
   * Returns the rest of the current line, excluding its `\n` or `\r\n`
   * terminator, caching the line-end position in `lineEndPos`.
   *
   * @returns {string | null} The line contents; `null` if the line has no
   *   terminator in the buffer and more input is expected.
   */
  getLine() {
    let end = this.lineEndPos;
    if (typeof end !== 'number' || (end !== -1 && end < this.pos)) {
      end = this.buffer.indexOf('\n', this.pos);
      this.lineEndPos = end;
    }
    if (end === -1)
      return this.atEnd ? this.buffer.substring(this.pos) : null;
    if (this.buffer[end - 1] === '\r')
      end -= 1;
    return this.buffer.substring(this.pos, end);
  }

  /**
   * @param {number} n - Number of characters required.
   * @returns {boolean} Whether at least `n` characters remain in the buffer.
   */
  hasChars(n) {
    return this.pos + n <= this.buffer.length;
  }

  /**
   * Drops the already-consumed part of the buffer and records `state` as the
   * state in which to resume on the next `lex()` call.
   *
   * @param {string} state - Name of the state to resume from.
   * @returns {null} Always `null`, which stops the `lex()` loop.
   */
  setNext(state) {
    this.buffer = this.buffer.substring(this.pos);
    this.pos = 0;
    this.lineEndPos = null;
    this.next = state;
    return null;
  }

  /**
   * @param {number} n - Non-negative number of characters to look at.
   * @returns {string} Up to `n` characters starting at the current position,
   *   without consuming them.
   */
  peek(n) {
    return this.buffer.slice(this.pos, this.pos + n);
  }

  /**
   * Dispatches to the parse method for the state named `next`.
   *
   * @param {string} next - One of 'stream', 'line-start', 'block-start',
   *   'doc', 'flow', 'quoted-scalar', 'block-scalar' or 'plain-scalar'.
   * @returns The following state name, `null`, or `undefined` for an
   *   unrecognised state.
   */
  *parseNext(next) {
    switch (next) {
      case 'stream':
        return yield* this.parseStream();
      case 'line-start':
        return yield* this.parseLineStart();
      case 'block-start':
        return yield* this.parseBlockStart();
      case 'doc':
        return yield* this.parseDocument();
      case 'flow':
        return yield* this.parseFlowCollection();
      case 'quoted-scalar':
        return yield* this.parseQuotedScalar();
      case 'block-scalar':
        return yield* this.parseBlockScalar();
      case 'plain-scalar':
        return yield* this.parsePlainScalar();
    }
  }

  /**
   * Handles one line outside of any document: a byte order mark, a `%`
   * directive, or a blank/comment line. Anything else yields the
   * document-start marker and switches to line-start handling.
   */
  *parseStream() {
    let line = this.getLine();
    if (line === null)
      return this.setNext('stream');
    if (line[0] === cst.BOM) {
      yield* this.pushCount(1);
      line = line.substring(1);
    }
    if (line[0] === '%') {
      let dirEnd = line.length;
      const cs = line.indexOf('#');
      if (cs !== -1) {
        // Only a # preceded by whitespace starts a comment.
        const ch = line[cs - 1];
        if (ch === ' ' || ch === '\t')
          dirEnd = cs - 1;
      }
      // Trim trailing whitespace off the directive itself.
      while (true) {
        const ch = line[dirEnd - 1];
        if (ch === ' ' || ch === '\t')
          dirEnd -= 1;
        else
          break;
      }
      const n = (yield* this.pushCount(dirEnd)) + (yield* this.pushSpaces(true));
      yield* this.pushCount(line.length - n); // possible comment
      // pushNewline() is a generator: without `yield*` the call would be a
      // no-op and the newline would only be emitted by the next stream pass.
      yield* this.pushNewline();
      return 'stream';
    }
    if (this.atLineEnd()) {
      const sp = yield* this.pushSpaces(true);
      yield* this.pushCount(line.length - sp);
      yield* this.pushNewline();
      return 'stream';
    }
    yield cst.DOCUMENT;
    return yield* this.parseLineStart();
  }

  /**
   * Handles the start of a line within a document: recognises the `---` and
   * `...` markers, otherwise emits the indentation and records it in
   * `indentValue` before continuing with block-start handling.
   */
  *parseLineStart() {
    const ch = this.charAt(0);
    if (!ch && !this.atEnd)
      return this.setNext('line-start');
    if (ch === '-' || ch === '.') {
      // Need the three marker characters plus the one after them.
      if (!this.atEnd && !this.hasChars(4))
        return this.setNext('line-start');
      const s = this.peek(3);
      if (s === '---' && isEmpty(this.charAt(3))) {
        yield* this.pushCount(3);
        this.indentValue = 0;
        this.indentNext = 0;
        return 'doc';
      }
      else if (s === '...' && isEmpty(this.charAt(3))) {
        yield* this.pushCount(3);
        return 'stream';
      }
    }
    this.indentValue = yield* this.pushSpaces(false);
    if (this.indentNext > this.indentValue && !isEmpty(this.charAt(1)))
      this.indentNext = this.indentValue;
    return yield* this.parseBlockStart();
  }

  /**
   * Consumes any leading block indicators (`-`, `?`, `:` followed by an empty
   * character), updating `indentNext` and `indentValue` for each of them.
   */
  *parseBlockStart() {
    const [ch0, ch1] = this.peek(2);
    if (!ch1 && !this.atEnd)
      return this.setNext('block-start');
    if ((ch0 === '-' || ch0 === '?' || ch0 === ':') && isEmpty(ch1)) {
      const n = (yield* this.pushCount(1)) + (yield* this.pushSpaces(true));
      this.indentNext = this.indentValue + 1;
      this.indentValue += n;
      return yield* this.parseBlockStart();
    }
    return 'doc';
  }

  /**
   * Handles the next node on the current line in block context, dispatching
   * on its first character after any spaces and indicators.
   */
  *parseDocument() {
    yield* this.pushSpaces(true);
    const line = this.getLine();
    if (line === null)
      return this.setNext('doc');
    let n = yield* this.pushIndicators();
    switch (line[n]) {
      case '#':
        yield* this.pushCount(line.length - n);
      // fallthrough
      case undefined:
        yield* this.pushNewline();
        return yield* this.parseLineStart();
      case '{':
      case '[':
        yield* this.pushCount(1);
        this.flowKey = false;
        this.flowLevel = 1;
        return 'flow';
      case '}':
      case ']':
        // this is an error
        yield* this.pushCount(1);
        return 'doc';
      case '*':
        yield* this.pushUntil(isNotAnchorChar);
        return 'doc';
      case '"':
      case "'":
        return yield* this.parseQuotedScalar();
      case '|':
      case '>':
        n += yield* this.parseBlockScalarHeader();
        n += yield* this.pushSpaces(true);
        yield* this.pushCount(line.length - n);
        yield* this.pushNewline();
        return yield* this.parseBlockScalar();
      default:
        return yield* this.parsePlainScalar();
    }
  }

  /**
   * Handles the next item inside a flow collection, first consuming any
   * newlines and whitespace. Yields the flow-end error marker if the content
   * is under-indented or a document marker is found at zero indent.
   */
  *parseFlowCollection() {
    let nl, sp;
    let indent = -1;
    do {
      nl = yield* this.pushNewline();
      if (nl > 0) {
        sp = yield* this.pushSpaces(false);
        this.indentValue = indent = sp;
      }
      else {
        sp = 0;
      }
      sp += yield* this.pushSpaces(true);
    } while (nl + sp > 0);
    const line = this.getLine();
    if (line === null)
      return this.setNext('flow');
    if ((indent !== -1 && indent < this.indentNext && line[0] !== '#') ||
      (indent === 0 &&
        (line.startsWith('---') || line.startsWith('...')) &&
        isEmpty(line[3]))) {
      // Allowing for the terminal ] or } at the same (rather than greater)
      // indent level as the initial [ or { is technically invalid, but
      // failing here would be surprising to users.
      const atFlowEndMarker = indent === this.indentNext - 1 &&
        this.flowLevel === 1 &&
        (line[0] === ']' || line[0] === '}');
      if (!atFlowEndMarker) {
        // this is an error
        this.flowLevel = 0;
        yield cst.FLOW_END;
        return yield* this.parseLineStart();
      }
    }
    let n = 0;
    while (line[n] === ',') {
      n += yield* this.pushCount(1);
      n += yield* this.pushSpaces(true);
      this.flowKey = false;
    }
    n += yield* this.pushIndicators();
    switch (line[n]) {
      case undefined:
        return 'flow';
      case '#':
        yield* this.pushCount(line.length - n);
        return 'flow';
      case '{':
      case '[':
        yield* this.pushCount(1);
        this.flowKey = false;
        this.flowLevel += 1;
        return 'flow';
      case '}':
      case ']':
        yield* this.pushCount(1);
        this.flowKey = true;
        this.flowLevel -= 1;
        return this.flowLevel ? 'flow' : 'doc';
      case '*':
        yield* this.pushUntil(isNotAnchorChar);
        return 'flow';
      case '"':
      case "'":
        this.flowKey = true;
        return yield* this.parseQuotedScalar();
      case ':': {
        const next = this.charAt(1);
        if (this.flowKey || isEmpty(next) || next === ',') {
          this.flowKey = false;
          yield* this.pushCount(1);
          yield* this.pushSpaces(true);
          return 'flow';
        }
      }
      // fallthrough
      default:
        this.flowKey = false;
        return yield* this.parsePlainScalar();
    }
  }

  /**
   * Emits a single- or double-quoted scalar, including its quotes, as one
   * token. The scalar is cut short at a line that `continueScalar()` rejects.
   */
  *parseQuotedScalar() {
    const quote = this.charAt(0);
    let end = this.buffer.indexOf(quote, this.pos + 1);
    if (quote === "'") {
      // '' inside a single-quoted scalar is an escaped quote.
      while (end !== -1 && this.buffer[end + 1] === "'")
        end = this.buffer.indexOf("'", end + 2);
    }
    else {
      // double-quote: preceded by an odd number of backslashes => escaped
      while (end !== -1) {
        let n = 0;
        while (this.buffer[end - 1 - n] === '\\')
          n += 1;
        if (n % 2 === 0)
          break;
        end = this.buffer.indexOf('"', end + 1);
      }
    }
    // Only looking for newlines within the quotes
    const qb = this.buffer.substring(0, end);
    let nl = qb.indexOf('\n', this.pos);
    if (nl !== -1) {
      while (nl !== -1) {
        const cs = this.continueScalar(nl + 1);
        if (cs === -1)
          break;
        nl = qb.indexOf('\n', cs);
      }
      if (nl !== -1) {
        // this is an error caused by an unexpected unindent
        end = nl - (qb[nl - 1] === '\r' ? 2 : 1);
      }
    }
    if (end === -1) {
      if (!this.atEnd)
        return this.setNext('quoted-scalar');
      end = this.buffer.length;
    }
    yield* this.pushToIndex(end + 1, false);
    return this.flowLevel ? 'flow' : 'doc';
  }

  /**
   * Reads a block scalar header (`|` or `>` plus optional indicators), sets
   * `blockScalarIndent` and `blockScalarKeep` from it, and emits the header.
   *
   * @returns The number of characters emitted.
   */
  *parseBlockScalarHeader() {
    this.blockScalarIndent = -1;
    this.blockScalarKeep = false;
    let i = this.pos;
    while (true) {
      const ch = this.buffer[++i];
      if (ch === '+')
        this.blockScalarKeep = true;
      else if (ch > '0' && ch <= '9')
        this.blockScalarIndent = Number(ch) - 1;
      else if (ch !== '-')
        break;
    }
    return yield* this.pushUntil(ch => isEmpty(ch) || ch === '#');
  }

  /**
   * Emits the body of a block scalar as a scalar marker followed by one
   * token. Trailing blank lines are dropped unless `blockScalarKeep` is set.
   */
  *parseBlockScalar() {
    let nl = this.pos - 1; // may be -1 if this.pos === 0
    let indent = 0;
    let ch;
    // Skip leading blank lines, measuring the indent of the first content line.
    loop: for (let i = this.pos; (ch = this.buffer[i]); ++i) {
      switch (ch) {
        case ' ':
          indent += 1;
          break;
        case '\n':
          nl = i;
          indent = 0;
          break;
        case '\r': {
          const next = this.buffer[i + 1];
          if (!next && !this.atEnd)
            return this.setNext('block-scalar');
          if (next === '\n')
            break;
        } // fallthrough
        default:
          break loop;
      }
    }
    if (!ch && !this.atEnd)
      return this.setNext('block-scalar');
    if (indent >= this.indentNext) {
      if (this.blockScalarIndent === -1)
        this.indentNext = indent;
      else
        this.indentNext += this.blockScalarIndent;
      do {
        const cs = this.continueScalar(nl + 1);
        if (cs === -1)
          break;
        nl = this.buffer.indexOf('\n', cs);
      } while (nl !== -1);
      if (nl === -1) {
        if (!this.atEnd)
          return this.setNext('block-scalar');
        nl = this.buffer.length;
      }
    }
    if (!this.blockScalarKeep) {
      // Walk backwards over trailing whitespace-only lines.
      do {
        let j = nl - 1;
        let prev = this.buffer[j];
        if (prev === '\r')
          prev = this.buffer[--j];
        const lastChar = j; // Drop the line if last char not more indented
        while (prev === ' ' || prev === '\t')
          prev = this.buffer[--j];
        if (prev === '\n' && j >= this.pos && j + 1 + indent > lastChar)
          nl = j;
        else
          break;
      } while (true);
    }
    yield cst.SCALAR;
    yield* this.pushToIndex(nl + 1, true);
    return yield* this.parseLineStart();
  }

  /**
   * Emits a plain (unquoted) scalar as a scalar marker followed by one token,
   * which may span several lines and excludes trailing whitespace.
   */
  *parsePlainScalar() {
    const inFlow = this.flowLevel > 0;
    // `end` tracks the last non-whitespace character of the scalar.
    let end = this.pos - 1;
    let i = this.pos - 1;
    let ch;
    while ((ch = this.buffer[++i])) {
      if (ch === ':') {
        const next = this.buffer[i + 1];
        if (isEmpty(next) || (inFlow && next === ','))
          break;
        end = i;
      }
      else if (isEmpty(ch)) {
        let next = this.buffer[i + 1];
        if (ch === '\r') {
          if (next === '\n') {
            i += 1;
            ch = '\n';
            next = this.buffer[i + 1];
          }
          else
            end = i;
        }
        if (next === '#' || (inFlow && invalidFlowScalarChars.includes(next)))
          break;
        if (ch === '\n') {
          const cs = this.continueScalar(i + 1);
          if (cs === -1)
            break;
          i = Math.max(i, cs - 2); // to advance, but still account for ' #'
        }
      }
      else {
        if (inFlow && invalidFlowScalarChars.includes(ch))
          break;
        end = i;
      }
    }
    if (!ch && !this.atEnd)
      return this.setNext('plain-scalar');
    yield cst.SCALAR;
    yield* this.pushToIndex(end + 1, true);
    return inFlow ? 'flow' : 'doc';
  }

  /**
   * Emits the next `n` characters as one token; does nothing if `n <= 0`.
   *
   * @param {number} n - Number of characters to emit.
   * @returns The number of characters consumed (`n` or 0).
   */
  *pushCount(n) {
    if (n > 0) {
      yield this.buffer.slice(this.pos, this.pos + n);
      this.pos += n;
      return n;
    }
    return 0;
  }

  /**
   * Emits everything from the current position up to (not including) buffer
   * index `i` as one token.
   *
   * @param {number} i - Exclusive end index in the buffer.
   * @param {boolean} allowEmpty - Whether to emit `''` when nothing is
   *   available to emit.
   * @returns The number of characters consumed.
   */
  *pushToIndex(i, allowEmpty) {
    const s = this.buffer.slice(this.pos, i);
    if (s) {
      yield s;
      this.pos += s.length;
      return s.length;
    }
    else if (allowEmpty)
      yield '';
    return 0;
  }

  /**
   * Emits any run of node properties and indicators at the current position:
   * tags (`!`), anchors (`&`), and `-`, `?`, `:` when followed by an empty
   * character (or, inside a flow collection, a flow indicator), each together
   * with the whitespace after it.
   *
   * @returns The total number of characters consumed.
   */
  *pushIndicators() {
    switch (this.charAt(0)) {
      case '!':
        return ((yield* this.pushTag()) +
          (yield* this.pushSpaces(true)) +
          (yield* this.pushIndicators()));
      case '&':
        return ((yield* this.pushUntil(isNotAnchorChar)) +
          (yield* this.pushSpaces(true)) +
          (yield* this.pushIndicators()));
      case '-': // this is an error
      case '?': // this is an error outside flow collections
      case ':': {
        const inFlow = this.flowLevel > 0;
        const ch1 = this.charAt(1);
        if (isEmpty(ch1) || (inFlow && invalidFlowScalarChars.includes(ch1))) {
          if (!inFlow)
            this.indentNext = this.indentValue + 1;
          else if (this.flowKey)
            this.flowKey = false;
          return ((yield* this.pushCount(1)) +
            (yield* this.pushSpaces(true)) +
            (yield* this.pushIndicators()));
        }
      }
    }
    return 0;
  }

  /**
   * Emits a tag starting at the current `!`: either the verbatim form
   * `!<...>` or a run of tag characters and `%XX` escapes.
   *
   * @returns The number of characters consumed.
   */
  *pushTag() {
    if (this.charAt(1) === '<') {
      let i = this.pos + 2;
      let ch = this.buffer[i];
      while (!isEmpty(ch) && ch !== '>')
        ch = this.buffer[++i];
      // Include the closing > when present.
      return yield* this.pushToIndex(ch === '>' ? i + 1 : i, false);
    }
    else {
      let i = this.pos + 1;
      let ch = this.buffer[i];
      while (ch) {
        if (tagChars.includes(ch))
          ch = this.buffer[++i];
        else if (ch === '%' &&
          hexDigits.includes(this.buffer[i + 1]) &&
          hexDigits.includes(this.buffer[i + 2])) {
          ch = this.buffer[(i += 3)];
        }
        else
          break;
      }
      return yield* this.pushToIndex(i, false);
    }
  }

  /**
   * Emits a `\n` or `\r\n` at the current position, if there is one.
   *
   * @returns The number of characters consumed (0, 1 or 2).
   */
  *pushNewline() {
    const ch = this.buffer[this.pos];
    if (ch === '\n')
      return yield* this.pushCount(1);
    else if (ch === '\r' && this.charAt(1) === '\n')
      return yield* this.pushCount(2);
    else
      return 0;
  }

  /**
   * Emits the run of spaces (and tabs, if `allowTabs`) at the current
   * position as one token.
   *
   * @param {boolean} allowTabs - Whether tabs count as part of the run.
   * @returns The number of characters consumed.
   */
  *pushSpaces(allowTabs) {
    let i = this.pos - 1;
    let ch;
    do {
      ch = this.buffer[++i];
    } while (ch === ' ' || (allowTabs && ch === '\t'));
    const n = i - this.pos;
    if (n > 0) {
      yield this.buffer.slice(this.pos, i);
      this.pos = i;
    }
    return n;
  }

  /**
   * Emits everything from the current position up to the first character for
   * which `test` returns true.
   *
   * @param {(ch: string | undefined) => boolean} test - Must return true for
   *   `undefined`, otherwise the scan never stops at the end of the buffer.
   * @returns The number of characters consumed.
   */
  *pushUntil(test) {
    let i = this.pos;
    let ch = this.buffer[i];
    while (!test(ch))
      ch = this.buffer[++i];
    return yield* this.pushToIndex(i, false);
  }
}
702
703exports.Lexer = Lexer;
Note: See TracBrowser for help on using the repository browser.