var constants = require('../tokenizer/const');
var utils = require('../tokenizer/utils');

// local aliases for the pieces of the tokenizer modules used in this file
var TYPE = constants.TYPE;
var NAME = constants.NAME;
var cmpStr = utils.cmpStr;

// token types the stream tests against directly
var EOF = TYPE.EOF;
var WHITESPACE = TYPE.WhiteSpace;
var COMMENT = TYPE.Comment;

// Each offsetAndType entry packs two values into one integer:
// the token type in the bits above TYPE_SHIFT (read with `>> TYPE_SHIFT`)
// and an offset in the bits below it (read with `& OFFSET_MASK`).
var TYPE_SHIFT = 24;
var OFFSET_MASK = (1 << TYPE_SHIFT) - 1; // 0x00FFFFFF

/**
 * Cursor over a tokenized source.
 *
 * The constructor only initializes `offsetAndType` and `balance` to null and
 * resets the cursor. The methods below also read `source`, `tokenCount` and
 * `firstCharOffset`, which are not assigned anywhere in this file and are
 * expected to be set by whoever fills the stream.
 *
 * `offsetAndType[i]` packs the type of token `i` (upper bits) together with
 * the offset where token `i` ends (lower 24 bits); a token therefore starts
 * where the previous one ends, and the first token starts at `firstCharOffset`.
 */
var TokenStream = function() {
    this.offsetAndType = null;
    this.balance = null;

    this.reset();
};

TokenStream.prototype = {
    /**
     * Put the cursor before the first token (tokenIndex === -1) and clear
     * the EOF flag.
     */
    reset: function() {
        this.eof = false;
        this.tokenIndex = -1;
        this.tokenType = 0;
        this.tokenStart = this.firstCharOffset;
        this.tokenEnd = this.firstCharOffset;
    },

    /**
     * Type of the token `offset` positions away from the current one.
     * @param {number} offset position relative to the current token
     * @returns {number} token type, or EOF when the position is not below tokenCount
     */
    lookupType: function(offset) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            return this.offsetAndType[offset] >> TYPE_SHIFT;
        }

        return EOF;
    },

    /**
     * Source offset at which the token `offset` positions away starts.
     * @param {number} offset position relative to the current token
     * @returns {number} start offset, or source.length when the position is not below tokenCount
     */
    lookupOffset: function(offset) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            // a token starts where the previous token ends
            // NOTE(review): when the resolved index is 0 this reads
            // offsetAndType[-1], which is undefined for arrays and typed arrays,
            // so the result is 0 rather than firstCharOffset - confirm intended
            return this.offsetAndType[offset - 1] & OFFSET_MASK;
        }

        return this.source.length;
    },

    /**
     * Compare the text of the token `offset` positions away with `referenceStr`.
     * The comparison itself is delegated to cmpStr(source, start, end, referenceStr).
     * @param {number} offset position relative to the current token
     * @param {string} referenceStr value to compare with
     * @returns {boolean} cmpStr() result, or false when the position is not below tokenCount
     */
    lookupValue: function(offset, referenceStr) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            // NOTE(review): same first-token caveat as lookupOffset() - for a
            // resolved index of 0 the start offset evaluates to 0
            return cmpStr(
                this.source,
                this.offsetAndType[offset - 1] & OFFSET_MASK,
                this.offsetAndType[offset] & OFFSET_MASK,
                referenceStr
            );
        }

        return false;
    },

    /**
     * Start offset of a token addressed by its absolute index.
     * @param {number} tokenIndex absolute token index
     * @returns {number} start offset of that token in the source
     */
    getTokenStart: function(tokenIndex) {
        if (tokenIndex === this.tokenIndex) {
            return this.tokenStart;
        }

        if (tokenIndex > 0) {
            return tokenIndex < this.tokenCount
                ? this.offsetAndType[tokenIndex - 1] & OFFSET_MASK
                // past the last token: use the entry stored at index tokenCount
                : this.offsetAndType[this.tokenCount] & OFFSET_MASK;
        }

        return this.firstCharOffset;
    },

    // TODO: -> skipUntilBalanced
    /**
     * Scan forward from `startToken` until `mode` asks to stop, a balance edge
     * pointing before `startToken` is met, or the tokens run out.
     *
     * `mode(type, source, offset)` is called for every visited token with the
     * token type, the source and the offset where that token starts. Its result:
     *   1 - stop before this token;
     *   2 - stop after this token;
     *   anything else - keep scanning.
     *
     * @param {number} startToken absolute index of the token to start from
     * @param {function(number, string, number): number} mode stop predicate
     * @returns {number} index of the stop position relative to the current token
     */
    getRawLength: function(startToken, mode) {
        var cursor = startToken;
        var balanceEnd;
        // Start offset of the token under the cursor. The first token has no
        // preceding entry to take an end offset from, so it starts at
        // firstCharOffset (reading entry 0 instead would give the END of the
        // first token and make `mode` inspect the wrong position).
        var offset = cursor > 0
            ? this.offsetAndType[cursor - 1] & OFFSET_MASK
            : this.firstCharOffset;
        var type;

        loop:
        for (; cursor < this.tokenCount; cursor++) {
            balanceEnd = this.balance[cursor];

            // stop scanning on balance edge that points to offset before start token
            if (balanceEnd < startToken) {
                break loop;
            }

            type = this.offsetAndType[cursor] >> TYPE_SHIFT;

            // check token is stop type
            switch (mode(type, this.source, offset)) {
                case 1:
                    break loop;

                case 2:
                    cursor++;
                    break loop;

                default:
                    // fast forward to the end of balanced block
                    if (this.balance[balanceEnd] === cursor) {
                        cursor = balanceEnd;
                    }

                    // end of the token just passed is the start of the next one
                    offset = this.offsetAndType[cursor] & OFFSET_MASK;
            }
        }

        return cursor - this.tokenIndex;
    },

    /**
     * @param {number} pos token index to compare with
     * @returns {boolean} true when the balance entry of the current token is below `pos`
     */
    isBalanceEdge: function(pos) {
        return this.balance[this.tokenIndex] < pos;
    },

    /**
     * Check for a Delim token whose first character has the given code.
     * @param {number} code expected character code
     * @param {number} [offset] relative position to test; when omitted or 0 the current token is tested
     * @returns {boolean}
     */
    isDelim: function(code, offset) {
        if (offset) {
            return (
                this.lookupType(offset) === TYPE.Delim &&
                this.source.charCodeAt(this.lookupOffset(offset)) === code
            );
        }

        return (
            this.tokenType === TYPE.Delim &&
            this.source.charCodeAt(this.tokenStart) === code
        );
    },

    /** @returns {string} source text of the current token */
    getTokenValue: function() {
        return this.source.substring(this.tokenStart, this.tokenEnd);
    },

    /** @returns {number} length of the current token in the source */
    getTokenLength: function() {
        return this.tokenEnd - this.tokenStart;
    },

    /**
     * @param {number} start source offset to begin from
     * @returns {string} source text from `start` up to the start of the current token
     */
    substrToCursor: function(start) {
        return this.source.substring(start, this.tokenStart);
    },

    /**
     * Move past the run of WhiteSpace tokens that begins at the current token, if any.
     */
    skipWS: function() {
        // count consecutive whitespace tokens first, then move the cursor once
        for (var i = this.tokenIndex, skipTokenCount = 0; i < this.tokenCount; i++, skipTokenCount++) {
            if ((this.offsetAndType[i] >> TYPE_SHIFT) !== WHITESPACE) {
                break;
            }
        }

        if (skipTokenCount > 0) {
            this.skip(skipTokenCount);
        }
    },

    /**
     * Advance while the current token is WhiteSpace or Comment.
     */
    skipSC: function() {
        while (this.tokenType === WHITESPACE || this.tokenType === COMMENT) {
            this.next();
        }
    },

    /**
     * Move the cursor `tokenCount` tokens forward; moving to or past the end
     * puts the stream into the EOF state.
     * @param {number} tokenCount number of tokens to skip
     */
    skip: function(tokenCount) {
        var next = this.tokenIndex + tokenCount;

        if (next < this.tokenCount) {
            this.tokenIndex = next;
            // NOTE(review): for next === 0 this reads offsetAndType[-1] and
            // yields 0 rather than firstCharOffset - confirm intended
            this.tokenStart = this.offsetAndType[next - 1] & OFFSET_MASK;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            // let next() set up the EOF state
            this.tokenIndex = this.tokenCount;
            this.next();
        }
    },

    /**
     * Move the cursor to the next token, or into the EOF state when there is none.
     */
    next: function() {
        var next = this.tokenIndex + 1;

        if (next < this.tokenCount) {
            this.tokenIndex = next;
            // the new token starts where the previous one ended
            this.tokenStart = this.tokenEnd;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.tokenIndex = this.tokenCount;
            this.eof = true;
            this.tokenType = EOF;
            this.tokenStart = this.tokenEnd = this.source.length;
        }
    },

    /**
     * Call `fn(type, start, end, index)` for every token, in order.
     * @param {function(number, number, number, number): void} fn visitor
     */
    forEachToken(fn) {
        for (var i = 0, offset = this.firstCharOffset; i < this.tokenCount; i++) {
            var start = offset;
            var item = this.offsetAndType[i];
            var end = item & OFFSET_MASK;
            var type = item >> TYPE_SHIFT;

            offset = end;

            fn(type, start, end, i);
        }
    },

    /**
     * Build a plain description of every token.
     * @returns {Array<{idx: number, type: *, chunk: string, balance: *}>}
     *   one entry per token: its index, NAME[type], source text and balance entry
     */
    dump() {
        var tokens = new Array(this.tokenCount);

        this.forEachToken((type, start, end, index) => {
            tokens[index] = {
                idx: index,
                type: NAME[type],
                chunk: this.source.substring(start, end),
                balance: this.balance[index]
            };
        });

        return tokens;
    }
};

// The TokenStream constructor is the only thing this module exports.
module.exports = TokenStream;