'use strict'

// this[BUFFER] is the remainder of a chunk if we're waiting for
// the full 512 bytes of a header to come in. We will Buffer.concat()
// it to the next write(), which is a mem copy, but a small one.
//
// this[QUEUE] is a Yallist of entries that haven't been emitted
// yet. This can only get filled up if the user keeps write()ing after
// a write() returns false, or does a write() with more than one entry
//
// We don't buffer chunks, we always parse them and either create an
// entry, or push it into the active entry. The ReadEntry class knows
// to throw data away if .ignore=true
//
// Shift entry off the buffer when it emits 'end', and emit 'entry' for
// the next one in the list.
//
// At any time, we're pushing body chunks into the entry at WRITEENTRY,
// and waiting for 'end' on the entry at READENTRY
//
// ignored entries get .resume() called on them straight away

// project-local helpers and third-party deps
const warner = require('./warn-mixin.js')
const Header = require('./header.js')
const EE = require('events')
const Yallist = require('yallist')
// default cap on how large a meta (pax/longpath) entry we'll buffer in memory
const maxMetaEntrySize = 1024 * 1024
const Entry = require('./read-entry.js')
const Pax = require('./pax.js')
const zlib = require('minizlib')

// first two bytes of a gzip stream (the 0x1f 0x8b magic); used by write()
// to sniff whether the input needs to go through a zlib.Unzip first
const gzipHeader = Buffer.from([0x1f, 0x8b])

// private state keys. Symbols keep internal bookkeeping off the public
// surface of the emitter so consumer code can't collide with it.
const STATE = Symbol('state')
const WRITEENTRY = Symbol('writeEntry')
const READENTRY = Symbol('readEntry')
const NEXTENTRY = Symbol('nextEntry')
const PROCESSENTRY = Symbol('processEntry')
const EX = Symbol('extendedHeader')
const GEX = Symbol('globalExtendedHeader')
const META = Symbol('meta')
const EMITMETA = Symbol('emitMeta')
const BUFFER = Symbol('buffer')
const QUEUE = Symbol('queue')
const ENDED = Symbol('ended')
const EMITTEDEND = Symbol('emittedEnd')
const EMIT = Symbol('emit')
const UNZIP = Symbol('unzip')
const CONSUMECHUNK = Symbol('consumeChunk')
const CONSUMECHUNKSUB = Symbol('consumeChunkSub')
const CONSUMEBODY = Symbol('consumeBody')
const CONSUMEMETA = Symbol('consumeMeta')
const CONSUMEHEADER = Symbol('consumeHeader')
const CONSUMING = Symbol('consuming')
const BUFFERCONCAT = Symbol('bufferConcat')
const MAYBEEND = Symbol('maybeEnd')
const WRITING = Symbol('writing')
const ABORTED = Symbol('aborted')
const DONE = Symbol('onDone')
const SAW_VALID_ENTRY = Symbol('sawValidEntry')
const SAW_NULL_BLOCK = Symbol('sawNullBlock')
const SAW_EOF = Symbol('sawEOF')

// default filter: accept every entry
const noop = _ => true
|
---|
// Parser: an EventEmitter with a writable-stream-shaped interface
// (write()/end(), 'drain' events) that accepts raw tar bytes — optionally
// gzipped, detected automatically on the first write — and emits 'entry'
// events carrying ReadEntry objects. Wrapped in warner() for the
// warn()/'warn' error-reporting mixin.
module.exports = warner(class Parser extends EE {
  // opt.file: filename for warning context; opt.strict: make warnings throw;
  // opt.maxMetaEntrySize: cap on buffered meta entries; opt.filter(path, entry):
  // predicate to skip entries; opt.ondone / opt.onwarn / opt.onentry: handlers
  // attached as listeners.
  constructor (opt) {
    opt = opt || {}
    super(opt)

    this.file = opt.file || ''

    // set to boolean false when an entry starts. 1024 bytes of \0
    // is technically a valid tarball, albeit a boring one.
    this[SAW_VALID_ENTRY] = null

    // these BADARCHIVE errors can't be detected early. listen on DONE.
    this.on(DONE, _ => {
      if (this[STATE] === 'begin' || this[SAW_VALID_ENTRY] === false) {
        // either less than 1 block of data, or all entries were invalid.
        // Either way, probably not even a tarball.
        this.warn('TAR_BAD_ARCHIVE', 'Unrecognized archive format')
      }
    })

    // DONE is an internal symbol event; translate it either to the caller's
    // ondone callback, or to the conventional stream lifecycle events.
    if (opt.ondone)
      this.on(DONE, opt.ondone)
    else {
      this.on(DONE, _ => {
        this.emit('prefinish')
        this.emit('finish')
        this.emit('end')
        this.emit('close')
      })
    }

    this.strict = !!opt.strict
    this.maxMetaEntrySize = opt.maxMetaEntrySize || maxMetaEntrySize
    this.filter = typeof opt.filter === 'function' ? opt.filter : noop

    // have to set this so that streams are ok piping into it
    this.writable = true
    this.readable = false

    // initial parser state: empty queue, no pending partial block, no
    // active entries, no extended headers seen yet, gzip-ness unknown.
    this[QUEUE] = new Yallist()
    this[BUFFER] = null
    this[READENTRY] = null
    this[WRITEENTRY] = null
    this[STATE] = 'begin'
    this[META] = ''
    this[EX] = null
    this[GEX] = null
    this[ENDED] = false
    this[UNZIP] = null
    this[ABORTED] = false
    this[SAW_NULL_BLOCK] = false
    this[SAW_EOF] = false
    if (typeof opt.onwarn === 'function')
      this.on('warn', opt.onwarn)
    if (typeof opt.onentry === 'function')
      this.on('entry', opt.onentry)
  }

  // Parse one 512-byte header block from chunk at position. Validates the
  // header (checksum, path, linkpath), tracks the two-null-blocks-in-a-row
  // EOF marker, and transitions STATE to 'header'/'body'/'meta'/'ignore'
  // as appropriate, creating the next WRITEENTRY for non-null blocks.
  [CONSUMEHEADER] (chunk, position) {
    if (this[SAW_VALID_ENTRY] === null)
      this[SAW_VALID_ENTRY] = false
    let header
    try {
      header = new Header(chunk, position, this[EX], this[GEX])
    } catch (er) {
      // unparseable header block: warn (recoverable) and skip it
      return this.warn('TAR_ENTRY_INVALID', er)
    }

    if (header.nullBlock) {
      // a second consecutive null block marks end-of-archive
      if (this[SAW_NULL_BLOCK]) {
        this[SAW_EOF] = true
        // ending an archive with no entries. pointless, but legal.
        if (this[STATE] === 'begin')
          this[STATE] = 'header'
        this[EMIT]('eof')
      } else {
        this[SAW_NULL_BLOCK] = true
        this[EMIT]('nullBlock')
      }
    } else {
      this[SAW_NULL_BLOCK] = false
      if (!header.cksumValid)
        this.warn('TAR_ENTRY_INVALID', 'checksum failure', {header})
      else if (!header.path)
        this.warn('TAR_ENTRY_INVALID', 'path is required', {header})
      else {
        const type = header.type
        // links must have a linkpath, and only links may have one
        if (/^(Symbolic)?Link$/.test(type) && !header.linkpath)
          this.warn('TAR_ENTRY_INVALID', 'linkpath required', {header})
        else if (!/^(Symbolic)?Link$/.test(type) && header.linkpath)
          this.warn('TAR_ENTRY_INVALID', 'linkpath forbidden', {header})
        else {
          const entry = this[WRITEENTRY] = new Entry(header, this[EX], this[GEX])

          // we do this for meta & ignored entries as well, because they
          // are still valid tar, or else we wouldn't know to ignore them
          if (!this[SAW_VALID_ENTRY]) {
            if (entry.remain) {
              // this might be the one!
              const onend = () => {
                if (!entry.invalid)
                  this[SAW_VALID_ENTRY] = true
              }
              entry.on('end', onend)
            } else
              this[SAW_VALID_ENTRY] = true
          }

          if (entry.meta) {
            // meta entries (pax headers, gnu longpath, etc) get buffered
            // into this[META] rather than emitted to the consumer
            if (entry.size > this.maxMetaEntrySize) {
              // oversized meta entry: treat as ignored body data
              entry.ignore = true
              this[EMIT]('ignoredEntry', entry)
              this[STATE] = 'ignore'
              entry.resume()
            } else if (entry.size > 0) {
              this[META] = ''
              entry.on('data', c => this[META] += c)
              this[STATE] = 'meta'
            }
          } else {
            // a regular entry consumes any pending per-entry extended header
            this[EX] = null
            entry.ignore = entry.ignore || !this.filter(entry.path, entry)

            if (entry.ignore) {
              // probably valid, just not something we care about
              this[EMIT]('ignoredEntry', entry)
              this[STATE] = entry.remain ? 'ignore' : 'header'
              entry.resume()
            } else {
              if (entry.remain)
                this[STATE] = 'body'
              else {
                // zero-length entry: no body blocks to consume
                this[STATE] = 'header'
                entry.end()
              }

              // if nothing is currently being read, start pumping the
              // queue right away; otherwise just enqueue for later
              if (!this[READENTRY]) {
                this[QUEUE].push(entry)
                this[NEXTENTRY]()
              } else
                this[QUEUE].push(entry)
            }
          }
        }
      }
    }
  }

  // Handle one item shifted off the QUEUE: falsy means the queue is
  // drained; an array is a deferred [event, data, extra] from EMIT to
  // re-emit now; anything else is a ReadEntry to surface as 'entry'.
  // Returns true if the caller should keep pulling from the queue.
  [PROCESSENTRY] (entry) {
    let go = true

    if (!entry) {
      this[READENTRY] = null
      go = false
    } else if (Array.isArray(entry))
      this.emit.apply(this, entry)
    else {
      this[READENTRY] = entry
      this.emit('entry', entry)
      if (!entry.emittedEnd) {
        // stop pumping until this entry is fully consumed
        entry.on('end', _ => this[NEXTENTRY]())
        go = false
      }
    }

    return go
  }

  // Pump queued entries/events, then decide whether to emit 'drain' now
  // or wait on the current read entry's own 'drain'.
  [NEXTENTRY] () {
    do {} while (this[PROCESSENTRY](this[QUEUE].shift()))

    if (!this[QUEUE].length) {
      // At this point, there's nothing in the queue, but we may have an
      // entry which is being consumed (readEntry).
      // If we don't, then we definitely can handle more data.
      // If we do, and either it's flowing, or it has never had any data
      // written to it, then it needs more.
      // The only other possibility is that it has returned false from a
      // write() call, so we wait for the next drain to continue.
      const re = this[READENTRY]
      const drainNow = !re || re.flowing || re.size === re.remain
      if (drainNow) {
        // suppress 'drain' while inside a write() call; write()'s own
        // return value handles backpressure in that case
        if (!this[WRITING])
          this.emit('drain')
      } else
        re.once('drain', _ => this.emit('drain'))
    }
  }

  // Feed body bytes to the current WRITEENTRY. Returns the number of
  // bytes consumed from chunk.
  [CONSUMEBODY] (chunk, position) {
    // write up to but no more than writeEntry.blockRemain
    const entry = this[WRITEENTRY]
    const br = entry.blockRemain
    // avoid a slice copy when the whole chunk fits in this entry
    const c = (br >= chunk.length && position === 0) ? chunk
      : chunk.slice(position, position + br)

    entry.write(c)

    if (!entry.blockRemain) {
      // entry body (including block padding) fully consumed; back to
      // expecting a header next
      this[STATE] = 'header'
      this[WRITEENTRY] = null
      entry.end()
    }

    return c.length
  }

  // Same as CONSUMEBODY, but for meta entries: once the entry finishes,
  // apply the accumulated this[META] content via EMITMETA.
  [CONSUMEMETA] (chunk, position) {
    const entry = this[WRITEENTRY]
    const ret = this[CONSUMEBODY](chunk, position)

    // if we finished, then the entry is reset
    if (!this[WRITEENTRY])
      this[EMITMETA](entry)

    return ret
  }

  // Emit immediately if nothing is pending; otherwise park the event on
  // the queue so PROCESSENTRY re-emits it in order relative to entries.
  [EMIT] (ev, data, extra) {
    if (!this[QUEUE].length && !this[READENTRY])
      this.emit(ev, data, extra)
    else
      this[QUEUE].push([ev, data, extra])
  }

  // Apply a completed meta entry's buffered content to the extended
  // header state (EX per-entry, GEX global), per the entry's type.
  [EMITMETA] (entry) {
    this[EMIT]('meta', this[META])
    switch (entry.type) {
      case 'ExtendedHeader':
      case 'OldExtendedHeader':
        this[EX] = Pax.parse(this[META], this[EX], false)
        break

      case 'GlobalExtendedHeader':
        this[GEX] = Pax.parse(this[META], this[GEX], true)
        break

      case 'NextFileHasLongPath':
      case 'OldGnuLongPath':
        // gnu long path: content is the path, NUL-terminated
        this[EX] = this[EX] || Object.create(null)
        this[EX].path = this[META].replace(/\0.*/, '')
        break

      case 'NextFileHasLongLinkpath':
        this[EX] = this[EX] || Object.create(null)
        this[EX].linkpath = this[META].replace(/\0.*/, '')
        break

      /* istanbul ignore next */
      default: throw new Error('unknown meta: ' + entry.type)
    }
  }

  // Unrecoverable failure: stop accepting data and report the error.
  abort (error) {
    this[ABORTED] = true
    this.emit('abort', error)
    // always throws, even in non-strict mode
    this.warn('TAR_ABORT', error, { recoverable: false })
  }

  // Writable-stream-style write(). Returns false when the consumer
  // should wait for 'drain' before writing more.
  write (chunk) {
    if (this[ABORTED])
      return

    // first write, might be gzipped
    if (this[UNZIP] === null && chunk) {
      // prepend any bytes held back from a too-short previous write
      if (this[BUFFER]) {
        chunk = Buffer.concat([this[BUFFER], chunk])
        this[BUFFER] = null
      }
      // not enough bytes yet to check the gzip magic; hold and wait
      if (chunk.length < gzipHeader.length) {
        this[BUFFER] = chunk
        return true
      }
      // compare against the gzip magic; any mismatch means plain tar
      for (let i = 0; this[UNZIP] === null && i < gzipHeader.length; i++) {
        if (chunk[i] !== gzipHeader[i])
          this[UNZIP] = false
      }
      if (this[UNZIP] === null) {
        // magic matched: route all input through an Unzip stream whose
        // output feeds CONSUMECHUNK
        const ended = this[ENDED]
        this[ENDED] = false
        this[UNZIP] = new zlib.Unzip()
        this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk))
        this[UNZIP].on('error', er => this.abort(er))
        this[UNZIP].on('end', _ => {
          this[ENDED] = true
          this[CONSUMECHUNK]()
        })
        this[WRITING] = true
        // if end() already ran (ENDED was set), this chunk is the last:
        // end the unzip stream instead of just writing to it
        const ret = this[UNZIP][ended ? 'end' : 'write'](chunk)
        this[WRITING] = false
        return ret
      }
    }

    this[WRITING] = true
    if (this[UNZIP])
      this[UNZIP].write(chunk)
    else
      this[CONSUMECHUNK](chunk)
    this[WRITING] = false

    // return false if there's a queue, or if the current entry isn't flowing
    const ret =
      this[QUEUE].length ? false :
      this[READENTRY] ? this[READENTRY].flowing :
      true

    // if we have no queue, then that means a clogged READENTRY
    if (!ret && !this[QUEUE].length)
      this[READENTRY].once('drain', _ => this.emit('drain'))

    return ret
  }

  // Append c to the pending partial-block buffer (no-op once aborted).
  [BUFFERCONCAT] (c) {
    if (c && !this[ABORTED])
      this[BUFFER] = this[BUFFER] ? Buffer.concat([this[BUFFER], c]) : c
  }

  // Emit DONE exactly once, after input has ended and no consume is in
  // progress. If the final entry was cut short, flush what we have and
  // warn about the truncated archive first.
  [MAYBEEND] () {
    if (this[ENDED] &&
        !this[EMITTEDEND] &&
        !this[ABORTED] &&
        !this[CONSUMING]) {
      this[EMITTEDEND] = true
      const entry = this[WRITEENTRY]
      if (entry && entry.blockRemain) {
        // truncated, likely a damaged file
        const have = this[BUFFER] ? this[BUFFER].length : 0
        this.warn('TAR_BAD_ARCHIVE', `Truncated input (needed ${
          entry.blockRemain} more bytes, only ${have} available)`, {entry})
        if (this[BUFFER])
          entry.write(this[BUFFER])
        entry.end()
      }
      this[EMIT](DONE)
    }
  }

  // Top-level chunk consumption with a re-entrancy guard: if we're
  // already consuming (e.g. called back from an event handler or the
  // unzip stream), just buffer the chunk for the active loop to pick up.
  [CONSUMECHUNK] (chunk) {
    if (this[CONSUMING])
      this[BUFFERCONCAT](chunk)
    else if (!chunk && !this[BUFFER])
      // no data at all: this call only exists to check for end-of-input
      this[MAYBEEND]()
    else {
      this[CONSUMING] = true
      if (this[BUFFER]) {
        // merge with the held-back partial block before consuming
        this[BUFFERCONCAT](chunk)
        const c = this[BUFFER]
        this[BUFFER] = null
        this[CONSUMECHUNKSUB](c)
      } else
        this[CONSUMECHUNKSUB](chunk)

      // anything re-entrantly buffered during the consume above gets
      // processed now, as long as we have at least one full 512b block
      while (this[BUFFER] &&
          this[BUFFER].length >= 512 &&
          !this[ABORTED] &&
          !this[SAW_EOF]) {
        const c = this[BUFFER]
        this[BUFFER] = null
        this[CONSUMECHUNKSUB](c)
      }
      this[CONSUMING] = false
    }

    if (!this[BUFFER] || this[ENDED])
      this[MAYBEEND]()
  }

  // Walk a chunk in 512-byte-aligned steps, dispatching each region to
  // the handler for the current parser state.
  [CONSUMECHUNKSUB] (chunk) {
    // we know that we are in CONSUMING mode, so anything written goes into
    // the buffer. Advance the position and put any remainder in the buffer.
    let position = 0
    const length = chunk.length
    while (position + 512 <= length && !this[ABORTED] && !this[SAW_EOF]) {
      switch (this[STATE]) {
        case 'begin':
        case 'header':
          this[CONSUMEHEADER](chunk, position)
          position += 512
          break

        case 'ignore':
        case 'body':
          position += this[CONSUMEBODY](chunk, position)
          break

        case 'meta':
          position += this[CONSUMEMETA](chunk, position)
          break

        /* istanbul ignore next */
        default:
          throw new Error('invalid state: ' + this[STATE])
      }
    }

    // stash the sub-block remainder for the next write()
    if (position < length) {
      if (this[BUFFER])
        this[BUFFER] = Buffer.concat([chunk.slice(position), this[BUFFER]])
      else
        this[BUFFER] = chunk.slice(position)
    }
  }

  // Signal end of input: route through the unzip stream if one exists,
  // otherwise mark ENDED and do a final write so MAYBEEND can fire.
  end (chunk) {
    if (!this[ABORTED]) {
      if (this[UNZIP])
        this[UNZIP].end(chunk)
      else {
        this[ENDED] = true
        this.write(chunk)
      }
    }
  }
})
|
---|