source: node_modules/undici/lib/fetch/dataURL.js

main
Last change on this file was d24f17c, checked in by Aleksandar Panovski <apano77@…>, 15 months ago

Initial commit

  • Property mode set to 100644
File size: 17.8 KB
Line 
1const assert = require('assert')
2const { atob } = require('buffer')
3const { isomorphicDecode } = require('./util')
4
// Shared UTF-8 encoder; used by stringPercentDecode to turn a string into bytes.
const encoder = new TextEncoder()

/**
 * Matches a non-empty string made solely of HTTP token code points.
 * The hyphen is escaped so that `+-.` is not read as a range (which would
 * also admit U+002C (,)), and U+0060 (`) is included as the spec lists it.
 * @see https://mimesniff.spec.whatwg.org/#http-token-code-point
 */
const HTTP_TOKEN_CODEPOINTS = /^[!#$%&'*+\-.^_`|~A-Za-z0-9]+$/
// Matches when the tested string contains an HTTP whitespace code point
// (LF, CR, TAB or SPACE); callers in this file test one character at a time.
const HTTP_WHITESPACE_REGEX = /(\u000A|\u000D|\u0009|\u0020)/ // eslint-disable-line
/**
 * Matches a non-empty string made solely of HTTP quoted-string token code
 * points. Anchored so that a single valid character is not enough to accept
 * a value that also contains disallowed code points.
 * @see https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point
 */
const HTTP_QUOTED_STRING_TOKENS = /^[\u0009\u0020-\u007E\u0080-\u00FF]+$/ // eslint-disable-line
16
// https://fetch.spec.whatwg.org/#data-url-processor
/**
 * Splits a `data:` URL into a MIME type record and its decoded body bytes,
 * following the numbered steps of the fetch spec's data: URL processor.
 *
 * @param {URL} dataURL URL whose protocol must be "data:" (asserted).
 * @returns {'failure' | { mimeType: object, body: Uint8Array }} the string
 *   'failure' when the URL contains no comma or its base64 payload cannot be
 *   decoded; otherwise the parsed MIME type record and the body.
 */
function dataURLProcessor (dataURL) {
  // 1. Assert: dataURL's scheme is "data".
  assert(dataURL.protocol === 'data:')

  // 2. Let input be the result of running the URL
  //    serializer on dataURL with exclude fragment
  //    set to true.
  let input = URLSerializer(dataURL, true)

  // 3. Remove the leading "data:" string from input.
  input = input.slice(5)

  // 4. Let position point at the start of input.
  const position = { position: 0 }

  // 5. Let mimeType be the result of collecting a
  //    sequence of code points that are not equal
  //    to U+002C (,), given position.
  let mimeType = collectASequenceOfCodePointsFast(
    ',',
    input,
    position
  )

  // 6. Strip leading and trailing ASCII whitespace
  //    from mimeType.
  //    Implementation note: the untrimmed length is kept because step 9
  //    slices the body off `input` by that length; using the trimmed length
  //    would cut at the wrong offset.
  const mimeTypeLength = mimeType.length
  mimeType = removeASCIIWhitespace(mimeType, true, true)

  // 7. If position is past the end of input, then
  //    return failure (no comma was found).
  if (position.position >= input.length) {
    return 'failure'
  }

  // 8. Advance position by 1.
  position.position++

  // 9. Let encodedBody be the remainder of input.
  const encodedBody = input.slice(mimeTypeLength + 1)

  // 10. Let body be the percent-decoding of encodedBody.
  let body = stringPercentDecode(encodedBody)

  // 11. If mimeType ends with U+003B (;), followed by
  //     zero or more U+0020 SPACE, followed by an ASCII
  //     case-insensitive match for "base64", then:
  if (/;(\u0020){0,}base64$/i.test(mimeType)) {
    // 1. Let stringBody be the isomorphic decode of body.
    const stringBody = isomorphicDecode(body)

    // 2. Set body to the forgiving-base64 decode of
    //    stringBody.
    body = forgivingBase64(stringBody)

    // 3. If body is failure, then return failure.
    if (body === 'failure') {
      return 'failure'
    }

    // 4. Remove the last 6 code points from mimeType ("base64").
    mimeType = mimeType.slice(0, -6)

    // 5. Remove trailing U+0020 SPACE code points from mimeType,
    //    if any.
    mimeType = mimeType.replace(/(\u0020)+$/, '')

    // 6. Remove the last U+003B (;) code point from mimeType.
    mimeType = mimeType.slice(0, -1)
  }

  // 12. If mimeType starts with U+003B (;), then prepend
  //     "text/plain" to mimeType.
  if (mimeType.startsWith(';')) {
    mimeType = 'text/plain' + mimeType
  }

  // 13. Let mimeTypeRecord be the result of parsing
  //     mimeType.
  let mimeTypeRecord = parseMIMEType(mimeType)

  // 14. If mimeTypeRecord is failure, then set
  //     mimeTypeRecord to text/plain;charset=US-ASCII.
  if (mimeTypeRecord === 'failure') {
    mimeTypeRecord = parseMIMEType('text/plain;charset=US-ASCII')
  }

  // 15. Return a new data: URL struct whose MIME
  //     type is mimeTypeRecord and body is body.
  //     https://fetch.spec.whatwg.org/#data-url-struct
  return { mimeType: mimeTypeRecord, body }
}
115
// https://url.spec.whatwg.org/#concept-url-serializer
/**
 * Serializes a URL, optionally without its fragment.
 *
 * @param {URL} url
 * @param {boolean} excludeFragment when true, the "#fragment" suffix is
 *   dropped from the result.
 * @returns {string} `url.href`, minus the fragment when requested.
 */
function URLSerializer (url, excludeFragment = false) {
  const href = url.href

  if (!excludeFragment) {
    return href
  }

  const hashLength = url.hash.length

  if (hashLength !== 0) {
    return href.substring(0, href.length - hashLength)
  }

  // `url.hash` is the empty string both when there is no fragment and when
  // the fragment is present but empty ("data:,x#"). In the latter case href
  // still ends with the "#" delimiter, which belongs to the fragment and
  // must be excluded as well.
  return href.endsWith('#') ? href.slice(0, -1) : href
}
131
// https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points
/**
 * Collects characters from `input`, starting at `position.position`, for as
 * long as `condition` holds, and advances `position.position` past them.
 *
 * @param {(char: string) => boolean} condition called with one character
 *   (`input[i]`) at a time.
 * @param {string} input
 * @param {{ position: number }} position mutated in place.
 * @returns {string} the collected run (possibly empty).
 */
function collectASequenceOfCodePoints (condition, input, position) {
  const start = position.position

  // Advance while position does not point past the end of input and the
  // character at position meets the condition.
  while (position.position < input.length && condition(input[position.position])) {
    position.position++
  }

  // The collected run is exactly the characters that were stepped over.
  return input.slice(start, position.position)
}
155
/**
 * A faster collectASequenceOfCodePoints for the common case of collecting
 * everything up to (but not including) the next occurrence of one character.
 *
 * @param {string} char the character to stop at.
 * @param {string} input
 * @param {{ position: number }} position mutated in place: set to the index
 *   of `char` when found, otherwise to `input.length`.
 * @returns {string} the characters between the starting position and the
 *   stop character (or the end of input when `char` does not occur).
 */
function collectASequenceOfCodePointsFast (char, input, position) {
  const start = position.position
  const idx = input.indexOf(char, start)

  if (idx === -1) {
    // No stop character: consume the remainder of input.
    position.position = input.length
    return input.slice(start)
  }

  position.position = idx
  return input.slice(start, idx)
}
174
// https://url.spec.whatwg.org/#string-percent-decode
/**
 * Percent-decodes a string: the string is UTF-8 encoded first and the
 * resulting bytes are then percent-decoded.
 *
 * @param {string} input
 * @returns {Uint8Array} the decoded bytes, as returned by percentDecode.
 */
function stringPercentDecode (input) {
  // 1. Let bytes be the UTF-8 encoding of input.
  const bytes = encoder.encode(input)

  // 2. Return the percent-decoding of bytes.
  return percentDecode(bytes)
}
184
/**
 * Tells whether a byte is an ASCII hex digit: 0x30 (0) to 0x39 (9),
 * 0x41 (A) to 0x46 (F), or 0x61 (a) to 0x66 (f), all inclusive.
 * `undefined` (a read past the end of the input) is not a hex digit.
 *
 * @param {number | undefined} byte
 * @returns {boolean}
 */
function isASCIIHexByte (byte) {
  return (
    (byte >= 0x30 && byte <= 0x39) ||
    (byte >= 0x41 && byte <= 0x46) ||
    (byte >= 0x61 && byte <= 0x66)
  )
}

// https://url.spec.whatwg.org/#percent-decode
/**
 * Percent-decodes a byte sequence: every "%XX" triplet whose XX are two hex
 * digits becomes the single byte 0xXX; all other bytes (including a "%" not
 * followed by two hex digits) are copied through unchanged.
 *
 * @param {Uint8Array} input
 * @returns {Uint8Array} a new array holding the decoded bytes.
 */
function percentDecode (input) {
  // 1. Let output be an empty byte sequence.
  /** @type {number[]} */
  const output = []

  // 2. For each byte byte in input:
  for (let i = 0; i < input.length; i++) {
    const byte = input[i]

    // 1. If byte is not 0x25 (%), then append byte to output.
    if (byte !== 0x25) {
      output.push(byte)
      continue
    }

    const high = input[i + 1]
    const low = input[i + 2]

    // 2. Otherwise, if the next two bytes after byte in input are not both
    //    ASCII hex digits, append byte to output.
    if (!isASCIIHexByte(high) || !isASCIIHexByte(low)) {
      output.push(0x25)
      continue
    }

    // 3. Otherwise:
    //    1. Let bytePoint be the two bytes after byte in input,
    //       decoded, and then interpreted as hexadecimal number.
    const bytePoint = Number.parseInt(String.fromCharCode(high, low), 16)

    //    2. Append a byte whose value is bytePoint to output.
    output.push(bytePoint)

    //    3. Skip the next two bytes in input.
    i += 2
  }

  // 3. Return output.
  return Uint8Array.from(output)
}
229
// https://mimesniff.spec.whatwg.org/#parse-a-mime-type
/**
 * Parses a MIME type string into a record, following the numbered steps of
 * the MIME Sniffing standard's "parse a MIME type" algorithm.
 *
 * @param {string} input
 * @returns {'failure' | {
 *   type: string,
 *   subtype: string,
 *   parameters: Map<string, string>,
 *   essence: string
 * }} the string 'failure' when type or subtype is missing or contains
 *   non-token characters; otherwise the record, with type, subtype and
 *   parameter names lowercased.
 */
function parseMIMEType (input) {
  // 1. Remove any leading and trailing HTTP whitespace
  //    from input.
  input = removeHTTPWhitespace(input, true, true)

  // 2. Let position be a position variable for input,
  //    initially pointing at the start of input.
  const position = { position: 0 }

  // 3. Let type be the result of collecting a sequence
  //    of code points that are not U+002F (/) from
  //    input, given position.
  const type = collectASequenceOfCodePointsFast(
    '/',
    input,
    position
  )

  // 4. If type is the empty string or does not solely
  //    contain HTTP token code points, then return failure.
  //    https://mimesniff.spec.whatwg.org/#http-token-code-point
  if (type.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(type)) {
    return 'failure'
  }

  // 5. If position is past the end of input, then return
  //    failure. The collector leaves position at input.length when no "/"
  //    exists, so "past the end" is `>=`, not `>`.
  if (position.position >= input.length) {
    return 'failure'
  }

  // 6. Advance position by 1. (This skips past U+002F (/).)
  position.position++

  // 7. Let subtype be the result of collecting a sequence of
  //    code points that are not U+003B (;) from input, given
  //    position.
  let subtype = collectASequenceOfCodePointsFast(
    ';',
    input,
    position
  )

  // 8. Remove any trailing HTTP whitespace from subtype.
  subtype = removeHTTPWhitespace(subtype, false, true)

  // 9. If subtype is the empty string or does not solely
  //    contain HTTP token code points, then return failure.
  if (subtype.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(subtype)) {
    return 'failure'
  }

  const typeLowercase = type.toLowerCase()
  const subtypeLowercase = subtype.toLowerCase()

  // 10. Let mimeType be a new MIME type record whose type
  //     is type, in ASCII lowercase, and subtype is subtype,
  //     in ASCII lowercase.
  //     https://mimesniff.spec.whatwg.org/#mime-type
  const mimeType = {
    type: typeLowercase,
    subtype: subtypeLowercase,
    /** @type {Map<string, string>} */
    parameters: new Map(),
    // https://mimesniff.spec.whatwg.org/#mime-type-essence
    essence: `${typeLowercase}/${subtypeLowercase}`
  }

  // 11. While position is not past the end of input:
  while (position.position < input.length) {
    // 1. Advance position by 1. (This skips past U+003B (;).)
    position.position++

    // 2. Collect a sequence of code points that are HTTP
    //    whitespace from input given position.
    collectASequenceOfCodePoints(
      // https://fetch.spec.whatwg.org/#http-whitespace
      char => HTTP_WHITESPACE_REGEX.test(char),
      input,
      position
    )

    // 3. Let parameterName be the result of collecting a
    //    sequence of code points that are not U+003B (;)
    //    or U+003D (=) from input, given position.
    let parameterName = collectASequenceOfCodePoints(
      (char) => char !== ';' && char !== '=',
      input,
      position
    )

    // 4. Set parameterName to parameterName, in ASCII
    //    lowercase.
    parameterName = parameterName.toLowerCase()

    // 5. If position is not past the end of input, then:
    if (position.position < input.length) {
      // 1. If the code point at position within input is
      //    U+003B (;), then continue.
      if (input[position.position] === ';') {
        continue
      }

      // 2. Advance position by 1. (This skips past U+003D (=).)
      position.position++
    }

    // 6. If position is past the end of input, then break.
    //    (Nothing is left to read a value from.)
    if (position.position >= input.length) {
      break
    }

    // 7. Let parameterValue be null.
    let parameterValue = null

    // 8. If the code point at position within input is
    //    U+0022 ("), then:
    if (input[position.position] === '"') {
      // 1. Set parameterValue to the result of collecting
      //    an HTTP quoted string from input, given position
      //    and the extract-value flag.
      parameterValue = collectAnHTTPQuotedString(input, position, true)

      // 2. Collect a sequence of code points that are not
      //    U+003B (;) from input, given position.
      collectASequenceOfCodePointsFast(
        ';',
        input,
        position
      )

    // 9. Otherwise:
    } else {
      // 1. Set parameterValue to the result of collecting
      //    a sequence of code points that are not U+003B (;)
      //    from input, given position.
      parameterValue = collectASequenceOfCodePointsFast(
        ';',
        input,
        position
      )

      // 2. Remove any trailing HTTP whitespace from parameterValue.
      parameterValue = removeHTTPWhitespace(parameterValue, false, true)

      // 3. If parameterValue is the empty string, then continue.
      if (parameterValue.length === 0) {
        continue
      }
    }

    // 10. If all of the following are true
    //     - parameterName is not the empty string
    //     - parameterName solely contains HTTP token code points
    //     - parameterValue solely contains HTTP quoted-string token code points
    //     - mimeType's parameters[parameterName] does not exist
    //     then set mimeType's parameters[parameterName] to parameterValue.
    //     (An empty quoted value is accepted explicitly because the
    //     quoted-string pattern is only tested on non-empty values.)
    if (
      parameterName.length !== 0 &&
      HTTP_TOKEN_CODEPOINTS.test(parameterName) &&
      (parameterValue.length === 0 || HTTP_QUOTED_STRING_TOKENS.test(parameterValue)) &&
      !mimeType.parameters.has(parameterName)
    ) {
      mimeType.parameters.set(parameterName, parameterValue)
    }
  }

  // 12. Return mimeType.
  return mimeType
}
402
// https://infra.spec.whatwg.org/#forgiving-base64-decode
/**
 * Decodes base64 text leniently: ASCII whitespace is ignored and trailing
 * "=" padding is optional.
 *
 * @param {string} data
 * @returns {Uint8Array | 'failure'} the decoded bytes, or the string
 *   'failure' when the input has an impossible length or contains a
 *   character outside the base64 alphabet.
 */
function forgivingBase64 (data) {
  // 1. Remove all ASCII whitespace from data.
  data = data.replace(/[\u0009\u000A\u000C\u000D\u0020]/g, '') // eslint-disable-line

  // 2. If data's code point length divides by 4 leaving
  //    no remainder, then:
  if (data.length % 4 === 0) {
    // 1. If data ends with one or two U+003D (=) code points,
    //    then remove them from data.
    data = data.replace(/=?=$/, '')
  }

  // 3. If data's code point length divides by 4 leaving
  //    a remainder of 1, then return failure.
  if (data.length % 4 === 1) {
    return 'failure'
  }

  // 4. If data contains a code point that is not one of
  //      U+002B (+)
  //      U+002F (/)
  //      ASCII alphanumeric
  //    then return failure.
  if (/[^+/0-9A-Za-z]/.test(data)) {
    return 'failure'
  }

  // atob yields a string with one character per decoded byte; copy each
  // character code into a byte array.
  const binary = atob(data)
  const bytes = new Uint8Array(binary.length)

  for (let byte = 0; byte < binary.length; byte++) {
    bytes[byte] = binary.charCodeAt(byte)
  }

  return bytes
}
441
// https://fetch.spec.whatwg.org/#collect-an-http-quoted-string
// tests: https://fetch.spec.whatwg.org/#example-http-quoted-string
/**
 * Reads an HTTP quoted string that starts at `position.position` (which must
 * point at a U+0022 (") — asserted) and advances `position.position` past it.
 *
 * @param {string} input
 * @param {{ position: number }} position mutated in place.
 * @param {boolean?} extractValue when truthy, return the unescaped contents
 *   without the surrounding quotes; otherwise return the raw slice of input
 *   that was consumed, quotes and backslashes included.
 * @returns {string}
 */
function collectAnHTTPQuotedString (input, position, extractValue) {
  // 1. Let positionStart be position.
  const positionStart = position.position

  // 2. Let value be the empty string.
  let value = ''

  // 3. Assert: the code point at position within input
  //    is U+0022 (").
  assert(input[position.position] === '"')

  // 4. Advance position by 1.
  position.position++

  // 5. While true:
  while (true) {
    // 1. Append the result of collecting a sequence of code points
    //    that are not U+0022 (") or U+005C (\) from input, given
    //    position, to value.
    value += collectASequenceOfCodePoints(
      (char) => char !== '"' && char !== '\\',
      input,
      position
    )

    // 2. If position is past the end of input, then break.
    //    (Unterminated string: accept what was read.)
    if (position.position >= input.length) {
      break
    }

    // 3. Let quoteOrBackslash be the code point at position within
    //    input.
    const quoteOrBackslash = input[position.position]

    // 4. Advance position by 1.
    position.position++

    // 5. If quoteOrBackslash is U+005C (\), then:
    if (quoteOrBackslash === '\\') {
      // 1. If position is past the end of input, then append
      //    U+005C (\) to value and break.
      if (position.position >= input.length) {
        value += '\\'
        break
      }

      // 2. Append the code point at position within input to value.
      //    (The escaped character is taken literally.)
      value += input[position.position]

      // 3. Advance position by 1.
      position.position++

    // 6. Otherwise:
    } else {
      // 1. Assert: quoteOrBackslash is U+0022 (").
      assert(quoteOrBackslash === '"')

      // 2. Break. (Closing quote reached.)
      break
    }
  }

  // 6. If the extract-value flag is set, then return value.
  if (extractValue) {
    return value
  }

  // 7. Return the code points from positionStart to position,
  //    inclusive, within input. position already points one past the last
  //    consumed character, so the slice end is exclusive here.
  return input.slice(positionStart, position.position)
}
520
/**
 * Serializes a MIME type record back into a string: the essence followed by
 * `;name=value` for each parameter, quoting and escaping values that are
 * empty or contain non-token characters.
 *
 * @see https://mimesniff.spec.whatwg.org/#serialize-a-mime-type
 * @param {{ essence: string, parameters: Map<string, string> }} mimeType
 *   a parsed record; must not be the string 'failure' (asserted).
 * @returns {string}
 */
function serializeAMimeType (mimeType) {
  assert(mimeType !== 'failure')
  const { parameters, essence } = mimeType

  // 1. Let serialization be the concatenation of mimeType's
  //    type, U+002F (/), and mimeType's subtype.
  let serialization = essence

  // 2. For each name -> value of mimeType's parameters:
  for (const [name, rawValue] of parameters.entries()) {
    let value = rawValue

    // 1. Append U+003B (;) to serialization.
    serialization += ';'

    // 2. Append name to serialization.
    serialization += name

    // 3. Append U+003D (=) to serialization.
    serialization += '='

    // 4. If value does not solely contain HTTP token code
    //    points or value is the empty string, then:
    //    (The token pattern requires at least one character, so the empty
    //    string fails the test and is quoted too.)
    if (!HTTP_TOKEN_CODEPOINTS.test(value)) {
      // 1. Precede each occurrence of U+0022 (") or
      //    U+005C (\) in value with U+005C (\).
      value = value.replace(/(\\|")/g, '\\$1')

      // 2. Prepend U+0022 (") to value.
      value = '"' + value

      // 3. Append U+0022 (") to value.
      value += '"'
    }

    // 5. Append value to serialization.
    serialization += value
  }

  // 3. Return serialization.
  return serialization
}
564
/**
 * Tells whether a single character is HTTP whitespace:
 * CR, LF, TAB or SPACE.
 *
 * @see https://fetch.spec.whatwg.org/#http-whitespace
 * @param {string} char
 * @returns {boolean}
 */
function isHTTPWhiteSpace (char) {
  return char === '\r' || char === '\n' || char === '\t' || char === ' '
}
572
/**
 * Strips HTTP whitespace from the start and/or end of a string.
 *
 * @see https://fetch.spec.whatwg.org/#http-whitespace
 * @param {string} str
 * @param {boolean} [leading=true] strip whitespace at the start.
 * @param {boolean} [trailing=true] strip whitespace at the end.
 * @returns {string}
 */
function removeHTTPWhitespace (str, leading = true, trailing = true) {
  let lead = 0
  let trail = str.length - 1

  if (leading) {
    while (lead < str.length && isHTTPWhiteSpace(str[lead])) {
      lead++
    }
  }

  if (trailing) {
    // Walk back as far as `lead` (inclusive) so that a string consisting
    // only of whitespace collapses to '' even when `leading` is false;
    // stopping at index 1 would leave the first whitespace character behind.
    while (trail >= lead && isHTTPWhiteSpace(str[trail])) {
      trail--
    }
  }

  return str.slice(lead, trail + 1)
}
591
/**
 * Tells whether a single character is ASCII whitespace:
 * CR, LF, TAB, FF or SPACE.
 *
 * @see https://infra.spec.whatwg.org/#ascii-whitespace
 * @param {string} char
 * @returns {boolean}
 */
function isASCIIWhitespace (char) {
  return char === '\r' || char === '\n' || char === '\t' || char === '\f' || char === ' '
}
599
/**
 * Strips ASCII whitespace from the start and/or end of a string.
 *
 * @see https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace
 * @param {string} str
 * @param {boolean} [leading=true] strip whitespace at the start.
 * @param {boolean} [trailing=true] strip whitespace at the end.
 * @returns {string}
 */
function removeASCIIWhitespace (str, leading = true, trailing = true) {
  let lead = 0
  let trail = str.length - 1

  if (leading) {
    while (lead < str.length && isASCIIWhitespace(str[lead])) {
      lead++
    }
  }

  if (trailing) {
    // Walk back as far as `lead` (inclusive) so that a string consisting
    // only of whitespace collapses to '' even when `leading` is false;
    // stopping at index 1 would leave the first whitespace character behind.
    while (trail >= lead && isASCIIWhitespace(str[trail])) {
      trail--
    }
  }

  return str.slice(lead, trail + 1)
}
617
618module.exports = {
619 dataURLProcessor,
620 URLSerializer,
621 collectASequenceOfCodePoints,
622 collectASequenceOfCodePointsFast,
623 stringPercentDecode,
624 parseMIMEType,
625 collectAnHTTPQuotedString,
626 serializeAMimeType
627}
Note: See TracBrowser for help on using the repository browser.