[6a3a178] | 1 | 'use strict'
|
---|
| 2 |
|
---|
| 3 | const util = require('util')
|
---|
| 4 |
|
---|
| 5 | const pMap = require('p-map')
|
---|
| 6 | const contentPath = require('./content/path')
|
---|
| 7 | const fixOwner = require('./util/fix-owner')
|
---|
| 8 | const fs = require('fs')
|
---|
| 9 | const fsm = require('fs-minipass')
|
---|
| 10 | const glob = util.promisify(require('glob'))
|
---|
| 11 | const index = require('./entry-index')
|
---|
| 12 | const path = require('path')
|
---|
| 13 | const rimraf = util.promisify(require('rimraf'))
|
---|
| 14 | const ssri = require('ssri')
|
---|
| 15 |
|
---|
// Reports whether `key` is an own (non-inherited) property of `obj`.
// Borrows the method from Object.prototype so it also works for objects
// that have no prototype or that shadow `hasOwnProperty` themselves.
const hasOwnProperty = (obj, key) => {
  const ownCheck = Object.prototype.hasOwnProperty
  return ownCheck.call(obj, key)
}
|
---|
| 18 |
|
---|
// Promise-returning wrappers around the callback-style fs functions
// used by the verification steps below.
const [stat, truncate, writeFile, readFile] = [
  fs.stat,
  fs.truncate,
  fs.writeFile,
  fs.readFile,
].map((fn) => util.promisify(fn))
|
---|
| 23 |
|
---|
// Builds the effective options for a verify run: the defaults below,
// overridden by any own enumerable property of `opts`. A new object is
// returned on every call; `opts` itself is never modified.
const verifyOpts = (opts) => {
  const defaults = {
    concurrency: 20,
    // No-op logger so callers do not have to supply one.
    log: { silly () {} },
  }
  return { ...defaults, ...opts }
}
|
---|
| 29 |
|
---|
| 30 | module.exports = verify
|
---|
| 31 |
|
---|
// Runs the whole verification pipeline against the cache at `cache`.
//
// `opts` is merged over the defaults by `verifyOpts`. The steps listed
// below then run strictly one after another, each called as
// `step(cache, opts)`. Whatever object a step resolves with is merged into
// one shared stats object, and the wall-clock duration of each step (ms)
// is stored under `stats.runTime[<name of the step function>]`.
//
// Resolves with the accumulated stats; `stats.runTime.total` is
// `endTime - startTime` as reported by the first and last step.
// Rejects as soon as any step rejects.
function verify (cache, opts) {
  opts = verifyOpts(opts)
  opts.log.silly('verify', 'verifying cache at', cache)

  const steps = [
    markStartTime,
    fixPerms,
    garbageCollect,
    rebuildIndex,
    cleanTmp,
    writeVerifile,
    markEndTime,
  ]

  return steps
    .reduce((promise, step) => {
      const label = step.name
      return promise.then((stats) => {
        // Take the start timestamp here, once every earlier step has
        // settled. The reduce callback itself runs for all steps up front,
        // so stamping there would make each runTime also include the
        // duration of every step that ran before it.
        const start = new Date()
        return step(cache, opts).then((s) => {
          // Steps may resolve with nothing (null/undefined).
          if (s)
            Object.assign(stats, s)

          const end = new Date()
          if (!stats.runTime)
            stats.runTime = {}

          stats.runTime[label] = end - start
          return stats
        })
      })
    }, Promise.resolve({}))
    .then((stats) => {
      stats.runTime.total = stats.endTime - stats.startTime
      opts.log.silly(
        'verify',
        'verification finished for',
        cache,
        'in',
        `${stats.runTime.total}ms`
      )
      return stats
    })
}
|
---|
| 77 |
|
---|
// First pipeline step: resolves with `{ startTime }` holding the current
// time. Both parameters exist only to match the common step signature
// and are not used.
function markStartTime (cache, opts) {
  const startTime = new Date()
  return Promise.resolve({ startTime })
}
|
---|
| 81 |
|
---|
// Last pipeline step: resolves with `{ endTime }` holding the current
// time. Both parameters exist only to match the common step signature
// and are not used.
function markEndTime (cache, opts) {
  const endTime = new Date()
  return Promise.resolve({ endTime })
}
|
---|
| 85 |
|
---|
// Pipeline step: runs `fixOwner.mkdirfix` and then `fixOwner.chownr` on
// the cache root, in that order. Resolves with null, so it contributes
// nothing to the stats.
function fixPerms (cache, opts) {
  opts.log.silly('verify', 'fixing cache permissions')
  const ensureDir = fixOwner.mkdirfix(cache, cache)
  // TODO - fix file permissions too
  const chownTree = () => fixOwner.chownr(cache, cache)
  return ensureDir.then(chownTree).then(() => null)
}
|
---|
| 96 |
|
---|
// Implements a naive mark-and-sweep tracing garbage collector.
//
// The algorithm is basically as follows:
// 1. Read (and filter) all index entries ("pointers")
// 2. Mark each integrity value as "live"
// 3. Read entire filesystem tree in `content-vX/` dir
// 4. If content is live, verify its checksum and delete it if it fails
// 5. If content is not marked as live, rimraf it.
//
// Resolves with the counters gathered along the way: verifiedContent,
// reclaimedCount, reclaimedSize, badContentCount and keptSize.
function garbageCollect (cache, opts) {
  opts.log.silly('verify', 'garbage collecting content')

  // Mark phase: every entry that passes `opts.filter` (when one is given)
  // marks its integrity string as live.
  const liveContent = new Set()
  const indexStream = index.lsStream(cache)
  indexStream.on('data', (entry) => {
    const keep = !opts.filter || opts.filter(entry)
    if (keep)
      liveContent.add(entry.integrity.toString())
  })
  const indexDrained = new Promise((resolve, reject) => {
    indexStream.on('end', resolve).on('error', reject)
  })

  const stats = {
    verifiedContent: 0,
    reclaimedCount: 0,
    reclaimedSize: 0,
    badContentCount: 0,
    keptSize: 0,
  }

  // Sweep phase for one content file. The last three path segments joined
  // form the hex digest; the segment before them names the algorithm.
  const sweep = (f) => {
    const parts = f.split(/[/\\]/)
    const algo = parts[parts.length - 4]
    const digest = parts.slice(parts.length - 3).join('')
    const integrity = ssri.fromHex(digest, algo)

    if (!liveContent.has(integrity.toString())) {
      // No entries refer to this content. We can delete.
      stats.reclaimedCount++
      return stat(f).then((s) =>
        rimraf(f).then(() => {
          stats.reclaimedSize += s.size
          return stats
        })
      )
    }

    return verifyContent(f, integrity).then((info) => {
      if (info.valid) {
        stats.verifiedContent++
        stats.keptSize += info.size
      } else {
        stats.reclaimedCount++
        stats.badContentCount++
        stats.reclaimedSize += info.size
      }
      return stats
    })
  }

  return indexDrained
    .then(() => {
      const contentDir = contentPath.contentDir(cache)
      return glob(path.join(contentDir, '**'), {
        follow: false,
        nodir: true,
        nosort: true,
      })
    })
    .then((files) => pMap(files, sweep, { concurrency: opts.concurrency }))
    .then(() => stats)
}
|
---|
| 168 |
|
---|
// Checks the file at `filepath` against the integrity value `sri`.
//
// Resolves with `{ size, valid }`:
// - checksum matches: the stat size and `valid: true`
// - checksum mismatch (EINTEGRITY): the file is removed with rimraf,
//   then the stat size and `valid: false`
// - ENOENT raised anywhere along the way: `{ size: 0, valid: false }`
// Any other error rejects.
async function verifyContent (filepath, sri) {
  try {
    const { size } = await stat(filepath)
    // Start the check outside the inner try so that only a rejection of
    // the check itself is inspected for EINTEGRITY.
    const check = ssri.checkStream(new fsm.ReadStream(filepath), sri)
    try {
      await check
    } catch (err) {
      if (err.code !== 'EINTEGRITY')
        throw err

      await rimraf(filepath)
      return { size, valid: false }
    }
    return { size, valid: true }
  } catch (err) {
    if (err.code === 'ENOENT')
      return { size: 0, valid: false }

    throw err
  }
}
|
---|
| 195 |
|
---|
// Pipeline step: regroups every index entry by the hash of its key and
// hands each group ("bucket") to rebuildBucket.
//
// Entries rejected by `opts.filter` are counted in `rejectedEntries` and
// left out of their bucket. A bucket is still created for them (possibly
// empty) so that it is passed to rebuildBucket as well.
//
// Resolves with `{ missingContent, rejectedEntries, totalEntries }`.
function rebuildIndex (cache, opts) {
  opts.log.silly('verify', 'rebuilding index')
  return index.ls(cache).then((entries) => {
    const stats = {
      missingContent: 0,
      rejectedEntries: 0,
      totalEntries: 0,
    }
    const buckets = {}
    // `|| {}` keeps a nullish listing a no-op instead of a TypeError.
    for (const k of Object.keys(entries || {})) {
      const hashed = index.hashKey(k)
      const entry = entries[k]
      const excluded = opts.filter && !opts.filter(entry)
      if (excluded)
        stats.rejectedEntries++

      const bucket = buckets[hashed]
      if (!bucket) {
        // First key seen for this hash: its path becomes the bucket path.
        const fresh = excluded ? [] : [entry]
        fresh._path = index.bucketPath(cache, k)
        buckets[hashed] = fresh
      } else if (!excluded)
        bucket.push(entry)
    }

    const rebuild = (bucket) => rebuildBucket(cache, bucket, stats, opts)
    return pMap(Object.values(buckets), rebuild, {
      concurrency: opts.concurrency,
    }).then(() => stats)
  })
}
|
---|
| 234 |
|
---|
// Empties the bucket file at `bucket._path`, then re-inserts the bucket's
// entries one at a time.
//
// An entry is re-inserted only if its content file can be stat'ed. When
// the stat or the insert fails with ENOENT, the entry is counted in both
// `stats.rejectedEntries` and `stats.missingContent` and skipped. Any
// other error rejects. `stats` is updated in place; resolves with nothing.
function rebuildBucket (cache, bucket, stats, opts) {
  const reinsert = async (entry) => {
    const content = contentPath(cache, entry.integrity)
    try {
      await stat(content)
      await index.insert(cache, entry.key, entry.integrity, {
        metadata: entry.metadata,
        size: entry.size,
      })
      stats.totalEntries++
    } catch (err) {
      if (err.code !== 'ENOENT')
        throw err

      stats.rejectedEntries++
      stats.missingContent++
    }
  }

  // This needs to be serialized because cacache explicitly
  // lets very racy bucket conflicts clobber each other.
  const replay = async () => {
    for (const entry of bucket)
      await reinsert(entry)
  }

  return truncate(bucket._path).then(replay)
}
|
---|
| 265 |
|
---|
// Pipeline step: removes the `tmp` directory directly under the cache
// root and resolves with whatever rimraf resolves with.
function cleanTmp (cache, opts) {
  opts.log.silly('verify', 'cleaning tmp directory')
  const tmpDir = path.join(cache, 'tmp')
  return rimraf(tmpDir)
}
|
---|
| 270 |
|
---|
// Pipeline step: records the time of this run by writing the current
// epoch-millisecond timestamp, as a decimal string, to `_lastverified`
// under the cache root (the file that lastRun reads back).
//
// Once the write has settled, successfully or not, `fixOwner.chownr` is
// run on the verifile. Resolves with nothing; rejects with the write
// error, or with the chownr error if that call fails.
function writeVerifile (cache, opts) {
  const verifile = path.join(cache, '_lastverified')
  opts.log.silly('verify', `writing verifile to ${verifile}`)
  // The ownership fix has to wait for the write to settle. A synchronous
  // try/finally around the writeFile() call would run it right after the
  // write was *started*, i.e. before the file has been written.
  // NOTE(review): presumably chownr aligns the file's owner with the
  // cache directory's owner - confirm against ./util/fix-owner.
  return writeFile(verifile, String(Date.now()))
    .finally(() => fixOwner.chownr(cache, verifile))
}
|
---|
| 280 |
|
---|
| 281 | module.exports.lastRun = lastRun
|
---|
| 282 |
|
---|
// Resolves with a Date built from the numeric timestamp stored in the
// `_lastverified` file under the cache root. Rejects if the file cannot
// be read.
function lastRun (cache) {
  const verifile = path.join(cache, '_lastverified')
  return readFile(verifile, 'utf8').then((stamp) => new Date(Number(stamp)))
}
|
---|