source: node_modules/@fastify/busboy/deps/streamsearch/sbmh.js

main
Last change on this file was d24f17c, checked in by Aleksandar Panovski <apano77@…>, 15 months ago

Initial commit

  • Property mode set to 100644
File size: 7.0 KB
Line 
1'use strict'
2
3/**
4 * Copyright Brian White. All rights reserved.
5 *
6 * @see https://github.com/mscdex/streamsearch
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to
10 * deal in the Software without restriction, including without limitation the
11 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12 * sell copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * IN THE SOFTWARE.
25 *
26 * Based heavily on the Streaming Boyer-Moore-Horspool C++ implementation
27 * by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool
28 */
29const EventEmitter = require('node:events').EventEmitter
30const inherits = require('node:util').inherits
31
32function SBMH (needle) {
33 if (typeof needle === 'string') {
34 needle = Buffer.from(needle)
35 }
36
37 if (!Buffer.isBuffer(needle)) {
38 throw new TypeError('The needle has to be a String or a Buffer.')
39 }
40
41 const needleLength = needle.length
42
43 if (needleLength === 0) {
44 throw new Error('The needle cannot be an empty String/Buffer.')
45 }
46
47 if (needleLength > 256) {
48 throw new Error('The needle cannot have a length bigger than 256.')
49 }
50
51 this.maxMatches = Infinity
52 this.matches = 0
53
54 this._occ = new Array(256)
55 .fill(needleLength) // Initialize occurrence table.
56 this._lookbehind_size = 0
57 this._needle = needle
58 this._bufpos = 0
59
60 this._lookbehind = Buffer.alloc(needleLength)
61
62 // Populate occurrence table with analysis of the needle,
63 // ignoring last letter.
64 for (var i = 0; i < needleLength - 1; ++i) { // eslint-disable-line no-var
65 this._occ[needle[i]] = needleLength - 1 - i
66 }
67}
68inherits(SBMH, EventEmitter)
69
70SBMH.prototype.reset = function () {
71 this._lookbehind_size = 0
72 this.matches = 0
73 this._bufpos = 0
74}
75
76SBMH.prototype.push = function (chunk, pos) {
77 if (!Buffer.isBuffer(chunk)) {
78 chunk = Buffer.from(chunk, 'binary')
79 }
80 const chlen = chunk.length
81 this._bufpos = pos || 0
82 let r
83 while (r !== chlen && this.matches < this.maxMatches) { r = this._sbmh_feed(chunk) }
84 return r
85}
86
87SBMH.prototype._sbmh_feed = function (data) {
88 const len = data.length
89 const needle = this._needle
90 const needleLength = needle.length
91 const lastNeedleChar = needle[needleLength - 1]
92
93 // Positive: points to a position in `data`
94 // pos == 3 points to data[3]
95 // Negative: points to a position in the lookbehind buffer
96 // pos == -2 points to lookbehind[lookbehind_size - 2]
97 let pos = -this._lookbehind_size
98 let ch
99
100 if (pos < 0) {
101 // Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool
102 // search with character lookup code that considers both the
103 // lookbehind buffer and the current round's haystack data.
104 //
105 // Loop until
106 // there is a match.
107 // or until
108 // we've moved past the position that requires the
109 // lookbehind buffer. In this case we switch to the
110 // optimized loop.
111 // or until
112 // the character to look at lies outside the haystack.
113 while (pos < 0 && pos <= len - needleLength) {
114 ch = this._sbmh_lookup_char(data, pos + needleLength - 1)
115
116 if (
117 ch === lastNeedleChar &&
118 this._sbmh_memcmp(data, pos, needleLength - 1)
119 ) {
120 this._lookbehind_size = 0
121 ++this.matches
122 this.emit('info', true)
123
124 return (this._bufpos = pos + needleLength)
125 }
126 pos += this._occ[ch]
127 }
128
129 // No match.
130
131 if (pos < 0) {
132 // There's too few data for Boyer-Moore-Horspool to run,
133 // so let's use a different algorithm to skip as much as
134 // we can.
135 // Forward pos until
136 // the trailing part of lookbehind + data
137 // looks like the beginning of the needle
138 // or until
139 // pos == 0
140 while (pos < 0 && !this._sbmh_memcmp(data, pos, len - pos)) { ++pos }
141 }
142
143 if (pos >= 0) {
144 // Discard lookbehind buffer.
145 this.emit('info', false, this._lookbehind, 0, this._lookbehind_size)
146 this._lookbehind_size = 0
147 } else {
148 // Cut off part of the lookbehind buffer that has
149 // been processed and append the entire haystack
150 // into it.
151 const bytesToCutOff = this._lookbehind_size + pos
152 if (bytesToCutOff > 0) {
153 // The cut off data is guaranteed not to contain the needle.
154 this.emit('info', false, this._lookbehind, 0, bytesToCutOff)
155 }
156
157 this._lookbehind.copy(this._lookbehind, 0, bytesToCutOff,
158 this._lookbehind_size - bytesToCutOff)
159 this._lookbehind_size -= bytesToCutOff
160
161 data.copy(this._lookbehind, this._lookbehind_size)
162 this._lookbehind_size += len
163
164 this._bufpos = len
165 return len
166 }
167 }
168
169 pos += (pos >= 0) * this._bufpos
170
171 // Lookbehind buffer is now empty. We only need to check if the
172 // needle is in the haystack.
173 if (data.indexOf(needle, pos) !== -1) {
174 pos = data.indexOf(needle, pos)
175 ++this.matches
176 if (pos > 0) { this.emit('info', true, data, this._bufpos, pos) } else { this.emit('info', true) }
177
178 return (this._bufpos = pos + needleLength)
179 } else {
180 pos = len - needleLength
181 }
182
183 // There was no match. If there's trailing haystack data that we cannot
184 // match yet using the Boyer-Moore-Horspool algorithm (because the trailing
185 // data is less than the needle size) then match using a modified
186 // algorithm that starts matching from the beginning instead of the end.
187 // Whatever trailing data is left after running this algorithm is added to
188 // the lookbehind buffer.
189 while (
190 pos < len &&
191 (
192 data[pos] !== needle[0] ||
193 (
194 (Buffer.compare(
195 data.subarray(pos, pos + len - pos),
196 needle.subarray(0, len - pos)
197 ) !== 0)
198 )
199 )
200 ) {
201 ++pos
202 }
203 if (pos < len) {
204 data.copy(this._lookbehind, 0, pos, pos + (len - pos))
205 this._lookbehind_size = len - pos
206 }
207
208 // Everything until pos is guaranteed not to contain needle data.
209 if (pos > 0) { this.emit('info', false, data, this._bufpos, pos < len ? pos : len) }
210
211 this._bufpos = len
212 return len
213}
214
215SBMH.prototype._sbmh_lookup_char = function (data, pos) {
216 return (pos < 0)
217 ? this._lookbehind[this._lookbehind_size + pos]
218 : data[pos]
219}
220
221SBMH.prototype._sbmh_memcmp = function (data, pos, len) {
222 for (var i = 0; i < len; ++i) { // eslint-disable-line no-var
223 if (this._sbmh_lookup_char(data, pos + i) !== this._needle[i]) { return false }
224 }
225 return true
226}
227
228module.exports = SBMH
Note: See TracBrowser for help on using the repository browser.