1 |
|
---|
2 | var fs = require('fs');
|
---|
3 |
|
---|
4 | var utf8 = require('./encoding/utf8'),
|
---|
5 | unicode = require('./encoding/unicode'),
|
---|
6 | mbcs = require('./encoding/mbcs'),
|
---|
7 | sbcs = require('./encoding/sbcs'),
|
---|
8 | iso2022 = require('./encoding/iso2022');
|
---|
9 |
|
---|
// In a CommonJS module the top-level `this` is `module.exports`; keeping a
// reference lets the exported functions below call one another.
var self = this;

// One instance of every supported recogniser. detect() asks each of them,
// in this order, whether the input matches.
var recognisers = [
  // Unicode
  new utf8(),
  new unicode.UTF_16BE(),
  new unicode.UTF_16LE(),
  new unicode.UTF_32BE(),
  new unicode.UTF_32LE(),

  // Multi-byte character sets
  new mbcs.sjis(),
  new mbcs.big5(),
  new mbcs.euc_jp(),
  new mbcs.euc_kr(),
  new mbcs.gb_18030(),

  // ISO-2022 family
  new iso2022.ISO_2022_JP(),
  new iso2022.ISO_2022_KR(),
  new iso2022.ISO_2022_CN(),

  // Single-byte character sets
  new sbcs.ISO_8859_1(),
  new sbcs.ISO_8859_2(),
  new sbcs.ISO_8859_5(),
  new sbcs.ISO_8859_6(),
  new sbcs.ISO_8859_7(),
  new sbcs.ISO_8859_8(),
  new sbcs.ISO_8859_9(),
  new sbcs.windows_1251(),
  new sbcs.windows_1256(),
  new sbcs.KOI8_R()
];
|
---|
37 |
|
---|
/**
 * Guesses the character encoding of a chunk of bytes.
 *
 * Every recogniser is run against the input; the matches they report are
 * ranked by descending `confidence`.
 *
 * @param {Buffer} buffer - Bytes to analyse.
 * @param {Object} [opts] - Options. When `opts.returnAllMatches === true`
 *   the whole ranked list of matches is returned.
 * @returns {(string|null|Array)} The `name` of the best match, `null` when
 *   no recogniser matched, or the ranked array of match objects when all
 *   matches were requested.
 */
module.exports.detect = function(buffer, opts) {
  var length = buffer.length;
  var idx;

  // Histogram: how many times each byte value (0..255) occurs in the input.
  var byteCounts = [];
  for (idx = 0; idx < 256; idx += 1) {
    byteCounts.push(0);
  }
  for (idx = 0; idx < length; idx += 1) {
    byteCounts[buffer[idx] & 0x00ff] += 1;
  }

  // Does the input contain any byte in the 0x80..0x9F range?
  var hasC1Bytes = false;
  for (idx = 0x80; idx <= 0x9F && !hasC1Bytes; idx += 1) {
    hasC1Bytes = byteCounts[idx] !== 0;
  }

  // Shared state handed to every recogniser.
  var context = {
    fByteStats: byteCounts,
    fC1Bytes: hasC1Bytes,
    fRawInput: buffer,
    fRawLength: length,
    fInputBytes: buffer,
    fInputLen: length
  };

  // Collect the truthy results, then rank them: highest confidence first.
  var matches = [];
  recognisers.forEach(function(recogniser) {
    var result = recogniser.match(context);
    if (result) {
      matches.push(result);
    }
  });
  matches.sort(function(left, right) {
    return right.confidence - left.confidence;
  });

  if (!(opts && opts.returnAllMatches === true)) {
    return matches.length > 0 ? matches[0].name : null;
  }
  return matches;
};
|
---|
80 |
|
---|
/**
 * Asynchronously detects the character encoding of a file.
 *
 * When `opts.sampleSize` is set, only up to that many bytes are read from
 * the start of the file; otherwise the whole file is read.
 *
 * @param {string} filepath - Path of the file to analyse.
 * @param {Object|Function} [opts] - Options (forwarded to `detect`), or the
 *   callback when no options are given.
 * @param {number} [opts.sampleSize] - Maximum number of bytes to sample.
 * @param {Function} cb - Called as `cb(err, null)` on an I/O error, or as
 *   `cb(null, result)` with the value returned by `detect`.
 */
module.exports.detectFile = function(filepath, opts, cb) {
  if (typeof opts === 'function') {
    cb = opts;
    opts = undefined;
  }

  // Forwards an I/O error, or runs detection on the bytes that were read.
  var finish = function(err, buffer) {
    if (err) return cb(err, null);
    cb(null, self.detect(buffer, opts));
  };

  if (opts && opts.sampleSize) {
    // Allocated before opening so an invalid size cannot leak a descriptor.
    var sample = Buffer.allocUnsafe(opts.sampleSize);

    // Opened asynchronously so a missing/unreadable file is reported through
    // the callback instead of being thrown at the caller.
    fs.open(filepath, 'r', function(openErr, fd) {
      if (openErr) return finish(openErr);

      fs.read(fd, sample, 0, opts.sampleSize, null, function(readErr, bytesRead) {
        fs.close(fd, function(closeErr) {
          var err = readErr || closeErr;
          if (err) return finish(err);

          // allocUnsafe memory is uninitialised: only analyse the bytes that
          // were actually read (the file may be shorter than the sample).
          finish(null, sample.subarray(0, bytesRead));
        });
      });
    });
    return;
  }

  fs.readFile(filepath, finish);
};
|
---|
110 |
|
---|
/**
 * Synchronously detects the character encoding of a file.
 *
 * When `opts.sampleSize` is set, only up to that many bytes are read from
 * the start of the file; otherwise the whole file is read.
 *
 * @param {string} filepath - Path of the file to analyse.
 * @param {Object} [opts] - Options, forwarded to `detect`.
 * @param {number} [opts.sampleSize] - Maximum number of bytes to sample.
 * @returns {(string|null|Array)} Whatever `detect` returns for the bytes read.
 * @throws {Error} Any error raised by the underlying `fs` calls.
 */
module.exports.detectFileSync = function(filepath, opts) {
  if (opts && opts.sampleSize) {
    // Allocated before opening so an invalid size cannot leak a descriptor.
    var sample = Buffer.allocUnsafe(opts.sampleSize);
    var fd = fs.openSync(filepath, 'r');
    var bytesRead;

    try {
      bytesRead = fs.readSync(fd, sample, 0, opts.sampleSize);
    } finally {
      // Always release the descriptor, even when the read throws.
      fs.closeSync(fd);
    }

    // allocUnsafe memory is uninitialised: only analyse the bytes that were
    // actually read (the file may be shorter than the requested sample).
    return self.detect(sample.subarray(0, bytesRead), opts);
  }

  return self.detect(fs.readFileSync(filepath), opts);
};
|
---|
123 |
|
---|
124 | // Wrappers for the previous functions to return all encodings
|
---|
/**
 * Like `detect`, but always returns every match instead of only the best one.
 *
 * @param {Buffer} buffer - Bytes to analyse.
 * @param {Object} [opts] - Options forwarded to `detect`; anything that is
 *   not a non-null object is ignored. The object is copied, not modified.
 * @returns {Array} What `detect` returns with `returnAllMatches` set to true.
 */
module.exports.detectAll = function(buffer, opts) {
  // `typeof null` is 'object', so null has to be excluded explicitly.
  var base = (opts !== null && typeof opts === 'object') ? opts : {};
  // Copy so the caller's options object is left untouched.
  var allOpts = Object.assign({}, base, { returnAllMatches: true });
  return self.detect(buffer, allOpts);
};
|
---|
132 |
|
---|
/**
 * Like `detectFile`, but the callback always receives every match instead
 * of only the best one.
 *
 * @param {string} filepath - Path of the file to analyse.
 * @param {Object|Function} [opts] - Options forwarded to `detectFile`, or
 *   the callback when no options are given. Anything that is not a non-null
 *   object is ignored. The object is copied, not modified.
 * @param {Function} cb - Callback, passed through to `detectFile`.
 */
module.exports.detectFileAll = function(filepath, opts, cb) {
  if (typeof opts === 'function') {
    cb = opts;
    opts = undefined;
  }
  // `typeof null` is 'object', so null has to be excluded explicitly.
  var base = (opts !== null && typeof opts === 'object') ? opts : {};
  // Copy so the caller's options object is left untouched.
  var allOpts = Object.assign({}, base, { returnAllMatches: true });
  self.detectFile(filepath, allOpts, cb);
};
|
---|
144 |
|
---|
/**
 * Like `detectFileSync`, but always returns every match instead of only the
 * best one.
 *
 * @param {string} filepath - Path of the file to analyse.
 * @param {Object} [opts] - Options forwarded to `detectFileSync`; anything
 *   that is not a non-null object is ignored. The object is copied, not
 *   modified.
 * @returns {Array} What `detectFileSync` returns with `returnAllMatches`
 *   set to true.
 */
module.exports.detectFileAllSync = function(filepath, opts) {
  // `typeof null` is 'object', so null has to be excluded explicitly.
  var base = (opts !== null && typeof opts === 'object') ? opts : {};
  // Copy so the caller's options object is left untouched.
  var allOpts = Object.assign({}, base, { returnAllMatches: true });
  return self.detectFileSync(filepath, allOpts);
};
|
---|