source: trip-planner-front/node_modules/chardet/encoding/iso2022.js

Last change on this file was 6a3a178, checked in by Ema <ema_spirova@…>, 3 years ago

initial commit

  • Property mode set to 100644
File size: 4.0 KB
Line 
1var util = require('util'),
2 Match = require ('../match');
3
4
/**
 * This is a superclass for the individual detectors for
 * each of the detectable members of the ISO 2022 family
 * of encodings.
 *
 * Subclasses are expected to set `this.escapeSequences` to an array of
 * byte arrays, each one starting with ESC (0x1b).
 */
function ISO_2022() {}

/**
 * Returns the length of the first entry of `sequences` that is present in
 * `text` at offset `pos`, or 0 when none of them fits.
 *
 * The byte at `pos` itself is not compared: the caller has already checked
 * that it is ESC, and every sequence is assumed to start with ESC.
 *
 * @param text the byte buffer being scanned
 * @param textLen number of bytes of `text` that may be inspected
 * @param pos offset of the ESC byte
 * @param sequences candidate escape sequences (arrays of byte values)
 * @return length of the matching sequence, or 0 if there is no match
 */
function escapeLengthAt(text, textLen, pos, sequences) {
  for (var n = 0; n < sequences.length; n++) {
    var seq = sequences[n];

    // Not enough input left for this candidate; try the next one.
    if (textLen - pos < seq.length) {
      continue;
    }

    var k = 1;
    while (k < seq.length && seq[k] === text[pos + k]) {
      k++;
    }

    if (k === seq.length) {
      return seq.length;
    }
  }

  return 0;
}

/**
 * Matching function shared among the 2022 detectors JP, CN and KR.
 * Counts up the number of legal and unrecognized escape sequences in
 * the sample of text, and computes a score based on the total number and
 * the proportion that fit the encoding.
 *
 * @param det detection context; `det.fInputBytes` is the byte buffer to
 *            analyse and `det.fInputLen` the number of bytes to scan
 * @return a Match built from (det, this, quality) when the quality is
 *         positive, otherwise null. The quality is at most 100 and is not
 *         rounded, so it may be fractional.
 */
ISO_2022.prototype.match = function(det) {
  var text = det.fInputBytes;
  var textLen = det.fInputLen;
  var sequences = this.escapeSequences;

  var hits = 0;
  var misses = 0;
  var shifts = 0;

  for (var i = 0; i < textLen; i++) {
    var b = text[i];

    if (b === 0x1b) {
      var len = escapeLengthAt(text, textLen, i, sequences);

      if (len > 0) {
        hits++;
        // Land on the last byte of the sequence; the loop's i++ moves past it.
        i += len - 1;
      } else {
        misses++;
      }
      continue;
    }

    // Shift in/out
    if (b === 0x0e || b === 0x0f) {
      shifts++;
    }
  }

  if (hits === 0) {
    return null;
  }

  //
  // Initial quality is based on relative proportion of recognized vs.
  // unrecognized escape sequences.
  //   All good: quality = 100;
  //   half or less good: quality = 0;
  //   linear in between.
  var quality = (100 * hits - 100 * misses) / (hits + misses);

  // Back off quality if there were too few escape sequences seen.
  // Include shifts in this computation, so that KR does not get penalized
  // for having only a single Escape sequence, but many shifts.
  if (hits + shifts < 5) {
    quality -= (5 - (hits + shifts)) * 10;
  }

  return quality <= 0 ? null : new Match(det, this, quality);
};
87
/**
 * Detector for ISO-2022-JP.
 *
 * Each instance carries its own `name()` accessor and the table of escape
 * sequences that the shared ISO_2022 `match` implementation looks for.
 */
module.exports.ISO_2022_JP = function() {
  this.name = function() {
    return 'ISO-2022-JP';
  };
  this.escapeSequences = [
    [ 0x1b, 0x24, 0x28, 0x43 ], // KS X 1001:1992
    [ 0x1b, 0x24, 0x28, 0x44 ], // JIS X 0212-1990
    [ 0x1b, 0x24, 0x40 ],       // JIS C 6226-1978
    [ 0x1b, 0x24, 0x41 ],       // GB 2312-80
    [ 0x1b, 0x24, 0x42 ],       // JIS X 0208-1983
    [ 0x1b, 0x26, 0x40 ],       // JIS X 0208 1990, 1997
    [ 0x1b, 0x28, 0x42 ],       // ASCII
    [ 0x1b, 0x28, 0x48 ],       // JIS-Roman
    [ 0x1b, 0x28, 0x49 ],       // Half-width katakana
    [ 0x1b, 0x28, 0x4a ],       // JIS-Roman
    [ 0x1b, 0x2e, 0x41 ],       // ISO 8859-1
    [ 0x1b, 0x2e, 0x46 ]        // ISO 8859-7
  ];
};
util.inherits(module.exports.ISO_2022_JP, ISO_2022);
108
109
110
/**
 * Detector for ISO-2022-KR.
 *
 * Only one escape sequence is recognized (ESC $ ) C); the shared ISO_2022
 * `match` implementation also counts shift-in/shift-out bytes so that a
 * single escape sequence is not penalized when shifts are present.
 */
module.exports.ISO_2022_KR = function() {
  this.name = function() {
    return 'ISO-2022-KR';
  };
  this.escapeSequences = [
    [ 0x1b, 0x24, 0x29, 0x43 ]
  ];
};
util.inherits(module.exports.ISO_2022_KR, ISO_2022);
120
121
122
/**
 * Detector for ISO-2022-CN.
 *
 * Each instance carries its own `name()` accessor and the table of escape
 * sequences that the shared ISO_2022 `match` implementation looks for.
 */
module.exports.ISO_2022_CN = function() {
  this.name = function() {
    return 'ISO-2022-CN';
  };
  this.escapeSequences = [
    [ 0x1b, 0x24, 0x29, 0x41 ], // GB 2312-80
    [ 0x1b, 0x24, 0x29, 0x47 ], // CNS 11643-1992 Plane 1
    [ 0x1b, 0x24, 0x2A, 0x48 ], // CNS 11643-1992 Plane 2
    [ 0x1b, 0x24, 0x29, 0x45 ], // ISO-IR-165
    [ 0x1b, 0x24, 0x2B, 0x49 ], // CNS 11643-1992 Plane 3
    [ 0x1b, 0x24, 0x2B, 0x4A ], // CNS 11643-1992 Plane 4
    [ 0x1b, 0x24, 0x2B, 0x4B ], // CNS 11643-1992 Plane 5
    [ 0x1b, 0x24, 0x2B, 0x4C ], // CNS 11643-1992 Plane 6
    [ 0x1b, 0x24, 0x2B, 0x4D ], // CNS 11643-1992 Plane 7
    [ 0x1b, 0x4e ],             // SS2
    [ 0x1b, 0x4f ]              // SS3
  ];
};
util.inherits(module.exports.ISO_2022_CN, ISO_2022);
Note: See TracBrowser for help on using the repository browser.