var should  = require('should'),
    needle  = require('./../'),
    Q       = require('q'),
    chardet = require('jschardet'),
    helpers = require('./helpers');

// Tests for needle's `decode` option: whether response bodies served in
// non-UTF-8 charsets are converted or left untouched.
// The first two groups request third-party websites, so they need network
// access; the last group talks to a local server created via `helpers.server`.
describe('character encoding', function() {

  var url;

  // Per-test timeout of 5 seconds (two of the groups below hit remote hosts).
  this.timeout(5000);

  describe('Given content-type: "text/html; charset=EUC-JP"', function() {

    // Phrase looked up in the body; the tests use its presence as the
    // signal that the body was decoded.
    var phrase = 'EUCを使う';

    before(function() {
      url = 'http://www.nina.jp/server/slackware/webapp/tomcat_charset.html';
    });

    describe('with decode = false', function() {

      it('does not decode', function(done) {

        needle.get(url, { decode: false }, function(err, resp) {
          // Report a failed request to Mocha instead of crashing on an
          // undefined `resp`.
          if (err) return done(err);

          resp.body.should.be.a.String;
          chardet.detect(resp.body).encoding.should.eql('windows-1252');
          resp.body.indexOf(phrase).should.eql(-1);
          done();
        });

      });

    });

    describe('with decode = true', function() {

      it('decodes', function(done) {

        needle.get(url, { decode: true }, function(err, resp) {
          if (err) return done(err);

          resp.body.should.be.a.String;
          chardet.detect(resp.body).encoding.should.eql('ascii');
          resp.body.indexOf(phrase).should.not.eql(-1);
          done();
        });

      });

    });

  });

  describe('Given content-type: "text/html but file is charset: gb2312', function() {

    it('encodes to UTF-8', function(done) {

      // Phrase expected in the decoded body and absent from the raw one.
      var phrase = '全球中文网站前二十强';

      // Our Needle wrapper that requests a chinese website.
      var task = Q.nbind(needle.get, needle, 'http://www.chinesetop100.com/');

      // Different instantiations of this task
      var tasks = [Q.fcall(task, { decode: true }),
                   Q.fcall(task, { decode: false })];

      // Keep only the response body; the response object is the first
      // entry of each resolved value.
      var results = tasks.map(function(pending) {
        return pending.then(function(values) {
          return values[0].body;
        });
      });

      // Execute all requests concurrently
      Q.all(results).then(function(bodies) {

        var charsets = bodies.map(function(body) {
          return chardet.detect(body).encoding;
        });

        // We wanted to decode our first stream as specified by options
        charsets[0].should.equal('ascii');
        bodies[0].indexOf(phrase).should.not.equal(-1);

        // But not our second stream
        charsets[1].should.equal('windows-1252');
        bodies[1].indexOf(phrase).should.equal(-1);

      }).then(function() {
        done();
      }, done); // request errors and failed assertions both reach Mocha
    });
  });

  describe('Given content-type: "text/html"', function () {

    var server,
        port = 54321,
        text = 'Magyarországi Fióktelepe';

    // Local server that answers with `text` and a charset-less Content-Type.
    before(function(done) {
      server = helpers.server({
        port: port,
        response: text,
        headers: { 'Content-Type': 'text/html' }
      }, done);
    });

    after(function(done) {
      server.close(done);
    });

    describe('with decode = false', function () {
      it('decodes by default to utf-8', function (done) {

        needle.get('http://localhost:' + port, { decode: false }, function (err, resp) {
          if (err) return done(err);

          resp.body.should.be.a.String;
          // The detector is expected to report ISO-8859-2 for this body,
          // while the body itself must still equal the text that was served.
          chardet.detect(resp.body).encoding.should.eql('ISO-8859-2');
          resp.body.should.eql(text);
          done();
        });

      });

    });

  });
});