请求代理

在爬虫过程中,可以通过请求代理来绕过一些网站针对单个 IP 访问次数的限制。下面进行简单模拟:

服务端: server.js

var http = require('http');

/**
 * Minimal demo server (plain HTTP, port 8889).
 *
 * For every incoming request it logs the request headers, HTTP version,
 * method, URL and the client IP, prints a separator line, and replies
 * with the body "OK".
 */
http.createServer(function(request, response) {

  // Client IP: prefer the X-Forwarded-For header (conventionally added by
  // proxies to carry the originating address); otherwise fall back to the
  // peer address of the underlying socket.
  //
  // `request.socket` is used instead of the deprecated `request.connection`
  // alias, and the old `request.connection.socket.remoteAddress` fallback is
  // dropped: a plain net.Socket has no `.socket` property, so that branch
  // threw a TypeError whenever no address was available (for example when
  // the client had already disconnected). `ip` may now simply be undefined.
  var ip = request.headers['x-forwarded-for'] ||
    request.socket.remoteAddress;

  console.log(request.headers);
  console.log(request.httpVersion);
  console.log(request.method);
  console.log(request.url);
  console.log(ip);
  console.log('-----------');

  response.write("OK");
  response.end();
}).listen(8889);

正常请求

客户端: index.js

var request = require("request");

// Direct request, without a proxy.
// The scheme is http:// because server.js is created with http.createServer
// (plain HTTP); an https:// URL would fail during the TLS handshake and the
// callback would only ever receive an error.
request('http://104.194.66.230:8889/', function (error, response, body) {
  console.log('error:', error);
  // `response` is undefined when the request failed, hence the guard.
  console.log('statusCode:', response && response.statusCode);
  console.log('body:', body);
});

正常访问

客户端使用代理

客户端: index.js

var request = require("request");

// Same request as the direct case, but routed through a proxy so that the
// server sees the proxy's address instead of this machine's.
request({
  // http:// because server.js is created with http.createServer (plain HTTP);
  // an https:// target would fail during the TLS handshake.
  'url': 'http://104.194.66.230:8889/',
  // NOTE(review): proxies listening on port 8080 usually speak plain HTTP;
  // if this one does, the scheme here should be 'http://' as well - confirm
  // against the proxy actually used.
  'proxy': 'https://111.7.130.36:8080' // proxy
},
function (error, response, body) {
  console.log('error:', error);
  // `response` is undefined when the request failed, hence the guard.
  console.log('statusCode:', response && response.statusCode);
  console.log('body:', body);
}
);

使用代理