Code Samples - HTTP Proxy

This document provides code samples for making requests through an HTTP proxy server, for developers' reference.

How to use the code samples

  1. The samples will not run as-is: the proxy server 59.38.241.25:23916, the username myusername and the password mypassword are placeholders. Replace them with your own real values and the samples will run normally.
  2. The runtime requirements and caveats for each sample are noted with the sample; please read them carefully before use.
  3. If you run into problems while using the samples, please contact after-sales support and we will provide technical assistance.

Python2

requests

requests (recommended)

Usage notes

  1. The requests-based sample supports both http and https pages and is the recommended approach.
  2. requests is not part of the Python standard library; install it first: pip install requests
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""使用requests请求代理服务器
请求http和https网页均适用
"""

import requests

# Target page to fetch
page_url = "http://dev.kdlapi.com/testproxy"

# Proxy server
proxy = "59.38.241.25:23916"

# Username and password (private/exclusive proxy)
username = "myusername"
password = "mypassword"

proxies = {
    "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': proxy},
    "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': proxy}
}

headers = {
    "Accept-Encoding": "Gzip",  # ask for gzip-compressed transfer to speed up the request
}

r = requests.get(page_url, proxies=proxies, headers=headers)

print r.status_code  # response status code

if r.status_code == 200:
    r.encoding = "utf-8"  # set the response encoding
    print r.content  # page content

urllib2

urllib2

Usage notes

  • The urllib2-based sample supports both http and https pages
  • Requires python2.6 / 2.7
#!/usr/bin/env python
#-*- coding: utf-8 -*-

"""使用urllib2请求代理服务器
请求http和https网页均适用
"""

import urllib2
import zlib
import ssl

ssl._create_default_https_context = ssl._create_unverified_context  # disable certificate verification globally to avoid errors on https pages

# Target page to fetch
page_url = "http://dev.kdlapi.com/testproxy"

# Proxy server
proxy = "59.38.241.25:23916"

# Username and password (private/exclusive proxy)
username = "myusername"
password = "mypassword"

proxies = {
    "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': proxy},
    "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': proxy}
}

req = urllib2.Request(page_url)
req.add_header("Accept-Encoding", "Gzip")  # ask for gzip-compressed transfer to speed up the request
proxy_handler = urllib2.ProxyHandler(proxies)
opener = urllib2.build_opener(proxy_handler)
urllib2.install_opener(opener)
r = urllib2.urlopen(req)

print r.code
content_encoding = r.headers.getheader("Content-Encoding")
if content_encoding and "gzip" in content_encoding:
    print zlib.decompress(r.read(), 16 + zlib.MAX_WBITS)  # page content
else:
    print r.read()  # page content

Selenium

Chrome
Chrome (IP whitelist, Linux)

Usage notes

  • Whitelist-based http/https proxy with Chrome
  • Requires python2.x + selenium + Chrome + Chromedriver + xvfb
  • Install the xvfb wrapper: pip install xvfbwrapper
  • See the Ubuntu development environment setup reference
# -*- coding: utf-8 -*-
'''
IP-whitelist proxy access with Chrome under Selenium
Tested on ubuntu14.04
Uses a virtual display so the script can run without a visible window
Chrome itself does not support username/password proxy authentication here
'''
from xvfbwrapper import Xvfb
from selenium import webdriver
import time

# Target page to fetch
page_url = "http://dev.kdlapi.com/testproxy"

# Proxy server ip and port
proxy = '59.38.241.25:23916'

# Copy the default capabilities
desired_capabilities = webdriver.DesiredCapabilities.CHROME.copy()
# Modify the copied capabilities
PROXY = proxy
desired_capabilities['proxy'] = {
    "httpProxy": PROXY,
    "ftpProxy": PROXY,
    "sslProxy": PROXY,
    "noProxy": None,
    "proxyType": "MANUAL",
    "class": "org.openqa.selenium.Proxy",
    "autodetect": False
}

# Chrome options for this driver instance
options = webdriver.ChromeOptions()

# If the installed Chrome supports headless mode, enable it
options.add_argument('headless')

# Start a virtual display and launch Chrome without a window
xvfb = Xvfb()
xvfb.start()
driver = webdriver.Chrome(chrome_options=options, desired_capabilities=desired_capabilities)

# Fetch the test page; it echoes the proxy IP you are using
driver.get(page_url)

# Print the page title and page source
print driver.title
print driver.page_source

# Clean up
driver.quit()
xvfb.stop()
Chrome (IP whitelist, Windows)

Usage notes

# -*- coding: utf-8 -*-
"""
    selenium + chrome + windows + IP whitelist
"""

from selenium import webdriver

# Proxy server ip and port
proxy = '42.49.11.109:16816'

# Target page
page_url = "https://dev.kuaidaili.com/testproxy"

co = webdriver.ChromeOptions()
co.add_argument('--proxy-server=%s' % proxy)  # set the proxy
co.add_argument('--headless')  # headless mode, optional

# Launch chrome
driver = webdriver.Chrome(chrome_options=co)

# Fetch the page
driver.get(page_url)

# Print the page content
print(driver.page_source)

# Quit chrome
driver.quit()
Chrome (username/password authentication, Windows)

Usage notes

# -*- coding: utf-8 -*-
"""
    selenium + chrome + windows + username/password authentication
"""
from selenium import webdriver

# NOTE: create_proxyauth_extension is defined at the bottom of this sample;
# place its definition above this call (or import it from a module) before running.
proxyauth_plugin_path = create_proxyauth_extension(
    proxy_host="42.49.11.109",  # proxy server ip
    proxy_port=16816,           # proxy server port
    proxy_username="yourusername",    # username
    proxy_password="yourpassword"   # password
)

# Target page
page_url = "https://dev.kuaidaili.com/testproxy"

co = webdriver.ChromeOptions()
co.add_argument("--start-maximized")  # maximize the window; omitting this may cause errors
co.add_extension(proxyauth_plugin_path)  # add the proxy-auth extension

# Launch chrome and fetch the page
driver = webdriver.Chrome(chrome_options=co)
driver.get(page_url)

# Print the page content
print(driver.page_source)

# Helper that generates the proxy-auth extension
def create_proxyauth_extension(proxy_host, proxy_port,
                               proxy_username, proxy_password,
                               scheme='http', plugin_path=None):
    import string
    import zipfile

    if plugin_path is None:
        plugin_path = 'vimm_chrome_proxyauth_plugin.zip'

    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",   
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    background_js = string.Template(
    """
    var config = {
            mode: "fixed_servers",
            rules: {
              singleProxy: {
                scheme: "${scheme}",
                host: "${host}",
                port: parseInt(${port})
              },
              bypassList: ["foobar.com"]
            }
          };

    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

    function callbackFn(details) {
        return {
            authCredentials: {
                username: "${username}",
                password: "${password}"
            }
        };
    }

    chrome.webRequest.onAuthRequired.addListener(
                callbackFn,
                {urls: ["<all_urls>"]},
                ['blocking']
    );
    """
    ).substitute(
        host=proxy_host,
        port=proxy_port,
        username=proxy_username,
        password=proxy_password,
        scheme=scheme,
    )
    with zipfile.ZipFile(plugin_path, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)

    return plugin_path
PhantomJS
PhantomJS (IP whitelist)

Usage notes

  • Whitelist-based http/https proxy with PhantomJS
  • Requires python2.x + selenium + PhantomJS
  • selenium and PhantomJS can be installed directly with pip
# -*- coding: utf-8 -*-
'''
Example of http whitelist access (no login required) with the headless browser phantomjs under selenium
Prints the title and page source of the visited URL
'''
from selenium import webdriver

# Target page to fetch
page_url = "http://dev.kdlapi.com/testproxy"

# Proxy server ip and port
proxy = '59.38.241.25:23918'

# Proxy settings
service_args = [
    '--proxy=%s' % proxy,
    '--proxy-type=http',
    #'--proxy-auth=%s:%s' % (username, password)
]

# Launch PhantomJS and open the target page
driver = webdriver.PhantomJS(service_args=service_args)
driver.get(page_url)

# Print the page title and page source
print driver.title
print driver.page_source

# Quit PhantomJS
driver.quit()
PhantomJS (username/password authentication)

Usage notes

  • Username/password-authenticated http/https proxy with PhantomJS
  • Requires python2.x + selenium + PhantomJS
  • selenium and PhantomJS can be installed directly with pip
# -*- coding: utf-8 -*-
'''
Example of an authenticated http proxy with the headless browser phantomjs under selenium
Prints the title and page source of the visited URL
'''
from selenium import webdriver
# Target page to fetch
page_url = "http://dev.kdlapi.com/testproxy"

# Proxy server ip and port
proxy = '59.38.241.25:23916'

# Username and password (private/exclusive proxy)
username = 'myusername'
password = 'mypassword'

# Proxy settings
service_args = [
    '--proxy=%s' % proxy,
    '--proxy-type=http',
    '--proxy-auth=%s:%s' % (username, password)
]

# Launch PhantomJS and open the target page
driver = webdriver.PhantomJS(service_args=service_args)
driver.get(page_url)

# Print the page title and page source
print driver.title
print driver.page_source

driver.quit()

Python3

requests

requests (recommended)

Usage notes

  1. The requests-based sample supports both http and https pages and is the recommended approach.
  2. requests is not part of the Python standard library; install it first: pip install requests
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import requests

"""使用requests请求代理服务器,适用于http,https"""

#要访问的目标网页
page_url = "http://dev.kdlapi.com/testproxy/"

#代理服务器
proxy = "59.38.241.25:23916"

#用户名和密码(私密代理/独享代理)
username = "myusername"
password = "mypassword"

proxies = {
    "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': proxy},
    "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': proxy}
}

headers = {
    "Accept-Encoding": "Gzip",  # ask for gzip-compressed transfer to speed up the request
}

res = requests.get(url=page_url, proxies=proxies, headers=headers)
print(res.status_code)  # response status code
if res.status_code == 200:
    print(res.content.decode('utf-8'))  # page content
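If the proxy is temporarily unreachable or slow, requests will raise an exception or hang. The following optional sketch (not part of the original sample, reusing the placeholder page_url, proxies and headers defined above) adds a timeout and basic error handling:

try:
    res = requests.get(url=page_url, proxies=proxies, headers=headers, timeout=10)  # fail fast on a dead proxy
    print(res.status_code)
except requests.exceptions.ProxyError as e:
    print("proxy error:", e)  # e.g. wrong credentials or the proxy refused the connection
except requests.exceptions.Timeout as e:
    print("request timed out:", e)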

urllib

urllib

Usage notes

  • The urllib-based sample supports both http and https pages
  • Requires python3.x
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import urllib.request
import zlib
import ssl

ssl._create_default_https_context = ssl._create_unverified_context  # disable certificate verification globally to avoid errors on https pages

"""Request a page through the proxy server with urllib.request; works for both http and https pages."""

# Target page to fetch
page_url = "http://dev.kdlapi.com/testproxy/"

# Proxy server
proxy = "59.38.241.25:23916"

# Username and password (private/exclusive proxy)
username = "myusername"
password = "mypassword"

proxies = {
    "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': proxy},
    "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': proxy}
}

headers = {
    "Accept-Encoding": "Gzip",  # ask for gzip-compressed transfer to speed up the request
}

proxy_handler = urllib.request.ProxyHandler(proxies)
opener = urllib.request.build_opener(proxy_handler)

req = urllib.request.Request(url=page_url, headers=headers)

result = opener.open(req)
print(result.status)  # response status code

content_encoding = result.headers.get('Content-Encoding')
if content_encoding and "gzip" in content_encoding:
    print(zlib.decompress(result.read(), 16 + zlib.MAX_WBITS).decode('utf-8'))  # page content
else:
    print(result.read().decode('utf-8'))  # page content

Python-Scrapy

The standard directory layout of a scrapy project is as follows:
[Figure: scrapy project structure]
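A typical layout for this example, assuming the project was created with scrapy startproject scrapy_proxy and the files described below (utils.py and spiders/main.py) have been added, looks roughly like this:

scrapy_proxy/                # first-level directory
    scrapy.cfg
    scrapy_proxy/            # second-level directory
        __init__.py
        items.py
        middlewares.py
        pipelines.py
        settings.py
        utils.py             # added below
        spiders/
            __init__.py
            main.py          # added below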

Usage notes

  1. Works for both http and https pages
  2. scrapy is not part of the Python standard library; install it first: pip install scrapy
  3. Run the following command from the first-level scrapy_proxy directory to see the result: scrapy crawl main
utils.py

Create utils.py in the second-level scrapy_proxy directory and add the following code

# -*- coding: utf-8 -*-
import logging
import requests
import json

orderid = 'yourorderid'  # order id
# API endpoint for fetching proxies; a private proxy order is used as the example
api_url = "https://dps.kdlapi.com/api/getdps/?orderid={}&num=1&pt=1&format=json&sep=1"

logger = logging.getLogger(__name__)

def fetch_one_proxy():
    """
        Fetch one proxy
    """
    fetch_url = api_url.format(orderid)
    r = requests.get(fetch_url)
    if r.status_code != 200:
        logger.error("fail to fetch proxy")
        return False
    content = json.loads(r.content.decode('utf-8'))
    ips = content['data']['proxy_list']
    return ips[0]
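For reference, fetch_one_proxy() returns a single "ip:port" string taken from the API's proxy_list (or False if the API call fails); a quick manual check of the helper might look like this:

if __name__ == '__main__':
    # quick sanity check; expects something like "59.38.241.25:23916" (placeholder value)
    print(fetch_one_proxy())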

middlewares.py

Add the following code to middlewares.py to configure the proxy

import base64
import logging
from scrapy.downloadermiddlewares.useragent import UserAgentMiddleware

from .utils import fetch_one_proxy

# For non-open proxies without a whitelisted IP, username/password authentication is required
username = "yourusername"
password = "yourpassword"
proxy = fetch_one_proxy()  # fetch one proxy

THRESHOLD = 3  # number of failures before switching ip
fail_time = 0  # failure count for the current ip

logger = logging.getLogger(__name__)

# Proxy middleware
class ProxyMiddleware(object):

    def process_request(self, request, spider):
        proxy_url = 'http://%s:%s@%s' % (username, password, proxy)
        request.meta['proxy'] = proxy_url  # set the proxy
        logger.debug("using proxy: {}".format(request.meta['proxy']))
        # set proxy authentication
        # Python3 version
        auth = "Basic %s" % (base64.b64encode(('%s:%s' % (username, password)).encode('utf-8'))).decode('utf-8')
        # Python2 version
        # auth = "Basic " + base64.b64encode('%s:%s' % (username, password))
        request.headers['Proxy-Authorization'] = auth


    def process_response(self, request, response, spider):
        """
            If the status code is abnormal, increment the failure count for this ip.
            When the count reaches the threshold, switch to a new ip.
            This ip-rotation strategy is deliberately simple and is for reference only.
        """
        global fail_time, proxy, THRESHOLD
        if not(200 <= response.status < 300):
            fail_time += 1
            if fail_time >= THRESHOLD:
                proxy = fetch_one_proxy()
                fail_time = 0
        return response

class AgentMiddleware(UserAgentMiddleware):
    """
        User-Agent middleware: sets the User-Agent header
    """
    def __init__(self, user_agent=''):
        self.user_agent = user_agent

    def process_request(self, request, spider):
        ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:39.0) Gecko/20100101 Firefox/39.0'
        request.headers.setdefault('User-Agent', ua)

settings.py

Set DOWNLOADER_MIDDLEWARES in settings.py so that the new middlewares take effect

ROBOTSTXT_OBEY = False  # set this to False to improve the success rate
DOWNLOADER_MIDDLEWARES = {
    'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 110,
    'scrapy_proxy.middlewares.ProxyMiddleware': 100,
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': 2,
    'scrapy_proxy.middlewares.AgentMiddleware': 1,
}

main.py

Create the spider file main.py manually in the spiders directory

# -*- coding: utf-8 -*-
import scrapy

class MainSpider(scrapy.Spider):
    """
        Spider file; crawling the JD.com homepage is used as the example
    """
    name = "main"
    allowed_domains = ["www.jd.com"]
    start_urls = ['https://www.jd.com']

    def parse(self, response):
        # print the response body (fetched through the proxy ip)
        print('------ response ------', response.text)
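Besides the scrapy crawl main command, the spider can also be started from a plain Python script. A minimal sketch, assuming it is saved next to scrapy.cfg in the first-level scrapy_proxy directory so that the project settings are picked up:

# run_main.py (hypothetical launcher script placed next to scrapy.cfg)
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from scrapy_proxy.spiders.main import MainSpider

process = CrawlerProcess(get_project_settings())  # loads settings.py, including DOWNLOADER_MIDDLEWARES
process.crawl(MainSpider)
process.start()  # blocks until the crawl is finished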

Java

jdk

Using the native JDK library

Usage notes

  1. This sample supports both http and https pages
  2. Requires jdk >= 1.6
package com.kuaidaili.sdk;

import java.util.HashMap;
import java.util.Map;

/**
 * Request a page through the proxy server using the native JDK library.
 * Works for both http and https pages.
 */
public class TestProxy {

    private static String pageUrl = "http://dev.kdlapi.com/testproxy"; //target page to fetch
    private static String proxyIp = "59.38.241.25"; //proxy server IP
    private static String proxyPort = "23916"; //proxy server port
    private static String username = "myusername"; //username
    private static String password = "mypassword"; //password

    public static void main(String[] args) {
        HttpRequest request = new HttpRequest();
        Map<String, String> params = new HashMap<String, String>();
        Map<String, String> headers = new HashMap<String, String>();

        headers.put("Accept-Encoding", "gzip"); //ask for gzip-compressed transfer to speed up the request

        Map<String, String> proxySettings = new HashMap<String, String>();
        proxySettings.put("ip", proxyIp);
        proxySettings.put("port", proxyPort);
        proxySettings.put("username", username);
        proxySettings.put("password", password);

        try{
            HttpResponse response = request.sendGet(pageUrl, params, headers, proxySettings);
            System.out.println(response.getCode());
            System.out.println(response.getContent());
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}
See the helper classes HttpRequest and HttpResponse below

HttpRequest.java

package com.kuaidaili.sdk;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.Authenticator;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.PasswordAuthentication;
import java.net.Proxy;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.Map;
import java.util.Vector;
import java.util.zip.GZIPInputStream;

/**
 * HTTP request object
 */
public class HttpRequest {

    private String defaultContentEncoding;
    private int connectTimeout = 1000;
    private int readTimeout = 1000;

    public HttpRequest() {
        this.defaultContentEncoding = Charset.defaultCharset().name();
    }

    /**
     * Send a GET request
     *
     * @param urlString URL
     * @param proxySettings proxy settings; null means no proxy
     * @return response object
     */
    public HttpResponse sendGet(String urlString, final Map<String, String> proxySettings) throws IOException {
        return this.send(urlString, "GET", null, null, proxySettings);
    }

    /**
     * Send a GET request
     *
     * @param urlString URL
     * @param params request parameters
     * @param proxySettings proxy settings; null means no proxy
     * @return response object
     */
    public HttpResponse sendGet(String urlString, Map<String, String> params, final Map<String, String> proxySettings)
            throws IOException {
        return this.send(urlString, "GET", params, null, proxySettings);
    }

    /**
     * Send a GET request
     *
     * @param urlString URL
     * @param params request parameters
     * @param headers request headers
     * @param proxySettings proxy settings; null means no proxy
     * @return response object
     */
    public HttpResponse sendGet(String urlString, Map<String, String> params,
            Map<String, String> headers, final Map<String, String> proxySettings) throws IOException {
        return this.send(urlString, "GET", params, headers, proxySettings);
    }

    /**
     * Send a POST request
     *
     * @param urlString URL
     * @param proxySettings proxy settings; null means no proxy
     * @return response object
     */
    public HttpResponse sendPost(String urlString, final Map<String, String> proxySettings) throws IOException {
        return this.send(urlString, "POST", null, null, proxySettings);
    }

    /**
     * Send a POST request
     *
     * @param urlString URL
     * @param params request parameters
     * @param proxySettings proxy settings; null means no proxy
     * @return response object
     */
    public HttpResponse sendPost(String urlString, Map<String, String> params, final Map<String, String> proxySettings)
            throws IOException {
        return this.send(urlString, "POST", params, null, proxySettings);
    }

    /**
     * Send a POST request
     *
     * @param urlString URL
     * @param params request parameters
     * @param headers request headers
     * @param proxySettings proxy settings; null means no proxy
     * @return response object
     */
    public HttpResponse sendPost(String urlString, Map<String, String> params,
            Map<String, String> headers, final Map<String, String> proxySettings) throws IOException {
        return this.send(urlString, "POST", params, headers, proxySettings);
    }

    /**
     * Send an HTTP request
     */
    private HttpResponse send(String urlString, String method,
            Map<String, String> parameters, Map<String, String> headers, final Map<String, String> proxySettings)
            throws IOException {
        HttpURLConnection urlConnection = null;

        if (method.equalsIgnoreCase("GET") && parameters != null) {
            StringBuffer param = new StringBuffer();
            int i = 0;
            for (String key : parameters.keySet()) {
                if (i == 0)
                    param.append("?");
                else
                    param.append("&");
                param.append(key).append("=").append(URLEncoder.encode(parameters.get(key), "utf-8"));
                i++;
            }
            urlString += param;
        }
        URL url = new URL(urlString);
        if(proxySettings != null){
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxySettings.get("ip"), Integer.parseInt(proxySettings.get("port"))));
            urlConnection = (HttpURLConnection) url.openConnection(proxy);
            if(proxySettings.containsKey("username")){
                Authenticator authenticator = new Authenticator() {
                    public PasswordAuthentication getPasswordAuthentication() {
                        return (new PasswordAuthentication(proxySettings.get("username"),
                                proxySettings.get("password").toCharArray()));
                    }
                };
                Authenticator.setDefault(authenticator);
            }
        }
        else{
            urlConnection = (HttpURLConnection) url.openConnection();
        }

        urlConnection.setRequestMethod(method);
        urlConnection.setDoOutput(true);
        urlConnection.setDoInput(true);
        urlConnection.setUseCaches(false);

        urlConnection.setConnectTimeout(connectTimeout);
        urlConnection.setReadTimeout(readTimeout);

        if (headers != null)
            for (String key : headers.keySet()) {
                urlConnection.addRequestProperty(key, headers.get(key));
            }

        if (method.equalsIgnoreCase("POST") && parameters != null) {
            StringBuffer param = new StringBuffer();
            int i = 0;
            for (String key : parameters.keySet()) {
                if(i > 0) param.append("&");
                param.append(key).append("=").append(URLEncoder.encode(parameters.get(key), "utf-8"));
                i++;
            }
            System.out.println(param.toString());
            urlConnection.getOutputStream().write(param.toString().getBytes());
            urlConnection.getOutputStream().flush();
            urlConnection.getOutputStream().close();
        }

        return this.makeContent(urlString, urlConnection);
    }

    /**
     * Build the response object
     */
    private HttpResponse makeContent(String urlString,
            HttpURLConnection urlConnection) throws IOException {
        HttpResponse response = new HttpResponse();
        try {
            InputStream in = urlConnection.getInputStream();
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in));
            if ("gzip".equals(urlConnection.getContentEncoding())) bufferedReader =  new BufferedReader(new InputStreamReader(new GZIPInputStream(in)));
            response.contentCollection = new Vector<String>();
            StringBuffer temp = new StringBuffer();
            String line = bufferedReader.readLine();
            while (line != null) {
                response.contentCollection.add(line);
                temp.append(line).append("\r\n");
                line = bufferedReader.readLine();
            }
            bufferedReader.close();

            String encoding = urlConnection.getContentEncoding();
            if (encoding == null)
                encoding = this.defaultContentEncoding;

            response.urlString = urlString;

            response.defaultPort = urlConnection.getURL().getDefaultPort();
            response.file = urlConnection.getURL().getFile();
            response.host = urlConnection.getURL().getHost();
            response.path = urlConnection.getURL().getPath();
            response.port = urlConnection.getURL().getPort();
            response.protocol = urlConnection.getURL().getProtocol();
            response.query = urlConnection.getURL().getQuery();
            response.ref = urlConnection.getURL().getRef();
            response.userInfo = urlConnection.getURL().getUserInfo();
            response.contentLength = urlConnection.getContentLength();

            response.content = new String(temp.toString().getBytes());
            response.contentEncoding = encoding;
            response.code = urlConnection.getResponseCode();
            response.message = urlConnection.getResponseMessage();
            response.contentType = urlConnection.getContentType();
            response.method = urlConnection.getRequestMethod();
            response.connectTimeout = urlConnection.getConnectTimeout();
            response.readTimeout = urlConnection.getReadTimeout();

            return response;
        } catch (IOException e) {
            throw e;
        } finally {
            if (urlConnection != null){
                urlConnection.disconnect();
            }
        }
    }

    public static byte[] gunzip(byte[] bytes) {  
        if (bytes == null || bytes.length == 0) {  
            return null;  
        }  
        ByteArrayOutputStream out = new ByteArrayOutputStream();  
        ByteArrayInputStream in = new ByteArrayInputStream(bytes);  
        try {  
            GZIPInputStream ungzip = new GZIPInputStream(in);  
            byte[] buffer = new byte[256];  
            int n;  
            while ((n = ungzip.read(buffer)) >= 0) {  
                out.write(buffer, 0, n);  
            }  
        } catch (IOException e) {  
            System.err.println("gzip uncompress error.");
            e.printStackTrace();
        }  

        return out.toByteArray();  
    }

    /**
     * Get the default response charset
     */
    public String getDefaultContentEncoding() {
        return this.defaultContentEncoding;
    }

    /**
     * Set the default response charset
     */
    public void setDefaultContentEncoding(String defaultContentEncoding) {
        this.defaultContentEncoding = defaultContentEncoding;
    }

    public int getConnectTimeout() {
        return connectTimeout;
    }

    public void setConnectTimeout(int connectTimeout) {
        this.connectTimeout = connectTimeout;
    }

    public int getReadTimeout() {
        return readTimeout;
    }

    public void setReadTimeout(int readTimeout) {
        this.readTimeout = readTimeout;
    }
}

HttpResponse.java

package com.kuaidaili.sdk;

import java.util.Vector;

/**
 * HTTP response object
 */
public class HttpResponse {

    String urlString;
    int defaultPort;
    String file;
    String host;
    String path;
    int port;
    String protocol;
    String query;
    String ref;
    String userInfo;
    String contentEncoding;
    int contentLength;
    String content;
    String contentType;
    int code;
    String message;
    String method;

    int connectTimeout;

    int readTimeout;

    Vector<String> contentCollection;

    public String getContent() {
        return content;
    }

    public String getContentType() {
        return contentType;
    }

    public int getCode() {
        return code;
    }

    public String getMessage() {
        return message;
    }

    public Vector<String> getContentCollection() {
        return contentCollection;
    }

    public String getContentEncoding() {
        return contentEncoding;
    }

    public String getMethod() {
        return method;
    }

    public int getConnectTimeout() {
        return connectTimeout;
    }

    public int getReadTimeout() {
        return readTimeout;
    }

    public String getUrlString() {
        return urlString;
    }

    public int getDefaultPort() {
        return defaultPort;
    }

    public String getFile() {
        return file;
    }

    public String getHost() {
        return host;
    }

    public String getPath() {
        return path;
    }

    public int getPort() {
        return port;
    }

    public String getProtocol() {
        return protocol;
    }

    public String getQuery() {
        return query;
    }

    public String getRef() {
        return ref;
    }

    public String getUserInfo() {
        return userInfo;
    }

}

httpclient

HttpClient-4.5.6

Usage notes

  1. This sample supports both http and https pages
  2. Whitelist-based access is recommended (HttpClient sees a certain proportion of authentication failures when using username/password)
  3. Requires jdk >= 1.6
  4. Dependencies (downloads):
    httpclient-4.5.6.jar
    httpcore-4.4.10.jar
    commons-codec-1.10.jar
    commons-logging-1.2.jar
package com.kuaidaili.sdk;

import java.net.URL;

import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Request a page through the proxy server using httpclient.
 * Works for both http and https pages.
 */
public class TestProxyHttpClient {

    private static String pageUrl = "http://dev.kdlapi.com/testproxy"; //target page to fetch
    private static String proxyIp = "59.38.241.25"; //proxy server IP
    private static int proxyPort = 23916; //proxy server port
    private static String username = "myusername"; //username
    private static String password = "mypassword"; //password

    public static void main(String[] args) throws Exception {
        CredentialsProvider credsProvider = new BasicCredentialsProvider();
        credsProvider.setCredentials(
                new AuthScope(proxyIp, proxyPort),
                new UsernamePasswordCredentials(username, password));
        CloseableHttpClient httpclient = HttpClients.custom()
                .setDefaultCredentialsProvider(credsProvider).build();
        try {
            URL url = new URL(pageUrl);
            HttpHost target = new HttpHost(url.getHost(), url.getDefaultPort(), url.getProtocol());
            HttpHost proxy = new HttpHost(proxyIp, proxyPort);

            RequestConfig config = RequestConfig.custom().setProxy(proxy).build();
            HttpGet httpget = new HttpGet(url.getPath());
            httpget.setConfig(config);
            httpget.addHeader("Accept-Encoding", "gzip"); //ask for gzip-compressed transfer to speed up the request

            System.out.println("Executing request " + httpget.getRequestLine() + " to " + target + " via " + proxy);

            CloseableHttpResponse response = httpclient.execute(target, httpget);
            try {
                System.out.println("----------------------------------------");
                System.out.println(response.getStatusLine());
                System.out.println(EntityUtils.toString(response.getEntity()));
            } finally {
                response.close();
            }
        } finally {
            httpclient.close();
        }
    }
}

GoLang

Standard library

Standard library

Usage notes

  • Works for both http and https pages
// Request a page through the proxy server
// Works for both http and https pages

package main

import (
    "compress/gzip"
    "fmt"
    "io"
    "io/ioutil"
    "net/http"
    "net/url"
    "os"
)

func main() {
    // Username and password (private/exclusive proxy)
    username := "myusername"
    password := "mypassword"

    // Proxy server
    proxy_raw := "59.38.241.25:23916"
    proxy_str := fmt.Sprintf("http://%s:%s@%s", username, password, proxy_raw)
    proxy, err := url.Parse(proxy_str)

    // Target page
    page_url := "http://dev.kdlapi.com/testproxy"

    // Request the target page
    client := &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxy)}}
    req, _ := http.NewRequest("GET", page_url, nil)
    req.Header.Add("Accept-Encoding", "gzip") // ask for gzip-compressed transfer to speed up the request
    res, err := client.Do(req)

    if err != nil {
        // The request failed
        fmt.Println(err.Error())
    } else {
        defer res.Body.Close() // make sure the body is closed

        fmt.Println("status code:", res.StatusCode) // status code

        // If the response is gzip-compressed, decompress it before reading
        if res.Header.Get("Content-Encoding") == "gzip" {
            reader, _ := gzip.NewReader(res.Body) // gzip decompression
            defer reader.Close()
            io.Copy(os.Stdout, reader)
            os.Exit(0) // normal exit
        }

        // No gzip compression: read the body directly
        body, _ := ioutil.ReadAll(res.Body)
        fmt.Println(string(body))
    }
}

CSharp

Standard library

Standard library

Usage notes

  • Works for both http and https pages
using System;
using System.Text;
using System.Net;
using System.IO;
using System.IO.Compression;

namespace csharp_http
{
    class Program
    {
        static void Main(string[] args)
        {
            // Target page to fetch
            string page_url = "http://dev.kdlapi.com/testproxy";

            // Build the request
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(page_url);
            request.Method = "GET";
            request.Headers.Add("Accept-Encoding", "Gzip");  // ask for gzip-compressed transfer to speed up the request

            // Proxy server
            string proxy_ip = "59.38.241.25";
            int proxy_port = 23916;

            // Username and password <private/exclusive proxy>
            string username = "myusername";
            string password = "mypassword";

            // Set the proxy <open proxy, or private/exclusive proxy with a whitelisted IP>
            // request.Proxy = new WebProxy(proxy_ip, proxy_port);

            // Set the proxy <private/exclusive proxy without a whitelisted IP>
            WebProxy proxy = new WebProxy();
            proxy.Address = new Uri(String.Format("http://{0}:{1}", proxy_ip, proxy_port));
            proxy.Credentials = new NetworkCredential(username, password);
            request.Proxy = proxy;

            // Request the target page
            HttpWebResponse response = (HttpWebResponse)request.GetResponse();

            Console.WriteLine((int)response.StatusCode);  // status code
            // Decompress and read the response body
            using (StreamReader reader =  new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress))) {
                Console.WriteLine(reader.ReadToEnd());
            }
        }
    }
}

Node.js

Standard library (http only)

Standard library (http requests only)

Usage notes

  • Works for http pages only
const http = require('http');  // built-in http module

// Username/password and the auth header; not needed if your IP is whitelisted
const username = 'yourusername';
const password = 'yourpassword';
const auth = 'Basic ' + Buffer.from(username + ':' + password).toString('base64');

// Target URL to request
let url = 'http://dev.kuaidaili.com/testproxy';

let options = {
    host: '113.72.108.33', // proxy server ip
    port: 16816,         // proxy server port
    path: url,
    headers: {
        "Host": 'dev.kuaidaili.com',
        "Proxy-Authorization": auth,
        "Accept-Encoding": "gzip"   // ask for gzip compression to speed up transfer
    }
};

// Request the target URL
http.get(options, (res) => {
    // print the status code
    console.log(res.statusCode);

    // print the response body (gzip-compressed)
    if (res.headers['content-encoding'] && res.headers['content-encoding'].indexOf('gzip') != -1) {
        let zlib = require('zlib');
        let unzip = zlib.createGunzip();
        res.pipe(unzip).pipe(process.stdout);
    } else {
        // print the response body (not gzip-compressed)
        res.pipe(process.stdout);
    }

});

Standard library (http and https)

Standard library (http and https requests)

Usage notes

  • Works for both http and https pages
let http = require('http'); // built-in http module
let tls = require('tls'); // built-in tls module
let util = require('util');

// Username/password and the auth header; not needed if your IP is whitelisted
const username = 'yourusername';
const password = 'yourpassword';
const auth = 'Basic ' + Buffer.from(username + ':' + password).toString('base64');

// Proxy server ip and port
let proxy_ip = '42.69.11.103';
let proxy_port = 16816;

// Target host and path; the JD.com homepage is used as the example
let remote_host = 'www.jd.com';
let remote_path = '/';

// Send a CONNECT request to the proxy
let req = http.request({
    host: proxy_ip,
    port: proxy_port,
    method: 'CONNECT',
    path: util.format('%s:443', remote_host),
    headers: {
        "Host": remote_host,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36",
        "Proxy-Authorization": auth,
        "Accept-Encoding": "gzip"   // 使用gzip压缩让数据传输更快
    }
});


req.on('connect', function (res, socket, head) {
    // TLS handshake
    let tlsConnection = tls.connect({
        host: remote_host,
        socket: socket
    }, function () {
        // send the GET request
        tlsConnection.write(util.format('GET %s HTTP/1.1\r\nHost: %s\r\n\r\n', remote_path, remote_host));
    });

    tlsConnection.on('data', function (data) {
        // print the response (the full raw response text)
        console.log(data.toString());
    });
});

req.end();

request

request (recommended)

Usage notes

  • Install the request library first: npm install request
  • Works for both http and https pages
let request = require('request'); // third-party request library
let util = require('util');
let zlib = require('zlib');

// Username and password; not needed if your IP is whitelisted
const username = 'yourusername';
const password = 'yourpassword';

// Target URL to fetch
let page_url = 'https://www.jd.com'

// Proxy server ip and port
let proxy_ip = '42.69.11.103';
let proxy_port = 16816;

// Full proxy server url
let proxy = util.format('http://%s:%s@%s:%d', username, password, proxy_ip, proxy_port);

// Send the request
request({
    url: page_url,
    method: 'GET',
    proxy: proxy,
    headers: {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36",
        "Accept-Encoding": "gzip"   // 使用gzip压缩让数据传输更快
    },
    encoding: null,  // keep the body as a Buffer so it can be decompressed
}, function(error, res, body) {
    if (!error && res.statusCode == 200) {
        // print the response body (gzip-compressed)
        if (res.headers['content-encoding'] && res.headers['content-encoding'].indexOf('gzip') != -1) {
            zlib.gunzip(body, function(err, dezipped) {
                console.log(dezipped.toString());
            });
        } else {
            // print the response body (not gzip-compressed)
            console.log(body);
        }
    } else {
        console.log(error);
    }
});

puppeteer

puppeteer (IP whitelist)

Usage notes

  • Whitelist-based http/https proxy with Puppeteer
  • Requires node 7.6.0 or later + puppeteer
  • Install puppeteer first: npm i puppeteer
// puppeteer module
const puppeteer = require('puppeteer');

// Target page to fetch
const url = 'http://dev.kuaidaili.com/testproxy';

// Extra headers
const headers = {
    'Accept-Encoding': 'gzip' // ask for gzip compression to speed up transfer
};

// Proxy server ip and port
let proxy_ip = '223.198.230.41';
let proxy_port = 19732;

(async ()=> {
    // launch a browser instance
    const browser = await puppeteer.launch({
        headless: false,  // whether to hide the window; defaults to true, set to false for easier debugging
        args: [
            `--proxy-server=${proxy_ip}:${proxy_port}`,
            '--no-sandbox',
            '--disable-setuid-sandbox'
        ]
    });

    // open a new page
    const page = await browser.newPage();

    // set headers
    await page.setExtraHTTPHeaders(headers);

    // visit the target page
    await page.goto(url);

})();
puppeteer (username/password authentication)

Usage notes

  • Username/password-authenticated http/https proxy with Puppeteer
  • Requires node 7.6.0 or later + puppeteer
  • Install puppeteer first: npm i puppeteer
// puppeteer module
const puppeteer = require('puppeteer');

// Target page to fetch
const url = 'http://dev.kuaidaili.com/testproxy';

// Extra headers
const headers = {
    'Accept-Encoding': 'gzip' // ask for gzip compression to speed up transfer
};

// Proxy server ip and port
let proxy_ip = '223.198.230.41';
let proxy_port = 19732;

// Username and password (available in your account dashboard)
const username = 'yourusername';
const password = 'yourpassword';

(async ()=> {
    // launch a browser instance
    const browser = await puppeteer.launch({
        headless: false,  // whether to hide the window; defaults to true, set to false for easier debugging
        args: [
            `--proxy-server=${proxy_ip}:${proxy_port}`,
            '--no-sandbox',
            '--disable-setuid-sandbox'
        ]
    });

    // open a new page
    const page = await browser.newPage();

    // set headers
    await page.setExtraHTTPHeaders(headers);

    // username/password authentication
    await page.authenticate({username: username, password: password});

    // visit the target page
    await page.goto(url);
})();

Ruby

net/http

net/http (IP whitelist)

Usage notes

  • IP whitelist-based http/https proxy with net/http
# -*- coding: utf-8 -*-

require 'net/http'  # built-in net/http module
require 'zlib'
require 'stringio'

# Proxy server ip and port
proxy_ip = '42.49.11.109'
proxy_port = 16816


# Target page to fetch; the kuaidaili testproxy page is used as the example
page_url = "https://dev.kuaidaili.com/testproxy"
uri = URI(page_url)

# Create the proxy-enabled HTTP class
proxy = Net::HTTP::Proxy(proxy_ip, proxy_port)

# Create the request object
req = Net::HTTP::Get.new(uri)
# Set the User-Agent
req['User-Agent'] = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'
req['Accept-Encoding'] = 'gzip'  # ask for gzip-compressed transfer to speed up the request


# Send the request through the proxy; if the target page is plain http, set use_ssl to false
res = proxy.start(uri.hostname, uri.port, :use_ssl => true) do |http|
    http.request(req)
end

# Print the status code
puts "status code: #{res.code}"

# Print the response body
if res.code.to_i != 200 then
    puts "page content: #{res.body}"
else
    gz = Zlib::GzipReader.new(StringIO.new(res.body.to_s))
    puts "page content: #{gz.read}"
end
net/http (username/password authentication)

Usage notes

  • Username/password-authenticated http/https proxy with net/http
# -*- coding: utf-8 -*-

require 'net/http'  # built-in net/http module
require 'zlib'
require 'stringio'

# Proxy server ip and port
proxy_ip = '42.49.11.109'
proxy_port = 16816

# Username and password
username = 'yourusername'
password = 'yourpassword'

# Target page to fetch; the kuaidaili testproxy page is used as the example
page_url = "https://dev.kuaidaili.com/testproxy"
uri = URI(page_url)

# Create the proxy-enabled HTTP class
proxy = Net::HTTP::Proxy(proxy_ip, proxy_port, username, password)

# Create the request object
req = Net::HTTP::Get.new(uri)
# Set username/password authentication on the request
req.basic_auth(username, password)
# Set the User-Agent
req['User-Agent'] = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'
req['Accept-Encoding'] = 'gzip'  # ask for gzip-compressed transfer to speed up the request


# Send the request through the proxy; if the target page is plain http, set use_ssl to false
res = proxy.start(uri.hostname, uri.port, :use_ssl => true) do |http|
    http.request(req)
end

# Print the status code
puts "status code: #{res.code}"

# Print the response body
if res.code.to_i != 200 then
    puts "page content: #{res.body}"
else
    gz = Zlib::GzipReader.new(StringIO.new(res.body.to_s))
    puts "page content: #{gz.read}"
end

httparty

httparty (IP whitelist)

Usage notes

  • IP whitelist-based http/https proxy with httparty
require "httparty"  # httparty module
require 'zlib'
require 'stringio'

# Proxy server ip and port
proxy_ip = '42.49.11.109'
proxy_port = 16816

# Target page to fetch; the kuaidaili testproxy page is used as the example
page_url = 'https://dev.kuaidaili.com/testproxy'

# Set headers
headers = {
    "User-Agent" => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Accept-Encoding" => "gzip",
}

# Set the proxy
options = {
    :headers => headers, 
    :http_proxyaddr => proxy_ip, 
    :http_proxyport => proxy_port,
}

# Send the request
res = HTTParty.get(page_url, options)

# Print the status code
puts "status code: #{res.code}"

# Print the response body
if res.code.to_i != 200 then
    puts "page content: #{res.body}"
else
    gz = Zlib::GzipReader.new(StringIO.new(res.body.to_s))
    puts "page content: #{gz.read}"
end
httparty (username/password authentication)

Usage notes

  • Username/password-authenticated http/https proxy with httparty
require "httparty"  # httparty module
require 'zlib'
require 'stringio'

# Proxy server ip and port
proxy_ip = '42.49.11.109'
proxy_port = 16816

# Username and password
username = 'yourusername'
password = 'yourpassword'

# Target page to fetch; the kuaidaili testproxy page is used as the example
page_url = 'https://dev.kuaidaili.com/testproxy'

# Set headers
headers = {
    "User-Agent" => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Accept-Encoding" => "gzip",
}

# Set the proxy
options = {
    :headers => headers, 
    :http_proxyaddr => proxy_ip, 
    :http_proxyport => proxy_port, 
    :http_proxyuser => username, 
    :http_proxypass => password,
}

# Send the request
res = HTTParty.get(page_url, options)

# Print the status code
puts "status code: #{res.code}"

# Print the response body
if res.code.to_i != 200 then
    puts "page content: #{res.body}"
else
    gz = Zlib::GzipReader.new(StringIO.new(res.body.to_s))
    puts "page content: #{gz.read}"
end

php

curl

curl

Usage notes

  1. This sample supports both http and https pages
  2. curl support is not built into php and must be installed first:
    Ubuntu/Debian: apt-get install php5-curl
    CentOS: yum install php-curl
<?php
//Target page to fetch
$page_url = "http://dev.kdlapi.com/testproxy";

//Proxy server
$proxy = "59.38.241.25:23916";

//Username and password (private/exclusive proxy)
$username   = "myusername";
$password   = "mypassword";

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $page_url);

curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);  
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);

//Set the proxy
curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
curl_setopt($ch, CURLOPT_PROXY, $proxy);
//Set the proxy username and password (private/exclusive proxy)
curl_setopt($ch, CURLOPT_PROXYAUTH, CURLAUTH_BASIC);
curl_setopt($ch, CURLOPT_PROXYUSERPWD, "{$username}:{$password}");

//Custom headers
$headers = array();
$headers[] = 'User-Agent: Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0);';
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

//Custom cookie
curl_setopt($ch, CURLOPT_COOKIE, '');

curl_setopt($ch, CURLOPT_ENCODING, 'gzip'); //ask for gzip-compressed transfer to speed up the request

curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);

curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

$result = curl_exec($ch);
$info = curl_getinfo($ch);
curl_close($ch);

echo $result;
echo "\n\nfetch ".$info['url']."\ntimeuse: ".$info['total_time']."s\n\n";
?>