Code Samples - HTTP Proxy

This document provides code samples for requesting an http proxy server programmatically, for developers' reference.

How to use the code samples

  1. The samples cannot be run as-is: the proxy server 59.38.241.25:23916, the username username, and the password password in the code are placeholders. Replace them with your real details and the samples will run normally.
  2. The runtime requirements and caveats for each sample are noted alongside it; please read them before use.
  3. If you run into problems while using a sample, contact after-sales support and we will provide technical help.

Python2

requests

requests (recommended)

Usage notes

  1. The requests-based sample supports both http and https pages; this is the recommended approach
  2. requests is not part of the python standard library; install it first: pip install requests
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""使用requests请求代理服务器
请求http和https网页均适用
"""

import requests
import random

page_url = "http://dev.kdlapi.com/testproxy"  # 要访问的目标网页
# API接口,返回格式为json
api_url = "https://svip.kdlapi.com/api/getproxy/?orderid=947449222924633&num=100&signature=atvb6a4981d03pvpqalolea9e0k2pmi6&protocol=1&method=2&an_an=1&an_ha=1&quality=2&format=json&sep=1"

# API接口返回的ip
proxy_ip = requests.get(api_url).json()['data']['proxy_list']

# 用户名和密码(私密代理/独享代理)
username = "username"
password = "password"

# 私密代理、独享代理
# proxies = {
#     "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': random.choice(proxy_ip)},
#     "https": "https://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': random.choice(proxy_ip)}
# }
# 开放代理
proxies = {
    "http": "http://%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': random.choice(proxy_ip)},
    "https": "https://%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': random.choice(proxy_ip)}
}
headers = {
    "Accept-Encoding": "Gzip",  # 使用gzip压缩传输数据让访问更快
}
r = requests.get(page_url, proxies=proxies, headers=headers)
# 发送post请求
#r = requests.post("http://dev.kdlapi.com/testproxy", data={"info": "send post request"}, headers=headers)
print(r.status_code)  # 获取Response的返回码

if r.status_code == 200:
    r.enconding = "utf-8"  # 设置返回内容的编码
    print(r.content)  # 获取页面内容

urllib2

urllib2

Usage notes

  • The urllib2-based sample supports both http and https pages
  • Requires python 2.6 / 2.7
#!/usr/bin/env python
#-*- coding: utf-8 -*-

"""使用urllib2请求代理服务器
请求http和https网页均适用
"""

import urllib2
from urllib import urlencode
import zlib
import ssl
import json
import random

ssl._create_default_https_context = ssl._create_unverified_context  # 全局取消证书验证,避免访问https网页报错

# 要访问的目标网页
page_url = "http://dev.kdlapi.com/testproxy"

# API接口,返回格式为json
api_url = "https://svip.kdlapi.com/api/getproxy/?orderid=947449222924633&num=100&signature=atvb6a4981d03pvpqalolea9e0k2pmi6&protocol=1&method=2&an_an=1&an_ha=1&quality=2&format=json&sep=1"
ip_list = json.loads(urllib2.urlopen(api_url).read(), encoding="utf-8")['data']['proxy_list']
proxy = random.choice(ip_list)

# 用户名和密码(私密代理/独享代理)
username = "username"
password = "password"

# 私密代理、独享代理
# proxies = {
#     "http": "http://%(user)s:%(pwd)s@%(ip)s/" % {'user': username, 'pwd': password, 'ip': proxy},
#     "https": "http://%(user)s:%(pwd)s@%(ip)s/" % {'user': username, 'pwd': password, 'ip': proxy}
# }

# 开放代理
proxies = {
    "http": "http://%(ip)s/" % {'ip': proxy},
    "https": "http://%(ip)s/" % {'ip': proxy}
}
req = urllib2.Request(page_url)
# 发送post请求
# req = urllib2.Request("http://dev.kdlapi.com/testproxy", data=bytes(urlencode({'info': 'send post request'})))
req.add_header("Accept-Encoding", "Gzip") #使用gzip压缩传输数据让访问更快
proxy_hander = urllib2.ProxyHandler(proxies)
opener = urllib2.build_opener(proxy_hander)
urllib2.install_opener(opener)
r = urllib2.urlopen(req)

print r.code
content_encoding = r.headers.getheader("Content-Encoding")
if content_encoding and "gzip" in content_encoding:
    print zlib.decompress(r.read(), 16+zlib.MAX_WBITS) #获取页面内容
else:
    print r.read()  # 获取页面内容

Selenium

Firefox
Firefox (IP whitelist)

Usage notes

  • Whitelist-based http/https proxying with Firefox and Geckodriver
  • Requires python2/3 + selenium + Firefox + Geckodriver + windows/linux
  • To inspect all Firefox preferences, open about:config in the browser
# -*- coding: utf-8 -*-

from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import requests
import random
import re

page_url = "http://dev.kdlapi.com/testproxy"  # 要访问的目标网页
# API接口,返回格式为json
api_url = ""

# API接口返回的ip
proxy = requests.get(api_url).json()['data']['proxy_list']
ip, port = random.choice(proxy).split(":")
port = int(port)

profile = webdriver.FirefoxProfile()
# 不使用代理的协议,注释掉对应的选项即可
settings = {
    'network.proxy.type': 1,  # 0: 不使用代理;1: 手动配置代理
    'network.proxy.http': ip,
    'network.proxy.http_port': port,
    'network.proxy.ssl': ip,  # 如果是https, 需要开启
    'network.proxy.ssl_port': port,
    'network.proxy.socks': ip,
    'network.proxy.socks_port': port,
    # 'network.proxy.ftp': ip,
    # 'network.proxy.ftp_port': port
}
for key, value in settings.items():
    profile.set_preference(key, value)
profile.update_preferences()

options = Options()
# 无界面启动浏览器
options.add_argument('--headless')
driver = webdriver.Firefox(firefox_profile=profile, options=options)
driver.get(page_url)
driver.close()
driver.quit()
Firefox (username/password authentication)

Usage notes

  • Requires python2/3 + selenium + Firefox + Geckodriver + windows/linux
  • To inspect all Firefox preferences, open about:config in the browser
  • Requires two extra extensions: modify_headers.xpi and closeproxy.xpi
# -*- coding: utf-8 -*-

'''
# Setting a proxy password with Python + Selenium + Firefox requires two extensions:
# extension 1: modify_headers-0.7.1.1-fx.xpi
# extension 2: closeproxy.xpi
# Both can be found with a quick web search.

# To use a proxy that requires username/password authorization, Firefox needs the
# closeproxy.xpi extension to fill in the credentials automatically.
# However, this extension only supports Firefox versions below 56.0; for 56.0 and
# above there is currently no known extension-based solution.
# Chrome or PhantomJS is recommended instead.
# If you must use Firefox 56+, modules such as pyautogui or win32gui can be used to
# grab the username/password dialog and type the credentials into it.
'''
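# A minimal, untested sketch of the pyautogui fallback mentioned above: once the
# browser pops up the proxy authentication dialog, type the credentials into it.
# This assumes the dialog has keyboard focus and that pyautogui is installed
# (pip install pyautogui); adjust the delay to your machine.
#
# import pyautogui
# import time
# time.sleep(3)                    # wait for the auth dialog to appear
# pyautogui.typewrite('username')  # type the username into the focused field
# pyautogui.press('tab')           # jump to the password field
# pyautogui.typewrite('password')
# pyautogui.press('enter')         # confirm the dialog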


from selenium import webdriver
from base64 import b64encode
import requests
import random

api_url = "https://dps.kdlapi.com/api/getdps/?orderid=927441114016541&num=10&pt=1&format=json&sep=1"
page_url = "https://dev.kdlapi.com/testproxy"  # 要访问的目标网页
proxy_ip = requests.get(api_url).json()['data']['proxy_list']
host, port = random.choice(proxy_ip).split(":")
proxy = {
    "host": host,
    "port": port,
    "user": "username",
    "pass": "password"
}

profile = webdriver.FirefoxProfile()

# add new header
profile.add_extension("modify_headers-0.7.1.1-fx.xpi")
profile.set_preference("extensions.modify_headers.currentVersion", "0.7.1.1-fx")
profile.set_preference("modifyheaders.config.active", True)
profile.set_preference("modifyheaders.headers.count", 1)
profile.set_preference("modifyheaders.headers.action0", "Add")
profile.set_preference("modifyheaders.headers.name0", "Proxy-Switch-Ip")
profile.set_preference("modifyheaders.headers.value0", "yes")
profile.set_preference("modifyheaders.headers.enabled0", True)

# add proxy
profile.set_preference('network.proxy.type', 1)
profile.set_preference('network.proxy.http', proxy["host"])
profile.set_preference('network.proxy.http_port', int(proxy['port']))
profile.set_preference('network.proxy.ssl', proxy["host"])
profile.set_preference('network.proxy.ssl_port', int(proxy['port']))
profile.set_preference('network.proxy.no_proxies_on', 'localhost, 127.0.0.1')
profile.set_preference("network.proxy.username", proxy['user'])
profile.set_preference("network.proxy.password", proxy['pass'])

# Proxy auto login
profile.add_extension('closeproxy.xpi')
credentials = '{user}:{pass}'.format(**proxy)
credentials = b64encode(credentials.encode('utf-8')).decode('utf-8')
profile.set_preference('extensions.closeproxyauth.authtoken', credentials)

profile.update_preferences()

driver = webdriver.Firefox(profile)
driver.get(page_url)
print(driver.page_source)

# driver.quit()
Chrome
Chrome (IP whitelist)

Usage notes

# -*- coding: utf-8 -*-
"""
selenium + chrome with a whitelisted or open proxy. Works on both Windows and Linux.
Notes:
    1. Requires the google chrome browser and the chromedriver driver (download it and put it in python's Scripts directory)
    2. Required python packages: selenium, requests
    3. On Linux, run as a normal (non-root) user; chrome will not run as root by default.
"""
from selenium import webdriver
import requests
import random

api_url = "https://svip.kdlapi.com/api/getproxy/?orderid=947449222924633&num=100&signature=atvb6a4981d03pvpqalolea9e0k2pmi6&protocol=1&method=2&an_an=1&an_ha=1&quality=2&format=json&sep=1"
page_url = "https://dev.kdlapi.com/testproxy"  # 要访问的目标网页
proxy_ip = requests.get(api_url).json()['data']['proxy_list']

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--proxy-server=%s' % random.choice(proxy_ip))
# chrome_options.add_argument('--headless')  # start chrome headless (no window)
chrome = webdriver.Chrome(options=chrome_options)
chrome.get(page_url)
print(chrome.page_source)
chrome.quit()
Chrome (username/password authentication)

Usage notes

  • Username/password-authenticated http/https proxying with Chrome
  • Requires python2/3 + selenium + Chrome + Chromedriver + windows/linux
  • Download chromedriver first and add it to your PATH
# -*- coding: utf-8 -*-
"""selenium + chrome + windows + 用户名密码认证
    支持使用http、https、socks代理,支持访问http和https的网页
"""
import os
import zipfile
import requests
import random
import time
from selenium import webdriver

# target page
page_url = "http://dev.kuaidaili.com/testproxy"
# API url
api_url = "https://dps.kdlapi.com/api/getdps/?orderid=927441114016541&num=10&pt=1&format=json&sep=1"
proxy_ip = requests.get(api_url).json()['data']['proxy_list']

PROXY_HOST, PROXY_PORT = random.choice(proxy_ip).split(":")
PROXY_USER = 'username'  # username
PROXY_PASS = 'password'  # password

# Generate the chrome proxy extension on the fly, pack it with the zipfile module, then load it via chrome_options.add_extension

manifest_json = """
{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {
        "scripts": ["background.js"]
    },
    "minimum_chrome_version":"22.0.0"
}
"""

background_js = """
var config = {
        mode: "fixed_servers",
        rules: {
        singleProxy: {
            scheme: "http", /*如果使用的是socks代理,这个值改为"socks"即可*/
            host: "%s",
            port: parseInt(%s)
        },
        bypassList: ["localhost"]
        }
    };

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

function callbackFn(details) {
    return {
        authCredentials: {
            username: "%s",
            password: "%s"
        }
    };
}

chrome.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
);
""" % (PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS)


def get_chromedriver(use_proxy=False, user_agent=None):
    # path = os.path.dirname(os.path.abspath(__file__))  # if chromedriver is not in python\Scripts, specify its path here
    chrome_options = webdriver.ChromeOptions()
    if use_proxy:
        pluginfile = 'proxy_auth_plugin.zip'

        with zipfile.ZipFile(pluginfile, 'w') as zp:
            zp.writestr("manifest.json", manifest_json)
            zp.writestr("background.js", background_js)
        chrome_options.add_extension(pluginfile)
    if user_agent:
        chrome_options.add_argument('--user-agent=%s' % user_agent)
    driver = webdriver.Chrome(
        # os.path.join(path, 'chromedriver'),
        options=chrome_options)
    return driver


def main():
    driver = get_chromedriver(use_proxy=True)
    driver.get(page_url)
    time.sleep(5)  # wait 5 seconds so the browser does not close as soon as the script ends


if __name__ == '__main__':
    main()
PhantomJS
PhantomJS (IP whitelist)

Usage notes

  • Whitelist-based http/https proxying with PhantomJS
  • Requires python2.x + selenium + PhantomJS
  • selenium and PhantomJS can be installed directly with pip
# -*- coding: utf-8 -*-
'''
Whitelist (no-login) http proxy example with phantomjs, the headless browser,
driven by selenium. Since the browser has no window, the title and the source
of the visited page are printed instead.
'''
from selenium import webdriver

# target page to visit
page_url = "http://dev.kdlapi.com/testproxy"

# proxy server ip and port
proxy = '59.38.241.25:23918'

# proxy settings
service_args = [
    '--proxy=%s' % proxy,
    '--proxy-type=http',
    # '--proxy-auth=%s:%s' % (username, password)
]

# start PhantomJS and open the page
driver = webdriver.PhantomJS(service_args=service_args)
driver.get(page_url)

# print the page title and page source
print driver.title
print driver.page_source

# quit PhantomJS
driver.quit()
PhantomJS (username/password authentication)

Usage notes

  • Password-authenticated http/https proxying with PhantomJS
  • Requires python2.x + selenium + PhantomJS
  • selenium and PhantomJS can be installed directly with pip
# -*- coding: utf-8 -*-
'''
Authenticated http proxy example with phantomjs, the headless browser, driven by
selenium. Since the browser has no window, the title and the source of the
visited page are printed instead.
'''
from selenium import webdriver
# target page to visit
page_url = "http://dev.kdlapi.com/testproxy"

# proxy server ip and port
proxy = '59.38.241.25:23916'

# username and password (private/exclusive proxy)
username = 'username'
password = 'password'

# proxy settings
service_args = [
    '--proxy=%s' % proxy,
    '--proxy-type=http',
    '--proxy-auth=%s:%s' % (username, password)
]

# start PhantomJS and open the target page
driver = webdriver.PhantomJS(service_args=service_args)
driver.get(page_url)

# print the page title and page source
print driver.title
print driver.page_source

driver.quit()

Python3

requests

requests (recommended)

Usage notes

  1. The requests-based sample supports both http and https pages; this is the recommended approach
  2. requests is not part of the python standard library; install it first: pip install requests
# -*- coding: utf-8 -*-
"""python3提取api链接并使用requests库进行http代理"""
import requests
import random

page_url = "http://dev.kdlapi.com/testproxy"  # 要访问的目标网页
# API接口,返回格式为json
api_url = "http://svip.kdlapi.com/api/getproxy/?orderid=947449222924633&num=100&signature=atvb6a4981d03pvpqalolea9e0k2pmi6&protocol=1&method=2&an_an=1&an_ha=1&quality=2&format=json&sep=1"  # API接口

# API接口返回的ip
proxy_ip = requests.get(api_url).json()['data']['proxy_list']

# 用户名和密码(私密代理/独享代理)
username = "username"
password = "password"
proxies = {
    "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': random.choice(proxy_ip)},
    "https": "https://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': random.choice(proxy_ip)}
}

# 开放代理, 不需要用户名和密码
# proxies = {
#     "http": "http://%(proxy)s/" % {'proxy': random.choice(proxy_ip)},
#     "https": "https://%(proxy)s/" % {'proxy': random.choice(proxy_ip)}
# }
# print(proxies)
headers = {
    "Accept-Encoding": "Gzip",  # 使用gzip压缩传输数据让访问更快
}
r = requests.get(page_url, proxies=proxies, headers=headers)
# 发送post请求
# r = requests.post("http://dev.kdlapi.com/testproxy", data={"info": "send post request"}, headers=headers)
print(r.status_code)  # 获取Response的返回码

if r.status_code == 200:
    r.enconding = "utf-8"  # 设置返回内容的编码
    # 获取页面内容
    print(r.content)
    # print(r.text)

urllib

urllib

Usage notes

  • The urllib-based sample supports both http and https pages
  • Requires python3.x
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import urllib.request
from urllib.parse import urlencode
import zlib
import json
import random

"""使用urllib.request模块请求代理服务器,http和https网页均适用"""

#import ssl

#ssl._create_default_https_context = ssl._create_unverified_context


# target page to visit
page_url = "https://dev.kdlapi.com/testproxy/"

# API endpoint; encrypted transport (https), json format
api_url = "https://svip.kdlapi.com/api/getproxy/?orderid=947449222924633&num=100&signature=atvb6a4981d03pvpqalolea9e0k2pmi6&protocol=2&method=2&an_an=1&an_ha=1&quality=2&format=json&sep=1"
# proxy IPs returned by the API
response = urllib.request.urlopen(api_url)
json_dict = json.loads(response.read().decode('utf-8'))
ip_list = json_dict['data']['proxy_list']

# username and password (private/exclusive proxy)
username = "username"
password = "password"

headers = {"Accept-Encoding": "Gzip"}  # gzip-compress the response for faster transfers

# proxy_values = "http://%(user)s:%(pwd)s@%(ip)s" % {'user': username, 'pwd': password, 'ip': random.choice(ip_list)}
# open proxies need no password
proxy_values = "%(ip)s" % {'ip': random.choice(ip_list)}

proxies = {"http": proxy_values, "https": proxy_values}
print(proxies)
handler = urllib.request.ProxyHandler(proxies)
opener = urllib.request.build_opener(handler)

req = urllib.request.Request(url=page_url, headers=headers)
# send a post request
# data = bytes(urlencode({"info": "send post request"}), encoding="utf-8")
# req = urllib.request.Request(url="http://dev.kdlapi.com/testproxy", headers=headers, data=data, method="POST")

result = opener.open(req)
print(result.status)  # response status code

content_encoding = result.headers.get('Content-Encoding')
if content_encoding and "gzip" in content_encoding:
    print(zlib.decompress(result.read(), 16 + zlib.MAX_WBITS).decode('utf-8'))  # page content
else:
    print(result.read().decode('utf-8'))  # page content

Python-Scrapy

The standard directory structure of a scrapy project is shown below.
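A typical layout, as a sketch for reference (assuming the project is named scrapy_proxy, matching the commands and module path used below):

scrapy_proxy/
├── scrapy.cfg
└── scrapy_proxy/
    ├── __init__.py
    ├── items.py
    ├── middlewares.py
    ├── pipelines.py
    ├── settings.py
    ├── utils.py
    └── spiders/
        ├── __init__.py
        └── testproxy.py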

Usage notes

  1. Works for both http and https pages
  2. scrapy is not part of the python standard library; install it first: pip install scrapy
  3. Run scrapy crawl testproxy from the first-level scrapy_proxy directory to see the result, or run the testproxy.py file directly
utils.py

Create utils.py in the second-level scrapy_proxy directory and add the following code

# -*- coding: utf-8 -*-

import requests
import logging
import random

logger = logging.getLogger(__name__)


def get_one_proxy(api_url):
    """Fetch one proxy IP from the API."""
    r = requests.get(api_url)
    if r.status_code != 200:
        logger.error("fail to get proxy")
        return None
    ip_list = r.json()['data']['proxy_list']
    return random.choice(ip_list)


if __name__ == '__main__':
    print("proxy: ", get_one_proxy(""))  # pass your API url here

middlewares.py

Add the following code to middlewares.py to configure the proxy

# -*- coding: utf-8 -*-

# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html

from scrapy import signals
from .utils import get_one_proxy
import logging
import base64


# API endpoint, returns json
api_url = ""
# For non-open proxies without a whitelisted IP, username/password authentication is required

username = "username"
password = "password"
logger = logging.getLogger(__name__)


class ProxyDownloadMiddleware(object):

    def process_request(self, request, spider):
        if request.url.startswith("http://"):
            request.meta['proxy'] = "http://{proxy_ip}".format(proxy_ip=get_one_proxy(api_url))
        elif request.url.startswith("https://"):
            request.meta['proxy'] = "https://{proxy_ip}".format(proxy_ip=get_one_proxy(api_url))
        logging.debug("using proxy: {}".format(request.meta['proxy']))
        # For private/exclusive proxies, base64-encode "username:password" and assign it
        # to request.headers["Proxy-Authorization"].
        # For open proxies, skip the steps below; setting the proxy IP is enough.
        user_password = "{username}:{password}".format(username=username, password=password)
        b64_user_password = base64.b64encode(user_password.encode("utf-8"))
        request.headers["Proxy-Authorization"] = "Basic " + b64_user_password.decode("utf-8")
        return None

settings.py

Set DOWNLOADER_MIDDLEWARES in settings.py to enable the new middleware

ROBOTSTXT_OBEY = False  # set to False to improve the success rate
DOWNLOADER_MIDDLEWARES = {
    'scrapy_proxy.middlewares.ProxyDownloadMiddleware': 100,
}

testproxy.py
# -*- coding: utf-8 -*-
import scrapy
from scrapy import cmdline

class TestproxySpider(scrapy.Spider):
    name = 'testproxy'
    allowed_domains = ['kdlapi.com']
    start_urls = ['http://dev.kdlapi.com/testproxy']

    # to send a post request, override the start_requests method
    # def start_requests(self):
    #     self.url = "http://dev.kdlapi.com/testproxy"
    #     yield scrapy.FormRequest(
    #         url=self.url,
    #         formdata={"info": "send post request"},
    #         callback=self.parse,
    #         # the meta dict can pass data between callback methods
    #         meta={"key": "value"}
    #     )

    def parse(self, response):
        print(response.text)


if __name__ == "__main__":
    cmdline.execute('scrapy crawl testproxy'.split())

Scrapy multithreading

Multithreading notes

Scrapy is multithreaded by itself; you can set the thread pool size directly (see the scrapy documentation):

from twisted.internet import reactor
reactor.suggestThreadPoolSize(30)
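Request concurrency, though, is usually tuned through Scrapy's own settings rather than the thread pool; a sketch with illustrative values (the defaults are 16 and 8 respectively):

# settings.py
CONCURRENT_REQUESTS = 32             # maximum concurrent requests overall
CONCURRENT_REQUESTS_PER_DOMAIN = 16  # maximum concurrent requests per domain
DOWNLOAD_DELAY = 0                   # seconds to wait between requests to the same site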

Java

jdk

Using the JDK standard library

Usage notes

  1. This sample supports both http and https pages
  2. Requires jdk >= 1.6
package com.kuaidaili.sdk;

import java.util.HashMap;
import java.util.Map;

/**
 * Request the proxy server with the JDK standard library.
 * Works for both http and https pages.
 */
public class TestProxy {

    private static String pageUrl = "http://dev.kdlapi.com/testproxy"; //target page to visit
    private static String proxyIp = "59.38.241.25"; //proxy server IP
    private static String proxyPort = "23916"; //proxy server port
    private static String username = "username"; //username
    private static String password = "password"; //password

    public static void main(String[] args) {
        //make sure username/password authentication works for tunneled requests
        System.setProperty("jdk.http.auth.tunneling.disabledSchemes", "");

        HttpRequest request = new HttpRequest();
        Map<String, String> params = new HashMap<String, String>();
        Map<String, String> headers = new HashMap<String, String>();

        headers.put("Accept-Encoding", "gzip"); //gzip-compress the response for faster transfers

        Map<String, String> proxySettings = new HashMap<String, String>();
        proxySettings.put("ip", proxyIp);
        proxySettings.put("port", proxyPort);
        proxySettings.put("username", username);
        proxySettings.put("password", password);

        try{
            HttpResponse response = request.sendGet(pageUrl, params, headers, proxySettings);
            System.out.println(response.getCode());
            System.out.println(response.getContent());
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}
The helper classes HttpRequest and HttpResponse are listed below.

HttpRequest.java

package com.kuaidaili.sdk;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.Authenticator;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.PasswordAuthentication;
import java.net.Proxy;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.Map;
import java.util.Vector;
import java.util.zip.GZIPInputStream;

/**
 * HTTP request object
 */
public class HttpRequest {

    private String defaultContentEncoding;
    private int connectTimeout = 1000;
    private int readTimeout = 1000;

    public HttpRequest() {
        this.defaultContentEncoding = Charset.defaultCharset().name();
    }

    /**
     * Send a GET request
     *
     * @param urlString the URL
     * @param proxySettings proxy settings; null means no proxy
     * @return the response object
     */
    public HttpResponse sendGet(String urlString, final Map<String, String> proxySettings) throws IOException {
        return this.send(urlString, "GET", null, null, proxySettings);
    }

    /**
     * Send a GET request
     *
     * @param urlString the URL
     * @param params request parameters
     * @param proxySettings proxy settings; null means no proxy
     * @return the response object
     */
    public HttpResponse sendGet(String urlString, Map<String, String> params, final Map<String, String> proxySettings)
            throws IOException {
        return this.send(urlString, "GET", params, null, proxySettings);
    }

    /**
     * Send a GET request
     *
     * @param urlString the URL
     * @param params request parameters
     * @param headers request headers
     * @param proxySettings proxy settings; null means no proxy
     * @return the response object
     */
    public HttpResponse sendGet(String urlString, Map<String, String> params,
            Map<String, String> headers, final Map<String, String> proxySettings) throws IOException {
        return this.send(urlString, "GET", params, headers, proxySettings);
    }

    /**
     * Send a POST request
     *
     * @param urlString the URL
     * @param proxySettings proxy settings; null means no proxy
     * @return the response object
     */
    public HttpResponse sendPost(String urlString, final Map<String, String> proxySettings) throws IOException {
        return this.send(urlString, "POST", null, null, proxySettings);
    }

    /**
     * Send a POST request
     *
     * @param urlString the URL
     * @param params request parameters
     * @param proxySettings proxy settings; null means no proxy
     * @return the response object
     */
    public HttpResponse sendPost(String urlString, Map<String, String> params, final Map<String, String> proxySettings)
            throws IOException {
        return this.send(urlString, "POST", params, null, proxySettings);
    }

    /**
     * Send a POST request
     *
     * @param urlString the URL
     * @param params request parameters
     * @param headers request headers
     * @param proxySettings proxy settings; null means no proxy
     * @return the response object
     */
    public HttpResponse sendPost(String urlString, Map<String, String> params,
            Map<String, String> headers, final Map<String, String> proxySettings) throws IOException {
        return this.send(urlString, "POST", params, headers, proxySettings);
    }

    /**
     * Send an HTTP request
     */
    private HttpResponse send(String urlString, String method,
            Map<String, String> parameters, Map<String, String> headers, final Map<String, String> proxySettings)
            throws IOException {
        HttpURLConnection urlConnection = null;

        if (method.equalsIgnoreCase("GET") && parameters != null) {
            StringBuffer param = new StringBuffer();
            int i = 0;
            for (String key : parameters.keySet()) {
                if (i == 0)
                    param.append("?");
                else
                    param.append("&");
                param.append(key).append("=").append(URLEncoder.encode(parameters.get(key), "utf-8"));
                i++;
            }
            urlString += param;
        }
        URL url = new URL(urlString);
        if(proxySettings != null){
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxySettings.get("ip"), Integer.parseInt(proxySettings.get("port"))));
            urlConnection = (HttpURLConnection) url.openConnection(proxy);
            if(proxySettings.containsKey("username")){
                Authenticator authenticator = new Authenticator() {
                    public PasswordAuthentication getPasswordAuthentication() {
                        return (new PasswordAuthentication(proxySettings.get("username"),
                                proxySettings.get("password").toCharArray()));
                    }
                };
                Authenticator.setDefault(authenticator);
            }
        }
        else{
            urlConnection = (HttpURLConnection) url.openConnection();
        }

        urlConnection.setRequestMethod(method);
        urlConnection.setDoOutput(true);
        urlConnection.setDoInput(true);
        urlConnection.setUseCaches(false);

        urlConnection.setConnectTimeout(connectTimeout);
        urlConnection.setReadTimeout(readTimeout);

        if (headers != null)
            for (String key : headers.keySet()) {
                urlConnection.addRequestProperty(key, headers.get(key));
            }

        if (method.equalsIgnoreCase("POST") && parameters != null) {
            StringBuffer param = new StringBuffer();
            int i = 0;
            for (String key : parameters.keySet()) {
                if(i > 0) param.append("&");
                param.append(key).append("=").append(URLEncoder.encode(parameters.get(key), "utf-8"));
                i++;
            }
            System.out.println(param.toString());
            urlConnection.getOutputStream().write(param.toString().getBytes());
            urlConnection.getOutputStream().flush();
            urlConnection.getOutputStream().close();
        }

        return this.makeContent(urlString, urlConnection);
    }

    /**
     * Build the response object
     */
    private HttpResponse makeContent(String urlString,
            HttpURLConnection urlConnection) throws IOException {
        HttpResponse response = new HttpResponse();
        try {
            InputStream in = urlConnection.getInputStream();
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(in));
            if ("gzip".equals(urlConnection.getContentEncoding())) bufferedReader =  new BufferedReader(new InputStreamReader(new GZIPInputStream(in)));
            response.contentCollection = new Vector<String>();
            StringBuffer temp = new StringBuffer();
            String line = bufferedReader.readLine();
            while (line != null) {
                response.contentCollection.add(line);
                temp.append(line).append("\r\n");
                line = bufferedReader.readLine();
            }
            bufferedReader.close();

            String encoding = urlConnection.getContentEncoding();
            if (encoding == null)
                encoding = this.defaultContentEncoding;

            response.urlString = urlString;

            response.defaultPort = urlConnection.getURL().getDefaultPort();
            response.file = urlConnection.getURL().getFile();
            response.host = urlConnection.getURL().getHost();
            response.path = urlConnection.getURL().getPath();
            response.port = urlConnection.getURL().getPort();
            response.protocol = urlConnection.getURL().getProtocol();
            response.query = urlConnection.getURL().getQuery();
            response.ref = urlConnection.getURL().getRef();
            response.userInfo = urlConnection.getURL().getUserInfo();
            response.contentLength = urlConnection.getContentLength();

            response.content = new String(temp.toString().getBytes());
            response.contentEncoding = encoding;
            response.code = urlConnection.getResponseCode();
            response.message = urlConnection.getResponseMessage();
            response.contentType = urlConnection.getContentType();
            response.method = urlConnection.getRequestMethod();
            response.connectTimeout = urlConnection.getConnectTimeout();
            response.readTimeout = urlConnection.getReadTimeout();

            return response;
        } catch (IOException e) {
            throw e;
        } finally {
            if (urlConnection != null){
                urlConnection.disconnect();
            }
        }
    }

    public static byte[] gunzip(byte[] bytes) {  
        if (bytes == null || bytes.length == 0) {  
            return null;  
        }  
        ByteArrayOutputStream out = new ByteArrayOutputStream();  
        ByteArrayInputStream in = new ByteArrayInputStream(bytes);  
        try {  
            GZIPInputStream ungzip = new GZIPInputStream(in);  
            byte[] buffer = new byte[256];  
            int n;  
            while ((n = ungzip.read(buffer)) >= 0) {  
                out.write(buffer, 0, n);  
            }  
        } catch (IOException e) {  
            System.err.println("gzip uncompress error.");
            e.printStackTrace();
        }  

        return out.toByteArray();  
    }

    /**
     * Get the default response charset
     */
    public String getDefaultContentEncoding() {
        return this.defaultContentEncoding;
    }

    /**
     * Set the default response charset
     */
    public void setDefaultContentEncoding(String defaultContentEncoding) {
        this.defaultContentEncoding = defaultContentEncoding;
    }

    public int getConnectTimeout() {
        return connectTimeout;
    }

    public void setConnectTimeout(int connectTimeout) {
        this.connectTimeout = connectTimeout;
    }

    public int getReadTimeout() {
        return readTimeout;
    }

    public void setReadTimeout(int readTimeout) {
        this.readTimeout = readTimeout;
    }
}

HttpResponse.java

package com.kuaidaili.sdk;

import java.util.Vector;

/**
 * HTTP response object
 */
public class HttpResponse {

    String urlString;
    int defaultPort;
    String file;
    String host;
    String path;
    int port;
    String protocol;
    String query;
    String ref;
    String userInfo;
    String contentEncoding;
    int contentLength;
    String content;
    String contentType;
    int code;
    String message;
    String method;

    int connectTimeout;

    int readTimeout;

    Vector<String> contentCollection;

    public String getContent() {
        return content;
    }

    public String getContentType() {
        return contentType;
    }

    public int getCode() {
        return code;
    }

    public String getMessage() {
        return message;
    }

    public Vector<String> getContentCollection() {
        return contentCollection;
    }

    public String getContentEncoding() {
        return contentEncoding;
    }

    public String getMethod() {
        return method;
    }

    public int getConnectTimeout() {
        return connectTimeout;
    }

    public int getReadTimeout() {
        return readTimeout;
    }

    public String getUrlString() {
        return urlString;
    }

    public int getDefaultPort() {
        return defaultPort;
    }

    public String getFile() {
        return file;
    }

    public String getHost() {
        return host;
    }

    public String getPath() {
        return path;
    }

    public int getPort() {
        return port;
    }

    public String getProtocol() {
        return protocol;
    }

    public String getQuery() {
        return query;
    }

    public String getRef() {
        return ref;
    }

    public String getUserInfo() {
        return userInfo;
    }

}

httpclient

HttpClient-4.5.6

Usage notes

  1. This sample supports both http and https pages
  2. Whitelist access is recommended (HttpClient occasionally fails authentication when using username/password)
  3. Requires jdk >= 1.6
  4. Dependencies (Maven coordinates below):
    httpclient-4.5.6.jar
    httpcore-4.4.10.jar
    commons-codec-1.10.jar
    commons-logging-1.2.jar
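If you manage dependencies with Maven or Gradle instead of raw jars, the same libraries resolve from this coordinate (only httpclient needs declaring; httpcore, commons-codec and commons-logging are pulled in transitively):
    org.apache.httpcomponents:httpclient:4.5.6
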
package com.kuaidaili.sdk;

import java.net.URL;

import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Request the proxy server with httpclient.
 * Works for both http and https pages.
 */
public class TestProxyHttpClient {

    private static String pageUrl = "http://dev.kdlapi.com/testproxy"; //target page to visit
    private static String proxyIp = "59.38.241.25"; //proxy server IP
    private static int proxyPort = 23916; //proxy server port
    private static String username = "username"; //username
    private static String password = "password"; //password

    public static void main(String[] args) throws Exception {
        //make sure username/password authentication works for tunneled requests
        System.setProperty("jdk.http.auth.tunneling.disabledSchemes", "");

        CredentialsProvider credsProvider = new BasicCredentialsProvider();
        credsProvider.setCredentials(
                new AuthScope(proxyIp, proxyPort),
                new UsernamePasswordCredentials(username, password));
        CloseableHttpClient httpclient = HttpClients.custom()
                .setDefaultCredentialsProvider(credsProvider).build();
        try {
            URL url = new URL(pageUrl);
            HttpHost target = new HttpHost(url.getHost(), url.getDefaultPort(), url.getProtocol());
            HttpHost proxy = new HttpHost(proxyIp, proxyPort);

            RequestConfig config = RequestConfig.custom().setProxy(proxy).build();
            HttpGet httpget = new HttpGet(url.getPath());
            httpget.setConfig(config);
            httpget.addHeader("Accept-Encoding", "gzip"); //gzip-compress the response for faster transfers

            System.out.println("Executing request " + httpget.getRequestLine() + " to " + target + " via " + proxy);

            CloseableHttpResponse response = httpclient.execute(target, httpget);
            try {
                System.out.println("----------------------------------------");
                System.out.println(response.getStatusLine());
                System.out.println(EntityUtils.toString(response.getEntity()));
            } finally {
                response.close();
            }
        } finally {
            httpclient.close();
        }
    }
}

jsoup

Making requests with jsoup

Usage notes

  1. This sample supports both http and https pages
  2. Whitelist access is recommended
  3. Uses the third-party libraries jsoup and fastjson (Maven coordinates below)
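
If you manage dependencies with Maven or Gradle, coordinates along these lines should work (the versions are assumptions; any reasonably recent release of each library should behave the same):
    org.jsoup:jsoup:1.11.3
    com.alibaba:fastjson:1.2.58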

ProxyTest.java

package com.kuaidaili.sdk;

import com.alibaba.fastjson.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;


/**
 * Make requests with Jsoup.
 */
public class ProxyTest4Jsoup {

    private static String pageUrl = "http://dev.kdlapi.com/testproxy"; // target page to visit

    public static void main(String[] args) {
        doCrawl();
    }

    public static void doCrawl() {

        List<String> proxyList = null;
        try {
            proxyList = getProxyListFromAPI();
        } catch (IOException e) {
            e.printStackTrace();
        }

        List<String> articles = new ArrayList<String>();
        articles.add(pageUrl);

        // crawl with a thread pool, several threads fetching at once
        ExecutorService executorService = Executors.newCachedThreadPool();
        for (int i = 0; i < articles.size(); i++) {
            executorService.execute(new CrawlThread(articles.get(i), proxyList));
        }
    }

    private static List<String> getProxyListFromAPI() throws IOException {
        Document doc = null;
        String apiUrl = "http://svip.kdlapi.com/api/getproxy/?orderid=947449222924633&num=100&protocol=1&method=2&an_an=1&an_ha=1&quality=2&format=json&sep=1"; //API url
        try {
            doc = Jsoup.connect(apiUrl)
                    .userAgent("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50")
                    .ignoreContentType(true)
                    .get(); // call the API
        } catch (IOException e) {
            e.printStackTrace();
        }
        // parse the proxy ips out of the response
        String ipStr = doc.body().text();

        Map<String,Object> resultMap= (Map<String, Object>) JSONObject.parse(ipStr);
        List<String> ipList= (List<String>) ((JSONObject) resultMap.get("data")).get("proxy_list");

        ipList.forEach(s->{
            System.out.println(s);
        });
        return ipList;
    }

    public static void visit(String proxy, String url) {
        String[] r = proxy.split(":");
        String ip = r[0];
        Integer port = Integer.valueOf(r[1]);

        System.out.println("-----------------");
        System.out.println("使用代理:" + proxy);
        try {
            Document document = Jsoup.connect(url)
                    .userAgent("Mozilla")
                    // .cookie("auth", "token")
                    // .timeout(3000)
                    .proxy(ip, port)
                    .ignoreContentType(true)
                    .get();
                    //.post();

            System.out.println(document.html());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

CrawlThread.java

package com.kuaidaili.sdk;

import java.util.List;

/**
 * Worker thread that does the crawling.
 */

public class CrawlThread implements Runnable{

    private List<String> proxyList;
    private String urlString;
    public CrawlThread(String url, List<String> proxyList) {
        this.proxyList =  proxyList;
        this.urlString = url;
    }

    @Override
    public void run() {
        for (String ip : proxyList) {
            ProxyTest4Jsoup.visit(ip, urlString);
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }

}

GoLang

Standard library

Standard library

Usage notes

  • Works for both http and https pages
// Request the proxy server
// Works for both http and https pages

package main

import (
    "compress/gzip"
    "fmt"
    "io"
    "io/ioutil"
    "net/http"
    "net/url"
    "os"
)

func main() {
    // username and password (private/exclusive proxy)
    username := "username"
    password := "password"

    // proxy server
    proxy_raw := "59.38.241.25:23916"
    proxy_str := fmt.Sprintf("http://%s:%s@%s", username, password, proxy_raw)
    proxy, err := url.Parse(proxy_str)

    // target page
    page_url := "http://dev.kdlapi.com/testproxy"

    // request the target page
    client := &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxy)}}
    req, _ := http.NewRequest("GET", page_url, nil)
    req.Header.Add("Accept-Encoding", "gzip") // gzip-compress the response for faster transfers
    res, err := client.Do(req)

    if err != nil {
        // the request failed
        fmt.Println(err.Error())
    } else {
        defer res.Body.Close() // make sure Body is closed at the end

        fmt.Println("status code:", res.StatusCode) // status code

        // if the response is gzip-compressed, decompress before reading
        if res.Header.Get("Content-Encoding") == "gzip" {
            reader, _ := gzip.NewReader(res.Body) // gzip decompression
            defer reader.Close()
            io.Copy(os.Stdout, reader)
            os.Exit(0) // normal exit
        }

        // no gzip compression, read the body directly
        body, _ := ioutil.ReadAll(res.Body)
        fmt.Println(string(body))
    }
}

CSharp

Standard library

Standard library

Usage notes

  • Works for both http and https pages
using System;
using System.Text;
using System.Net;
using System.IO;
using System.IO.Compression;

namespace csharp_http
{
    class Program
    {
        static void Main(string[] args)
        {
            // target page to visit
            string page_url = "http://dev.kdlapi.com/testproxy";

            // build the request
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(page_url);
            request.Method = "GET";
            request.Headers.Add("Accept-Encoding", "Gzip");  // gzip-compress the response for faster transfers

            // proxy server
            string proxy_ip = "59.38.241.25";
            int proxy_port = 23916;

            // username and password <private/exclusive proxy>
            string username = "username";
            string password = "password";

            // set the proxy <open proxy, or private/exclusive proxy with a whitelisted IP>
            // request.Proxy = new WebProxy(proxy_ip, proxy_port);

            // set the proxy <private/exclusive proxy without a whitelisted IP>
            WebProxy proxy = new WebProxy();
            proxy.Address = new Uri(String.Format("http://{0}:{1}", proxy_ip, proxy_port));
            proxy.Credentials = new NetworkCredential(username, password);
            request.Proxy = proxy;

            // request the target page
            HttpWebResponse response = (HttpWebResponse)request.GetResponse();

            Console.WriteLine((int)response.StatusCode);  // status code
            // decompress and read the response
            using (StreamReader reader =  new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress))) {
                Console.WriteLine(reader.ReadToEnd());
            }
        }
    }
}

Node.js

Standard library (http+url)

Standard library (works for both http and https)

Usage notes

  • Works for both http and https
const http = require("http");  // 引入内置http模块
const url  = require("url");


// 要访问的目标页面
const targetUrl = "https://www.baidu.com/";
const urlParsed   = url.parse(targetUrl);

// 代理ip
const proxyIp = "proxyIp";  // 代理服务器ip
const proxyPort = "proxyPort"; // 代理服务器host

// http代理验证信息
const username = "yourusername";
const password = "yourpassword";
const base64    = new Buffer(username + ":" + password).toString("base64");
const options = {
    host    : proxyIp,
    port    : proxyPort,
    path    : targetUrl,
    method  : "GET",
    headers : {
        "Host"                : urlParsed.hostname,
        "Proxy-Authorization" : "Basic " + base64
    }
};

http.request(options,  (res) => {
        console.log("got response: " + res.statusCode);
        // print the response body (gzip-compressed)
        if (res.headers['content-encoding'] && res.headers['content-encoding'].indexOf('gzip') != -1) {
            let zlib = require('zlib');
            let unzip = zlib.createGunzip();
            res.pipe(unzip).pipe(process.stdout);
        } else {
            // print the response body (not gzip-compressed)
            res.pipe(process.stdout);
        }
        }
    })
    .on("error", (err) => {
        console.log(err);
    })
    .end()
;

Standard library (http+tls+util)

Standard library (works for both http and https requests)

Usage notes

  • Works for both http and https pages
let http = require('http'); // built-in http module
let tls = require('tls'); // built-in tls module
let util = require('util');

// username/password auth header; not needed if your IP is whitelisted
const username = 'yourusername';
const password = 'yourpassword';
const auth = 'Basic ' + Buffer.from(username + ':' + password).toString('base64');

// proxy server ip and port
let proxy_ip = '42.69.11.103';
let proxy_port = 16816;

// remote host and path to visit, jd.com's front page as an example
let remote_host = 'www.jd.com';
let remote_path = '/';

// issue a CONNECT request
let req = http.request({
    host: proxy_ip,
    port: proxy_port,
    method: 'CONNECT',
    path: util.format('%s:443', remote_host),
    headers: {
        "Host": remote_host,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36",
        "Proxy-Authorization": auth,
        "Accept-Encoding": "gzip"   // gzip-compress the response for faster transfers
    }
});


req.on('connect', function (res, socket, head) {
    // TLS handshake
    let tlsConnection = tls.connect({
        host: remote_host,
        socket: socket
    }, function () {
        // issue a GET request
        tlsConnection.write(util.format('GET %s HTTP/1.1\r\nHost: %s\r\n\r\n', remote_path, remote_host));
    });

    tlsConnection.on('data', function (data) {
        // print the response (the complete raw response message)
        console.log(data.toString());
    });
});

req.end();

request

request

Usage notes

  • Install the request library first: npm install request
  • Works for both http and https pages
let request = require('request'); // third-party request library
let util = require('util');
let zlib = require('zlib');

// username and password; not needed if your IP is whitelisted
const username = 'yourusername';
const password = 'yourpassword';

// target page to visit
let page_url = 'https://www.jd.com'

// proxy server ip and port
let proxy_ip = '42.69.11.103';
let proxy_port = 16816;

// full proxy server url
let proxy = util.format('http://%s:%s@%s:%d', username, password, proxy_ip, proxy_port);

// make the request
request({
    url: page_url,
    method: 'GET',
    proxy: proxy,
    headers: {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36",
        "Accept-Encoding": "gzip"   // gzip-compress the response for faster transfers
    },
    encoding: null,  // keep the body as a raw Buffer so it can be decompressed
}, function(error, res, body) {
    if (!error && res.statusCode == 200) {
        // print the response body (gzip-compressed)
        if (res.headers['content-encoding'] && res.headers['content-encoding'].indexOf('gzip') != -1) {
            zlib.gunzip(body, function(err, dezipped) {
                console.log(dezipped.toString());
            });
        } else {
            // print the response body (not gzip-compressed)
            console.log(body);
        }
    } else {
        console.log(error);
    }
});

puppeteer

puppeteer (IP whitelist)

Usage notes

  • Whitelist-based http/https proxying with Puppeteer
  • Requires node 7.6.0 or above + puppeteer
  • Install puppeteer first: npm i puppeteer
// load the puppeteer module
const puppeteer = require('puppeteer');

// target page to visit
const url = 'http://dev.kuaidaili.com/testproxy';

// extra headers
const headers = {
    'Accept-Encoding': 'gzip' // gzip-compress the response for faster transfers
};

// proxy server ip and port
let proxy_ip = '223.198.230.41';
let proxy_port = 19732;

(async ()=> {
    // launch a browser instance
    const browser = await puppeteer.launch({
        headless: false,  // whether to hide the window; defaults to true, set to false for easier debugging
        args: [
            `--proxy-server=${proxy_ip}:${proxy_port}`,
            '--no-sandbox',
            '--disable-setuid-sandbox'
        ]
    });

    // open a new page
    const page = await browser.newPage();

    // set the headers
    await page.setExtraHTTPHeaders(headers);

    // visit the target page
    await page.goto(url);

})();
puppeteer (username/password authentication)

Usage notes

  • Username/password-authenticated http/https proxying with Puppeteer
  • Requires node 7.6.0 or above + puppeteer
  • Install puppeteer first: npm i puppeteer
// load the puppeteer module
const puppeteer = require('puppeteer');

// target page to visit
const url = 'http://dev.kuaidaili.com/testproxy';

// extra headers
const headers = {
    'Accept-Encoding': 'gzip' // gzip-compress the response for faster transfers
};

// proxy server ip and port
let proxy_ip = '223.198.230.41';
let proxy_port = 19732;

// username and password (available in your member center)
const username = 'yourusername';
const password = 'yourpassword';

(async ()=> {
    // launch a browser instance
    const browser = await puppeteer.launch({
        headless: false,  // whether to hide the window; defaults to true, set to false for easier debugging
        args: [
            `--proxy-server=${proxy_ip}:${proxy_port}`,
            '--no-sandbox',
            '--disable-setuid-sandbox'
        ]
    });

    // open a new page
    const page = await browser.newPage();

    // set the headers
    await page.setExtraHTTPHeaders(headers);

    // authenticate with username and password
    await page.authenticate({username: username, password: password});

    // visit the target page
    await page.goto(url);
})();

Ruby

net/http

net/http (IP whitelist)

Usage notes

  • IP-whitelist-based http/https proxying with net/http
# -*- coding: utf-8 -*-

require 'net/http'  # built-in net/http module
require 'zlib'
require 'stringio'

# proxy server ip and port
proxy_ip = '42.49.11.109'
proxy_port = 16816


# target page to visit, kuaidaili's testproxy page as an example
page_url = "https://dev.kuaidaili.com/testproxy"
uri = URI(page_url)

# build the proxy
proxy = Net::HTTP::Proxy(proxy_ip, proxy_port)

# build the request object
req = Net::HTTP::Get.new(uri)
# set the User-Agent
req['User-Agent'] = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'
req['Accept-Encoding'] = 'gzip'  # gzip-compress the response for faster transfers


# make the request through the proxy; set use_ssl to false for http pages
res = proxy.start(uri.hostname, uri.port, :use_ssl => true) do |http|
    http.request(req)
end

# print the status code
puts "status code: #{res.code}"

# print the response body
if res.code.to_i != 200 then
    puts "page content: #{res.body}"
else
    gz = Zlib::GzipReader.new(StringIO.new(res.body.to_s))
    puts "page content: #{gz.read}"
end
net/http (username/password authentication)

Usage notes

  • Username/password-authenticated http/https proxying with net/http
# -*- coding: utf-8 -*-

require 'net/http'  # built-in net/http module
require 'zlib'
require 'stringio'

# proxy server ip and port
proxy_ip = '42.49.11.109'
proxy_port = 16816

# username and password
username = 'yourusername'
password = 'yourpassword'

# target page to visit, kuaidaili's testproxy page as an example
page_url = "https://dev.kuaidaili.com/testproxy"
uri = URI(page_url)

# build the proxy
proxy = Net::HTTP::Proxy(proxy_ip, proxy_port, username, password)

# build the request object
req = Net::HTTP::Get.new(uri)
# set proxy username/password authentication
req.basic_auth(username, password)
# set the User-Agent
req['User-Agent'] = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'
req['Accept-Encoding'] = 'gzip'  # gzip-compress the response for faster transfers


# make the request through the proxy; set use_ssl to false for http pages
res = proxy.start(uri.hostname, uri.port, :use_ssl => true) do |http|
    http.request(req)
end

# print the status code
puts "status code: #{res.code}"

# print the response body
if res.code.to_i != 200 then
    puts "page content: #{res.body}"
else
    gz = Zlib::GzipReader.new(StringIO.new(res.body.to_s))
    puts "page content: #{gz.read}"
end

httparty

httparty (IP whitelist)

Usage notes

  • IP-whitelist-based http/https proxying with httparty
require "httparty"  # 引入httparty模块
require 'zlib'
require 'stringio'

# 代理服务器ip和端口
proxy_ip = '42.49.11.109'
proxy_port = 16816

# 要访问的目标网页, 以快代理testproxy页面为例
page_url = 'https://dev.kuaidaili.com/testproxy'

# 设置headers
headers = {
    "User-Agent" => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Accept-Encoding" => "gzip",
}

# 设置代理
options = {
    :headers => headers, 
    :http_proxyaddr => proxy_ip, 
    :http_proxyport => proxy_port,
}

# 发起请求
res = HTTParty.get(page_url, options)

# print the status code
puts "status code: #{res.code}"

# print the response body
if res.code.to_i != 200 then
    puts "page content: #{res.body}"
else
    gz = Zlib::GzipReader.new(StringIO.new(res.body.to_s))
    puts "page content: #{gz.read}"
end
httparty (username/password authentication)

Usage notes

  • Username/password-authenticated http/https proxying with httparty
require "httparty"  # 引入httparty模块
require 'zlib'
require 'stringio'

# 代理服务器ip和端口
proxy_ip = '42.49.11.109'
proxy_port = 16816

# 用户名密码
username = 'yourusername'
password = 'yourpassword'

# 要访问的目标网页,以快代理testproxy页面为例
page_url = 'https://dev.kuaidaili.com/testproxy'

# 设置headers
headers = {
    "User-Agent" => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Accept-Encoding" => "gzip",
}

# 设置代理
options = {
    :headers => headers, 
    :http_proxyaddr => proxy_ip, 
    :http_proxyport => proxy_port, 
    :http_proxyuser => username, 
    :http_proxypass => password,
}

# 发起请求
res = HTTParty.get(page_url, options)

# print the status code
puts "status code: #{res.code}"

# print the response body
if res.code.to_i != 200 then
    puts "page content: #{res.body}"
else
    gz = Zlib::GzipReader.new(StringIO.new(res.body.to_s))
    puts "page content: #{gz.read}"
end

PHP

curl

curl

Usage notes

  1. This sample supports both http and https pages
  2. curl is not a php built-in; install it first:
    Ubuntu/Debian: apt-get install php5-curl
    CentOS: yum install php-curl
<?php
//target page to visit
$page_url = "http://dev.kdlapi.com/testproxy";


//API endpoint, format=json
$api_url = "https://dps.kdlapi.com/api/getdps/?orderid=927441114016541&num=10&pt=1&format=json&sep=1";
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $api_url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$output=curl_exec($ch); //string
// pick a random ip
$result = (array)json_decode($output);
$ip_list = ((array)$result["data"])["proxy_list"];

$proxy = $ip_list[rand(0, count($ip_list) - 1)];


//username and password (private/exclusive proxy)
$username   = "username";
$password   = "password";

//$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $page_url);

//send a post request
//$requestData["post"] = "send post request";
//curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($requestData));

curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);

//set the proxy
curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
curl_setopt($ch, CURLOPT_PROXY, $proxy);
//set the proxy username and password (private/exclusive proxy)
//for open proxies, comment out the next two lines
curl_setopt($ch, CURLOPT_PROXYAUTH, CURLAUTH_BASIC);
curl_setopt($ch, CURLOPT_PROXYUSERPWD, "{$username}:{$password}");

//custom headers
$headers = array();
$headers["user-agent"] = 'User-Agent: Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0);';
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

//custom cookie
curl_setopt($ch, CURLOPT_COOKIE,'');

curl_setopt($ch, CURLOPT_ENCODING, 'gzip'); //gzip-compress the response for faster transfers

curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);

curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

$result = curl_exec($ch);
$info = curl_getinfo($ch);
curl_close($ch);

echo $result;
echo "\n\nfetch ".$info['url']."\ntimeuse: ".$info['total_time']."s\n\n";
?>

Easy Language (易语言)

Using a proxy in Easy Language

Usage notes

  • Requires two modules: the Jingyi module (精易模块) and the Yuci class (鱼刺类)
  • The code is shown in the screenshots (transcribed below)

[Screenshots: Easy Language sample code, figures 1 and 2]

' Username and password for exclusive/private proxies
' For an open proxy, leave the username and password empty
用户名 = “”
密码 = “”
' Proxy ip address and port
ip = “”
Get.SetProxy (ip, 用户名, 密码)

Get.Open (, “http://dev.kdlapi.com/testproxy”)
Get.SetUserAgent (“Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)”)
Get.Send ()

Ret = Get.GetResponseText ()
调试输出 (“Current IP: ” + Ret)

返回 (0)  ' Return any value you need