Using Python to open a specified website in the browser
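At its core, the script below just drives the standard-library webbrowser module. A minimal sketch of that one call, before the full script (the URL here is only a placeholder):

import webbrowser

# Open the page in the system default browser, in a new tab where possible
webbrowser.open_new_tab("https://example.com")

Everything else in the script layers random user agents, scraped proxy IPs and a choice of browser executables on top of this single call.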
# -*- coding:utf-8 -*-
# Inflate page views for a website
import os
import re
import sys
import time
import random
# import driver  # unused
import datetime
import requests
import threading
import webbrowser as web
from bs4 import BeautifulSoup
# Change the target URL here
# url_wz = "https://mall.jd.com/index-842813.html" # old shop
# url_wz = "https://mall.jd.hk/index-10108276.html"
# List of user-agent strings; a random one is picked for each request
user_agent = [
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0; Baiduspider-ads) Gecko/17.0 Firefox/17.0",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9b4) Gecko/2008030317 Firefox/3.0b4",
    "Mozilla/5.0 (Windows; U; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; BIDUBrowser 7.6)",
    "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.99 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64; Trident/7.0; Touch; LCJB; rv:11.0) like Gecko",
]
# Browser executable paths
browser_paths = [
    r'C:\Program Files (x86)\Internet Explorer\iexplore.exe',
    r'C:\Windows\SystemApps\Microsoft.MicrosoftEdge_8wekyb3d8bbwe\MicrosoftEdge.exe',
    r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe',
    # r'D:\MySoftware\360jisu\360Chrome\Chrome\Application\360chrome.exe'
]
# Scrape the domestic high-anonymity proxy list and return every IP:port pair on one page
def get_ip_list(page=7):
    url = 'http://www.xicidaili.com/nn/' + str(page)
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;",
        "Accept-Encoding": "gzip, deflate, sdch",
        "Accept-Language": "zh-CN,zh;q=0.8,en;q=0.6",
        "Referer": "http://www.xicidaili.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36"
    }
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, 'lxml')  # or 'html.parser'
    data = soup.find_all('td')
    # Match IP cells such as <td>61.135.217.7</td>
    ip_compile = re.compile(r'<td>(\d+\.\d+\.\d+\.\d+)</td>')
    # Match port cells such as <td>80</td>
    port_compile = re.compile(r'<td>(\d+)</td>')
    # All IPs, returned as a list
    ip = re.findall(ip_compile, str(data))
    # All ports, returned as a list
    port = re.findall(port_compile, str(data))
    # Combine into IP:port pairs such as 61.135.217.7:80
    return [":".join(i) for i in zip(ip, port)]
# Open the URL with one of the browsers above
def chose_browser(browser_path, url):
    # If no browser path was passed in, fall back to the system default browser
    if not browser_path:
        print('using default browser to open url')
        web.open_new_tab(url)  # with the default browser we do not kill the process afterwards
    else:
        # Check that the browser executable actually exists
        if not os.path.exists(browser_path):
            print('current browser path not exists, using default browser')
            # Path does not exist, so fall back to the default browser
            browser_path = ''
            web.open_new_tab(url)
        else:
            browser_task_name = browser_path.split('\\')[-1]  # process name used to kill the task
            browser_name = browser_task_name.split('.')[0]    # custom name to register the browser under
            print(browser_name)
            web.register(browser_name, None, web.BackgroundBrowser(browser_path))
            web.get(browser_name).open_new_tab(url)  # open the page with the freshly registered browser
            print('using %s browser open url successful' % browser_name)
            time.sleep(random.randint(5, 20))  # give the browser time to load the page
            kill_cmd = 'taskkill /f /IM ' + browser_task_name  # command that kills the browser process
            os.system(kill_cmd)  # terminate the browser
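# The webbrowser module gives no way to hand a proxy to the browser, which is why the
# proxy IP goes unused once a real browser opens the page (see the note inside do_cheap
# below). One rough, untested workaround is sketched here, assuming a Chromium-based
# browser such as Chrome that understands the --proxy-server switch; open_with_proxy is
# an addition, not part of the original script.
import subprocess
def open_with_proxy(browser_path, url, proxy, wait=15):
    # Launch the browser binary directly so command-line switches can be passed
    proc = subprocess.Popen([browser_path, '--proxy-server=http://' + proxy, url])
    time.sleep(wait)  # let the page load
    proc.kill()       # close the browser again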
# Now for the actual work
def do_cheap(code=0, ips=[]):
    try:
        # Pick a random proxy IP
        ip = random.choice(ips)
    except:
        return False
    else:
        proxies = {
            "http": ip,
        }
        # headers_ = {
        #     "Accept": "*/*",
        #     "Accept-Encoding": "gzip, deflate, sdch",
        #     "Accept-Language": "zh-CN,zh;q=0.8,en;q=0.6",
        #     "Referer": "http://www.sanplit.cn/",
        #     "User-Agent": random.choice(user_agent),
        # }
        headers_ = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Referer": "http://www.baidu.com/",
            "User-Agent": random.choice(user_agent),
        }
        try:
            # Perform the action (send the request)
            # r_wz = requests.get(url_wz, headers = headers_, proxies = proxies)
            # r_wz.encoding = r_wz.apparent_encoding
            if 'url_wz' not in vars():
                # No target URL configured above, so fall back to a random article
                url_wz = "http://www.sanplit.cn/archives/" + str(random.randint(1, 7))
            browser_path = random.choice(browser_paths)  # pick a random browser path
            chose_browser(browser_path, url_wz)
            # Note: once a real browser opens the page, the proxy IP is no longer used
            # (see the open_with_proxy sketch above for one possible workaround)
        except requests.exceptions.ConnectionError:
            print("Connection Error")
            if not ips:
                print("no proxy IPs left")
                sys.exit()
            # Drop the unusable proxy IP
            if ip in ips:
                ips.remove(ip)
            # Retry the request
            do_cheap(code, ips)
        else:
            # Get the current time
            date = datetime.datetime.now().strftime('%H:%M:%S')
            # print(u"Round %s [%s] [%s]: score %s (proxies left: %s)" % (code, date, ip, r_wz.text, len(ips)))
            print(u"Round %s [%s] [%s]: %s (proxies left: %s)" % (code, date, ip, url_wz, len(ips)))
if __name__ == '__main__':
    ips = []
    for i in range(800):
        # Refresh the proxy IP list every 100 rounds
        if i % 100 == 0:
            ips.extend(get_ip_list("8"))
        # Spawn one worker thread per round, roughly every 5 seconds
        t1 = threading.Thread(target=do_cheap, args=(i, ips))
        t1.start()
        # time.sleep() takes seconds, not milliseconds
        time.sleep(5)
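A side note: the loop above starts one thread per request and never joins them. If bounded concurrency is preferred, the standard-library concurrent.futures module can replace the manual threading. The sketch below is an untested alternative main block reusing the same get_ip_list/do_cheap functions; the pool size of 4 is an arbitrary assumption.

from concurrent.futures import ThreadPoolExecutor

if __name__ == '__main__':
    ips = []
    # The pool caps the number of requests running at the same time
    with ThreadPoolExecutor(max_workers=4) as pool:
        for i in range(800):
            if i % 100 == 0:
                ips.extend(get_ip_list("8"))
            pool.submit(do_cheap, i, ips)
            time.sleep(5)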