研究python也有一段时间了, 不知道什么时候开始, 学习python成为了SEO的一门必修课, 现在的百度SEO,没点真本事,还真的跟流量无缘, 真应了那张“李总”调侃SEO的图片:屌丝们,尽管做SEO,有流量算我输;
所以故事就有了下一集——python百度发包快排脚本
这个脚本是别人分享给我的,我研究了一下,并做了少许修改。下面贴出核心代码,感兴趣的也可以拿去研究一下。
你需要准备
1、代理ip
2、一个数据库
3、一台vps,需要宽带账号密码(当然一台肯定不够)
4、安装python脚本解释器
下面是源代码的核心部分(需要完整代码欢迎下方评论留言)
# -*- coding: utf-8 -*-
"""Selenium/PhantomJS script that searches Baidu for configured keywords.

For every ``[target_url, keyword, *decoy_keywords]`` entry in ``targetURL``
the script redials the ADSL connection, opens Baidu in PhantomJS, searches
for the keyword, pages through up to five result pages looking for the
target URL, opens it, and writes the outcome to the ``ip_log`` MySQL table.

NOTE(review): this listing was recovered from a copy whose line breaks were
lost; indentation and the active/commented state of list entries were
reconstructed from syntax and should be checked against the author's file.
NOTE(review): the find_element_by_* helpers and webdriver.PhantomJS used
below only exist in Selenium 3.x and earlier.
"""
import os
import random
import socket
import subprocess
import time
import traceback
import urllib.request

import pymysql
import requests
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Seconds to wait for any single HTTP request made outside the browser.
HTTP_TIMEOUT = 10

# Dial-up (rasdial) connection entry and credentials.
# NOTE(review): credentials are hard-coded in the original listing; the
# environment variables allow overriding them without editing the file.
g_adsl_account = {
    "name": os.environ.get("ADSL_NAME", "宽带连接"),
    "username": os.environ.get("ADSL_USERNAME", "19ab68"),
    "password": os.environ.get("ADSL_PASSWORD", "643534"),
}

# Result URLs containing any of these substrings are dropped from the list
# returned by get_search_url().
out_urls = [
    'zhimo.yuanzhumuban.cc',
    'bbs.yuanzhumuban.cc',
    'http://money.163.com/15/0416/11/ANANRECC00253B0H.html',
]

# Each entry: [target URL fragment, keyword, optional decoy keywords...].
targetURL = [
    ['http://www.hkuws.com', '注册离岸公司'],
    ['zs.efu.com.cn/mornfeeit/', '梦菲雪'],
    ['zs.efu.com.cn/chengshijiaren/', '城市佳人'],
    ['www.kidsnet.cn/exposition', '童装展会'],
    ['zs.efu.com.cn/ks/', '卡索'],
    ['zs.efu.com.cn/distin-kidny/', '迪斯廷凯'],
    # NOTE(review): "{过}{滤}" looks like a forum word-filter artefact in
    # the keyword; left untouched because it is runtime data -- confirm.
    ['zs.efu.com.cn/fuzhuang/luyidigao/', '路易迪高童装代{过}{滤}理'],
    ['brand.efu.com.cn/brandshow-1221090.html', '凯帝龙驰'],
    ['zs.efu.com.cn/rabbitjero/', '兔子杰罗'],
    ['zs.efu.com.cn/wmprince/', '西瓜王子'],
    ['zs.efu.com.cn/betu', '百图'],
    ['zs.efu.com.cn/pepco/', '小猪班纳'],
    ['http://weibo.com/huaxiafinance', '华夏信财'],
    ['http://p2p.hexun.com/2016-04-26/183531215.html', '华夏信财'],
    ['http://www.xcf.cn/gdyw/201605/t20160526_772682.htm', '华夏信财'],
    ['http://www.huaxiaoxia.com/', '华夏信财'],
    ['so.tedu.cn', '网络营销培训机构'],
    ['www.cosatto.net.cn', '个性安全座椅'],
    ['www.kaihuata.com/', '开化旅游'],
]


def connect():
    """Dial the ADSL connection with ``rasdial`` and wait five seconds."""
    # Argument list (no shell) so the account values are never re-parsed
    # by a command interpreter.
    subprocess.run([
        "rasdial",
        g_adsl_account['name'],
        g_adsl_account['username'],
        g_adsl_account['password'],
    ])
    time.sleep(5)


def disconnect():
    """Hang up the ADSL connection with ``rasdial`` and wait five seconds."""
    subprocess.run(["rasdial", g_adsl_account['name'], "/disconnect"])
    time.sleep(5)


def get_ip():
    """Return ``[ip, address]`` as reported by ip.chinaz.com.

    The slice offsets assume a body shaped like
    ``{ip:'1.2.3.4',address:'...'}`` -- inferred from the offsets
    themselves, TODO confirm against the live service.

    Raises:
        ValueError: if the response does not contain both markers.
    """
    url = "http://ip.chinaz.com/getip.aspx"
    with urllib.request.urlopen(url, timeout=HTTP_TIMEOUT) as fp:
        body = fp.read().decode("utf8")
    ip_pos = body.find("ip")
    addr_pos = body.find("address")
    if ip_pos == -1 or addr_pos == -1:
        raise ValueError("unexpected response from IP service: %r" % body)
    ip = body[ip_pos + 4:addr_pos - 2]
    address = body[addr_pos + 9:-2]
    return [ip, address]


def insert_db(ipdate):
    """Insert one row into ``zongzong.ip_log``.

    Args:
        ipdate: sequence ``[ip, address, keyword, url, error, page, rank]``.
            A ``created_at`` timestamp is appended to a copy; the caller's
            list is left unchanged.

    NOTE(review): the CREATE TABLE statement that was commented out in the
    original listing has no ``page``/``rank`` columns although this INSERT
    writes them -- the live schema must include both.
    """
    timestamp = time.strftime('%Y-%m-%d %X', time.localtime())
    row = list(ipdate) + [timestamp]
    conn = pymysql.connect(host='localhost', user='root', passwd='',
                           port=3306, charset='utf8')
    try:
        cur = conn.cursor()
        try:
            cur.execute("USE zongzong")
            # `rank` is a reserved word since MySQL 8.0.2, so quote it.
            cur.execute(
                "INSERT INTO ip_log"
                "(ip,address,keyword,url,error,page,`rank`,created_at) "
                "VALUES(%s, %s, %s, %s, %s, %s, %s, %s)",
                row,
            )
        finally:
            cur.close()
        conn.commit()
    finally:
        # Always release the connection, even if the INSERT failed.
        conn.close()


def get_search_url(driver):
    """Collect the organic result links on the current Baidu result page.

    Returns:
        ``[real_url, click_link]`` -- two parallel lists: the landing URL
        each result redirects to, and the matching ``<a>`` element.
        Results whose landing URL contains an ``out_urls`` entry are
        skipped.
    """
    real_url = []
    click_link = []
    content = driver.find_element_by_css_selector("div[id=\"content_left\"]")
    for link in content.find_elements_by_tag_name("a"):
        if link.get_attribute('class') != "c-showurl":
            continue
        url = link.get_attribute('href')
        # The href is a redirect link; the Location header of a HEAD
        # request holds the real landing URL. Fall back to the href
        # itself when no redirect is returned instead of raising KeyError.
        headers = requests.head(url, timeout=HTTP_TIMEOUT).headers
        landing = headers.get('location', url)
        if any(out_url in landing for out_url in out_urls):
            continue
        real_url.append(landing)
        click_link.append(link)
    return [real_url, click_link]


def get_urlIndex(tagurl, urls):
    """Return the index of the first URL containing ``tagurl``, else -1."""
    for position, url in enumerate(urls):
        if tagurl in url:
            return position
    return -1


def click_nextBtn(driver):
    """Click the "next page" link in Baidu's pager and return ``driver``."""
    pager = driver.find_element_by_css_selector("div[id=\"page\"]")
    for item in pager.find_elements_by_tag_name("a"):
        print(item.text)
        if item.text == "下一页>":
            item.click()
            # The click navigates away, so the remaining elements are
            # stale; stop iterating.
            break
    return driver


def click_search_url(driver, items):
    """Open the result links whose position is listed in ``items``.

    Positions count every ``c-showurl`` link on the page, starting at 0.
    Each opened window is kept for 5-10 seconds, closed, and focus is
    returned to the result page.
    """
    content = driver.find_element_by_css_selector("div[id=\"content_left\"]")
    links = content.find_elements_by_tag_name("a")
    nowhandle = driver.current_window_handle
    position = 0
    for link in links:
        if link.get_attribute('class') != "c-showurl":
            continue
        if position in items:
            print("随机点击item:", position)
            print(link.get_attribute('href'), link.text)
            link.click()
            time.sleep(random.randint(5, 10))
            for handle in driver.window_handles:
                if handle != nowhandle:
                    print("切换到当前窗口")
                    driver.switch_to.window(handle)
                    print("title:", driver.title)
                    driver.close()
            print("切换到原来的窗口")
            driver.switch_to.window(nowhandle)
            print("title:", driver.title)
            print("本次随机点击完毕!")
        position += 1


def get_random_index(index, len):
    """Choose which result positions to open before the target.

    Args:
        index: position of the target in the result list, or -1 when the
            target is not on this page.
        len: number of results on the page. (The name shadows the builtin;
            it is kept because it is part of the existing signature.)

    Returns:
        A list of zero, one or two positions, all within ``range(len)``
        whenever ``index`` itself is a valid position.
    """
    if index >= 8:
        return [random.randint(0, 4), random.randint(5, 8)]
    if index >= 4:
        return [random.randint(0, 3), random.randint(3, index)]
    if index >= 0:
        return [index]
    # Target not on this page: pick one position that actually exists.
    # random.randint is inclusive at both ends, hence the ``len - 1``.
    if len <= 0:
        return []
    if len <= 5:
        return [random.randint(0, len - 1)]
    return [random.randint(5, len - 1)]


def getUA():
    """Return one user-agent string chosen at random."""
    uaList = [
        # 360
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
        # chrome
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        # firefox
        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0",
        # opera
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
    ]
    return random.choice(uaList)


def getWindowSize():
    """Return one ``[width, height]`` browser window size at random."""
    wind_size = [[1920, 1080], [1600, 900], [1280, 720]]
    return random.choice(wind_size)


def setDisplay():
    """Switch the Windows display to a randomly chosen resolution."""
    # Imported here because the module-level import was commented out in
    # the original listing and pywin32 only exists on Windows.
    import win32api  # pip install pypiwin32

    display_size = [[1920, 1080], [1680, 1050], [1600, 900],
                    [1440, 900], [1400, 1050]]
    d_size = random.choice(display_size)
    dm = win32api.EnumDisplaySettings(None, 0)
    dm.PelsWidth = d_size[0]
    dm.PelsHeight = d_size[1]
    dm.BitsPerPel = 32
    dm.DisplayFixedOutput = 0
    win32api.ChangeDisplaySettings(dm, 0)


def _change_ip():
    """Redial until ``baidu.com`` resolves again."""
    disconnect()
    connect()
    while True:
        try:
            socket.gethostbyname("baidu.com")
            return
        except OSError:  # socket.gaierror is a subclass of OSError
            disconnect()
            connect()


def _start_browser():
    """Start PhantomJS with a random user agent and open Baidu."""
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    user_agent = getUA()
    print(user_agent)
    dcap["phantomjs.page.settings.userAgent"] = user_agent
    dcap["phantomjs.page.settings.loadImages"] = False
    driver = webdriver.PhantomJS(desired_capabilities=dcap,
                                 service_args=['--load-images=no'])
    try:
        driver.implicitly_wait(30)
        driver.delete_all_cookies()
        driver.get("http://www.baidu.com/")
        window_size = getWindowSize()
        driver.set_window_size(window_size[0], window_size[1])
        print('打开百度成功', driver.title)
    except Exception:
        # Do not leak the PhantomJS process if start-up fails half-way.
        driver.quit()
        raise
    return driver


def _search_and_visit(driver, targetInfo):
    """Search for the keyword, locate the target and open it.

    Returns:
        ``(status, page, index)`` where status is ``"success"`` or
        ``"no_find"``; page and index are -1 when the target is not found.
    """
    target = targetInfo[0]
    keyword = targetInfo[1]
    print(">>>>>>>>>>>>>>>点击的关键词:", keyword, "--->目标地址:", target,
          ">>>>>>>>>>>>>>>>>>>>")
    if len(targetInfo) > 2:
        # Search one of the decoy keywords first.
        error_keyword = targetInfo[random.randint(2, len(targetInfo) - 1)]
        print("点击错误关键词:", error_keyword)
        driver.find_element_by_id("kw").send_keys(error_keyword)
        time.sleep(2)
        driver.find_element_by_id("su").click()
        time.sleep(5)
        driver.find_element_by_id("kw").clear()
        time.sleep(2)
        print("错误关键词点击完毕")
    driver.find_element_by_id("kw").send_keys(keyword)
    print("...开始点击搜索按钮..")
    driver.find_element_by_id("su").click()
    print("...点击完毕..")
    time.sleep(2)

    # urls_res[0]: landing URLs, urls_res[1]: matching link elements.
    urls_res = get_search_url(driver)
    real_urls = urls_res[0]
    print("搜索出来的可点击着陆页个数:", len(real_urls))
    print(real_urls)
    index = get_urlIndex(target, real_urls)
    print("目标index:", index)

    page = 1
    while index == -1 and page <= 4:
        if page == 1:
            items = get_random_index(index, len(real_urls))
            print(items)
            click_search_url(driver, items)
        driver = click_nextBtn(driver)
        time.sleep(3)
        urls_res = get_search_url(driver)
        real_urls = urls_res[0]
        print(real_urls)
        index = get_urlIndex(target, real_urls)
        page = page + 1

    if index > 4 and page == 1:
        # Target is low on the first page: open one or two other results.
        if random.randint(1, 2) == 2:
            items = get_random_index(index, len(real_urls))
        else:
            items = [1]
        print(items)
        click_search_url(driver, items)

    # Test the search outcome, not the page counter: the target may have
    # been found on the fifth page, where page is already 5.
    if index == -1:
        print("没有找到目标地址,放弃搜索...")
        return "no_find", -1, -1

    print("目标在page", page, "当前排名:", index, real_urls[index])
    print("反问最后的目标页...")
    urls_res[1][index].click()
    time.sleep(5)
    nowhandle = driver.current_window_handle
    for handle in driver.window_handles:
        if handle != nowhandle:
            print("切换到当前窗口")
            driver.switch_to.window(handle)
            stime = random.randint(15, 25)
            print("目标页title:", driver.title, "停留-->", stime)
            time.sleep(stime)
            driver.close()
    print("切换到原来的窗口")
    driver.switch_to.window(nowhandle)
    print("title:", driver.title)

    print("打印cookie")
    print(driver.get_cookies())
    print("清除cookie")
    driver.delete_all_cookies()
    print("打印cookie:")
    print(driver.get_cookies())
    return "success", page, index


def _record_result(keyword, target, status, page, rank):
    """Store one run's outcome together with the current external IP."""
    data = get_ip()
    data.extend([keyword, target, status, page, rank])
    insert_db(data)


def run_target(targetInfo):
    """Process one ``targetURL`` entry and log the result.

    Any failure is printed with its traceback and recorded with status
    ``"faild"``; it never propagates, so the remaining targets still run.
    """
    # Bound before the try block so the failure path can always log them.
    target = targetInfo[0]
    keyword = targetInfo[1]
    status, page, index = "faild", -1, -1
    driver = None
    try:
        _change_ip()
        driver = _start_browser()
        status, page, index = _search_and_visit(driver, targetInfo)
    except Exception:
        traceback.print_exc()
        status, page, index = "faild", -1, -1
    finally:
        if driver is not None:
            print("关闭浏览器")
            time.sleep(5)
            try:
                driver.quit()
            except Exception:
                traceback.print_exc()
    try:
        _record_result(keyword, target, status, page, index)
    except Exception:
        # A logging failure must not abort the remaining targets.
        traceback.print_exc()


def main():
    """Run every configured target once, in order."""
    for targetInfo in targetURL:
        run_target(targetInfo)


if __name__ == "__main__":
    main()
在本文结尾, 较瘦还是要提醒大家, 发包快排也是在网站有底子的情况下才会有效,新站或者坚持写原创的朋友们呢 , 研究一下可以, 切莫随便线上应用,风险心里要知晓。
另外需要解释一下的是,一般做快排的厂家都会告诉你,网站要有底子才会给你刷。那么这个“网站底子”是什么?
我们都知道,刷点击是利用用户行为加分项来达到目的的。要想发包有效果,你的网站必须已经进入用户行为评分的阶段,较瘦黎本人假定这是第二阶段。
那么第一阶段就是百度机器人的质量评分阶段了,只有你站内各项评分获得了百度机器人的认可 ,才会给你一个初始排名,进入第二个阶段,这时候刷点击也好,发包也好,才会有机可乘。。。
美好的时光总是短暂的, 想知道更多技术,欢迎下方评论留言!
发表评论