#coding:utf-8
import functools
import logging
import os
import Queue
import random
import re
import threading
import time
import traceback
from threading import Thread

from selenium import webdriver
# Module-wide lock used by TmallCookie: it is held while tmall_cookie.txt is
# read or written and while the pending cookie_list is modified.
lock = threading.Lock()
def async(f): """异步装饰器""" def wrapper(*args, **kwargs): thr = Thread(target=f, args=args, kwargs=kwargs) thr.start() return wrapper
class TmallCookie(object): def init(self): # cookie 队列 self.cookie_queue = Queue.Queue() self.cookie_list = list() self.load_cookie() self.parse_cookie() self.save_cookie()
def load_cookie(self): """加载本地已保存的 cookie""" lock.acquire() with open("tmall_cookie.txt", "r") as f: cookie_list = f.readlines() lock.release() for i in cookie_list: self.cookie_queue.put(i.strip()) @async def parse_cookie(self): """ 请求 cookie,并将 cookie 保存至 cookie 列表 :return: """ urls = ['https://detail.tmall.com/item.htm?id=562345301295', 'https://detail.tmall.com/item.htm?id=553941537843', 'https://detail.tmall.com/item.htm?id=558646979307', 'https://list.tmall.com/search_product.htm?spm=a221t.1812074.2005984841.8.44d84208RXceJT&q=%B9%E2%C3' '%E6%CE%C4%D0%D8&from=.list.pc_1_searchbutton&acm=lb-zebra-7777-1443323.1003.4.1158540&type=p&scm=100' '3.4.lb-zebra-7777-1443323.OTHER_14748278648600_1158540', 'https://list.tmall.com/spu_detail.htm?fmtab=sp&cat=50105508&spuid=877471268&suid=4e5fd39570486fdf2a' '9b3077572be7ab&rn=1e0abfcf6995e918ab6c7bc00d6e9be2' ] option = webdriver.ChromeOptions() option.add_argument('disable-infobars') option.add_argument('disable-gpu') option.add_argument('--headless') option.add_argument("--no-sandbox") # option.add_argument("window-size=1024,768") while True: if self.cookie_queue.qsize() < 20000: try: # driver = webdriver.Chrome('C:\\chromedriver.exe', chrome_optiOns=option) driver = webdriver.Chrome('./chromedriver', chrome_optiOns=option) driver.set_page_load_timeout(120) url = random.choice(urls) driver.get(url) time.sleep(5) try: cookies = driver.get_cookies() cookie_string = [] for cookie_info in cookies: cookie_string.append(u'%s=%s' % (cookie_info.get(u'name'), cookie_info.get(u'value'))) cookie_string = '; '.join(cookie_string) driver.close() driver.quit() except Exception as e: pass try: _tb_token_ = re.findall("(_tb_token_=.*?;)", cookie_string)[0] t = re.findall("(t=[a-z0-9]+)", cookie_string)[0] cna = re.findall("(cna=.*?;)", cookie_string)[0] cookie2 = re.findall("(cookie2=.*?;)", cookie_string)[0].replace(";", "") cookie = _tb_token_ + " " + t + "; " + cna + " " + cookie2 try: enc = re.findall("(enc=.*?;)", 
cookie_string)[0] cookie = _tb_token_ + " " + t + "; " + cna + " " + enc + " " + cookie2 except: pass print cookie self.cookie_queue.put(cookie) lock.acquire() self.cookie_list.append(cookie) lock.release() except Exception as e: pass except Exception as e: print traceback.format_exc() else: time.sleep(300) @async def save_cookie(self): """ 清空之前的 cookie 文件,将当天抓取的 cookie 保存至文件 :return: """ while True: time.sleep(1) if len(self.cookie_list) > 10: lock.acquire() # with open("tmall_cookie.txt", "w") as f1: # f1.truncate() # time.sleep(5) with open("tmall_cookie.txt", "a") as f2: for cookie in self.cookie_list: f2.write(cookie) f2.write("\n") self.cookie_list = [] lock.release() def get_cookie(self): """ 获取一个 cookie :return: tmall cookie """ while True: try: cookie = self.cookie_queue.get(timeout=5) break except Exception as e: logging.warning("Get cookie error: %s" % e) time.sleep(5) if self.cookie_queue.qsize() <= 5000: self.cookie_queue.put(cookie) return cookie
# Script entry point: build the cookie pool when the file is run directly.
if __name__ == '__main__':
    cookie = TmallCookie()
    # Example consumer loop:
    # while True:
    #     print(cookie.get_cookie())
    #     time.sleep(2)
1 a7a2 2018-06-11 13:31:04 +08:00 按照经验,这不是你代码的问题,而是你用的 webdriver 库跟 chromedriver 之间的问题,在 macOS 下也有同样的问题。可以尝试调用 kill 之类的命令结束它,也就是自己管理这个进程。
2 1109599636 2018-07-07 17:31:43 +08:00 我以前写的时候是换成了火狐(Firefox)的驱动。