import time import json import random import requests import re import urllib3 import math import os,sys from selenium import webdriver from selenium.webdriver.common.by import By
def wechat_login(self):
    """Log in to the WeChat Official Accounts site and save the session cookies.

    Opens Chrome at https://mp.weixin.qq.com/, waits a fixed 15 seconds for
    the operator to scan the QR code with a phone, then writes the browser
    cookies to ``cookie_path`` as a JSON object mapping cookie name to
    cookie value.

    NOTE(review): the wait is a plain sleep; the success message is printed
    whether or not the scan actually happened -- nothing here checks it.
    NOTE(review): ``cookie_path`` is not defined in this function; presumably
    it is a module-level name -- confirm.

    :return: None
    """
    print("浏览器将自动打开并跳转至微信公众号登录页面……")
    time.sleep(1)
    driver = webdriver.Chrome()
    # From here on the browser process exists; make sure it is shut down
    # even if navigation, cookie retrieval, or the file write raises.
    try:
        driver.get("https://mp.weixin.qq.com/")
        time.sleep(2)
        print("请拿手机扫码二维码登录公众号")
        time.sleep(15)
        print("登录成功")

        # Flatten Selenium's list of cookie dicts into {name: value}.
        info = {cookie['name']: cookie['value'] for cookie in driver.get_cookies()}
        cookie_info = json.dumps(info)
        # Leaving the ``with`` block flushes and closes the file.
        with open(cookie_path, 'w', encoding='utf-8') as f:
            f.write(cookie_info)
        print("cookies已存入cookie.txt", flush=True)
    finally:
        driver.quit()
# Remove unneeded content from the parsed document:
#   * every <section> from the first one containing '招聘信息' onward, and
#   * any <section> at index 3 or later that contains '前言'.
# NOTE(review): ``wb`` is defined outside this fragment; presumably a
# BeautifulSoup document -- confirm.
sections = wb.find_all('section')

# Index of the first section containing the marker text. ``None`` (not 0)
# when there is no such section, so a missing marker does not cause every
# section to be deleted.
cutoff = next(
    (i for i, tag in enumerate(sections) if '招聘信息' in tag.prettify()),
    None,
)

for i, tag in enumerate(sections):
    past_cutoff = cutoff is not None and i >= cutoff
    # Single combined test so a tag matching both rules is decomposed once.
    if past_cutoff or (i >= 3 and '前言' in tag.prettify()):
        tag.decompose()