【Python requests多页面爬取案例】 ,,原文:
【Python requests多页面爬取案例】 ,,原文:
原文: http://blog.gqylpy.com/gqy/321
"```python
import requests
from fake_useragent import UserAgent # 随机ua库
class Boring():
    """Scrape company records from the portal at 125.35.6.84:81 over a page range.

    Instantiating the class runs the whole job: it collects the company ids
    listed on each page in ``page_scope``, requests the detail record for
    every id, and prints the person-in-charge / company-name pairs.
    """

    # Seconds to wait for the server on each request, so that a stalled
    # connection raises instead of blocking the script indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self, page_scope=(4, 7)):
        """
        :param page_scope: (first_page, last_page) of the list pages to
            fetch; both ends are included
        """
        self.page_scope = page_scope
        self.all_id = self.get_all_company_id()
        self.enterprise_info = self.get_all_company_info()
        self.show_enterprise_info()

    @property
    def firefox_ua(self):
        """Return request headers carrying a random Firefox User-Agent."""
        # NOTE(review): `use_cache_server` is presumably only accepted by
        # older fake_useragent releases -- confirm against the installed one.
        ua = UserAgent(use_cache_server=False)
        return {'User-Agent': ua.Firefox}  # ua.Firefox: random Firefox UA string

    def get_all_company_id(self):
        """Collect the company ids listed on every page in ``self.page_scope``.

        :return: dict mapping page number -> list of the ``'ID'`` values
            found in that page's ``'list'`` entries
        """
        # List endpoint (figure 1 in the original post).
        url = 'http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsList'
        all_id = {}
        for page in range(self.page_scope[0], self.page_scope[1] + 1):
            response = requests.post(
                url,
                data=self.post_data(page),
                headers=self.firefox_ua,
                timeout=self.REQUEST_TIMEOUT,
            )
            json_text = response.json()
            all_id[page] = [company['ID'] for company in json_text['list']]
        return all_id

    def get_all_company_info(self):
        """Fetch the detail record of every id in ``self.all_id``.

        :return: dict mapping ``businessPerson`` -> ``epsName``; when the
            same person appears more than once the first company is kept
        """
        # Detail endpoint (figure 3 in the original post).
        url = 'http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsById'
        enterprise_info = {}
        for company_ids in self.all_id.values():
            for company_id in company_ids:
                # The form carries only the company id (figure 4 in the original post).
                response = requests.post(
                    url,
                    data={'id': company_id},
                    headers=self.firefox_ua,
                    timeout=self.REQUEST_TIMEOUT,
                )
                # Skip anything that is not JSON; a missing header counts as
                # "not JSON" rather than raising KeyError.
                content_type = response.headers.get('Content-Type', '')
                if not content_type.lower().startswith('application/json'):
                    continue
                json_text = response.json()
                # Only the person in charge and the company name are kept.
                enterprise_info.setdefault(
                    json_text.get('businessPerson'),
                    json_text.get('epsName'),
                )
        return enterprise_info

    def show_enterprise_info(self):
        """Print one ``person company`` line per collected entry."""
        for person, company in self.enterprise_info.items():
            print(person, company)

    def post_data(self, page):
        """Build the form submitted when requesting one page of the company list.

        :param page: page number to request
        :return: dict of form fields (figure 2 in the original post)
        """
        return {
            'on': 'true',
            'page': page,
            'pageSize': '15',
            'productName': '',
            'conditionType': '1',
            'applyname': '',
            'applysn': '',
        }
# Go: run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    Boring()
```
"
原文: http://blog.gqylpy.com/gqy/321
【Python requests多页面爬取案例】
相关内容
- 4.28-python学习笔记(转义符&input函数),,参考书
- python 使用 matplotlib.pyplot来画柱状图和饼图,,导入包i
- python小游戏,,import tim
- python3 获取阿里云OSS 最新存储容量 SDK API,,模块aliyun
- 学习随笔 --python实现熵权法,,一、熵权法介绍熵最先
- python第三方库 - dateurtil,,简介 扩展并增强
- Appium+Python之PO模型(Page object Model),,思考:我们进行
- Python学习---IO的异步[gevent+Grequests模块],,安装gevent模块
- Python--格式化cookie为字典类型,,import req
- 【python】Numpy中stack(),hstack(),vstack()函数详解,,转自
评论关闭