Basic Usage of aiohttp in Python
1. Definition
aiohttp is an asynchronous HTTP networking library built on top of asyncio; it provides both a server side and a client side.
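This article focuses on the client side. Just to illustrate that the server side exists as well, here is a minimal sketch of a "hello world" server using the aiohttp.web API (not part of the original article):

    from aiohttp import web

    async def hello(request):
        # A trivial handler returning a plain-text response
        return web.Response(text='Hello, aiohttp')

    app = web.Application()
    app.add_routes([web.get('/', hello)])

    if __name__ == '__main__':
        # Starts a blocking HTTP server (port 8080 by default)
        web.run_app(app)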
2. Basic usage
    import aiohttp
    import asyncio

    async def fetch(session, url):
        # async with opens an async-capable context manager
        async with session.get(url) as response:
            # response.text() returns a coroutine, so it must be awaited
            return await response.text(), response.status

    async def main():
        # ClientSession is also an async context manager
        async with aiohttp.ClientSession() as session:
            html, status = await fetch(session, 'https://cuiqingcai.com')
            print(f'html: {html[:100]}...')
            print(f'status: {status}')

    if __name__ == '__main__':
        # On Python 3.7 and later you no longer need to manage the event loop
        # explicitly; asyncio.run(main()) can replace the call below
        asyncio.get_event_loop().run_until_complete(main())
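As the final comment notes, on Python 3.7+ the entry point can be simplified with asyncio.run, which creates the event loop, runs the coroutine, and closes the loop for you. A minimal variant, assuming the main() defined above:

    if __name__ == '__main__':
        # Equivalent entry point for Python 3.7+
        asyncio.run(main())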
3. Request types
    session.post('http://httpbin.org/post', data=b'data')
    session.put('http://httpbin.org/put', data=b'data')
    session.delete('http://httpbin.org/delete')
    session.head('http://httpbin.org/get')
    session.options('http://httpbin.org/get')
    session.patch('http://httpbin.org/patch', data=b'data')
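These methods accept the same keyword arguments as session.get. A minimal sketch (the payload values here are just placeholders, and it assumes httpbin.org is reachable) showing a POST with form data versus a JSON body:

    import aiohttp
    import asyncio

    async def main():
        async with aiohttp.ClientSession() as session:
            # data= sends a form-encoded (or raw bytes) body
            async with session.post('http://httpbin.org/post',
                                    data={'name': 'germey'}) as response:
                print(await response.text())
            # json= serializes the dict and sets the JSON content type
            async with session.post('http://httpbin.org/post',
                                    json={'name': 'germey'}) as response:
                print(await response.text())

    if __name__ == '__main__':
        asyncio.run(main())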
4. Response fields
    print('status:', response.status)       # status code
    print('headers:', response.headers)     # response headers
    print('body:', await response.text())   # response body as text
    print('bytes:', await response.read())  # response body as raw bytes
    print('json:', await response.json())   # response body parsed as JSON
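For context, a minimal runnable sketch that prints each of these fields for a single GET request (assuming httpbin.org, which returns a JSON body):

    import aiohttp
    import asyncio

    async def main():
        async with aiohttp.ClientSession() as session:
            async with session.get('https://httpbin.org/get') as response:
                print('status:', response.status)
                print('headers:', response.headers)
                print('body:', await response.text())
                # text(), read() and json() all work on the same body,
                # which aiohttp reads once and caches internally
                print('bytes:', await response.read())
                print('json:', await response.json())

    if __name__ == '__main__':
        asyncio.run(main())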
5. Timeout settings
    import aiohttp
    import asyncio

    async def main():
        # Set a 1-second total timeout
        timeout = aiohttp.ClientTimeout(total=1)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get('https://httpbin.org/get') as response:
                print('status:', response.status)

    if __name__ == '__main__':
        asyncio.get_event_loop().run_until_complete(main())
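Besides total, ClientTimeout also accepts finer-grained limits. A sketch of the available parameters (the values are arbitrary examples):

    import aiohttp

    timeout = aiohttp.ClientTimeout(
        total=10,        # upper bound for the whole request/response
        connect=2,       # time to acquire a connection (pool wait + connect)
        sock_connect=2,  # time to establish the TCP connection to the peer
        sock_read=5,     # maximum gap between reads from the socket
    )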
6. Concurrency limiting
    import asyncio
    import aiohttp

    # Limit maximum concurrency to 5
    CONCURRENCY = 5
    semaphore = asyncio.Semaphore(CONCURRENCY)
    URL = 'https://www.baidu.com'
    session = None

    async def scrape_api():
        # At most CONCURRENCY coroutines can hold the semaphore at once
        async with semaphore:
            print('scraping', URL)
            async with session.get(URL) as response:
                await asyncio.sleep(1)
                return await response.text()

    async def main():
        global session
        session = aiohttp.ClientSession()
        scrape_index_tasks = [asyncio.ensure_future(scrape_api()) for _ in range(10000)]
        await asyncio.gather(*scrape_index_tasks)
        # Close the session explicitly since it is not used as a context manager here
        await session.close()

    if __name__ == '__main__':
        asyncio.get_event_loop().run_until_complete(main())
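As an alternative to the semaphore, aiohttp can also cap the number of simultaneous connections at the connector level. A minimal sketch using TCPConnector (note this limits open connections rather than running tasks):

    import asyncio
    import aiohttp

    async def main():
        # limit=5 caps the number of simultaneous connections in the pool
        connector = aiohttp.TCPConnector(limit=5)
        async with aiohttp.ClientSession(connector=connector) as session:
            async with session.get('https://www.baidu.com') as response:
                print('status:', response.status)

    if __name__ == '__main__':
        asyncio.run(main())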
7. Practical example
    import asyncio
    import aiohttp
    import logging
    import json

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s: %(message)s')

    INDEX_URL = 'https://dynamic5.scrape.center/api/book/?limit=18&offset={offset}'
    DETAIL_URL = 'https://dynamic5.scrape.center/api/book/{id}'
    PAGE_SIZE = 18
    PAGE_NUMBER = 100
    CONCURRENCY = 5

    semaphore = asyncio.Semaphore(CONCURRENCY)
    session = None

    async def scrape_api(url):
        async with semaphore:
            try:
                logging.info('scraping %s', url)
                async with session.get(url) as response:
                    return await response.json()
            except aiohttp.ClientError:
                logging.error('error occurred while scraping %s', url, exc_info=True)

    async def scrape_index(page):
        url = INDEX_URL.format(offset=PAGE_SIZE * (page - 1))
        return await scrape_api(url)

    async def main():
        global session
        session = aiohttp.ClientSession()
        scrape_index_tasks = [asyncio.ensure_future(scrape_index(page)) for page in range(1, PAGE_NUMBER + 1)]
        results = await asyncio.gather(*scrape_index_tasks)
        logging.info('results %s', json.dumps(results, ensure_ascii=False, indent=2))
        # Close the session explicitly since it is not used as a context manager here
        await session.close()

    if __name__ == '__main__':
        asyncio.get_event_loop().run_until_complete(main())
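The code above defines DETAIL_URL but only crawls the index pages. A hypothetical follow-up step (not part of the original, and assuming each index page's JSON contains a results list whose items carry an id field) could reuse scrape_api to fetch every book's detail API:

    async def scrape_detail(book_id):
        # Hypothetical helper: fetch one book's detail JSON via DETAIL_URL
        url = DETAIL_URL.format(id=book_id)
        return await scrape_api(url)

    async def scrape_details(index_results):
        # index_results is the list returned by asyncio.gather above;
        # assumes each page dict has a 'results' list with 'id' fields
        ids = [item['id']
               for page in index_results if page
               for item in page.get('results', [])]
        tasks = [asyncio.ensure_future(scrape_detail(book_id)) for book_id in ids]
        return await asyncio.gather(*tasks)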
That concludes this brief introduction to using aiohttp in Python.