请求
请求方式
get请求
import requests request = requests.get("http://httpbin.org/get") print(request.text)
带参数get请求
import requests #date 以字典形式 data = { 'name':'germy', 'age':22 } response = requests.get("http://httpbin.org/get",params = data) print(response.text)
添加Headers
import requests headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' } response = requests.get('http://www.zhihu.com/explore',headers = headers) print(response.text)
解析json
import requests import json response = requests.get("http://httpbin.org/get") print(type(response.text)) print(response.json()) print(json.loads(response.text)) print(type(json.loads(response.text))) print(type(response.json()))
获取二进制数据
response = requests.get("http://github.com/favicon.ico") print(type(response.text),type(response.content)) print(response.text) print(response.content) import requests #注意文件路径要转义字符 #二进制数据要以b的方式写入写出 response = requests.get("http://github.com/favicon.ico") with open(r'C:\Users\key\Desktop\py\favicon.ico','wb') as f: f.write(response.content) f.close()
基本post请求
import requests data = { 'name':'gemey', 'age':'22' } headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' } response = requests.post("http://httpbin.org/post", data = data, headers = headers) print(response.json())
响应
response属性
import requests response = requests.get('http://www.jianshu.com') print(type(response.status_code), response.status_code) print(type(response.headers), response.headers) print(type(response.cookies), response.cookies) print(type(response.url), response.url) print(type(response.history), response.history)
状态码判断
import requests response = requests.get('http://www.baidu.com') print(response.status_code) exit()if not response.status_code == 200 else print('Request successfully')
常见状态码:
-
100~199
:表示服务器成功接收部分请求,要求客户端继续提交其余请求才能完成整个处理过程。 -
200~299
:表示服务器成功接收请求并已完成整个处理过程。常用200(OK 请求成功)。 -
300~399
:为完成请求,客户需进一步细化请求。例如:请求的资源已经移动一个新地址、常用302(所请求的页面已经临时转移至新的url)、307和304(使用缓存资源)。 -
400~499
:客户端的请求有错误,常用404(服务器无法找到被请求的页面)、403(服务器拒绝访问,权限不够)。 -
500~599
:服务器端出现错误,常用500(请求未完成。服务器遇到不可预知的情况)。
编码
import requests response = requests.get('http://xxxxxxxxx.com') #查看编码方式 print(response.encoding) #改变编码方式 response.encoding = 'ISO-8859-1' #处理网页源代码乱码 response.encoding = response.apparent_encoding
高级操作
文件上传
import requests #上传文件要以二进制模式打开文件 files ={'files':open(r'C:\Users\key\Desktop\py\favicon.ico', 'rb')} response = requests.post('http://httpbin.org/post',files = files ) print(response.text)
获取cookies
import requests response = requests.get('http://www.baidu.com') for key, value in response.cookies.items(): print(key+'='+value)
会话维持,用来模拟登陆
import requests s = requests.Session() s.get('http://httpbin.org/cookies/set/number/123456.') response = s.get('http://httpbin.org/cookies') print(response.text)
证书验证
import requests from requests.packages import urllib3 urllib3.disable_warnings() #消除警告信息 response = requests.get('http://www.12306.cn', verify = False) print(response.status_code) import requests response = requests.get('http://www.12306.cn', cert = (r'/xx/crt', r'/xx/xx')) print(response.status_code)
代理设置
import requests proxies = { 'http': 'http://127.0.0.1:9743', 'https': 'https://127.0.0.1:9743' } response =requests.get('http://www.baidu.com', proxies = proxies) import requests proxies = { 'http': 'http://user:password@127.0.0.1:9743/', } response =requests.get('http://www.baidu.com', proxies = proxies) import requests proxies = { 'http': 'socks5://127.0.0.1:9743', 'https': 'socks5://127.0.0.1:9743' } response =requests.get('http://www.baidu.com', proxies = proxies)
认证设置
import requests r = requests.get('http://xxxxxxxxx.com', auth = ('user', '123'))
异常处理
# 先捕捉子类异常,在捕捉父类异常 import requests from requests.exceptions import ReadTimeout, HTTPError, RequestException try: response = requests.get('http://xxxxxxxxx.com',timeout = 1) print(response.status_code) except ReadTimeout: print('timeout') except HTTPError: print('httperror') except RequestException: print('error')
更多操作参考官方文档:https://requests.readthedocs.io/zh_CN/latest/