导出网站的cookie给requests和scrapy

导出网站的cookie给requests和scrapy

使用 Chrome 浏览器扩展 EditThisCookie(Edit This Cookie)导出当前网站的 cookie。

在设置中将导出格式设置为: Netscape HTTP Cookie File

将导出的内容保存为文本文件(例如 c:/temp/cookie.txt),并在代码中设置 cookie_file = 'c:/temp/cookie.txt'

注意:带#的行也要复制进去(MozillaCookieJar 加载时会检查文件首行的 "# Netscape HTTP Cookie File" 标识,缺少该行会抛出 LoadError)。示例:

(以下为导出的 cookie 文件内容,各字段之间以制表符分隔)
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This file was generated by EditThisCookie
.cpquery.cnipa.gov.cn	TRUE	/	FALSE	0	_gscbrs_930750436	1
.cpquery.cnipa.gov.cn	TRUE	/	FALSE	1650520120	_gscs_930750436	t50518320twgbh649|pv:1
.cpquery.cnipa.gov.cn	TRUE	/	FALSE	1713590320	_gscu_930750436	50424030muza0119
cpquery.cnipa.gov.cn	FALSE	/	FALSE	1650550396	bg6	62|BH0/r
cpquery.cnipa.gov.cn	FALSE	/	FALSE	0	JSESSIONID	a8aedc2b8c96e02a9741d4f5be1a
cpquery.cnipa.gov.cn	FALSE	/	FALSE	1965784029	UR3ZMlLdcLIE80S	1qJmCEYVmJ5c3GH7XC9LsGT_JQj5eDIKafIPtVAL6BSABt6X5bS2TAtZpNrKiONw
cpquery.cnipa.gov.cn	FALSE	/	FALSE	1965878349	UR3ZMlLdcLIE80T	4qsUUCBYuiRY4_R_kewfMI9XrpjPbw_.uKTBLsGermqGUuJCBcBGCTe1x_ub_3C7UkeWNJYlvPk1BvpvayLaMHyWbfbLaXQ4t.VTI2RGXUNeIv_sPhGWC5XclgAFOZ_M.5fnneoDv6PfCZK_j.VGkI2p50s0lbqbuWIfMQ3Mx6a1DsCHSszk2lK3XbHpRedYB4kfq6vUNljvpFGPC.iFktGjxG4YXW4UTCa2_QdnIsQA6eOaccC8.ZSnyEDcyJkGEKclsdLVV9_wJq.cPfVdM.HI.q5moAKSvI_ZneMYEOjw9eljC4LlOjZl4q6CggAwx21epwBrBzgH.YheHtcmVYl4wIjjUxywTvn0q3oYcrN07pZ9A2zWZ2ed6DTEqJGi_LiL
用 MozillaCookieJar 加载导出的 cookie 文件:
from http.cookiejar import MozillaCookieJar

# Load the Netscape-format cookie file exported by EditThisCookie.
# `cookie_file` is the path of that file, e.g. 'c:/temp/cookie.txt'.
cookie = MozillaCookieJar()

# By default load() skips session cookies and anything already expired.
# EditThisCookie writes session cookies (e.g. JSESSIONID) with an expiry of 0,
# which counts as "expired", so without these flags they are silently dropped.
cookie.load(cookie_file, ignore_discard=True, ignore_expires=True)

# Turn the "expires = 0" entries back into real session cookies so they are
# not treated as expired when the jar is used later.
for item in cookie:
    if item.expires == 0:
        item.expires = None
        item.discard = True

# Now that session cookies are protected, remove cookies whose real expiry
# time has passed (the same ones a plain load() would have skipped).
cookie.clear_expired_cookies()
在 requests 中使用导出的 cookie:
from http.cookiejar import MozillaCookieJar

import requests

# Build a requests session whose cookie jar is backed by the exported file.
# `cookie_file` is the path of that file, e.g. 'c:/temp/cookie.txt'.
session = requests.session()
session.cookies = MozillaCookieJar(filename=cookie_file)

# A plain load() skips session cookies and anything with a past expiry.
# EditThisCookie exports session cookies (e.g. JSESSIONID) with expiry 0,
# so they must be loaded explicitly and then marked as session cookies;
# otherwise they are never sent with the request.
session.cookies.load(ignore_discard=True, ignore_expires=True)
for item in session.cookies:
    if item.expires == 0:
        item.expires = None
        item.discard = True

# Drop the cookies that are genuinely past their expiry time.
session.cookies.clear_expired_cookies()

url = 'https://www.morningstar.cn/fundcompare/compare.aspx'
res = session.get(url)
在 scrapy 中使用导出的 cookie:
# `session` is the requests session built in the previous snippet.
# Flatten its cookie jar into a plain {name: value} dict, which is the form
# handed to scrapy through the `cookies=` argument below.
cookie_dict = requests.utils.dict_from_cookiejar(session.cookies)
# Option 1: quick interactive check; `fetch` is only available inside `scrapy shell`.
fetch(url, cookies=cookie_dict) # scrapy shell
# Option 2: issue the request from spider code. The `yield` has to live inside
# a generator (e.g. a spider's start_requests/parse method), not at module level.
yield scrapy.Request(
    url, 
    # Demo callback: just prints the response object it receives.
    callback=lambda r: print(r),
    cookies=cookie_dict,
    # NOTE(review): per Scrapy's Request.meta docs these keys stop redirects
    # from being followed, let 302 responses reach the callback, and bypass
    # the HTTP cache -- confirm against the Scrapy version in use.
    meta={
        'dont_redirect': True,
        'handle_httpstatus_list': [302],
        'dont_cache': True
    }
)