简单爬虫爬取51job职位

python学习网 2017-06-27 13:55:04
 1 #-*- coding:utf-8 -*-
 2 from urllib import request
 3 from bs4 import BeautifulSoup
 4 from urllib import parse
 5 import pymysql
 6 url = "http://search.51job.com/jobsearch/search_result.php"
 7 rep = request.Request(url)
 8 rep.add_header("User-Agent","Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36")
 9 rep.add_header("Origin","http://search.51job.com")
10 postData = parse.urlencode([
11     ("fromJs", "1"),
12     ("jobarea", "040000"),
13     ("keyword", "python"),
14     ("keywordtype", "2"),
15     ("lang", "c"),
16     ("stype", "2"),
17     ("postchannel", "0000"),
18     ("fromType", "1"),
19     ("confirmdate", "9")
20 ])
21 print(postData)
22 return_ = request.urlopen(rep,data=postData.encode("gbk"))
23 content = return_.read().decode("gb18030")
24 sp = BeautifulSoup(content,"html.parser")
25 f = open("b.txt",'w')
26 
27 info_set = set([])
28 j = 0
29 for i in sp.find("div",class_="dw_table").find_all("div",class_="el"):
30   if j==0:
31       j = j + 1
32       continue;
33   j = j + 1
34   content = i.find("a").get_text().strip()+"*"+i.find("span",class_="t2").string+"*"+i.find("span",class_="t3").string+"*"+i.find("span",class_="t4").string+"*"+i.find("span",class_="t5").string+"\n"
35   f.write(str(content))
36 print("下载完成")
37 print(info_set)
38 f.close()
39 #分页有困难
View Code

 

阅读(843) 评论(0)