案例 — 使用bs4 爬取猫眼电影热榜

python学习网 2019-07-13 07:10:02

使用 BeautifulSoup 库(bs4 模块),主要用到 select 选择器和 for 循环,并把结果存入本地 txt 文件

 

from bs4 import BeautifulSoup
from urllib import request


# Scrape the Maoyan movie board page and save one line per movie
# (title, release time, stars, score) to a local UTF-8 text file.
url = "http://maoyan.com/board"

# NOTE(review): many sites reject urllib's default "Python-urllib/x.y"
# User-Agent; a browser-like value is sent as a precaution -- confirm it is
# still accepted by the target site.
req = request.Request(url, headers={"User-Agent": "Mozilla/5.0"})

# Use the response as a context manager so the connection is always released.
with request.urlopen(req) as rsq:
    html = rsq.read().decode("utf-8")

soup = BeautifulSoup(html, "lxml")

items = soup.select('dd')       # every <dd>...</dd> element in the page

with open("D://maoyan.txt", "w", encoding="utf-8") as f:    # create/overwrite the local txt file

    for item in items:
        title_tag = item.select_one('p a[data-act="boarditem-click" ]')
        star_tag = item.select_one('p[class = "star"]')
        score_tag = item.select_one('p[class = "score"]')
        release_tag = item.select_one('p[class = "releasetime"]')

        # Skip a <dd> that lacks any expected field instead of aborting the
        # whole run with an IndexError and leaving a half-written file.
        if (title_tag is None or star_tag is None
                or score_tag is None or release_tag is None):
            continue

        title = title_tag.get_text()                                # movie title
        star = star_tag.get_text().replace("\n", "").strip(" ")     # leading actors
        score = score_tag.get_text().strip('\n').strip(' ')         # rating
        releasetime = release_tag.get_text()                        # release date

        # Assemble one output record per movie.
        datas = title + "  " + releasetime + "  " + star + "  " + score + "\n"
        print(datas)
        f.write(datas)
# No explicit f.close() is needed: the with-statement already closed the file.
print("Sucessful")

 

阅读(2637) 评论(0)