BeautifulSoup 爬取图片案例

yao
yao
2022-02-09 / 0 评论 / 58 阅读 / 正在检测是否收录...

在这个案例中,我们可以选择要下载的壁纸类型,以及要下载的页码范围(每页 24 张)。
爬取的网站:http://bizhi360.com/

代码如下

```python

import requests
from bs4 import BeautifulSoup
import time
import os
from urllib.parse import urljoin

# Interactive wallpaper downloader for bizhi360.com.
#
# Flow: ask for a category and a page range, walk every listing page in that
# range, open each wallpaper's detail page, and save the image found inside
# its <figure> element to images/<category>/<file name>.

# Site root; listing and detail-page URLs are built relative to it.
url = "http://bizhi360.com/"

# URL slugs of the categories, and the matching Chinese labels (same order).
kind_list = ['desk', 'fengjing', 'meinv', 'weimei', 'dongman', 'dongtai', 'feizhuliu', 'chuangyi',
             'keai', 'katong', 'kuanping', '3d', 'youxi', 'dongwu', 'qiche', 'yueli',]
kind_clist = ["壁纸图片大全", "风景", "美女", "唯美", "动漫", "动态", "非主流", "创意", "可爱", "卡通",
              "宽屏", "3d", "游戏", "动物", "汽车", "月历"]
print(kind_clist)
print(kind_list)

inp = input("请输入你要的类型(全部直接回车):")
begin = int(input("请输入开始下载页面(每页24张)"))
end = int(input("请输入结束下载页面(每页24张)"))

# Both lists are shown to the user, so accept a Chinese label as well as a
# slug; previously a label was not recognised and silently meant "everything".
inp = dict(zip(kind_clist, kind_list)).get(inp, inp)

# A recognised category narrows the run to that one; an empty (or unknown)
# answer keeps the full list, as before.
if inp in kind_list:
    kind_list = [inp]

for kind in kind_list:
    num = 0  # images saved for this category

    # Create the category directory. os.path.join keeps the separator
    # consistent with the path used when writing files below (a hard-coded
    # backslash only names a sub-directory on Windows).
    kind_dir = os.path.join("images", kind)
    if not os.path.exists(kind_dir):
        os.makedirs(kind_dir)
        print("创建该目录成功:" + kind_dir)

    for h in range(begin, end + 1):
        # Page 1 is the category root; later pages are list_<n>.html.
        if h == 1:
            page_url = url + kind
        else:
            page_url = url + kind + f"/list_{h}.html"

        # The context manager releases the connection for every page, not
        # just the last one fetched.
        with requests.get(page_url) as resp:
            resp.encoding = 'UTF-8'
            # Hand the page source to BeautifulSoup.
            main_page = BeautifulSoup(resp.text, "html.parser")

        # class_ (trailing underscore) avoids clashing with Python's `class`
        # keyword; find(tag, attrs={"class": "pic-list"}) is equivalent.
        pic_list = main_page.find("div", class_="pic-list")
        if pic_list is None:
            # No listing on this page (missing page or changed layout).
            continue

        # Every <a> in the listing links to one wallpaper's detail page.
        for n in pic_list.find_all("a"):
            href = n.get("href")
            if not href:
                continue
            # urljoin copes with root-relative, relative and absolute hrefs
            # without producing a double slash after the host.
            child_url = urljoin(url, href)

            with requests.get(child_url) as child_resp:
                child_resp.encoding = "UTF-8"
                child_page = BeautifulSoup(child_resp.text, "html.parser")

            pic_img_list = child_page.find("figure")
            if pic_img_list is None:
                continue
            pic_img = pic_img_list.find('img')
            if pic_img is None:
                continue
            pic_src = pic_img.get("src")
            if not pic_src:
                continue
            pic_src = urljoin(child_url, pic_src)

            # Download the image; the file name is the last URL segment.
            pic_name = pic_src.split("/")[-1]
            with requests.get(pic_src) as pic_resp:
                content = pic_resp.content  # raw bytes of the image
            with open(os.path.join(kind_dir, pic_name), "wb") as f:
                f.write(content)

            num += 1
            print(f"已下载{num}张", end="-")
            time.sleep(1)  # pause between downloads to go easy on the server

    print(" ")
    print(f"类型:{kind}下载完成,共下载{num}")

```

0

评论 (0)

取消