Python 爬虫

    科技2023-10-17  99

    # Download every thumbnail listed on one pic.netbian.com gallery index page
    # into the local ./pic directory, naming each file after the image's alt text.
    import os
    from urllib.parse import urljoin

    import requests
    from lxml import etree

    # Gallery index page whose thumbnails are downloaded.
    GALLERY_URL = 'http://pic.netbian.com/4kmeinv/'

    # Browser-like User-Agent sent with every request.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }

    # The original script re-decoded the alt text as GBK, so the page is
    # treated as GBK-encoded here.
    PAGE_ENCODING = 'gbk'

    # Directory that receives the downloaded images.
    OUTPUT_DIR = './pic'

    # Seconds to wait for a server response before giving up, so a stalled
    # connection cannot hang the script forever.
    REQUEST_TIMEOUT = 10


    def main():
        """Fetch the gallery page and save each listed thumbnail under OUTPUT_DIR.

        Raises:
            requests.RequestException: if the gallery index page itself cannot
                be fetched. Failures on individual images are reported and
                skipped so the remaining images are still downloaded.
        """
        with requests.Session() as session:
            session.headers.update(HEADERS)

            # Request the index page.
            response = session.get(GALLERY_URL, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
            # Decode the body with the page's real encoding up front instead of
            # round-tripping each name through ISO-8859-1 afterwards; the
            # round-trip raises UnicodeEncodeError whenever requests did not
            # fall back to ISO-8859-1.
            response.encoding = PAGE_ENCODING

            # Build the element tree and collect one <li> per thumbnail.
            tree = etree.HTML(response.text)
            li_list = tree.xpath('//ul[@class = "clearfix"]/li')

            # Create the output directory; no error if it already exists.
            os.makedirs(OUTPUT_DIR, exist_ok=True)

            for li in li_list:
                src_values = li.xpath('./a/img/@src')
                alt_values = li.xpath('./a/img/@alt')
                if not src_values or not alt_values:
                    # Entry without an <img> (or without src/alt): nothing to save.
                    continue

                # Resolve the image address against the page URL; this also
                # copes with absolute or slash-less src values.
                complete_url = urljoin(GALLERY_URL, src_values[0])

                # The alt text comes from the remote page, so keep only its
                # final path component to stay inside OUTPUT_DIR.
                img_name = os.path.basename(alt_values[0].replace('\\', '/')) + '.jpg'

                try:
                    img_response = session.get(complete_url, timeout=REQUEST_TIMEOUT)
                    # Do not store an HTTP error page as if it were an image.
                    img_response.raise_for_status()
                except requests.RequestException as exc:
                    print(img_name + "下载失败: " + str(exc))
                    continue

                # Store the raw image bytes.
                filename = os.path.join(OUTPUT_DIR, img_name)
                with open(filename, 'wb') as fp:
                    fp.write(img_response.content)
                print(img_name + "下载完成!")


    if __name__ == '__main__':
        main()

    Processed: 0.014, SQL: 8