网站截图:
很多人学习 Python,不知道从何学起。 很多人学习 Python,掌握了基本语法之后,不知道在哪里寻找案例上手。 很多已经做过案例的人,却不知道如何去学习更加高深的知识。 那么针对这三类人,我给大家提供一个好的学习平台,免费领取视频教程,电子书籍,以及课程的源代码! QQ群:961562169
spider4taopiaopiao.py
# Scrape the movie ranking list from the website.
import html
import json
import os
import re
import time

import requests

# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 10

# HTML comment that opens the "coming soon" section of the page; everything
# from this marker onwards is ignored so only the current ranking is parsed.
_COMING_SOON_MARKER = '<!-- 即将热映 -->'

# Labels of the six "<span>label:value</span>" fields, in page order:
# director, cast, genre, region, language, running time.
_FIELD_LABELS = ("导演", "主演", "类型", "地区", "语言", "片长")

# One match per movie card.  Groups: poster URL, title, rating, then the six
# labelled fields above.  The rating group is optional because some cards
# have an empty rating span.
# NOTE(review): both the ASCII and the full-width colon are accepted after a
# label, since the published listing may have had its punctuation normalised
# -- confirm against the live page.
_MOVIE_PATTERN = re.compile(
    r'<img width="160" height="224" data-src="(.*?)" src='
    r'.*?<span class="bt-l">(.+?)</span>'
    r'.*?<span class="bt-r">(\d+\.\d)?</span>'
    + "".join(
        r".*?<span>{0}[::](.*?)</span>".format(label)
        for label in _FIELD_LABELS
    ),
    re.S,
)


def _parse_movies(page):
    """Extract one list of nine strings per movie from the page source."""
    end = page.find(_COMING_SOON_MARKER)
    if end != -1:
        page = page[:end]
    # Empty captures become a "no information" placeholder.  Everything else
    # has its HTML entities decoded: the published listing showed a no-op
    # replace("·", "·"), which looks like an entity replacement whose entity
    # was decoded when the code was pasted; html.unescape covers that case.
    return [
        [html.unescape(field) if field else "暂无信息" for field in match]
        for match in _MOVIE_PATTERN.findall(page)
    ]


def _download_posters(items, headers, dir_name="./images"):
    """Save each movie's poster as <dir_name>/<rank index>.jpg."""
    os.makedirs(dir_name, exist_ok=True)
    for index, item in enumerate(items):
        file_name = str(index) + ".jpg"
        response = requests.get(
            item[0], headers=headers, timeout=_REQUEST_TIMEOUT
        )
        # Fail loudly rather than saving an error page under a .jpg name.
        response.raise_for_status()
        with open(os.path.join(dir_name, file_name), "wb") as f:
            f.write(response.content)
        info = "图片文件: {0:25}{1}".format(file_name, " 成功下载...")
        print(info)


def mySpider():
    """Scrape the ranking page, save the results and download the posters.

    Side effects: writes the raw page to ``result.txt``, the parsed items to
    ``items.json`` and one poster per movie to ``./images/<index>.jpg``.

    Returns:
        A list with one entry per movie, in page order.  Each entry is a
        list of nine strings: poster URL, title, rating, director, cast,
        genre, region, language and running time.  Missing values are
        replaced by "暂无信息".

    Raises:
        requests.RequestException: if a request fails, times out or returns
            an HTTP error status.
    """
    # Present a browser User-Agent instead of the default one sent by requests.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
    }
    print("网页请求中...")
    time.sleep(0.5)
    url = "https://dianying.taobao.com/showList.htm?spm=a1z21.6646273.city.2.4ed46d6ekOc3wH&n_s=new&city=310100"
    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    page = response.text
    print("网页信息已获取...")
    time.sleep(0.5)

    # Keep a copy of the complete, untruncated page for inspection.
    with open("result.txt", "w", encoding="utf-8") as fd:
        fd.write(page)

    items = _parse_movies(page)

    # ensure_ascii=False keeps the Chinese text readable in the UTF-8 file.
    with open("items.json", "w", encoding="utf-8") as fd:
        json.dump(items, fd, ensure_ascii=False)

    _download_posters(items, headers)
    return items


if __name__ == "__main__":
    mySpider()
# Sample output of a run is shown below:
GUI4Spider.py
# Build a simple tkinter GUI (graphical user interface) for the scraped ranking.
import json
import time
import tkinter as tk

from PIL import Image, ImageTk

from spider4taopiaopiao import mySpider

# Scrape fresh data on start-up.  To reuse the result of an earlier run
# instead, load the list from "items.json" with json.load().
items = mySpider()
if not items:
    # Without any movie there is no poster or text to display, and the
    # navigation below would index into an empty list.
    raise SystemExit("未获取到任何电影信息,无法显示排行榜。")

# Field layout of every item:
# 0 poster URL, 1 title, 2 rating, 3 director, 4 cast, 5 genre,
# 6 region, 7 language, 8 running time.
infoMap = {
    0: "图片链接:",
    1: "电影名:",
    2: "评分:",
    3: "导演:",
    4: "主演:",
    5: "类型:",
    6: "地区:",
    7: "语言:",
    8: "片长:",
}

current_rank = 1  # 1-based rank of the movie currently on screen
total_rank = len(items)

root = tk.Tk()
root.title("淘票票电影热映排行榜,更新时间:" +
           time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
root.geometry('800x800')
try:
    root.iconbitmap("movie.ico")
except tk.TclError:
    # The icon is purely cosmetic; a missing or unsupported icon file must
    # not prevent the window from opening.
    print("movie.ico 无法加载,使用默认窗口图标。")


def getImage(filename):
    """Load an image file and return it as a Tk-compatible PhotoImage."""
    with Image.open(filename) as picture:
        return ImageTk.PhotoImage(picture)


def labimgconfig():
    """Show the poster that belongs to the current rank."""
    # The PhotoImage is stored in a module-level name so that a reference to
    # it survives after this function returns.
    global newImage
    newImage = getImage("images/" + str(current_rank - 1) + ".jpg")
    labimg.config(image=newImage)


def labInfoConfig():
    """Fill the text labels with the fields of the current movie."""
    info = items[current_rank - 1]
    # Label n shows field n + 1; field 0 (the poster URL) is not displayed.
    for index, label in enumerate(labInfo, start=1):
        label.config(text=infoMap[index] + info[index])
    labRank.config(text="排名:#" + str(current_rank))


def showPre():
    """Step to the previous movie, stopping at rank 1."""
    global current_rank
    current_rank = max(current_rank - 1, 1)
    print("显示前一部电影...", current_rank)
    labimgconfig()
    labInfoConfig()


def showNxt():
    """Step to the next movie, stopping at the last rank."""
    global current_rank
    current_rank = min(current_rank + 1, total_rank)
    print("显示后一部电影...", current_rank)
    labimgconfig()
    labInfoConfig()


# Poster at the top of the window.
labimg = tk.Label(root)
labimg.pack()

# One coloured label per displayed field (fields 1 to 8).
colors = ["Red", "Orange", "Yellow", "Green", "Blue", "Violet", "Purple", "Chocolate"]
labInfo = []
for color in colors:
    labtemp = tk.Label(root, bg=color, width=200, height=3, wraplength=1000)
    labtemp.pack()
    labInfo.append(labtemp)

labRank = tk.Label(root, bg="Red", width=9, height=3,
                   text="排名:#" + str(current_rank))
labRank.pack()

btnPre = tk.Button(root, width=15, height=5, text="显示前一个", command=showPre)
btnNxt = tk.Button(root, width=15, height=5, text="显示后一个", command=showNxt)
btnPre.pack(side=tk.LEFT, anchor=tk.S)
btnNxt.pack(side=tk.RIGHT, anchor=tk.S)

# Render the first movie before entering the event loop.
labimgconfig()
labInfoConfig()
root.mainloop()
# The result of a run is shown below: