Python爬取新闻并自动发送到邮箱

    科技2022-07-11  104

    完整代码

    # Full script: scrape the top stories from news.163.com into a dated CSV, then e-mail one of them at random.
import csv
import random
import time

import pandas as pd
import requests
import schedule  # only needed by the optional fixed-time sending loop at the bottom
import yagmail
from lxml import etree

# Date stamp (YYYY-MM-DD) computed once at import time; it prefixes the CSV
# file name, so a long-running process keeps using the start-up date.
shijian = time.strftime('%Y-%m-%d')

# CSV column order. send_mail() unpacks rows positionally in this order.
_COLUMNS = ['标题', '来源时间', '内容', '编辑', '新闻链接']

# Seconds to wait for any single HTTP response before giving up.
_REQUEST_TIMEOUT = 10

_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36 Edg/81.0.416.68'
}


def _load_tree(url):
    """Fetch *url* and return its parsed HTML tree, or None when unusable.

    Network failures and malformed URLs (both surface as
    ``requests.RequestException``) and empty responses yield None so that a
    single bad link does not abort the whole scrape.
    """
    try:
        page = requests.get(url=url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException:
        return None
    if not page.text.strip():
        return None
    return etree.HTML(page.text)


def _parse_article(link):
    """Return one CSV row (a dict keyed by ``_COLUMNS``) for *link*, or None.

    An article is skipped when the page cannot be loaded, when it has four or
    fewer text paragraphs, or when its title, time/source line or editor
    cannot be found.
    """
    tree = _load_tree(link)
    if tree is None:
        return None

    paragraphs = tree.xpath('//div[@class="post_text"]/p/text()')
    if len(paragraphs) <= 4:
        return None

    titles = tree.xpath('//div[@class="post_content_main"]/h1/text()')
    # Only the first two text nodes of the info bar are used.
    stamps = tree.xpath('//*[@id="epContentLeft"]/div[1]//text()')[:2]
    editors = tree.xpath('//div[@class="ep-source cDGray"]//span[@class="ep-editor"]/text()')
    if not (titles and stamps and editors):
        return None

    # Fragments are glued with str.join instead of eval() on a doctored
    # repr(); unlike the old hack this keeps commas inside the article body.
    return {
        '标题': "".join(titles[0].split()),
        # Commas count as separators here; whitespace runs collapse to '-'.
        '来源时间': "-".join("".join(stamps).replace(',', ' ').split()),
        '内容': "".join("".join(paragraphs).split()),
        '编辑': editors[0],
        '新闻链接': link,
    }


def get_text():
    """Scrape the front-page top stories and save them as CSV.

    Collects the headline link plus the list links from the ``js_top_news``
    box of https://news.163.com/, parses every linked article and writes the
    usable ones to ``./<shijian>新闻.csv`` (UTF-8, header row always present).

    Raises:
        requests.RequestException: if the front page itself cannot be fetched.
    """
    url = 'https://news.163.com/'
    res = requests.get(url=url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
    front = etree.HTML(res.text)

    # Gather article URLs: the first headline link (if any), then the list.
    links = []
    for box in front.xpath('//*[@id="js_top_news"]/div[2]'):
        links.extend(box.xpath('./h2/a/@href')[:1])
        links.extend(box.xpath('./ul/li/a/@href'))

    rows = []
    for link in links:
        row = _parse_article(link)
        if row is not None:
            rows.append(row)

    # Passing the columns explicitly writes the header even with zero rows.
    table = pd.DataFrame(data=rows, columns=_COLUMNS)
    table.to_csv(f'./{shijian}新闻.csv', index=False, encoding='utf-8')


def read_news():
    """Return one randomly chosen data row from the CSV as a list of strings.

    Raises:
        FileNotFoundError: if the CSV for ``shijian`` has not been written.
        ValueError: if the CSV holds no data rows (header only or empty).
    """
    # Read with the encoding the file was written in; newline='' is what the
    # csv module expects from its input file.
    with open(f'./{shijian}新闻.csv', 'r', encoding='utf-8', newline='') as file:
        rows = list(csv.reader(file))

    stories = rows[1:]  # drop the header row
    if not stories:
        raise ValueError('news CSV contains no stories; run get_text() first')
    return random.choice(stories)


def send_mail():
    """E-mail one random story from the CSV through smtp.qq.com.

    The account, authorisation code and recipient below are placeholders to
    be filled in before use.
    """
    title, published, body, editor, link = read_news()
    yag = yagmail.SMTP(user='邮箱', password='授权码', host='smtp.qq.com')
    contents = [title, published, body, editor, link]
    print(contents)
    # NOTE(review): the subject is the literal '标题', not the story title
    # read above; it looks like a placeholder, so it is left as is. Confirm.
    yag.send('发送的邮箱', '标题', contents)


if __name__ == '__main__':
    # Uncomment to refresh the CSV before sending:
    # get_text()
    send_mail()
    print('发送成功')
    # Optional: send at a fixed time every day instead of once.
    # schedule.every().day.at("21:00").do(send_mail)
    # while True:
    #     schedule.run_pending()
    #     time.sleep(1)
    Processed: 0.016, SQL: 8