自己写的登录淘宝并搜索商品的脚本,将搜索到的信息写入 Excel。主要用到 selenium、显式等待和 openpyxl(读写 Excel)。Tip:本文仅供学习与参考,切勿用作不法用途~
import os
import re
import time

from openpyxl import Workbook
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Directory the workbook is written to unless the caller passes ``output_dir``.
# A raw string keeps the backslashes literal (no invalid-escape warnings).
DEFAULT_OUTPUT_DIR = r'E:\pycharm\PyCharm Community Edition 2020.2.2\py_project\TaoBao'

# Leading number of a "N人付款" / "N+人付款" / "N万+人付款" label, plus the
# optional "万" (x10000) multiplier that follows it.
_BUY_COUNT_RE = re.compile(r'\s*(\d+(?:\.\d+)?)\s*(万?)')


def _wait_visible(context, locator, timeout=10):
    """Wait up to *timeout* seconds for *locator* to be visible under *context* and return it."""
    return WebDriverWait(context, timeout).until(
        EC.visibility_of_element_located(locator)
    )


def _text_or(item, xpath, fallback):
    """Return the text of the element at *xpath* inside *item*, or *fallback* if Selenium fails."""
    try:
        return _wait_visible(item, (By.XPATH, xpath), 3).text
    except WebDriverException:
        return fallback


def _parse_buy_count(text):
    """Convert a purchase-count label such as '1.5万+人付款' or '123人付款' to an int.

    Raises:
        ValueError: if *text* does not start with a number, or has a decimal
            number without the "万" multiplier.
    """
    match = _BUY_COUNT_RE.match(text)
    if match is None:
        raise ValueError('unrecognised purchase count: {!r}'.format(text))
    number, wan = match.groups()
    if wan:
        # round() rather than int(): float error can leave the product a hair
        # below the true value, which plain truncation would turn into N-1.
        return int(round(float(number) * 10000))
    return int(number)


def _scrape_item(item, low, high):
    """Read one search-result card.

    Returns:
        A ``(store, desc, price, buy_person, place)`` tuple, or ``None`` when
        the card's price is outside ``[low, high]``. Fields that cannot be
        read are replaced by a placeholder string.
    """
    try:
        price = _wait_visible(item, (By.XPATH, './/strong'), 3).text
        value = float(price)
    except (WebDriverException, ValueError):
        # A missing or non-numeric price does not exclude the card; it is
        # recorded with a placeholder instead.
        price = '没有找到价格'
    else:
        if value > high or value < low:
            return None

    try:
        buy_person = _parse_buy_count(
            _wait_visible(item, (By.XPATH, './/div[@class="deal-cnt"]'), 3).text
        )
    except (WebDriverException, ValueError):
        buy_person = '没有找到购买人数'

    desc = _text_or(item, './/div[@class="row row-2 title"]/a', '没有找到商铺描述')
    store = _text_or(item, './/div[@class="shop"]/a', '没有找到商铺店名')
    place = _text_or(item, './/div[@class="location"]', '没有找到店铺地址')
    return store, desc, price, buy_person, place


class taobao:
    """Log in to Taobao with Chrome, search for a product and export the results to Excel."""

    def taobao(self, user, passwd, search_goods, max_page, min_price='0',
               max_price='9999999999', output_dir=DEFAULT_OUTPUT_DIR):
        """Scrape search results for *search_goods* and save them as an .xlsx file.

        Args:
            user: Login name typed into the login form.
            passwd: Password typed into the login form.
            search_goods: Search keyword; also used in the output file name.
            max_page: Maximum number of result pages to read (str or int).
                Clamped to the page count reported by the site.
            min_price: Lower price bound, inclusive (str or number).
            max_price: Upper price bound, inclusive (str or number).
            output_dir: Directory the workbook is written to. Defaults to the
                directory the script was originally hard-coded to use.

        Raises:
            ValueError: if *max_page*, *min_price* or *max_price* is not numeric.
            selenium.common.exceptions.WebDriverException: if a page-level
                element (login form, search box, page counter, result list,
                next-page link) does not appear in time.
        """
        # Validate once, before a browser is launched, instead of per item.
        page_limit = int(max_page)
        low = float(min_price)
        high = float(max_price)

        driver = webdriver.Chrome()
        try:
            driver.get('https://www.taobao.com/')
            driver.maximize_window()

            # Log in.
            _wait_visible(driver, (By.LINK_TEXT, '亲,请登录')).click()
            _wait_visible(driver, (By.NAME, 'fm-login-id')).send_keys(user)
            _wait_visible(driver, (By.NAME, 'fm-login-password')).send_keys(passwd)
            _wait_visible(driver, (By.CLASS_NAME, 'fm-btn')).click()

            # Search. The longer timeout covers the post-login redirect.
            _wait_visible(
                driver, (By.CLASS_NAME, 'search-combobox-input'), 25
            ).send_keys(search_goods)
            _wait_visible(driver, (By.CLASS_NAME, 'search-button')).click()

            # The counter text is split on whitespace and the second token is
            # taken as the total page count (e.g. "共 100 页,").
            total_text = _wait_visible(driver, (By.CLASS_NAME, 'total')).text
            page = min(int(total_text.split()[1]), page_limit)

            book = Workbook()
            sheet = book.active
            sheet.append(['商铺', '商铺描述', '价格', '购买人数', '商铺地址'])

            for page_no in range(1, page + 1):
                print('这是第', page_no, '页数据')
                # NOTE(review): this is an exact match on the class attribute
                # and therefore whitespace-sensitive; confirm against the live page.
                items = WebDriverWait(driver, 10).until(
                    EC.visibility_of_all_elements_located((
                        By.XPATH,
                        '//div[@class="items"]/div[@class="item J_MouserOnverReq "]',
                    ))
                )
                for item in items:
                    row = _scrape_item(item, low, high)
                    if row is None:
                        continue
                    sheet.append(row)
                    store, desc, price, buy_person, place = row
                    print(price, buy_person, desc, store, place)

                # Only advance when another page will be read; on the last
                # page the link may be absent and the wait would time out
                # before the workbook is saved.
                if page_no < page:
                    _wait_visible(
                        driver, (By.XPATH, '//a[@class="J_Ajax num icon-tag"]')
                    ).click()

            # File name: keyword, page count, price range and a timestamp
            # (%M is minutes; %m would repeat the month).
            filename = '{}的前{}页价格在{}-{}之间的数据{}.xlsx'.format(
                search_goods, page, min_price, max_price,
                time.strftime('%Y%m%d%H%M%S'),
            )
            book.save(os.path.join(output_dir, filename))
            time.sleep(5)
        finally:
            # Always close the browser, even when scraping fails part-way.
            driver.quit()


if __name__ == '__main__':
    A = taobao()
    # Arguments: user name, password, search keyword, max pages, min price, max price.
    A.taobao('dqking', 'dqking', '维生素C', '1', '50', '150')