import datetime
# Record and show the script's start time; the final statement of the
# script subtracts this value from the end time to report the total run time.
a=datetime.datetime.now()
print(a)
import random
import requests
import time
from lxml import etree
import os
from pathlib import Path
def del_file(num, directory='K:/zhusc'):
    """Delete ``<directory>/<num>.txt`` if it exists.

    Args:
        num: Base name of the file without the ``.txt`` suffix. It is
            converted with ``str``, so numbers are accepted as well.
        directory: Folder that holds the file. Defaults to ``K:/zhusc``.

    Returns:
        None. A missing file is not an error; the call does nothing.
    """
    target = Path(directory) / '{}.txt'.format(str(num))
    try:
        # Delete directly instead of testing exists() first, so a file that
        # disappears between the check and the removal cannot raise.
        os.remove(target)
    except FileNotFoundError:
        # Nothing to clean up.
        pass
# Remove K:/zhusc/test.txt left over from an earlier run, because the
# scraper below only ever appends to that file.
del_file('test')
def inputme(s, r):
    """Append ``s`` followed by a newline to the text file at path ``r``.

    Args:
        s: Text to append. Either a single string or an iterable of
            strings; the pieces are written back to back with no separator.
        r: Path of the file to append to. The file is created if missing.
    """
    # The context manager flushes and closes the handle on every call,
    # including when a write fails, so no file descriptors are leaked.
    with open(r, 'a+') as f:
        f.writelines(s)
        f.write('\n')
def extract_data(num, out_path='K:/zhusc/test.txt', timeout=10):
    """Scrape one listing page and append its entries to a text file.

    Fetches ``https://www.kuaidaili.com/free/inha/<num>/`` and, for each of
    the first 15 table rows under the element with id ``list``, appends the
    text of the first two cells as ``<cell1>:<cell2>`` on its own line
    (presumably ``ip:port`` -- confirm against the live page). Prints
    ``num`` once the page has been processed.

    Args:
        num: Page number substituted into the URL.
        out_path: File the entries are appended to via ``inputme``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    url = "https://www.kuaidaili.com/free/inha/{}/".format(str(num))
    # Without a timeout a stalled server would hang the whole run.
    res = requests.get(url, timeout=timeout)
    # Random 1-2 second pause so consecutive page fetches are spaced out.
    time.sleep(random.uniform(1, 2))
    selector = etree.HTML(res.text)

    cell_xpath = '//*[@id="list"]/table/tbody/tr[{}]/td[{}]/text()'
    for row in range(1, 16):
        first = selector.xpath(cell_xpath.format(row, 1))
        second = selector.xpath(cell_xpath.format(row, 2))
        if not first or not second:
            # Row missing or cell empty (short page, changed layout, or an
            # error page): skip it rather than fail with IndexError.
            continue
        inputme(str(first[0]) + ":" + str(second[0]), out_path)
    print(num)
# Scrape listing pages 1 through 19 in order.
for page in range(1, 20):
    extract_data(page)

# Show the finish time, then the elapsed time since the start stamp `a`.
b = datetime.datetime.now()
print(b)
print(b - a)
# Source (please credit the original address when reposting): https://blackberry.8miu.com/read-44453.html