一、需求分析
爬取网址:http://www.shanbay.com/wordlist/110521/232414/
需求:
获取该单词表中所有的 Python 词汇数据,将每个单词整理成字典并存储。
二、代码实现
from urllib import request

from lxml import etree
# Module-level accumulator: shanbei() appends one dict per scraped word here.
words = []
def shanbei(page):
    """Scrape one page of the Shanbay Python word list into ``words``.

    Downloads the word-list page, walks every ``<tr>`` element in it and
    builds one dict per row:

    * ``name``    -- stripped text of the first ``<strong>`` inside the row
    * ``content`` -- stripped text of the row's ``<td class="span10">`` cell

    Every row's dict is printed (even when empty); only non-empty dicts are
    appended to the module-level ``words`` list.

    Args:
        page: Page number of the word list to fetch. It is sent as the
            ``page`` query parameter of the request URL.

    Returns:
        None. Results accumulate in the module-level ``words`` list.
    """
    # Include the requested page number in the URL so that different calls
    # fetch different pages.
    # NOTE(review): assumes the site paginates via "?page=N" -- confirm.
    url = "http://www.shanbay.com/wordlist/110521/232414/?page=%s" % page
    print(url)

    # Use the response as a context manager so the connection is always
    # closed, even if reading fails.
    with request.urlopen(url) as rsp:
        raw_html = rsp.read()

    document = etree.HTML(raw_html)
    for tr in document.xpath("//tr"):
        # Look up the word and its explanation in this table row.
        word = {}

        strong = tr.xpath('.//strong')
        if strong:
            # Element.text is None when the tag has no leading text;
            # fall back to '' instead of raising AttributeError.
            word['name'] = (strong[0].text or '').strip()

        td_content = tr.xpath('./td[@class="span10"]')
        if td_content:
            word['content'] = (td_content[0].text or '').strip()

        print(word)
        # Rows without a word or explanation produce an empty dict; skip them.
        if word:
            words.append(word)
# Script entry point: when run directly, scrape page 2 of the word list.
if __name__ == '__main__':
    shanbei(2)
三、运行结果
转载请注明原文地址:https://blackberry.8miu.com/read-32769.html