import re
import webbrowser
from collections import Counter
from pathlib import Path

import jieba
import pyecharts.options as opts
import requests
from pyecharts.charts import WordCloud
# Page analysed when the user just presses Enter at the prompt.
DEFAULT_URL = "http://news.baidu.com/"
# File the rendered chart is written to and then opened from.
OUTPUT_PATH = "e:/tests/basic_wordcloud.html"
# Seconds to wait for the server; without a timeout requests.get() can
# block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 10

# Anything that looks like an HTML/XML tag: "<" ... ">".
_TAG_RE = re.compile(r"<[^>]+>")
# Runs of ASCII letters, digits, underscores and non-word characters
# (punctuation, whitespace).  What survives is mostly CJK text.
_NOISE_RE = re.compile(r"[A-Za-z\d\W_]+")


def fetch_page(url, encoding="utf-8"):
    """Download *url* and return its body decoded with *encoding*.

    Args:
        url: Address of the page to download.
        encoding: Codec used to decode the response body.  Older Chinese
            sites may need "gb2312" instead of the default.

    Returns:
        The decoded response body as a string.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx status (an error page is not worth charting).
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = encoding
    return response.text


def extract_text(html):
    """Reduce *html* to space-separated runs of non-ASCII word characters.

    Tags are deleted outright; every run of ASCII letters, digits,
    underscores, punctuation and whitespace is then collapsed into a
    single space.

    Args:
        html: Raw page source.

    Returns:
        The cleaned text.  It may start or end with a single space.
    """
    without_tags = _TAG_RE.sub("", html)
    return _NOISE_RE.sub(" ", without_tags)


def keyword_pairs(counts, min_length=2):
    """Turn a word -> frequency mapping into word-cloud data pairs.

    Args:
        counts: Mapping of token to number of occurrences.
        min_length: Shortest token (in characters) worth showing.  The
            default of 2 drops single characters and the lone spaces left
            over from text cleaning.

    Returns:
        A list of ``(word, count)`` tuples in the mapping's iteration order.
    """
    return [
        (word, count)
        for word, count in counts.items()
        if len(word) >= min_length
    ]


def render_wordcloud(pairs, output_path):
    """Render *pairs* as a word-cloud HTML page at *output_path*.

    Args:
        pairs: List of ``(word, count)`` tuples to display.
        output_path: Destination HTML file.  Missing parent directories
            are created first, because a missing directory would otherwise
            make the render step fail.

    Returns:
        The destination as a ``pathlib.Path``.
    """
    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    chart = (
        WordCloud({"width": "100%", "height": "980px"})
        .add(series_name="内容关键词", data_pair=pairs, word_size_range=[8, 250])
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="网页内容关键词",
                title_textstyle_opts=opts.TextStyleOpts(font_size=23),
            ),
            tooltip_opts=opts.TooltipOpts(is_show=True),
        )
    )
    chart.render(str(target))
    return target


def main():
    """Prompt for a URL, chart its keyword frequencies, and open the chart."""
    # Blank (or whitespace-only) input falls back to the default page.
    url = input("请输入网页URL:").strip() or DEFAULT_URL
    print("URL:", url)

    text = extract_text(fetch_page(url))
    counts = Counter(jieba.cut(text))
    # Printed as a plain dict so tokens appear in first-seen order.
    print(dict(counts))

    pairs = keyword_pairs(counts)
    for word, count in pairs:
        print(word, "---", count)
    print(pairs)

    output = render_wordcloud(pairs, OUTPUT_PATH)
    # webbrowser.open() is only documented for URLs; a bare filesystem
    # path is not portable, so hand it a file:// URI instead.
    webbrowser.open(output.resolve().as_uri())


if __name__ == "__main__":
    main()