使用pyecharts把任意网页内中文关键词生成词云图

    科技2022-08-12  98

    """Build a word cloud from the Chinese keywords found on a web page.

    The script asks for a URL, downloads the page, strips markup and every
    character that is an ASCII letter, a digit, an underscore or a non-word
    character, segments what is left with jieba, counts the words that are at
    least two characters long and renders the counts as a pyecharts WordCloud
    that is then opened in the default browser.
    """
    import re
    import webbrowser
    from collections import Counter
    from pathlib import Path
    from typing import Iterable, List, Tuple

    # Page used when the user just presses Enter at the prompt.
    DEFAULT_URL = "http://news.baidu.com/"
    # Where the rendered chart is written.
    OUTPUT_PATH = "e:/tests/basic_wordcloud.html"
    # Seconds to wait for the server before giving up instead of hanging.
    REQUEST_TIMEOUT = 10

    # <script>/<style> elements including their bodies; their contents are
    # code, not page text, so they must not end up in the keyword counts.
    _SCRIPT_STYLE_RE = re.compile(
        r"<(script|style)\b.*?</\1\s*>", re.IGNORECASE | re.DOTALL
    )
    # Any remaining tag.
    _TAG_RE = re.compile(r"<[^>]+>")
    # Runs of ASCII letters, digits, underscores and non-word characters
    # (whitespace and punctuation are non-word characters).
    _NOISE_RE = re.compile(r"[A-Za-z\d\W_]+")


    def normalize_url(raw: str) -> str:
        """Return a fetchable URL for the text typed at the prompt.

        Blank input yields ``DEFAULT_URL``; input without a scheme gets
        ``http://`` prepended so that requests does not reject it.
        """
        url = raw.strip()
        if not url:
            return DEFAULT_URL
        if "://" not in url:
            url = "http://" + url
        return url


    def fetch_html(url: str) -> str:
        """Download *url* and return the decoded response body.

        Raises:
            requests.RequestException: on connection problems, timeouts or an
                HTTP error status.
        """
        # Imported here so the pure text helpers in this file stay importable
        # without the third-party packages installed.
        import requests

        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        # Decode with the encoding detected from the body rather than a
        # hard-coded one, so GBK/GB2312 pages work as well as UTF-8 pages.
        response.encoding = response.apparent_encoding or "utf-8"
        return response.text


    def strip_markup(html: str) -> str:
        """Reduce *html* to the text that should be segmented.

        Script and style elements are dropped together with their contents,
        remaining tags are removed, and every run of ASCII letters, digits,
        underscores and non-word characters is collapsed to a single space.
        """
        text = _SCRIPT_STYLE_RE.sub(" ", html)
        text = _TAG_RE.sub("", text)
        return _NOISE_RE.sub(" ", text)


    def count_keywords(
        words: Iterable[str], min_length: int = 2
    ) -> List[Tuple[str, int]]:
        """Count the words of at least *min_length* characters.

        Returns:
            ``(word, count)`` pairs ordered from most to least frequent; words
            with equal counts keep the order in which they first appeared.
        """
        counts = Counter(word for word in words if len(word) >= min_length)
        return counts.most_common()


    def render_wordcloud(
        data: List[Tuple[str, int]], output_path: str = OUTPUT_PATH
    ) -> str:
        """Render *data* as a word cloud HTML file and return its path."""
        import pyecharts.options as opts
        from pyecharts.charts import WordCloud

        # render() does not create missing directories.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        chart = (
            WordCloud(init_opts=opts.InitOpts(width="100%", height="980px"))
            .add(
                series_name="内容关键词",
                data_pair=data,
                word_size_range=[8, 250],
            )
            .set_global_opts(
                title_opts=opts.TitleOpts(
                    title="网页内容关键词",
                    title_textstyle_opts=opts.TextStyleOpts(font_size=23),
                ),
                tooltip_opts=opts.TooltipOpts(is_show=True),
            )
        )
        chart.render(output_path)
        return output_path


    def main() -> None:
        """Prompt for a URL, build the word cloud and open it in the browser."""
        import jieba

        url = normalize_url(input("请输入网页URL:"))
        print("URL:", url)

        text = strip_markup(fetch_html(url))
        data = count_keywords(jieba.cut(text))
        if not data:
            # Nothing to draw; an empty chart would only be confusing.
            print("未找到可用的关键词")
            return

        for word, count in data:
            print(word, "---", count)

        output = render_wordcloud(data)
        # A file:// URI is understood by every browser backend, whereas a bare
        # path is not.
        webbrowser.open(Path(output).resolve().as_uri())


    if __name__ == "__main__":
        main()

     

     

     

     

    Processed: 0.012, SQL: 8