python中的数据统计,使用matplotlib画直方图,条状图,柱状图,饼图

    科技2024-12-07  41

    一.统计文本中每种实体的个数

    1.ann

    2.代码

    import os
    import re
    import pandas as pd
    import numpy as np
    import random
    import math
    from datetime import datetime
    from matplotlib import pyplot as plt

    # Dataset locations used throughout this walkthrough.
    train_dir = '../../siriyang/中医药命名实体识别/dataset/train'
    test_dir = '../../siriyang/中医药命名实体识别/dataset/chusai_xuanshou'
    prepare_dir = './prepare'


    def get_entitie(dir):
        """Count how often each entity type occurs in the ``.ann`` files of a directory.

        Every line of an annotation file is split on tabs; the second field is
        then split on spaces and its first token is taken as the entity type
        (for example ``DRUG_EFFICACY``).

        Args:
            dir: Path of the directory that holds the ``.ann`` files.

        Returns:
            A dict mapping entity type -> number of occurrences, with keys in
            the order they are first met while reading the files in sorted
            file-name order.

        Raises:
            OSError: If ``dir`` cannot be listed or a file cannot be opened.
        """
        entities = {}
        # Read only real ".ann" files: a ".txt" without a matching ".ann" no
        # longer triggers FileNotFoundError, and file names that contain extra
        # dots are no longer truncated. Sorting makes the result order stable.
        ann_files = sorted(
            entry for entry in os.listdir(dir) if entry.endswith('.ann')
        )
        for ann_file in ann_files:
            path = os.path.join(dir, ann_file)
            with open(path, 'r', encoding='utf8') as f:
                for line in f:
                    fields = line.rstrip('\r\n').split('\t')
                    if len(fields) < 2:
                        # Blank or malformed line: there is no type field.
                        continue
                    name = fields[1].split(' ')[0]
                    if not name:
                        continue
                    entities[name] = entities.get(name, 0) + 1
        return entities


    count = get_entitie(train_dir)  # tally the training set
    print(count)
    print(list(count.keys()))  # entity type names
    print(list(count.values()))  # occurrences per entity type

    3.结果

    二.绘出直方图

    def autolabel(rects):
        """Write each bar's height as a text label centred on top of the bar.

        Args:
            rects: Iterable of bar patches, e.g. the container returned by
                ``plt.bar``.
        """
        for rect in rects:
            height = rect.get_height()
            # Anchor the label at the horizontal middle of the bar, sitting
            # just above its top edge.
            plt.text(rect.get_x() + rect.get_width() / 2, height, height,
                     ha='center', va='bottom')


    name_list = list(count.keys())
    num_list = list(count.values())
    plt.figure(figsize=(10, 5))  # canvas size
    plt.title('Category statistics of entities', fontsize=13)  # title; fontsize sets the font size
    plt.xlabel(u'category', fontsize=13)  # x-axis label and its font size
    plt.ylabel(u'quantity', fontsize=13)  # y-axis label and its font size
    # edgecolor: outline colour of the bars; lw: outline width.
    autolabel(plt.bar(range(len(num_list)), num_list, width=0.8,
                      edgecolor='darkblue', lw=1))
    # Put the entity names on the x axis; rotation=80 tilts the tick labels.
    plt.xticks(range(len(num_list)), name_list, rotation=80)
    fig = plt.gcf()
    # Save before show(): depending on the backend, the figure may already be
    # released once show() returns.
    fig.savefig('./Category statistics of entities.png')  # written to the current directory
    plt.show()

    三.绘出饼状图

    import numpy as np
    import matplotlib.pyplot as plt
    import matplotlib as mpl


    def draw_pie(labels, quants):
        """Draw a pie chart, save it as ``./pie.jpg`` and display it.

        Args:
            labels: Sequence of slice labels, one per entry of ``quants``.
            quants: Sequence of slice sizes.
        """
        # Always start from a fresh figure instead of reusing figure number 1,
        # which may still hold an earlier plot.
        fig = plt.figure(figsize=(10, 10))
        # One offset per slice (all 0 = no slice pulled away from the centre).
        # plt.pie requires len(explode) == len(x), so size it from the data
        # rather than hard-coding 13 entries.
        expl = [0] * len(quants)
        colors = ["blue", "red", "coral", "green", "yellow", "orange"]  # cycled when there are more slices
        # autopct: format of the percentage text drawn on each slice.
        plt.pie(quants, explode=expl, colors=colors, labels=labels,
                autopct='%1.1f%%', pctdistance=0.8, shadow=True)
        plt.title('length of the entity', bbox={'facecolor': '0.8', 'pad': 5})
        # Save before show(): depending on the backend, the figure may already
        # be released once show() returns.
        fig.savefig("./pie.jpg")
        plt.show()
        plt.close(fig)


    # NOTE(review): `dic` is not defined in the visible part of this article;
    # presumably a dict of label -> count built elsewhere - confirm.
    labels = list(dic.keys())
    quants = list(dic.values())
    draw_pie(labels, quants)

    直方图博客链接分享

    饼状图链接分享

     

    Processed: 0.010, SQL: 8