透视表pivot

    科技2022-07-12  132

    本文介绍透视表pivot_table和交叉表crosstab。

    import pandas as pd import numpy as np %matplotlib inline %matplotlib notebook import matplotlib.pyplot as plt

    1.透视表pivot table

    # 导入数据 tips = pd.read_csv('data/tips/tips2.csv') tips['tip_pct'] = tips['tip']/tips['total_bill'] print(tips.head()) # total_bill tip sex smoker day time size tip_pct #0 16.99 1.01 Female No Sun Dinner 2 0.059447 #1 10.34 1.66 Male No Sun Dinner 3 0.160542 #2 21.01 3.50 Female No Sun Dinner 3 0.166587 #3 23.68 3.31 Male No Sun Dinner 2 0.139780 #4 24.59 3.61 Female No Sun Dinner 4 0.146808

    1.1 制作数据透视表方法1 : groupby + 聚合函数

    tips.groupby(['sex','smoker']).mean()

    .

    1.2 制作数据透视表方法2 :pivot_table

    # pivot_table1:整个dataframe tips.pivot_table(index=['sex','smoker'])

    .

    # pivot_table2:dataframe中一部分数据 tips.pivot_table(['tip_pct','size'],index=['sex','day'],columns='smoker')

    .

    # pivot_table3:传入小计margins=True tips.pivot_table(['tip_pct','size'],index=['sex','day'],columns='smoker',margins=True)

    .

    # pivot_table4:传入其他聚合函数aggfunc tips.pivot_table('tip_pct',index=['sex','smoker'],columns='day',aggfunc=len,margins=True)

    .

    # pivot_table5:fill_value tips.pivot_table('size',index=['time','sex','smoker'],columns='day',aggfunc=np.sum,fill_value=0)

    .

    2.交叉表crosstab

    # 交叉表1 data=pd.DataFrame({'sample':[1,2,3,4,5,6,7,8,9,10], 'Gender': ['Female','Male','Female','Male','Male','Male','Female','Female','Male','Female'], 'Handedness':['Right-handed','Left-handed','Right-handed','Right-handed','Left-handed','Right-handed','Right-handed','Left-handed','Right-handed','Right-handed']}) data # sample Gender Handedness #0 1 Female Right-handed #1 2 Male Left-handed #2 3 Female Right-handed #3 4 Male Right-handed #4 5 Male Left-handed #5 6 Male Right-handed #6 7 Female Right-handed #7 8 Female Left-handed #8 9 Male Right-handed #9 10 Female Right-handed

    2.1 用groupby制作出crosstab

    # 用groupby制作出crosstab data.groupby(['Gender','Handedness']).count().unstack()

    .

    2.2 用pivot_table制作出crosstab

    # 用pivot_table制作出crosstab data.pivot_table('sample',index=['Gender'],columns='Handedness',aggfunc='count',margins=True)

    .

    2.3 crosstab函数

    # crosstab1 pd.crosstab(data.Gender,data.Handedness,margins=True)

    .

    # crosstab2 pd.crosstab([tips.time,tips.day],tips.smoker,margins=True)

    Processed: 0.010, SQL: 8