用python操作excel表格,进行数据分析
项目场景:
提示:通常我们对excel表格进行删除多个项的时候,如果一条一条的删除就会花费很多时间,并且也不知道要删除的部分是否删除完,进行查询,分类的时候用python语言几条语句就能简单的完成要求。 例如:从一个有很多条学生信息的excel表中按要求进行操作
代码展示:
首先在软件中导入excel表格,即用python打开csv文件
from pandas
import Series
,DataFrame
import pandas
as pd
import numpy
as np
import csv
import codecs
data_csv
= pd
.read_csv
("d:/shiyan.csv",encoding
= "gbk")
data_csv
将数据以文件形式导出保存
data_csv
.to_csv
('d:/1.csv')
data_csv
= data_csv
.to_excel
("d:/1.xls")
删除后两列所有信息
data_csv
= data_csv
.drop
(['任选课类别_显示值','考试类型_显示值'],axis
=1)
针对表中的字要修改的进行统一修改
data_csv
=data_csv
.replace
(['合格','良好'],[60,80])
特定要求筛选
data0_csv
= data_csv
[data_csv
['学号']>20120000]
data0_csv
= data_csv
[data_csv
['院系_显示值'] == '文学与新闻传播学院']
TeamID1
= data_csv
[data_csv
['TeamID'] == 1]
分组
group
= TeamID1
['成绩'].groupby
(TeamID1
['学号'])
a
= group
.agg
(['sum']+['mean']+['max']+['min']+['count'])
用python函数对成绩进行分析,count=9表明有9门学科
a
.columns
=['总分','每学期该专业平均分','每学期该专业最高分','每学期该专业最低分','每学期该专业考试门数']
分别用迭代递归缓存求斐波拉契数列
from tkinter
import *
import easygui
import time
master
= Tk
()
frame
= Frame
(master
)
frame
.pack
(padx
= 20,pady
= 20)
v1
= StringVar
()
v2
= StringVar
()
v3
= StringVar
()
v4
= StringVar
()
v5
= StringVar
()
v6
= StringVar
()
def test(content
):
return content
.isdigit
()
testCMD
= frame
.register
(test
)
Label
(frame
,text
='迭代算法:',padx
=10).grid
(row
=0,column
=0)
Label
(frame
,text
='输入项数:',padx
=10).grid
(row
=0,column
=2)
e2
= Entry
(frame
,width
=10,textvariable
=v1
,validate
='key',\
validatecommand
=(test
,'%p')).grid
(row
=0,column
=3)
Label
(frame
,text
='结果',padx
=10).grid
(row
=0,column
=4)
e3
= Entry
(frame
,width
=15,textvariable
=v2
,state
='readonly').grid
(row
=0,column
=5)
Label
(frame
,text
='花费时间:',padx
=10).grid
(row
=0,column
=6)
e4
= Entry
(frame
,width
=15,textvariable
=v3
,state
='readonly').grid
(row
=0,column
=7)
Label
(frame
,text
='递归算法:',padx
=10).grid
(row
=1,column
=0)
Label
(frame
,text
='输入项数:',padx
=10).grid
(row
=1,column
=2)
e5
= Entry
(frame
,width
=10,textvariable
=v4
,validate
='key',\
validatecommand
=(test
,'%p')).grid
(row
=1,column
=3)
Label
(frame
,text
='结果',padx
=10).grid
(row
=1,column
=4)
e6
= Entry
(frame
,width
=15,textvariable
=v5
,state
='readonly').grid
(row
=1,column
=5)
Label
(frame
,text
='花费时间:',padx
=10).grid
(row
=1,column
=6)
e7
= Entry
(frame
,width
=15,textvariable
=v6
,state
='readonly').grid
(row
=1,column
=7)
def calc():
if v1
.get
()=='':
easygui
.msgbox
('请先输入内容 !')
return
result1
= fibonacci3
(12)
v2
.set(result1
)
result2
= Time3
()
v3
.set(result2
)
def calc():
if v4
.get
()=='':
easygui
.msgbox
('请先输入内容 !')
return
result1
= fibonacci1
()
v5
.set(result1
)
result2
= Time3
()
v6
.set(result2
)
"""def test():
if e1.get() == 'f1':
print("正确")
fibonacci3(10)
#
return True
else:
fibonacci3(5)
print('错误')
e1.delete(0,END)
return False """
def fibonacci1(n
):
if n
==0 or n
==1:
return 1
else:
return fibonacci1
(n
-1) + fibonacci1
(n
-2)
def Time3():
start_CPU
= time
.clock
()
fibonacci3
(32)
end_CPU
= time
.clock
()
return end_CPU
- start_CPU
def fibonacci3(n
):
a
, b
= 0, 1
for i
in range(1, n
+1):
a
, b
= b
, a
+b
return b
Button
(frame
,text
='结果1',command
=calc
).grid
(row
=4,column
=3,pady
=15)
Button
(frame
,text
='结果2',command
=calc
).grid
(row
=4,column
=4,pady
=15)
mainloop
()
斐波拉契数列:
def calc1():
if v1
.get
()=='':
easygui
.msgbox
('请先输入内容 !')
return
result1
= fibonacci1
(v1
.get
())
v2
.set(result1
)
result2
= Time1
()
v3
.set(result2
)
def fibonacci1(n
):
a
, b
= 0, 1
for i
in range(1, n
+1):
a
, b
= b
, a
+b
return b
def Time1(n
):
start_CPU
= time
.clock
()
fibonacci1
(n
)
end_CPU
= time
.clock
()
return end_CPU
- start_CPU
知识扩充:
CSV:
csv是最通用的一种文件格式,它可以非常容易地被导入各种PC表格及数据库中。此文件,一行即为数据表的一行。生成数据表字段用半角逗号隔开。csv文件用记事本和excel都能打开,用记事本打开显示逗号,用excel打开,没有逗号了,逗号都用来分列了,还可有Editplus打开。
python常用的科学计算库:
Numpy: Pandas Scipy