import pandas
as pd
import numpy
as np
import matplotlib
.pyplot
as plt
%matplotlib inline
# Load the bike-rental records from CSV, decoding the file as UTF-8.
bike = pd.read_csv(
    filepath_or_buffer="/home/ysc/数据分析/data/第十讲/bike.csv",
    encoding='utf-8',
)
# Bare expression: a notebook renders the frame when this ends a cell.
bike
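字段说明:datetime 租赁时间;season 季节(1:春,2:夏,3:秋,4:冬);holiday 是否假期(0:非假期,1:假期);workingday 是否工作日(原文写作“0 工作日、1 非工作日”;若数据来自 Kaggle Bike Sharing Demand,则应为 1:工作日,0:非工作日,请核对);weather 天气(数值越大天气越差);temp 气温;atemp 体感温度;humidity 湿度;windspeed 风速;casual 普通(未注册)用户租赁数;registered 注册用户租赁数;count 租赁自行车总数量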
时间段与租赁的关系
# Count the missing values in each column (isna is the alias of isnull).
bike.isna().sum(axis=0)
将 datetime 列的数据类型转换为 pandas 的 datetime 类型
# Parse the raw 'datetime' column into pandas datetime values and store the
# result back in the same column.
bike['datetime'] = bike['datetime'].pipe(pd.to_datetime)
# Bare expression: lists every column's dtype so the conversion can be checked.
bike.dtypes
将datetime设置为DataFrame的索引,这样就成为了时间序列数据
# Promote the 'datetime' column to the index so the frame is indexed by time.
bike = bike.set_index(keys='datetime')
# Preview the first five rows.
bike.head(n=5)
探索数据–降采样到年份数据
# Average every column per calendar year. The key function is called on each
# index label and returns that label's `.year`.
y_bike = bike.groupby(by=lambda stamp: stamp.year).mean()
# Bare expression: yearly mean of the rental count.
y_bike['count']
# Bar chart of the yearly mean rental count.
y_bike['count'].plot.bar()
将数据重采样到月份
# Monthly mean of every column, indexed by monthly Period labels.
# Grouping on the index converted with `to_period('M')` replaces
# `resample('M', kind='period')`: both the `kind` argument and the 'M'
# resample alias are deprecated in pandas 2.2.
# NOTE: unlike resample, groupby produces no row for a month that has no
# records at all.
m_bike = bike.groupby(bike.index.to_period('M')).mean()

# Two stacked axes: 2011 on top, 2012 below.
fig, axes = plt.subplots(2, 1)
# Select rows with `.loc` partial-string indexing. The previous
# `m_bike['2011']` selected rows by string through DataFrame.__getitem__,
# which pandas 2.0 removed, so it raises KeyError there.
m_bike.loc['2011', 'count'].plot(ax=axes[0], sharex=True)
m_bike.loc['2012', 'count'].plot(ax=axes[1])
分析租赁数量与“日”(每月第几天)和“小时”的关系——先把日和小时单独存为新列
# Store day-of-month and hour-of-day as ordinary columns so they can be used
# as grouping keys.
bike['day'] = bike.index.day
bike['hour'] = bike.index.hour
bike.head()

# Mean rental count for each day of the month.
d_bike = bike.groupby('day')['count'].mean()
d_bike
# Give the plot its own figure: a bare `.plot()` draws on the current axes,
# so outside separate notebook cells it would land on an earlier chart.
day_fig, day_ax = plt.subplots()
d_bike.plot(ax=day_ax)

# Mean rental count for each hour of the day, on a separate figure so it is
# not overlaid on the day-of-month curve.
h_bike = bike.groupby('hour')['count'].mean()
hour_fig, hour_ax = plt.subplots()
h_bike.plot(ax=hour_ax)
分析天气对租车的影响
# Mean rental count for each value of the 'weather' column.
weather_bike = bike.groupby('weather')['count'].mean()
# Draw the bar chart on a dedicated figure: without an explicit `ax`, pandas
# plots onto the current axes and would overlay whatever chart came before.
weather_fig, weather_ax = plt.subplots()
weather_bike.plot(kind='bar', ax=weather_ax)