import numpy as np
import matplotlib.pyplot as plt
import os
def data_query(file_dir='../data', file_names=None):
    """Load the quarterly trip-history CSV files as string arrays.

    Args:
        file_dir: Directory that contains the CSV files.
        file_names: Iterable of CSV file names inside ``file_dir``. When
            ``None``, the four 2019 quarterly "short" files are used.

    Returns:
        A list with one 2-D NumPy string array per file, in the order of
        ``file_names``. The first line of each file (the header) is skipped.
    """
    if file_names is None:
        file_names = (
            '2019-q1_trip_history_data - short.csv',
            '2019-q2_trip_history_data - short.csv',
            '2019-q3_trip_history_data - short.csv',
            '2019-q4_trip_history_data - short.csv',
        )

    arrs = []
    for file_name in file_names:
        file_path = os.path.join(file_dir, file_name)
        # The builtin ``str`` replaces the ``np.str`` alias, which was removed
        # in NumPy 1.24. ``ndmin=2`` keeps a file with a single data row as a
        # (1, n_columns) table instead of collapsing it to 1-D.
        arr = np.loadtxt(file_path, dtype=str, delimiter=',', skiprows=1,
                         ndmin=2)
        arrs.append(arr)
    return arrs
def data_process(arrs):
    """Split the year's trips by user type and return durations in minutes.

    Args:
        arrs: Sequence of 2-D string arrays (one per quarter) with the same
            number of columns. Column 0 is read as the trip duration and the
            last column as the user type ("Member" or "Casual").

    Returns:
        A tuple ``(arr_min_members, arr_min_casuals)`` of 1-D float arrays
        holding the trip durations of members and casual users.
    """
    # 1. Stack the quarterly tables into one table for the whole year.
    arr_all = np.vstack(arrs)

    # 2. Filter the rows by the user type stored in the last column.
    user_type = arr_all[:, -1]
    arr_members = arr_all[user_type == "Member"]
    arr_casuals = arr_all[user_type == "Casual"]

    # 3. Keep only the duration column and convert it to minutes.
    #    NOTE(review): the /1000/60 conversion implies the raw duration is in
    #    milliseconds -- confirm against the data files.
    #    The builtin ``float`` replaces the ``np.float`` alias, which was
    #    removed in NumPy 1.24.
    arr_min_members = arr_members[:, 0].astype(float) / 1000 / 60
    arr_min_casuals = arr_casuals[:, 0].astype(float) / 1000 / 60
    return arr_min_members, arr_min_casuals
def data_analysis(process_data):
    """Placeholder for an analysis stage; not implemented yet.

    Args:
        process_data: Accepted but currently ignored.

    Returns:
        Always ``None``.
    """
    return None
def data_display(arr_min_members, arr_min_casuals):
    """Plot side-by-side ride-duration histograms for members and casuals.

    The figure is saved to ``../output/zhifangtu2.png`` and then shown.

    Args:
        arr_min_members: Trip durations of members, in minutes.
        arr_min_casuals: Trip durations of casual users, in minutes.
    """
    # SimHei is selected so the Chinese figure title can be rendered;
    # unicode_minus is disabled alongside it (presumably because that font
    # lacks the unicode minus glyph).
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # Histogram over 0-180 minutes in 15-minute bins. The range stops at
    # exactly 180 so the 12 bin edges coincide with the x ticks; durations
    # above 180 minutes are not drawn.
    max_minutes = 180
    bin_width = 15
    n_bins = max_minutes // bin_width
    hist_range = (0, max_minutes)
    ticks = range(0, max_minutes + 1, bin_width)

    plt.figure(figsize=(10, 5))
    # Figure-level title: plt.title() here would be overwritten by the
    # "Member" subplot title set below.
    plt.suptitle("共享单车全年不同类型用户骑行时间直方图")

    ax1 = plt.subplot(1, 2, 1)
    plt.hist(arr_min_members, bins=n_bins, range=hist_range,
             color='g', edgecolor='k')
    plt.xticks(ticks, ticks)
    plt.title("Member")

    # Share the y axis so both user types are drawn on the same scale.
    plt.subplot(1, 2, 2, sharey=ax1)
    plt.hist(arr_min_casuals, bins=n_bins, range=hist_range,
             color='r', edgecolor='y')
    plt.xticks(ticks, ticks)
    plt.title("Casual")

    out_path = "../output/zhifangtu2.png"
    # savefig does not create missing directories.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path)
    plt.show()
if __name__ == '__main__':
    # Pipeline: load the quarterly CSV tables, reduce them to per-user-type
    # ride durations, then plot. data_analysis() is not part of the run.
    quarterly_tables = data_query()
    member_minutes, casual_minutes = data_process(quarterly_tables)
    data_display(member_minutes, casual_minutes)