未来のタクシー需要を先読みしよう!
mini_forest
import pandas as pd pd.options.display.max_columns = 100 pd.options.display.max_rows = 999 pd.options.display.float_format = '{:.6f}'.format import numpy as np import math import warnings warnings.filterwarnings('ignore') %matplotlib inline import matplotlib import matplotlib.pyplot as plt import matplotlib.colors as mcolors import seaborn as sns import datetime import time
# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive

drive.mount("/content/drive")
Mounted at /content/drive
# Folder on the mounted Drive that holds this project's files.
path = "/content/drive/MyDrive/tmp/7_probspace/3_taxi/"

# Read the training CSV with the pickup timestamp parsed as datetime,
# then rename that column to "ds".
train = pd.read_csv(
    f"{path}data/train_data.csv",
    parse_dates=["tpep_pickup_datetime"],
)
train = train.rename(columns={"tpep_pickup_datetime": "ds"})
# Timestamp-indexed copy of the training data with every column cast to int.
train_ds = train.set_index("ds")
train_ds = train_ds.astype(int)

# Show the rows whose sum across all columns is 100 or less.
low_total_rows = train_ds.sum(axis=1) <= 100
train_ds.loc[low_total_rows, :]
# Show the rows from 01:00 to 04:00 (inclusive) on 2017-03-12.
after_start = train_ds.index >= "2017-03-12 01:00:00"
before_end = train_ds.index <= "2017-03-12 04:00:00"
train_ds.loc[after_start & before_end, :]

# Other time ranges
# train_ds.loc[((train_ds.index >= "2018-03-11 01:00:00")&(train_ds.index <= "2018-03-12 04:00:00")),:]
# train_ds.loc[((train_ds.index >= "2019-03-10 01:00:00")&(train_ds.index <= "2019-03-12 04:00:00")),:]
# Plot each of the 79 series for 2017-03-10 up to (not including) 2017-03-14,
# one subplot per column, with 02:00-02:30 on 2017-03-12 shaded in orange.
# NOTE(review): the shaded window is presumably the US daylight-saving
# transition -- confirm against the data description.
train_plot = train[("2017-3-10" <= train["ds"]) & (train["ds"] < "2017-3-14")]

n_series = 79
series_cols = [str(k) for k in range(n_series)]
shade_start = datetime.datetime(2017, 3, 12, 2, 0)
shade_end = datetime.datetime(2017, 3, 12, 2, 30)

fig, ax = plt.subplots(n_series, 1, figsize=(20, n_series * 4))
for i, obj in enumerate(series_cols):
    ax[i].plot(train_plot['ds'], train_plot[obj])
    ax[i].axvspan(shade_start, shade_end, color="orange")
    ax[i].set_title(obj)
plt.show()

# Clear and close this specific figure. Calling plt.clf() after plt.close()
# would make pyplot create a brand-new empty figure (clf() works on gcf(),
# which creates a figure when none is current) and leave it open.
fig.clf()
plt.close(fig)
# The rendered charts are provided in the attached data.
Output hidden; open in https://colab.research.google.com to view.
# Sample code for interpolating the gaps, provided for reference.
## Uses pandas' interpolate.
series_cols = [str(k) for k in range(79)]

# Windows (inclusive on both ends) whose values are discarded and re-estimated.
gap_windows = [
    ("2017-03-12 02:00:00", "2017-03-12 02:30:00"),
    ("2018-03-11 02:00:00", "2018-03-11 02:30:00"),
    ("2019-03-10 02:00:00", "2019-03-10 02:30:00"),
]

train_hosei = train.copy()
for gap_from, gap_to in gap_windows:
    in_gap = (train_hosei["ds"] >= gap_from) & (train_hosei["ds"] <= gap_to)
    # Blank the window out so that interpolate() fills it from its neighbours.
    train_hosei.loc[in_gap, series_cols] = float('nan')

# Linear interpolation down the rows, then back to rounded integer values
# with "ds" restored as an ordinary column.
train_hosei = (
    train_hosei.set_index("ds")
    .interpolate(method='linear', axis=0)
    .round()
    .astype(int)
    .reset_index()
)

# Show the result from 01:00 to 04:00 on 2019-03-10.
check_rows = (train_hosei["ds"] >= "2019-03-10 01:00:00") & (train_hosei["ds"] <= "2019-03-10 04:00:00")
train_hosei[check_rows]
# By the way, what happens at 2 a.m. on the first Sunday of November?
from_0130 = train_ds.index >= "2017-11-05 01:30:00"
until_0300 = train_ds.index <= "2017-11-05 03:00:00"
display(train_ds.loc[from_0130 & until_0300, :])
# Is there an increase just before it?
# Plot each of the 79 series for 2017-10-28 up to (not including) 2017-11-14,
# one subplot per column, with 02:00-03:00 on 2017-11-05 shaded in orange.
train_plot = train[("2017-10-28" <= train["ds"]) & (train["ds"] < "2017-11-14")]

n_series = 79
series_cols = [str(k) for k in range(n_series)]
shade_start = datetime.datetime(2017, 11, 5, 2)
shade_end = datetime.datetime(2017, 11, 5, 3)

fig, ax = plt.subplots(n_series, 1, figsize=(20, n_series * 4))
for i, obj in enumerate(series_cols):
    ax[i].plot(train_plot['ds'], train_plot[obj])
    # Variant with string bounds, left disabled in favour of datetime objects:
    # ax[i].axvspan("2017-11-05 02:00:00", "2017-11-05 03:00:00", color="orange")
    ax[i].axvspan(shade_start, shade_end, color="orange")
    ax[i].set_title(obj)
plt.show()

# Clear and close this specific figure. Calling plt.clf() after plt.close()
# would make pyplot create a brand-new empty figure (clf() works on gcf(),
# which creates a figure when none is current) and leave it open.
fig.clf()
plt.close(fig)
# The rendered charts are provided in the attached data.