# Manually define the time-series cross-validation periods.
## Every fold's training start is shifted one week later than the previous fold's.
## Every fold's validation window is one week long and is shifted by one week too,
## so the span from train start to validation start is the same in every fold
## (799 days, i.e. roughly two years).
t_start = datetime.datetime(2017, 7, 1)      # train start of the first fold
t_end = datetime.datetime(2019, 11, 17)      # day after the last validation day of the last fold
t_end_test = datetime.datetime(2019, 12, 1)  # first day of the test period
num_valid = 10                               # number of folds

_ONE_WEEK = datetime.timedelta(days=7)

# One entry per fold: [train start, validation start, day after validation end].
# NOTE(review): training presumably covers [train start, validation start) --
# confirm against the code that consumes list_term.
list_term = [
    [
        t_start + _ONE_WEEK * i,                  # first day of train
        t_end - _ONE_WEEK * (num_valid - i),      # first day of valid
        t_end - _ONE_WEEK * (num_valid - i - 1),  # day after the last day of valid
    ]
    for i in range(num_valid)
]
# Test-data preparation: target column and feature-name lists.
target = "y_diff"

# Features that get cast to the category dtype.
feature_c = [
    "area",
    "month",
    "day",
    "hour",
    "yobi",
    "week_number_year",
    "holidays",
    "lag_7_holidays",
]

# All remaining features. No manual feature selection is applied: every name
# that follows the naming patterns below is included. The names are generated
# from those patterns, and the resulting order fixes the column order of
# `feature`.
_lags = (7, 8, 14, 15, 21, 28, 35, 42, 49, 56, 357, 364, 371, 182, 343)
_rolling_windows = range(48, 673, 48)  # 48, 96, ..., 672
_cumsum_lags = range(14, 57, 7)        # 14, 21, ..., 56
_weather_columns = [
    "HourlyDewPointTemperature",
    "HourlyDryBulbTemperature",
    "HourlyPrecipitation",
    "HourlyPressureChange",
    "HourlyPressureTendency",
    "HourlyRelativeHumidity",
    "HourlySkyConditions",
    "HourlySeaLevelPressure",
    "HourlyStationPressure",
    "HourlyVisibility",
    "HourlyWetBulbTemperature",
    "HourlyWindDirection",
    "HourlyWindGustSpeed",
    "HourlyWindSpeed",
]

# Per weather column: the "ma_-48_48_" column, then lag-1 / lag-7 diff and
# ratio of the raw column, then the same four for the "ma_-48_48_" column.
_weather_derived = []
for _col in _weather_columns:
    _ma_col = f"ma_-48_48_{_col}"
    _weather_derived.append(_ma_col)
    _weather_derived += [
        f"lag_{_lag}_{_base}_{_kind}"
        for _base in (_col, _ma_col)
        for _lag in (1, 7)
        for _kind in ("diff", "ratio")
    ]

feature_r = (
    ## diff
    [f"lag_{n}_y_diff" for n in _lags]
    ## y
    + [f"lag_{n}_y" for n in _lags]
    ## ratio
    + [f"lag_{n}_y_ratio" for n in _lags]
    ## rolling
    + [
        f"{stat}_1_{window}_lag_7_y_diff"
        for stat in ("ma", "std", "max", "min")
        for window in _rolling_windows
    ]
    ## sum
    + [
        f"lag_{n}_{suffix}"
        for suffix in (
            "y_diff_cumsum",
            "y_cumsum",
            "y_ratio_cumsum",
            "y_diff_cumsum_ratio",
        )
        for n in _cumsum_lags
    ]
    ## temp
    + _weather_columns
    + _weather_derived
)

# Full feature list: non-categorical features first, categorical ones last.
feature = feature_r + feature_c
# Type conversion: cast the categorical feature columns to the category dtype.
# Plain column assignment is used deliberately. `data_df.loc[:, cols] = ...`
# writes the values into the existing columns in place on pandas >= 2.0, which
# keeps the old dtype and silently drops the category conversion.
data_df[feature_c] = data_df[feature_c].astype("category")

# Test frame: rows whose "ds" is on or after t_end_test, restricted to the
# "ds" and "id" columns plus every feature column, with a fresh 0..n-1 index.
# A boolean-mask .loc selection followed by reset_index already yields a new
# frame, so no extra .copy() is needed.
_is_test_row = data_df["ds"] >= t_end_test
test_X = data_df.loc[_is_test_row, ["ds", "id"] + feature].reset_index(drop=True)
print(test_X.shape)
test_X.head()