# Target variable
## Difference from the value 7 * 48 rows earlier within the same id
## (the same time of day one week before, assuming 48 rows per day per id
## sorted by time -- TODO confirm against how data_df is built).
data_df.loc[:, "y_diff"] = data_df["y"] - data_df.groupby("id")["y"].shift(periods=48 * 7)
# Feature engineering
## Time-related features derived from the "ds" timestamp column
data_df.loc[:, "hour_minute"] = data_df["ds"].dt.strftime('%H%M')  # "HHMM" string
# Timestamp truncated to the hour / to midnight (round-tripped through strings).
data_df.loc[:, "ds_h"] = pd.to_datetime(data_df["ds"].dt.strftime('%Y-%m-%d %H') + ":00:00")
data_df.loc[:, "ds_ymd"] = pd.to_datetime(data_df["ds"].dt.strftime('%Y-%m-%d ') + "00:00:00")
data_df.loc[:, "year"] = data_df["ds"].dt.year
data_df.loc[:, "month"] = data_df["ds"].dt.month
data_df.loc[:, "day"] = data_df["ds"].dt.day
data_df.loc[:, "hour"] = data_df["ds"].dt.hour
# 1 for AM (hour < 12), 0 otherwise. Derived from the hour rather than
# strftime('%p'), whose text depends on the process locale and would make a
# comparison with the literal "AM" silently fail under a non-English LC_TIME.
data_df.loc[:, "ampm"] = (data_df["ds"].dt.hour < 12).astype(int)
# Original author's note: values are very low around 04:00-05:59, so those
# hours are flagged with 0 and every other hour with 1.
data_df.loc[:, "peak"] = 1
data_df.loc[(4 <= data_df["hour"]) & (data_df["hour"] < 6), "peak"] = 0
data_df.loc[:, "minute"] = data_df["ds"].dt.minute
data_df.loc[:, "quarter"] = data_df["ds"].dt.quarter
data_df.loc[:, "yobi"] = data_df["ds"].dt.weekday  # day of week, Monday=0 ... Sunday=6
# Week number within the year (%U: weeks start on Sunday).
data_df.loc[:, "week_number_year"] = data_df["ds"].dt.strftime('%U').astype(int)
# Week number within the month: days 1-7 -> 1, 8-14 -> 2, ...
data_df.loc[:, "week_number_month"] = ((data_df["day"] - 1) // 7 + 1).astype(int)
## Add past observations as features
### Lagged copies of the weekly-difference series ("y_diff") first, then of
### the raw series ("y"), for lags of 1, 2, 3, 4, ... weeks (plus 8 and 15).
### The unit of each lag is whatever create_lags applies within each
### (id, hour_minute) group -- presumably days; confirm against create_lags.
_lag_steps = [7, 8, 14, 15, 21, 28, 35, 42, 49, 7 * 52, 7 * 26]
for _base_col in ("y_diff", "y"):
    _lag_cols = ["lag_{}_{}".format(_step, _base_col) for _step in _lag_steps]
    # Hand create_lags its own list on every call, as the original code did.
    data_df.loc[:, _lag_cols] = create_lags(
        data_df, ["id", "hour_minute"], _base_col, list(_lag_steps)
    )
### Ratio features: lagged weekly difference divided by the lagged raw value
# Each ratio is built from the columns of its own lag. The 42 and 49 ratios
# used to be computed from the lag_35 columns (copy-paste error), which made
# them exact duplicates of lag_35_y_ratio.
# NOTE(review): rows where the lagged raw value is 0 produce inf/NaN here,
# as before -- confirm downstream handling.
for _lag in (7, 8, 14, 15, 21, 28, 35, 42, 49, 364, 182):
    data_df.loc[:, "lag_{}_y_ratio".format(_lag)] = (
        data_df["lag_{}_y_diff".format(_lag)] / data_df["lag_{}_y".format(_lag)]
    )
### Running sums of the lagged difference columns:
### lag_7 + lag_14, then + lag_21, + lag_28, ... up to lag_49.
_prev_col = "lag_7_y_diff"
_stem = "lag_7"
for _lag in (14, 21, 28, 35, 42, 49):
    _stem = "{}_{}".format(_stem, _lag)
    _sum_col = _stem + "_diff_sum"
    # Each sum extends the previously stored sum column by one more lag.
    data_df.loc[:, _sum_col] = data_df[_prev_col] + data_df["lag_{}_y_diff".format(_lag)]
    _prev_col = _sum_col
### Running sums of the lagged raw-series columns:
### lag_7 + lag_14, then + lag_21, + lag_28, ... up to lag_49.
_prev_col = "lag_7_y"
_stem = "lag_7"
for _lag in (14, 21, 28, 35, 42, 49):
    _stem = "{}_{}".format(_stem, _lag)
    _sum_col = _stem + "_sum"
    # Each sum extends the previously stored sum column by one more lag.
    data_df.loc[:, _sum_col] = data_df[_prev_col] + data_df["lag_{}_y".format(_lag)]
    _prev_col = _sum_col
# Sanity check: show only the 00:00 and 00:30 rows for id == 0 (first 100 rows)
data_df[(data_df["id"] == 0) & data_df["hour_minute"].isin(["0000", "0030"])].head(100)