kotrying
# Library
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from tqdm.auto import tqdm
import statsmodels
import statsmodels.api as sm
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_pinball_loss
from lightgbm import LGBMRegressor
import warnings
warnings.simplefilter('ignore')
# mount
# Attach Google Drive (Colab only). The mount call is skipped when the
# mount point directory is already present.
from google.colab import drive

if not os.path.isdir('/content/drive'):
    drive.mount('/content/drive')
Mounted at /content/drive
構成
MyDrive
├<weather_merchandising>
    ├<notebook>
    │ └run.ipynb
    ├<data>
    │ ├train_data.csv
    │ ├submission.csv
    │ └test_data.csv
    └<output>
# Config
# Project root on the mounted drive; inputs are read from <root>/data and
# results are written to <root>/output.
DRIVE_PATH = "/content/drive/MyDrive/ML/PROBSPACE/weather_merchandising"
INPUT, OUTPUT = (
    os.path.join(DRIVE_PATH, folder) for folder in ("data", "output")
)
TRAIN_FILE, TEST_FILE, SUB_FILE = (
    os.path.join(INPUT, csv_name)
    for csv_name in ("train_data.csv", "test_data.csv", "submission.csv")
)
# Random seed shared by the models below.
seed = 42
# plot style
# Show up to 200 columns when a DataFrame is displayed.
pd.options.display.max_columns = 200
# Background colour of every axes (hex RGB given without the leading '#').
plt.rcParams.update({'axes.facecolor': 'EEFFFE'})
# Data
# Read the training table, the test table and the sample submission.
train, test, sub = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_FILE, TEST_FILE, SUB_FILE)
)
# Target Columns
# One column per product, named <category><number>; categories appear in the
# order listed here and numbers start at 1 within each category.
target_columns = [
    f'{category}{number}'
    for category, n_products in (
        ('ice', 3), ('oden', 4), ('hot', 3), ('dessert', 5), ('drink', 6),
        ('alcol', 3), ('snack', 3), ('bento', 4), ('tild', 2), ('men', 6),
    )
    for number in range(1, n_products + 1)
]
def preprocessing(df, mode='train', input_year=2018):
    """Return a copy of ``df`` with calendar features added.

    The ``date`` column holds ``month/day`` strings without a year, so the
    year is reconstructed from ``mode``:

    * ``mode == 'train'``: rows whose month is after March get
      ``input_year``; rows in January-March get ``input_year + 1``.
    * any other ``mode``: every row gets ``input_year + 1``.

    Args:
        df: DataFrame with a ``date`` column formatted as ``'%m/%d'``.
        mode: ``'train'`` selects the split-year rule above; every other
            value is treated like ``'test'``.
        input_year: Year assigned to the April-December part of the training
            period. Defaults to 2018.

    Returns:
        A new DataFrame (the input is not modified) with the added columns
        ``time`` (full timestamp), ``year``, ``month``, ``day`` and
        ``weekday`` (``Series.dt.weekday`` of ``time``).
    """
    df_tmp = df.copy()
    # The format has no year, so the parsed year is only a placeholder that
    # is overwritten below; month and day are the useful parts.
    parsed = pd.to_datetime(df_tmp['date'], format='%m/%d')
    df_tmp['time'] = parsed
    df_tmp['year'] = parsed.dt.year
    df_tmp['month'] = parsed.dt.month
    df_tmp['day'] = parsed.dt.day
    if mode == 'train':
        df_tmp.loc[df_tmp['month'] > 3, 'year'] = input_year
        df_tmp.loc[df_tmp['month'] <= 3, 'year'] = input_year + 1
    else:
        df_tmp['year'] = input_year + 1
    # Rebuild the timestamp now that the year is known.
    df_tmp['time'] = pd.to_datetime(
        {'year': df_tmp['year'], 'month': df_tmp['month'], 'day': df_tmp['day']}
    )
    df_tmp['weekday'] = df_tmp['time'].dt.weekday
    return df_tmp
# Calendar-augmented copies of both tables, plus their row-wise
# concatenation with a fresh 0..n-1 index.
train_df, test_df = (
    preprocessing(train, mode='train'),
    preprocessing(test, mode='test'),
)
all_df = pd.concat([train_df, test_df], ignore_index=True)
# Preview the first three rows of each table.
display(train_df.head(3))
display(test_df.head(3))
id | date | highest | lowest | rain | ice1 | ice2 | ice3 | oden1 | oden2 | oden3 | oden4 | hot1 | hot2 | hot3 | dessert1 | dessert2 | dessert3 | dessert4 | dessert5 | drink1 | drink2 | drink3 | drink4 | drink5 | drink6 | alcol1 | alcol2 | alcol3 | snack1 | snack2 | snack3 | bento1 | bento2 | bento3 | bento4 | tild1 | tild2 | men1 | men2 | men3 | men4 | men5 | men6 | time | year | month | day | weekday | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 4/11 | 21.9 | 12.4 | 0.0 | 25 | 72 | 26 | 10 | 23 | 52 | 35 | 180 | 254 | 270 | 42 | 58 | 50 | 59 | 67 | 54 | 45 | 28 | 49 | 22 | 8 | 63 | 51 | 59 | 26 | 21 | 35 | 56 | 46 | 70 | 27 | 12 | 12 | 57 | 30 | 41 | 38 | 37 | 35 | 2018-04-11 | 2018 | 4 | 11 | 2 |
1 | 2 | 4/12 | 25.9 | 13.9 | 0.0 | 30 | 85 | 33 | 9 | 18 | 42 | 26 | 202 | 219 | 235 | 22 | 36 | 5 | 28 | 37 | 69 | 54 | 35 | 58 | 22 | 9 | 77 | 66 | 72 | 36 | 32 | 63 | 8 | 14 | 23 | 9 | 5 | 8 | 19 | 9 | 13 | 26 | 4 | 16 | 2018-04-12 | 2018 | 4 | 12 | 3 |
2 | 3 | 4/13 | 20.9 | 11.9 | 0.0 | 21 | 68 | 28 | 12 | 22 | 57 | 31 | 164 | 210 | 223 | 20 | 41 | 5 | 30 | 32 | 46 | 38 | 24 | 45 | 26 | 9 | 81 | 69 | 74 | 36 | 25 | 57 | 9 | 12 | 19 | 6 | 4 | 9 | 23 | 9 | 11 | 33 | 4 | 13 | 2018-04-13 | 2018 | 4 | 13 | 4 |
id | date | highest | lowest | rain | time | year | month | day | weekday | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 3/27 | 19.7 | 7.3 | 0.0 | 2019-03-27 | 2019 | 3 | 27 | 2 |
1 | 2 | 3/28 | 16.9 | 9.0 | 0.0 | 2019-03-28 | 2019 | 3 | 28 | 3 |
2 | 3 | 3/29 | 9.3 | 6.8 | 0.0 | 2019-03-29 | 2019 | 3 | 29 | 4 |
# Plot every weather and sales column over the training period, one panel
# per column, with vertical lines at month boundaries.
plot_col = [c for c in train.columns if c not in ['id', 'date', 'time', 'year', 'month', 'day', 'weekday']]
nrows = 14
ncols = -(-len(plot_col) // nrows)  # ceiling division: every column gets a panel
fig, axes = plt.subplots(nrows, ncols, sharey=True, sharex=True, figsize=(30, 80))
# Row positions of the first day of each month; the last value is the
# train/test split. (The original list had the typo `173.204`, which put the
# October marker in the wrong place and dropped the November one.)
month_starts = [20, 51, 81, 112, 143, 173, 204, 234, 265, 296, 324, 350]
for ax, col in zip(np.ravel(axes), plot_col):
    ax.plot(train_df.date, train_df[col], alpha=1, color='orange', label=col)
    for x in month_starts:
        ax.axvline(x)
    ax.set_xlabel(col)
    ax.legend()
    ax.set_xticks([])
plt.show()
# Findings:
# - Some ice products have a very different distribution in summer -> excluded from training.
# - Oden is not sold from July to September -> excluded from training.
# - Hot items show periodicity and a slight downward trend.
# - Ice and drinks correlate strongly with temperature, but drinks seem to have a sales floor.
# - Even the flat series show day-of-week periodicity.
曜日の周期性を調べる
# Mean sales of every product per weekday (0-6, from Series.dt.weekday).
# The target columns are selected BEFORE aggregating: averaging the whole
# frame would also try to average the string `date` column, which raises on
# pandas >= 2.0, and it computes means that are thrown away anyway.
train_df.groupby('weekday')[target_columns].mean().style.background_gradient()
# Findings (numbers are weekday codes):
# - Ice sells well on 0.
# - Oden sells poorly on 2 and 3.
# - Hot items sell well on 1, 2, 5 and 6.
# - Desserts sell well on 2 and 6.
# - drink1/4/5 sell well on 0, drink2/3 on 4, drink6 on 0, 2 and 3.
# - Alcohol sales increase from 0 through 6.
# - Snacks sell best on 3, then 4, then 0.
# - Bento sells much more on 1, 2, 5 and 6.
# - Chilled items ("tild") sell much more on 2.
# - Noodles ("men") sell much more on 1, 2, 5 and 6.
ice1 | ice2 | ice3 | oden1 | oden2 | oden3 | oden4 | hot1 | hot2 | hot3 | dessert1 | dessert2 | dessert3 | dessert4 | dessert5 | drink1 | drink2 | drink3 | drink4 | drink5 | drink6 | alcol1 | alcol2 | alcol3 | snack1 | snack2 | snack3 | bento1 | bento2 | bento3 | bento4 | tild1 | tild2 | men1 | men2 | men3 | men4 | men5 | men6 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
weekday | |||||||||||||||||||||||||||||||||||||||
0 | 55.360000 | 74.300000 | 41.920000 | 29.720000 | 36.020000 | 69.940000 | 42.220000 | 141.800000 | 210.760000 | 213.620000 | 25.060000 | 44.580000 | 7.040000 | 29.500000 | 38.200000 | 56.900000 | 44.560000 | 28.520000 | 50.300000 | 25.340000 | 10.000000 | 42.460000 | 26.440000 | 37.440000 | 31.180000 | 24.980000 | 45.860000 | 9.900000 | 12.700000 | 20.200000 | 8.180000 | 6.720000 | 9.020000 | 21.800000 | 10.500000 | 14.340000 | 32.420000 | 4.960000 | 18.840000 |
1 | 41.740000 | 72.420000 | 38.680000 | 26.980000 | 36.700000 | 67.080000 | 39.460000 | 161.000000 | 236.660000 | 236.760000 | 25.000000 | 44.160000 | 7.060000 | 29.520000 | 37.980000 | 54.660000 | 43.700000 | 27.840000 | 49.320000 | 24.920000 | 9.620000 | 51.080000 | 37.920000 | 48.220000 | 21.920000 | 10.960000 | 35.260000 | 49.860000 | 42.380000 | 69.760000 | 27.420000 | 6.780000 | 9.000000 | 52.400000 | 32.120000 | 38.760000 | 41.500000 | 41.360000 | 39.600000 |
2 | 40.860000 | 71.380000 | 36.680000 | 27.920000 | 34.100000 | 66.380000 | 40.800000 | 159.940000 | 229.140000 | 236.620000 | 45.420000 | 65.400000 | 49.520000 | 58.140000 | 66.860000 | 53.040000 | 42.320000 | 27.380000 | 48.680000 | 24.780000 | 10.140000 | 61.820000 | 55.340000 | 58.880000 | 26.080000 | 19.720000 | 43.560000 | 50.820000 | 43.440000 | 69.160000 | 28.360000 | 14.080000 | 11.740000 | 51.800000 | 30.160000 | 38.900000 | 41.220000 | 40.200000 | 38.420000 |
3 | 42.900000 | 71.460000 | 37.840000 | 27.400000 | 34.980000 | 68.740000 | 39.900000 | 142.360000 | 213.920000 | 215.600000 | 24.780000 | 44.540000 | 7.020000 | 29.360000 | 38.000000 | 53.960000 | 43.320000 | 27.880000 | 48.540000 | 25.000000 | 9.900000 | 73.560000 | 67.440000 | 70.800000 | 35.160000 | 33.600000 | 54.640000 | 10.160000 | 13.640000 | 20.160000 | 8.420000 | 6.340000 | 8.980000 | 21.200000 | 10.380000 | 13.980000 | 31.140000 | 4.920000 | 18.080000 |
4 | 41.380000 | 70.860000 | 37.060000 | 31.440000 | 35.840000 | 71.340000 | 44.280000 | 139.720000 | 211.040000 | 212.440000 | 24.660000 | 43.380000 | 6.660000 | 29.280000 | 36.760000 | 52.760000 | 47.080000 | 31.200000 | 48.900000 | 24.760000 | 9.260000 | 80.560000 | 77.980000 | 79.700000 | 32.280000 | 25.100000 | 47.740000 | 10.100000 | 13.240000 | 19.680000 | 8.080000 | 6.520000 | 8.740000 | 21.380000 | 10.520000 | 14.120000 | 32.240000 | 5.000000 | 18.520000 |
5 | 43.660000 | 71.800000 | 38.500000 | 31.340000 | 37.100000 | 78.760000 | 46.180000 | 159.000000 | 232.000000 | 233.600000 | 25.400000 | 45.100000 | 7.180000 | 29.760000 | 38.280000 | 54.940000 | 43.500000 | 28.160000 | 49.440000 | 24.740000 | 9.540000 | 91.160000 | 92.380000 | 90.660000 | 21.300000 | 10.680000 | 36.420000 | 50.560000 | 44.340000 | 70.300000 | 28.400000 | 6.020000 | 8.960000 | 51.340000 | 30.040000 | 39.420000 | 41.760000 | 41.140000 | 38.860000 |
6 | 44.680000 | 71.680000 | 38.620000 | 30.860000 | 39.080000 | 73.280000 | 45.360000 | 155.080000 | 231.540000 | 235.220000 | 45.100000 | 65.820000 | 48.520000 | 57.980000 | 67.940000 | 54.000000 | 43.840000 | 27.980000 | 49.220000 | 24.920000 | 9.700000 | 101.520000 | 109.120000 | 103.080000 | 25.380000 | 20.020000 | 44.460000 | 49.640000 | 43.840000 | 68.900000 | 27.860000 | 7.360000 | 9.660000 | 52.280000 | 31.080000 | 40.480000 | 42.080000 | 40.840000 | 40.580000 |
日付の周期性を調べる
# Mean sales of every product per day of the month. Columns are selected
# before aggregating so the string `date` column is never averaged (that
# raises on pandas >= 2.0).
train_df.groupby('day')[target_columns].mean().style.background_gradient()
# There may be patterns around the beginning and the end of the month.
ice1 | ice2 | ice3 | oden1 | oden2 | oden3 | oden4 | hot1 | hot2 | hot3 | dessert1 | dessert2 | dessert3 | dessert4 | dessert5 | drink1 | drink2 | drink3 | drink4 | drink5 | drink6 | alcol1 | alcol2 | alcol3 | snack1 | snack2 | snack3 | bento1 | bento2 | bento3 | bento4 | tild1 | tild2 | men1 | men2 | men3 | men4 | men5 | men6 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
day | |||||||||||||||||||||||||||||||||||||||
1 | 54.454545 | 78.545455 | 40.727273 | 29.545455 | 32.454545 | 69.454545 | 43.272727 | 146.909091 | 214.818182 | 222.363636 | 28.727273 | 47.272727 | 14.090909 | 33.090909 | 41.000000 | 65.909091 | 49.454545 | 32.090909 | 55.181818 | 24.909091 | 9.000000 | 74.272727 | 70.909091 | 71.454545 | 27.090909 | 19.818182 | 41.545455 | 31.454545 | 29.909091 | 47.636364 | 19.636364 | 6.272727 | 9.000000 | 35.818182 | 20.909091 | 26.454545 | 37.727273 | 22.818182 | 28.181818 |
2 | 64.090909 | 74.818182 | 43.454545 | 30.545455 | 35.727273 | 67.636364 | 50.454545 | 142.000000 | 228.000000 | 228.636364 | 31.000000 | 50.454545 | 20.636364 | 39.181818 | 47.090909 | 57.272727 | 45.272727 | 29.000000 | 48.727273 | 24.636364 | 8.727273 | 76.000000 | 70.000000 | 74.636364 | 26.181818 | 19.181818 | 40.727273 | 39.454545 | 37.272727 | 51.181818 | 23.000000 | 6.818182 | 9.181818 | 43.727273 | 24.727273 | 31.909091 | 40.727273 | 30.909091 | 32.727273 |
3 | 48.090909 | 74.909091 | 39.363636 | 27.636364 | 47.727273 | 65.545455 | 41.636364 | 143.545455 | 220.454545 | 219.636364 | 30.363636 | 50.181818 | 19.636364 | 39.000000 | 46.181818 | 58.272727 | 45.636364 | 29.636364 | 50.909091 | 24.818182 | 9.181818 | 74.272727 | 70.636364 | 72.000000 | 26.454545 | 22.000000 | 46.636364 | 30.454545 | 29.363636 | 44.909091 | 18.454545 | 5.727273 | 9.090909 | 37.090909 | 20.454545 | 27.181818 | 34.181818 | 23.636364 | 26.636364 |
4 | 40.000000 | 75.090909 | 37.090909 | 22.727273 | 28.909091 | 57.636364 | 35.818182 | 149.272727 | 228.727273 | 225.909091 | 26.727273 | 44.272727 | 13.545455 | 33.909091 | 40.272727 | 58.545455 | 46.181818 | 29.181818 | 50.545455 | 25.090909 | 9.181818 | 66.363636 | 59.727273 | 64.272727 | 28.545455 | 21.454545 | 43.181818 | 26.727273 | 26.454545 | 44.454545 | 17.000000 | 8.636364 | 9.909091 | 37.636364 | 20.727273 | 28.181818 | 36.909091 | 24.636364 | 30.000000 |
5 | 43.181818 | 75.636364 | 39.454545 | 25.636364 | 29.636364 | 63.272727 | 36.272727 | 159.636364 | 227.000000 | 231.818182 | 27.636364 | 45.090909 | 16.363636 | 35.636364 | 44.000000 | 58.454545 | 48.181818 | 28.818182 | 49.363636 | 24.545455 | 9.363636 | 68.818182 | 60.363636 | 68.363636 | 25.363636 | 18.363636 | 42.363636 | 39.454545 | 35.181818 | 56.454545 | 24.090909 | 8.545455 | 9.818182 | 45.272727 | 25.363636 | 33.636364 | 37.909091 | 32.363636 | 36.636364 |
6 | 39.090909 | 71.090909 | 38.454545 | 24.000000 | 27.909091 | 64.818182 | 36.909091 | 156.818182 | 225.545455 | 229.727273 | 30.272727 | 49.363636 | 24.090909 | 41.272727 | 46.363636 | 51.272727 | 42.000000 | 27.000000 | 50.727273 | 26.090909 | 12.454545 | 74.727273 | 75.363636 | 75.090909 | 27.818182 | 21.818182 | 42.545455 | 36.181818 | 32.181818 | 51.636364 | 21.909091 | 9.000000 | 9.818182 | 41.818182 | 21.909091 | 30.181818 | 37.272727 | 26.727273 | 31.727273 |
7 | 33.727273 | 72.636364 | 35.636364 | 27.000000 | 34.181818 | 68.909091 | 39.727273 | 151.545455 | 218.818182 | 226.545455 | 24.818182 | 43.090909 | 12.181818 | 33.454545 | 41.818182 | 54.363636 | 45.000000 | 28.727273 | 50.545455 | 25.090909 | 9.636364 | 69.363636 | 65.272727 | 68.000000 | 29.090909 | 24.090909 | 50.636364 | 25.545455 | 23.818182 | 39.090909 | 15.909091 | 5.909091 | 8.454545 | 30.909091 | 17.454545 | 21.636364 | 32.818182 | 16.818182 | 22.090909 |
8 | 35.636364 | 67.727273 | 34.818182 | 31.454545 | 38.000000 | 73.363636 | 40.727273 | 144.545455 | 221.181818 | 221.454545 | 24.909091 | 44.454545 | 13.272727 | 32.272727 | 41.181818 | 51.545455 | 45.272727 | 28.909091 | 46.818182 | 24.545455 | 9.545455 | 73.727273 | 69.000000 | 72.545455 | 27.363636 | 19.909091 | 41.090909 | 32.454545 | 31.000000 | 45.090909 | 17.272727 | 8.454545 | 10.363636 | 37.545455 | 21.909091 | 28.545455 | 39.363636 | 24.727273 | 34.090909 |
9 | 42.727273 | 69.454545 | 38.181818 | 35.454545 | 41.909091 | 83.454545 | 46.909091 | 142.181818 | 222.000000 | 227.000000 | 27.636364 | 46.000000 | 19.727273 | 38.272727 | 46.454545 | 53.909091 | 44.090909 | 27.545455 | 50.909091 | 26.909091 | 12.636364 | 76.272727 | 74.454545 | 74.545455 | 27.000000 | 19.545455 | 42.363636 | 38.818182 | 36.363636 | 56.636364 | 21.181818 | 10.000000 | 10.545455 | 46.272727 | 24.636364 | 33.636364 | 44.454545 | 33.272727 | 37.000000 |
10 | 44.000000 | 68.545455 | 37.454545 | 34.818182 | 40.818182 | 77.000000 | 44.363636 | 138.818182 | 218.181818 | 214.181818 | 27.909091 | 48.363636 | 18.909091 | 36.909091 | 46.090909 | 52.363636 | 42.727273 | 26.363636 | 49.545455 | 26.909091 | 12.909091 | 76.090909 | 76.363636 | 80.909091 | 27.909091 | 22.454545 | 46.909091 | 31.363636 | 29.545455 | 48.363636 | 19.454545 | 7.181818 | 9.545455 | 39.363636 | 21.727273 | 29.090909 | 35.545455 | 24.818182 | 28.818182 |
11 | 39.500000 | 67.666667 | 36.000000 | 32.916667 | 40.000000 | 75.666667 | 47.250000 | 143.166667 | 220.750000 | 217.750000 | 26.000000 | 43.416667 | 15.583333 | 34.750000 | 43.333333 | 48.916667 | 40.416667 | 29.666667 | 48.000000 | 25.583333 | 12.000000 | 68.333333 | 60.333333 | 63.333333 | 28.833333 | 19.916667 | 45.166667 | 30.083333 | 28.500000 | 42.750000 | 18.000000 | 7.833333 | 9.250000 | 36.833333 | 20.750000 | 27.666667 | 38.750000 | 24.000000 | 28.750000 |
12 | 29.166667 | 68.833333 | 33.416667 | 27.916667 | 45.083333 | 80.750000 | 39.666667 | 158.083333 | 227.416667 | 229.916667 | 25.500000 | 45.083333 | 14.833333 | 35.750000 | 43.500000 | 53.333333 | 41.500000 | 27.500000 | 48.083333 | 24.583333 | 10.000000 | 69.750000 | 61.083333 | 64.083333 | 28.583333 | 18.750000 | 41.333333 | 37.083333 | 33.833333 | 53.416667 | 20.250000 | 8.083333 | 9.916667 | 40.416667 | 24.250000 | 31.416667 | 39.166667 | 29.583333 | 32.583333 |
13 | 41.166667 | 69.166667 | 36.166667 | 27.166667 | 33.083333 | 69.083333 | 40.500000 | 158.166667 | 226.583333 | 226.750000 | 28.250000 | 49.166667 | 22.083333 | 39.416667 | 46.416667 | 51.250000 | 42.833333 | 26.833333 | 48.166667 | 24.750000 | 9.666667 | 76.083333 | 71.750000 | 72.083333 | 28.083333 | 21.583333 | 47.666667 | 35.583333 | 28.916667 | 48.083333 | 18.500000 | 7.666667 | 10.166667 | 40.000000 | 22.583333 | 28.666667 | 37.666667 | 25.666667 | 31.083333 |
14 | 47.833333 | 69.333333 | 41.166667 | 33.416667 | 41.666667 | 79.916667 | 48.750000 | 148.666667 | 217.666667 | 225.583333 | 23.583333 | 44.000000 | 12.000000 | 32.166667 | 40.416667 | 51.250000 | 41.500000 | 27.833333 | 47.666667 | 25.083333 | 9.000000 | 70.500000 | 63.583333 | 68.083333 | 27.750000 | 23.083333 | 51.000000 | 26.750000 | 24.250000 | 39.250000 | 16.333333 | 8.750000 | 10.583333 | 34.916667 | 19.250000 | 25.166667 | 37.750000 | 19.833333 | 27.583333 |
15 | 40.416667 | 67.083333 | 37.416667 | 36.250000 | 43.750000 | 78.000000 | 51.000000 | 149.916667 | 225.000000 | 230.500000 | 25.083333 | 47.666667 | 15.583333 | 34.166667 | 41.500000 | 48.083333 | 40.000000 | 25.500000 | 45.083333 | 25.166667 | 10.916667 | 76.666667 | 74.500000 | 74.916667 | 28.833333 | 20.083333 | 42.833333 | 35.333333 | 29.083333 | 49.166667 | 19.333333 | 5.666667 | 8.250000 | 38.083333 | 22.750000 | 28.916667 | 36.583333 | 25.750000 | 27.083333 |
16 | 39.333333 | 68.500000 | 38.333333 | 33.333333 | 38.083333 | 82.500000 | 51.000000 | 147.333333 | 223.750000 | 225.916667 | 42.833333 | 67.500000 | 30.666667 | 49.500000 | 68.166667 | 52.750000 | 41.500000 | 26.666667 | 47.250000 | 24.916667 | 9.416667 | 70.916667 | 67.250000 | 71.583333 | 28.166667 | 20.083333 | 39.666667 | 37.083333 | 33.250000 | 51.000000 | 21.500000 | 9.250000 | 10.333333 | 42.000000 | 25.916667 | 32.583333 | 42.166667 | 30.250000 | 35.333333 |
17 | 40.583333 | 70.916667 | 39.083333 | 32.750000 | 37.416667 | 77.000000 | 45.333333 | 152.333333 | 219.166667 | 222.750000 | 40.083333 | 62.000000 | 26.583333 | 44.416667 | 53.416667 | 51.666667 | 42.833333 | 26.416667 | 47.250000 | 24.000000 | 8.916667 | 70.166667 | 67.750000 | 70.416667 | 29.333333 | 20.666667 | 45.083333 | 32.583333 | 31.416667 | 50.166667 | 19.833333 | 6.750000 | 8.583333 | 38.000000 | 22.083333 | 26.666667 | 36.500000 | 24.166667 | 29.166667 |
18 | 39.416667 | 68.333333 | 36.916667 | 31.000000 | 36.166667 | 69.833333 | 45.250000 | 151.083333 | 220.500000 | 229.583333 | 35.583333 | 56.000000 | 20.083333 | 38.416667 | 46.416667 | 47.166667 | 40.083333 | 25.250000 | 44.833333 | 24.666667 | 9.250000 | 66.166667 | 58.166667 | 61.083333 | 27.750000 | 20.250000 | 45.250000 | 30.416667 | 27.083333 | 45.416667 | 18.250000 | 9.083333 | 10.083333 | 39.333333 | 21.083333 | 28.583333 | 37.250000 | 24.500000 | 31.333333 |
19 | 38.916667 | 72.583333 | 36.416667 | 26.250000 | 35.166667 | 68.000000 | 41.833333 | 160.416667 | 230.750000 | 231.000000 | 33.666667 | 50.500000 | 18.666667 | 36.083333 | 44.166667 | 53.166667 | 44.083333 | 28.166667 | 48.333333 | 24.416667 | 8.666667 | 67.083333 | 59.083333 | 65.333333 | 26.500000 | 20.000000 | 41.916667 | 36.083333 | 31.416667 | 53.000000 | 22.416667 | 8.416667 | 9.916667 | 40.750000 | 23.666667 | 32.000000 | 38.416667 | 29.916667 | 31.500000 |
20 | 33.166667 | 71.666667 | 34.666667 | 25.750000 | 33.166667 | 71.166667 | 40.500000 | 157.583333 | 218.916667 | 232.083333 | 34.916667 | 52.250000 | 23.083333 | 40.833333 | 47.916667 | 48.666667 | 41.833333 | 26.750000 | 47.916667 | 24.083333 | 9.666667 | 72.583333 | 69.916667 | 70.083333 | 26.750000 | 21.583333 | 44.750000 | 33.750000 | 31.000000 | 48.916667 | 19.250000 | 6.833333 | 8.666667 | 38.000000 | 21.750000 | 27.166667 | 35.333333 | 25.333333 | 27.500000 |
21 | 44.500000 | 73.750000 | 38.833333 | 26.583333 | 34.416667 | 69.166667 | 40.750000 | 158.250000 | 228.000000 | 225.000000 | 28.666667 | 46.833333 | 13.000000 | 33.833333 | 41.250000 | 56.583333 | 43.083333 | 31.083333 | 50.750000 | 24.583333 | 8.833333 | 70.583333 | 64.583333 | 70.083333 | 28.750000 | 21.916667 | 47.083333 | 27.416667 | 26.333333 | 39.083333 | 17.083333 | 6.416667 | 9.166667 | 33.750000 | 18.250000 | 24.416667 | 35.500000 | 19.583333 | 25.500000 |
22 | 52.000000 | 77.083333 | 39.250000 | 26.083333 | 34.000000 | 69.416667 | 41.750000 | 155.000000 | 230.833333 | 221.833333 | 28.416667 | 47.250000 | 15.750000 | 34.083333 | 41.333333 | 60.916667 | 47.583333 | 33.333333 | 51.916667 | 24.416667 | 8.416667 | 73.500000 | 68.750000 | 72.833333 | 26.000000 | 19.166667 | 41.250000 | 32.083333 | 32.083333 | 50.333333 | 19.666667 | 8.583333 | 9.250000 | 40.500000 | 23.416667 | 29.500000 | 37.750000 | 26.500000 | 32.750000 |
23 | 80.416667 | 69.750000 | 46.583333 | 30.416667 | 35.583333 | 69.250000 | 43.250000 | 149.083333 | 223.250000 | 224.916667 | 30.916667 | 47.500000 | 20.416667 | 37.583333 | 45.000000 | 47.083333 | 41.500000 | 26.166667 | 45.583333 | 24.333333 | 9.500000 | 73.750000 | 68.833333 | 73.333333 | 25.916667 | 19.416667 | 42.250000 | 36.750000 | 34.083333 | 54.666667 | 21.666667 | 9.833333 | 10.416667 | 43.166667 | 24.666667 | 32.416667 | 39.666667 | 29.500000 | 37.000000 |
24 | 42.500000 | 71.416667 | 38.083333 | 35.250000 | 39.416667 | 78.500000 | 48.333333 | 151.750000 | 218.500000 | 233.083333 | 29.500000 | 49.166667 | 18.916667 | 37.083333 | 44.583333 | 51.333333 | 41.833333 | 26.750000 | 49.416667 | 24.416667 | 9.416667 | 72.416667 | 65.166667 | 67.833333 | 29.083333 | 21.333333 | 43.500000 | 34.000000 | 31.333333 | 49.583333 | 20.416667 | 6.000000 | 9.000000 | 36.000000 | 23.500000 | 28.916667 | 33.583333 | 22.583333 | 28.750000 |
25 | 49.916667 | 74.250000 | 41.500000 | 31.333333 | 37.583333 | 69.666667 | 40.916667 | 145.916667 | 221.000000 | 225.166667 | 27.250000 | 45.166667 | 16.000000 | 35.333333 | 43.750000 | 57.333333 | 45.916667 | 28.583333 | 50.416667 | 25.166667 | 9.083333 | 66.583333 | 58.583333 | 62.000000 | 26.583333 | 20.333333 | 43.500000 | 29.750000 | 27.750000 | 43.833333 | 18.416667 | 6.500000 | 8.583333 | 35.500000 | 20.916667 | 25.166667 | 34.416667 | 22.416667 | 27.666667 |
26 | 45.500000 | 71.833333 | 38.750000 | 25.666667 | 35.333333 | 63.583333 | 37.666667 | 155.000000 | 228.500000 | 232.666667 | 42.000000 | 65.916667 | 27.666667 | 46.416667 | 65.666667 | 54.583333 | 43.916667 | 28.083333 | 48.500000 | 24.333333 | 8.916667 | 70.166667 | 61.250000 | 67.666667 | 24.500000 | 19.166667 | 40.833333 | 35.833333 | 34.000000 | 54.000000 | 21.666667 | 8.500000 | 9.583333 | 41.250000 | 23.416667 | 30.000000 | 40.333333 | 29.500000 | 33.916667 |
27 | 47.818182 | 73.000000 | 41.000000 | 23.818182 | 31.909091 | 60.090909 | 37.272727 | 163.000000 | 229.181818 | 229.727273 | 38.909091 | 64.000000 | 27.636364 | 43.909091 | 55.272727 | 54.818182 | 44.272727 | 27.454545 | 48.909091 | 24.818182 | 9.363636 | 74.000000 | 74.545455 | 75.363636 | 31.090909 | 22.818182 | 45.545455 | 31.272727 | 30.363636 | 50.545455 | 19.090909 | 7.181818 | 9.090909 | 38.000000 | 21.181818 | 26.454545 | 35.272727 | 23.545455 | 29.454545 |
28 | 32.545455 | 71.636364 | 34.272727 | 29.090909 | 34.454545 | 67.000000 | 40.636364 | 155.363636 | 224.272727 | 223.636364 | 34.000000 | 56.000000 | 17.818182 | 36.636364 | 44.090909 | 55.454545 | 43.909091 | 32.272727 | 50.636364 | 25.454545 | 10.545455 | 73.818182 | 67.363636 | 69.636364 | 28.090909 | 21.000000 | 48.272727 | 28.818182 | 27.545455 | 44.272727 | 17.000000 | 7.363636 | 9.181818 | 36.636364 | 18.909091 | 24.636364 | 37.909091 | 22.000000 | 28.454545 |
29 | 39.900000 | 76.000000 | 37.400000 | 23.500000 | 28.500000 | 62.000000 | 34.300000 | 152.500000 | 230.900000 | 234.000000 | 34.300000 | 53.100000 | 20.000000 | 38.500000 | 45.600000 | 61.200000 | 48.900000 | 29.300000 | 51.800000 | 24.600000 | 9.200000 | 75.300000 | 71.800000 | 71.500000 | 26.800000 | 18.600000 | 40.700000 | 38.500000 | 36.200000 | 54.100000 | 21.600000 | 6.800000 | 8.400000 | 43.900000 | 25.000000 | 30.700000 | 37.400000 | 28.800000 | 30.500000 |
30 | 48.400000 | 79.300000 | 40.100000 | 27.300000 | 31.100000 | 61.300000 | 37.500000 | 155.000000 | 223.600000 | 225.800000 | 34.000000 | 53.600000 | 24.400000 | 39.100000 | 48.500000 | 63.500000 | 50.200000 | 32.700000 | 53.900000 | 24.900000 | 9.000000 | 70.700000 | 63.100000 | 68.400000 | 27.300000 | 21.100000 | 41.500000 | 35.400000 | 31.700000 | 50.500000 | 20.300000 | 9.100000 | 9.700000 | 39.900000 | 22.900000 | 31.500000 | 38.200000 | 26.100000 | 32.700000 |
31 | 69.166667 | 75.333333 | 46.666667 | 38.500000 | 39.333333 | 80.833333 | 52.166667 | 139.833333 | 213.500000 | 212.333333 | 29.833333 | 45.833333 | 13.000000 | 34.000000 | 40.000000 | 60.500000 | 49.666667 | 29.333333 | 50.333333 | 25.333333 | 9.666667 | 61.666667 | 53.500000 | 61.333333 | 29.166667 | 25.166667 | 48.000000 | 22.333333 | 21.500000 | 34.500000 | 15.333333 | 6.666667 | 8.166667 | 29.833333 | 17.166667 | 18.333333 | 33.500000 | 15.666667 | 23.166667 |
テストデータに似ている訓練データを探す
# Adversarial Validation
# Train a classifier to tell training rows (target=0) from test rows
# (target=1) using weather and calendar features only, then look for training
# rows that the classifier considers test-like.
train = pd.read_csv(TRAIN_FILE)
test = pd.read_csv(TEST_FILE)
train = train.drop(target_columns, axis=1)
train = preprocessing(train, mode='train')
test = preprocessing(test, mode='test')
train['target'] = 0
test['target'] = 1
all_df = pd.concat([train, test], axis=0)

# Identifier, raw date and year/month columns are dropped along with the label.
drop_columns = ['id', 'date', 'time', 'year', 'month', 'target']
train_, test_ = train_test_split(all_df, test_size=0.33, random_state=seed, shuffle=True)
train_x = train_.drop(drop_columns, axis=1)
test_x = test_.drop(drop_columns, axis=1)
train_y = train_['target'].values
test_y = test_['target'].values
lgb_train = lgb.Dataset(train_x, label=train_y)
lgb_test = lgb.Dataset(test_x, label=test_y)

# 'objective' must be set explicitly: LightGBM's default objective is
# regression, which is not what a train-vs-test classifier should optimise.
param = {'objective': 'binary',
         'metric': 'auc',
         'seed': seed,
         'verbosity': -1}
num_round = 100
# NOTE(review): verbose_eval / early_stopping_rounds are LightGBM 3.x keyword
# arguments; newer releases expect callbacks instead - confirm against the
# installed version before upgrading.
clf = lgb.train(param, lgb_train, num_round, valid_sets=[lgb_test], verbose_eval=50, early_stopping_rounds=50)

feature_imp = pd.DataFrame(sorted(zip(clf.feature_importance(), train_x.columns)), columns=['Value', 'Feature'])
plt.figure(figsize=(6, 5))
sns.barplot(x="Value", y="Feature", data=feature_imp.sort_values(by="Value", ascending=False).head(500))
plt.title('LightGBM Features')
plt.tight_layout()
plt.show()
display(feature_imp.sort_values(by="Value", ascending=False))

# Score the ORIGINAL training rows only and report their ids. Scoring the
# shuffled train/test mixture (as before) ranks test rows too and yields
# positions that cannot be mapped back to training rows.
train_scores = pd.Series(
    clf.predict(train.drop(drop_columns, axis=1)),
    index=train['id'],
).sort_values(ascending=False)
print(train_scores.head(30).values)
print(train_scores.head(30).index.values)
# Author's conclusion from the original run: no period in particular
# resembles the test data.
Training until validation scores don't improve for 50 rounds. [50] valid_0's auc: 0.819469 Early stopping, best iteration is: [14] valid_0's auc: 0.891593
Value | Feature | |
---|---|---|
4 | 24 | lowest |
3 | 22 | highest |
2 | 17 | day |
1 | 5 | weekday |
0 | 1 | rain |
[0.24907411 0.24907411 0.24907411 0.24297665 0.24297665 0.24297665 0.24054652 0.23205524 0.23205524 0.23205524 0.23205524 0.23205524 0.23205524 0.23205524 0.22962511 0.2220307 0.21542067 0.17226214 0.1697324 0.1697324 0.1697324 0.1697324 0.16401879 0.15614695 0.15614695 0.15309783 0.15309783 0.14594867 0.13259713 0.13259713] [178 240 151 79 5 138 54 123 184 193 92 172 216 133 89 42 72 74 119 229 162 29 171 70 209 69 201 205 112 152]
# Data
# Reload the raw tables so modelling starts from unmodified inputs.
train, test, sub = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_FILE, TEST_FILE, SUB_FILE)
)
def preprocessing(df, mode='train', input_year=2018):
    """Return a copy of ``df`` with calendar features added.

    The ``date`` column holds ``month/day`` strings without a year, so the
    year is reconstructed from ``mode``:

    * ``mode == 'train'``: rows whose month is after March get
      ``input_year``; rows in January-March get ``input_year + 1``.
    * any other ``mode``: every row gets ``input_year + 1``.

    Args:
        df: DataFrame with a ``date`` column formatted as ``'%m/%d'``.
        mode: ``'train'`` selects the split-year rule above; every other
            value is treated like ``'test'``.
        input_year: Year assigned to the April-December part of the training
            period. Defaults to 2018.

    Returns:
        A new DataFrame (the input is not modified) with the added columns
        ``time`` (full timestamp), ``year``, ``month``, ``day`` and
        ``weekday`` (``Series.dt.weekday`` of ``time``).
    """
    df_tmp = df.copy()
    # The format has no year, so the parsed year is only a placeholder that
    # is overwritten below; month and day are the useful parts.
    parsed = pd.to_datetime(df_tmp['date'], format='%m/%d')
    df_tmp['time'] = parsed
    df_tmp['year'] = parsed.dt.year
    df_tmp['month'] = parsed.dt.month
    df_tmp['day'] = parsed.dt.day
    if mode == 'train':
        df_tmp.loc[df_tmp['month'] > 3, 'year'] = input_year
        df_tmp.loc[df_tmp['month'] <= 3, 'year'] = input_year + 1
    else:
        df_tmp['year'] = input_year + 1
    # Rebuild the timestamp now that the year is known.
    df_tmp['time'] = pd.to_datetime(
        {'year': df_tmp['year'], 'month': df_tmp['month'], 'day': df_tmp['day']}
    )
    df_tmp['weekday'] = df_tmp['time'].dt.weekday
    return df_tmp
# Replace both tables with their calendar-augmented versions, then list the
# resulting training columns.
train, test = (
    preprocessing(train, mode='train'),
    preprocessing(test, mode='test'),
)
train.columns
Index(['id', 'date', 'highest', 'lowest', 'rain', 'ice1', 'ice2', 'ice3', 'oden1', 'oden2', 'oden3', 'oden4', 'hot1', 'hot2', 'hot3', 'dessert1', 'dessert2', 'dessert3', 'dessert4', 'dessert5', 'drink1', 'drink2', 'drink3', 'drink4', 'drink5', 'drink6', 'alcol1', 'alcol2', 'alcol3', 'snack1', 'snack2', 'snack3', 'bento1', 'bento2', 'bento3', 'bento4', 'tild1', 'tild2', 'men1', 'men2', 'men3', 'men4', 'men5', 'men6', 'time', 'year', 'month', 'day', 'weekday'], dtype='object')
# Train one LightGBM quantile regressor per (product, quantile), validate on
# the held-out ids and store test predictions in `results`.

# Row ids held out for validation
valid_index = range(297, 351)  # month:2,3
# Test predictions keyed by (product column, quantile)
results = {}
# One list of per-quantile validation scores per product
all_lgb_score = []
# Quantiles to predict
qs = np.array([0.01, 0.1, 0.5, 0.9, 0.99])
# Plain-list copy kept under its original name in case other cells read it.
quantiles = qs.tolist()

# Product-specific weekday indicator features: each entry maps a group of
# target columns to the (feature name, weekday codes) pairs added for them.
# Products not listed (drink6, alcol*) get no extra feature.
weekday_flag_groups = [
    (target_columns[0:3], [('is_wday0', [0])]),                     # ice
    (target_columns[3:7], [('is_wday23', [2, 3])]),                 # oden
    (target_columns[7:10], [('is_wday034', [0, 3, 4])]),            # hot
    (target_columns[10:15], [('is_wday26', [2, 6])]),               # dessert
    ([target_columns[15], target_columns[18], target_columns[19]],
     [('is_wday0', [0])]),                                          # drink1/4/5
    (target_columns[16:18], [('is_wday4', [4])]),                   # drink2/3
    (target_columns[24:27], [('is_wday4', [4]), ('is_wday15', [1, 5])]),  # snack
    (target_columns[27:31], [('is_wday034', [0, 3, 4])]),           # bento
    (target_columns[31:33], [('is_wday2', [2])]),                   # tild
    (target_columns[33:39], [('is_wday034', [0, 3, 4])]),           # men
]
weekday_flags = {
    col: flags for cols, flags in weekday_flag_groups for col in cols
}

# Build the predictions product by product
for c in tqdm(target_columns):
    train_tmp = train.copy()
    test_tmp = test.copy()

    # Product-specific row filters (.copy() so the columns added below are
    # written to an independent frame, not to a slice of `train`).
    if c in target_columns[0:3]:
        # ice: drop July-September from training
        train_tmp = train_tmp[~train_tmp['month'].isin([7, 8, 9])].copy()
    elif c in target_columns[3:7]:
        # oden: keep October-March (plus the validation rows)
        keep = train_tmp['month'].isin([10, 11, 12, 1, 2, 3]) | train_tmp['id'].isin(valid_index)
        train_tmp = train_tmp[keep].copy()

    # Product-specific weekday indicators
    for flag_name, weekdays in weekday_flags.get(c, []):
        train_tmp[flag_name] = train_tmp['weekday'].isin(weekdays).astype(int)
        test_tmp[flag_name] = test_tmp['weekday'].isin(weekdays).astype(int)

    # Features: everything except the targets and the raw date/timestamp.
    train_columns = [
        col for col in train_tmp.columns
        if col not in target_columns and col not in ['date', 'time']
    ]
    is_valid = train_tmp['id'].isin(valid_index)
    x_train = train_tmp.loc[~is_valid, train_columns].drop(['id'], axis=1)
    y_train = train_tmp.loc[~is_valid, c]
    x_valid = train_tmp.loc[is_valid, train_columns].drop(['id'], axis=1)
    y_valid = train_tmp.loc[is_valid, c]
    x_test = test_tmp[train_columns].drop(['id'], axis=1)

    lgb_scores = []
    # One model per quantile. Each model is fitted exactly once: the score and
    # the test prediction both come from the same fit. The model is bound to
    # `model`, not `lgb`, so the imported lightgbm module is not overwritten.
    for q in qs:
        model = LGBMRegressor(
            objective='quantile',
            alpha=q,
            n_estimators=10000,
            max_depth=2,
            colsample_bytree=0.9,
            random_state=seed)
        model.fit(x_train, y_train, eval_set=(x_valid, y_valid), early_stopping_rounds=100, verbose=False)
        # best validation score of this fit
        lgb_scores.append(model.best_score_['valid_0']['quantile'])
        # store the test prediction
        results[(c, q)] = model.predict(x_test)
    all_lgb_score.append(lgb_scores)

# Score: mean validation score over all products and quantiles
print(np.array(all_lgb_score).mean())
0%| | 0/39 [00:00<?, ?it/s]
0.8462863293289724
# One column per (product, quantile) pair, named '<product>_<quantile>'.
# Predictions are rounded to whole numbers and stay numeric throughout
# (no round-trip through a string array).
submit_df = pd.DataFrame(
    {f'{k[0]}_{k[1]}': np.round(np.asarray(v, dtype=float)) for k, v in results.items()}
)
# Row ids are 1-based, one per predicted test row; the count is taken from the
# predictions instead of being fixed at 21.
submit_df.index = list(range(1, len(submit_df) + 1))
submit_df.index.name = 'id'
# Write the submission file and preview it.
submit_df.to_csv(os.path.join(OUTPUT, "sub_exp00.csv"))
display(submit_df.head())
ice1_0.01 | ice1_0.1 | ice1_0.5 | ice1_0.9 | ice1_0.99 | ice2_0.01 | ice2_0.1 | ice2_0.5 | ice2_0.9 | ice2_0.99 | ice3_0.01 | ice3_0.1 | ice3_0.5 | ice3_0.9 | ice3_0.99 | oden1_0.01 | oden1_0.1 | oden1_0.5 | oden1_0.9 | oden1_0.99 | oden2_0.01 | oden2_0.1 | oden2_0.5 | oden2_0.9 | oden2_0.99 | oden3_0.01 | oden3_0.1 | oden3_0.5 | oden3_0.9 | oden3_0.99 | oden4_0.01 | oden4_0.1 | oden4_0.5 | oden4_0.9 | oden4_0.99 | hot1_0.01 | hot1_0.1 | hot1_0.5 | hot1_0.9 | hot1_0.99 | hot2_0.01 | hot2_0.1 | hot2_0.5 | hot2_0.9 | hot2_0.99 | hot3_0.01 | hot3_0.1 | hot3_0.5 | hot3_0.9 | hot3_0.99 | dessert1_0.01 | dessert1_0.1 | dessert1_0.5 | dessert1_0.9 | dessert1_0.99 | dessert2_0.01 | dessert2_0.1 | dessert2_0.5 | dessert2_0.9 | dessert2_0.99 | dessert3_0.01 | dessert3_0.1 | dessert3_0.5 | dessert3_0.9 | dessert3_0.99 | dessert4_0.01 | dessert4_0.1 | dessert4_0.5 | dessert4_0.9 | dessert4_0.99 | dessert5_0.01 | dessert5_0.1 | dessert5_0.5 | dessert5_0.9 | dessert5_0.99 | drink1_0.01 | drink1_0.1 | drink1_0.5 | drink1_0.9 | drink1_0.99 | drink2_0.01 | drink2_0.1 | drink2_0.5 | drink2_0.9 | drink2_0.99 | drink3_0.01 | drink3_0.1 | drink3_0.5 | drink3_0.9 | drink3_0.99 | drink4_0.01 | drink4_0.1 | drink4_0.5 | drink4_0.9 | drink4_0.99 | drink5_0.01 | drink5_0.1 | drink5_0.5 | drink5_0.9 | drink5_0.99 | drink6_0.01 | drink6_0.1 | drink6_0.5 | drink6_0.9 | drink6_0.99 | alcol1_0.01 | alcol1_0.1 | alcol1_0.5 | alcol1_0.9 | alcol1_0.99 | alcol2_0.01 | alcol2_0.1 | alcol2_0.5 | alcol2_0.9 | alcol2_0.99 | alcol3_0.01 | alcol3_0.1 | alcol3_0.5 | alcol3_0.9 | alcol3_0.99 | snack1_0.01 | snack1_0.1 | snack1_0.5 | snack1_0.9 | snack1_0.99 | snack2_0.01 | snack2_0.1 | snack2_0.5 | snack2_0.9 | snack2_0.99 | snack3_0.01 | snack3_0.1 | snack3_0.5 | snack3_0.9 | snack3_0.99 | bento1_0.01 | bento1_0.1 | bento1_0.5 | bento1_0.9 | bento1_0.99 | bento2_0.01 | bento2_0.1 | bento2_0.5 | bento2_0.9 | bento2_0.99 | bento3_0.01 | bento3_0.1 | bento3_0.5 | bento3_0.9 | bento3_0.99 | bento4_0.01 | 
bento4_0.1 | bento4_0.5 | bento4_0.9 | bento4_0.99 | tild1_0.01 | tild1_0.1 | tild1_0.5 | tild1_0.9 | tild1_0.99 | tild2_0.01 | tild2_0.1 | tild2_0.5 | tild2_0.9 | tild2_0.99 | men1_0.01 | men1_0.1 | men1_0.5 | men1_0.9 | men1_0.99 | men2_0.01 | men2_0.1 | men2_0.5 | men2_0.9 | men2_0.99 | men3_0.01 | men3_0.1 | men3_0.5 | men3_0.9 | men3_0.99 | men4_0.01 | men4_0.1 | men4_0.5 | men4_0.9 | men4_0.99 | men5_0.01 | men5_0.1 | men5_0.5 | men5_0.9 | men5_0.99 | men6_0.01 | men6_0.1 | men6_0.5 | men6_0.9 | men6_0.99 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
1 | 16.0 | 19.0 | 20.0 | 21.0 | 28.0 | 51.0 | 60.0 | 60.0 | 63.0 | 90.0 | 24.0 | 26.0 | 30.0 | 33.0 | 38.0 | 10.0 | 34.0 | 45.0 | 52.0 | 103.0 | 46.0 | 45.0 | 59.0 | 58.0 | 129.0 | 82.0 | 91.0 | 108.0 | 150.0 | 249.0 | 55.0 | 59.0 | 63.0 | 68.0 | 147.0 | 88.0 | 139.0 | 158.0 | 174.0 | 217.0 | 178.0 | 216.0 | 219.0 | 234.0 | 269.0 | 214.0 | 223.0 | 236.0 | 257.0 | 281.0 | 20.0 | 49.0 | 53.0 | 53.0 | 57.0 | 46.0 | 67.0 | 78.0 | 81.0 | 78.0 | 11.0 | 15.0 | 55.0 | 62.0 | 58.0 | 25.0 | 53.0 | 62.0 | 65.0 | 66.0 | 56.0 | 67.0 | 73.0 | 81.0 | 87.0 | 28.0 | 29.0 | 36.0 | 40.0 | 68.0 | 18.0 | 28.0 | 29.0 | 34.0 | 49.0 | 12.0 | 18.0 | 22.0 | 23.0 | 33.0 | 30.0 | 36.0 | 37.0 | 40.0 | 49.0 | 22.0 | 22.0 | 24.0 | 26.0 | 30.0 | 7.0 | 7.0 | 8.0 | 8.0 | 16.0 | 54.0 | 56.0 | 61.0 | 64.0 | 76.0 | 31.0 | 45.0 | 47.0 | 52.0 | 75.0 | 52.0 | 54.0 | 57.0 | 61.0 | 66.0 | 18.0 | 22.0 | 27.0 | 32.0 | 40.0 | 9.0 | 17.0 | 21.0 | 24.0 | 37.0 | 26.0 | 37.0 | 43.0 | 53.0 | 59.0 | 23.0 | 44.0 | 51.0 | 54.0 | 74.0 | 31.0 | 37.0 | 43.0 | 48.0 | 49.0 | 15.0 | 62.0 | 70.0 | 78.0 | 81.0 | 8.0 | 24.0 | 28.0 | 30.0 | 33.0 | 4.0 | 11.0 | 13.0 | 20.0 | 21.0 | 8.0 | 10.0 | 11.0 | 16.0 | 18.0 | 26.0 | 43.0 | 50.0 | 63.0 | 66.0 | 21.0 | 24.0 | 30.0 | 34.0 | 35.0 | 27.0 | 31.0 | 37.0 | 45.0 | 54.0 | 26.0 | 33.0 | 40.0 | 48.0 | 59.0 | 30.0 | 32.0 | 41.0 | 46.0 | 51.0 | 13.0 | 31.0 | 37.0 | 51.0 | 61.0 |
2 | 16.0 | 18.0 | 20.0 | 21.0 | 29.0 | 51.0 | 54.0 | 57.0 | 58.0 | 90.0 | 24.0 | 26.0 | 30.0 | 32.0 | 38.0 | 10.0 | 14.0 | 25.0 | 48.0 | 103.0 | 27.0 | 27.0 | 37.0 | 50.0 | 129.0 | 63.0 | 65.0 | 69.0 | 143.0 | 249.0 | 37.0 | 41.0 | 54.0 | 56.0 | 147.0 | 88.0 | 138.0 | 147.0 | 159.0 | 212.0 | 178.0 | 211.0 | 225.0 | 228.0 | 256.0 | 202.0 | 206.0 | 222.0 | 238.0 | 281.0 | 20.0 | 29.0 | 31.0 | 33.0 | 39.0 | 37.0 | 49.0 | 54.0 | 55.0 | 66.0 | 5.0 | 5.0 | 10.0 | 10.0 | 18.0 | 25.0 | 28.0 | 31.0 | 35.0 | 48.0 | 32.0 | 38.0 | 38.0 | 46.0 | 65.0 | 24.0 | 23.0 | 27.0 | 30.0 | 69.0 | 18.0 | 26.0 | 28.0 | 29.0 | 49.0 | 12.0 | 15.0 | 20.0 | 25.0 | 33.0 | 30.0 | 30.0 | 33.0 | 39.0 | 49.0 | 22.0 | 22.0 | 23.0 | 26.0 | 30.0 | 7.0 | 7.0 | 8.0 | 8.0 | 16.0 | 64.0 | 66.0 | 73.0 | 76.0 | 86.0 | 31.0 | 57.0 | 60.0 | 67.0 | 77.0 | 62.0 | 63.0 | 69.0 | 78.0 | 90.0 | 18.0 | 29.0 | 35.0 | 38.0 | 40.0 | 9.0 | 27.0 | 32.0 | 36.0 | 37.0 | 26.0 | 43.0 | 52.0 | 60.0 | 65.0 | 8.0 | 9.0 | 10.0 | 12.0 | 31.0 | 11.0 | 12.0 | 14.0 | 18.0 | 22.0 | 15.0 | 17.0 | 21.0 | 23.0 | 24.0 | 6.0 | 7.0 | 8.0 | 10.0 | 14.0 | 4.0 | 4.0 | 5.0 | 15.0 | 16.0 | 7.0 | 7.0 | 9.0 | 13.0 | 18.0 | 16.0 | 18.0 | 21.0 | 30.0 | 47.0 | 8.0 | 8.0 | 10.0 | 15.0 | 16.0 | 11.0 | 11.0 | 14.0 | 22.0 | 54.0 | 24.0 | 26.0 | 30.0 | 41.0 | 59.0 | 3.0 | 4.0 | 4.0 | 11.0 | 16.0 | 13.0 | 14.0 | 16.0 | 36.0 | 61.0 |
3 | 14.0 | 15.0 | 16.0 | 18.0 | 29.0 | 25.0 | 34.0 | 38.0 | 39.0 | 90.0 | 24.0 | 25.0 | 29.0 | 32.0 | 38.0 | 10.0 | 48.0 | 49.0 | 54.0 | 103.0 | 53.0 | 63.0 | 56.0 | 60.0 | 129.0 | 125.0 | 129.0 | 112.0 | 184.0 | 372.0 | 68.0 | 70.0 | 69.0 | 80.0 | 147.0 | 84.0 | 125.0 | 133.0 | 149.0 | 212.0 | 178.0 | 198.0 | 208.0 | 220.0 | 256.0 | 195.0 | 206.0 | 220.0 | 232.0 | 257.0 | 20.0 | 25.0 | 28.0 | 31.0 | 38.0 | 37.0 | 43.0 | 47.0 | 51.0 | 66.0 | 5.0 | 5.0 | 7.0 | 8.0 | 18.0 | 25.0 | 27.0 | 28.0 | 33.0 | 48.0 | 32.0 | 34.0 | 34.0 | 46.0 | 65.0 | 16.0 | 18.0 | 19.0 | 22.0 | 70.0 | 17.0 | 22.0 | 23.0 | 28.0 | 54.0 | 11.0 | 14.0 | 15.0 | 39.0 | 53.0 | 27.0 | 28.0 | 30.0 | 33.0 | 49.0 | 22.0 | 23.0 | 24.0 | 30.0 | 33.0 | 7.0 | 8.0 | 9.0 | 19.0 | 22.0 | 73.0 | 74.0 | 79.0 | 85.0 | 89.0 | 31.0 | 69.0 | 72.0 | 81.0 | 83.0 | 70.0 | 74.0 | 78.0 | 84.0 | 90.0 | 20.0 | 27.0 | 30.0 | 37.0 | 44.0 | 9.0 | 22.0 | 25.0 | 27.0 | 37.0 | 26.0 | 44.0 | 49.0 | 60.0 | 65.0 | 8.0 | 9.0 | 10.0 | 12.0 | 28.0 | 11.0 | 12.0 | 14.0 | 18.0 | 22.0 | 16.0 | 16.0 | 21.0 | 23.0 | 22.0 | 6.0 | 7.0 | 8.0 | 10.0 | 14.0 | 4.0 | 4.0 | 5.0 | 15.0 | 20.0 | 6.0 | 7.0 | 9.0 | 13.0 | 18.0 | 17.0 | 18.0 | 21.0 | 30.0 | 57.0 | 9.0 | 9.0 | 10.0 | 14.0 | 16.0 | 11.0 | 11.0 | 14.0 | 22.0 | 54.0 | 24.0 | 27.0 | 30.0 | 41.0 | 59.0 | 3.0 | 4.0 | 4.0 | 8.0 | 16.0 | 13.0 | 14.0 | 16.0 | 34.0 | 61.0 |
4 | 16.0 | 16.0 | 17.0 | 19.0 | 29.0 | 39.0 | 41.0 | 42.0 | 43.0 | 90.0 | 24.0 | 25.0 | 28.0 | 32.0 | 38.0 | 10.0 | 38.0 | 44.0 | 54.0 | 103.0 | 51.0 | 50.0 | 57.0 | 60.0 | 129.0 | 97.0 | 99.0 | 112.0 | 176.0 | 250.0 | 60.0 | 64.0 | 67.0 | 78.0 | 147.0 | 88.0 | 132.0 | 153.0 | 165.0 | 216.0 | 178.0 | 215.0 | 224.0 | 231.0 | 268.0 | 208.0 | 225.0 | 238.0 | 251.0 | 257.0 | 20.0 | 25.0 | 28.0 | 32.0 | 39.0 | 37.0 | 43.0 | 47.0 | 51.0 | 67.0 | 5.0 | 5.0 | 7.0 | 8.0 | 18.0 | 25.0 | 27.0 | 30.0 | 33.0 | 48.0 | 32.0 | 34.0 | 35.0 | 46.0 | 65.0 | 14.0 | 14.0 | 14.0 | 21.0 | 69.0 | 18.0 | 19.0 | 19.0 | 25.0 | 56.0 | 11.0 | 11.0 | 11.0 | 16.0 | 29.0 | 27.0 | 28.0 | 30.0 | 33.0 | 48.0 | 23.0 | 24.0 | 25.0 | 27.0 | 30.0 | 8.0 | 10.0 | 11.0 | 11.0 | 16.0 | 73.0 | 83.0 | 89.0 | 98.0 | 98.0 | 31.0 | 86.0 | 92.0 | 103.0 | 110.0 | 70.0 | 84.0 | 90.0 | 98.0 | 98.0 | 17.0 | 17.0 | 22.0 | 28.0 | 42.0 | 9.0 | 10.0 | 11.0 | 13.0 | 37.0 | 26.0 | 32.0 | 35.0 | 45.0 | 65.0 | 23.0 | 48.0 | 51.0 | 54.0 | 71.0 | 35.0 | 37.0 | 45.0 | 49.0 | 56.0 | 15.0 | 61.0 | 71.0 | 75.0 | 77.0 | 8.0 | 26.0 | 28.0 | 31.0 | 33.0 | 4.0 | 4.0 | 5.0 | 9.0 | 18.0 | 7.0 | 7.0 | 8.0 | 9.0 | 18.0 | 26.0 | 45.0 | 49.0 | 56.0 | 77.0 | 22.0 | 25.0 | 30.0 | 35.0 | 34.0 | 27.0 | 33.0 | 38.0 | 41.0 | 54.0 | 26.0 | 34.0 | 40.0 | 43.0 | 59.0 | 30.0 | 35.0 | 40.0 | 39.0 | 50.0 | 13.0 | 31.0 | 36.0 | 38.0 | 63.0 |
5 | 16.0 | 17.0 | 18.0 | 20.0 | 29.0 | 49.0 | 50.0 | 52.0 | 54.0 | 90.0 | 24.0 | 26.0 | 30.0 | 32.0 | 38.0 | 10.0 | 33.0 | 42.0 | 52.0 | 103.0 | 46.0 | 48.0 | 57.0 | 60.0 | 129.0 | 82.0 | 91.0 | 112.0 | 157.0 | 250.0 | 59.0 | 62.0 | 68.0 | 74.0 | 147.0 | 88.0 | 136.0 | 155.0 | 171.0 | 216.0 | 178.0 | 214.0 | 221.0 | 235.0 | 267.0 | 214.0 | 223.0 | 237.0 | 256.0 | 281.0 | 20.0 | 45.0 | 47.0 | 51.0 | 57.0 | 46.0 | 60.0 | 67.0 | 71.0 | 84.0 | 11.0 | 13.0 | 48.0 | 52.0 | 60.0 | 25.0 | 52.0 | 59.0 | 62.0 | 66.0 | 56.0 | 62.0 | 66.0 | 69.0 | 83.0 | 21.0 | 16.0 | 19.0 | 25.0 | 69.0 | 18.0 | 20.0 | 22.0 | 25.0 | 56.0 | 12.0 | 12.0 | 15.0 | 17.0 | 32.0 | 30.0 | 30.0 | 32.0 | 34.0 | 42.0 | 23.0 | 24.0 | 25.0 | 27.0 | 30.0 | 8.0 | 9.0 | 9.0 | 10.0 | 16.0 | 72.0 | 90.0 | 98.0 | 110.0 | 110.0 | 31.0 | 96.0 | 103.0 | 115.0 | 115.0 | 70.0 | 93.0 | 99.0 | 111.0 | 114.0 | 18.0 | 23.0 | 25.0 | 33.0 | 40.0 | 9.0 | 18.0 | 20.0 | 22.0 | 37.0 | 26.0 | 38.0 | 44.0 | 54.0 | 65.0 | 23.0 | 47.0 | 51.0 | 54.0 | 74.0 | 34.0 | 37.0 | 44.0 | 49.0 | 52.0 | 15.0 | 63.0 | 70.0 | 75.0 | 77.0 | 8.0 | 26.0 | 28.0 | 30.0 | 33.0 | 4.0 | 4.0 | 5.0 | 9.0 | 18.0 | 7.0 | 7.0 | 8.0 | 9.0 | 18.0 | 26.0 | 45.0 | 49.0 | 56.0 | 71.0 | 21.0 | 27.0 | 30.0 | 36.0 | 34.0 | 27.0 | 33.0 | 38.0 | 43.0 | 54.0 | 26.0 | 34.0 | 40.0 | 44.0 | 59.0 | 30.0 | 35.0 | 40.0 | 41.0 | 45.0 | 13.0 | 31.0 | 36.0 | 40.0 | 63.0 |
# Columns holding the 0.5-quantile (median) prediction of each product.
pred_median_col = list(filter(lambda name: '_0.5' in name, submit_df.columns))
# Attach the median predictions to the test rows, matching on id.
median_preds = submit_df[pred_median_col].reset_index()
test_add_pred = pd.merge(test, median_preds, how='left', on='id')
# Strip the quantile suffix so the prediction columns carry the plain target names.
test_add_pred = test_add_pred.rename(
    columns=lambda name: name.replace('_0.5', '') if '_0.5' in name else name
)
# Make sure every target column is float.
test_add_pred = test_add_pred.astype({name: float for name in target_columns})
test_add_pred.head(3)
id | date | highest | lowest | rain | time | year | month | day | weekday | ice1 | ice2 | ice3 | oden1 | oden2 | oden3 | oden4 | hot1 | hot2 | hot3 | dessert1 | dessert2 | dessert3 | dessert4 | dessert5 | drink1 | drink2 | drink3 | drink4 | drink5 | drink6 | alcol1 | alcol2 | alcol3 | snack1 | snack2 | snack3 | bento1 | bento2 | bento3 | bento4 | tild1 | tild2 | men1 | men2 | men3 | men4 | men5 | men6 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 3/27 | 19.7 | 7.3 | 0.0 | 2019-03-27 | 2019 | 3 | 27 | 2 | 20.0 | 60.0 | 30.0 | 45.0 | 59.0 | 108.0 | 63.0 | 158.0 | 219.0 | 236.0 | 53.0 | 78.0 | 55.0 | 62.0 | 73.0 | 36.0 | 29.0 | 22.0 | 37.0 | 24.0 | 8.0 | 61.0 | 47.0 | 57.0 | 27.0 | 21.0 | 43.0 | 51.0 | 43.0 | 70.0 | 28.0 | 13.0 | 11.0 | 50.0 | 30.0 | 37.0 | 40.0 | 41.0 | 37.0 |
1 | 2 | 3/28 | 16.9 | 9.0 | 0.0 | 2019-03-28 | 2019 | 3 | 28 | 3 | 20.0 | 57.0 | 30.0 | 25.0 | 37.0 | 69.0 | 54.0 | 147.0 | 225.0 | 222.0 | 31.0 | 54.0 | 10.0 | 31.0 | 38.0 | 27.0 | 28.0 | 20.0 | 33.0 | 23.0 | 8.0 | 73.0 | 60.0 | 69.0 | 35.0 | 32.0 | 52.0 | 10.0 | 14.0 | 21.0 | 8.0 | 5.0 | 9.0 | 21.0 | 10.0 | 14.0 | 30.0 | 4.0 | 16.0 |
2 | 3 | 3/29 | 9.3 | 6.8 | 0.0 | 2019-03-29 | 2019 | 3 | 29 | 4 | 16.0 | 38.0 | 29.0 | 49.0 | 56.0 | 112.0 | 69.0 | 133.0 | 208.0 | 220.0 | 28.0 | 47.0 | 7.0 | 28.0 | 34.0 | 19.0 | 23.0 | 15.0 | 30.0 | 24.0 | 9.0 | 79.0 | 72.0 | 78.0 | 30.0 | 25.0 | 49.0 | 10.0 | 14.0 | 21.0 | 8.0 | 5.0 | 9.0 | 21.0 | 10.0 | 14.0 | 30.0 | 4.0 | 16.0 |
# Moving average
window = 7
# Only numeric columns are smoothed: newer pandas versions raise instead of
# silently dropping non-numeric columns (e.g. the string 'date') in rolling().mean().
train_rolling = train.select_dtypes(include='number').rolling(window, min_periods=1).mean()
test_rolling = test_add_pred.select_dtypes(include='number').rolling(window, min_periods=1).mean()
# Train followed by test (with median predictions) on one continuous row index.
all_rolling = (
    pd.concat([train, test_add_pred], axis=0)
    .reset_index(drop=True)
    .select_dtypes(include='number')
    .rolling(window, min_periods=1)
    .mean()
)
# Visual check
plot_col = [c for c in train.columns if c not in ['id', 'date', 'time', 'year', 'month', 'day', 'weekday']]
ncols = len(plot_col) // 13
plt.subplots(14, ncols, sharey=True, sharex=True, figsize=(30, 80))
# x positions of the vertical guide lines. 173 and 204 are two separate
# markers (a '.'-for-',' typo had merged them into the single value 173.204).
# NOTE(review): the ~30-row spacing suggests these are month boundaries — confirm.
guide_lines = [20, 51, 81, 112, 143, 173, 204, 234, 265, 296, 324, 350]
n_test = len(test_rolling)
for i, col in enumerate(plot_col):
    plt.subplot(14, ncols, i+1)
    # Blue: smoothed training series (the first `window` rows are skipped).
    plt.plot(train_rolling.index[window:], train_rolling[col][window:], alpha=1, color='blue', label='train')
    # Red: the last n_test rows of the combined series, i.e. the test period.
    plt.plot(all_rolling.index[-n_test:], all_rolling[col][-n_test:], alpha=1, color='red', label='test')
    for x in guide_lines:
        plt.axvline(x)
    plt.xlabel(col)
    plt.legend()
    plt.xticks([])
plt.show()
sub_base19
MODEL
FE
Post-Processing
Score: 0.8462863293289724
second sub_base17
Sard
ご共有ありがとうございます。
2018年だと判断した理由を教えて頂いてもよろしいでしょうか。