集計特徴量
基本的な集計系の特徴量を使用しています。参考までに、Feature Importance の上位（Top 20）も貼っておきます。
def _add_team_aggregates(df, stat, column_pattern, n_bins=20):
    """Add per-team sum/std/skew of one stat plus A-vs-B diff, ratio and binned ratio."""
    for team in ('A', 'B'):
        # Select the four player columns of this team once; the selection is a
        # copy, so adding columns to ``df`` below does not affect it.
        members = df[[column_pattern.format(team=team, slot=slot) for slot in range(1, 5)]]
        df[f'{team}{stat}_sum'] = members.sum(axis=1)
        df[f'{team}{stat}_std'] = members.std(axis=1)
        df[f'{team}{stat}_skew'] = members.skew(axis=1)

    ratio_col = f'{stat}_ratio'
    cut_col = f'{stat}_ratio_cut'
    df[f'{stat}_diff'] = df[f'A{stat}_sum'] - df[f'B{stat}_sum']
    # A zero team-B sum yields +/-inf (or NaN for 0/0); those rows are excluded
    # from binning below and keep NaN in the ``*_ratio_cut`` column.
    df[ratio_col] = df[f'A{stat}_sum'] / df[f'B{stat}_sum']

    # The mask must come from the SAME ratio that is being binned; otherwise
    # index alignment silently drops valid bins for unrelated rows.
    finite = np.isfinite(df[ratio_col])
    if finite.any():
        # NOTE: equal-width bins. A quantile-based (pd.qcut) variant existed in
        # the original only as commented-out code and is not applied.
        df.loc[finite, cut_col] = pd.cut(df.loc[finite, ratio_col], n_bins, labels=False)
    else:
        # pd.cut raises on empty input; with nothing to bin the column is all NaN.
        df[cut_col] = np.nan


def create_base_feature(df):
    """Add calendar and team-aggregate features to ``df`` and return it.

    The frame is modified in place; the returned object is the same ``df``.

    Required input columns:
        * ``period`` -- datetime-like column (accessed through ``.dt``).
        * ``{A,B}{1..4}-level``, ``{A,B}{1..4}-rank``, ``{A,B}{1..4}_splatnet``
          -- assumed to be numeric (TODO confirm any categorical encoding
          happens before this function is called).

    Added columns:
        * ``month``, ``dayofmonth``, ``dayofweek``, ``hour`` from ``period``.
        * ``<player>-level_99div`` / ``<player>-level_99rem`` for each of the
          eight level columns (integer quotient and remainder by 99).
        * For each stat in ``level``, ``rank``, ``splatnet``:
          ``A<stat>_sum/std/skew``, ``B<stat>_sum/std/skew``, ``<stat>_diff``
          (A minus B), ``<stat>_ratio`` (A divided by B) and
          ``<stat>_ratio_cut`` (index of one of 20 equal-width bins over the
          finite ratios; NaN where the ratio is not finite).

    Args:
        df: pandas DataFrame holding the columns listed above.

    Returns:
        The same DataFrame with the feature columns added.
    """
    # --- Period ---
    period = df['period'].dt
    df['month'] = period.month
    df['dayofmonth'] = period.day
    df['dayofweek'] = period.dayofweek
    df['hour'] = period.hour

    # --- Level ---
    # Quotient/remainder by 99; presumably separates a wrap-around counter from
    # the displayed level -- confirm against the data description.
    for team in ('A', 'B'):
        for slot in range(1, 5):
            level = f'{team}{slot}-level'
            df[f'{level}_99div'] = df[level] // 99
            df[f'{level}_99rem'] = df[level] % 99
    _add_team_aggregates(df, 'level', '{team}{slot}-level')

    # --- Rank ---
    _add_team_aggregates(df, 'rank', '{team}{slot}-rank')

    # --- splatnet ---
    _add_team_aggregates(df, 'splatnet', '{team}{slot}_splatnet')

    return df
Feature importance 0: ('A1-rank_min_ratio_rank_ratio', 1774.846758365631)
Feature importance 1: ('A1_seqnum_mean_diff_level_diff', 910.2062306404114)
Feature importance 2: ('B1_category2', 516.2507209777832)
Feature importance 3: ('B3-weapon_min_diff_rank_diff', 426.2479922771454)
Feature importance 4: ('A1_seqnum_mean_diff_level_ratio', 410.01326155662537)
Feature importance 5: ('A4_category1', 387.31628799438477)
Feature importance 6: ('A3_category1', 382.49681973457336)
Feature importance 7: ('Asplatnet_std', 280.09175205230713)
Feature importance 8: ('A1-rank_max_diff_rank_ratio', 262.1223609447479)
Feature importance 9: ('B2-rank_min_diff_level_diff', 251.01503992080688)
Feature importance 10: ('A1_playernum_mean_diff_Brank_sum', 239.02090072631836)
Feature importance 11: ('B2_category1', 233.08357858657837)
Feature importance 12: ('A1-rank_max_ratio_rank_ratio', 221.75276899337769)
Feature importance 13: ('A2-weapon_min_Arank_sum', 216.08291101455688)
Feature importance 14: ('A1_seqnum_max_Brank_sum', 197.79781126976013)
Feature importance 15: ('A1_seqnum_mean_diff_Brank_sum', 196.66624999046326)
Feature importance 16: ('A3_category2', 193.10358953475952)
Feature importance 17: ('B2-weapon_min_diff_rank_diff', 184.9084596633911)
Feature importance 18: ('A2-weapon_skew_rank_diff', 182.678528547287)
Feature importance 19: ('A1-weapon_max_min_diff_Asplatnet_sum', 181.3388111591339)
Feature importance 20: ('A1_playernum_max_Bsplatnet_skew', 176.16425037384033)