from sklearn.decomposition import NMF
from sklearn.preprocessing import MinMaxScaler
# Features built from a pivot table
def get_pivot_NMF9_features(input_df, n, value_col):
    """Append NMF features computed from a subGameID x outCount pivot table.

    The median of ``value_col`` is pivoted into one row per ``subGameID`` and
    one column per ``outCount``. Consecutive pivot columns are grouped in
    triples (18 triples for 54 columns); the even-indexed triples and the
    odd-indexed triples are each collapsed to their per-triple median, giving
    two 9-column matrices. Each matrix is min-max scaled and factorised with
    NMF, and the resulting components are merged back onto ``input_df``.

    Args:
        input_df: DataFrame containing ``subGameID``, ``outCount`` and
            ``value_col`` columns.
        n: Number of NMF components extracted from each 9-column matrix.
        value_col: Name of the column aggregated in the pivot table.

    Returns:
        ``reduce_mem_usage`` applied to ``input_df`` left-joined with the
        ``pivot_{value_col}_NMF9T=..`` and ``pivot_{value_col}_NMF9B=..``
        columns.

    Raises:
        ValueError: If the pivot table does not have exactly 54 columns.
    """
    # NOTE(review): 54 presumably means 9 innings x 2 halves x 3 outs, with
    # the T/B suffixes standing for top/bottom -- confirm against the data.
    expected_cols = 54
    group_size = 3

    # The string alias keeps the groupby median that pandas already uses for
    # np.median, without the FutureWarning about callables being applied
    # directly in a future version.
    pivot_df = pd.pivot_table(
        input_df, index='subGameID', columns='outCount',
        values=value_col, aggfunc='median')

    # The fixed reshape below only lines up with the rows when the pivot has
    # exactly the expected width; fail early with a readable message.
    if pivot_df.shape[1] != expected_cols:
        raise ValueError(
            f'expected {expected_cols} distinct outCount columns in the '
            f'pivot table, got {pivot_df.shape[1]}')

    # Shape: (rows, 18 triples, 3 columns per triple).
    grouped = pivot_df.fillna(0).values.reshape(
        -1, expected_cols // group_size, group_size)

    # Even-indexed triples -> "T" features, odd-indexed triples -> "B".
    sc0 = MinMaxScaler().fit_transform(
        np.median(grouped[:, 0::2, :], axis=-1))
    sc1 = MinMaxScaler().fit_transform(
        np.median(grouped[:, 1::2, :], axis=-1))

    # One estimator is refitted for each matrix, as in the original code.
    nmf = NMF(n_components=n, random_state=2021)
    nmf_df0 = pd.DataFrame(
        nmf.fit_transform(sc0), index=pivot_df.index).rename(
            columns=lambda x: f'pivot_{value_col}_NMF9T={x:02}')
    nmf_df1 = pd.DataFrame(
        nmf.fit_transform(sc1), index=pivot_df.index).rename(
            columns=lambda x: f'pivot_{value_col}_NMF9B={x:02}')

    nmf_df = pd.concat([nmf_df0, nmf_df1], axis=1)
    nmf_df = pd.merge(
        input_df, nmf_df, left_on='subGameID', right_index=True, how='left')
    return reduce_mem_usage(nmf_df)
# Features built from a pivot table
def get_pivot_NMF27_features(input_df, n, value_col):
    """Append NMF features computed from a subGameID x outCount pivot table.

    The median of ``value_col`` is pivoted into one row per ``subGameID`` and
    one column per ``outCount``. Consecutive pivot columns are grouped in
    triples (18 triples for 54 columns); the even-indexed triples and the
    odd-indexed triples are each flattened into a 27-column matrix. Each
    matrix is min-max scaled and factorised with NMF, and the resulting
    components are merged back onto ``input_df``.

    Args:
        input_df: DataFrame containing ``subGameID``, ``outCount`` and
            ``value_col`` columns.
        n: Number of NMF components extracted from each 27-column matrix.
        value_col: Name of the column aggregated in the pivot table.

    Returns:
        ``reduce_mem_usage`` applied to ``input_df`` left-joined with the
        ``pivot_{value_col}_NMF27T=..`` and ``pivot_{value_col}_NMF27B=..``
        columns.

    Raises:
        ValueError: If the pivot table does not have exactly 54 columns.
    """
    # NOTE(review): 54 presumably means 9 innings x 2 halves x 3 outs, with
    # the T/B suffixes standing for top/bottom -- confirm against the data.
    expected_cols = 54
    group_size = 3
    half_cols = expected_cols // 2  # 27 columns per flattened matrix

    # The string alias keeps the groupby median that pandas already uses for
    # np.median, without the FutureWarning about callables being applied
    # directly in a future version.
    pivot_df = pd.pivot_table(
        input_df, index='subGameID', columns='outCount',
        values=value_col, aggfunc='median')

    # The fixed reshape below only lines up with the rows when the pivot has
    # exactly the expected width; fail early with a readable message.
    if pivot_df.shape[1] != expected_cols:
        raise ValueError(
            f'expected {expected_cols} distinct outCount columns in the '
            f'pivot table, got {pivot_df.shape[1]}')

    # Shape: (rows, 18 triples, 3 columns per triple).
    grouped = pivot_df.fillna(0).values.reshape(
        -1, expected_cols // group_size, group_size)

    # Even-indexed triples -> "T" features, odd-indexed triples -> "B";
    # each selection is flattened back to (rows, 27).
    sc0 = MinMaxScaler().fit_transform(
        grouped[:, 0::2].reshape(-1, half_cols))
    sc1 = MinMaxScaler().fit_transform(
        grouped[:, 1::2].reshape(-1, half_cols))

    # One estimator is refitted for each matrix, as in the original code.
    nmf = NMF(n_components=n, random_state=2021)
    nmf_df0 = pd.DataFrame(
        nmf.fit_transform(sc0), index=pivot_df.index).rename(
            columns=lambda x: f'pivot_{value_col}_NMF27T={x:02}')
    nmf_df1 = pd.DataFrame(
        nmf.fit_transform(sc1), index=pivot_df.index).rename(
            columns=lambda x: f'pivot_{value_col}_NMF27B={x:02}')

    nmf_df = pd.concat([nmf_df0, nmf_df1], axis=1)
    nmf_df = pd.merge(
        input_df, nmf_df, left_on='subGameID', right_index=True, how='left')
    return reduce_mem_usage(nmf_df)
# Features built from a pivot table
def get_pivot_NMF54_features(input_df, n, value_col):
    """Append NMF features computed from a subGameID x outCount pivot table.

    The median of ``value_col`` is pivoted into one row per ``subGameID`` and
    one column per ``outCount``. Missing cells are filled with 0, every
    column is min-max scaled, and the full matrix is factorised with NMF.
    The resulting components are merged back onto ``input_df``.

    Args:
        input_df: DataFrame containing ``subGameID``, ``outCount`` and
            ``value_col`` columns.
        n: Number of NMF components to extract.
        value_col: Name of the column aggregated in the pivot table.

    Returns:
        ``reduce_mem_usage`` applied to ``input_df`` left-joined with the
        ``pivot_{value_col}_NMF54=..`` columns.
    """
    # The string alias keeps the groupby median that pandas already uses for
    # np.median, without the FutureWarning about callables being applied
    # directly in a future version.
    pivot_df = pd.pivot_table(
        input_df, index='subGameID', columns='outCount',
        values=value_col, aggfunc='median')

    # NOTE(review): the "54" in the feature name presumably reflects the
    # expected number of outCount columns; nothing here enforces it.
    sc = MinMaxScaler().fit_transform(pivot_df.fillna(0).values)

    nmf = NMF(n_components=n, random_state=2021)
    nmf_df = pd.DataFrame(
        nmf.fit_transform(sc), index=pivot_df.index).rename(
            columns=lambda x: f'pivot_{value_col}_NMF54={x:02}')

    nmf_df = pd.merge(
        input_df, nmf_df, left_on='subGameID', right_index=True, how='left')
    return reduce_mem_usage(nmf_df)