sylk
# 可読性とくに考えてません。
# とりあえず一通り作成したものを投下しておきます。
# ライブラリのインポート
#適当にパクってきてるところあり
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score as acc
from sklearn.preprocessing import MultiLabelBinarizer
import warnings
warnings.filterwarnings('ignore')
# Load the training and test sets from the local ./data directory.
train = pd.read_csv("./data/train_data.csv")
test = pd.read_csv('./data/test_data.csv')
import os
# Set KMP_DUPLICATE_LIB_OK for this process.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort; it is set after lightgbm has already been imported — confirm that it
# still takes effect at this point.
os.environ['KMP_DUPLICATE_LIB_OK']='True'
# Rename every "*_replica" weapon id to another weapon id so that both are
# encoded as one category (presumably the replica's base weapon — the mapping
# itself is taken as given).
change_weapon = {
    "heroblaster_replica": "hotblaster",
    "herobrush_replica": "hokusai",
    "herocharger_replica": "splatcharger",
    "heromaneuver_replica": "maneuver",
    "heroroller_replica": "splatroller",
    "heroshelter_replica": "parashelter",
    "heroshooter_replica": "sshooter",
    "heroslosher_replica": "bucketslosher",
    "herospinner_replica": "splatspinner",
    "octoshooter_replica": "sshooter",
}
train = train.replace(change_weapon)
test = test.replace(change_weapon)

# Learn the weapon vocabulary from all eight weapon slots of train AND test.
# Fitting on train['A4-weapon'] alone means any weapon that only ever appears
# in another slot (or only in the test set) is unknown to the binarizer and is
# dropped by transform(); with warnings silenced above, that loss is invisible.
# Missing weapons are encoded as the explicit class "none", matching what
# make_feature passes to transform().
weapon_cols = [f"{team}{slot}-weapon" for team in "AB" for slot in "1234"]
all_weapons = pd.concat([train, test])[weapon_cols].fillna("none")
mlb = MultiLabelBinarizer()
mlb.fit([set(all_weapons.values.ravel())])
MultiLabelBinarizer(classes=None, sparse_output=False)
train.head(3)
id | period | game-ver | lobby-mode | lobby | mode | stage | A1-weapon | A1-rank | A1-level | ... | B2-weapon | B2-rank | B2-level | B3-weapon | B3-rank | B3-level | B4-weapon | B4-rank | B4-level | y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2019-10-15T20:00:00+00:00 | 5.0.1 | regular | standard | nawabari | sumeshi | sshooter_becchu | NaN | 139 | ... | hokusai_becchu | NaN | 26.0 | splatcharger | NaN | 68.0 | sharp_neo | NaN | 31.0 | 1 |
1 | 2 | 2019-12-14T04:00:00+00:00 | 5.0.1 | regular | standard | nawabari | arowana | parashelter_sorella | NaN | 198 | ... | squiclean_b | NaN | 118.0 | campingshelter | NaN | 168.0 | sputtery_clear | NaN | 151.0 | 0 |
2 | 3 | 2019-12-25T14:00:00+00:00 | 5.0.1 | gachi | standard | hoko | ama | nzap89 | a- | 114 | ... | nzap85 | a+ | 163.0 | prime_becchu | a- | 160.0 | dualsweeper_custom | a | 126.0 | 0 |
3 rows × 32 columns
print(len(train), len(test))
66125 28340
train.y.sum()
34696
def make_feature(df_train, df_test):
    """Build model features for a train/test pair in a single pass.

    The two frames are stacked (so category codes are shared between them),
    then:

    * "lobby-mode", "mode" and "stage" are replaced by the integer codes
      returned by ``pd.factorize``;
    * for every class known to the module-level ``mlb`` one indicator column
      per team is added ("A-<weapon>" and "B-<weapon>"), with missing weapons
      passed to the binarizer as "none";
    * every player's "<team><slot>-level" is floor-divided by 10 and clipped
      to the range [0, 30].

    Args:
        df_train: frame whose rows come first in the stacked frame.
        df_test: frame whose rows follow.

    Returns:
        ``(train_part, test_part)``: the stacked, featurised frame split back
        at ``len(df_train)``. The input frames themselves are not modified.
    """
    n_train = len(df_train)
    merged = pd.concat([df_train, df_test])

    # Integer-encode the low-cardinality match descriptors.
    for col in ("lobby-mode", "mode", "stage"):
        merged[col] = pd.factorize(merged[col])[0]

    # Binarize each team's four weapon slots against the shared vocabulary.
    team_a = ['A1-weapon', 'A2-weapon', 'A3-weapon', 'A4-weapon']
    team_b = ['B1-weapon', 'B2-weapon', 'B3-weapon', 'B4-weapon']
    flags_a = mlb.transform(merged[team_a].fillna("none").values)
    flags_b = mlb.transform(merged[team_b].fillna("none").values)
    for pos, weapon in enumerate(mlb.classes_):
        merged["A-" + weapon] = flags_a[:, pos]
        merged["B-" + weapon] = flags_b[:, pos]

    # Bucket player levels into tens, capping the bucket index at 30.
    for team in ("A", "B"):
        for slot in ("1", "2", "3", "4"):
            level_col = team + slot + "-level"
            merged[level_col] = (merged[level_col] // 10).clip(0, 30)

    return merged.iloc[:n_train], merged.iloc[n_train:]
# Featurise the full train/test frames once.
tr, te = make_feature(train, test)

# Columns fed to the model: the three encoded match descriptors followed by
# the per-team weapon indicators (all "A-*" columns, then all "B-*" columns).
feature_cols = (
    ['lobby-mode', 'mode', 'stage']
    + ["A-" + weapon for weapon in mlb.classes_]
    + ["B-" + weapon for weapon in mlb.classes_]
)
print(tr.shape)
(66125, 292)
# 5-fold cross-validation of a default LGBMClassifier on the featurised
# training frame `tr`, reporting accuracy per fold and on average.
#
# shuffle=True is required for random_state to mean anything: without it,
# older scikit-learn versions ignore the seed and scikit-learn >= 0.24 raises
# ValueError. Folds are therefore now a seeded random partition rather than
# contiguous row ranges.
kf = KFold(n_splits=5, shuffle=True, random_state=0)
score = 0
for counter, (train_index, valid_index) in enumerate(kf.split(tr), start=1):
    # kf.split yields positional indices, so select rows and labels with
    # .iloc throughout instead of mixing label-based .loc with .iloc.
    train_X, valid_X = tr.iloc[train_index], tr.iloc[valid_index]
    train_y, valid_y = train_X["y"], valid_X["y"]
    # `tr` is already the output of make_feature, so it is not run again
    # here: a second pass would floor-divide the level columns by 10 a second
    # time and re-derive the category codes for no benefit.
    clf = lgb.LGBMClassifier()
    clf.fit(train_X[feature_cols], train_y)
    preds = clf.predict(valid_X[feature_cols])
    acc_score = acc(valid_y, preds)
    print(f"fold{counter} score is :{acc_score}")
    score += acc_score
# Average over the actual number of folds rather than a hard-coded 5.
print("average : ", round(score / kf.get_n_splits(), 5))
fold1 score is :0.5442722117202269 fold2 score is :0.5435160680529301 fold3 score is :0.5465406427221172 fold4 score is :0.5326275992438564 fold5 score is :0.5365595463137997 average : 0.5407
# For submission: refit on every training row and predict the test set.
tr, te = make_feature(train, test)
# make_feature stacks train and test; if the test frame has no `y` column the
# stacked `y` is NaN-filled and therefore float, which would make the model
# predict 1.0/0.0 and write those floats to the CSV. Cast the labels back to
# int so the submitted `y` column holds integer class labels.
clf = lgb.LGBMClassifier().fit(
    tr[feature_cols].fillna(0),
    tr["y"].fillna(0).astype(int),
)
# Apply the same NaN handling to the test features as to the training ones.
pred_test = clf.predict(te[feature_cols].fillna(0))
# NOTE(review): `id` is written as a 0-based row position, while the `id`
# column shown for the training data starts at 1 — confirm which numbering
# the submission format expects.
pd.DataFrame({"id": range(len(pred_test)), "y": pred_test}).to_csv("submission.csv", index=False)
pred_test[pred_test == 1].shape
(19281,)
pred_test[pred_test == 0].shape
(9059,)
# Show the ten features with the largest gain-based importance in the fitted
# model `clf`.
gain = clf.booster_.feature_importance(importance_type='gain')
importance = pd.DataFrame({"f": gain}, index=feature_cols)
importance.sort_values(by="f", ascending=False).head(10)
f | |
---|---|
stage | 1681.303699 |
A-splatcharger | 539.786695 |
lobby-mode | 534.930203 |
B-splatcharger | 424.459795 |
mode | 343.591718 |
B-l3reelgun | 252.490988 |
A-l3reelgun | 232.794662 |
B-dualsweeper_custom | 228.827620 |
B-none | 226.369461 |
B-squiclean_a | 203.461810 |
mlb.classes_
array(['52gal', '52gal_becchu', '52gal_deco', '96gal', '96gal_deco', 'bamboo14mk1', 'bamboo14mk2', 'bamboo14mk3', 'barrelspinner', 'barrelspinner_deco', 'barrelspinner_remix', 'bold', 'bold_7', 'bold_neo', 'bottlegeyser', 'bottlegeyser_foil', 'bucketslosher', 'bucketslosher_deco', 'bucketslosher_soda', 'campingshelter', 'campingshelter_camo', 'campingshelter_sorella', 'carbon', 'carbon_deco', 'clashblaster', 'clashblaster_neo', 'dualsweeper', 'dualsweeper_custom', 'dynamo', 'dynamo_becchu', 'dynamo_tesla', 'explosher', 'explosher_custom', 'furo', 'furo_deco', 'h3reelgun', 'h3reelgun_cherry', 'h3reelgun_d', 'hissen', 'hissen_hue', 'hokusai', 'hokusai_becchu', 'hokusai_hue', 'hotblaster', 'hotblaster_custom', 'hydra', 'hydra_custom', 'jetsweeper', 'jetsweeper_custom', 'kelvin525', 'kelvin525_becchu', 'kelvin525_deco', 'kugelschreiber', 'kugelschreiber_hue', 'l3reelgun', 'l3reelgun_becchu', 'l3reelgun_d', 'liter4k', 'liter4k_custom', 'liter4k_scope', 'liter4k_scope_custom', 'longblaster', 'longblaster_custom', 'longblaster_necro', 'maneuver', 'maneuver_becchu', 'maneuver_collabo', 'momiji', 'nautilus47', 'nautilus79', 'none', 'nova', 'nova_becchu', 'nova_neo', 'nzap83', 'nzap85', 'nzap89', 'ochiba', 'pablo', 'pablo_hue', 'pablo_permanent', 'parashelter', 'parashelter_sorella', 'prime', 'prime_becchu', 'prime_collabo', 'promodeler_mg', 'promodeler_pg', 'promodeler_rg', 'quadhopper_black', 'quadhopper_white', 'rapid', 'rapid_becchu', 'rapid_deco', 'rapid_elite', 'rapid_elite_deco', 'screwslosher', 'screwslosher_becchu', 'screwslosher_neo', 'sharp', 'sharp_neo', 'soytuber', 'soytuber_custom', 'splatcharger', 'splatcharger_becchu', 'splatcharger_collabo', 'splatroller', 'splatroller_becchu', 'splatroller_collabo', 'splatscope', 'splatscope_becchu', 'splatscope_collabo', 'splatspinner', 'splatspinner_becchu', 'splatspinner_collabo', 'sputtery', 'sputtery_clear', 'sputtery_hue', 'spygadget', 'spygadget_becchu', 'spygadget_sorella', 'squiclean_a', 'squiclean_b', 
'squiclean_g', 'sshooter', 'sshooter_becchu', 'sshooter_collabo', 'variableroller', 'variableroller_foil', 'wakaba'], dtype=object)
# Distribution of player levels over all eight player slots of the training
# data, bucketed into tens; the bar chart caps the bucket index at 30.
s = ["A", "B"]
p = ["1", "2", "3", "4"]
# Pool every "<team><slot>-level" value into one flat list.
d = [level for i in s for j in p for level in train[i + j + "-level"].values]
plt.figure(figsize=(30, 5))
c = pd.DataFrame({"level": d})
c["level"] = c["level"] // 10
c["level"].clip(0, 30).value_counts().plot(kind="bar")
<matplotlib.axes._subplots.AxesSubplot at 0x7fbeb8d4a9b0>
c.level.max()
93.0
train
id | period | game-ver | lobby-mode | lobby | mode | stage | A1-weapon | A1-rank | A1-level | ... | B2-weapon | B2-rank | B2-level | B3-weapon | B3-rank | B3-level | B4-weapon | B4-rank | B4-level | y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2019-10-15T20:00:00+00:00 | 5.0.1 | regular | standard | nawabari | sumeshi | sshooter_becchu | NaN | 139 | ... | hokusai_becchu | NaN | 26.0 | splatcharger | NaN | 68.0 | sharp_neo | NaN | 31.0 | 1 |
1 | 2 | 2019-12-14T04:00:00+00:00 | 5.0.1 | regular | standard | nawabari | arowana | parashelter_sorella | NaN | 198 | ... | squiclean_b | NaN | 118.0 | campingshelter | NaN | 168.0 | sputtery_clear | NaN | 151.0 | 0 |
2 | 3 | 2019-12-25T14:00:00+00:00 | 5.0.1 | gachi | standard | hoko | ama | nzap89 | a- | 114 | ... | nzap85 | a+ | 163.0 | prime_becchu | a- | 160.0 | dualsweeper_custom | a | 126.0 | 0 |
3 | 4 | 2019-11-11T14:00:00+00:00 | 5.0.1 | regular | standard | nawabari | engawa | bamboo14mk1 | NaN | 336 | ... | liter4k | NaN | 189.0 | promodeler_mg | NaN | 194.0 | hotblaster_custom | NaN | 391.0 | 0 |
4 | 5 | 2019-12-14T06:00:00+00:00 | 5.0.1 | gachi | standard | hoko | chozame | bold_7 | x | 299 | ... | sputtery_hue | x | 45.0 | bucketslosher_soda | x | 246.0 | wakaba | x | 160.0 | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
66120 | 66121 | 2019-10-11T10:00:00+00:00 | 5.0.1 | gachi | standard | area | devon | hokusai_becchu | a | 66 | ... | nzap85 | a+ | 272.0 | splatcharger | a | 38.0 | wakaba | a+ | 283.0 | 1 |
66121 | 66122 | 2019-12-04T00:00:00+00:00 | 5.0.1 | gachi | standard | asari | otoro | hokusai_becchu | s+ | 204 | ... | prime_becchu | s+ | 167.0 | pablo | s+ | 188.0 | nzap89 | s+ | 170.0 | 1 |
66122 | 66123 | 2019-10-22T00:00:00+00:00 | 5.0.1 | gachi | standard | hoko | bbass | prime_collabo | a+ | 116 | ... | bold_neo | a+ | 70.0 | nzap85 | a+ | 191.0 | hissen_hue | a+ | 139.0 | 1 |
66123 | 66124 | 2019-12-11T00:00:00+00:00 | 5.0.1 | gachi | standard | area | fujitsubo | splatroller | s+ | 210 | ... | splatroller | s+ | 395.0 | rapid_elite_deco | s+ | 223.0 | sharp_neo | s+ | 194.0 | 0 |
66124 | 66125 | 2019-12-13T18:00:00+00:00 | 5.0.1 | gachi | standard | asari | sumeshi | splatroller | s | 132 | ... | nzap85 | s | 124.0 | l3reelgun | s | 103.0 | hokusai_becchu | s | 35.0 | 0 |
66125 rows × 32 columns