sample code

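# Readability was not a particular concern here.
# For now, I am just posting what I have put together end to end.
# Library imports
# Some parts are loosely borrowed from other sources.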
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score as acc
from sklearn.preprocessing import MultiLabelBinarizer

import warnings
warnings.filterwarnings('ignore')

# Load the training and test data.
train = pd.read_csv("./data/train_data.csv")
test = pd.read_csv("./data/test_data.csv")

import os

os.environ["KMP_DUPLICATE_LIB_OK"] = "True"

# Replacement table: each "*_replica" weapon name is rewritten to the weapon
# name given as its value.
change_weapon = dict(
    heroblaster_replica="hotblaster",
    herobrush_replica="hokusai",
    herocharger_replica="splatcharger",
    heromaneuver_replica="maneuver",
    heroroller_replica="splatroller",
    heroshelter_replica="parashelter",
    heroshooter_replica="sshooter",
    heroslosher_replica="bucketslosher",
    herospinner_replica="splatspinner",
    octoshooter_replica="sshooter",
)

# Apply the replacement to every cell of both frames.
train, test = (frame.replace(change_weapon) for frame in (train, test))

# Fit the weapon vocabulary on every weapon slot of both teams, in train and
# test. Fitting on train["A4-weapon"] alone would leave out any weapon that
# only ever appears in another slot or only in the test set, and such a weapon
# would then get no indicator column when `mlb.transform` is applied.
# Missing weapons are represented by the label "none".
weapon_cols = [f"{team}{slot}-weapon" for team in "AB" for slot in "1234"]
weapon_names = pd.concat([train[weapon_cols], test[weapon_cols]]).fillna("none")
mlb = MultiLabelBinarizer()
mlb.fit([set(weapon_names.values.ravel())])
MultiLabelBinarizer(classes=None, sparse_output=False)
train.head(3)
id period game-ver lobby-mode lobby mode stage A1-weapon A1-rank A1-level ... B2-weapon B2-rank B2-level B3-weapon B3-rank B3-level B4-weapon B4-rank B4-level y
0 1 2019-10-15T20:00:00+00:00 5.0.1 regular standard nawabari sumeshi sshooter_becchu NaN 139 ... hokusai_becchu NaN 26.0 splatcharger NaN 68.0 sharp_neo NaN 31.0 1
1 2 2019-12-14T04:00:00+00:00 5.0.1 regular standard nawabari arowana parashelter_sorella NaN 198 ... squiclean_b NaN 118.0 campingshelter NaN 168.0 sputtery_clear NaN 151.0 0
2 3 2019-12-25T14:00:00+00:00 5.0.1 gachi standard hoko ama nzap89 a- 114 ... nzap85 a+ 163.0 prime_becchu a- 160.0 dualsweeper_custom a 126.0 0

3 rows × 32 columns

print(len(train), len(test))
66125 28340
train.y.sum()
34696
def make_feature(df_train, df_test, binarizer=None):
    """Build model features for a train/test pair of match tables.

    The two frames are stacked, transformed together so that both parts share
    the same categorical codes, and split back at the original boundary.

    Transformations applied to the stacked frame:

    * ``lobby-mode``, ``mode`` and ``stage`` are replaced by the integer codes
      returned by ``pd.factorize``.
    * For every class of ``binarizer`` two indicator columns are created,
      ``"A-<class>"`` and ``"B-<class>"``, from the four weapon columns of
      team A and team B respectively (missing weapons count as ``"none"``).
    * Every ``<team><slot>-level`` column is floor-divided by 10 and clipped
      to the range 0..30.

    NOTE: the level bucketing is applied on every call, so passing frames that
    were already returned by this function buckets the levels a second time.

    Args:
        df_train: Training rows; left unmodified.
        df_test: Test/validation rows; left unmodified.
        binarizer: Fitted object exposing ``classes_`` and ``transform``
            (e.g. a ``MultiLabelBinarizer``). Defaults to the module-level
            ``mlb``.

    Returns:
        A ``(train_features, test_features)`` tuple of independent DataFrames
        holding the first ``len(df_train)`` rows and the remaining rows of
        the transformed frame, with their original index labels.
    """
    if binarizer is None:
        binarizer = mlb  # module-level binarizer fitted on the weapon names

    train_num = len(df_train)
    df = pd.concat([df_train, df_test])  # new frame; the inputs stay untouched

    for col in ("lobby-mode", "mode", "stage"):
        df[col] = pd.factorize(df[col])[0]

    team_a = ["A1-weapon", "A2-weapon", "A3-weapon", "A4-weapon"]
    team_b = ["B1-weapon", "B2-weapon", "B3-weapon", "B4-weapon"]
    a_flags = binarizer.transform(df[team_a].fillna("none").values)
    b_flags = binarizer.transform(df[team_b].fillna("none").values)

    # Collect all indicator columns first and attach them in one step;
    # inserting hundreds of columns one by one fragments the frame.
    flag_data = {}
    for i, weapon in enumerate(binarizer.classes_):
        flag_data["A-" + weapon] = a_flags[:, i]
        flag_data["B-" + weapon] = b_flags[:, i]
    flags = pd.DataFrame(flag_data, index=df.index)
    # Drop indicator columns left over from an earlier call so they are
    # replaced rather than duplicated.
    df = pd.concat([df.drop(columns=flags.columns, errors="ignore"), flags], axis=1)

    for team in ("A", "B"):
        for slot in ("1", "2", "3", "4"):
            col = team + slot + "-level"
            df[col] = (df[col] // 10).clip(0, 30)

    # Copies, so that later edits to one part cannot touch the other.
    return df.iloc[:train_num].copy(), df.iloc[train_num:].copy()
# Build the features for the full training and test sets.
tr, te = make_feature(train, test)

# Model inputs: the three encoded match columns plus one indicator column per
# weapon class for each team.
feature_cols = ["lobby-mode", "mode", "stage"]
feature_cols += [f"A-{weapon}" for weapon in mlb.classes_]
feature_cols += [f"B-{weapon}" for weapon in mlb.classes_]
print(tr.shape)
(66125, 292)
# 5-fold cross-validation of a default LightGBM classifier on the training
# features, printing the accuracy of each fold and the average.
#
# `random_state` is deliberately not passed to KFold: without shuffle=True it
# has no effect on the splits, and newer scikit-learn releases raise a
# ValueError for that combination. The folds remain the same contiguous,
# deterministic splits.
kf = KFold(n_splits=5)
score = 0
for counter, (train_index, valid_index) in enumerate(kf.split(tr, tr.y), start=1):
    # kf.split yields positional indices, so select rows by position.
    train_X, valid_X = tr.iloc[train_index].copy(), tr.iloc[valid_index].copy()
    train_y, valid_y = tr.y.iloc[train_index], tr.y.iloc[valid_index]

    # NOTE(review): `tr` already went through make_feature, so this call
    # re-encodes the categorical columns within the fold and buckets the level
    # columns a second time (the level columns are not in `feature_cols`).
    # Kept to preserve the reported scores; confirm whether it is intended.
    train_X, valid_X = make_feature(train_X, valid_X)

    clf = lgb.LGBMClassifier()
    clf.fit(train_X[feature_cols], train_y)
    preds = clf.predict(valid_X[feature_cols])
    acc_score = acc(valid_y, preds)
    print(f"fold{counter} score is :{acc_score}")
    score += acc_score
# Average over the actual number of folds instead of a hard-coded 5.
print("average : ", round(score / kf.get_n_splits(), 5))
fold1 score is :0.5442722117202269
fold2 score is :0.5435160680529301
fold3 score is :0.5465406427221172
fold4 score is :0.5326275992438564
fold5 score is :0.5365595463137997
average :  0.5407
# For submission: refit on the whole training set and predict the test set.
tr, te = make_feature(train, test)

clf = lgb.LGBMClassifier()
clf.fit(tr[feature_cols].fillna(0), tr["y"].fillna(0))
pred_test = clf.predict(te[feature_cols])

# NOTE(review): ids are written as 0..len(pred_test)-1 rather than taken from
# the test table's own "id" column; confirm this matches the expected
# submission format.
submission = pd.DataFrame({"id": range(len(pred_test)), "y": pred_test})
submission.to_csv("submission.csv", index=False)
pred_test[pred_test == 1].shape
(19281,)
pred_test[pred_test == 0].shape
(9059,)
# Show the ten features with the highest gain-based importance in the
# fitted submission model.
gain = clf.booster_.feature_importance(importance_type="gain")
importance = pd.DataFrame({"f": gain}, index=feature_cols)

importance.sort_values("f", ascending=False).head(10)

f
stage 1681.303699
A-splatcharger 539.786695
lobby-mode 534.930203
B-splatcharger 424.459795
mode 343.591718
B-l3reelgun 252.490988
A-l3reelgun 232.794662
B-dualsweeper_custom 228.827620
B-none 226.369461
B-squiclean_a 203.461810
mlb.classes_
array(['52gal', '52gal_becchu', '52gal_deco', '96gal', '96gal_deco',
       'bamboo14mk1', 'bamboo14mk2', 'bamboo14mk3', 'barrelspinner',
       'barrelspinner_deco', 'barrelspinner_remix', 'bold', 'bold_7',
       'bold_neo', 'bottlegeyser', 'bottlegeyser_foil', 'bucketslosher',
       'bucketslosher_deco', 'bucketslosher_soda', 'campingshelter',
       'campingshelter_camo', 'campingshelter_sorella', 'carbon',
       'carbon_deco', 'clashblaster', 'clashblaster_neo', 'dualsweeper',
       'dualsweeper_custom', 'dynamo', 'dynamo_becchu', 'dynamo_tesla',
       'explosher', 'explosher_custom', 'furo', 'furo_deco', 'h3reelgun',
       'h3reelgun_cherry', 'h3reelgun_d', 'hissen', 'hissen_hue',
       'hokusai', 'hokusai_becchu', 'hokusai_hue', 'hotblaster',
       'hotblaster_custom', 'hydra', 'hydra_custom', 'jetsweeper',
       'jetsweeper_custom', 'kelvin525', 'kelvin525_becchu',
       'kelvin525_deco', 'kugelschreiber', 'kugelschreiber_hue',
       'l3reelgun', 'l3reelgun_becchu', 'l3reelgun_d', 'liter4k',
       'liter4k_custom', 'liter4k_scope', 'liter4k_scope_custom',
       'longblaster', 'longblaster_custom', 'longblaster_necro',
       'maneuver', 'maneuver_becchu', 'maneuver_collabo', 'momiji',
       'nautilus47', 'nautilus79', 'none', 'nova', 'nova_becchu',
       'nova_neo', 'nzap83', 'nzap85', 'nzap89', 'ochiba', 'pablo',
       'pablo_hue', 'pablo_permanent', 'parashelter',
       'parashelter_sorella', 'prime', 'prime_becchu', 'prime_collabo',
       'promodeler_mg', 'promodeler_pg', 'promodeler_rg',
       'quadhopper_black', 'quadhopper_white', 'rapid', 'rapid_becchu',
       'rapid_deco', 'rapid_elite', 'rapid_elite_deco', 'screwslosher',
       'screwslosher_becchu', 'screwslosher_neo', 'sharp', 'sharp_neo',
       'soytuber', 'soytuber_custom', 'splatcharger',
       'splatcharger_becchu', 'splatcharger_collabo', 'splatroller',
       'splatroller_becchu', 'splatroller_collabo', 'splatscope',
       'splatscope_becchu', 'splatscope_collabo', 'splatspinner',
       'splatspinner_becchu', 'splatspinner_collabo', 'sputtery',
       'sputtery_clear', 'sputtery_hue', 'spygadget', 'spygadget_becchu',
       'spygadget_sorella', 'squiclean_a', 'squiclean_b', 'squiclean_g',
       'sshooter', 'sshooter_becchu', 'sshooter_collabo',
       'variableroller', 'variableroller_foil', 'wakaba'], dtype=object)
# Bar chart of player levels across all eight player slots of the training
# data, in buckets of ten; buckets above 30 are merged into bucket 30 for the
# plot only (`c.level` itself keeps the unclipped bucket).
level_cols = [team + slot + "-level" for team in ("A", "B") for slot in ("1", "2", "3", "4")]
plt.figure(figsize=(30, 5))
# Stack the slots one after another: A1 for all rows, then A2, ... then B4.
c = pd.DataFrame({"level": pd.concat([train[col] for col in level_cols], ignore_index=True)})
c["level"] = c["level"] // 10
c["level"].clip(0, 30).value_counts().plot(kind="bar")
<matplotlib.axes._subplots.AxesSubplot at 0x7fbeb8d4a9b0>
c.level.max()
93.0
train
id period game-ver lobby-mode lobby mode stage A1-weapon A1-rank A1-level ... B2-weapon B2-rank B2-level B3-weapon B3-rank B3-level B4-weapon B4-rank B4-level y
0 1 2019-10-15T20:00:00+00:00 5.0.1 regular standard nawabari sumeshi sshooter_becchu NaN 139 ... hokusai_becchu NaN 26.0 splatcharger NaN 68.0 sharp_neo NaN 31.0 1
1 2 2019-12-14T04:00:00+00:00 5.0.1 regular standard nawabari arowana parashelter_sorella NaN 198 ... squiclean_b NaN 118.0 campingshelter NaN 168.0 sputtery_clear NaN 151.0 0
2 3 2019-12-25T14:00:00+00:00 5.0.1 gachi standard hoko ama nzap89 a- 114 ... nzap85 a+ 163.0 prime_becchu a- 160.0 dualsweeper_custom a 126.0 0
3 4 2019-11-11T14:00:00+00:00 5.0.1 regular standard nawabari engawa bamboo14mk1 NaN 336 ... liter4k NaN 189.0 promodeler_mg NaN 194.0 hotblaster_custom NaN 391.0 0
4 5 2019-12-14T06:00:00+00:00 5.0.1 gachi standard hoko chozame bold_7 x 299 ... sputtery_hue x 45.0 bucketslosher_soda x 246.0 wakaba x 160.0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
66120 66121 2019-10-11T10:00:00+00:00 5.0.1 gachi standard area devon hokusai_becchu a 66 ... nzap85 a+ 272.0 splatcharger a 38.0 wakaba a+ 283.0 1
66121 66122 2019-12-04T00:00:00+00:00 5.0.1 gachi standard asari otoro hokusai_becchu s+ 204 ... prime_becchu s+ 167.0 pablo s+ 188.0 nzap89 s+ 170.0 1
66122 66123 2019-10-22T00:00:00+00:00 5.0.1 gachi standard hoko bbass prime_collabo a+ 116 ... bold_neo a+ 70.0 nzap85 a+ 191.0 hissen_hue a+ 139.0 1
66123 66124 2019-12-11T00:00:00+00:00 5.0.1 gachi standard area fujitsubo splatroller s+ 210 ... splatroller s+ 395.0 rapid_elite_deco s+ 223.0 sharp_neo s+ 194.0 0
66124 66125 2019-12-13T18:00:00+00:00 5.0.1 gachi standard asari sumeshi splatroller s 132 ... nzap85 s 124.0 l3reelgun s 103.0 hokusai_becchu s 35.0 0

66125 rows × 32 columns

添付データ

  • first_data.ipynb?X-Amz-Expires=10800&X-Amz-Date=20241123T093801Z&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIP7GCBGMWPMZ42PQ
  • Favicon
    new user
    コメントするには 新規登録 もしくは ログイン が必要です。