import numpy as np
import lightgbm as lgb
import optuna
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

# Target variable to predict
y_values = y["salary"]
def objective(trial):
    # Hyperparameter search space; each suggest_* call needs its own unique name
    params = {
        'task': 'train',
        'boosting_type': 'gbdt',
        'objective': 'regression',
        'metric': {'l2'},
        'verbosity': -1,
        'seed': 42,
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.005, 0.03),
        'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
        'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
    }
    num_round = 10000
    FOLD_NUM = 5
    models = []
    kf = KFold(n_splits=FOLD_NUM, shuffle=True, random_state=42)
    scores = []

    for i, (tdx, vdx) in enumerate(kf.split(X, y_values)):
        print(f'Fold : {i}')
        X_train, X_valid = X.iloc[tdx], X.iloc[vdx]
        y_train, y_valid = y_values.iloc[tdx], y_values.iloc[vdx]
        lgb_train = lgb.Dataset(X_train, y_train)
        lgb_valid = lgb.Dataset(X_valid, y_valid)
        # Train with early stopping on the validation fold
        model = lgb.train(params, lgb_train, num_boost_round=num_round,
                          # categorical_feature=cat_features,
                          valid_names=["train", "valid"], valid_sets=[lgb_train, lgb_valid],
                          callbacks=[lgb.early_stopping(stopping_rounds=10, verbose=False)])
        va_pred = model.predict(X_valid, num_iteration=model.best_iteration)
        score_ = -mean_squared_error(y_valid.values, va_pred)  # negated so the study maximizes it; room for improvement here
        print(score_)
        scores.append(score_)
        models.append(model)
    return np.mean(scores)
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)
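# Optional sketch, not part of the original tuning code: visualize how the
# search progressed over the 100 trials (assumes plotly is installed, which
# optuna.visualization requires).
fig = optuna.visualization.plot_optimization_history(study)
fig.show()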
# Check the results
print('Best trial:')
trial = study.best_trial
print('  Value: {}'.format(trial.value))
print('  Params: ')
for key, value in trial.params.items():
    print('    "{}": {},'.format(key, value))
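# A minimal follow-up sketch, not part of the original code: retrain a single
# model on all the training data with the best parameters found. Assumes `X`,
# `y_values`, and `test` are the same objects used above; the fixed settings
# must be merged back in, since trial.params holds only the sampled values.
best_params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': {'l2'},
    'verbosity': -1,
    'seed': 42,
}
best_params.update(study.best_params)
final_train = lgb.Dataset(X, y_values)
# No held-out fold here, so use a fixed round count (an assumption) instead of early stopping
final_model = lgb.train(best_params, final_train, num_boost_round=1000)
test_pred = final_model.predict(test)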