β版ProbSpaceコンペ第1弾!
koyama
SVMのカーネルを 'linear' 'rbf' 'poly' 'sigmoid' に変えて結果を比較してみた。その結果、スコアはそれぞれ '0.814' '0.826' '0.808' '0.687' となった。カーネルが 'rbf' のときにもっとも良いスコアが得られた一方、'sigmoid' は圧倒的に悪い結果となってしまった。
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Train one SVC per kernel ('linear', 'rbf', 'poly', 'sigmoid') on the
# standardised training features and write each model's test-set predictions
# to its own CSV file under ../output/.


def _predict_and_save(model, features, path):
    """Predict labels for *features* with *model* and write them to *path*.

    The predictions are stored in a single column named 'Y'; the DataFrame
    index is written as well (``to_csv`` default).

    Returns the raw prediction array and the DataFrame that was written.
    """
    predictions = model.predict(features)
    frame = pd.DataFrame({'Y': predictions})
    frame.to_csv(path)
    return predictions, frame


# --- Training data -----------------------------------------------------------
train_data_path = '../data/train_data.csv'
train_data = pd.read_csv(train_data_path)
x_train = train_data.drop(['id', 'y'], axis=1)
y_train = train_data['y']

# Fit the scaler on the training features only. Plain arrays are passed so
# that train/test columns are matched by position, exactly as the models
# (which only ever see arrays) already assume.
# NOTE(review): train drops 'id'/'y' while test drops 'ID', so header naming
# may differ between the two files -- confirm the feature columns line up.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train.values)

# --- Models ------------------------------------------------------------------
svm_linear = SVC(kernel='linear')
svm_linear.fit(x_train_scaled, y_train)

svm_rbf = SVC(kernel='rbf')
svm_rbf.fit(x_train_scaled, y_train)

svm_poly = SVC(kernel='poly')
svm_poly.fit(x_train_scaled, y_train)

svm_sigmoid = SVC(kernel='sigmoid')
svm_sigmoid.fit(x_train_scaled, y_train)

# --- Test data ---------------------------------------------------------------
test_data_path = '../data/test_data.csv'
test_data = pd.read_csv(test_data_path)
x_test = test_data.drop('ID', axis=1)

# Reuse the scaler fitted on the training set. Fitting a second scaler on the
# test set would standardise the test features with different mean/variance
# than the ones the models were trained on.
x_test_scaled = scaler.transform(x_test.values)

# --- Predictions -------------------------------------------------------------
# The trailing scores are the values noted in the original script; they were
# recorded while the test set was still scaled with its own, separately
# fitted scaler, so they may differ after the scaling fix above.
output_path = '../output/pred_liner.csv'
pred_linear, pred_liner_df = _predict_and_save(svm_linear, x_test_scaled, output_path)  # 0.814

output_path = '../output/pred_rbf.csv'
pred_rbf, pred_rbf_df = _predict_and_save(svm_rbf, x_test_scaled, output_path)  # 0.826

output_path = '../output/pred_poly.csv'
pred_poly, pred_poly_df = _predict_and_save(svm_poly, x_test_scaled, output_path)  # 0.808

output_path = '../output/pred_sigmoid.csv'
pred_sigmoid, pred_sigmoid_df = _predict_and_save(svm_sigmoid, x_test_scaled, output_path)  # 0.687