SVMの結果を比較してみた
SVMの結果についてカーネルを
'linear' 'rbf' 'poly' 'sigmoid'
として比較してみた。
その結果、スコアはそれぞれ '0.814' '0.826' '0.808' '0.687' となった。カーネルが 'rbf' のときにもっとも良いスコアが得られた一方、'sigmoid' は他のカーネルと比べて圧倒的に悪い結果となってしまった。
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
# Load the training set; the 'id' and 'y' columns are split off so that
# only the remaining columns are used as model inputs.
train_data_path = '../data/train_data.csv'
train_data = pd.read_csv(train_data_path)
y_train = train_data['y']
x_train = train_data.drop(columns=['id', 'y'])

# Standardize the features. `scaler` stays bound to the fitted
# StandardScaler so it can be reused later in the script.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
# Train one SVC per kernel on the standardized training data.
# SVC.fit returns the fitted estimator itself, so each generator item is
# the trained model; the kernels are fitted in the listed order.
svm_linear, svm_rbf, svm_poly, svm_sigmoid = (
    SVC(kernel=kernel_name).fit(x_train_scaled, y_train)
    for kernel_name in ('linear', 'rbf', 'poly', 'sigmoid')
)
# Load the test set and drop its 'ID' column, keeping the rest as features.
test_data_path = '../data/test_data.csv'
test_data = pd.read_csv(test_data_path)
x_test = test_data.drop('ID', axis=1)

# Reuse the scaler that was fitted on the training features instead of
# fitting a new one on the test set. The SVMs were trained on data
# standardized with the training mean/variance, so the test data must go
# through the exact same transformation; re-fitting on the test set would
# shift and rescale the features differently and skew the predictions.
# NOTE(review): this assumes the test feature columns match the training
# feature columns (same names and order) -- confirm against the CSV files.
x_test_scaled = scaler.transform(x_test)
# For each kernel: predict on the scaled test features, wrap the result
# in a single-column ('Y') DataFrame and write it to its own CSV file.
# Models are processed in the order linear, rbf, poly, sigmoid, and each
# file is written right after its prediction is made.
_results = {}
for _kernel, _model, _csv_path in (
    ('linear', svm_linear, '../output/pred_liner.csv'),
    ('rbf', svm_rbf, '../output/pred_rbf.csv'),
    ('poly', svm_poly, '../output/pred_poly.csv'),
    ('sigmoid', svm_sigmoid, '../output/pred_sigmoid.csv'),
):
    _pred = _model.predict(x_test_scaled)
    _frame = pd.DataFrame({'Y' : _pred})
    output_path = _csv_path
    _frame.to_csv(output_path)
    _results[_kernel] = (_pred, _frame)

# Expose the per-kernel predictions and frames under their original names.
pred_linear, pred_liner_df = _results['linear']
pred_rbf, pred_rbf_df = _results['rbf']
pred_poly, pred_poly_df = _results['poly']
pred_sigmoid, pred_sigmoid_df = _results['sigmoid']