ProbSpace Beta Competition, Round 2!
chizuchizu
This code was written with reference to Introduction to CNN Keras - 0.997 (top 6%).
Importing libraries, etc.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns

from sklearn.model_selection import train_test_split

import tensorflow as tf
from keras.utils.np_utils import to_categorical  # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from keras.backend import tensorflow_backend

# Let TensorFlow allocate GPU memory on demand instead of grabbing it all at once
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(config=config)
tensorflow_backend.set_session(session)

np.random.seed(2)
Using TensorFlow backend.
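Note that the ConfigProto / Session setup above only exists in TensorFlow 1.x with standalone Keras. If you run this notebook on TensorFlow 2.x with tf.keras, a rough equivalent of the memory-growth setting (my own sketch, not part of the original code) would be:

import tensorflow as tf

# TF 2.x sketch: allocate GPU memory on demand instead of using
# tf.ConfigProto / tf.Session, which were removed in TensorFlow 2.
for gpu in tf.config.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(gpu, True)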
Loading the data. Divide by 255 so the pixel values fall in the range 0 to 1.
Y_train = np.load("kmnist-train-labels.npz")["arr_0"].astype(np.int)
X_train = np.load("kmnist-train-imgs.npz")["arr_0"].astype(np.float)
test = np.load("kmnist-test-imgs.npz")["arr_0"].astype(np.float)

X_train = X_train / 255.0
test = test / 255.0
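As a quick sanity check (my own addition, not in the original notebook), you can print the array shapes and look at one of the characters; matplotlib is already imported above.

print(X_train.shape, Y_train.shape, test.shape)  # KMNIST: (60000, 28, 28) (60000,) (10000, 28, 28)
plt.imshow(X_train[0], cmap="gray")
plt.title("label = {}".format(Y_train[0]))
plt.show()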
Preprocessing: reshape the data to 4 dimensions and split it into training and validation data.
X_train = X_train[:, :, :, np.newaxis]
test = test[:, :, :, np.newaxis]

Y_train = to_categorical(Y_train, num_classes=10)

random_seed = 2
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train,
                                                  test_size=0.1,
                                                  random_state=random_seed)
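It is worth confirming the shapes after the split (a sketch I added; the expected numbers follow from the 90/10 split of the 60,000 training images):

print(X_train.shape, X_val.shape)  # expected: (54000, 28, 28, 1) (6000, 28, 28, 1)
print(Y_train.shape, Y_val.shape)  # expected: (54000, 10) (6000, 10) after one-hot encoding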
Model definition
model = Sequential()

model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
                 activation='relu', input_shape=(28, 28, 1)))
model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
                 activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
                 activation='relu'))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
                 activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(10, activation="softmax"))

optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

# Halve the learning rate when validation accuracy stops improving
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc',
                                            patience=3,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.0001)

epochs = 1  # Turn epochs to 30 to get 0.9967 accuracy
batch_size = 86

# Data augmentation
datagen = ImageDataGenerator(
    featurewise_center=False,             # set input mean to 0 over the dataset
    samplewise_center=False,              # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,   # divide each input by its std
    zca_whitening=False,                  # apply ZCA whitening
    rotation_range=10,                    # randomly rotate images in the range (degrees, 0 to 180)
    zoom_range=0.1,                       # randomly zoom image
    width_shift_range=0.1,                # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.1,               # randomly shift images vertically (fraction of total height)
    horizontal_flip=False,                # randomly flip images
    vertical_flip=False)                  # randomly flip images

datagen.fit(X_train)
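Before a long training run it can help to eyeball what the augmentation actually produces (model.summary() is also handy here for checking the architecture). The snippet below is a sketch of my own, assuming datagen.fit has already been called as above; it plots one augmented mini-batch.

aug_iter = datagen.flow(X_train, Y_train, batch_size=9, shuffle=True)
images, labels = next(aug_iter)  # one augmented mini-batch of 9 images

fig, axes = plt.subplots(3, 3, figsize=(6, 6))
for img, label, ax in zip(images, labels, axes.ravel()):
    ax.imshow(img.squeeze(), cmap="gray")
    ax.set_title(str(np.argmax(label)))  # recover the class index from the one-hot label
    ax.axis("off")
plt.show()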
Training the model
history = model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
                              epochs=epochs,
                              validation_data=(X_val, Y_val),
                              verbose=2,
                              steps_per_epoch=X_train.shape[0] // batch_size,
                              callbacks=[learning_rate_reduction])
Epoch 1/1 - 7s - loss: 0.8765 - acc: 0.7102 - val_loss: 0.1628 - val_acc: 0.9528
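The history object returned by fit_generator records the per-epoch curves, so they can be plotted once training finishes. This is a sketch I added; the key names "acc" / "val_acc" match the Keras 2.2-era log shown above.

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(history.history["loss"], label="train")
ax1.plot(history.history["val_loss"], label="val")
ax1.set_title("loss")
ax1.legend()
ax2.plot(history.history["acc"], label="train")
ax2.plot(history.history["val_acc"], label="val")
ax2.set_title("accuracy")
ax2.legend()
plt.show()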
Predict on the test data with the trained model and write the results to a CSV file.
results = model.predict(test)
results = np.argmax(results, axis=1)
results = pd.Series(results, name="Label")

submission = pd.concat([pd.Series(range(1, 10001), name="ImageId"), results], axis=1)
submission.to_csv("submit.csv", index=False)
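Finally, a quick look at the first few rows before uploading never hurts; saving the trained model is optional, and the file name below is just an example I chose.

print(submission.head())     # check the ImageId / Label columns against the competition's submission format
model.save("kmnist_cnn.h5")  # optional: keep the trained weights for later reuse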