# skywalker
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import matplotlib.pyplot as plt
import japanize_matplotlib
from matplotlib_venn import venn2
import os
from tqdm import tqdm_notebook as tqdm
# Directory that holds the input CSV files.
INPUT_DIR = './raw/'

# os.path.join keeps the paths valid even if INPUT_DIR is later written
# without its trailing separator.
Train = pd.read_csv(os.path.join(INPUT_DIR, 'train_data.csv'))
Test = pd.read_csv(os.path.join(INPUT_DIR, 'test_data.csv'))

# Report (rows, columns) of both frames as a quick sanity check.
print('Train:', Train.shape, ' Test:', Test.shape)
# Output: Train: (356344, 28) Test: (34844, 27)
def draw_venn(df1, df2, ncols=4, figsize=(20, 20), save_path='venn.png',
              set_labels=('Train', 'Test')):
    """Plot, per shared column, a Venn diagram of the unique values in two frames.

    One subplot is drawn for every column that exists in both ``df1`` and
    ``df2``. Each subplot compares the set of unique values of that column
    in ``df1`` against the set in ``df2``. The figure is saved to
    ``save_path`` and then shown.

    Args:
        df1: First DataFrame (left circle).
        df2: Second DataFrame (right circle).
        ncols: Number of subplots per row.
        figsize: Figure size passed to ``plt.figure``.
        save_path: File the figure is written to.
        set_labels: Labels for the left and right circles.

    Returns:
        The list of column names that were plotted, in the order they
        appear in the frame with fewer columns.
    """
    df1_col = df1.columns.tolist()
    df2_col = df2.columns.tolist()

    # Use the shorter column list as the base, but keep only the columns
    # that the other frame has too, so a column that exists on one side only
    # (e.g. the target) is skipped instead of raising KeyError.
    if len(df1_col) > len(df2_col):
        base_cols, other_cols = df2_col, set(df1_col)
    else:
        base_cols, other_cols = df1_col, set(df2_col)
    df_col = [col for col in base_cols if col in other_cols]

    plt.figure(figsize=figsize, facecolor='w')

    # Ceiling division: just enough rows, without a trailing empty row when
    # the number of columns is an exact multiple of ncols.
    nrows = max(1, -(-len(df_col) // ncols))

    # Wrap the list itself (not enumerate) so tqdm knows the total and can
    # render a real progress bar.
    for i, col in enumerate(tqdm(df_col)):
        plt.subplot(nrows, ncols, i + 1)
        # NOTE(review): unique() keeps NaN, and NaN never compares equal, so
        # a missing value present in both frames is counted as unique to each
        # side. Confirm whether NaN should be dropped before comparing.
        s1 = set(df1[col].unique().tolist())
        s2 = set(df2[col].unique().tolist())
        venn2(subsets=[s1, s2], set_labels=set_labels)
        plt.title(str(col), fontsize=14)

    plt.savefig(save_path, bbox_inches='tight')
    plt.show()
    return df_col
# Compare the unique values of every shared column between Train and Test;
# keep the list of columns that were plotted.
df_col = draw_venn(df1=Train, df2=Test)
# Output: HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))