TrainとTestの重なり

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import matplotlib.pyplot as plt
import japanize_matplotlib
from matplotlib_venn import venn2
import os
from tqdm import tqdm_notebook as tqdm

データ読み込み

# Directory that holds the raw CSV inputs.
INPUT_DIR = './raw/'

# Read both CSV files in one pass; the order of the names fixes which frame
# is bound to Train and which to Test.
Train, Test = (
    pd.read_csv(INPUT_DIR + csv_name)
    for csv_name in ('train_data.csv', 'test_data.csv')
)

# Report the (rows, columns) shape of each frame.
print('Train:', Train.shape, ' Test:', Test.shape)
Train: (356344, 28)  Test: (34844, 27)

ベン図作成

def draw_venn(df1, df2):
    """Draw one Venn diagram per column comparing the unique values of two frames.

    For every column taken from the frame with fewer columns (and also present
    in the other frame), the unique values of ``df1`` and ``df2`` are compared
    with ``venn2``. The subplots are laid out four per row, the figure is
    saved as ``venn.png`` in the current working directory and then shown.

    Args:
        df1: DataFrame plotted as the left set, labelled 'Train'.
        df2: DataFrame plotted as the right set, labelled 'Test'.

    Returns:
        list: The column names that were plotted, in the column order of the
        frame that has fewer columns.
    """
    df1_col = df1.columns.tolist()
    df2_col = df2.columns.tolist()

    # Start from the shorter column list, as before, but keep only the columns
    # that exist in the other frame too, so indexing below cannot raise KeyError.
    if len(df1_col) > len(df2_col):
        base_cols, other_cols = df2_col, set(df1_col)
    else:
        base_cols, other_cols = df1_col, set(df2_col)
    df_col = [col for col in base_cols if col in other_cols]

    plt.figure(figsize=(20, 20), facecolor='w')

    n_cols = 4
    # Ceiling division: exactly as many rows as needed, with no empty trailing
    # row when the number of columns is a multiple of n_cols.
    n_rows = (len(df_col) + n_cols - 1) // n_cols

    # Wrap the list (not the enumerate object) so tqdm can read its length and
    # display a real total on the progress bar.
    for i, col in enumerate(tqdm(df_col)):
        plt.subplot(n_rows, n_cols, i + 1)
        # NOTE(review): unique() keeps missing values (NaN); whether a NaN in
        # both frames is counted as shared may depend on the column dtype --
        # consider dropna() if missing values should be excluded. TODO confirm.
        s1 = set(df1[col].unique().tolist())
        s2 = set(df2[col].unique().tolist())
        venn2(subsets=[s1, s2], set_labels=['Train', 'Test'])
        plt.title(str(col), fontsize=14)

    plt.savefig('venn.png',
                bbox_inches='tight')
    plt.show()

    return df_col
# Plot the per-column Train/Test value overlap and keep the list of plotted columns.
df_col = draw_venn(Train, Test)
HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

添付データ

  • 10_plt_venn__2_.ipynb
  • Favicon
    new user
    コメントするには 新規登録 もしくは ログイン が必要です。