upura
# Notebook setup: imports, full-width notebook layout, and loading the data.
import pandas as pd
# Not referenced by name in the visible cells; imported for its import-time
# effect (Japanese font support in matplotlib figures).
import japanize_matplotlib
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import HTML, display

# Inject CSS so the notebook container spans the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Load the two CSV files, resolved relative to the notebook's working directory.
train = pd.read_csv('../datasets/data/train_data.csv')
test = pd.read_csv('../datasets/data/test_data.csv')
# Preview the first rows of the training DataFrame.
train.head()
# (rows, columns) of the training and test DataFrames, shown as a pair.
train.shape, test.shape
((21000, 13), (9000, 12))
# Count missing values per column in the training data.
train.isnull().sum()
id 0 position 0 age 0 area 0 sex 0 partner 0 num_child 0 education 0 service_length 0 study_time 0 commute 0 overtime 0 salary 0 dtype: int64
# Count missing values per column in the test data.
test.isnull().sum()
id 0 position 0 age 0 area 0 sex 0 partner 0 num_child 0 education 0 service_length 0 study_time 0 commute 0 overtime 0 dtype: int64
# Bar chart of how many training rows carry each distinct `position` value.
train['position'].value_counts().plot.bar()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54f46a15f8>
# Histogram of the `age` column in the training data.
train['age'].hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54f45dd978>
# Bar chart of training row counts per distinct `area` value.
# figsize=(20, 5) widens the figure — presumably because `area` has many
# distinct values; confirm against the rendered plot.
train['area'].value_counts().plot.bar(figsize=(20, 5))
<matplotlib.axes._subplots.AxesSubplot at 0x7f54f456b2e8>
# Bar chart of training row counts per distinct `sex` value.
train['sex'].value_counts().plot.bar()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54f44755c0>
# Bar chart of training row counts per distinct `partner` value.
train['partner'].value_counts().plot.bar()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54f4411f98>
# Bar chart of training row counts per distinct `num_child` value
# (bars are ordered by frequency, not by the value itself).
train['num_child'].value_counts().plot.bar()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54f43ebdd8>
# Bar chart of training row counts per distinct `education` value.
train['education'].value_counts().plot.bar()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54f4411c18>
# Histogram of the `service_length` column in the training data.
train['service_length'].hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54f434f320>
# Histogram of the `study_time` column in the training data.
train['study_time'].hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54f43b6320>
# Histogram of the `commute` column in the training data.
train['commute'].hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54ecfe7240>
# Histogram of the `overtime` column in the training data.
train['overtime'].hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54f4047d30>
# Histogram of the `salary` column in the training data.
# NOTE(review): `salary` is listed in the train null-count output but not in
# the test one, so it is presumably the prediction target — confirm.
train['salary'].hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54ecec51d0>