nishimoto
初のコンペお疲れ様でした。
今回のデータで(私は解析には活かせませんでしたが)リークのようなものがあったので、共有します。
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from sklearn.metrics import accuracy_score
# Read the two competition splits from the working directory.
df_train = pd.read_csv("train_data.csv")
df_test = pd.read_csv("test_data.csv")

# Remove the "y" column from the training split and rename its "id" column to
# "ID" (presumably so it lines up with the test split's column name -- confirm
# against the CSV headers).
df_train.drop(columns=["y"], inplace=True)
df_train.rename({"id": "ID"}, axis="columns", inplace=True)

# Stack train on top of test.  reset_index() renumbers the rows 0..n-1 and
# keeps every row's previous label in a new "index" column.
df = pd.concat([df_train, df_test]).reset_index()
def _cols(first, last):
    """Return the column names "X<first>" .. "X<last>" (both ends inclusive)."""
    return ["X{}".format(i) for i in range(first, last + 1)]


def _find_shifted_pairs(frame):
    """Find pairs of rows that look like the same record shifted by one column.

    A row ``c`` is a match for a row ``r`` when all of the following hold:

    * ``c.X1..X5   == r.X1..X5``
    * ``c.X6..X10  == r.X7..X11``
    * ``c.X12..X16 == r.X13..X17``
    * ``c.X18..X22 == r.X19..X23``

    ``r`` is reported only when exactly one row of ``frame`` matches it and
    that row is not ``r`` itself.

    Rows are not used as ``r`` when X6..X11 sum to -12 or -9 while X12..X23
    sum to 0 (the original author's filter for "new" and "almost new"
    customers).  Such rows can still be the matched row ``c`` of another row.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain columns X1..X23 and have a unique index.

    Returns
    -------
    list
        Index labels laid out as ``[c0, r0, c1, r1, ...]``: the matched row
        first, then the row it was found for, in ``frame.index`` order of
        ``r``.
    """
    # Columns read from a candidate row c, and the columns of the current row
    # r they are compared against (same positions, shifted by one for X6+).
    candidate_cols = _cols(1, 5) + _cols(6, 10) + _cols(12, 16) + _cols(18, 22)
    lookup_cols = _cols(1, 5) + _cols(7, 11) + _cols(13, 17) + _cols(19, 23)

    # Group every row label by its candidate key so each row needs only one
    # dictionary lookup instead of a scan over the whole frame.
    labels_by_key = {}
    candidate_keys = frame[candidate_cols].itertuples(index=False, name=None)
    for label, key in zip(frame.index, candidate_keys):
        labels_by_key.setdefault(key, []).append(label)

    # skipna=False keeps the behaviour of the plain "+" chain: a missing value
    # makes the sum NaN, so the row is never treated as a (nearly) new customer.
    x6_to_x11_sum = frame[_cols(6, 11)].sum(axis=1, skipna=False)
    x12_to_x23_sum = frame[_cols(12, 23)].sum(axis=1, skipna=False)
    # Skip new customers and almost-new customers.
    is_skipped = x6_to_x11_sum.isin([-12, -9]) & (x12_to_x23_sum == 0)

    pairs = []
    lookup_keys = frame[lookup_cols].itertuples(index=False, name=None)
    for label, skipped, key in zip(frame.index, is_skipped, lookup_keys):
        if skipped:
            continue
        matches = labels_by_key.get(key, ())
        # Require a unique match; zero or several candidates are ignored.
        if len(matches) != 1:
            continue
        # A row whose values are constant across the shift matches itself.
        if matches[0] != label:
            pairs.extend([matches[0], label])
    return pairs


# Row labels in (matched row, current row) order; per the author's note this
# corresponds to (older record, newer record).
leak_id = _find_shifted_pairs(df)
# Never filled by the visible code; kept so the module-level name still exists.
leak_val = []

# Notebook cell output: show the paired rows next to each other.
df.loc[leak_id, :]
index | ID | X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 | ... | X14 | X15 | X16 | X17 | X18 | X19 | X20 | X21 | X22 | X23 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
29596 | 2596 | 2596 | 240000 | 2 | 2 | 1 | 34 | -2 | -2 | -2 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4710 | 4710 | 4710 | 240000 | 2 | 2 | 1 | 34 | -2 | -2 | -2 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
23944 | 23944 | 23944 | 80000 | 1 | 1 | 2 | 26 | 1 | -2 | -2 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5501 | 5501 | 5501 | 80000 | 1 | 1 | 2 | 26 | 1 | 1 | -2 | ... | 0 | 0 | 0 | 0 | 5000 | 0 | 0 | 0 | 0 | 0 |
12295 | 12295 | 12295 | 50000 | 1 | 1 | 1 | 47 | 0 | 0 | 0 | ... | 53561 | 53283 | 53057 | 52908 | 2743 | 2668 | 1712 | 1762 | 1894 | 1670 |
13486 | 13486 | 13486 | 50000 | 1 | 1 | 1 | 47 | 0 | 0 | 0 | ... | 51636 | 53561 | 53283 | 53057 | 2657 | 2743 | 2668 | 1712 | 1762 | 1894 |
28139 | 1139 | 1139 | 110000 | 1 | 2 | 1 | 39 | 0 | 0 | 0 | ... | 34533 | 36819 | 32467 | 35806 | 3007 | 3000 | 3000 | 3000 | 4000 | 3000 |
13493 | 13493 | 13493 | 110000 | 1 | 2 | 1 | 39 | 0 | 0 | 0 | ... | 40359 | 34533 | 36819 | 32467 | 4000 | 3007 | 3000 | 3000 | 3000 | 4000 |
3854 | 3854 | 3854 | 390000 | 1 | 1 | 1 | 35 | 0 | 0 | 0 | ... | 47532 | 49414 | 51380 | 50661 | 20000 | 5006 | 5006 | 5008 | 3004 | 3006 |
13513 | 13513 | 13513 | 390000 | 1 | 1 | 1 | 35 | 0 | 0 | 0 | ... | 43996 | 47532 | 49414 | 51380 | 5000 | 20000 | 5006 | 5006 | 5008 | 3004 |
2957 | 2957 | 2957 | 280000 | 1 | 1 | 1 | 35 | 0 | 0 | 0 | ... | 210226 | 177595 | 153181 | 145852 | 101585 | 90762 | 50119 | 282 | 50436 | 54184 |
13534 | 13534 | 13534 | 280000 | 1 | 1 | 1 | 35 | 0 | 0 | 0 | ... | 244092 | 210226 | 177595 | 153181 | 100000 | 101585 | 90762 | 50119 | 282 | 50436 |
317 | 317 | 317 | 180000 | 1 | 1 | 1 | 39 | 0 | 0 | -1 | ... | 242063 | 122295 | -1005 | 1005 | 11000 | 145000 | 26000 | 0 | 101005 | 1898 |
13566 | 13566 | 13566 | 180000 | 1 | 1 | 1 | 39 | 0 | 0 | 0 | ... | 281713 | 242063 | 122295 | -1005 | 20000 | 11000 | 145000 | 26000 | 0 | 101005 |
1958 | 1958 | 1958 | 50000 | 1 | 2 | 1 | 53 | 0 | 0 | 0 | ... | 46716 | 18685 | 19076 | 19466 | 1948 | 1835 | 669 | 692 | 707 | 695 |
13580 | 13580 | 13580 | 50000 | 1 | 2 | 1 | 53 | 0 | 0 | 0 | ... | 48954 | 46716 | 18685 | 19076 | 1818 | 1948 | 1835 | 669 | 692 | 707 |
5774 | 5774 | 5774 | 80000 | 1 | 2 | 2 | 48 | 0 | 0 | 0 | ... | 47663 | 48350 | 47208 | 46292 | 2500 | 2000 | 2000 | 2000 | 2000 | 2000 |
13585 | 13585 | 13585 | 80000 | 1 | 2 | 2 | 48 | 0 | 0 | 0 | ... | 72885 | 47663 | 48350 | 47208 | 3500 | 2500 | 2000 | 2000 | 2000 | 2000 |
8229 | 8229 | 8229 | 240000 | 1 | 2 | 1 | 54 | 0 | 0 | 2 | ... | 243589 | 204754 | 201426 | 198736 | 234000 | 30 | 7046 | 7023 | 10005 | 8014 |
13591 | 13591 | 13591 | 240000 | 1 | 2 | 1 | 54 | 0 | 0 | 0 | ... | 249258 | 243589 | 204754 | 201426 | 2000 | 234000 | 30 | 7046 | 7023 | 10005 |
10130 | 10130 | 10130 | 190000 | 1 | 2 | 1 | 58 | 2 | 0 | 0 | ... | 142293 | 113086 | 115433 | 117789 | 6586 | 5881 | 3292 | 3399 | 3469 | 5000 |
13594 | 13594 | 13594 | 190000 | 1 | 2 | 1 | 58 | 1 | 2 | 0 | ... | 139664 | 142293 | 113086 | 115433 | 0 | 6586 | 5881 | 3292 | 3399 | 3469 |
13447 | 13447 | 13447 | 30000 | 1 | 3 | 1 | 55 | 2 | 2 | 7 | ... | 2395 | 2395 | 2395 | 2395 | 0 | 0 | 0 | 0 | 0 | 0 |
13595 | 13595 | 13595 | 30000 | 1 | 3 | 1 | 55 | 3 | 2 | 2 | ... | 2395 | 2395 | 2395 | 2395 | 0 | 0 | 0 | 0 | 0 | 0 |
27857 | 857 | 857 | 50000 | 2 | 2 | 2 | 25 | 0 | 0 | 0 | ... | 27634 | 26691 | 26329 | 25923 | 1800 | 1400 | 915 | 1500 | 1500 | 2000 |
13615 | 13615 | 13615 | 50000 | 2 | 2 | 2 | 25 | 0 | 0 | 0 | ... | 28481 | 27634 | 26691 | 26329 | 1800 | 1800 | 1400 | 915 | 1500 | 1500 |
2772 | 2772 | 2772 | 30000 | 2 | 2 | 2 | 24 | 2 | 2 | 7 | ... | 300 | 300 | 300 | 300 | 0 | 0 | 0 | 0 | 0 | 0 |
13644 | 13644 | 13644 | 30000 | 2 | 2 | 2 | 24 | 3 | 2 | 2 | ... | 300 | 300 | 300 | 300 | 0 | 0 | 0 | 0 | 0 | 0 |
27605 | 605 | 605 | 90000 | 2 | 2 | 2 | 30 | 0 | 0 | 0 | ... | 75036 | 77094 | 75708 | 76610 | 3350 | 3300 | 5500 | 1600 | 3950 | 0 |
13654 | 13654 | 13654 | 90000 | 2 | 2 | 2 | 30 | 2 | 0 | 0 | ... | 73980 | 75036 | 77094 | 75708 | 3300 | 3350 | 3300 | 5500 | 1600 | 3950 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1680 | 1680 | 1680 | 160000 | 2 | 3 | 1 | 41 | 2 | 2 | 2 | ... | 46134 | 47063 | 50034 | 51063 | 2200 | 0 | 2000 | 3900 | 2000 | 0 |
29828 | 2828 | 2828 | 160000 | 2 | 3 | 1 | 41 | 1 | 2 | 2 | ... | 47188 | 46134 | 47063 | 50034 | 2000 | 2200 | 0 | 2000 | 3900 | 2000 |
2754 | 2754 | 2754 | 430000 | 2 | 2 | 1 | 42 | 0 | 0 | 0 | ... | 90604 | 91200 | 92134 | 92834 | 3243 | 3200 | 3185 | 3500 | 3500 | 3420 |
29836 | 2836 | 2836 | 430000 | 2 | 2 | 1 | 42 | 0 | 0 | 0 | ... | 90052 | 90604 | 91200 | 92134 | 3169 | 3243 | 3200 | 3185 | 3500 | 3500 |
1719 | 1719 | 1719 | 80000 | 2 | 1 | 2 | 29 | 0 | 0 | 0 | ... | 23141 | 22160 | 21478 | 20441 | 1500 | 1700 | 1000 | 1000 | 1000 | 1500 |
29863 | 2863 | 2863 | 80000 | 2 | 1 | 2 | 29 | 0 | 0 | 0 | ... | 23224 | 23141 | 22160 | 21478 | 1710 | 1500 | 1700 | 1000 | 1000 | 1000 |
1763 | 1763 | 1763 | 220000 | 2 | 1 | 1 | 42 | 2 | 2 | 2 | ... | 2500 | 2500 | 2500 | 2500 | 0 | 0 | 0 | 0 | 0 | 0 |
29895 | 2895 | 2895 | 220000 | 2 | 1 | 1 | 42 | 3 | 2 | 2 | ... | 2500 | 2500 | 2500 | 2500 | 0 | 0 | 0 | 0 | 0 | 0 |
27217 | 217 | 217 | 120000 | 2 | 3 | 1 | 50 | 0 | 0 | 0 | ... | 115046 | 80987 | 85921 | 84384 | 4200 | 4552 | 3100 | 6416 | 0 | 3228 |
29918 | 2918 | 2918 | 120000 | 2 | 3 | 1 | 50 | 0 | 0 | 0 | ... | 112482 | 115046 | 80987 | 85921 | 5500 | 4200 | 4552 | 3100 | 6416 | 0 |
4799 | 4799 | 4799 | 20000 | 1 | 3 | 2 | 25 | 2 | 2 | 4 | ... | 1650 | 1650 | 1650 | 1650 | 0 | 0 | 0 | 0 | 0 | 0 |
29933 | 2933 | 2933 | 20000 | 1 | 3 | 2 | 25 | 3 | 2 | 2 | ... | 1650 | 1650 | 1650 | 1650 | 0 | 0 | 0 | 0 | 0 | 0 |
9574 | 9574 | 9574 | 50000 | 1 | 2 | 2 | 24 | 0 | 0 | 0 | ... | 17827 | 7655 | 7881 | 8248 | 1700 | 2000 | 500 | 500 | 500 | 1000 |
29938 | 2938 | 2938 | 50000 | 1 | 2 | 2 | 24 | 0 | 0 | 0 | ... | 21507 | 17827 | 7655 | 7881 | 5000 | 1700 | 2000 | 500 | 500 | 500 |
1389 | 1389 | 1389 | 50000 | 1 | 3 | 2 | 34 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 2400 | 0 | 0 | 0 | 0 | 0 |
29955 | 2955 | 2955 | 50000 | 1 | 3 | 2 | 34 | 0 | 0 | 0 | ... | 51400 | 0 | 0 | 0 | 2400 | 2400 | 0 | 0 | 0 | 0 |
459 | 459 | 459 | 120000 | 1 | 1 | 2 | 26 | -1 | -1 | -2 | ... | 0 | 10446 | 0 | 0 | 0 | 0 | 10446 | 0 | 0 | 0 |
29962 | 2962 | 2962 | 120000 | 1 | 1 | 2 | 26 | -1 | -1 | -1 | ... | 0 | 0 | 10446 | 0 | 884 | 0 | 0 | 10446 | 0 | 0 |
13261 | 13261 | 13261 | 180000 | 1 | 1 | 2 | 29 | -1 | -1 | -1 | ... | 1016 | 4745 | 0 | 0 | 4390 | 1016 | 4745 | 0 | 0 | 1400 |
29968 | 2968 | 2968 | 180000 | 1 | 1 | 2 | 29 | -1 | -1 | -1 | ... | 4390 | 1016 | 4745 | 0 | 18213 | 4390 | 1016 | 4745 | 0 | 0 |
6457 | 6457 | 6457 | 20000 | 1 | 2 | 1 | 36 | 2 | 2 | 2 | ... | 23073 | 22857 | 21143 | 21482 | 1700 | 0 | 1653 | 0 | 1940 | 0 |
29974 | 2974 | 2974 | 20000 | 1 | 2 | 1 | 36 | 2 | 2 | 2 | ... | 25026 | 23073 | 22857 | 21143 | 2000 | 1700 | 0 | 1653 | 0 | 1940 |
12677 | 12677 | 12677 | 30000 | 1 | 2 | 2 | 29 | 0 | 0 | -1 | ... | 3126 | 7365 | 3245 | 828 | 1000 | 3126 | 7365 | 0 | 828 | 8511 |
29983 | 2983 | 2983 | 30000 | 1 | 2 | 2 | 29 | 0 | 0 | 0 | ... | 3101 | 3126 | 7365 | 3245 | 1082 | 1000 | 3126 | 7365 | 0 | 828 |
1928 | 1928 | 1928 | 140000 | 1 | 3 | 1 | 41 | -1 | -1 | -1 | ... | 1686 | 1686 | 205 | 6689 | 0 | 1686 | 0 | 205 | 6689 | 0 |
29984 | 2984 | 2984 | 140000 | 1 | 3 | 1 | 41 | -1 | -1 | -1 | ... | 0 | 1686 | 1686 | 205 | 2139 | 0 | 1686 | 0 | 205 | 6689 |
9546 | 9546 | 9546 | 230000 | 1 | 2 | 1 | 41 | 0 | 0 | 0 | ... | 228653 | 225471 | 224853 | 230078 | 10500 | 10000 | 9000 | 8200 | 9000 | 9000 |
29992 | 2992 | 2992 | 230000 | 1 | 2 | 1 | 41 | 0 | 0 | 0 | ... | 231272 | 228653 | 225471 | 224853 | 10000 | 10500 | 10000 | 9000 | 8200 | 9000 |
9288 | 9288 | 9288 | 220000 | 1 | 3 | 1 | 39 | 0 | 0 | 0 | ... | 88004 | 31237 | 15980 | 529 | 20000 | 5003 | 3047 | 5000 | 1000 | 81000 |
29999 | 2999 | 2999 | 220000 | 1 | 3 | 1 | 39 | 0 | 0 | 0 | ... | 208365 | 88004 | 31237 | 15980 | 8500 | 20000 | 5003 | 3047 | 5000 | 1000 |
2134 rows × 25 columns
古いデータ → 新しいデータという順で並べています。
そうでもないデータも混じっていますが、ID: 12295とID: 13486 などはleak風ですね。
(ほぼ同じデータが1つずれて並んでいる)
また、新しいデータの方がIDが大きいことが多いので、おそらくデータは時系列順に並んでいると思われます。
nishimoto
ちなみに、僕は普通に文字列検索でこのリークを見つけました。
tak
違うIDを振られていたけど、同一顧客のデータが含まれていたということでしょうか。