1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
# Gradient-boosted binary classifier shared by every cross-validation fold.
# NOTE(review): `subsample=0.8` is passed without `subsample_freq`; LightGBM
# documents that bagging only happens when the bagging frequency is non-zero,
# so row subsampling may be inactive here -- confirm the intent.
model = lgb.LGBMClassifier(
    # Task definition and evaluation metric.
    objective='binary',
    metric='auc',
    is_unbalance=True,
    # Booster type and tree learner.
    boosting_type='gbdt',
    tree_learner='serial',
    # Tree shape.
    num_leaves=32,
    max_depth=6,
    # Boosting schedule.
    learning_rate=0.05,
    n_estimators=3000,
    # Row / column sampling.
    subsample=0.8,
    feature_fraction=0.6,
    # L1 / L2 regularisation (both disabled).
    reg_alpha=0.0,
    reg_lambda=0.0,
    # Fixed seed for reproducibility.
    random_state=1983,
)
# Out-of-fold prediction frames, one per fold.
oof = []

# Test-set prediction accumulator holding the `session_id` column of `test`.
# Take an explicit copy so that adding the target column writes to an
# independent frame instead of a selection of `test`; without it pandas
# raises SettingWithCopyWarning on the assignment below.
prediction = test[['session_id']].copy()
# Initialise as float so the column has its final dtype from the start.
prediction[ycol] = 0.0

# Feature-importance frames, one per fold.
df_importance_list = []
# 5-fold stratified cross-validation. For every fold the shared `model` is
# refitted on the training part, the held-out part is scored (out-of-fold
# predictions), and the test-set probabilities are averaged across folds.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=1983)

for fold_id, (trn_idx, val_idx) in enumerate(
        kfold.split(train[feature_names], train[ycol])):
    X_train = train.iloc[trn_idx][feature_names]
    Y_train = train.iloc[trn_idx][ycol]

    X_val = train.iloc[val_idx][feature_names]
    Y_val = train.iloc[val_idx][ycol]

    print('\nFold_{} Training ================================\n'.format(fold_id+1))

    # `fit(early_stopping_rounds=..., verbose=...)` was deprecated in
    # LightGBM 3.3 and removed in 4.0; the callbacks below are the supported
    # way to request early stopping after 50 stagnant rounds and a log line
    # every 500 rounds.
    # The very same (X_train, Y_train) objects are passed in `eval_set` so
    # that the training fold is reported under the name 'train'.
    lgb_model = model.fit(
        X_train,
        Y_train,
        eval_names=['train', 'valid'],
        eval_set=[(X_train, Y_train), (X_val, Y_val)],
        eval_metric='auc',
        callbacks=[
            lgb.early_stopping(stopping_rounds=50),
            lgb.log_evaluation(period=500),
        ],
    )

    # Out-of-fold predictions: positive-class probability (column 1) for the
    # held-out rows, using the best iteration found by early stopping.
    pred_val = lgb_model.predict_proba(
        X_val, num_iteration=lgb_model.best_iteration_)
    df_oof = train.iloc[val_idx][['session_id', ycol]].copy()
    df_oof['pred'] = pred_val[:, 1]
    oof.append(df_oof)

    # Test predictions: each fold contributes 1 / n_splits of the final value.
    pred_test = lgb_model.predict_proba(
        test[feature_names], num_iteration=lgb_model.best_iteration_)
    prediction[ycol] += pred_test[:, 1] / kfold.n_splits

    # Keep this fold's feature importances for averaging after the loop.
    df_importance = pd.DataFrame({
        'column': feature_names,
        'importance': lgb_model.feature_importances_,
    })
    df_importance_list.append(df_importance)

    # Drop per-fold references before the next fold allocates its own data.
    del lgb_model, pred_val, pred_test, X_train, Y_train, X_val, Y_val
    gc.collect()

# Mean importance per feature across folds, most important first.
df_importance = pd.concat(df_importance_list)
df_importance = (
    df_importance.groupby(['column'])['importance']
    .agg('mean')
    .sort_values(ascending=False)
    .reset_index()
)

# Overall out-of-fold AUC over all held-out rows.
df_oof = pd.concat(oof)
print('roc_auc_score', roc_auc_score(df_oof[ycol], df_oof['pred']))
# Build the submission frame: a 1-based running id plus the fold-averaged
# prediction, exposed under the column name 'ret'.
prediction['id'] = range(1, len(prediction) + 1)
# Select the prediction column through `ycol` -- the name under which it was
# created -- rather than a hard-coded 'risk_label', so the two cannot drift
# apart.
prediction = prediction[['id', ycol]].copy()
prediction.columns = ['id', 'ret']
# Notebook display of the first rows.
prediction.head()
# Output of `prediction.head()`:
#       id    ret
# 6147   1  0.378
# 6148   2  0.488
# 6149   3  0.502
# 6150   4  0.509
# 6151   5  0.480
|