Browse Source

AdaBoost correct AUC

Tolstoyevsky 5 months ago
parent
commit
9ae247b024
5 changed files with 21 additions and 24 deletions
  1. 7 7
      metrics.csv
  2. 5 9
      params.yml
  3. 7 7
      test-metrics.csv
  4. 1 0
      tutorial/shared.py
  5. 1 1
      tutorial/train_model.py

+ 7 - 7
metrics.csv

@@ -1,8 +1,8 @@
 Name,Value,Timestamp,Step
-train_accuracy_score,0.96815,1577642261432,1
-train_f1_score,0.31871657754010696,1577642261432,1
-train_recall_score,0.1913937058445729,1577642261432,1
-train_precision_score,0.952076677316294,1577642261432,1
-train_roc_auc_score,0.9928830731737041,1577642261432,1
-train_pr_auc_score,0.8535926331719048,1577642261432,1
-train_balanced_accuracy_score,0.5955017588869614,1577642261432,1
+train_accuracy_score,0.96405,1577647647114,1
+train_f1_score,0.38756388415672915,1577647647114,1
+train_recall_score,0.2922286448298009,1577647647114,1
+train_precision_score,0.5752212389380531,1577647647114,1
+train_roc_auc_score,0.9524876815930352,1577647647114,1
+train_pr_auc_score,0.4755851931929786,1577647647114,1
+train_balanced_accuracy_score,0.6417442160236199,1577647647114,1

+ 5 - 9
params.yml

@@ -1,10 +1,6 @@
-alpha: 1.0
-class_weight: null
-classifier_type: RidgeClassifierPredictProba
-copy_X: true
-fit_intercept: true
-max_iter: null
-normalize: false
+algorithm: SAMME.R
+base_estimator: null
+classifier_type: AdaBoostClassifier
+learning_rate: 1.0
+n_estimators: 50
 random_state: null
-solver: auto
-tol: 0.001

+ 7 - 7
test-metrics.csv

@@ -1,8 +1,8 @@
 Name,Value,Timestamp,Step
-test_accuracy_score,0.9604,1577642281213,1
-test_f1_score,0.0660377358490566,1577642281213,1
-test_recall_score,0.03598971722365039,1577642281213,1
-test_precision_score,0.4,1577642281213,1
-test_roc_auc_score,0.8732627219400221,1577642281213,1
-test_pr_auc_score,0.25904935424187586,1577642281213,1
-test_balanced_accuracy_score,0.5169023604326555,1577642281213,1
+test_accuracy_score,0.9648,1577647663364,1
+test_f1_score,0.3993174061433447,1577647663364,1
+test_recall_score,0.30077120822622105,1577647663364,1
+test_precision_score,0.5939086294416244,1577647663364,1
+test_roc_auc_score,0.9375528094281429,1577647663364,1
+test_pr_auc_score,0.46741838596311946,1577647663364,1
+test_balanced_accuracy_score,0.6462237062877021,1577647663364,1

+ 1 - 0
tutorial/shared.py

@@ -40,6 +40,7 @@ def load_labels(path=train_data):
 
 def compute_metrics(clf, X, y, prefix):
     from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, roc_auc_score, balanced_accuracy_score, auc, precision_recall_curve
+    X = X.sparse.to_coo()
     preds = clf.predict(X)
     probas = clf.predict_proba(X)[:,1]
     pr_curve = precision_recall_curve(y, probas)

+ 1 - 1
tutorial/train_model.py

@@ -20,7 +20,7 @@ def fit_model(params: dict):
     X, y = shared.load_data_and_labels(shared.train_processed)
     print("Done")
 
-    from tutorial.train_model import RidgeClassifierPredictProba as Classifier
+    from sklearn.ensemble import AdaBoostClassifier as Classifier
     clf = Classifier(**params)
     print("Training model ", clf)
     # Required for efficient training, so that sklearn doesn't inflate the pandas sparse DF to a dense matrix.