Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

01_process_train_size_results.py 3.4 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
  1. import os
  2. from os.path import join as oj
  3. import sys
  4. sys.path.append('../src')
  5. import numpy as np
  6. import torch
  7. import scipy
  8. from matplotlib import pyplot as plt
  9. from sklearn import metrics
  10. import data
  11. from config import *
  12. from tqdm import tqdm
  13. import pickle as pkl
  14. import train_reg
  15. from copy import deepcopy
  16. import config
  17. import models
  18. import pandas as pd
  19. import features
  20. import outcomes
  21. import neural_networks
  22. from sklearn.model_selection import KFold
  23. from torch import nn, optim
  24. from torch.nn import functional as F
  25. from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
  26. from sklearn.linear_model import LinearRegression, RidgeCV
  27. from sklearn.svm import SVR
  28. from collections import defaultdict
  29. import pickle as pkl
  30. if __name__ == '__main__':
  31. outcome_def = 'successful_full'
  32. print("loading data")
  33. dsets = ['clath_aux_dynamin']
  34. splits = ['test']
  35. #feat_names = ['X_same_length_normalized'] + data.select_final_feats(data.get_feature_names(df))
  36. #['mean_total_displacement', 'mean_square_displacement', 'lifetime']
  37. meta = ['cell_num', 'Y_sig_mean', 'Y_sig_mean_normalized', 'X_max_orig', outcome_def]
  38. dfs, feat_names = data.load_dfs_for_lstm(dsets=dsets,
  39. splits=splits,
  40. meta=meta,
  41. length=40,
  42. padding='end')
  43. df_test = pd.concat([dfs[(k, s)]
  44. for (k, s) in dfs
  45. if s == 'test'])[feat_names + meta]
  46. #df_test = df_test.dropna()
  47. X1 = df_test[feat_names[:1]]
  48. X2 = df_test[feat_names[1:]]
  49. X2 = X2.fillna(X2.mean())
  50. y = df_test[outcome_def].values
  51. accuracy = {}
  52. for k in [1, 2, 5, 10]:
  53. for j in tqdm(range(10)):
  54. checkpoint_fname = f'../models/models_different_size_10/downsample_{k}_batch_{j}_lstm.pkl'
  55. results = pkl.load(open(checkpoint_fname, 'rb'))
  56. dnn = neural_networks.neural_net_sklearn(D_in=40, H=20, p=0, arch='lstm', epochs=200)
  57. dnn.model.load_state_dict(results['model_state_dict'])
  58. preds = dnn.predict(X1)
  59. preds_binary = np.logical_and((preds > 0), df_test['X_max_orig'].values > 1500).astype(int)
  60. accuracy[(k, j, 'lstm', 'accuracy')] = np.mean(y == preds_binary)
  61. accuracy[(k, j, 'lstm', 'f1')] = metrics.f1_score(y, preds_binary)
  62. accuracy[(k, j, 'lstm', 'roc.auc')] = metrics.roc_auc_score(y, preds)
  63. checkpoint_fname = f'../models/models_different_size_10/downsample_{k}_batch_{j}_gb.pkl'
  64. m = pkl.load(open(checkpoint_fname, 'rb'))
  65. preds = m.predict(X2)
  66. preds_binary = np.logical_and((preds > 0), df_test['X_max_orig'].values > 1500).astype(int)
  67. accuracy[(k, j, 'gb', 'accuracy')] = np.mean(y == preds_binary)
  68. accuracy[(k, j, 'gb', 'f1')] = metrics.f1_score(y, preds_binary)
  69. accuracy[(k, j, 'gb', 'roc.auc')] = metrics.roc_auc_score(y, preds)
  70. pkl.dump(accuracy, open(f'../reports/data_size_stability_10_{outcome_def}.pkl', 'wb'))
  71. # calculate dasc accuracy
  72. dasc_pred = (df_test['X_d1'].values > 0).astype(int)
  73. dasc_acc = np.mean(y == dasc_pred)
  74. pkl.dump(dasc_acc, open('../reports/data_size_stability_10_dasc_acc.pkl', 'wb'))
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...