1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
# --- stdlib ---
import os
import pickle as pkl
import sys
from collections import defaultdict
from copy import deepcopy
from os.path import join as oj

# --- third-party ---
import numpy as np
import pandas as pd
import scipy
import torch
from matplotlib import pyplot as plt
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, RidgeCV
from sklearn.model_selection import KFold
from sklearn.svm import SVR
from torch import nn, optim
from torch.nn import functional as F
from tqdm import tqdm

# --- project-local (need ../src on the import path first) ---
sys.path.append('../src')
import config
import data
import features
import models
import neural_networks
import outcomes
import train_reg
from config import *  # NOTE(review): star import kept for compatibility; may shadow names above
if __name__ == '__main__':
    # Evaluate LSTM and gradient-boosting checkpoints that were trained on
    # progressively downsampled data (downsample factors 1/2/5/10, 10 batches
    # each), recording accuracy / F1 / ROC-AUC on the held-out test split,
    # then compute the DASC baseline accuracy. Results are pickled to
    # ../reports/.
    outcome_def = 'successful_full'
    print("loading data")
    dsets = ['clath_aux_dynamin']
    splits = ['test']
    meta = ['cell_num', 'Y_sig_mean', 'Y_sig_mean_normalized', 'X_max_orig', outcome_def]
    dfs, feat_names = data.load_dfs_for_lstm(dsets=dsets,
                                             splits=splits,
                                             meta=meta,
                                             length=40,
                                             padding='end')
    df_test = pd.concat([dfs[(k, s)]
                         for (k, s) in dfs
                         if s == 'test'])[feat_names + meta]
    X1 = df_test[feat_names[:1]]   # first feature: fixed-length trace for the LSTM
    X2 = df_test[feat_names[1:]]   # remaining engineered features for the GB model
    X2 = X2.fillna(X2.mean())      # GB cannot handle NaNs; impute with column means
    y = df_test[outcome_def].values

    # Loop-invariant amplitude gate: a positive prediction only counts if the
    # track's original max intensity exceeds 1500 (same rule for both models).
    amp_ok = df_test['X_max_orig'].values > 1500

    accuracy = {}
    for k in [1, 2, 5, 10]:
        for j in tqdm(range(10)):
            # --- LSTM checkpoint ---
            checkpoint_fname = f'../models/models_different_size_10/downsample_{k}_batch_{j}_lstm.pkl'
            with open(checkpoint_fname, 'rb') as f:
                results = pkl.load(f)
            dnn = neural_networks.neural_net_sklearn(D_in=40, H=20, p=0, arch='lstm', epochs=200)
            dnn.model.load_state_dict(results['model_state_dict'])
            preds = dnn.predict(X1)
            preds_binary = np.logical_and(preds > 0, amp_ok).astype(int)
            accuracy[(k, j, 'lstm', 'accuracy')] = np.mean(y == preds_binary)
            accuracy[(k, j, 'lstm', 'f1')] = metrics.f1_score(y, preds_binary)
            accuracy[(k, j, 'lstm', 'roc.auc')] = metrics.roc_auc_score(y, preds)

            # --- gradient-boosting checkpoint ---
            checkpoint_fname = f'../models/models_different_size_10/downsample_{k}_batch_{j}_gb.pkl'
            with open(checkpoint_fname, 'rb') as f:
                m = pkl.load(f)
            preds = m.predict(X2)
            preds_binary = np.logical_and(preds > 0, amp_ok).astype(int)
            accuracy[(k, j, 'gb', 'accuracy')] = np.mean(y == preds_binary)
            accuracy[(k, j, 'gb', 'f1')] = metrics.f1_score(y, preds_binary)
            accuracy[(k, j, 'gb', 'roc.auc')] = metrics.roc_auc_score(y, preds)

    with open(f'../reports/data_size_stability_10_{outcome_def}.pkl', 'wb') as f:
        pkl.dump(accuracy, f)

    # DASC baseline: predict success whenever the X_d1 feature is positive.
    # (presumably X_d1 is the DASC derivative feature — TODO confirm upstream)
    dasc_pred = (df_test['X_d1'].values > 0).astype(int)
    dasc_acc = np.mean(y == dasc_pred)
    with open('../reports/data_size_stability_10_dasc_acc.pkl', 'wb') as f:
        pkl.dump(dasc_acc, f)
-
|