csinva
/
auxilin-prediction
mirror of https://github.com/Yu-Group/auxilin-prediction


  
1

	
2

	
3

	
4

	
5

	
6

	
7

	
8

	
9

	
10

	
11

	
12

	
13

	
14

	
15

	
16

	
17

	
18

	
19

	
20

	
21

	
22

	
23

	
24

	
25

	
26

	
27

	
28

	
29

	
30

	
31

	
32

	
33

	
34

	
35

	
36

	
37

	
38

	
39

	
40

	
41

	
42

	
43

	
44

	
45

	
46

	
47

	
48

	
49

	
50

	
51

	
52

	
53

	
54

	
55

	
56

	
57

	
58

	
59

	
60

	
61

	
62

	
63

	
            import os
from os.path import join as oj
import sys
sys.path.append('../src')
import numpy as np
import torch
import scipy
from matplotlib import pyplot as plt
from sklearn import metrics
import data
from config import *
from tqdm import tqdm
import pickle as pkl
import train_reg
from copy import deepcopy
import config
import models
import pandas as pd
import features
import outcomes
import neural_networks
from sklearn.model_selection import KFold
from torch import nn, optim
from torch.nn import functional as F
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, RidgeCV
from sklearn.svm import SVR
from collections import defaultdict
import pickle as pkl

if __name__ == '__main__':
    
    
    print("loading data")
    dsets = ['clath_aux+gak_a7d2', 'clath_aux+gak', 'clath_aux+gak_a7d2_new', 'clath_aux+gak_new', 'clath_gak', 'clath_aux_dynamin']
    splits = ['train', 'test']
    #feat_names = ['X_same_length_normalized'] + data.select_final_feats(data.get_feature_names(df))
                  #['mean_total_displacement', 'mean_square_displacement', 'lifetime']
    meta = ['cell_num', 'Y_sig_mean', 'Y_sig_mean_normalized', 'y_consec_thresh']
    for length in [40, 100, 200]:
        for padding in ['front', 'end']:
            dfs, feat_names = data.load_dfs_for_lstm(dsets=dsets, 
                                                     splits=splits, 
                                                     meta=meta,
                                                     length=length,
                                                     padding=padding)
            df_full = pd.concat([dfs[(k, s)]
                                 for (k, s) in dfs
                                 if s == 'train'])[feat_names + meta]
            np.random.seed(42)
            checkpoint_fname = f'../models/dnn_full_long_normalized_across_track_1_feat_dynamin_{length}_{padding}_tuning.pkl'
            valid_cells = ['A7D2/1', 
                           'CLTA-TagRFP EGFP-Aux1 EGFP-GAK F6/1', 
                           'CLTA-TagRFP EGFP-GAK A8/1', 
                           'EGFP-GAK F6/1',
                           '488-1.5mW 561-1.5mW 647-1.5mW Exp100ms Int1.5s_4_Pos0/1_1.5s',
                           '488-1.5mW 561-1.5mW 647-1.5mW Exp100ms Int1.5s_4_Pos1/1_1.5s',
                           '488-1.5mW 561-1.5mW 647-1.5mW Exp100ms Int1.5s_4_Pos2/1_1.5s']
            valid = df_full['cell_num'].isin(valid_cells)
            df_full_train = df_full[~valid]
            dnn = neural_networks.neural_net_sklearn(D_in=length, H=20, p=0, arch='lstm', epochs=200)
            dnn.fit(df_full_train[feat_names[:1]], df_full_train['Y_sig_mean_normalized'].values, verbose=True, checkpoint_fname=checkpoint_fname)
            pkl.dump({'model_state_dict': dnn.model.state_dict()}, open(checkpoint_fname, 'wb'))