Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

01_fit_dnn_best.py 2.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
  # Training script: for each lifetime threshold, load the datasets, concatenate
  # their training splits, fit a network built with arch='lstm', and pickle the
  # fitted weights to ../models/.
  import os
  from os.path import join as oj
  import sys
  sys.path.append('../src')  # presumably where the project modules imported below live
  import numpy as np
  import torch
  import scipy
  from matplotlib import pyplot as plt
  from sklearn import metrics
  import data
  from config import *
  from tqdm import tqdm
  import pickle as pkl
  import train_reg
  from copy import deepcopy
  import config
  import models
  import pandas as pd
  import features
  import outcomes
  import neural_networks
  from sklearn.model_selection import KFold
  from torch import nn, optim
  from torch.nn import functional as F
  from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
  from sklearn.linear_model import LinearRegression, RidgeCV
  from sklearn.svm import SVR
  from collections import defaultdict
  import pickle as pkl  # repeated import; harmless

  if __name__ == '__main__':
      print("loading data")
      dsets = [
          'clath_aux+gak_a7d2',
          'clath_aux+gak',
          'clath_aux+gak_a7d2_new',
          'clath_aux+gak_new',
          'clath_gak',
          'clath_aux_dynamin',
      ]
      splits = ['train', 'test']
      #feat_names = [''] + data.select_final_feats(data.get_feature_names(df))
      #['mean_total_displacement', 'mean_square_displacement', 'lifetime']

      # Settings shared by every run of the loop below.
      length = 40  # passed to the loader and also used as the network's D_in
      padding = 'end'
      feat_name = 'X_same_length_extended_normalized'  # include buffer X_same_length_normalized
      outcome = 'Y_sig_mean_normalized'

      # One independent fit (and one checkpoint file) per lifetime threshold.
      for lifetime_threshold in [5, 10, 15]:
          dfs, feat_names = data.load_dfs_for_lstm(dsets=dsets,
                                                   splits=splits,
                                                   lifetime_threshold=lifetime_threshold,
                                                   length=length,
                                                   padding=padding)

          # dfs is keyed by (dataset, split) pairs; only the 'train' frames are
          # stacked into the fitting set.
          df_full = pd.concat([dfs[(k, s)]
                               for (k, s) in dfs
                               if s == 'train'])

          # Re-seed NumPy before every fit so each threshold starts from the
          # same NumPy random state.
          np.random.seed(42)

          checkpoint_fname = f'../models/dnn_fit_extended_lifetimes>{lifetime_threshold}.pkl'
          dnn = neural_networks.neural_net_sklearn(D_in=length, H=20, p=0, arch='lstm',
                                                   epochs=200, track_name=feat_name)
          dnn.fit(df_full[[feat_name]],
                  df_full[outcome].values,
                  verbose=True, checkpoint_fname=checkpoint_fname, device='cuda')

          # Store the final weights after moving the model to the CPU. This writes
          # to the same path that was handed to fit() (presumably overwriting any
          # intermediate checkpoint saved there -- confirm against neural_networks).
          # The context manager guarantees the file is flushed and closed; the
          # previous bare open(...) left the handle to the garbage collector.
          with open(checkpoint_fname, 'wb') as checkpoint_file:
              pkl.dump({'model_state_dict': dnn.model.cpu().state_dict()}, checkpoint_file)
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...