Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

removed_hidden_layer.py 7.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
  1. import numpy as np
  2. import pandas as pd
  3. import tensorflow as tf
  4. from keras.layers import Dense
  5. from sklearn.preprocessing import MinMaxScaler
  6. import dagshub
  7. import mlflow
  8. import mlflow.keras
  9. import pickle
  10. mlflow.set_tracking_uri('https://dagshub.com/ML-Purdue/hackathonf23-Stacks.mlflow')
  11. dagshub.init(repo_owner='ML-Purdue', repo_name='hackathonf23-Stacks', mlflow=True)
  12. def get_or_create_experiment_id(name):
  13. exp = mlflow.get_experiment_by_name(name)
  14. if exp is None:
  15. exp_id = mlflow.create_experiment(name)
  16. return exp_id
  17. return exp.experiment_id
  18. class MaxEntIRL:
  19. def __init__(self, state_dim):
  20. self.state_dim = state_dim
  21. self.model = self._create_irl_model()
  22. def _create_irl_model(self):
  23. model = tf.keras.Sequential([
  24. Dense(self.state_dim, input_shape=(self.state_dim,), activation='relu'),
  25. Dense(4096, activation='relu'),
  26. Dense(self.state_dim, activation='linear')
  27. ])
  28. return model
  29. def generateHumanTrajectories(self, num_trajectories, trajectory_length):
  30. human_trajectories = []
  31. for _ in range(num_trajectories):
  32. trajectory = []
  33. state = np.zeros(self.state_dim)
  34. for _ in range(trajectory_length):
  35. direction_probabilities = self._generate_direction_probabilities() # Get direction probabilities
  36. action_coefficients = np.random.choice([-1, 0, 1], p=direction_probabilities)
  37. action = action_coefficients * 0.1
  38. new_state = state + action
  39. trajectory.append((state, action))
  40. state = new_state
  41. human_trajectories.append(trajectory)
  42. return human_trajectories
  43. def _generate_direction_probabilities(self):
  44. probabilities = np.random.dirichlet(np.ones(self.state_dim) * 0.1)
  45. return probabilities
  46. def loadDataset(self, file_path):
  47. data = pd.read_csv(file_path) # Load CSV data
  48. scaler = MinMaxScaler()
  49. columns_to_normalize = ['position x [mm]', 'position y [mm]', 'position z (height) [mm]', 'velocity [mm/s]']
  50. data[columns_to_normalize] = scaler.fit_transform(data[columns_to_normalize])
  51. return data
  52. def train_irl_with_dataset(self, data, lr=0.001, epochs=3):
  53. state_dim = self.state_dim
  54. optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
  55. # Extract relevant columns from the loaded dataset
  56. positions = data[['position x [mm]', 'position y [mm]', 'position z (height) [mm]']].values
  57. velocities = data['velocity [mm/s]'].values
  58. mlflow.tensorflow.autolog()
  59. with mlflow.start_run(experiment_id=get_or_create_experiment_id("Base Reproduction")):
  60. for epoch in range(epochs):
  61. total_loss = 0
  62. state_frequencies = self._calculate_state_frequencies(positions)
  63. for idx in range(len(positions)):
  64. state = positions[idx]
  65. velocity = velocities[idx]
  66. with tf.GradientTape() as tape:
  67. preferences = self.model(state[np.newaxis, :])
  68. prob_human = tf.nn.softmax(preferences)
  69. # Define losses
  70. max_entropy_loss = -tf.reduce_sum(prob_human * tf.math.log(prob_human + 1e-8), axis=1)
  71. alignment_loss = -tf.reduce_sum(state_frequencies * tf.math.log(prob_human + 1e-8), axis=1)
  72. maxent_irl_objective = max_entropy_loss + alignment_loss
  73. # Compute the gradients
  74. grads = tape.gradient(maxent_irl_objective, self.model.trainable_variables)
  75. optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
  76. total_loss += tf.reduce_sum(maxent_irl_objective) # Accumulate the total loss
  77. avg_loss = total_loss / len(positions)
  78. mlflow.log_metric(f"loss_epoch_{epoch}", avg_loss)
  79. print(f"Epoch {epoch + 1}/{epochs}, MaxEnt IRL Loss: {avg_loss}")
  80. def train_irl(self, human_trajectories=None, data=None, use_dataset=False, lr=0.001, epochs=3):
  81. if use_dataset and data is not None:
  82. # Train using the loaded dataset
  83. self.train_irl_with_dataset(data, lr=lr, epochs=epochs)
  84. else:
  85. # Train using the generative function
  86. if human_trajectories is None:
  87. human_trajectories = self.generateHumanTrajectories(num_trajectories, trajectory_length)
  88. self._train_irl_generative(human_trajectories, lr=lr, epochs=epochs)
  89. def _train_irl_generative(self, human_trajectories, lr=0.001, epochs=3):
  90. trajectory_length = len(human_trajectories[0])
  91. optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
  92. for epoch in range(epochs):
  93. total_loss = 0
  94. state_frequencies = self._calculate_state_frequencies(human_trajectories, trajectory_length)
  95. for trajectory in human_trajectories:
  96. for state, _ in trajectory:
  97. with tf.GradientTape() as tape:
  98. preferences = self.model(state[np.newaxis, :])
  99. prob_human = tf.nn.softmax(preferences)
  100. # Inside the training loop:
  101. max_entropy_loss = -tf.reduce_sum(prob_human * tf.math.log(prob_human + 1e-8), axis=1)
  102. alignment_loss = -tf.reduce_sum(state_frequencies * tf.math.log(prob_human + 1e-8), axis=1)
  103. maxent_irl_objective = max_entropy_loss + alignment_loss
  104. grads = tape.gradient(maxent_irl_objective, self.model.trainable_variables)
  105. optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
  106. total_loss += maxent_irl_objective
  107. avg_loss = total_loss / (len(human_trajectories) * trajectory_length)
  108. mlflow.log_metric(f"loss_epoch_{epoch}", avg_loss)
  109. autolog_run = mlflow.last_active_run()
  110. print(f"Epoch {epoch + 1}/{epochs}, MaxEnt IRL Loss: {avg_loss}")
  111. def _calculate_state_frequencies(self, positions):
  112. state_counts = np.sum(positions, axis=0)
  113. state_frequencies = state_counts / (len(positions) * self.state_dim)
  114. return state_frequencies
  115. def save_model(self, file_path):
  116. model_config = self.model.get_config()
  117. with open(file_path, 'wb') as f:
  118. pickle.dump(model_config, f)
  119. @classmethod
  120. def load_model(cls, file_path, state_dim):
  121. with open(file_path, 'rb') as f:
  122. model_config = pickle.load(f)
  123. irl_instance = cls(state_dim)
  124. irl_instance.model = tf.keras.Sequential.from_config(model_config)
  125. return irl_instance
  126. # Indicate test completion status
  127. state_dim = 3 # Dimension of the state space
  128. irl = MaxEntIRL(state_dim)
  129. num_trajectories = 100
  130. trajectory_length = 20
  131. # Load the dataset
  132. file_path = '/Users/vinay/Desktop/Computer_Science_Projects/ReScience/hackathonf23-Stacks/data/train.csv' # Replace with the actual file path
  133. data = irl.loadDataset(file_path)
  134. irl.train_irl(data=data, use_dataset=True, lr=0.001, epochs=3)
  135. irl.save_model('/Users/vinay/Desktop/Computer_Science_Projects/ReScience/hackathonf23-Stacks/models/removed_hidden_layer.pkl')
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...