Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

rmv_max_entropy_local.py 6.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
  1. """
  2. This is the fourth ablation study, removing the maximum entropy factor during training
  3. """
  4. import numpy as np
  5. import pandas as pd
  6. import tensorflow as tf
  7. from keras.layers import Dense
  8. from sklearn.preprocessing import MinMaxScaler
  9. import dagshub
  10. import mlflow
  11. import mlflow.keras
  12. import pickle
  13. mlflow.set_tracking_uri('https://dagshub.com/ML-Purdue/hackathonf23-Stacks.mlflow')
  14. dagshub.init(repo_owner='ML-Purdue', repo_name='hackathonf23-Stacks', mlflow=True)
  15. def get_or_create_experiment_id(name):
  16. exp = mlflow.get_experiment_by_name(name)
  17. if exp is None:
  18. exp_id = mlflow.create_experiment(name)
  19. return exp_id
  20. return exp.experiment_id
  21. class IRL:
  22. def __init__(self, state_dim):
  23. self.state_dim = state_dim
  24. self.model = self._create_irl_model()
  25. def _create_irl_model(self):
  26. model = tf.keras.Sequential([
  27. Dense(self.state_dim, input_shape=(self.state_dim,), activation='relu'),
  28. Dense(4096, activation='relu'),
  29. Dense(2048, activation='relu'),
  30. Dense(self.state_dim, activation='linear')
  31. ])
  32. return model
  33. def generateHumanTrajectories(self, num_trajectories, trajectory_length):
  34. human_trajectories = []
  35. for _ in range(num_trajectories):
  36. trajectory = []
  37. state = np.zeros(self.state_dim)
  38. for _ in range(trajectory_length):
  39. direction_probabilities = self._generate_direction_probabilities()
  40. action_coefficients = np.random.choice([-1, 0, 1], p=direction_probabilities)
  41. action = action_coefficients * 0.1
  42. new_state = state + action
  43. trajectory.append((state, action))
  44. state = new_state
  45. human_trajectories.append(trajectory)
  46. return human_trajectories
  47. def _generate_direction_probabilities(self):
  48. probabilities = np.random.dirichlet(np.ones(self.state_dim) * 0.1)
  49. return probabilities
  50. def loadDataset(self, file_path):
  51. data = pd.read_csv(file_path)
  52. scaler = MinMaxScaler()
  53. columns_to_normalize = ['position x [mm]', 'position y [mm]', 'position z (height) [mm]', 'velocity [mm/s]']
  54. data[columns_to_normalize] = scaler.fit_transform(data[columns_to_normalize])
  55. return data
  56. def train_irl_with_dataset(self, data, lr=0.001, epochs=3):
  57. state_dim = self.state_dim
  58. optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
  59. positions = data[['position x [mm]', 'position y [mm]', 'position z (height) [mm]']].values
  60. velocities = data['velocity [mm/s]'].values
  61. mlflow.tensorflow.autolog()
  62. with mlflow.start_run(experiment_id=get_or_create_experiment_id("Ablation Study 4: Removed Maximum Entropy Component")):
  63. for epoch in range(epochs):
  64. total_loss = 0
  65. for idx in range(len(positions)):
  66. state = positions[idx]
  67. velocity = velocities[idx] # Define velocity here
  68. with tf.GradientTape() as tape:
  69. preferences = self.model(state[np.newaxis, :])
  70. specific_loss = tf.reduce_mean(tf.square(preferences - velocity)) # Calculate Mean Squared Error
  71. grads = tape.gradient(specific_loss, self.model.trainable_variables)
  72. optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
  73. total_loss += specific_loss
  74. avg_loss = total_loss / len(positions)
  75. mlflow.log_metric(f"loss", avg_loss, step=epoch)
  76. print(f"Epoch {epoch + 1}/{epochs}, Loss (MSE): {avg_loss}")
  77. def train_irl(self, human_trajectories=None, data=None, use_dataset=False, lr=0.001, epochs=3):
  78. if use_dataset and data is not None:
  79. self.train_irl_with_dataset(data, lr=lr, epochs=epochs)
  80. else:
  81. if human_trajectories is None:
  82. human_trajectories = self.generateHumanTrajectories(num_trajectories, trajectory_length)
  83. self._train_irl_generative(human_trajectories, lr=lr, epochs=epochs)
  84. def _train_irl_generative(self, human_trajectories, lr=0.001, epochs=3):
  85. trajectory_length = len(human_trajectories[0])
  86. optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
  87. for epoch in range(epochs):
  88. total_loss = 0
  89. for trajectory in human_trajectories:
  90. for state, velocity in trajectory: # Define velocity from the trajectory
  91. with tf.GradientTape() as tape:
  92. preferences = self.model(state[np.newaxis, :])
  93. specific_loss = tf.reduce_mean(tf.square(preferences - velocity)) # Calculate Mean Squared Error
  94. grads = tape.gradient(specific_loss, self.model.trainable_variables)
  95. optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
  96. total_loss += specific_loss
  97. avg_loss = total_loss / (len(human_trajectories) * trajectory_length)
  98. print(f"Epoch {epoch + 1}/{epochs}, Loss (MSE): {avg_loss}")
  99. def _calculate_state_frequencies(self, positions):
  100. state_counts = np.sum(positions, axis=0)
  101. state_frequencies = state_counts / (len(positions) * self.state_dim)
  102. return state_frequencies
  103. def save_model(self, file_path):
  104. model_config = self.model.get_config()
  105. with open(file_path, 'wb') as f:
  106. pickle.dump(model_config, f)
  107. @classmethod
  108. def load_model(cls, file_path, state_dim):
  109. with open(file_path, 'rb') as f:
  110. model_config = pickle.load(f)
  111. irl_instance = cls(state_dim)
  112. irl_instance.model = tf.keras.Sequential.from_config(model_config)
  113. return irl_instance
  114. # Indicate test completion status
  115. state_dim = 3 # Dimension of the state space
  116. irl = IRL(state_dim)
  117. num_trajectories = 100
  118. trajectory_length = 20
  119. # Load the dataset
  120. file_path = '/Users/vinay/Desktop/Computer_Science_Projects/ReScience/hackathonf23-Stacks/data/train.csv' # Replace with the actual file path
  121. data = irl.loadDataset(file_path)
  122. irl.train_irl(data=data, use_dataset=True, lr=0.001, epochs=3)
  123. irl.save_model('/Users/vinay/Desktop/Computer_Science_Projects/ReScience/hackathonf23-Stacks/models/rmv_max_entropy_model.pkl')
Tip!

Press p to see the previous file or n to see the next file

Comments

Loading...