mpvae_replica.py

import tensorflow as tf
# import tensorflow.contrib.slim as slim  # TF1-only; replaced by the tf_slim package
import tf_slim as slim
import numpy as np
import tensorflow_probability as tfp


class MODEL:
    def build_multi_classify_loss(self, predictions, labels):
        """Ranking loss over all (positive, negative) label pairs of each sample."""
        shape = tf.shape(labels)
        labels = tf.cast(labels, tf.float32)         # labels: n_batch * n_labels, e.g. 128*100
        y_i = tf.equal(labels, tf.ones(shape))       # True where the label is 1, 128*100
        y_not_i = tf.equal(labels, tf.zeros(shape))  # True where the label is 0, 128*100
        # get the (positive, negative) index pairs to check
        truth_matrix = tf.cast(self.pairwise_and(y_i, y_not_i), tf.float32)  # 0/1 label pairs per sample, 128*100*100
        # calculate all exponentiated differences;
        # masking with truth_matrix yields every c_i - c_k term from the paper
        sub_matrix = self.pairwise_sub(predictions, predictions)  # pairwise subtraction, 100*128*100*100
        exp_matrix = tf.exp(tf.negative(5 * sub_matrix))          # take the exponential, 100*128*100*100
        # keep only the differences across label pairs with different ground truth, then sum
        sparse_matrix = tf.multiply(exp_matrix, truth_matrix)     # zero out pairs with the same label, 100*128*100*100
        sums = tf.reduce_sum(sparse_matrix, axis=[2, 3])          # loss for each sample in every batch, 100*128
        # get the normalizing terms and apply them
        y_i_sizes = tf.reduce_sum(tf.cast(y_i, tf.float32), axis=1)          # number of 1's for each sample, 128
        y_i_bar_sizes = tf.reduce_sum(tf.cast(y_not_i, tf.float32), axis=1)  # number of 0's, 128
        normalizers = tf.multiply(y_i_sizes, y_i_bar_sizes)                  # 128
        loss = tf.divide(sums, 5 * normalizers)  # 100*128 divided (broadcast) by 128
        zero = tf.zeros_like(loss)               # 100*128 zeros
        # samples with no positive or no negative labels produce inf/nan; mask them out
        loss = tf.where(tf.logical_or(tf.math.is_inf(loss), tf.math.is_nan(loss)), x=zero, y=loss)
        loss = tf.reduce_mean(loss, axis=0)
        loss = tf.reduce_mean(loss)
        return loss
    def pairwise_and(self, a, b):
        """Compute the pairwise logical AND between elements of the tensors a and b.

        If the labels have shape [3, 3], y_i is expanded to [3, 3, 1] and y_not_i
        to [3, 1, 3]; broadcasting returns a [3, 3, 3] tensor, from which the
        c_i - c_k terms that appear in the paper are easy to compute.
        """
        column = tf.expand_dims(a, 2)
        row = tf.expand_dims(b, 1)
        return tf.logical_and(column, row)

    def pairwise_sub(self, a, b):
        """Compute the pairwise differences between elements of the tensors a and b."""
        column = tf.expand_dims(a, 3)
        row = tf.expand_dims(b, 2)
        return tf.subtract(column, row)
    def cross_entropy_loss(self, logits, labels, n_sample):
        """Sigmoid cross-entropy averaged over the n_sample Monte Carlo draws."""
        labels = tf.tile(tf.expand_dims(labels, 0), [n_sample, 1, 1])
        ce_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
        ce_loss = tf.reduce_mean(tf.reduce_sum(ce_loss, axis=1))
        return ce_loss
    def __init__(self, is_training, label_dim, feat_dim,
                 n_train_sample, n_test_sample, l2_coeff=1.0, nll_coeff=0.1, c_coeff=200.,
                 weight_regularizer=1e-4, latent_dim=16, cholesky=None, random_seed=42):
        tf.compat.v1.disable_eager_execution()
        tf.compat.v1.set_random_seed(random_seed)
        self.label_dim = label_dim
        self.latent_dim = latent_dim
        self.input_feat = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, feat_dim], name='input_feat')
        self.input_label = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, label_dim], name='input_label')
        self.keep_prob = tf.compat.v1.placeholder(tf.float32)  # keep probability for the dropout
        weights_regularizer = slim.l2_regularizer(weight_regularizer)

        ## label encoder
        # features are concatenated with the labels in this implementation, since this
        # makes training more stable; a similar technique is used in the Conditional VAE
        input_x = tf.concat([self.input_feat, self.input_label], 1)
        self.fe_1 = slim.dropout(
            slim.fully_connected(input_x, 256, weights_regularizer=weights_regularizer,
                                 activation_fn=tf.nn.relu, scope='label_encoder/fc_1'),
            keep_prob=self.keep_prob, is_training=is_training) * 1e-2
        self.fe_2 = slim.dropout(
            slim.fully_connected(self.fe_1, 128, weights_regularizer=weights_regularizer,
                                 activation_fn=tf.nn.relu, scope='label_encoder/fc_2'),
            keep_prob=self.keep_prob, is_training=is_training) * 1e-2
        self.fe_mu = slim.fully_connected(self.fe_2, latent_dim, activation_fn=None,
                                          weights_regularizer=weights_regularizer, scope='encoder/z_miu') * 1e-2
        self.fe_logvar = slim.fully_connected(self.fe_2, latent_dim, activation_fn=None,
                                              weights_regularizer=weights_regularizer, scope='encoder/z_logvar') * 1e-2
        # reparameterization trick
        eps = tf.random.normal(shape=tf.shape(self.fe_mu))
        fe_sample = eps * tf.exp(self.fe_logvar / 2) + self.fe_mu
        ## feature encoder (informative prior)
        self.fx_1 = slim.dropout(
            slim.fully_connected(self.input_feat, 128, weights_regularizer=weights_regularizer,
                                 activation_fn=tf.nn.relu, scope='feat_encoder/fc_1'),
            keep_prob=self.keep_prob, is_training=is_training) * 1e-2
        self.fx_2 = slim.dropout(
            slim.fully_connected(self.fx_1, 256, weights_regularizer=weights_regularizer,
                                 activation_fn=tf.nn.relu, scope='feat_encoder/fc_2'),
            keep_prob=self.keep_prob, is_training=is_training) * 1e-2
        self.fx_3 = slim.dropout(
            slim.fully_connected(self.fx_2, 128, weights_regularizer=weights_regularizer,
                                 activation_fn=tf.nn.relu, scope='feat_encoder/fc_3'),
            keep_prob=self.keep_prob, is_training=is_training) * 1e-2
        self.fx_mu = slim.fully_connected(self.fx_3, latent_dim, activation_fn=None,
                                          weights_regularizer=weights_regularizer, scope='feat_encoder/z_miu') * 1e-2
        self.fx_logvar = slim.fully_connected(self.fx_3, latent_dim, activation_fn=None,
                                              weights_regularizer=weights_regularizer, scope='feat_encoder/z_logvar') * 1e-2
        # the same standard-normal draw eps is reused for the feature-branch sample
        fx_sample = eps * tf.exp(self.fx_logvar / 2) + self.fx_mu
        # KL divergence between the two learnt diagonal Gaussians, computed in closed form:
        # KL( N(fe_mu, exp(fe_logvar)) || N(fx_mu, exp(fx_logvar)) )
        self.kl_loss = tf.reduce_mean(0.5 * tf.reduce_sum(
            (self.fx_logvar - self.fe_logvar) - 1
            + tf.exp(self.fe_logvar - self.fx_logvar)
            + tf.divide(tf.pow(self.fx_mu - self.fe_mu, 2), tf.exp(self.fx_logvar) + 1e-6),
            axis=1))
        # concatenate input_feat with the latent samples; a similar technique is used in the Conditional VAE
        c_fe_sample = tf.concat([self.input_feat, fe_sample], 1)
        c_fx_sample = tf.concat([self.input_feat, fx_sample], 1)
        self.cfs = self.fx_3
        ## label decoder
        self.fd_1 = slim.fully_connected(c_fe_sample, 128,
                                         weights_regularizer=weights_regularizer, activation_fn=tf.nn.relu,
                                         scope='label_decoder/fc_1')
        self.fd_2 = slim.fully_connected(self.fd_1, 256,
                                         weights_regularizer=weights_regularizer, activation_fn=tf.nn.relu,
                                         scope='label_decoder/fc_2')
        ## feature decoder (shares weights with the label decoder via reuse=True)
        self.fd_x_1 = slim.fully_connected(c_fx_sample, 128, weights_regularizer=weights_regularizer,
                                           activation_fn=tf.nn.relu, reuse=True, scope='label_decoder/fc_1')
        self.fd_x_2 = slim.fully_connected(self.fd_x_1, 256, weights_regularizer=weights_regularizer,
                                           activation_fn=tf.nn.relu, reuse=True, scope='label_decoder/fc_2')
        # derive the label mean in the Multivariate Probit model
        self.label_mp_mu = slim.fully_connected(self.fd_2, label_dim, activation_fn=None,
                                                weights_regularizer=weights_regularizer, scope='label_mp_mu')
        # derive the feature mean in the Multivariate Probit model
        self.feat_mp_mu = slim.fully_connected(self.fd_x_2, label_dim, activation_fn=None,
                                               weights_regularizer=weights_regularizer, scope='feat_mp_mu')
        # initialize the square root of the residual covariance matrix (rank-10 factor)
        self.r_sqrt_sigma = tf.Variable(
            np.random.uniform(-np.sqrt(6.0 / (label_dim + 10)),
                              np.sqrt(6.0 / (label_dim + 10)),
                              (label_dim, 10)),
            dtype=tf.float32, name='r_sqrt_sigma')
        # construct a positive semi-definite matrix
        self.sigma = tf.matmul(self.r_sqrt_sigma, tf.transpose(self.r_sqrt_sigma))
        # covariance = residual covariance + identity
        self.covariance = self.sigma + tf.eye(label_dim)
        # epsilon used to keep probabilities strictly inside (0, 1)
        self.eps1 = tf.constant(1e-4, dtype=tf.float32)
        # number of Monte Carlo samples drawn from the Multivariate Probit model
        n_sample = n_train_sample if is_training else n_test_sample
        # batch size
        n_batch = tf.shape(self.label_mp_mu)[0]
        # standard Gaussian samples
        self.noise = tf.random.normal(shape=[n_sample, n_batch, 10])
        # see equation (3) in the paper for this block
        self.B = tf.transpose(self.r_sqrt_sigma)
        self.sample_r = tf.tensordot(self.noise, self.B, axes=1) + self.label_mp_mu   # tensor: n_sample*n_batch*label_dim
        self.sample_r_x = tf.tensordot(self.noise, self.B, axes=1) + self.feat_mp_mu  # tensor: n_sample*n_batch*label_dim
        norm = tfp.distributions.Normal(0., 1.)
        # probabilities w.r.t. every label in each sample from the batch,
        # size: n_sample * n_batch * label_dim;
        # eps1 ensures the probability is never exactly 0 or 1
        E = norm.cdf(self.sample_r) * (1 - self.eps1) + self.eps1 * 0.5
        # same for the feature branch
        E_x = norm.cdf(self.sample_r_x) * (1 - self.eps1) + self.eps1 * 0.5
        def compute_BCE_and_RL_loss(E):
            # compute the negative log-likelihood (BCE loss) for each sample point
            sample_nll = tf.negative((tf.math.log(E) * self.input_label +
                                      tf.math.log(1 - E) * (1 - self.input_label)), name='sample_nll')
            logprob = -tf.reduce_sum(sample_nll, axis=2)
            # the following computation avoids float overflow (log-sum-exp trick)
            maxlogprob = tf.reduce_max(logprob, axis=0)
            Eprob = tf.reduce_mean(tf.exp(logprob - maxlogprob), axis=0)
            nll_loss = tf.reduce_mean(-tf.math.log(Eprob) - maxlogprob)
            # compute the ranking loss (RL loss)
            c_loss = self.build_multi_classify_loss(E, self.input_label)
            return nll_loss, c_loss

        # BCE and RL losses for the label branch
        self.nll_loss, self.c_loss = compute_BCE_and_RL_loss(E)
        # BCE and RL losses for the feature branch
        self.nll_loss_x, self.c_loss_x = compute_BCE_and_RL_loss(E_x)

        # prediction: per-label probability, averaged over the Monte Carlo samples of the feature branch
        self.indiv_prob = tf.reduce_mean(E_x, axis=0, name='individual_prob')
        # weight regularization
        self.l2_loss = tf.add_n(tf.compat.v1.losses.get_regularization_losses())
        # total loss: refer to equation (5) in the paper
        self.total_loss = (self.l2_loss * l2_coeff +
                           (self.nll_loss + self.nll_loss_x) * nll_coeff +
                           (self.c_loss + self.c_loss_x) * c_coeff + self.kl_loss * 1.1)
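

# --- Usage sketch (illustrative, not part of the original model code) ---------------
# A minimal example of how the graph above could be driven with the TF1-style session
# API. The dataset dimensions, batch size, keep probability, and the Adam optimizer
# below are assumptions chosen purely for illustration; the actual training script and
# data pipeline are expected to live elsewhere.
if __name__ == '__main__':
    feat_dim, label_dim = 294, 14  # hypothetical dataset dimensions
    model = MODEL(is_training=True, label_dim=label_dim, feat_dim=feat_dim,
                  n_train_sample=10, n_test_sample=100)
    train_op = tf.compat.v1.train.AdamOptimizer(1e-3).minimize(model.total_loss)
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        # one step on random data, only to show how the placeholders are fed
        feat = np.random.rand(32, feat_dim).astype(np.float32)
        labels = (np.random.rand(32, label_dim) > 0.5).astype(np.float32)
        _, loss = sess.run([train_op, model.total_loss],
                           feed_dict={model.input_feat: feat,
                                      model.input_label: labels,
                                      model.keep_prob: 0.5})
        print('total loss after one step:', loss)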