Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

hinge_loss_layer.hpp 4.2 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
  1. #ifndef CAFFE_HINGE_LOSS_LAYER_HPP_
  2. #define CAFFE_HINGE_LOSS_LAYER_HPP_
  3. #include <vector>
  4. #include "caffe/blob.hpp"
  5. #include "caffe/layer.hpp"
  6. #include "caffe/proto/caffe.pb.h"
  7. #include "caffe/layers/loss_layer.hpp"
  8. namespace caffe {
  9. /**
  10. * @brief Computes the hinge loss for a one-of-many classification task.
  11. *
  12. * @param bottom input Blob vector (length 2)
  13. * -# @f$ (N \times C \times H \times W) @f$
  14. * the predictions @f$ t @f$, a Blob with values in
  15. * @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of
  16. * the @f$ K = CHW @f$ classes. In an SVM, @f$ t @f$ is the result of
  17. * taking the inner product @f$ X^T W @f$ of the D-dimensional features
  18. * @f$ X \in \mathcal{R}^{D \times N} @f$ and the learned hyperplane
  19. * parameters @f$ W \in \mathcal{R}^{D \times K} @f$, so a Net with just
  20. * an InnerProductLayer (with num_output = K) providing predictions to a
  21. * HingeLossLayer and no other learnable parameters or losses is
  22. * equivalent to an SVM.
  23. * -# @f$ (N \times 1 \times 1 \times 1) @f$
  24. * the labels @f$ l @f$, an integer-valued Blob with values
  25. * @f$ l_n \in [0, 1, 2, ..., K - 1] @f$
  26. * indicating the correct class label among the @f$ K @f$ classes
  27. * @param top output Blob vector (length 1)
  28. * -# @f$ (1 \times 1 \times 1 \times 1) @f$
  29. * the computed hinge loss: @f$ E =
  30. * \frac{1}{N} \sum\limits_{n=1}^N \sum\limits_{k=0}^{K-1}
  31. * [\max(0, 1 - \delta\{l_n = k\} t_{nk})] ^ p
  32. * @f$, for the @f$ L^p @f$ norm
  33. * (defaults to @f$ p = 1 @f$, the L1 norm; L2 norm, as in L2-SVM,
  34. * is also available), and @f$
  35. * \delta\{\mathrm{condition}\} = \left\{
  36. * \begin{array}{lr}
  37. * 1 & \mbox{if condition} \\
  38. * -1 & \mbox{otherwise}
  39. * \end{array} \right.
  40. * @f$
  41. *
  42. * In an SVM, @f$ t \in \mathcal{R}^{N \times K} @f$ is the result of taking
  43. * the inner product @f$ X^T W @f$ of the features
  44. * @f$ X \in \mathcal{R}^{D \times N} @f$
  45. * and the learned hyperplane parameters
  46. * @f$ W \in \mathcal{R}^{D \times K} @f$. So, a Net with just an
  47. * InnerProductLayer (with num_output = @f$K@f$) providing predictions to a
  48. * HingeLossLayer is equivalent to an SVM (assuming it has no other learned
  49. * parameters outside the InnerProductLayer and no other losses outside the
  50. * HingeLossLayer).
  51. */
  52. template <typename Dtype>
  53. class HingeLossLayer : public LossLayer<Dtype> {  // loss layer; Forward_cpu/Backward_cpu are declared here without bodies
  54. public:
  55. explicit HingeLossLayer(const LayerParameter& param)
  56. : LossLayer<Dtype>(param) {}  // no members of its own to set up; just forwards param to LossLayer
  57. virtual inline const char* type() const { return "HingeLoss"; }  // type string reported for this layer
  58. protected:
  59. /// @copydoc HingeLossLayer
  60. virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
  61. const vector<Blob<Dtype>*>& top);
  62. /**
  63. * @brief Computes the hinge loss error gradient w.r.t. the predictions.
  64. *
  65. * Gradients cannot be computed with respect to the label inputs (bottom[1]),
  66. * so this method ignores bottom[1] and requires !propagate_down[1], crashing
  67. * if propagate_down[1] is set.
  68. *
  69. * @param top output Blob vector (length 1), providing the error gradient with
  70. * respect to the outputs
  71. * -# @f$ (1 \times 1 \times 1 \times 1) @f$
  72. * This Blob's diff will simply contain the loss_weight* @f$ \lambda_i @f$,
  73. * as @f$ \lambda_i @f$ is the coefficient of this layer's output
  74. * @f$\ell_i@f$ in the overall Net loss
  75. * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
  76. * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
  77. * (*Assuming that this top Blob is not used as a bottom (input) by any
  78. * other layer of the Net.)
  79. * @param propagate_down see Layer::Backward.
  80. * propagate_down[1] must be false as we can't compute gradients with
  81. * respect to the labels.
  82. * @param bottom input Blob vector (length 2)
  83. * -# @f$ (N \times C \times H \times W) @f$
  84. * the predictions @f$t@f$; Backward computes diff
  85. * @f$ \frac{\partial E}{\partial t} @f$
  86. * -# @f$ (N \times 1 \times 1 \times 1) @f$
  87. * the labels -- ignored as we can't compute their error gradients
  88. */
  89. virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
  90. const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  91. };
  92. } // namespace caffe
  93. #endif // CAFFE_HINGE_LOSS_LAYER_HPP_
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...