MachineIntelligenceCore:NeuralNets
AdaDelta.hpp
#ifndef ADADELTA_HPP_
#define ADADELTA_HPP_

#include <cassert>  // assert()
#include <cmath>    // std::sqrt(), std::isfinite()

// OptimizationFunction - abstract class representing the interface to an
// optimization function (include path assumed from the MIC source tree).
#include <optimization/OptimizationFunction.hpp>

namespace mic {
namespace neural_nets {
namespace optimization {
/*!
 * Update using AdaDelta - adaptive gradient descent with running averages E[g^2] and E[d^2].
 */
template <typename eT = float>
class AdaDelta : public OptimizationFunction<eT> {
public:
	/*!
	 * Constructor. Allocates and zeroes the running averages and the update matrix.
	 * @param rows_ Number of rows of the optimized matrix.
	 * @param cols_ Number of columns of the optimized matrix.
	 * @param decay_ Decay ratio, similar to momentum (default 0.9).
	 * @param eps_ Smoothing term that avoids division by zero (default 1e-8).
	 */
	AdaDelta(size_t rows_, size_t cols_, eT decay_ = 0.9, eT eps_ = 1e-8) : decay(decay_), eps(eps_) {
		// Allocate and reset the decaying average of squared gradients E[g^2].
		EG = MAKE_MATRIX_PTR(eT, rows_, cols_);
		EG->zeros();

		// Allocate and reset the decaying average of squared updates E[delta Theta^2].
		ED = MAKE_MATRIX_PTR(eT, rows_, cols_);
		ED->zeros();

		// Allocate and reset delta.
		delta = MAKE_MATRIX_PTR(eT, rows_, cols_);
		delta->zeros();
	}

	/*!
	 * Calculates the update according to the AdaDelta rule.
	 * @param x_ Pointer to the optimized matrix (parameters).
	 * @param dx_ Pointer to the gradient of x_.
	 * @param learning_rate_ Unused - AdaDelta adapts its step size automatically.
	 * @return Pointer to the calculated update.
	 */
	mic::types::MatrixPtr<eT> calculateUpdate(mic::types::MatrixPtr<eT> x_, mic::types::MatrixPtr<eT> dx_, eT learning_rate_) {
		assert(x_->size() == dx_->size());
		assert(x_->size() == EG->size());

		// Update the decaying average of squared gradients - up to time t.
		for (size_t i = 0; i < (size_t)x_->size(); i++) {
			(*EG)[i] = decay * (*EG)[i] + (1.0 - decay) * (*dx_)[i] * (*dx_)[i];
			assert(std::isfinite((*EG)[i]));
		}

		// Update the decaying average of squared updates - up to time t-1
		// (delta still holds the update from the previous step).
		for (size_t i = 0; i < (size_t)x_->size(); i++)
			(*ED)[i] = decay * (*ED)[i] + (1 - decay) * (*delta)[i] * (*delta)[i];

		// Calculate the current update and store it for the next step:
		// delta = RMS(ED) / RMS(EG) * dx, where RMS(z) = sqrt(z + eps).
		// (The minus sign of the descent step is applied where the update is used.)
		for (size_t i = 0; i < (size_t)x_->size(); i++) {
			(*delta)[i] = (std::sqrt((*ED)[i] + eps) / std::sqrt((*EG)[i] + eps)) * (*dx_)[i];
			assert(std::isfinite((*delta)[i]));
		}

		// Return the update.
		return delta;
	}

protected:
	/// Decay ratio, similar to momentum.
	eT decay;

	/// Smoothing term that avoids division by zero.
	eT eps;

	/// Decaying average of the squares of gradients up to time t ("diagonal matrix") - E[g^2].
	mic::types::MatrixPtr<eT> EG;

	/// Decaying average of the squares of updates up to time t ("diagonal matrix") - E[delta Theta^2].
	mic::types::MatrixPtr<eT> ED;

	/// Calculated update.
	mic::types::MatrixPtr<eT> delta;
};

} //: optimization
} //: neural_nets
} //: mic

#endif /* ADADELTA_HPP_ */
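For reference, calculateUpdate() implements the AdaDelta recurrences of Zeiler (2012), with \rho = decay and \epsilon = eps. The code defers the last recurrence to the start of the next call, where delta still holds the previous update, and leaves the minus sign of the descent step to the caller:

    E[g^2]_t        = \rho \, E[g^2]_{t-1} + (1 - \rho) \, g_t^2
    \Delta x_t      = \frac{\sqrt{E[\Delta x^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}} \, g_t
    E[\Delta x^2]_t = \rho \, E[\Delta x^2]_{t-1} + (1 - \rho) \, \Delta x_t^2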