namespace neural_nets {
namespace optimization {

/*!
 * Update using RMSProp - adaptive gradient descent with running average E[g^2].
 */
template <typename eT = float>
class RMSProp : public OptimizationFunction<eT> {
public:

    /// Constructor. Allocates the E[g^2] and delta matrices and stores the decay ratio and smoothing term.
    RMSProp(size_t rows_, size_t cols_, eT decay_ = 0.9, eT eps_ = 1e-8) :
        decay(decay_), eps(eps_)
    {
        EG = MAKE_MATRIX_PTR(eT, rows_, cols_);
        delta = MAKE_MATRIX_PTR(eT, rows_, cols_);
    }
    /// Calculates the update according to the RMSProp rule and returns it.
    mic::types::MatrixPtr<eT> calculateUpdate(mic::types::MatrixPtr<eT> x_, mic::types::MatrixPtr<eT> dx_, eT learning_rate_) {
        assert(x_->size() == dx_->size());
        assert(x_->size() == EG->size());

        // Update the decaying average of squared gradients: E[g^2] = decay * E[g^2] + (1 - decay) * g^2.
        for (size_t i = 0; i < (size_t)x_->size(); i++) {
            (*EG)[i] = decay * (*EG)[i] + (1.0 - decay) * (*dx_)[i] * (*dx_)[i];
            assert(std::isfinite((*EG)[i]));
        }

        // Calculate the update: delta = (learning_rate / sqrt(E[g^2] + eps)) * gradient.
        for (size_t i = 0; i < (size_t)x_->size(); i++) {
            (*delta)[i] = (learning_rate_ / std::sqrt((*EG)[i] + eps)) * (*dx_)[i];
            assert(std::isfinite((*delta)[i]));
        }

        // Return the calculated update.
        return delta;
    }

protected:
    /// Decaying average of the squares of gradients up to time t ("diagonal matrix") - E[g^2].
    mic::types::MatrixPtr<eT> EG;

    /// Calculated update.
    mic::types::MatrixPtr<eT> delta;

    /// Decay ratio, similar to momentum.
    eT decay;

    /// Smoothing term that avoids division by zero.
    eT eps;
};

} // namespace optimization
} // namespace neural_nets
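A minimal usage sketch of one training step, assuming the header above is available. The helper function name, the matrix contents, the learning rate of 0.001, and the explicit application of the returned update to the parameters are illustrative assumptions, not part of the class; only MAKE_MATRIX_PTR, element access, and calculateUpdate are taken from the listing.

    // Hypothetical helper illustrating one RMSProp step; names and values are placeholders.
    void rmsprop_step_example() {
        const size_t rows = 4, cols = 3;

        // Parameters and their current gradient (placeholder contents;
        // in practice the gradient comes from backpropagation).
        mic::types::MatrixPtr<float> x  = MAKE_MATRIX_PTR(float, rows, cols);
        mic::types::MatrixPtr<float> dx = MAKE_MATRIX_PTR(float, rows, cols);
        for (size_t i = 0; i < (size_t)x->size(); i++) {
            (*x)[i]  = 0.5f;
            (*dx)[i] = 0.1f;
        }

        // Optimizer sized to match the parameter matrix.
        neural_nets::optimization::RMSProp<float> rmsprop(rows, cols, 0.9f, 1e-8f);

        // Compute the adaptive update and apply it to the parameters.
        mic::types::MatrixPtr<float> update = rmsprop.calculateUpdate(x, dx, 0.001f);
        for (size_t i = 0; i < (size_t)x->size(); i++)
            (*x)[i] -= (*update)[i];
    }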
Member documentation:

RMSProp(size_t rows_, size_t cols_, eT decay_=0.9, eT eps_=1e-8)

mic::types::MatrixPtr< eT > calculateUpdate(mic::types::MatrixPtr< eT > x_, mic::types::MatrixPtr< eT > dx_, eT learning_rate_)
    Update using RMSProp - adaptive gradient descent with running average E[g^2].

mic::types::MatrixPtr< eT > EG
    Decaying average of the squares of gradients up to time t ("diagonal matrix") - E[g^2].

mic::types::MatrixPtr< eT > delta
    Calculated update.

eT decay
    Decay ratio, similar to momentum.

eT eps
    Smoothing term that avoids division by zero.

Base class:
    Abstract class representing interface to optimization function.
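For reference, the two loops in calculateUpdate implement the standard RMSProp recurrences, with \gamma = decay, \eta = learning_rate_, \epsilon = eps, and g_t the current gradient dx_:

    E[g^2]_t = \gamma \, E[g^2]_{t-1} + (1 - \gamma) \, g_t^2
    \Delta_t  = \frac{\eta}{\sqrt{E[g^2]_t + \epsilon}} \, g_t

The returned \Delta_t is the "Calculated update" stored in delta; dividing by the running root-mean-square of past gradients scales the step per element, which is what makes the method adaptive.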