MachineIntelligenceCore:NeuralNets
BackpropagationNeuralNetwork.hpp
#ifndef BACKPROPAGATIONNEURALNETWORK_H_
#define BACKPROPAGATIONNEURALNETWORK_H_

namespace mic {
namespace mlnn {

/*!
 * \brief Class representing a multi-layer neural network based on backpropagation/gradient descent.
 * \tparam eT Template parameter denoting the precision of computations (e.g. float or double).
 */
template <typename eT>
class BackpropagationNeuralNetwork : public MultiLayerNeuralNetwork<eT> {
public:

    /*!
     * Constructor. Sets the default loss function (cross entropy) and the default optimization method (gradient descent).
     * \param name_ Name of the network.
     */
    BackpropagationNeuralNetwork(std::string name_ = "bp_net") : MultiLayerNeuralNetwork<eT> (name_)
    {
        // Set cross entropy as the default loss function.
        setLoss<mic::neural_nets::loss::CrossEntropyLoss<eT> >();

        // Set "classical" SGD as the default optimization method.
        MultiLayerNeuralNetwork<eT>::template setOptimization<mic::neural_nets::optimization::GradientDescent<eT> >();
    }
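    // Note: both defaults can be overridden after construction - setLoss<>() (defined below) swaps the loss
    // function, while MultiLayerNeuralNetwork<eT>::setOptimization<>() swaps the optimizer. Only
    // CrossEntropyLoss and GradientDescent are referenced in this header; other concrete loss/optimizer
    // types, if any, live elsewhere in the library.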

    /*!
     * Virtual destructor - empty.
     */
    virtual ~BackpropagationNeuralNetwork() { }

    /*!
     * Sets the loss function.
     * \tparam LossFunction Template parameter denoting the loss function type.
     */
    template<typename LossFunction>
    void setLoss() {
        loss = std::make_shared<LossFunction>(LossFunction());
    }

    /*!
     * Performs the forward propagation of activations through all layers.
     * \param input_data Input data (matrix of size [input_size x batch_size]).
     * \param skip_dropout Flag for skipping dropout layers (e.g. during testing).
     */
    void forward(mic::types::MatrixPtr<eT> input_data, bool skip_dropout = false) {
        // Make sure that there are some layers in the nn!
        assert(layers.size() != 0);

        // Boost::Matrix is col major!
        LOG(LDEBUG) << "Inputs size: " << input_data->rows() << "x" << input_data->cols();
        LOG(LDEBUG) << "First layer input matrix size: " << layers[0]->s['x']->rows() << "x" << layers[0]->s['x']->cols();

        // Make sure that the dimensions are ok.
        // Check only the rows, as the cols determine the batch size - and we allow it to change dynamically.
        assert((layers[0]->s['x'])->rows() == input_data->rows());
        //LOG(LDEBUG) << " input_data: " << input_data.transpose();

        // Connect the layers by making each layer's input matrix pointer point to the previous layer's output matrix.
        // Thanks to that, data no longer has to be copied between layers.
        if (!connected) {
            // Verify the structure of the network.
            verify();
            // Set pointers - pass the result to the next layer: x(next layer) = y(current layer).
            if (layers.size() > 1)
                for (size_t i = 0; i < layers.size()-1; i++) {
                    // Connect pointers.
                    layers[i+1]->s['x'] = layers[i]->s['y'];
                    layers[i]->g['y'] = layers[i+1]->g['x'];
                }//: for
            connected = true;
        }

        //assert((layers[0]->s['x'])->cols() == input_data->cols());
        // Change the batch size - if required.
        resizeBatch(input_data->cols());

        // Copy the inputs to the lowest point in the network.
        (*(layers[0]->s['x'])) = (*input_data);

        // Compute the forward activations.
        for (size_t i = 0; i < layers.size(); i++) {
            LOG(LDEBUG) << "Layer [" << i << "] " << layers[i]->name() << ": (" <<
                layers[i]->inputSize() << "x" << layers[i]->batchSize() << ") -> (" <<
                layers[i]->outputSize() << "x" << layers[i]->batchSize() << ")";

            // Perform the forward computation: y = f(x).
            layers[i]->forward(skip_dropout);
        }
        //LOG(LDEBUG) << " predictions: " << getPredictions()->transpose();
    }
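    // Note on the data flow: once the layers have been connected (first forward() call), layers[i+1]->s['x']
    // and layers[i]->s['y'] (and analogously the gradient matrices g) alias the very same matrix objects,
    // so only the network input is copied on forward() and only the output gradient is copied on backward().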

    /*!
     * Verifies that the dimensions of consecutive layers match.
     * \return True if the structure of the network is correct, false otherwise.
     */
    bool verify() {
        bool ok = true;
        // Check consecutive pairs of layers: x(next layer) must match y(current layer).
        if (layers.size() > 1)
            for (size_t i = 0; i < layers.size()-1; i++) {
                bool layer_ok = true;
                // Check inputs/outputs.
                if (layers[i]->s['y']->rows() != layers[i+1]->s['x']->rows()) {
                    LOG(LERROR) << "Layer["<<i<<"].y differs from " << "Layer["<<i+1<<"].x";
                    ok = false;
                    layer_ok = false;
                }

                // Check gradients.
                if (layers[i]->g['y']->rows() != layers[i+1]->g['x']->rows()) {
                    LOG(LERROR) << "Layer["<<i<<"].dy differs from " << "Layer["<<i+1<<"].dx";
                    ok = false;
                    layer_ok = false;
                }
                // Display the parameters of both layers.
                if (!layer_ok) {
                    LOG(LINFO) << "Layer["<<i<<"]: " << (*layers[i]).streamLayerParameters();
                    LOG(LINFO) << "Layer["<<i+1<<"]: " << (*layers[i+1]).streamLayerParameters();
                }
            }//: for
        return ok;
    }

    /*!
     * Performs the backpropagation of gradients from the last layer to the first.
     * \param gradients_ Gradient of the loss with respect to the network outputs.
     */
    void backward(mic::types::MatrixPtr<eT> gradients_) {
        // Make sure that there are some layers in the nn!
        assert(layers.size() != 0);

        LOG(LDEBUG) << "Last layer output gradient matrix size: " << layers.back()->g['y']->cols() << "x" << layers.back()->g['y']->rows();
        LOG(LDEBUG) << "Passed target matrix size: " << gradients_->cols() << "x" << gradients_->rows();

        // Make sure that the dimensions are ok.
        assert((layers.back()->g['y'])->cols() == gradients_->cols());
        assert((layers.back()->g['y'])->rows() == gradients_->rows());

        // Set the gradient of the last layer - COPY the data.
        (*(layers.back()->g['y'])) = (*gradients_);

        // Back-propagate the gradients.
        for (int i = layers.size() - 1; i >= 0; i--) {
            layers[i]->backward();
        }//: for
    }
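    // Note: backward() relies on forward() having been called first for the current batch - forward() connects
    // the gradient pointers between layers, resizes the batch and fills the activations that the layer-wise
    // backward() computations operate on.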

    /*!
     * Trains the network on a single batch: forward pass, loss gradient, backward pass and parameter update.
     * \param encoded_batch_ Encoded batch of inputs.
     * \param encoded_targets_ Encoded batch of targets.
     * \param learning_rate_ Learning rate.
     * \param decay_ Weight decay (default: 0).
     * \return Mean value of the loss for the batch.
     */
    eT train(mic::types::MatrixPtr<eT> encoded_batch_, mic::types::MatrixPtr<eT> encoded_targets_, eT learning_rate_, eT decay_ = 0.0f) {

        // Forward propagate the activations from the first layer to the last.
        forward(encoded_batch_);

        // Get predictions.
        mic::types::MatrixPtr<eT> encoded_predictions = getPredictions();

        // Calculate the gradient according to the loss function.
        mic::types::MatrixPtr<eT> dy = loss->calculateGradient(encoded_targets_, encoded_predictions);

        // Backpropagate the gradients from the last layer to the first.
        backward(dy);

        // Apply the changes - according to the optimization function.
        update(learning_rate_, decay_);

        // Calculate the mean value of the loss function (i.e. the loss divided by the batch size).
        eT loss_value = loss->calculateMeanLoss(encoded_targets_, encoded_predictions);

        // Return the loss.
        return loss_value;
    }

    /*!
     * Tests the network on a single batch and returns the mean loss (dropout layers are skipped).
     * \param encoded_batch_ Encoded batch of inputs.
     * \param encoded_targets_ Encoded batch of targets.
     * \return Mean value of the loss for the batch.
     */
    eT test(mic::types::MatrixPtr<eT> encoded_batch_, mic::types::MatrixPtr<eT> encoded_targets_) {
        // Skip dropout layers at test time.
        bool skip_dropout = true;

        forward(encoded_batch_, skip_dropout);

        // Get predictions.
        mic::types::MatrixPtr<eT> encoded_predictions = getPredictions();

        // Calculate the mean loss.
        return loss->calculateMeanLoss(encoded_targets_, encoded_predictions);
    }

    /*!
     * Calculates the mean loss for the given targets and predictions.
     * \param encoded_targets_ Encoded batch of targets.
     * \param encoded_predictions_ Encoded batch of predictions.
     * \return Mean value of the loss.
     */
    eT calculateMeanLoss(mic::types::MatrixPtr<eT> encoded_targets_, mic::types::MatrixPtr<eT> encoded_predictions_) {
        return loss->calculateMeanLoss(encoded_targets_, encoded_predictions_);
    }

    // Unhide the overloaded public methods & fields inherited from the template class MultiLayerNeuralNetwork via "using" statements.

protected:
    // Unhide the overloaded protected methods & fields inherited from the template class MultiLayerNeuralNetwork via "using" statements.

    /*!
     * Pointer to the loss function.
     */
    std::shared_ptr<mic::neural_nets::loss::Loss<eT> > loss;

};

} /* namespace mlnn */
} /* namespace mic */

#endif /* BACKPROPAGATIONNEURALNETWORK_H_ */
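
Usage example (a minimal sketch, not part of the header above): the train()/test() calls, the [input_size x batch_size] batch encoding and the constructor defaults follow this file; the commented-out layer-construction calls (pushLayer() and the Linear/ReLU/Softmax layer types) and the MAKE_MATRIX_PTR helper are assumptions about the rest of the MachineIntelligenceCore:NeuralNets library and may not match its actual API.

#include "BackpropagationNeuralNetwork.hpp"  // path may differ

void train_and_test_sketch() {
    // Create the network; the constructor already selects cross-entropy loss and classical SGD.
    mic::mlnn::BackpropagationNeuralNetwork<float> net("bp_example");

    // Adding layers is handled by the MultiLayerNeuralNetwork<eT> base class - the exact API is assumed here:
    // net.pushLayer(new mic::mlnn::fully_connected::Linear<float>(28*28, 100));
    // net.pushLayer(new mic::mlnn::activation_function::ReLU<float>(100));
    // net.pushLayer(new mic::mlnn::fully_connected::Linear<float>(100, 10));
    // net.pushLayer(new mic::mlnn::activation_function::Softmax<float>(10));

    // Encode one batch: inputs are [input_size x batch_size], targets [output_size x batch_size].
    mic::types::MatrixPtr<float> batch   = MAKE_MATRIX_PTR(float, 28*28, 64);  // assumed helper
    mic::types::MatrixPtr<float> targets = MAKE_MATRIX_PTR(float, 10, 64);     // assumed helper
    // ... fill batch and targets with encoded samples ...

    // One training step: forward pass, loss gradient, backpropagation, parameter update; returns the mean batch loss.
    float train_loss = net.train(batch, targets, 0.001f /* learning rate */, 0.0f /* weight decay */);

    // Evaluation on a batch: forward pass with dropout skipped; returns the mean batch loss.
    float test_loss = net.test(batch, targets);

    LOG(LINFO) << "train loss = " << train_loss << ", test loss = " << test_loss;
}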
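
Plugging in a different loss goes through setLoss<>(). The base interface mic::neural_nets::loss::Loss<eT> is not shown in this file, so the sketch below infers its two methods from the calls made in train() and test() - calculateGradient(targets, predictions) and calculateMeanLoss(targets, predictions); their exact signatures, and the MAKE_MATRIX_PTR helper used to allocate the gradient matrix, are assumptions.

// A rough sketch of a custom squared-error loss (interface inferred - see the note above).
template <typename eT>
class SquaredErrorLossSketch : public mic::neural_nets::loss::Loss<eT> {
public:
    // Mean over the batch (columns = samples) of 0.5 * ||predictions - targets||^2.
    eT calculateMeanLoss(mic::types::MatrixPtr<eT> targets_, mic::types::MatrixPtr<eT> predictions_) {
        eT sum = (eT)0;
        for (size_t c = 0; c < (size_t)targets_->cols(); c++)
            for (size_t r = 0; r < (size_t)targets_->rows(); r++) {
                eT d = (*predictions_)(r, c) - (*targets_)(r, c);
                sum += (eT)0.5 * d * d;
            }
        return sum / (eT)targets_->cols();
    }

    // Gradient of the loss with respect to the predictions: simply (predictions - targets).
    mic::types::MatrixPtr<eT> calculateGradient(mic::types::MatrixPtr<eT> targets_, mic::types::MatrixPtr<eT> predictions_) {
        mic::types::MatrixPtr<eT> dy = MAKE_MATRIX_PTR(eT, predictions_->rows(), predictions_->cols());  // assumed helper
        (*dy) = (*predictions_) - (*targets_);
        return dy;
    }
};

// Usage - replaces the cross-entropy loss selected by the constructor:
// net.setLoss<SquaredErrorLossSketch<float> >();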