MachineIntelligenceCore:NeuralNets
Linear.hpp
#ifndef SRC_MLNN_LINEAR_HPP_
#define SRC_MLNN_LINEAR_HPP_

#include <mlnn/layer/Layer.hpp>

namespace mic {
namespace mlnn {
namespace fully_connected {

// Forward declaration of the SparseLinear class.
template <typename eT>
class SparseLinear;

/*!
 * Class implementing a linear, fully connected layer.
 */
template <typename eT=float>
class Linear : public mic::mlnn::Layer<eT> {
public:

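    /*!
     * Constructor for 1D (column-vector) inputs and outputs - delegates to the
     * generalized constructor below, with widths and depths set to 1.
     * @param inputs_ Length of the input vector.
     * @param outputs_ Length of the output vector.
     * @param name_ Name of the layer.
     */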
    Linear(size_t inputs_, size_t outputs_, std::string name_ = "Linear") :
        Linear(inputs_, 1, 1, outputs_, 1, 1, name_)
    {
    }

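    /*!
     * Constructor generalized to 3D (height x width x depth) inputs and outputs.
     * @param input_height_ Height of the input.
     * @param input_width_ Width of the input.
     * @param input_depth_ Number of input channels.
     * @param output_height_ Height of the output.
     * @param output_width_ Width of the output.
     * @param output_depth_ Number of output channels.
     * @param name_ Name of the layer.
     */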
    Linear(size_t input_height_, size_t input_width_, size_t input_depth_,
            size_t output_height_, size_t output_width_, size_t output_depth_,
            std::string name_ = "Linear") :
        Layer<eT>::Layer(input_height_, input_width_, input_depth_,
            output_height_, output_width_, output_depth_,
            LayerTypes::Linear, name_)
    {
        // Create the weights matrix.
        p.add ("W", Layer<eT>::outputSize(), Layer<eT>::inputSize());

        // Create the bias vector.
        p.add ("b", Layer<eT>::outputSize(), 1);

        // Initialize weights of the W matrix with Glorot (Xavier) uniform initialization:
        // W ~ U(-r, r), where r = sqrt(6 / (fan_in + fan_out)).
        eT range = sqrt(6.0 / eT(Layer<eT>::inputSize() + Layer<eT>::outputSize()));

        Layer<eT>::p['W']->rand(-range, range);
        Layer<eT>::p['b']->setZero();

        // Add W and b gradients.
        Layer<eT>::g.add ("W", Layer<eT>::outputSize(), Layer<eT>::inputSize());
        Layer<eT>::g.add ("b", Layer<eT>::outputSize(), 1 );

        // Set gradient descent as the default optimization function.
        Layer<eT>::template setOptimization<mic::neural_nets::optimization::GradientDescent<eT> > ();
    }

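    /*!
     * Virtual destructor - empty.
     */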
    virtual ~Linear() { }

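    /*!
     * Forward pass: computes y = W*x + b for the whole batch.
     * @param test_ Flag indicating test (vs. training) mode - unused by this layer.
     */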
    void forward(bool test_ = false) {
        // Get pointers to data matrices.
        mic::types::MatrixPtr<eT> x = s['x'];
        mic::types::MatrixPtr<eT> W = p['W'];
        mic::types::MatrixPtr<eT> b = p['b'];
        // Get the output pointer - so the results will be stored!
        mic::types::MatrixPtr<eT> y = s['y'];

        // Forward pass.
        (*y) = (*W) * (*x) + (*b).replicate(1, (*x).cols());
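        // Note: each column of x holds one sample, so the bias column b is
        // replicated x.cols() (= batch size) times, adding b to every sample.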

    }

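    /*!
     * Backward pass: computes the gradients dW, db and dx from the output gradient dy.
     */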
    void backward() {
        // Get pointers to data matrices.
        mic::types::MatrixPtr<eT> dy = g['y'];
        mic::types::MatrixPtr<eT> x = s['x'];
        mic::types::MatrixPtr<eT> W = p['W'];
        // Get the output pointers - so the results will be stored!
        mic::types::MatrixPtr<eT> dW = g['W'];
        mic::types::MatrixPtr<eT> db = g['b'];
        mic::types::MatrixPtr<eT> dx = g['x'];

        // Backward pass.
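        // For y = W*x + b the gradients are:
        //   dL/dW = dL/dy * x^T,  dL/db = sum of dL/dy over the batch,  dL/dx = W^T * dL/dy.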
        (*dW) = (*dy) * (*x).transpose();
        (*db) = (*dy).rowwise().sum(); // Sum over all samples in the batch, analogously to dW.
        (*dx) = (*W).transpose() * (*dy);

    }

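    /*!
     * Resets (zeroes) the gradients of W and b.
     */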
    void resetGrads() {
        g['W']->setZero();
        g['b']->setZero();
    }

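    /*!
     * Applies the gradient update using the currently selected optimization function.
     * @param alpha_ Learning rate.
     * @param decay_ Weight decay - applied to W only; the bias update always receives 0.0.
     */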
    void update(eT alpha_, eT decay_ = 0.0f) {
        opt["W"]->update(p['W'], g['W'], alpha_, decay_);
        opt["b"]->update(p['b'], g['b'], alpha_, 0.0);
    }

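    /*!
     * Returns the activations of the weight (W) matrix in the form of a vector of matrices.
     */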
    std::vector< mic::types::MatrixPtr<eT> > & getWeightActivations() {

        // Allocate memory (lazily, on first use).
        lazyAllocateMatrixVector(w_activations, 1, Layer<eT>::outputSize(), Layer<eT>::inputSize());

        // Get the weight matrix.
        mic::types::MatrixPtr<eT> W = p["W"];

        // Get row.
        mic::types::MatrixPtr<eT> row = w_activations[0];
        // Copy data.
        (*row) = (*W);

        // Return activations.
        return w_activations;
    }
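    /*!
     * Returns the activations of the weight gradient (dW) matrix in the form of a vector of matrices.
     */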
    std::vector< mic::types::MatrixPtr<eT> > & getWeightGradientActivations() {

        // Allocate memory (lazily, on first use).
        lazyAllocateMatrixVector(dw_activations, 1, Layer<eT>::outputSize(), Layer<eT>::inputSize());

        // Get the weight gradient matrix.
        mic::types::MatrixPtr<eT> dW = g["W"];

        // Get row.
        mic::types::MatrixPtr<eT> row = dw_activations[0];
        // Copy data.
        (*row) = (*dW);

        // Return activations.
        return dw_activations;
    }
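    /*!
     * Returns the "inverse activations" (W^T) of each neuron, reshaped into
     * (input_height x input_width) images, one per neuron and input channel.
     */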
    std::vector< mic::types::MatrixPtr<eT> > & getInverseWeightActivations() {

        // Allocate memory (lazily, on first use).
        lazyAllocateMatrixVector(inverse_w_activations,
            output_height*output_width*output_depth*input_depth,
            input_height, input_width);

        // TODO: check different input-output depths.

        mic::types::MatrixPtr<eT> W = p["W"];
        // Iterate through "neurons" and generate an "activation image" for each one.
        for (size_t i=0; i < output_height*output_width*output_depth; i++) {

            for (size_t j=0; j < input_depth; j++) {
                // "Access" the activation row.
                mic::types::MatrixPtr<eT> row = inverse_w_activations[i*input_depth + j];
                // Copy data - the j-th channel block (of input_height*input_width columns) of the i-th row of W.
                (*row) = W->block(i, j*input_height*input_width, 1, input_height*input_width);
                // Resize row into an (input_height x input_width) image.
                row->resize( input_height, input_width);

            }//: for
        }//: for

        // Return activations.
        return inverse_w_activations;
    }
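    /*!
     * Returns the "inverse projections" of the output activations (W^T * y),
     * reshaped into input-sized images - one per batch sample and input channel.
     */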
    std::vector< mic::types::MatrixPtr<eT> > & getInverseOutputActivations() {
        // Allocate memory (lazily, on first use).
        lazyAllocateMatrixVector(inverse_y_activations,
            batch_size*input_depth, input_height, input_width);

        // Get the y batch.
        mic::types::MatrixPtr<eT> batch_y = s['y'];
        // Get the weights.
        mic::types::MatrixPtr<eT> W = p["W"];

        // Iterate through batch samples and generate an "activation image" for each one.
        for (size_t ib=0; ib < batch_size; ib++) {

            // Get the output sample from the batch.
            mic::types::MatrixPtr<eT> sample_y = m["ys"];
            (*sample_y) = batch_y->col(ib);

            // Get the pointer to the "x sample" - the back-projection of y onto the input space.
            mic::types::MatrixPtr<eT> x_act = m["xs"];
            (*x_act) = W->transpose() * (*sample_y);

            // Iterate through input channels.
            for (size_t ic=0; ic < input_depth; ic++) {
                // Get the activation "row".
                mic::types::MatrixPtr<eT> row = inverse_y_activations[ib*input_depth + ic];

                // Copy the "channel block" from the given x sample.
                (*row) = x_act->block(ic*input_height*input_width, 0, input_height*input_width, 1);
                row->resize(input_height, input_width);

            }//: for channel

        }//: for batch

        // Return activations.
        return inverse_y_activations;
    }

    /*!
     * Calculates the mean reconstruction error for the current batch, i.e. the mean
     * (per sample) L1 distance between the input batch x and its reconstruction W^T * y.
     */
    eT calculateMeanReconstructionError() {
        // Get the input batch.
        mic::types::MatrixPtr<eT> batch_x = s['x'];
        // Calculate the reconstruction.
        std::vector< mic::types::MatrixPtr<eT> > reconstructed_batch_x = getInverseOutputActivations();

        // Calculate the reconstruction error for the whole batch.
        eT error = 0;
        // Iterate through batch samples.
        for (size_t ib=0; ib < batch_size; ib++) {

            // Get the input sample from the batch.
            mic::types::MatrixPtr<eT> sample_x = m["xs"];
            (*sample_x) = batch_x->col(ib);
            eT* sample_x_ptr = (*sample_x).data();

            // Calculate the (L1) error for a given sample, channel by channel -
            // the reconstruction vector holds one (input_height x input_width) matrix per channel.
            for (size_t ic=0; ic < input_depth; ic++) {
                eT* reconstructed_x_ptr = (*reconstructed_batch_x[ib*input_depth + ic]).data();
                for (size_t i=0; i < input_height*input_width; i++)
                    error += fabs(sample_x_ptr[ic*input_height*input_width + i] - reconstructed_x_ptr[i]);
            }//: for channel
        }//: for batch

        // Return the mean error.
        return (error/batch_size);
    }

    // Unhide the overloaded methods inherited from the template class Layer via "using" statements.
    using Layer<eT>::forward;
    using Layer<eT>::backward;

protected:
    // Unhide the fields inherited from the template class Layer via "using" statements.
    using Layer<eT>::g;
    using Layer<eT>::s;
    using Layer<eT>::p;
    using Layer<eT>::m;
    using Layer<eT>::opt;

    // Uncover "sizes" for visualization.
    using Layer<eT>::input_height;
    using Layer<eT>::input_width;
    using Layer<eT>::input_depth;
    using Layer<eT>::output_height;
    using Layer<eT>::output_width;
    using Layer<eT>::output_depth;
    using Layer<eT>::batch_size;

    // Uncover methods useful in visualization.
    using Layer<eT>::lazyAllocateMatrixVector;

private:
    // Friend class - required for using boost serialization.
    template<typename tmp> friend class mic::mlnn::MultiLayerNeuralNetwork;

    // Friend class - required for accessing the private constructor.
    template<typename tmp> friend class mic::mlnn::fully_connected::SparseLinear;

    /// Vector containing activations of weights/filters.
    std::vector< mic::types::MatrixPtr<eT> > w_activations;

    /// Vector containing activations of gradients of weights (dW).
    std::vector< mic::types::MatrixPtr<eT> > dw_activations;

    /// Vector containing "inverse activations" of each neuron's weights (W^T).
    std::vector< mic::types::MatrixPtr<eT> > inverse_w_activations;

    /// Vector containing "inverse activations" of neurons (W^T * y).
    std::vector< mic::types::MatrixPtr<eT> > inverse_y_activations;

    /*!
     * Private constructor, used only during serialization.
     */
    Linear() : Layer<eT>() { }

};
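
/*
 * Usage sketch (illustrative only - not part of the original file): in practice layers
 * are created and driven by MultiLayerNeuralNetwork, which fills the state s['x'] and
 * the gradient g['y'] before calling forward()/backward(). The calls below use only
 * the public methods defined above.
 *
 *   mic::mlnn::fully_connected::Linear<float> fc(784, 100, "fc1");
 *   fc.forward();       // y = W*x + b
 *   fc.backward();      // dW = dy*x^T, db = sum(dy), dx = W^T*dy
 *   fc.update(0.005f);  // gradient-descent step on W and b
 */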

} /* namespace fully_connected */
} /* namespace mlnn */
} /* namespace mic */

#endif /* SRC_MLNN_LINEAR_HPP_ */