MachineIntelligenceCore:NeuralNets
Linear.hpp
#ifndef SRC_MLNN_LINEAR_HPP_
#define SRC_MLNN_LINEAR_HPP_

#include <mlnn/layer/Layer.hpp>

namespace mic {
namespace mlnn {
namespace fully_connected {

// Forward declaration of the SparseLinear class.
template <typename eT>
class SparseLinear;

/*!
 * Class implementing a linear, fully connected layer.
 */
template <typename eT=float>
class Linear : public mic::mlnn::Layer<eT> {
public:

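    /*!
     * Constructor for 1D (column-vector) inputs and outputs - delegates to the
     * generalized constructor below, with widths and depths set to 1.
     * @param inputs_ Length of the input vector.
     * @param outputs_ Length of the output vector.
     * @param name_ Name of the layer.
     */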
    Linear(size_t inputs_, size_t outputs_, std::string name_ = "Linear") :
        Linear(inputs_, 1, 1, outputs_, 1, 1, name_)
    {
    }

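    /*!
     * Constructor generalized to 3D (height x width x depth) inputs and outputs.
     * @param input_height_ Height of the input.
     * @param input_width_ Width of the input.
     * @param input_depth_ Number of input channels.
     * @param output_height_ Height of the output.
     * @param output_width_ Width of the output.
     * @param output_depth_ Number of output channels.
     * @param name_ Name of the layer.
     */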
    Linear(size_t input_height_, size_t input_width_, size_t input_depth_,
            size_t output_height_, size_t output_width_, size_t output_depth_,
            std::string name_ = "Linear") :
        Layer<eT>::Layer(input_height_, input_width_, input_depth_,
            output_height_, output_width_, output_depth_,
            LayerTypes::Linear, name_)
    {
        // Create the weights matrix.
        p.add ("W", Layer<eT>::outputSize(), Layer<eT>::inputSize());

        // Create the bias vector.
        p.add ("b", Layer<eT>::outputSize(), 1);

        // Initialize weights of the W matrix with Glorot (Xavier) uniform initialization:
        // W ~ U(-r, r), where r = sqrt(6 / (fan_in + fan_out)).
        eT range = sqrt(6.0 / eT(Layer<eT>::inputSize() + Layer<eT>::outputSize()));

        Layer<eT>::p['W']->rand(-range, range);
        Layer<eT>::p['b']->setZero();

        // Add W and b gradients.
        Layer<eT>::g.add ("W", Layer<eT>::outputSize(), Layer<eT>::inputSize());
        Layer<eT>::g.add ("b", Layer<eT>::outputSize(), 1 );

        // Set gradient descent as the default optimization function.
        Layer<eT>::template setOptimization<mic::neural_nets::optimization::GradientDescent<eT> > ();
    }

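    /*!
     * Virtual destructor - empty.
     */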
    virtual ~Linear() { }

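    /*!
     * Forward pass: computes y = W*x + b for the whole batch.
     * @param test_ Flag indicating test (vs. training) mode - unused by this layer.
     */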
    void forward(bool test_ = false) {
        // Get pointers to data matrices.
        mic::types::MatrixPtr<eT> x = s['x'];
        mic::types::MatrixPtr<eT> W = p['W'];
        mic::types::MatrixPtr<eT> b = p['b'];
        // Get the output pointer - so the results will be stored!
        mic::types::MatrixPtr<eT> y = s['y'];

        // Forward pass.
        (*y) = (*W) * (*x) + (*b).replicate(1, (*x).cols());
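        // Note: each column of x holds one sample, so the bias column b is
        // replicated x.cols() (= batch size) times, adding b to every sample.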

    }

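    /*!
     * Backward pass: computes the gradients dW, db and dx from the output gradient dy.
     */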
    void backward() {
        // Get pointers to data matrices.
        mic::types::MatrixPtr<eT> dy = g['y'];
        mic::types::MatrixPtr<eT> x = s['x'];
        mic::types::MatrixPtr<eT> W = p['W'];
        // Get the output pointers - so the results will be stored!
        mic::types::MatrixPtr<eT> dW = g['W'];
        mic::types::MatrixPtr<eT> db = g['b'];
        mic::types::MatrixPtr<eT> dx = g['x'];

        // Backward pass.
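        // For y = W*x + b the gradients are:
        //   dL/dW = dL/dy * x^T,  dL/db = sum of dL/dy over the batch,  dL/dx = W^T * dL/dy.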
        (*dW) = (*dy) * (*x).transpose();
        (*db) = (*dy).rowwise().sum(); // Sum over all samples in the batch, analogously to dW.
        (*dx) = (*W).transpose() * (*dy);

    }

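    /*!
     * Resets (zeroes) the gradients of W and b.
     */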
    void resetGrads() {
        g['W']->setZero();
        g['b']->setZero();
    }

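    /*!
     * Applies the gradient update using the currently selected optimization function.
     * @param alpha_ Learning rate.
     * @param decay_ Weight decay - applied to W only; the bias update always receives 0.0.
     */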
    void update(eT alpha_, eT decay_ = 0.0f) {
        opt["W"]->update(p['W'], g['W'], alpha_, decay_);
        opt["b"]->update(p['b'], g['b'], alpha_, 0.0);
    }

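    /*!
     * Returns the activations of the weight (W) matrix in the form of a vector of matrices.
     */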
    std::vector< mic::types::MatrixPtr<eT> > & getWeightActivations() {

        // Allocate memory (lazily, on first use).
        lazyAllocateMatrixVector(w_activations, 1, Layer<eT>::outputSize(), Layer<eT>::inputSize());

        // Get the weight matrix.
        mic::types::MatrixPtr<eT> W = p["W"];

        // Get row.
        mic::types::MatrixPtr<eT> row = w_activations[0];
        // Copy data.
        (*row) = (*W);

        // Return activations.
        return w_activations;
    }
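    /*!
     * Returns the activations of the weight gradient (dW) matrix in the form of a vector of matrices.
     */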
    std::vector< mic::types::MatrixPtr<eT> > & getWeightGradientActivations() {

        // Allocate memory (lazily, on first use).
        lazyAllocateMatrixVector(dw_activations, 1, Layer<eT>::outputSize(), Layer<eT>::inputSize());

        // Get the weight gradient matrix.
        mic::types::MatrixPtr<eT> dW = g["W"];

        // Get row.
        mic::types::MatrixPtr<eT> row = dw_activations[0];
        // Copy data.
        (*row) = (*dW);

        // Return activations.
        return dw_activations;
    }
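    /*!
     * Returns the "inverse activations" (W^T) of each neuron, reshaped into
     * (input_height x input_width) images, one per neuron and input channel.
     */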
    std::vector< mic::types::MatrixPtr<eT> > & getInverseWeightActivations() {

        // Allocate memory (lazily, on first use).
        lazyAllocateMatrixVector(inverse_w_activations,
            output_height*output_width*output_depth*input_depth,
            input_height, input_width);

        // TODO: check different input-output depths.

        mic::types::MatrixPtr<eT> W = p["W"];
        // Iterate through "neurons" and generate an "activation image" for each one.
        for (size_t i=0; i < output_height*output_width*output_depth; i++) {

            for (size_t j=0; j < input_depth; j++) {
                // "Access" the activation row.
                mic::types::MatrixPtr<eT> row = inverse_w_activations[i*input_depth + j];
                // Copy data - the j-th channel block (of input_height*input_width columns) of the i-th row of W.
                (*row) = W->block(i, j*input_height*input_width, 1, input_height*input_width);
                // Resize row into an (input_height x input_width) image.
                row->resize( input_height, input_width);

            }//: for
        }//: for

        // Return activations.
        return inverse_w_activations;
    }
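    /*!
     * Returns the "inverse projections" of the output activations (W^T * y),
     * reshaped into input-sized images - one per batch sample and input channel.
     */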
    std::vector< mic::types::MatrixPtr<eT> > & getInverseOutputActivations() {
        // Allocate memory (lazily, on first use).
        lazyAllocateMatrixVector(inverse_y_activations,
            batch_size*input_depth, input_height, input_width);

        // Get the y batch.
        mic::types::MatrixPtr<eT> batch_y = s['y'];
        // Get the weights.
        mic::types::MatrixPtr<eT> W = p["W"];

        // Iterate through batch samples and generate an "activation image" for each one.
        for (size_t ib=0; ib < batch_size; ib++) {

            // Get the output sample from the batch.
            mic::types::MatrixPtr<eT> sample_y = m["ys"];
            (*sample_y) = batch_y->col(ib);

            // Get the pointer to the "x sample" - the back-projection of y onto the input space.
            mic::types::MatrixPtr<eT> x_act = m["xs"];
            (*x_act) = W->transpose() * (*sample_y);

            // Iterate through input channels.
            for (size_t ic=0; ic < input_depth; ic++) {
                // Get the activation "row".
                mic::types::MatrixPtr<eT> row = inverse_y_activations[ib*input_depth + ic];

                // Copy the "channel block" from the given x sample.
                (*row) = x_act->block(ic*input_height*input_width, 0, input_height*input_width, 1);
                row->resize(input_height, input_width);

            }//: for channel

        }//: for batch

        // Return activations.
        return inverse_y_activations;
    }

    /*!
     * Calculates the mean reconstruction error for the current batch, i.e. the mean
     * (per sample) L1 distance between the input batch x and its reconstruction W^T * y.
     */
    eT calculateMeanReconstructionError() {
        // Get the input batch.
        mic::types::MatrixPtr<eT> batch_x = s['x'];
        // Calculate the reconstruction.
        std::vector< mic::types::MatrixPtr<eT> > reconstructed_batch_x = getInverseOutputActivations();

        // Calculate the reconstruction error for the whole batch.
        eT error = 0;
        // Iterate through batch samples.
        for (size_t ib=0; ib < batch_size; ib++) {

            // Get the input sample from the batch.
            mic::types::MatrixPtr<eT> sample_x = m["xs"];
            (*sample_x) = batch_x->col(ib);
            eT* sample_x_ptr = (*sample_x).data();

            // Calculate the (L1) error for a given sample, channel by channel -
            // the reconstruction vector holds one (input_height x input_width) matrix per channel.
            for (size_t ic=0; ic < input_depth; ic++) {
                eT* reconstructed_x_ptr = (*reconstructed_batch_x[ib*input_depth + ic]).data();
                for (size_t i=0; i < input_height*input_width; i++)
                    error += fabs(sample_x_ptr[ic*input_height*input_width + i] - reconstructed_x_ptr[i]);
            }//: for channel
        }//: for batch

        // Return the mean error.
        return (error/batch_size);
    }

    // Unhide the overloaded methods inherited from the template class Layer via "using" statements.
    using Layer<eT>::forward;
    using Layer<eT>::backward;

protected:
    // Unhide the fields inherited from the template class Layer via "using" statements.
    using Layer<eT>::g;
    using Layer<eT>::s;
    using Layer<eT>::p;
    using Layer<eT>::m;
    using Layer<eT>::opt;

    // Uncover "sizes" for visualization.
    using Layer<eT>::input_height;
    using Layer<eT>::input_width;
    using Layer<eT>::input_depth;
    using Layer<eT>::output_height;
    using Layer<eT>::output_width;
    using Layer<eT>::output_depth;
    using Layer<eT>::batch_size;

    // Uncover methods useful in visualization.
    using Layer<eT>::lazyAllocateMatrixVector;

private:
    // Friend class - required for using boost serialization.
    template<typename tmp> friend class mic::mlnn::MultiLayerNeuralNetwork;

    // Friend class - required for accessing the private constructor.
    template<typename tmp> friend class mic::mlnn::fully_connected::SparseLinear;

    /// Vector containing activations of weights/filters.
    std::vector< mic::types::MatrixPtr<eT> > w_activations;

    /// Vector containing activations of gradients of weights (dW).
    std::vector< mic::types::MatrixPtr<eT> > dw_activations;

    /// Vector containing "inverse activations" of each neuron's weights (W^T).
    std::vector< mic::types::MatrixPtr<eT> > inverse_w_activations;

    /// Vector containing "inverse activations" of neurons (W^T * y).
    std::vector< mic::types::MatrixPtr<eT> > inverse_y_activations;

    /*!
     * Private constructor, used only during serialization.
     */
    Linear() : Layer<eT>() { }

};
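
/*
 * Usage sketch (illustrative only - not part of the original file): in practice layers
 * are created and driven by MultiLayerNeuralNetwork, which fills the state s['x'] and
 * the gradient g['y'] before calling forward()/backward(). The calls below use only
 * the public methods defined above.
 *
 *   mic::mlnn::fully_connected::Linear<float> fc(784, 100, "fc1");
 *   fc.forward();       // y = W*x + b
 *   fc.backward();      // dW = dy*x^T, db = sum(dy), dx = W^T*dy
 *   fc.update(0.005f);  // gradient-descent step on W and b
 */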

} /* namespace fully_connected */
} /* namespace mlnn */
} /* namespace mic */

#endif /* SRC_MLNN_LINEAR_HPP_ */