MachineIntelligenceCore:NeuralNets
BackpropagationNeuralNetwork.hpp
#ifndef BACKPROPAGATIONNEURALNETWORK_H_
#define BACKPROPAGATIONNEURALNETWORK_H_

namespace mic {
namespace mlnn {

/*!
 * \brief Class representing a multi-layer neural network based on backpropagation/gradient descent.
 * \tparam eT Template parameter denoting the precision of computations (e.g. float or double).
 */
template <typename eT>
class BackpropagationNeuralNetwork : public MultiLayerNeuralNetwork<eT> {
public:

    /*!
     * Constructor. Sets the default loss function (cross entropy) and the default optimization method (gradient descent).
     * \param name_ Name of the network.
     */
    BackpropagationNeuralNetwork(std::string name_ = "bp_net") : MultiLayerNeuralNetwork<eT> (name_)
    {
        // Set cross entropy as the default loss function.
        setLoss<mic::neural_nets::loss::CrossEntropyLoss<eT> >();

        // Set "classical" SGD as the default optimization method.
        MultiLayerNeuralNetwork<eT>::template setOptimization<mic::neural_nets::optimization::GradientDescent<eT> >();
    }
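    // Note: both defaults can be overridden after construction - setLoss<>() (defined below) swaps the loss
    // function, while MultiLayerNeuralNetwork<eT>::setOptimization<>() swaps the optimizer. Only
    // CrossEntropyLoss and GradientDescent are referenced in this header; other concrete loss/optimizer
    // types, if any, live elsewhere in the library.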

    /*!
     * Virtual destructor - empty.
     */
    virtual ~BackpropagationNeuralNetwork() { }

    /*!
     * Sets the loss function.
     * \tparam LossFunction Template parameter denoting the loss function type.
     */
    template<typename LossFunction>
    void setLoss() {
        loss = std::make_shared<LossFunction>(LossFunction());
    }

    /*!
     * Performs the forward propagation of activations through all layers.
     * \param input_data Input data (matrix of size [input_size x batch_size]).
     * \param skip_dropout Flag for skipping dropout layers (e.g. during testing).
     */
    void forward(mic::types::MatrixPtr<eT> input_data, bool skip_dropout = false) {
        // Make sure that there are some layers in the nn!
        assert(layers.size() != 0);

        // Boost::Matrix is col major!
        LOG(LDEBUG) << "Inputs size: " << input_data->rows() << "x" << input_data->cols();
        LOG(LDEBUG) << "First layer input matrix size: " << layers[0]->s['x']->rows() << "x" << layers[0]->s['x']->cols();

        // Make sure that the dimensions are ok.
        // Check only the rows, as the cols determine the batch size - and we allow it to change dynamically.
        assert((layers[0]->s['x'])->rows() == input_data->rows());
        //LOG(LDEBUG) << " input_data: " << input_data.transpose();

        // Connect the layers by making each layer's input matrix pointer point to the previous layer's output matrix.
        // Thanks to that, data no longer has to be copied between layers.
        if (!connected) {
            // Verify the structure of the network.
            verify();
            // Set pointers - pass the result to the next layer: x(next layer) = y(current layer).
            if (layers.size() > 1)
                for (size_t i = 0; i < layers.size()-1; i++) {
                    // Connect pointers.
                    layers[i+1]->s['x'] = layers[i]->s['y'];
                    layers[i]->g['y'] = layers[i+1]->g['x'];
                }//: for
            connected = true;
        }

        //assert((layers[0]->s['x'])->cols() == input_data->cols());
        // Change the batch size - if required.
        resizeBatch(input_data->cols());

        // Copy the inputs to the lowest point in the network.
        (*(layers[0]->s['x'])) = (*input_data);

        // Compute the forward activations.
        for (size_t i = 0; i < layers.size(); i++) {
            LOG(LDEBUG) << "Layer [" << i << "] " << layers[i]->name() << ": (" <<
                layers[i]->inputSize() << "x" << layers[i]->batchSize() << ") -> (" <<
                layers[i]->outputSize() << "x" << layers[i]->batchSize() << ")";

            // Perform the forward computation: y = f(x).
            layers[i]->forward(skip_dropout);
        }
        //LOG(LDEBUG) << " predictions: " << getPredictions()->transpose();
    }
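    // Note on the data flow: once the layers have been connected (first forward() call), layers[i+1]->s['x']
    // and layers[i]->s['y'] (and analogously the gradient matrices g) alias the very same matrix objects,
    // so only the network input is copied on forward() and only the output gradient is copied on backward().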

    /*!
     * Verifies that the dimensions of consecutive layers match.
     * \return True if the structure of the network is correct, false otherwise.
     */
    bool verify() {
        bool ok = true;
        // Check consecutive pairs of layers: x(next layer) must match y(current layer).
        if (layers.size() > 1)
            for (size_t i = 0; i < layers.size()-1; i++) {
                bool layer_ok = true;
                // Check inputs/outputs.
                if (layers[i]->s['y']->rows() != layers[i+1]->s['x']->rows()) {
                    LOG(LERROR) << "Layer["<<i<<"].y differs from " << "Layer["<<i+1<<"].x";
                    ok = false;
                    layer_ok = false;
                }

                // Check gradients.
                if (layers[i]->g['y']->rows() != layers[i+1]->g['x']->rows()) {
                    LOG(LERROR) << "Layer["<<i<<"].dy differs from " << "Layer["<<i+1<<"].dx";
                    ok = false;
                    layer_ok = false;
                }
                // Display the parameters of both layers.
                if (!layer_ok) {
                    LOG(LINFO) << "Layer["<<i<<"]: " << (*layers[i]).streamLayerParameters();
                    LOG(LINFO) << "Layer["<<i+1<<"]: " << (*layers[i+1]).streamLayerParameters();
                }
            }//: for
        return ok;
    }

    /*!
     * Performs the backpropagation of gradients from the last layer to the first.
     * \param gradients_ Gradient of the loss with respect to the network outputs.
     */
    void backward(mic::types::MatrixPtr<eT> gradients_) {
        // Make sure that there are some layers in the nn!
        assert(layers.size() != 0);

        LOG(LDEBUG) << "Last layer output gradient matrix size: " << layers.back()->g['y']->cols() << "x" << layers.back()->g['y']->rows();
        LOG(LDEBUG) << "Passed target matrix size: " << gradients_->cols() << "x" << gradients_->rows();

        // Make sure that the dimensions are ok.
        assert((layers.back()->g['y'])->cols() == gradients_->cols());
        assert((layers.back()->g['y'])->rows() == gradients_->rows());

        // Set the gradient of the last layer - COPY the data.
        (*(layers.back()->g['y'])) = (*gradients_);

        // Back-propagate the gradients.
        for (int i = layers.size() - 1; i >= 0; i--) {
            layers[i]->backward();
        }//: for
    }
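    // Note: backward() relies on forward() having been called first for the current batch - forward() connects
    // the gradient pointers between layers, resizes the batch and fills the activations that the layer-wise
    // backward() computations operate on.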

    /*!
     * Trains the network on a single batch: forward pass, loss gradient, backward pass and parameter update.
     * \param encoded_batch_ Encoded batch of inputs.
     * \param encoded_targets_ Encoded batch of targets.
     * \param learning_rate_ Learning rate.
     * \param decay_ Weight decay (default: 0).
     * \return Mean value of the loss for the batch.
     */
    eT train(mic::types::MatrixPtr<eT> encoded_batch_, mic::types::MatrixPtr<eT> encoded_targets_, eT learning_rate_, eT decay_ = 0.0f) {

        // Forward propagate the activations from the first layer to the last.
        forward(encoded_batch_);

        // Get predictions.
        mic::types::MatrixPtr<eT> encoded_predictions = getPredictions();

        // Calculate the gradient according to the loss function.
        mic::types::MatrixPtr<eT> dy = loss->calculateGradient(encoded_targets_, encoded_predictions);

        // Backpropagate the gradients from the last layer to the first.
        backward(dy);

        // Apply the changes - according to the optimization function.
        update(learning_rate_, decay_);

        // Calculate the mean value of the loss function (i.e. the loss divided by the batch size).
        eT loss_value = loss->calculateMeanLoss(encoded_targets_, encoded_predictions);

        // Return the loss.
        return loss_value;
    }

    /*!
     * Tests the network on a single batch and returns the mean loss (dropout layers are skipped).
     * \param encoded_batch_ Encoded batch of inputs.
     * \param encoded_targets_ Encoded batch of targets.
     * \return Mean value of the loss for the batch.
     */
    eT test(mic::types::MatrixPtr<eT> encoded_batch_, mic::types::MatrixPtr<eT> encoded_targets_) {
        // Skip dropout layers at test time.
        bool skip_dropout = true;

        forward(encoded_batch_, skip_dropout);

        // Get predictions.
        mic::types::MatrixPtr<eT> encoded_predictions = getPredictions();

        // Calculate the mean loss.
        return loss->calculateMeanLoss(encoded_targets_, encoded_predictions);
    }

    /*!
     * Calculates the mean loss for the given targets and predictions.
     * \param encoded_targets_ Encoded batch of targets.
     * \param encoded_predictions_ Encoded batch of predictions.
     * \return Mean value of the loss.
     */
    eT calculateMeanLoss(mic::types::MatrixPtr<eT> encoded_targets_, mic::types::MatrixPtr<eT> encoded_predictions_) {
        return loss->calculateMeanLoss(encoded_targets_, encoded_predictions_);
    }

    // Unhide the overloaded public methods & fields inherited from the template class MultiLayerNeuralNetwork via "using" statements.

protected:
    // Unhide the overloaded protected methods & fields inherited from the template class MultiLayerNeuralNetwork via "using" statements.

    /*!
     * Pointer to the loss function.
     */
    std::shared_ptr<mic::neural_nets::loss::Loss<eT> > loss;

};

} /* namespace mlnn */
} /* namespace mic */

#endif /* BACKPROPAGATIONNEURALNETWORK_H_ */
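
Usage example (a minimal sketch, not part of the header above): the train()/test() calls, the [input_size x batch_size] batch encoding and the constructor defaults follow this file; the commented-out layer-construction calls (pushLayer() and the Linear/ReLU/Softmax layer types) and the MAKE_MATRIX_PTR helper are assumptions about the rest of the MachineIntelligenceCore:NeuralNets library and may not match its actual API.

#include "BackpropagationNeuralNetwork.hpp"  // path may differ

void train_and_test_sketch() {
    // Create the network; the constructor already selects cross-entropy loss and classical SGD.
    mic::mlnn::BackpropagationNeuralNetwork<float> net("bp_example");

    // Adding layers is handled by the MultiLayerNeuralNetwork<eT> base class - the exact API is assumed here:
    // net.pushLayer(new mic::mlnn::fully_connected::Linear<float>(28*28, 100));
    // net.pushLayer(new mic::mlnn::activation_function::ReLU<float>(100));
    // net.pushLayer(new mic::mlnn::fully_connected::Linear<float>(100, 10));
    // net.pushLayer(new mic::mlnn::activation_function::Softmax<float>(10));

    // Encode one batch: inputs are [input_size x batch_size], targets [output_size x batch_size].
    mic::types::MatrixPtr<float> batch   = MAKE_MATRIX_PTR(float, 28*28, 64);  // assumed helper
    mic::types::MatrixPtr<float> targets = MAKE_MATRIX_PTR(float, 10, 64);     // assumed helper
    // ... fill batch and targets with encoded samples ...

    // One training step: forward pass, loss gradient, backpropagation, parameter update; returns the mean batch loss.
    float train_loss = net.train(batch, targets, 0.001f /* learning rate */, 0.0f /* weight decay */);

    // Evaluation on a batch: forward pass with dropout skipped; returns the mean batch loss.
    float test_loss = net.test(batch, targets);

    LOG(LINFO) << "train loss = " << train_loss << ", test loss = " << test_loss;
}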
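
Plugging in a different loss goes through setLoss<>(). The base interface mic::neural_nets::loss::Loss<eT> is not shown in this file, so the sketch below infers its two methods from the calls made in train() and test() - calculateGradient(targets, predictions) and calculateMeanLoss(targets, predictions); their exact signatures, and the MAKE_MATRIX_PTR helper used to allocate the gradient matrix, are assumptions.

// A rough sketch of a custom squared-error loss (interface inferred - see the note above).
template <typename eT>
class SquaredErrorLossSketch : public mic::neural_nets::loss::Loss<eT> {
public:
    // Mean over the batch (columns = samples) of 0.5 * ||predictions - targets||^2.
    eT calculateMeanLoss(mic::types::MatrixPtr<eT> targets_, mic::types::MatrixPtr<eT> predictions_) {
        eT sum = (eT)0;
        for (size_t c = 0; c < (size_t)targets_->cols(); c++)
            for (size_t r = 0; r < (size_t)targets_->rows(); r++) {
                eT d = (*predictions_)(r, c) - (*targets_)(r, c);
                sum += (eT)0.5 * d * d;
            }
        return sum / (eT)targets_->cols();
    }

    // Gradient of the loss with respect to the predictions: simply (predictions - targets).
    mic::types::MatrixPtr<eT> calculateGradient(mic::types::MatrixPtr<eT> targets_, mic::types::MatrixPtr<eT> predictions_) {
        mic::types::MatrixPtr<eT> dy = MAKE_MATRIX_PTR(eT, predictions_->rows(), predictions_->cols());  // assumed helper
        (*dy) = (*predictions_) - (*targets_);
        return dy;
    }
};

// Usage - replaces the cross-entropy loss selected by the constructor:
// net.setLoss<SquaredErrorLossSketch<float> >();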