MachineIntelligenceCore:ReinforcementLearning
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator
MazeHistogramFilter.cpp
Go to the documentation of this file.
1 
24 
25 #include <utils/DataCollector.hpp>
26 
27 namespace mic {
28 namespace algorithms {
29 
30 
32  // Reset variables.
35 
36 }
37 
38 void MazeHistogramFilter::setMazes(std::vector<mic::types::MatrixXiPtr> & mazes_, unsigned int number_of_distinctive_patches_)
39 {
40  mazes = mazes_;
41  // Set problem dimensions.
42  number_of_mazes = mazes.size();
43  maze_width = mazes[0]->cols();
44  maze_height = mazes[0]->rows();
46  number_of_distinctive_patches = number_of_distinctive_patches_;
47 
48  // Create matrices with probabilities.
49  for (size_t m=0; m<number_of_mazes; m++) {
50  mic::types::MatrixXdPtr position_probabilities(new mic::types::MatrixXd (maze_height, maze_width));
51  maze_position_probabilities.push_back(position_probabilities);
52  }
53 
54  maze_probabilities.resize(number_of_mazes);
56  maze_y_coordinate_probilities.resize(maze_height);
57 
59 
60 }
61 
62 
63 void MazeHistogramFilter::setHiddenPose(int hidden_maze_number_, int hidden_x_, int hidden_y_)
64 {
65  // Get "hidden" maze number.
66  if (hidden_maze_number_ == -1) {
67  hidden_maze_number = RAN_GEN->uniRandInt(0,number_of_mazes-1);
68  } else
69  hidden_maze_number = hidden_maze_number_ % number_of_mazes;
70 
71  // Get "hidden" maze x coordinate.
72  if (hidden_x_ == -1) {
73  hidden_x = RAN_GEN->uniRandInt(0,maze_width-1);
74  } else
75  hidden_x = hidden_x_ % maze_width;
76 
77  // Get "hidden" maze y coordinate.
78  if (hidden_y == -1) {
79  hidden_y = RAN_GEN->uniRandInt(0,maze_height-1);
80  } else
81  hidden_y = hidden_y_ % maze_height;
82 
83  LOG(LWARNING) << "After truncation/random: hidden position in maze " << hidden_maze_number << "= (" << hidden_y << "," << hidden_x << ")";
84 
85 }
86 
87 
89 
90  // Assign initial probabilities for all mazes/positions.
91  LOG(LNOTICE) << "Initial maze_position_probabilities:";
92  for (size_t m=0; m<number_of_mazes; m++) {
93  mic::types::MatrixXdPtr position_probabilities = maze_position_probabilities[m];
94 
95  for (size_t i=0; i<maze_height; i++) {
96  for (size_t j=0; j<maze_width; j++) {
97  (*position_probabilities)(i,j) = (double) 1.0/(problem_dimensions);
98  }//: for j
99  }//: for i
100 
101  LOG(LNOTICE) << "maze_position_prob(" <<m<<"):\n" << (*maze_position_probabilities[m]);
102  }//: for m
103 
104  // Assign initial probabilities to maze - for visualization.
105  for (size_t m=0; m<number_of_mazes; m++) {
106  maze_probabilities[m] = ((double) 1.0/ mazes.size());
107  }//: for
108 
109 
110  // Assign initial probabilities to x coordinate - for visualization.
111  for (size_t x=0; x<maze_width; x++) {
112  maze_x_coordinate_probilities[x] = ((double) 1.0/ maze_width);
113  }//: for
114 
115  // Assign initial probabilities to y coordinate - for visualization.
116  for (size_t y=0; y<maze_height; y++) {
117  maze_y_coordinate_probilities[y] = ((double) 1.0/ maze_height);
118  }//: for
119 
120  // Collect statistics for all mazes - number of appearances of a given "patch" (i.e. digit).
121  for (size_t m=0; m<number_of_mazes; m++) {
122  mic::types::MatrixXiPtr maze = mazes[m];
123 
124  // Iterate through maze and collect occurrences.
125  for (size_t i=0; i<maze_height; i++) {
126  for (size_t j=0; j<maze_width; j++) {
127  short patch_id = (*maze)(i,j);
128  maze_patch_probabilities[patch_id] += 1.0;
129  }//: for j
130  }//: for i
131 
132  }//: for m(azes)
133 
134  // Divide by problem dimensions (number of mazes * width * height) -> probabilities.
135  LOG(LNOTICE) << "maze_patch_probabilities:";
136  for (size_t i=0; i<number_of_distinctive_patches; i++) {
138  LOG(LNOTICE) << "maze_patch_prob(" <<i<<"):\n" << maze_patch_probabilities[i];
139  }//: for
140 
141 }
142 
143 
144 
145 void MazeHistogramFilter::sense (double hit_factor_, double miss_factor_) {
146 
147  // Get observation.
149  LOG(LINFO) << "Current observation=" << obs;
150 
151  // Compute posterior distribution given Z (observation) - total probability.
152 
153  // For all mazes.
154  double prob_sum = 0;
155  for (size_t m=0; m<number_of_mazes; m++) {
156  mic::types::MatrixXdPtr pos_probs = maze_position_probabilities[m];
157  mic::types::MatrixXiPtr maze = mazes[m];
158 
159  // Display results.
160 /* LOG(LERROR) << "Przed updatem";
161  LOG(LERROR) << (*mazes[m]);
162  LOG(LERROR) << (*maze_position_probabilities[m]);
163 */
164 
165  // Iterate through position probabilities and update them.
166  for (size_t y=0; y<maze_height; y++) {
167  for (size_t x=0; x<maze_width; x++) {
168  if ((*maze)(y,x) == obs)
169  (*pos_probs)(y,x) *= hit_factor_;
170  else
171  (*pos_probs)(y,x) *= miss_factor_;
172  prob_sum += (*pos_probs)(y,x);
173  }//: for j
174  }//: for i
175  }//: for m
176 
177  prob_sum = 1/prob_sum;
178  // Normalize probabilities for all mazes.
179  for (size_t m=0; m<number_of_mazes; m++) {
180  mic::types::MatrixXdPtr pos_probs = maze_position_probabilities[m];
181  for (size_t i=0; i<maze_height; i++) {
182  for (size_t j=0; j<maze_width; j++) {
183  (*pos_probs)(i,j) *= prob_sum;
184  }//: for j
185  }//: for i
186 
187  // Display results.
188  LOG(LNOTICE) << "maze(" <<m<<"):\n" << (*mazes[m]);
189  LOG(LNOTICE) << "maze_pose_prob(" <<m<<"):\n" << (*maze_position_probabilities[m]);
190  }//: for m
191 
192 }
193 
194 void MazeHistogramFilter::move (mic::types::Action2DInterface ac_) {
195  LOG(LINFO) << "Current move dy,dx= ( " << ac_.dy << "," <<ac_.dx<< ")";
196 
197  // For all mazes.
198  for (size_t m=0; m<number_of_mazes; m++) {
199  mic::types::MatrixXdPtr pos_probs = maze_position_probabilities[m];
200  mic::types::MatrixXd old_pose_probs = (*pos_probs);
201 
202 /* LOG(LERROR) << "Przed ruchem";
203  LOG(LERROR) << (*mazes[m]);
204  LOG(LERROR) << (*maze_position_probabilities[m]);*/
205 
206  // Iterate through position probabilities and update them.
207  for (size_t y=0; y<maze_height; y++) {
208  for (size_t x=0; x<maze_width; x++) {
209  //std::cout << "i=" << i << " j=" << j << " dx=" << dx_ << " dy=" << dy_ << " (i - dx_) % 3 = " << (i +3 - dx_) % 3 << " (j - dy_) % 3=" << (j + 3 - dy_) % 3 << std::endl;
210  (*pos_probs)((y + maze_height + ac_.dy) %maze_height, (x +maze_width + ac_.dx) % maze_width) = old_pose_probs(y, x);
211 
212  }//: for j
213  }//: for i
214 
215  // Display results.
216  LOG(LNOTICE) << "maze(" <<m<<"):\n" << (*mazes[m]);
217  LOG(LNOTICE) << "maze_pose_prob(" <<m<<"):\n" << (*maze_position_probabilities[m]);
218  }//: for m
219 
220  // Perform the REAL move.
221  hidden_y = (hidden_y + maze_height + ac_.dy) % maze_height;
222  hidden_x = (hidden_x + maze_width + ac_.dx) % maze_width;
223 
224  LOG(LWARNING) << "Hidden position in maze " << hidden_maze_number << "= (" << hidden_y << "," << hidden_x << ")";
225 
226 }
227 
228 void MazeHistogramFilter::probabilisticMove (mic::types::Action2DInterface ac_, double exact_move_probability_, double overshoot_move_probability_, double undershoot_move_probability_) {
229 
230  LOG(LINFO) << "Current move dy,dx= ( " << ac_.dy << "," <<ac_.dx<< ")";
231 
232  // For all mazes.
233  for (size_t m=0; m<number_of_mazes; m++) {
234  mic::types::MatrixXdPtr pos_probs = maze_position_probabilities[m];
235  // Make a copy of probabilities.
236  mic::types::MatrixXd old_pose_probs = (*pos_probs);
237  // Set probabilities to zero.
238  (*pos_probs).setZero();
239 
240  // Iterate through position probabilities and update them.
241  for (size_t y=0; y<maze_height; y++) {
242  for (size_t x=0; x<maze_width; x++) {
243  size_t exact_y = (y + maze_height + ac_.dy) %maze_height;
244  size_t overshoot_y = (y + maze_height + ac_.dy + 1) %maze_height;
245  size_t undershoot_y = (y + maze_height + ac_.dy - 1) %maze_height;
246 
247  size_t exact_x = (x + maze_width + ac_.dx) %maze_width;
248  size_t overshoot_x = (x + maze_width + ac_.dx + 1) %maze_width;
249  size_t undershoot_x = (x + maze_width + ac_.dx - 1) %maze_width;
250 
251  (*pos_probs)(exact_y, exact_x) += exact_move_probability_ * old_pose_probs(y, x);
252  (*pos_probs)(overshoot_y, overshoot_x) += overshoot_move_probability_ * old_pose_probs(y, x);
253  (*pos_probs)(undershoot_y, undershoot_x) += undershoot_move_probability_ * old_pose_probs(y, x);
254 
255  }//: for j
256  }//: for i
257 
258  // Display results.
259  LOG(LNOTICE) << "maze(" <<m<<"):\n" << (*mazes[m]);
260  LOG(LNOTICE) << "maze_pose_prob(" <<m<<"):\n" << (*maze_position_probabilities[m]);
261  }//: for m
262 
263  // Perform the REAL move.
264  hidden_y = (hidden_y + maze_height + ac_.dy) % maze_height;
265  hidden_x = (hidden_x + maze_width + ac_.dx) % maze_width;
266 
267  LOG(LWARNING) << "Hidden position in maze " << hidden_maze_number << "= (" << hidden_y << "," << hidden_x << ")";
268 }
269 
270 
271 
272 
274  // Update maze_probabilities.
275  for (size_t m=0; m<number_of_mazes; m++) {
276  // Reset probability.
277  maze_probabilities[m] = 0;
278  mic::types::MatrixXdPtr pos_probs = maze_position_probabilities[m];
279  // Sum probabilities of all positions.
280  for (size_t i=0; i<maze_height; i++) {
281  for (size_t j=0; j<maze_width; j++) {
282  maze_probabilities[m] += (*pos_probs)(i,j);
283  }//: for j
284  }//: for i
285  }//: for m
286 
287  // Update maze_x_coordinate_probilities.
288  for (size_t x=0; x<maze_width; x++) {
289  // Reset probability.
291  for (size_t m=0; m<number_of_mazes; m++) {
292  mic::types::MatrixXdPtr pos_probs = maze_position_probabilities[m];
293  // Sum probabilities of all positions.
294  for (size_t y=0; y<maze_height; y++) {
295  maze_x_coordinate_probilities[x] += (*pos_probs)(y,x);
296  }//: for y
297  }//: for m
298  }//: for x
299 
300 
301  // Update maze_y_coordinate_probilities.
302  for (size_t y=0; y<maze_height; y++) {
303  // Reset probability.
305  for (size_t m=0; m<number_of_mazes; m++) {
306  mic::types::MatrixXdPtr pos_probs = maze_position_probabilities[m];
307  // Sum probabilities of all positions.
308  for (size_t x=0; x<maze_width; x++) {
309  maze_y_coordinate_probilities[y] += (*pos_probs)(y,x);
310  }//: for x
311  }//: for m
312  }//: for y
313 
314 }
315 
316 
317 
319  double best_action_utility = 0.0;
320  size_t best_action = -1;
321 
322  // Calculate probabilities of all actions.
323  for (size_t act_t=0; act_t < 4; act_t++) {
324  mic::types::NESWAction ac((types::NESW)act_t);
325 
326  // Check the score of a given action.
327  for (size_t m=0; m<number_of_mazes; m++) {
328 
329  for (size_t y=0; y<maze_height; y++) {
330  for (size_t x=0; x<maze_width; x++) {
331  // Check result of the next motion.
332  // Compute resulting coordinates.
333  size_t new_y = (y + maze_height + ac.dy) % maze_height;
334  size_t new_x = (x + maze_width + ac.dx) % maze_width;
335  // Get image patch.
336  short patch = (*mazes[m])(new_y, new_x);
337  LOG(LDEBUG) << "maze [" << m << "] (y=" << y <<",x="<< x <<") move="<< act_t << "=> (y+dy=" << new_y << ",x+dx=" << new_x <<") patch=" << patch << std::endl;
338  // Get patch probability.
339  double patch_prob = maze_patch_probabilities[patch];
340  // Check the action utility.
341  double action_utility = (*maze_position_probabilities[m])(new_y, new_x) * (1- patch_prob);
342  LOG(LDEBUG) << "patch_prob= " << patch_prob << " action_utility=" << action_utility << std::endl;
343  if (action_utility > best_action_utility) {
344  best_action_utility = action_utility;
345  best_action = act_t;
346  LOG(LDEBUG) << "found action " << best_action << " with biggest utility " << best_action_utility << std::endl;
347  }
348  }//: for j
349  }//: for i
350 
351  }//: for each maze
352  }//: for each action type
353 
354  mic::types::NESWAction a((types::NESW) best_action);
355  return a;
356 }
357 
358 
360  mic::types::VectorXf action_utilities(4);
361  action_utilities.setZero();
362 
363 
364  // Calculate probabilities of all actions.
365  for (size_t act_t=0; act_t < 4; act_t++) {
366  mic::types::NESWAction ac((types::NESW)act_t);
367 
368  // Check the score of a given action.
369  for (size_t m=0; m<number_of_mazes; m++) {
370 
371  for (size_t y=0; y<maze_height; y++) {
372  for (size_t x=0; x<maze_width; x++) {
373  // Check result of the next motion.
374  // Compute resulting coordinates.
375  size_t new_y = (y + maze_height + ac.dy) % maze_height;
376  size_t new_x = (x + maze_width + ac.dx) % maze_width;
377  // Get image patch.
378  short patch = (*mazes[m])(new_y, new_x);
379  LOG(LDEBUG) << "maze [" << m << "] (y=" << y <<",x="<< x <<") move="<< act_t << "=> (y+dy=" << new_y << ",x+dx=" << new_x <<") patch=" << patch << std::endl;
380  // Get patch probability.
381  double patch_prob = maze_patch_probabilities[patch];
382  // Check the action result.
383  double tmp_action_utility = (*maze_position_probabilities[m])(new_y, new_x) * (1- patch_prob);
384  LOG(LDEBUG) << "patch_prob= " << patch_prob << " action_utility=" << tmp_action_utility << std::endl;
385 
386  // Add action utility.
387  action_utilities(act_t) += tmp_action_utility;
388  }//: for j
389  }//: for i
390 
391  }//: for each maze
392  }//: for each action type
393 
394  // Select best action
395  size_t best_action = -1;
396  double best_action_utility = 0.0;
397  for (size_t act_t=0; act_t < 4; act_t++) {
398  if (action_utilities(act_t) > best_action_utility) {
399  best_action_utility = action_utilities(act_t);
400  best_action = act_t;
401  LOG(LDEBUG) << "found action " << best_action << " with biggest utility " << best_action_utility << std::endl;
402  }
403 
404  }//: for each action type
405 
406 
407  mic::types::NESWAction a((types::NESW) best_action);
408  return a;
409 }
410 
411 
412 
413 
414 
415 } /* namespace algorithms */
416 } /* namespace mic */
void probabilisticMove(mic::types::Action2DInterface ac_, double exact_move_probability_, double overshoot_move_probability_, double undershoot_move_probability_)
void setHiddenPose(int hidden_maze_number_, int hidden_x_, int hidden_y_)
unsigned int maze_width
Width of maze.
std::vector< mic::types::MatrixXiPtr > mazes
List of mazes.
void move(mic::types::Action2DInterface ac_)
unsigned int maze_height
Height of maze.
int hidden_maze_number
shortariable denoting in which maze are we right now (unknown, to be determined). ...
void sense(double hit_factor_, double miss_factor_)
int hidden_x
Variable denoting the x position are we right now (unknown, to be determined).
mic::types::Action2DInterface sumOfMostUniquePatchesActionSelection()
mic::types::Action2DInterface mostUniquePatchActionSelection()
std::vector< double > maze_patch_probabilities
Variable storing the probability that we can find given patch in a given maze.
std::vector< double > maze_probabilities
Variable storing the probability that we are currently moving in/observing a given maze...
unsigned int number_of_mazes
Problem dimensions - number of mazes.
std::vector< double > maze_y_coordinate_probilities
Variable storing the probability that we are currently in a given y coordinate.
unsigned int problem_dimensions
Problem dimensions - number of mazes * their width * their height.
void setMazes(std::vector< mic::types::MatrixXiPtr > &mazes_, unsigned int number_of_distinctive_patches_)
int hidden_y
Variable denoting the y position are we right now (unknown, to be determined).
std::vector< double > maze_x_coordinate_probilities
Variable storing the probability that we are currently in a given x coordinate.
unsigned int number_of_distinctive_patches
Problem dimensions - number of distinctive patches (in here - number of different digits...
std::vector< mic::types::MatrixXdPtr > maze_position_probabilities
Variable storing the probability that we are in a given maze position.