MachineIntelligenceCore:ReinforcementLearning
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator
MazeOfDigits.cpp
Go to the documentation of this file.
1 
23 #include <types/MazeOfDigits.hpp>
24 
25 namespace mic {
26 namespace environments {
27 
28 
29 MazeOfDigits::MazeOfDigits(std::string node_name_) : Environment(node_name_),
30  type("type", 0)
31 {
32  // Register properties - so their values can be overridden (read from the configuration file).
33  registerProperty(type);
34 
36 }
37 
38 MazeOfDigits::MazeOfDigits (const mic::environments::MazeOfDigits & md_) : Environment(md_.getNodeName()+"_copy"),
39  type("type", md_.type)
40 {
41  // Register properties - so their values can be overridden (read from the configuration file).
42  registerProperty(type);
43  // Not used, but still let's copy it.
44  width = md_.width;
45  height = md_.height;
46  channels = md_.channels;
50 }
51 
52 
54  // TODO Auto-generated destructor stub
55 }
56 
58  width = md_.width;
59  height = md_.height;
60  channels = md_.channels;
64 
65  return *this;
66 }
67 
68 
70  // Empty - everything will be initialized in environment initialization.
71 }
72 
74  // Generate adequate maze.
75  switch(type) {
76  case 0 : initExemplaryMaze(); break;
77  case -3:
78  case -4: initRandomStructuredMaze(); break;
79  case -5:
80  case -6: initRandomPathMaze(); break;
81  case -2:
82  case -1:
83  default: initFullyRandomMaze();
84  }//: switch
85 
86  // Check whether it is a POMDP or not.
87  if (roi_size >0) {
88  pomdp_flag = true;
89  observation_grid->resize({roi_size, roi_size, 1});
90  } else {
91  observation_grid->resize({width, height, 1});
92  }//: else
93 }
94 
96  LOG(LNOTICE) << "Generating an exemplary maze of digits";
97  /*
98  * [['2','4','7','7'],
99  * ['1','5','7','9'],
100  * ['2','3','6','8'],
101  * ['A','2','5','6']]
102  */
103 
104 
105  // Overwrite dimensions.
106  width = 4;
107  height = 4;
108 
109  // Set environment_grid size.
110  environment_grid->resize({width, height, channels});
111  environment_grid->zeros();
112 
113  // Place the agent.
114  initial_position.set(0,1);
116 
117  // Place digit
118  (*environment_grid)({0,0, (size_t)MazeOfDigitsChannels::Digits}) = 2;
119  (*environment_grid)({1,0, (size_t)MazeOfDigitsChannels::Digits}) = 4;
120  (*environment_grid)({2,0, (size_t)MazeOfDigitsChannels::Digits}) = 7;
121  (*environment_grid)({3,0, (size_t)MazeOfDigitsChannels::Digits}) = 7;
122  (*environment_grid)({0,1, (size_t)MazeOfDigitsChannels::Digits}) = 1;
123  (*environment_grid)({1,1, (size_t)MazeOfDigitsChannels::Digits}) = 5;
124  (*environment_grid)({2,1, (size_t)MazeOfDigitsChannels::Digits}) = 7;
125  (*environment_grid)({3,1, (size_t)MazeOfDigitsChannels::Digits}) = 9;
126  (*environment_grid)({0,2, (size_t)MazeOfDigitsChannels::Digits}) = 2;
127  (*environment_grid)({1,2, (size_t)MazeOfDigitsChannels::Digits}) = 3;
128  (*environment_grid)({2,2, (size_t)MazeOfDigitsChannels::Digits}) = 6;
129  (*environment_grid)({3,2, (size_t)MazeOfDigitsChannels::Digits}) = 8;
130  (*environment_grid)({0,3, (size_t)MazeOfDigitsChannels::Digits}) = 1;
131  (*environment_grid)({1,3, (size_t)MazeOfDigitsChannels::Digits}) = 2;
132  (*environment_grid)({2,3, (size_t)MazeOfDigitsChannels::Digits}) = 5;
133  (*environment_grid)({3,3, (size_t)MazeOfDigitsChannels::Digits}) = 6;
134 
135 
136  // Place goal(s).
137  (*environment_grid)({3,0, (size_t)MazeOfDigitsChannels::Goals}) = 10;
138 
139  // Calculate the optimal path length.
141 
142 }
143 
145  // Generate only the new agent position.
146 
147  // Find the goal.
148  mic::types::Position2D goal;
149  for (size_t x = 0; x < width; x++)
150  for (size_t y = 0; y < height; y++)
151  if ((*environment_grid)({ x, y, (size_t)MazeOfDigitsChannels::Goals }) > 0) {
152  goal.x = x;
153  goal.y = y;
154  break;
155  } //: if
156 
157  // Try to place the agent.
158  mic::types::Position2D agent;
159  while (1) {
160  // Random position.
161  agent.rand(0, width - 1, 0, height - 1);
162 
163  // Validate pose.
164  if ((*environment_grid)({ (size_t)agent.x, (size_t)agent.y, (size_t)MazeOfDigitsChannels::Goals }) != 0)
165  continue;
166 
167  // Ok, move agent to that position.
168  initial_position = agent;
170  break;
171  } //: while
172 
173  // Recalculate the optimal path length.
174  optimal_path_length = abs((int) goal.x - (int) agent.x) + abs((int) goal.y - (int) agent.y);
175 }
176 
177 
179  LOG(LNOTICE) << "Generating a fully random " << width << "x" << height<< " maze of digits";
180 
181  static bool maze_generated = false;
182 
183  // If maze type = -1: do not generate a new maze.
184  if (((short)type == -1) && (maze_generated)) {
186  return;
187  }
188 
189  // Set environment_grid size.
190  environment_grid->resize({width, height, channels});
191  environment_grid->zeros();
192 
193  // Place the agent.
194  mic::types::Position2D agent(0, width-1, 0, height-1);
195  initial_position = agent;
197 
198 
199  // Place goal.
200  mic::types::Position2D goal;
201  while(1) {
202  // Random position.
203  goal.rand(0, width-1, 0, height-1);
204 
205  // Validate pose.
206  if ((*environment_grid)({(size_t)goal.x, (size_t)goal.y, (size_t)MazeOfDigitsChannels::Agent}) != 0)
207  continue;
208 
209  // Ok, add the goal.
210  (*environment_grid)({(size_t)goal.x, (size_t)goal.y, (size_t)MazeOfDigitsChannels::Goals}) = 10;
211  (*environment_grid)({(size_t)goal.x, (size_t)goal.y, (size_t)MazeOfDigitsChannels::Digits}) = 9;
212  break;
213  }//: while
214 
215  // Calculate the optimal path length.
216  optimal_path_length = abs((int)goal.x-(int)agent.x) + abs((int)goal.y-(int)agent.y);
217 
218  // Initialize random device and generator.
219  std::random_device rd;
220  std::mt19937_64 rng_mt19937_64(rd());
221 
222  // Initialize uniform integer distribution for digit.
223  std::uniform_int_distribution<size_t> d_dist(0, 8);
224 
225 
226  // Fill the "rest" with random digits.
227  for(size_t x=0; x<width; x++ ){
228  for(size_t y=0; y<height; y++ ){
229 
230  // Skip the goal.
231  if ((x == (size_t)goal.x) && (y == (size_t)goal.y))
232  continue;
233 
234  // Random variables.
235  size_t d = d_dist(rng_mt19937_64);
236  (*environment_grid)({(size_t)x, (size_t)y, (size_t)MazeOfDigitsChannels::Digits}) = d;
237 
238  }//:for
239  }//:for
240 
241  maze_generated = true;
242 }
243 
244 
246  LOG(LNOTICE) << "Generating a structured random " << width << "x" << height<< " maze of digits";
247 
248  static bool maze_generated = false;
249 
250  // If maze type = -3: do not generate a new maze.
251  if (((short)type == -3) && (maze_generated)) {
253  return;
254  }
255 
256  // Set environment_grid size.
257  environment_grid->resize({width, height, channels});
258  environment_grid->zeros();
259 
260  // Place the agent.
261  mic::types::Position2D agent(0, width-1, 0, height-1);
262  initial_position = agent;
264 
265 
266  // Place goal.
267  mic::types::Position2D goal;
268  while(1) {
269  // Random position.
270  goal.rand(0, width-1, 0, height-1);
271 
272  // Validate pose.
273  if ((*environment_grid)({(size_t)goal.x, (size_t)goal.y, (size_t)MazeOfDigitsChannels::Agent}) != 0)
274  continue;
275 
276  // Ok, add the goal.
277  (*environment_grid)({(size_t)goal.x, (size_t)goal.y, (size_t)MazeOfDigitsChannels::Goals}) = 10;
278  (*environment_grid)({(size_t)goal.x, (size_t)goal.y, (size_t)MazeOfDigitsChannels::Digits}) = 9;
279  break;
280  }//: while
281 
282  // Calculate the optimal path length.
283  optimal_path_length = abs((int)goal.x-(int)agent.x) + abs((int)goal.y-(int)agent.y);
284 
285  // Initialize random device and generator.
286  std::random_device rd;
287  std::mt19937_64 rng_mt19937_64(rd());
288 
289 
290  // Fill the "rest" with random digits.
291  for(size_t x=0; x<width; x++ ){
292  for(size_t y=0; y<height; y++ ){
293 
294  // Skip the goal.
295  if ((x == (size_t)goal.x) && (y == (size_t)goal.y))
296  continue;
297 
298  // Calculate the distance.
299  float dist = (float)sqrt((x-goal.x)*(x-goal.x) + (y-goal.y)*(y-goal.y));
300  // Take into account the scale - size of maze.
301  float scaled_dist = 10*dist/sqrt((width*height));
302  // Truncate it to 0-0.
303  size_t min, max;
304  if (scaled_dist<1.1) {
305  min = max = 8;
306  } else {
307  min = 9 - ((scaled_dist >= 9) ? 9 : scaled_dist);
308  //max = ((min +1 >= 9) ? 9 : min + 1);
309  max = min + 1;
310  }//: else
311 
312  // Random variables.
313  std::uniform_int_distribution<size_t> d_dist(min, max);
314  size_t d = d_dist(rng_mt19937_64);
315  LOG(LDEBUG)<< " x = " << x << " goal.x = " << goal.x << " y = " << y << " goal.y = " << goal.y << " dist = " << dist << " scaled_dist = " << scaled_dist << " min = " << min << " max = " << max << " d = " << d;
316  (*environment_grid)({(size_t)x, (size_t)y, (size_t)MazeOfDigitsChannels::Digits}) = d;
317 
318  }//:for
319  }//:for
320 
321  maze_generated = true;
322 }
323 
325 
326  LOG(LNOTICE) << "Generating a random patch " << width << "x" << height<< " maze of digits";
327 
328  static bool maze_generated = false;
329 
330  // If maze type = -5: do not generate a new maze.
331  if (((short)type == -5) && (maze_generated)) {
333  return;
334  }
335 
336  // Set environment size.
337  environment_grid->resize({width, height, channels});
338  environment_grid->zeros();
339 // environment_grid->setValue(-1);
340 
341  // Place the agent.
342  mic::types::Position2D agent(0, width-1, 0, height-1);
343  initial_position = agent;
345 
346 
347  // Place goal.
348  mic::types::Position2D goal;
349  while(1) {
350  // Random position.
351  goal.rand(2, width-3, 2, height-3);
352 
353  // Validate pose.
354  if ((*environment_grid)({(size_t)goal.x, (size_t)goal.y, (size_t)MazeOfDigitsChannels::Agent}) != 0)
355  continue;
356 
357  // Ok, add the goal.
358  (*environment_grid)({(size_t)goal.x, (size_t)goal.y, (size_t)MazeOfDigitsChannels::Goals}) = 10;
359  (*environment_grid)({(size_t)goal.x, (size_t)goal.y, (size_t)MazeOfDigitsChannels::Digits}) = 9;
360  break;
361  }//: while
362 
363  // Calculate the optimal path length.
364  optimal_path_length = abs((int)goal.x-(int)agent.x) + abs((int)goal.y-(int)agent.y);
365 
366  // Initialize random device and generator.
367  std::random_device rd;
368  std::mt19937_64 rng_mt19937_64(rd());
369 
370  // Check quarter and calculate "main path direction".
371  types::NESWAction dir_min, dir_max;
372  if (((size_t)goal.x < width/2) && ((size_t)goal.y < height/2)) {
373  // First square -> go E, S or ES.
374  dir_min.dx = 0;
375  dir_max.dx = 1;
376  dir_min.dy = 0;
377  dir_max.dy = 1;
378 
379  } else if (((size_t)goal.x >= width/2) && ((size_t)goal.y < height/2)) {
380  // Second square -> go S or WS or W.
381  dir_min.dx = -1;
382  dir_max.dx = 0;
383  dir_min.dy = 0;
384  dir_max.dy = 1;
385 
386  } else if (((size_t)goal.x >= width/2) && ((size_t)goal.y >= height/2)) {
387  // Third square -> go N, W or NW
388  dir_min.dx = -1;
389  dir_max.dx = 0;
390  dir_min.dy = -1;
391  dir_max.dy = 0;
392 
393  } else {
394  // Fourth square -> go N, E or NE
395  dir_min.dx = 0;
396  dir_max.dx = 1;
397  dir_min.dy = -1;
398  dir_max.dy = 0;
399  }
400 
401  // Generate path direction distributions.
402  std::uniform_int_distribution<size_t> x_dist(dir_min.dx, dir_max.dx);
403  std::uniform_int_distribution<size_t> y_dist(dir_min.dy, dir_max.dy);
404 
405  // Create a path starting in the goal and using the main direction.
406  mic::types::Position2D cur = goal;
407 
408  std::vector<mic::types::Position2D> path;
409  path.push_back(goal);
410  while (1) {
411 
412  // "Move" ;)
413  types::NESWAction action;
414  action.dx = x_dist(rng_mt19937_64);
415  action.dy = y_dist(rng_mt19937_64);
416  cur = cur + action;
417 
418  if ((cur.x < 0) || (cur.y < 0) || ((size_t)cur.x >= width) || ((size_t)cur.y >= height))
419  break;
420 
421  // Skip the goal.
422  if ((cur.x == goal.x) && (cur.y == goal.y))
423  continue;
424 
425  // Add point to path.
426  path.push_back(cur);
427 
428  // Calculate the distance from goal.
429  float dist = (float)sqrt((cur.x-goal.x)*(cur.x-goal.x) + (cur.y-goal.y)*(cur.y-goal.y));
430  // Take into account the scale - size of maze.
431  float scaled_dist = 5*dist/sqrt((width*height));
432  // Truncate it to 0-0.
433  size_t min, max;
434  if (scaled_dist<1.1) {
435  min = max = 8;
436  } else {
437  min = 9 - ((scaled_dist >= 9) ? 9 : scaled_dist);
438  //max = ((min +1 >= 9) ? 9 : min + 1);
439  max = min + 1;
440  }//: else
441 
442  // Random variables.
443  std::uniform_int_distribution<size_t> d_dist(min, max);
444  size_t d = d_dist(rng_mt19937_64);
445  LOG(LDEBUG)<< " x = " << cur.x << " goal.x = " << goal.x << " y = " << cur.y << " goal.y = " << goal.y << " dist = " << dist << " scaled_dist = " << scaled_dist << " min = " << min << " max = " << max << " d = " << d;
446  (*environment_grid)({(size_t)cur.x, (size_t)cur.y, (size_t)MazeOfDigitsChannels::Digits}) = d;
447 
448  };
449 
450  // "Grow the path" by 1.
451  for (auto point : path) {
452  // Create distribution basing on patch point.
453  size_t min, max;
454  max = (*environment_grid)({(size_t)point.x, (size_t)point.y, (size_t)MazeOfDigitsChannels::Digits});
455  max = (max == 9) ? 8 : max;
456  min = ((max < 1) ? 1 : max - 1); // size_t truncates that to zero.
457  std::uniform_int_distribution<size_t> d_dist(min, max);
458 
459  // Check 4 neighbours.
460  setBiggerDigit(point.x, point.y-1, d_dist(rng_mt19937_64));
461  setBiggerDigit(point.x-1, point.y, d_dist(rng_mt19937_64));
462  setBiggerDigit(point.x+1, point.y, d_dist(rng_mt19937_64));
463  setBiggerDigit(point.x, point.y+1, d_dist(rng_mt19937_64));
464  }//:for
465 
466  // "Grow the path" by 2.
467  for (auto point : path) {
468  // Create distribution basing on patch point.
469  size_t min, max;
470  max = (*environment_grid)({(size_t)point.x, (size_t)point.y, (size_t)MazeOfDigitsChannels::Digits});
471  max = (max > 8) ? 7 : max;
472  min = ((max < 2) ? 1 : max - 2); // size_t truncates that to zero.
473  std::uniform_int_distribution<size_t> d2_dist(min, max);
474 
475  // Check 8 neighbours.
476  setBiggerDigit(point.x, point.y-2, d2_dist(rng_mt19937_64));
477  setBiggerDigit(point.x-1, point.y-1, d2_dist(rng_mt19937_64));
478  setBiggerDigit(point.x+1, point.y-1, d2_dist(rng_mt19937_64));
479  setBiggerDigit(point.x-2, point.y, d2_dist(rng_mt19937_64));
480  setBiggerDigit(point.x+2, point.y, d2_dist(rng_mt19937_64));
481  setBiggerDigit(point.x-1, point.y+1, d2_dist(rng_mt19937_64));
482  setBiggerDigit(point.x+1, point.y+1, d2_dist(rng_mt19937_64));
483  setBiggerDigit(point.x, point.y+2, d2_dist(rng_mt19937_64));
484  }//:for
485 
486  // "Grow the path" by 3.
487  for (auto point : path) {
488  // Create distribution basing on patch point.
489  size_t min, max;
490  max = (*environment_grid)({(size_t)point.x, (size_t)point.y, (size_t)MazeOfDigitsChannels::Digits});
491  max = (max > 7) ? 6 : max;
492  min = ((max < 2) ? 1 : max - 2); // size_t truncates that to zero.
493  std::uniform_int_distribution<size_t> d3_dist(min, max);
494 
495  // Check 8 neighbours.
496  setBiggerDigit(point.x+2, point.y-1, d3_dist(rng_mt19937_64));
497  setBiggerDigit(point.x+2, point.y+1, d3_dist(rng_mt19937_64));
498  setBiggerDigit(point.x-2, point.y-1, d3_dist(rng_mt19937_64));
499  setBiggerDigit(point.x-2, point.y+1, d3_dist(rng_mt19937_64));
500  setBiggerDigit(point.x+1, point.y-2, d3_dist(rng_mt19937_64));
501  setBiggerDigit(point.x+1, point.y+2, d3_dist(rng_mt19937_64));
502  setBiggerDigit(point.x-1, point.y-2, d3_dist(rng_mt19937_64));
503  setBiggerDigit(point.x-1, point.y+2, d3_dist(rng_mt19937_64));
504  }//:for
505 
506  // Fill the "rest" with random digits.
507  for(size_t x=0; x<width; x++ ){
508  for(size_t y=0; y<height; y++ ){
509 
510  // Random variables.
511  std::uniform_int_distribution<size_t> d_dist(0, 1);
512  size_t d = d_dist(rng_mt19937_64);
513  if ((*environment_grid)({(size_t)x, (size_t)y, (size_t)MazeOfDigitsChannels::Digits}) == 0)
514  (*environment_grid)({(size_t)x, (size_t)y, (size_t)MazeOfDigitsChannels::Digits}) = d;
515 
516  }//:for
517  }//:for
518 
519  // Quick fix :]
520 // (*environment_grid)({(size_t)goal.x, (size_t)goal.y, (size_t)MazeOfDigitsChannels::Digits}) = 9;
521 
522  maze_generated = true;
523 }
524 
525 void MazeOfDigits::setBiggerDigit(size_t x_, size_t y_, size_t value_){
526  //if ((x_ < 0) || (y_ < 0) || (x_ >= width) || (y_ >= height))
527  if ((x_ >= width) || (y_ >= height))
528  return;
529 
530  if ((*environment_grid)({(size_t)x_, (size_t)y_, (size_t)MazeOfDigitsChannels::Digits}) < value_)
531  (*environment_grid)({(size_t)x_, (size_t)y_, (size_t)MazeOfDigitsChannels::Digits}) = value_;
532 }
533 
534 std::string MazeOfDigits::gridToString(mic::types::TensorXfPtr & grid_) {
535  std::string s;
536  // Add line.
537  s+= "+";
538  for (size_t x=0; x<grid_->dim(0); x++)
539  s+="---";
540  s+= "+\n";
541 
542  for (size_t y=0; y<grid_->dim(1); y++){
543  s += "|";
544  for (size_t x=0; x<grid_->dim(0); x++) {
545  // Check object occupancy.
546  if ((*grid_)({x,y, (size_t)MazeOfDigitsChannels::Agent}) != 0) {
547  // Display agent.
548  s += "<A>";
549  } else if ((*grid_)({x,y, (size_t)MazeOfDigitsChannels::Walls}) != 0) {
550  // Display wall.
551  s += " # ";
552 /* } else if ((*grid_)({x,y, (size_t)MazeOfDigitsChannels::Goals}) > 0) {
553  // Display goal.
554  s += " + ,";*/
555  } else
556  // Display pit.
557  s += " " + std::to_string((unsigned short)(*grid_)({x,y, (size_t)MazeOfDigitsChannels::Digits})) + " ";
558  }//: for x
559  s += "|\n";
560  }//: for y
561 
562  // Add line.
563  s+= "+";
564  for (size_t x=0; x<grid_->dim(0); x++)
565  s+="---";
566  s+= "+\n";
567 
568  return s;
569 }
570 
573 }
574 
576  if (pomdp_flag) {
577  // Get observation.
578  mic::types::TensorXfPtr obs = getObservation();
579  return gridToString(obs);
580  }
581  else
583 }
584 
585 mic::types::MatrixXfPtr MazeOfDigits::encodeEnvironment() {
586  // Temporarily reshape the environment grid.
587  environment_grid->conservativeResize({1, width * height * channels});
588  // Create a matrix pointer and copy data from grid into the matrix.
589  mic::types::MatrixXfPtr encoded_grid (new mic::types::MatrixXf(*environment_grid));
590  // Back to the original shape.
591  environment_grid->resize({width, height, channels});
592 
593  // Return the matrix pointer.
594  return encoded_grid;
595 }
596 
597 mic::types::MatrixXfPtr MazeOfDigits::encodeObservation() {
598  LOG(LDEBUG) << "encodeObservation()";
599  if (pomdp_flag) {
600  mic::types::Position2D p = getAgentPosition();
601  LOG(LDEBUG) << p;
602 
603  // Get observation.
604  mic::types::TensorXfPtr obs = getObservation();
605  // Temporarily reshape the observation grid.
606  obs->conservativeResize({1, roi_size * roi_size * 1});
607  // Encode the observation.
608  mic::types::MatrixXfPtr encoded_obs (new mic::types::MatrixXf(*obs));
609  // Back to the original shape.
610  obs->conservativeResize({roi_size, roi_size, 1});
611 
612  // Return encoded observation.
613  return encoded_obs;
614  }
615  else
616  return encodeEnvironment();
617 }
618 
619 
620 mic::types::TensorXfPtr MazeOfDigits::getObservation() {
621  LOG(LDEBUG) << "getObservation()";
622  // Reset observation.
623  observation_grid->zeros();
624 
625  size_t delta = (roi_size-1)/2;
626  mic::types::Position2D p = getAgentPosition();
627 
628  // Copy data.
629  for (long oy=0, ey=(p.y-delta); oy<(long)roi_size; oy++, ey++){
630  for (long ox=0, ex=(p.x-delta); ox<(long)roi_size; ox++, ex++) {
631  // Check grid boundaries.
632  if ((ex < 0) || (ex >= (long)width) || (ey < 0) || (ey >= (long)height)){
633  // Place the wall only
634  //(*observation_grid)({(size_t)ox, (size_t)oy, (size_t)MazeOfDigitsChannels::Walls}) = 1;
635  continue;
636  }//: if
637  // Else : copy data for all channels.
638  //(*observation_grid)({(size_t)ox,(size_t)oy, (size_t)MazeOfDigitsChannels::Goals}) = (*environment_grid)({(size_t)ex,(size_t)ey, (size_t)MazeOfDigitsChannels::Goals});
639  (*observation_grid)({(size_t)ox,(size_t)oy, (size_t)MazeOfDigitsChannels::Digits}) = (*environment_grid)({(size_t)ex,(size_t)ey, (size_t)MazeOfDigitsChannels::Digits});
640  //(*observation_grid)({(size_t)ox,(size_t)oy, (size_t)MazeOfDigitsChannels::Walls}) = (*environment_grid)({(size_t)ex,(size_t)ey, (size_t)MazeOfDigitsChannels::Walls});
641  //(*observation_grid)({(size_t)ox,(size_t)oy, (size_t)MazeOfDigitsChannels::Agent}) = (*environment_grid)({(size_t)ex,(size_t)ey, (size_t)MazeOfDigitsChannels::Agent});
642  }//: for x
643  }//: for y
644 
645  //LOG(LDEBUG) << std::endl << gridToString(observation_grid);
646 
647  return observation_grid;
648 }
649 
650 
651 mic::types::MatrixXfPtr MazeOfDigits::encodeAgentGrid() {
652  // DEBUG - copy only agent pose data, avoid goals etc.
653  mic::types::MatrixXfPtr encoded_grid (new mic::types::MatrixXf(height, width));
654  encoded_grid->setZero();
655 
656  for (size_t y=0; y<height; y++){
657  for (size_t x=0; x<width; x++) {
658  // Check object occupancy.
659  if ((*environment_grid)({x,y, (size_t)MazeOfDigitsChannels::Agent}) != 0) {
660  // Set one.
661  (*encoded_grid)(y,x) = 1;
662  break;
663  }
664  }//: for x
665  }//: for y
666  encoded_grid->resize(height*width, 1);
667 
668  // Return the matrix pointer.
669  return encoded_grid;
670 }
671 
672 
673 mic::types::Position2D MazeOfDigits::getAgentPosition() {
674  mic::types::Position2D position;
675  for (size_t y=0; y<height; y++){
676  for (size_t x=0; x<width; x++) {
677  if ((*environment_grid)({x,y, (size_t)MazeOfDigitsChannels::Agent}) == 1) {
678  position.x = x;
679  position.y = y;
680  return position;
681  }// if
682  }//: for x
683  }//: for y
684  // Remove warnings...
685  return position;
686 }
687 
688 bool MazeOfDigits::moveAgentToPosition(mic::types::Position2D pos_) {
689  LOG(LDEBUG) << "New agent position = " << pos_;
690 
691  // Check whether the state is allowed.
692  if (!isStateAllowed(pos_))
693  return false;
694 
695  // Clear old.
696  mic::types::Position2D old = getAgentPosition();
697  (*environment_grid)({(size_t)old.x, (size_t)old.y, (size_t)MazeOfDigitsChannels::Agent}) = 0;
698  // Set new.
699  (*environment_grid)({(size_t)pos_.x, (size_t)pos_.y, (size_t)MazeOfDigitsChannels::Agent}) = 1;
700 
701  return true;
702 }
703 
704 
705 float MazeOfDigits::getStateReward(mic::types::Position2D pos_) {
706  // Check reward - goal.
707  if ((*environment_grid)({(size_t)pos_.x, (size_t)pos_.y, (size_t)MazeOfDigitsChannels::Goals}) != 0)
708  return (*environment_grid)({(size_t)pos_.x, (size_t)pos_.y, (size_t)MazeOfDigitsChannels::Goals});
709  else
710  return 0;
711 }
712 
713 
714 bool MazeOfDigits::isStateAllowed(mic::types::Position2D pos_) {
715  if ((pos_.x < 0) || ((size_t)pos_.x >= width))
716  return false;
717 
718  if ((pos_.y < 0) || ((size_t)pos_.y >= height))
719  return false;
720 
721  // Check walls!
722  if ((*environment_grid)({(size_t)pos_.x, (size_t)pos_.y, (size_t)MazeOfDigitsChannels::Walls}) != 0)
723  return false;
724 
725  return true;
726 }
727 
728 
729 bool MazeOfDigits::isStateTerminal(mic::types::Position2D pos_) {
730  if ((pos_.x < 0) || ((size_t)pos_.x >= width))
731  return false;
732 
733  if ((pos_.y < 0) || ((size_t)pos_.y >= height))
734  return false;
735 
736  // Check reward - goal or pit.
737  if ((*environment_grid)({(size_t)pos_.x, (size_t)pos_.y, (size_t)MazeOfDigitsChannels::Goals}) != 0)
738  return true;
739  else
740  return false;
741 }
742 } /* namespace environments */
743 } /* namespace mic */
virtual float getStateReward(mic::types::Position2D pos_)
virtual std::string observationToString()
Abstract class representing an environment.
Definition: Environment.hpp:40
virtual bool isStateTerminal(mic::types::Position2D pos_)
mic::environments::MazeOfDigits & operator=(const mic::environments::MazeOfDigits &md)
size_t channels
Number of channels.
virtual mic::types::Position2D getAgentPosition()
virtual bool isStateAllowed(mic::types::Position2D pos_)
mic::types::TensorXfPtr getObservation()
virtual mic::types::MatrixXfPtr encodeEnvironment()
std::string gridToString(mic::types::TensorXfPtr &grid_)
mic::configuration::Property< size_t > width
Property: width of the environment.
Class emulating the maze of digits environment.
virtual void initializePropertyDependentVariables()
bool pomdp_flag
Flag related to.
Channel storing the agent position.
void setBiggerDigit(size_t x_, size_t y_, size_t value_)
virtual mic::types::MatrixXfPtr encodeAgentGrid()
Encode the current state of the reduced grid (only the agent position) as a matrix of size [1...
mic::configuration::Property< size_t > roi_size
Property: size of the ROI (region of interest).
virtual bool moveAgentToPosition(mic::types::Position2D pos_)
mic::types::TensorXfPtr observation_grid
virtual mic::types::MatrixXfPtr encodeObservation()
mic::types::TensorXfPtr environment_grid
Tensor storing the environment.
MazeOfDigits(std::string node_name_="maze_of_digits")
mic::configuration::Property< short > type
mic::types::Position2D initial_position
Property: initial position of the agent.
mic::configuration::Property< size_t > height
Property: height of the environment.
virtual std::string environmentToString()