MachineIntelligenceCore:ReinforcementLearning
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator
EpisodicHistogramFilterMazeLocalization.cpp
Go to the documentation of this file.
1 
24 
25 namespace mic {
26 namespace application {
27 
// Registers application.
// NOTE(review): listing line 33 (the body of this function) is absent from this
// extract, so what is actually registered cannot be seen here - confirm against
// the original source file.
32 void RegisterApplication (void) {
34 }
35 
36 
// Constructor.
// Forwards node_name_ to the OpenGLEpisodicApplication base, gives every
// configuration property its name and default value, and then registers all of
// the properties so their values can be overridden from the configuration file.
//
// @param node_name_ Name passed on to the OpenGLEpisodicApplication base class.
37 EpisodicHistogramFilterMazeLocalization::EpisodicHistogramFilterMazeLocalization(std::string node_name_) : OpenGLEpisodicApplication(node_name_),
// Hidden pose (maze number, x, y): all default to 0.
38  hidden_maze_number("hidden_maze", 0),
39  hidden_x("hidden_x", 0),
40  hidden_y("hidden_y", 0),
// Action selection: -1 selects mostUniquePatchActionSelection; epsilon 0.0 means
// the random (epsilon-greedy) override in the step function is never attempted.
41  action("action", -1),
42  epsilon("epsilon", 0.0),
// Sensing gains used when an observation does / does not match.
43  hit_factor("hit_factor", 0.6),
44  miss_factor("miss_factor", 0.2),
// Motion model defaults: every move is exact, no over- or undershoot.
45  exact_move_probability("exact_move_probability", 1.0),
46  overshoot_move_probability("overshoot_move_probability", 0.0),
47  undershoot_move_probability("undershoot_move_probability", 0.0),
// Stop conditions: iteration cap (1) and minimal maze confidence (2).
48  max_number_of_iterations("max_number_of_iterations",100),
49  min_maze_confidence("min_maze_confidence",0.99),
// File to which the convergence statistics are exported.
50  statistics_filename("statistics_filename","statistics_filename.csv")
51  {
52  // Register properties - so their values can be overridden (read from the configuration file).
53  registerProperty(hidden_maze_number);
54  registerProperty(hidden_x);
55  registerProperty(hidden_y);
56 
57  registerProperty(action);
58  registerProperty(epsilon);
59 
60  registerProperty(hit_factor);
61  registerProperty(miss_factor);
62  registerProperty(exact_move_probability);
63  registerProperty(overshoot_move_probability);
64  registerProperty(undershoot_move_probability);
65 
66  registerProperty(max_number_of_iterations);
67  registerProperty(min_maze_confidence);
68 
69  registerProperty(statistics_filename);
70 
// Informational trace that all properties above have been registered.
71  LOG(LINFO) << "Properties registered";
72 }
73 
74 
77 }
78 
79 
81  // Initialize GLUT! :]
82  VGL_MANAGER->initializeGLUT(argc, argv);
83 
84  // Create the visualization windows - must be created in the same, main thread :]
85  w_localization_time_chart = new WindowCollectorChart<float>("Current_maze", 256, 256, 0, 0);
86  collector_ptr = std::make_shared < mic::utils::DataCollector<std::string, float> >( );//new mic::utils::DataCollector<std::string, float>() );
87  w_localization_time_chart->setDataCollectorPtr(collector_ptr);
88 
89  // Create data containers and add them to chart window.
90  collector_ptr->createContainer("Iteration", mic::types::color_rgba(255, 0, 0, 180));
91  collector_ptr->createContainer("Converged", mic::types::color_rgba(0, 255, 0, 180));
92  collector_ptr->createContainer("Max(Pm)", mic::types::color_rgba(0, 0, 255, 180));
93 }
94 
96 
97  // Import mazes.
98  if ((!importer.importData()) || (importer.size() == 0)){
99  LOG(LERROR) << "The dataset must consists of at least one maze!";
100  exit(0);
101  }//: if
102 
103  // Show mazes.
104  LOG(LNOTICE) << "Loaded mazes";
105  for (size_t m=0; m<importer.size(); m++) {
106  // Display results.
107  LOG(LNOTICE) << "maze(" <<m<<"):\n" << (importer.data()[m]);
108  }//: for
109 
110  // Set mazes.
111  hf.setMazes(importer.data(), 10);
112 
113 }
114 
115 
117  LOG(LWARNING) << "Start new episode";
118 
119  // Assign initial probabilities to all variables (uniform distribution).
121 
122  // Set hidden state to "original one".
124 
125  // Get first observation.
127 
128  // Update aggregated probabilities.
130 }
131 
132 
134  LOG(LWARNING) << "End current episode";
135 
136  collector_ptr->addDataToContainer("Iteration", iteration);
137  collector_ptr->addDataToContainer("Max(Pm)", max_pm);
138 
139  if (iteration >= (size_t)max_number_of_iterations)
140  collector_ptr->addDataToContainer("Converged", 0);
141  else
142  collector_ptr->addDataToContainer("Converged", 1);
143 
144  // Export collected data.
145  if (number_of_episodes==(long)0) {
146  // If number of episodes are not limited
147  collector_ptr->exportDataToCsv(statistics_filename);
148  } else if ( episode >= (size_t) number_of_episodes)
149  collector_ptr->exportDataToCsv(statistics_filename);
150 
151 }
152 
153 
155  LOG(LTRACE) << "Performing a single step (" << iteration << ")";
156 
157  short tmp_action = action;
158 
159  // Check epsilon-greedy action selection.
160  if ((double)epsilon > 0) {
161  if (RAN_GEN->uniRandReal() < (double)epsilon)
162  tmp_action = -3;
163  }//: if
164 
165  // Determine action.
166  mic::types::Action2DInterface act;
167  switch(tmp_action){
168  case (short)-3:
169  LOG(LINFO) << "Random action selection";
170  act = A_RANDOM; break;
171  case (short)-2:
172  LOG(LINFO) << "Sum Of Most Unique Patches action selection";
174  case (short)-1:
175  LOG(LINFO) << "Most Unique Patch action selection";
176  act = hf.mostUniquePatchActionSelection(); break;
177  default:
178  act = mic::types::NESWAction((mic::types::NESW) (short)tmp_action);
179  }//: switch action
180 
181  // Perform move.
183 
184 
185  // Get current observation.
187 
188  // Update state.
190 
191  // Check terminal condition(s).
192 
193  // 1. Check iteration number.
194  if (iteration >= (size_t)max_number_of_iterations)
195  return false;
196 
197  // 2. Check max maze probability.
198  max_pm = 0;
199  for (size_t m=0; m<importer.size(); m++) {
201  }//: for
203  return false;
204 
205  return true;
206 }
207 
208 
209 } /* namespace application */
210 } /* namespace mic */
void probabilisticMove(mic::types::Action2DInterface ac_, double exact_move_probability_, double overshoot_move_probability_, double undershoot_move_probability_)
void setHiddenPose(int hidden_maze_number_, int hidden_x_, int hidden_y_)
mic::utils::DataCollectorPtr< std::string, float > collector_ptr
Data collector.
mic::configuration::Property< double > exact_move_probability
Property: variable storing the probability that we made the exact move (x+dx).
mic::configuration::Property< short > max_number_of_iterations
Property: stop condition 1: maximal number of iterations - if exceeded, we finish the episode claimin...
mic::configuration::Property< short > hidden_y
Property: variable denoting the y position we are at right now (unknown, to be determined).
mic::configuration::Property< double > epsilon
Property: variable denoting epsilon in action selection (the probability "below" which a random actio...
void sense(double hit_factor_, double miss_factor_)
mic::configuration::Property< std::string > statistics_filename
Property: name of the file to which the statistics (convergence) will be exported.
mic::configuration::Property< short > action
Property: performed action (0-3: NESW, -3: random, -2: sumOfMostUniquePatchesActionSelection, -1: mostUniquePatchActionSelection).
mic::types::Action2DInterface sumOfMostUniquePatchesActionSelection()
mic::configuration::Property< double > miss_factor
Property: variable denoting the miss factor (the gain when the observation does not coincide with cur...
mic::types::Action2DInterface mostUniquePatchActionSelection()
std::vector< double > maze_probabilities
Variable storing the probability that we are currently moving in/observing a given maze...
mic::importers::MazeMatrixImporter importer
Importer responsible for loading mazes from file.
void setMazes(std::vector< mic::types::MatrixXiPtr > &mazes_, unsigned int number_of_distinctive_patches_)
WindowCollectorChart< float > * w_localization_time_chart
Window for displaying chart with statistics on current maze number.
mic::configuration::Property< short > hidden_maze_number
Property: variable denoting which maze we are in right now (unknown, to be determined).
void RegisterApplication(void)
Registers application.
mic::configuration::Property< double > hit_factor
Property: variable denoting the hit factor (the gain when the observation coincides with current posi...
mic::configuration::Property< double > undershoot_move_probability
Property: variable storing the probability that we made the "undershoot" move (x+dx-1).
Application for episodic testing of convergence of histogram filter based maze-of-digits localization...
mic::configuration::Property< double > min_maze_confidence
Property: stop condition 2: minimal maze confidence - if exceeded, we finish the episode claiming tha...
mic::configuration::Property< double > overshoot_move_probability
Property: variable storing the probability that we made the "overshoot" move (x+dx+1).
mic::configuration::Property< short > hidden_x
Property: variable denoting the x position we are at right now (unknown, to be determined).