MachineIntelligenceCore:ReinforcementLearning
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator
MazeOfDigitsDLRERPOMPD.hpp
Go to the documentation of this file.
1 
23 #ifndef SRC_APPLICATION_MAZEOFDIGITSDLRERPOMPD_HPP_
24 #define SRC_APPLICATION_MAZEOFDIGITSDLRERPOMPD_HPP_
25 
26 #include <vector>
27 #include <string>
28 
29 #include <opengl/application/OpenGLEpisodicApplication.hpp>
30 #include <opengl/visualization/WindowCollectorChart.hpp>
31 #include <opengl/visualization/WindowMazeOfDigits.hpp>
32 using namespace mic::opengl::visualization;
33 
34 #include <mlnn/BackpropagationNeuralNetwork.hpp>
35 // Using multi layer neural networks
36 using namespace mic::mlnn;
37 using namespace mic::types;
38 
39 #include <types/MazeOfDigits.hpp>
41 
42 namespace mic {
43 namespace application {
44 
45 
/// Application of partially observable deep Q-learning with experience replay to the maze of digits problem.
/// Derives from mic::opengl::application::OpenGLEpisodicApplication.
51 class MazeOfDigitsDLRERPOMPD: public mic::opengl::application::OpenGLEpisodicApplication {
52 public:
57  MazeOfDigitsDLRERPOMPD(std::string node_name_ = "application"); ///< Constructor; node_name_ defaults to "application".
58 
62  virtual ~MazeOfDigitsDLRERPOMPD(); ///< Virtual destructor.
63 
64 protected:
65 
71  virtual void initialize(int argc, char* argv[]); ///< Initialization hook receiving argc/argv. NOTE(review): presumably the program's command-line arguments - confirm.
72 
76  virtual void initializePropertyDependentVariables(); ///< NOTE(review): presumably sets up the variables that depend on property values - confirm against the implementation.
77 
81  virtual bool performSingleStep(); ///< Performs a single step. NOTE(review): the meaning of the returned bool is not visible in this header.
82 
86  virtual void startNewEpisode(); ///< NOTE(review): presumably invoked at the start of each episode - confirm against the base class.
87 
91  virtual void finishCurrentEpisode(); ///< NOTE(review): presumably invoked at the end of each episode - confirm against the base class.
92 
93 
94 private:
95 
97  WindowCollectorChart<float>* w_chart; ///< Window for displaying statistics.
98 
100  mic::utils::DataCollectorPtr<std::string, float> collector_ptr; ///< Data collector.
101 
103  WindowMazeOfDigits* wmd_environment; ///< Window displaying the whole environment.
105  WindowMazeOfDigits* wmd_observation; ///< Window displaying the observation.
106 
107 
110 
112  std::shared_ptr<std::vector <mic::types::Position2D> > saccadic_path; ///< Saccadic path - a sequence of consecutive agent positions.
113 
115  size_t batch_size; ///< Size of the batch in experience replay - set to the size of maze (width*height).
116 
120  mic::configuration::Property<float> step_reward; ///< Property (float). NOTE(review): presumably the reward assigned per step - confirm.
121 
125  mic::configuration::Property<float> discount_rate; ///< Property (float). NOTE(review): presumably the Q-learning discount factor - confirm.
126 
130  mic::configuration::Property<float> learning_rate; ///< Property (float). NOTE(review): presumably the learning rate used in training - confirm.
131 
136  mic::configuration::Property<double> epsilon; ///< Property (double). NOTE(review): presumably the exploration rate of an epsilon-greedy policy - confirm.
137 
141  mic::configuration::Property<int> step_limit; ///< Property (int). NOTE(review): presumably a limit on the number of steps per episode - confirm.
142 
144  mic::configuration::Property<std::string> statistics_filename; ///< Property: name of the file to which the statistics will be exported.
145 
147  mic::configuration::Property<std::string> mlnn_filename; ///< Property: name of the file to which the neural network will be serialized (or deserialized from).
148 
150  mic::configuration::Property<bool> mlnn_save; ///< Property: flag denoting whether the nn should be saved to a file (after every episode end).
151 
153  mic::configuration::Property<bool> mlnn_load; ///< Property: flag denoting whether the nn should be loaded from a file (at initialization).
154 
156  BackpropagationNeuralNetwork<float> neural_net; ///< Multi-layer neural network used for approximation of the Qstate rewards.
157 
164  float computeBestValueForGivenStateAndPredictions(mic::types::Position2D player_position_, float* predictions_); ///< Takes a player position and a raw pointer to predictions; returns a float. NOTE(review): length and ownership of predictions_ are not visible here.
165 
171  mic::types::MatrixXfPtr getPredictedRewardsForGivenState(mic::types::Position2D player_position_); ///< Returns a MatrixXfPtr of predicted rewards for the given player position.
172 
178  mic::types::NESWAction selectBestActionForGivenState(mic::types::Position2D player_position_); ///< Returns the NESWAction selected for the given player position.
179 
184  std::string streamNetworkResponseTable(); ///< Returns a std::string. NOTE(review): presumably a textual table of the network responses - confirm.
185 
189  long long sum_of_iterations; ///< NOTE(review): presumably the number of iterations accumulated across episodes - confirm.
190 
195 
200 };
201 
202 } /* namespace application */
203 } /* namespace mic */
204 
205 #endif /* SRC_APPLICATION_MAZEOFDIGITSDLRERPOMPD_HPP_ */
mic::utils::DataCollectorPtr< std::string, float > collector_ptr
Data collector.
mic::configuration::Property< std::string > statistics_filename
Property: name of the file to which the statistics will be exported.
mic::configuration::Property< float > discount_rate
std::shared_ptr< std::vector< mic::types::Position2D > > saccadic_path
Saccadic path - a sequence of consecutive agent positions.
WindowMazeOfDigits * wmd_observation
Window displaying the observation.
mic::configuration::Property< float > learning_rate
mic::configuration::Property< std::string > mlnn_filename
Property: name of the file to which the neural network will be serialized (or deserialized from)...
WindowCollectorChart< float > * w_chart
Window for displaying statistics.
mic::configuration::Property< bool > mlnn_load
Property: flag denoting whether the nn should be loaded from a file (at the initialization of the tas...
Class emulating the maze of digits environment.
mic::environments::MazeOfDigits env
The maze of digits environment.
mic::configuration::Property< double > epsilon
Application of Partially Observable Deep Q-learning with Experience Replay to the maze of digits probl...
BackpropagationNeuralNetwork< float > neural_net
Multi-layer neural network used for approximation of the Qstate rewards.
WindowMazeOfDigits * wmd_environment
Window displaying the whole environment.
mic::configuration::Property< int > step_limit
size_t batch_size
Size of the batch in experience replay - set to the size of maze (width*height).
Class representing the spatial experience memory - used in memory replay. Derived from the Batch clas...
mic::configuration::Property< float > step_reward
mic::configuration::Property< bool > mlnn_save
Property: flag denoting whether the nn should be saved to a file (after every episode end)...