MachineIntelligenceCore:ReinforcementLearning
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator
GridworldDRLExperienceReplayPOMDP.hpp
Go to the documentation of this file.
1 
23 #ifndef SRC_APPLICATION_GRIDWORLDDRLEXPERIENCEREPLAYPOMDP_HPP_
24 #define SRC_APPLICATION_GRIDWORLDDRLEXPERIENCEREPLAYPOMDP_HPP_
25 
26 #include <vector>
27 #include <string>
28 
29 #include <opengl/application/OpenGLEpisodicApplication.hpp>
30 #include <opengl/visualization/WindowCollectorChart.hpp>
31 using namespace mic::opengl::visualization;
32 
33 #include <mlnn/BackpropagationNeuralNetwork.hpp>
34 // Using multi layer neural networks
35 using namespace mic::mlnn;
36 using namespace mic::types;
37 
38 #include <types/Gridworld.hpp>
40 
41 namespace mic {
42 namespace application {
43 
44 
50 class GridworldDRLExperienceReplayPOMDP: public mic::opengl::application::OpenGLEpisodicApplication { // Application solving the gridworld problem with Q-learning; a neural network (neural_net) is used for approximation.
51 public:
56  GridworldDRLExperienceReplayPOMDP(std::string node_name_ = "application"); ///< Constructor; node_name_ defaults to "application".
57 
62 
63 protected:
64 
70  virtual void initialize(int argc, char* argv[]); ///< Initialization hook receiving argc/argv. NOTE(review): exact duties are defined in the .cpp - confirm there.
71 
75  virtual void initializePropertyDependentVariables(); ///< By name: sets up variables that depend on the Property members below - confirm against the implementation.
76 
80  virtual bool performSingleStep(); ///< Performs one step. NOTE(review): meaning of the bool result is not visible here - check the base class.
81 
85  virtual void startNewEpisode(); ///< Episode-start hook (virtual; presumably overrides the episodic base class - confirm).
86 
90  virtual void finishCurrentEpisode(); ///< Episode-end hook (virtual; presumably overrides the episodic base class - confirm).
91 
92 
93 private:
94 
96  WindowCollectorChart<float>* w_chart; ///< Window for displaying statistics. NOTE(review): raw pointer - ownership is not visible in this header.
97 
99  mic::utils::DataCollectorPtr<std::string, float> collector_ptr; ///< Data collector.
100 
103 
105  size_t batch_size; ///< Size of the batch in experience replay - set to the size of maze (width*height).
106 
110  mic::configuration::Property<float> step_reward; ///< Property (float). Presumably the reward applied per step - TODO confirm.
111 
115  mic::configuration::Property<float> discount_rate; ///< Property (float). Presumably the Q-learning discount factor - TODO confirm.
116 
120  mic::configuration::Property<float> learning_rate; ///< Property (float). Presumably the learning rate used in training - TODO confirm.
121 
126  mic::configuration::Property<double> epsilon; ///< Property (double). Presumably the exploration parameter of action selection - TODO confirm.
127 
131  mic::configuration::Property<int> step_limit; ///< Property (int). Presumably a cap on steps per episode - TODO confirm.
132 
134  mic::configuration::Property<std::string> statistics_filename; ///< Property: name of the file to which the statistics will be exported.
135 
137  mic::configuration::Property<std::string> mlnn_filename; ///< Property: name of the file to which the neural network will be serialized (or deserialized from).
138 
140  mic::configuration::Property<bool> mlnn_save; ///< Property: flag denoting whether the nn should be saved to a file (after every episode end).
141 
143  mic::configuration::Property<bool> mlnn_load; ///< Property: flag denoting whether the nn should be loaded from a file (at initialization).
144 
146  BackpropagationNeuralNetwork<float> neural_net; ///< Multi-layer neural network used for approximation of the Q-state rewards.
147 
154  float computeBestValueForGivenStateAndPredictions(mic::types::Position2D player_position_, float* predictions_); ///< Returns a float computed from a player position and a predictions array. NOTE(review): length of predictions_ is not visible here - confirm.
155 
161  mic::types::MatrixXfPtr getPredictedRewardsForGivenState(mic::types::Position2D player_position_); ///< Returns a matrix pointer of predicted rewards for the given player position (per the name) - confirm shape in the .cpp.
162 
168  mic::types::NESWAction selectBestActionForGivenState(mic::types::Position2D player_position_); ///< Returns the NESW action selected for the given player position.
169 
174  std::string streamNetworkResponseTable(); ///< Returns a string; by name, a table of network responses - confirm format in the .cpp.
175 
179  long long sum_of_iterations; ///< Accumulator (long long). By name, a running total of iterations - TODO confirm where it is updated.
180 
184  long long sum_of_rewards; ///< Accumulator (long long). By name, a running total of rewards - TODO confirm where it is updated.
185 
190 
195 };
196 
197 } /* namespace application */
198 } /* namespace mic */
199 
200 #endif /* SRC_APPLICATION_GRIDWORLDDRLEXPERIENCEREPLAYPOMDP_HPP_ */
mic::configuration::Property< std::string > statistics_filename
Property: name of the file to which the statistics will be exported.
Class emulating the gridworld environment.
Definition: Gridworld.hpp:50
mic::configuration::Property< std::string > mlnn_filename
Property: name of the file to which the neural network will be serialized (or deserialized from)...
mic::utils::DataCollectorPtr< std::string, float > collector_ptr
Data collector.
Class responsible for solving the gridworld problem with Q-learning, neural network used for approxim...
mic::configuration::Property< bool > mlnn_save
Property: flag denoting whether the nn should be saved to a file (after every episode end)...
BackpropagationNeuralNetwork< float > neural_net
Multi-layer neural network used for approximation of the Qstate rewards.
WindowCollectorChart< float > * w_chart
Window for displaying statistics.
mic::environments::Gridworld grid_env
The gridworld environment.
mic::configuration::Property< bool > mlnn_load
Property: flag denoting whether the nn should be loaded from a file (at the initialization of the tas...
size_t batch_size
Size of the batch in experience replay - set to the size of maze (width*height).
Class representing the spatial experience memory - used in memory replay. Derived from the Batch clas...