MachineIntelligenceCore:ReinforcementLearning
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator
nArmedBanditsSofmax.hpp
Go to the documentation of this file.
1 
23 #ifndef SRC_APPLICATION_NARMEDBANDITSSOFMAX_HPP_
24 #define SRC_APPLICATION_NARMEDBANDITSSOFMAX_HPP_
25 
26 #include <vector>
27 
28 #include <types/MatrixTypes.hpp>
29 
30 #include <opengl/application/OpenGLApplication.hpp>
31 #include <opengl/visualization/WindowCollectorChart.hpp>
32 using namespace mic::opengl::visualization;
33 
34 namespace mic {
35 namespace application {
36 
/// Application class for the n-armed bandits problem; per the generated
/// summary for this file it uses a Softmax-based action choice.
/// Derives from mic::opengl::application::OpenGLApplication.
41 class nArmedBanditsSofmax: public mic::opengl::application::OpenGLApplication {
42 public:
    /// Constructor.
    /// @param node_name_ Name passed in by the caller; defaults to "application".
    ///        NOTE(review): presumably the configuration node name -- confirm against the base class.
47  nArmedBanditsSofmax(std::string node_name_ = "application");
48 
    /// Virtual destructor.
52  virtual ~nArmedBanditsSofmax();
53 
54 protected:
    /// Virtual hook taking no arguments.
    /// NOTE(review): by its name, sets up variables that depend on property values -- confirm in the .cpp.
58  virtual void initializePropertyDependentVariables();
59 
    /// Virtual initialization hook.
    /// @param argc Argument count (presumably forwarded from main -- confirm).
    /// @param argv Argument vector (presumably forwarded from main -- confirm).
65  virtual void initialize(int argc, char* argv[]);
66 
    /// Virtual hook performing one step of the application.
    /// @return bool; the meaning of the returned flag is not visible in this header.
70  virtual bool performSingleStep();
71 
72 private:
73 
    /// Window for displaying average reward.
75  WindowCollectorChart<float>* w_reward;
76 
    /// Reward collector.
78  mic::utils::DataCollectorPtr<std::string, float> reward_collector_ptr;
79 
    /// n Bandit arms.
81  mic::types::VectorXf arms;
82 
    /// Action values.
84  mic::types::VectorXf action_values;
85 
    /// Counters storing how many times we've taken a particular action.
87  mic::types::VectorXi action_counts;
88 
    /// Action values - softmax.
90  mic::types::VectorXf action_values_softmax;
91 
92 
    /// Property: number of bandits.
94  mic::configuration::Property<size_t> number_of_bandits;
95 
    /// Property of type double.
    /// NOTE(review): presumably the softmax temperature parameter -- confirm against the implementation.
98  mic::configuration::Property<double> tau;
99 
    /// Property: name of the file to which the statistics will be exported.
101  mic::configuration::Property<std::string> statistics_filename;
102 
    /// NOTE(review): presumably the index of the best arm -- confirm where it is assigned in the .cpp.
106  size_t best_arm;
107 
112 
    /// Computes a reward from the given value; the computation itself lives in the .cpp.
    /// @param prob_ Float input (presumably a probability -- confirm its expected range).
    /// @return Reward as a short.
117  short calculateReward(float prob_);
118 
    /// NOTE(review): by its name, refreshes action_values_softmax -- confirm in the .cpp.
122  void updateSoftmaxValues();
123 
124 };
125 
126 } /* namespace application */
127 } /* namespace mic */
128 
129 #endif /* SRC_APPLICATION_NARMEDBANDITSSOFMAX_HPP_ */
mic::configuration::Property< size_t > number_of_bandits
Property: number of bandits.
mic::types::VectorXi action_counts
Counters storing how many times we've taken a particular action.
mic::types::VectorXf action_values
Action values.
mic::types::VectorXf arms
n Bandit arms.
mic::utils::DataCollectorPtr< std::string, float > reward_collector_ptr
Reward collector.
mic::configuration::Property< double > tau
mic::types::VectorXf action_values_softmax
Action values - softmax.
mic::configuration::Property< std::string > statistics_filename
Property: name of the file to which the statistics will be exported.
WindowCollectorChart< float > * w_reward
Window for displaying average reward.
Class solving the n-armed bandits problem using Softmax action selection.