23 #ifndef SRC_APPLICATION_NARMEDBANDITSSIMPLEQLEARNING_HPP_
24 #define SRC_APPLICATION_NARMEDBANDITSSIMPLEQLEARNING_HPP_
28 #include <types/MatrixTypes.hpp>
30 #include <opengl/application/OpenGLApplication.hpp>
31 #include <opengl/visualization/WindowCollectorChart.hpp>
32 using namespace mic::opengl::visualization;
35 namespace application {
41 class TestApp:
public mic::opengl::application::OpenGLApplication {
/// Constructor. The node name argument may be omitted, in which case it
/// defaults to "application".
/// NOTE(review): how node_name_ is consumed is not visible in this header
/// excerpt - presumably it is forwarded to the OpenGLApplication base; confirm.
47 TestApp(std::string node_name_ =
"application");
/// Virtual hook taking no arguments and returning nothing.
/// NOTE(review): judging by the name, it sets up members whose values or
/// sizes depend on the configuration properties - confirm against the
/// implementation, which is not part of this excerpt.
58 virtual void initializePropertyDependentVariables();
/// Virtual initialization entry point.
/// @param argc Argument count.
/// @param argv Argument vector.
/// NOTE(review): presumably these are the values received by main() - confirm
/// with the caller; the implementation is not visible here.
65 virtual void initialize(
int argc,
char* argv[]);
/// Virtual method performing one step of the application.
/// @return A bool; NOTE(review): its meaning (e.g. "continue" vs. "stop") is
/// defined by the base class contract, which is not visible here - confirm.
70 virtual bool performSingleStep();
/// Property: epsilon used in action selection, stored as a double.
/// NOTE(review): presumably the probability threshold below which a random
/// action is taken instead of the best-valued one - confirm in the
/// implementation.
93 mic::configuration::Property<double>
epsilon;
/// Computes a reward value from the given probability.
/// @param prob_ Probability used to compute the reward.
/// NOTE(review): presumably the payout probability of the chosen arm - confirm.
/// @return Reward as a short; the value range is defined by the
/// implementation, which is not visible here.
112 short calculateReward(
float prob_);
/// Selects the best arm.
/// @return A size_t; NOTE(review): presumably the index of the arm with the
/// highest current action value - the selection criterion lives in the
/// implementation, confirm there.
117 size_t selectBestArm();
mic::configuration::Property< std::string > statistics_filename
Property: name of the file to which the statistics will be exported.
mic::utils::DataCollectorPtr< std::string, float > reward_collector_ptr
Reward collector.
mic::configuration::Property< double > epsilon
Property: epsilon used in action selection (the probability below which a random action is selected).
Class implementing an application that solves the n-armed bandits problem using simple Q-learning.
mic::types::VectorXf arms
n Bandit arms.
WindowCollectorChart< float > * w_reward
Window for displaying average reward.
mic::types::VectorXf action_values
Action values.
mic::types::VectorXi action_counts
Counters storing how many times we've taken a particular action.
mic::configuration::Property< size_t > number_of_bandits
Property: number of bandits.