25 #include <utils/RandomGenerator.hpp>
28 namespace application {
40 number_of_bandits(
"number_of_bandits", 10),
41 epsilon(
"epsilon", 0.1),
42 statistics_filename(
"statistics_filename",
"statistics_filename.csv")
50 LOG(LINFO) <<
"Properties registered";
61 VGL_MANAGER->initializeGLUT(argc, argv);
65 reward_collector_ptr->createContainer(
"average_reward", 0, 10, mic::types::color_rgba(255, 0, 0, 180));
66 reward_collector_ptr->createContainer(
"correct_arms_percentage", 0, 100, mic::types::color_rgba(0, 255, 0, 180));
67 reward_collector_ptr->createContainer(
"best_possible_reward", 0, 10, mic::types::color_rgba(0, 0, 255, 180));
70 w_reward =
new WindowCollectorChart<float>(
"nBandits", 256, 256, 0, 0);
79 arms[i] = RAN_GEN->uniRandReal();
103 if (RAN_GEN->uniRandReal() < prob_)
112 size_t current_best_arm = 0;
113 float current_best_value = -1;
119 current_best_arm = i;
122 return current_best_arm;
127 LOG(LTRACE) <<
"Performing a single step (" << iteration <<
")";
129 std::cout<<
"hidden state (arms)=";
131 std::cout <<
arms[i] <<
", ";
132 std::cout << std::endl;
134 std::cout <<
"action_counts=" ;
137 std::cout << std::endl;
139 std::cout<<
"action_values=";
142 std::cout << std::endl;
147 if (RAN_GEN->uniRandReal() > (double)
epsilon){
150 std::cout<<
"best choice=" << choice << std::endl;
154 choice = RAN_GEN->uniRandInt(0, number_of_bandits-1);
155 std::cout<<
"random choice=" << choice << std::endl;
161 std::cout<<
"reward= " << reward << std::endl;
165 std::cout<<
"action_values[choice]" <<
action_values[choice] <<
" (1.0/action_counts[choice])=" << (1.0/
action_counts[choice]) <<
" (reward - action_values[choice])=" << (reward -
action_values[choice]) << std::endl;
168 std::cout<<
"action_values[choice] po = " <<
action_values[choice] << std::endl;
172 std::cout<<
"correct arm/choice=" <<
best_arm << std::endl;
175 float running_mean_reward = 0;
179 running_mean_reward /= (float)iteration;
mic::configuration::Property< std::string > statistics_filename
Property: name of the file to which the statistics will be exported.
virtual void initializePropertyDependentVariables()
short calculateReward(float prob_)
mic::utils::DataCollectorPtr< std::string, float > reward_collector_ptr
Reward collector.
virtual bool performSingleStep()
mic::configuration::Property< double > epsilon
Property: variable denoting epsilon in action selection (the probability "below" which a random actio...
Class implementing the n-armed bandits problem and solving it using simple Q-learn...
virtual void initialize(int argc, char *argv[])
mic::types::VectorXf arms
n Bandit arms.
TestApp(std::string node_name_="application")
void RegisterApplication(void)
Registers application.
WindowCollectorChart< float > * w_reward
Window for displaying average reward.
mic::types::VectorXf action_values
Action values.
mic::types::VectorXi action_counts
Counters storing how many times we've taken a particular action.
mic::configuration::Property< size_t > number_of_bandits
Property: number of bandits.