23 #include <utils/RandomGenerator.hpp>
27 namespace application {
39 number_of_bandits(
"number_of_bandits", 10),
40 epsilon(
"epsilon", 0.1),
41 statistics_filename(
"statistics_filename",
"statistics_filename.csv")
49 LOG(LINFO) <<
"Properties registered";
60 VGL_MANAGER->initializeGLUT(argc, argv);
64 reward_collector_ptr->createContainer(
"average_reward", 0, 10, mic::types::color_rgba(255, 0, 0, 180));
65 reward_collector_ptr->createContainer(
"correct_arms_percentage", 0, 100, mic::types::color_rgba(0, 255, 0, 180));
66 reward_collector_ptr->createContainer(
"best_possible_reward", 0, 10, mic::types::color_rgba(0, 0, 255, 180));
69 w_reward =
new WindowCollectorChart<float>(
"nBandits", 256, 256, 0, 0);
78 arms[i] = RAN_GEN->uniRandReal();
92 action_values.push_back(std::make_pair(RAN_GEN->uniRandInt(0, number_of_bandits-1), 0));
99 if (RAN_GEN->uniRandReal() < prob_)
109 size_t current_best_arm = 0;
110 float current_best_mean = -1;
122 float mean_reward = (float) sum/no_actions;
125 if (mean_reward > current_best_mean) {
126 current_best_mean = mean_reward;
127 current_best_arm = i;
132 return current_best_arm;
137 LOG(LTRACE) <<
"Performing a single step (" << iteration <<
")";
141 if (RAN_GEN->uniRandReal() > (double)
epsilon){
158 size_t correct_arm =0;
163 float correct_arms_percentage = 100.0*correct_arm/(action_values.size()-1);
166 float running_mean_reward = 0;
168 for(
auto av: action_values){
170 running_mean_reward += av.second;
172 running_mean_reward /= (action_values.size()-1);
WindowCollectorChart< float > * w_reward
Window for displaying average reward.
mic::configuration::Property< size_t > number_of_bandits
Property: number of bandits.
virtual bool performSingleStep()
virtual void initialize(int argc, char *argv[])
mic::configuration::Property< double > epsilon
Property: variable denoting epsilon in action selection (the probability "below" which a random actio...
nArmedBanditsUnlimitedHistory(std::string node_name_="application")
Class implementing a solver for the n-armed bandits problem based on unlimited h...
virtual void initializePropertyDependentVariables()
virtual ~nArmedBanditsUnlimitedHistory()
std::vector< std::pair< size_t, size_t > > action_values
Action values - pairs of <arm_number, reward>.
mic::configuration::Property< std::string > statistics_filename
Property: name of the file to which the statistics will be exported.
mic::types::VectorXf arms
n Bandit arms.
short calculateReward(float prob_)
void RegisterApplication(void)
Registers application.
mic::utils::DataCollectorPtr< std::string, float > reward_collector_ptr
Reward collector.