MachineIntelligenceCore:ReinforcementLearning
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator
nArmedBanditsUnlimitedHistory.cpp
Go to the documentation of this file.
1 
23 #include <utils/RandomGenerator.hpp>
25 
26 namespace mic {
27 namespace application {
28 
/*
 * Registers the application in the application factory.
 * NOTE(review): the body (doxygen listing line 34) is elided from this
 * listing -- presumably a REGISTER_APPLICATION(...)-style call; confirm
 * against the original source file.
 */
33 void RegisterApplication (void) {
35 }
36 
37 
38 nArmedBanditsUnlimitedHistory::nArmedBanditsUnlimitedHistory(std::string node_name_) : OpenGLApplication(node_name_),
39  number_of_bandits("number_of_bandits", 10),
40  epsilon("epsilon", 0.1),
41  statistics_filename("statistics_filename","statistics_filename.csv")
42 
43  {
44  // Register properties - so their values can be overridden (read from the configuration file).
45  registerProperty(number_of_bandits);
46  registerProperty(epsilon);
47  registerProperty(statistics_filename);
48 
49  LOG(LINFO) << "Properties registered";
50 }
51 
52 
// Destructor tail (the signature on doxygen listing line 53 is elided from
// this listing). Frees the chart window allocated with `new` in initialize().
54  delete(w_reward);
55 }
56 
57 
/*
 * Initializes everything that must live in the main (OpenGL) thread:
 * GLUT itself, the reward data collector and the chart window displaying it.
 * NOTE(review): the 0..10 / 0..100 arguments look like the y-axis ranges of
 * the chart containers (10 matches the maximum reward with the default
 * number_of_bandits) -- confirm against the DataCollector::createContainer API.
 */
58 void nArmedBanditsUnlimitedHistory::initialize(int argc, char* argv[]) {
 59  // Initialize GLUT! :]
 60  VGL_MANAGER->initializeGLUT(argc, argv);
 61 
 62  reward_collector_ptr = std::make_shared < mic::utils::DataCollector<std::string, float> >( );
 63  // Add containers to collector.
 64  reward_collector_ptr->createContainer("average_reward", 0, 10, mic::types::color_rgba(255, 0, 0, 180));
 65  reward_collector_ptr->createContainer("correct_arms_percentage", 0, 100, mic::types::color_rgba(0, 255, 0, 180));
 66  reward_collector_ptr->createContainer("best_possible_reward", 0, 10, mic::types::color_rgba(0, 0, 255, 180));
 67 
 68  // Create the visualization windows - must be created in the same, main thread :]
 // Raw `new` here is paired with the `delete` in the destructor; the window
 // is owned by this object for its whole lifetime.
 69  w_reward = new WindowCollectorChart<float>("nBandits", 256, 256, 0, 0);
 70  w_reward->setDataCollectorPtr(reward_collector_ptr);
 71 
 72 }
73 
/*
 * (Signature on doxygen listing line 74 is elided -- presumably
 * initializePropertyDependentVariables(); confirm against the original file.)
 * Draws a random success threshold for every arm, remembers the best arm and
 * its probability, and seeds the action-value history with one zero-reward row.
 */
 75  // Initialize random "arm" thresholds.
 76  arms.resize(number_of_bandits);
 77  for(size_t i=0; i<number_of_bandits; i++)
 78  arms[i] = RAN_GEN->uniRandReal();
 79  //std::cout << arms << std:: endl;
 80 
 81  // Find the best arm.
 // Both trackers start at -1 so the first arm always wins the first
 // comparison (uniRandReal() values are presumably in [0,1] -- confirm).
 82  best_arm = -1;
 83  best_arm_prob = -1;
 84  for (size_t i=0; i<number_of_bandits; i++) {
 85  if (arms[i] > best_arm_prob){
 86  best_arm_prob = arms[i];
 87  best_arm = i;
 88  }//: if
 89  }//: for
 90 
 91  // Initialize action value - add single row with random action index and value of 0.
 // NOTE(review): this seed row is later compensated for by dividing by
 // action_values.size()-1 in performSingleStep -- keep the two in sync.
 92  action_values.push_back(std::make_pair(RAN_GEN->uniRandInt(0, number_of_bandits-1), 0));
 93 
 94 }
95 
/*
 * (Signature on doxygen listing line 96 is elided -- presumably
 * calculateReward(<float> prob_); confirm against the original file.)
 * Performs number_of_bandits independent random draws, adding one reward
 * point per draw that falls below prob_, so the returned reward lies in
 * [0, number_of_bandits] and its expected value is number_of_bandits * prob_.
 */
 97  short reward = 0;
 98  for(size_t i=0; i<number_of_bandits; i++) {
 99  if (RAN_GEN->uniRandReal() < prob_)
 100  reward += 1;
 101  }//: for
 102  return reward;
 103 }
104 
105 
/*
 * (Signature on doxygen listing line 106 is elided -- presumably
 * size_t selectBestArm(); confirm against the original file.)
 * Greedy selection: scans the whole action-value history and returns the arm
 * with the highest mean historical reward (ties go to the lower index).
 */
 107 
 108  // greedy method to select best arm based on memory array (historical results)
 109  size_t current_best_arm = 0;
 110  float current_best_mean = -1;
 111  // For all possible arms.
 112  for(size_t i=0; i<number_of_bandits; i++) {
 113  long sum = 0;
 114  long no_actions=0;
 115  for(auto av: action_values){
 116  if (av.first == i) {
 117  sum += av.second;
 118  no_actions ++;
 119  }
 120  }//: for all action values
 121  // Calculate mean reward for each action.
 // NOTE(review): for an arm never tried, no_actions==0 and this float
 // division yields 0/0 == NaN; NaN compares false below, so untried arms can
 // never be picked by the greedy step -- confirm this is intended (no
 // optimistic initialization).
 122  float mean_reward = (float) sum/no_actions;
 123  //std::cout<< "mean_reward ["<< i <<"] = " << mean_reward <<std::endl;
 124  // Check if this one is better than the others.
 125  if (mean_reward > current_best_mean) {
 126  current_best_mean = mean_reward;
 127  current_best_arm = i;
 128  //std::cout<< "found best reward = " << best_mean <<" for arm" << best_arm <<std::endl;
 129  }//: if
 130  }//: for
 131  //std::cout<< "best arm = " << best_arm <<std::endl;
 132  return current_best_arm;
 133 }
134 
135 
/*
 * (Signature on doxygen listing line 136 is elided -- presumably
 * bool performSingleStep(); confirm against the original file.)
 * One epsilon-greedy iteration: picks an arm (greedy with probability
 * 1-epsilon, uniformly random otherwise), collects its reward, appends the
 * (arm, reward) pair to the history, recomputes the running statistics and
 * exports them to CSV.
 */
 137  LOG(LTRACE) << "Performing a single step (" << iteration << ")";
 138 
 // NOTE(review): selectBestArm() appears to return size_t, which narrows
 // into this short; fine while number_of_bandits is small, but worth a check.
 139  short choice;
 140  // Epsilon-greedy action selection.
 // The cast to double unwraps the Property<double> for the comparison.
 141  if (RAN_GEN->uniRandReal() > (double)epsilon){
 142  // Select best action.
 143  choice = selectBestArm();
 144  } else {
 145  //std::cout << "Random action!" << std::endl;
 146  // Random arm selection.
 147  choice = RAN_GEN->uniRandInt(0, number_of_bandits-1);
 148  }//: if
 149 
 150  // Calculate reward.
 151  //std::cout << "choice = " << choice << " arms[choice]=" << arms[choice] << std::endl;
 152  short reward = calculateReward(arms[choice]);
 153  // Add results to the memory.
 154  action_values.push_back(std::make_pair(choice, reward));
 155 
 156  // Calculate the percentage the correct arm is chosen.
 157 // std::cout<< "correct arm/choice=" << best_arm << std::endl;
 158  size_t correct_arm =0;
 // NOTE(review): this loop also counts the random seed row added at
 // initialization, while the divisor below excludes it (size()-1) -- so the
 // percentage can slightly exceed 100 early on when the seed hit best_arm.
 159  for(auto av: action_values){
 160  if (av.first == best_arm)
 161  correct_arm++;
 162  }//: for all action values
 163  float correct_arms_percentage = 100.0*correct_arm/(action_values.size()-1);
 164 
 165  // Calculate the mean reward.
 // The seed row carries reward 0, so summing over all rows and dividing by
 // size()-1 still yields the mean over the real steps.
 166  float running_mean_reward = 0;
 167  //std::cout << "action_values= \n";
 168  for(auto av: action_values){
 169  //std::cout << av.first << ", " << av.second << std::endl;
 170  running_mean_reward += av.second;
 171  }//: for all action values
 172  running_mean_reward /= (action_values.size()-1);
 173 
 174  // Add variables to container.
 175  reward_collector_ptr->addDataToContainer("average_reward",running_mean_reward);
 176  reward_collector_ptr->addDataToContainer("correct_arms_percentage",correct_arms_percentage);
 // NOTE(review): the hard-coded 10.0 matches the DEFAULT number_of_bandits;
 // the best possible expected reward is number_of_bandits * best_arm_prob, so
 // this plot is wrong when the property is overridden -- confirm and fix.
 177  reward_collector_ptr->addDataToContainer("best_possible_reward",10.0*best_arm_prob);
 178 
 179  // Export reward "convergence" diagram.
 // NOTE(review): rewriting the CSV on every single step is O(history) I/O
 // per iteration; acceptable for a demo, costly for long runs.
 180  reward_collector_ptr->exportDataToCsv(statistics_filename);
 181 
 182  return true;
 183 }
184 
185 
186 
187 } /* namespace application */
188 } /* namespace mic */
WindowCollectorChart< float > * w_reward
Window for displaying average reward.
mic::configuration::Property< size_t > number_of_bandits
Property: number of bandits.
mic::configuration::Property< double > epsilon
Property: variable denoting epsilon in action selection (the probability "below" which a random actio...
nArmedBanditsUnlimitedHistory(std::string node_name_="application")
Class implementing a solver for the n-armed bandits problem based on unlimited h...
std::vector< std::pair< size_t, size_t > > action_values
Action values - pairs of <arm_number, reward>.
mic::configuration::Property< std::string > statistics_filename
Property: name of the file to which the statistics will be exported.
void RegisterApplication(void)
Registers application.
mic::utils::DataCollectorPtr< std::string, float > reward_collector_ptr
Reward collector.