25 #include <utils/RandomGenerator.hpp>
30 namespace application {
42 number_of_bandits(
"number_of_bandits", 10),
44 statistics_filename(
"statistics_filename",
"statistics_filename.csv")
49 registerProperty(
tau);
52 LOG(LINFO) <<
"Properties registered";
63 VGL_MANAGER->initializeGLUT(argc, argv);
67 reward_collector_ptr->createContainer(
"average_reward", 0, 10, mic::types::color_rgba(255, 0, 0, 180));
68 reward_collector_ptr->createContainer(
"correct_arms_percentage", 0, 100, mic::types::color_rgba(0, 255, 0, 180));
69 reward_collector_ptr->createContainer(
"best_possible_reward", 0, 10, mic::types::color_rgba(0, 0, 255, 180));
72 w_reward =
new WindowCollectorChart<float>(
"nBandits", 256, 256, 0, 0);
81 arms[i] = RAN_GEN->uniRandReal();
112 if (RAN_GEN->uniRandReal() < prob_)
137 LOG(LTRACE) <<
"Performing a single step (" << iteration <<
")";
139 std::cout<<
"hidden state (arms)=";
141 std::cout <<
arms[i] <<
", ";
142 std::cout << std::endl;
144 std::cout <<
"action_counts=" ;
147 std::cout << std::endl;
149 std::cout<<
"action_values=";
152 std::cout << std::endl;
158 float r = RAN_GEN->uniRandReal();
163 std::cout<<
"choice=" << choice << std::endl;
171 std::cout<<
"reward= " << reward << std::endl;
175 std::cout<<
"action_values[choice]" <<
action_values[choice] <<
" (1.0/action_counts[choice])=" << (1.0/
action_counts[choice]) <<
" (reward - action_values[choice])=" << (reward -
action_values[choice]) << std::endl;
178 std::cout<<
"action_values[choice] po = " <<
action_values[choice] << std::endl;
185 std::cout<<
"correct arm/choice=" <<
best_arm << std::endl;
188 float running_mean_reward = 0;
192 running_mean_reward /= (float)iteration;
virtual ~nArmedBanditsSofmax()
mic::configuration::Property< size_t > number_of_bandits
Property: number of bandits.
mic::types::VectorXi action_counts
Counters storing how many times we've taken a particular action.
mic::types::VectorXf action_values
Action values.
mic::types::VectorXf arms
n Bandit arms.
mic::utils::DataCollectorPtr< std::string, float > reward_collector_ptr
Reward collector.
virtual bool performSingleStep()
mic::configuration::Property< double > tau
mic::types::VectorXf action_values_softmax
Action values - softmax.
mic::configuration::Property< std::string > statistics_filename
Property: name of the file to which the statistics will be exported.
WindowCollectorChart< float > * w_reward
Window for displaying average reward.
nArmedBanditsSofmax(std::string node_name_="application")
virtual void initializePropertyDependentVariables()
virtual void initialize(int argc, char *argv[])
void RegisterApplication(void)
Registers application.
Class solving the n-armed bandits problem using Softmax Action...
void updateSoftmaxValues()
short calculateReward(float prob_)