25 #include <utils/RandomGenerator.hpp>
29 namespace application {
41 step_reward(
"step_reward", 0.0),
42 discount_rate(
"discount_rate", 0.9),
43 move_noise(
"move_noise",0.2),
44 statistics_filename(
"statistics_filename",
"statistics_filename.csv")
53 LOG(LINFO) <<
"Properties registered";
82 std::ostringstream os;
101 mic::types::Position2D new_pos = pos_ + ac_;
106 if ((ac_.getType() == types::NESW::North) || (ac_.getType() == types::NESW::South)) {
108 mic::types::Position2D east_pos = pos_ + A_EAST;
109 if (
state_value_table((
size_t)east_pos.y, (
size_t)east_pos.x) != -std::numeric_limits<float>::infinity()) {
115 mic::types::Position2D west_pos = pos_ + A_WEST;
116 if (
state_value_table((
size_t)west_pos.y, (
size_t)west_pos.x) != -std::numeric_limits<float>::infinity()) {
124 if ((ac_.getType() == types::NESW::East) || (ac_.getType() == types::NESW::West)) {
126 mic::types::Position2D north_pos = pos_ + A_NORTH;
127 if (
state_value_table((
size_t)north_pos.y, (
size_t)north_pos.x) != -std::numeric_limits<float>::infinity()) {
133 mic::types::Position2D south_pos = pos_ + A_SOUTH;
134 if (
state_value_table((
size_t)south_pos.y, (
size_t)south_pos.x) != -std::numeric_limits<float>::infinity()) {
142 q_value /= probs_normalizer;
148 float best_value = -std::numeric_limits<float>::infinity();
154 std::vector<mic::types::NESWAction> actions;
155 actions.push_back(A_NORTH);
156 actions.push_back(A_EAST);
157 actions.push_back(A_SOUTH);
158 actions.push_back(A_WEST);
161 for(mic::types::NESWAction action : actions) {
164 if (value > best_value)
174 LOG(LTRACE) <<
"Performing a single step (" << iteration <<
")";
178 new_state_value_table.setValue( -std::numeric_limits<float>::infinity() );
182 mic::types::Position2D pos(x,y);
190 new_state_value_table((
size_t)pos.y, (size_t)pos.x) =
computeBestValue(pos);
195 mic::types::MatrixXf delta_value;
196 float curr_delta = 0;
199 if (std::isfinite(new_state_value_table(i)))
200 tmp_delta += new_state_value_table(i);
203 curr_delta += std::abs(tmp_delta);
virtual ~GridworldValueIteration()
mic::environments::Gridworld grid_env
The gridworld object.
virtual float getStateReward(mic::types::Position2D pos_)
GridworldValueIteration(std::string node_name_="application")
mic::types::MatrixXf state_value_table
Matrix storing values for all states (gridworld w * h). ROW MAJOR(!).
mic::configuration::Property< float > discount_rate
virtual bool isActionAllowed(long x_, long y_, size_t action_)
virtual bool isStateTerminal(mic::types::Position2D pos_)
mic::configuration::Property< float > move_noise
virtual size_t getEnvironmentWidth()
std::string streamStateActionTable()
virtual void initializePropertyDependentVariables()
float computeQValueFromValues(mic::types::Position2D pos_, mic::types::NESWAction ac_)
mic::configuration::Property< std::string > statistics_filename
Property: name of the file to which the statistics will be exported.
Class responsible for solving the gridworld problem by applying the reinforcement learning value iteration.
virtual std::string environmentToString()
virtual size_t getEnvironmentHeight()
virtual bool performSingleStep()
virtual void initialize(int argc, char *argv[])
virtual bool isStateAllowed(mic::types::Position2D pos_)
void RegisterApplication(void)
Registers application.
float computeBestValue(mic::types::Position2D pos_)
virtual void initializeEnvironment()
mic::configuration::Property< float > step_reward
Declaration of the application class responsible for solving the gridworld problem with value iteration.