MachineIntelligenceCore:ReinforcementLearning
|
Class emulating the gridworld environment. More...
#include <Gridworld.hpp>
Public Member Functions | |
Gridworld (std::string node_name_="gridworld") | |
Gridworld (const mic::environments::Gridworld &gw_) | |
virtual | ~Gridworld () |
mic::environments::Gridworld & | operator= (const mic::environments::Gridworld &gw_) |
virtual void | initializePropertyDependentVariables () |
virtual void | initializeEnvironment () |
void | initExemplaryGrid () |
void | initClassicCliffGrid () |
void | initDiscountGrid () |
void | initBridgeGrid () |
void | initBookGrid () |
void | initMazeGrid () |
void | initExemplaryDQLGrid () |
void | initModifiedDQLGrid () |
void | initDebug2x2Grid () |
void | initDebug3x3Grid () |
void | initSimpleRandomGrid () |
void | initHardRandomGrid () |
bool | isGridTraversible (long x_, long y_, mic::types::Matrix< bool > &visited_) |
mic::types::TensorXfPtr | getObservation () |
virtual std::string | environmentToString () |
virtual std::string | observationToString () |
virtual mic::types::MatrixXfPtr | encodeEnvironment () |
virtual mic::types::MatrixXfPtr | encodeObservation () |
virtual mic::types::MatrixXfPtr | encodeAgentGrid () |
Encode the current state of the reduced grid (only the agent position) as a matrix of size [1, width * height]. More... | |
virtual mic::types::Position2D | getAgentPosition () |
virtual bool | moveAgentToPosition (mic::types::Position2D pos_) |
virtual float | getStateReward (mic::types::Position2D pos_) |
virtual bool | isStateAllowed (mic::types::Position2D pos_) |
virtual bool | isStateTerminal (mic::types::Position2D pos_) |
Public Member Functions inherited from mic::environments::Environment | |
Environment (std::string node_name_) | |
virtual | ~Environment () |
mic::types::TensorXfPtr & | getEnvironment () |
virtual size_t | getEnvironmentWidth () |
virtual size_t | getEnvironmentHeight () |
virtual size_t | getEnvironmentSize () |
virtual size_t | getObservationWidth () |
virtual size_t | getObservationHeight () |
virtual size_t | getObservationSize () |
virtual size_t | getChannels () |
size_t | getROISize () |
bool | moveAgent (mic::types::Action2DInterface ac_) |
virtual void | moveAgentToInitialPosition () |
virtual bool | isStateAllowed (long x_, long y_) |
virtual bool | isStateTerminal (long x_, long y_) |
virtual bool | isActionAllowed (long x_, long y_, size_t action_) |
virtual bool | isActionAllowed (mic::types::Position2D pos_, mic::types::Action2DInterface ac_) |
virtual bool | isActionAllowed (mic::types::Action2DInterface ac_) |
Protected Member Functions | |
std::string | gridToString (mic::types::TensorXfPtr grid_) |
Protected Attributes | |
mic::configuration::Property < short > | type |
Protected Attributes inherited from mic::environments::Environment | |
mic::configuration::Property < size_t > | width |
Property: width of the environment. More... | |
mic::configuration::Property < size_t > | height |
Property: height of the environment. More... | |
mic::configuration::Property < size_t > | roi_size |
Property: size of the ROI (region of interest). More... | |
size_t | channels |
Number of channels. More... | |
bool | pomdp_flag |
Flag related to POMDP (partially observable) mode. More... | |
mic::types::Position2D | initial_position |
Property: initial position of the agent. More... | |
mic::types::TensorXfPtr | environment_grid |
Tensor storing the environment. More... | |
mic::types::TensorXfPtr | observation_grid |
Class emulating the gridworld environment.
Definition at line 50 of file Gridworld.hpp.
mic::environments::Gridworld::Gridworld | ( | std::string | node_name_ = "gridworld" | ) |
Constructor. Registers properties.
node_name_ | Name of the node in configuration file. |
Definition at line 28 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Count, and type.
mic::environments::Gridworld::Gridworld | ( | const mic::environments::Gridworld & | gw_ | ) |
Copy constructor.
gw_ | Gridworld object to be cloned. |
Definition at line 38 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Environment::height, mic::environments::Environment::initial_position, mic::environments::Environment::observation_grid, type, and mic::environments::Environment::width.
mic::environments::Gridworld::~Gridworld | ( | ) |
virtual |
Destructor. Empty for now.
Definition at line 57 of file Gridworld.cpp.
mic::types::MatrixXfPtr mic::environments::Gridworld::encodeAgentGrid | ( | ) |
virtual |
Encode the current state of the reduced grid (only the agent position) as a matrix of size [1, width * height].
Definition at line 768 of file Gridworld.cpp.
References mic::environments::Agent, mic::environments::Environment::environment_grid, mic::environments::Environment::height, and mic::environments::Environment::width.
Referenced by mic::application::GridworldDeepQLearning::getPredictedRewardsForCurrentState(), mic::application::GridworldDeepQLearning::performSingleStep(), and mic::application::GridworldDeepQLearning::streamNetworkResponseTable().
mic::types::MatrixXfPtr mic::environments::Gridworld::encodeEnvironment | ( | ) |
virtual |
Encodes the current state of the gridworld as a matrix of size [1, width * height * channels].
Implements mic::environments::Environment.
Definition at line 703 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Environment::height, and mic::environments::Environment::width.
Referenced by encodeObservation(), mic::application::GridworldDRLExperienceReplay::getPredictedRewardsForGivenState(), mic::application::GridworldDRLExperienceReplay::performSingleStep(), and mic::application::GridworldDRLExperienceReplay::streamNetworkResponseTable().
mic::types::MatrixXfPtr mic::environments::Gridworld::encodeObservation | ( | ) |
virtual |
Encodes the current observation taken in the environment as a matrix of size [1, roi_size * roi_size * channels].
Implements mic::environments::Environment.
Definition at line 715 of file Gridworld.cpp.
References mic::environments::Environment::channels, encodeEnvironment(), getAgentPosition(), getObservation(), mic::environments::Environment::pomdp_flag, and mic::environments::Environment::roi_size.
Referenced by mic::application::GridworldDRLExperienceReplayPOMDP::getPredictedRewardsForGivenState(), mic::application::GridworldDRLExperienceReplayPOMDP::performSingleStep(), and mic::application::GridworldDRLExperienceReplayPOMDP::streamNetworkResponseTable().
std::string mic::environments::Gridworld::environmentToString | ( | ) |
virtual |
Returns the current state of the gridworld in the form of a string.
Implements mic::environments::Environment.
Definition at line 689 of file Gridworld.cpp.
References mic::environments::Environment::environment_grid, and gridToString().
Referenced by mic::application::GridworldQLearning::performSingleStep(), mic::application::GridworldValueIteration::performSingleStep(), mic::application::GridworldDeepQLearning::performSingleStep(), mic::application::GridworldDRLExperienceReplay::performSingleStep(), mic::application::GridworldDRLExperienceReplayPOMDP::performSingleStep(), mic::application::GridworldQLearning::startNewEpisode(), mic::application::GridworldDeepQLearning::startNewEpisode(), mic::application::GridworldDRLExperienceReplay::startNewEpisode(), and mic::application::GridworldDRLExperienceReplayPOMDP::startNewEpisode().
mic::types::Position2D mic::environments::Gridworld::getAgentPosition | ( | ) |
virtual |
Calculates the agent position.
Implements mic::environments::Environment.
Definition at line 790 of file Gridworld.cpp.
References mic::environments::Agent, mic::environments::Environment::environment_grid, mic::environments::Environment::height, and mic::environments::Environment::width.
Referenced by encodeObservation(), mic::application::GridworldQLearning::finishCurrentEpisode(), mic::application::GridworldDeepQLearning::finishCurrentEpisode(), mic::application::GridworldDRLExperienceReplay::finishCurrentEpisode(), mic::application::GridworldDRLExperienceReplayPOMDP::finishCurrentEpisode(), getObservation(), mic::application::GridworldDRLExperienceReplay::getPredictedRewardsForGivenState(), mic::application::GridworldDRLExperienceReplayPOMDP::getPredictedRewardsForGivenState(), moveAgentToPosition(), mic::application::GridworldQLearning::performSingleStep(), mic::application::GridworldDeepQLearning::performSingleStep(), mic::application::GridworldDRLExperienceReplay::performSingleStep(), mic::application::GridworldDRLExperienceReplayPOMDP::performSingleStep(), mic::application::GridworldDeepQLearning::streamNetworkResponseTable(), mic::application::GridworldDRLExperienceReplay::streamNetworkResponseTable(), and mic::application::GridworldDRLExperienceReplayPOMDP::streamNetworkResponseTable().
mic::types::TensorXfPtr mic::environments::Gridworld::getObservation | ( | ) |
Returns the tensor being the observation.
Definition at line 737 of file Gridworld.cpp.
References mic::environments::Agent, getAgentPosition(), mic::environments::Goals, mic::environments::Environment::observation_grid, mic::environments::Environment::roi_size, and mic::environments::Walls.
Referenced by encodeObservation(), and observationToString().
float mic::environments::Gridworld::getStateReward | ( | mic::types::Position2D | pos_ | ) |
virtual |
Returns the reward associated with the given state.
pos_ | Position (state). |
Implements mic::environments::Environment.
Definition at line 823 of file Gridworld.cpp.
References mic::environments::Environment::environment_grid, and mic::environments::Goals.
Referenced by mic::application::GridworldQLearning::finishCurrentEpisode(), mic::application::GridworldDeepQLearning::finishCurrentEpisode(), mic::application::GridworldDRLExperienceReplay::finishCurrentEpisode(), mic::application::GridworldDRLExperienceReplayPOMDP::finishCurrentEpisode(), mic::application::GridworldQLearning::performSingleStep(), mic::application::GridworldValueIteration::performSingleStep(), mic::application::GridworldDeepQLearning::performSingleStep(), mic::application::GridworldDRLExperienceReplay::performSingleStep(), and mic::application::GridworldDRLExperienceReplayPOMDP::performSingleStep().
std::string mic::environments::Gridworld::gridToString | ( | mic::types::TensorXfPtr | grid_ | ) |
protected |
Returns the current state of the grid passed as an argument in the form of a string.
grid_ | Grid to be processed. |
Definition at line 651 of file Gridworld.cpp.
References mic::environments::Agent, mic::environments::Goals, and mic::environments::Walls.
Referenced by environmentToString(), and observationToString().
void mic::environments::Gridworld::initBookGrid | ( | ) |
Initializes the classic Book gridworld - example from Sutton&Barto book on RL.
[[' ',' ',' ',+1], [' ','#',' ',-1], ['S',' ',' ',' ']]
Definition at line 237 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Goals, mic::environments::Environment::height, mic::environments::Environment::initial_position, moveAgentToPosition(), mic::environments::Pits, mic::environments::Walls, and mic::environments::Environment::width.
Referenced by initializeEnvironment().
void mic::environments::Gridworld::initBridgeGrid | ( | ) |
Initializes the classic bridge gridworld.
[[ '#',-100, -100, -100, -100, -100, '#'], [ 1, 'S', ' ', ' ', ' ', ' ', 10], [ '#',-100, -100, -100, -100, -100, '#']]
Definition at line 201 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Goals, mic::environments::Environment::height, mic::environments::Environment::initial_position, moveAgentToPosition(), mic::environments::Pits, mic::environments::Walls, and mic::environments::Environment::width.
Referenced by initializeEnvironment().
void mic::environments::Gridworld::initClassicCliffGrid | ( | ) |
Initializes the classic cliff gridworld.
[[' ',' ',' ',' ',' '], ['S',' ',' ',' ',10], [-100,-100, -100, -100, -100]]
Definition at line 140 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Goals, mic::environments::Environment::height, mic::environments::Environment::initial_position, moveAgentToPosition(), mic::environments::Pits, and mic::environments::Environment::width.
Referenced by initializeEnvironment().
void mic::environments::Gridworld::initDebug2x2Grid | ( | ) |
Method initializes the 2x2 grid useful during the debugging.
[['S',-10], [+10,' ']]
Definition at line 362 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Goals, mic::environments::Environment::height, mic::environments::Environment::initial_position, moveAgentToPosition(), mic::environments::Pits, and mic::environments::Environment::width.
Referenced by initializeEnvironment().
void mic::environments::Gridworld::initDebug3x3Grid | ( | ) |
Method initializes the 3x3 grid useful during the debugging.
[[' ',-10,' '], [-10,'S',-10], [' ',+10,' ']]
Method initializes the 3x3 grid useful during the debugging.
Definition at line 393 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Goals, mic::environments::Environment::height, mic::environments::Environment::initial_position, moveAgentToPosition(), mic::environments::Pits, mic::environments::Walls, and mic::environments::Environment::width.
Referenced by initializeEnvironment().
void mic::environments::Gridworld::initDiscountGrid | ( | ) |
Initializes the classic discount gridworld.
[[' ',' ',' ',' ',' '], [' ','#',' ',' ',' '], [' ','#', 1,'#', 10], ['S',' ',' ',' ',' '], [-10,-10, -10, -10, -10]]
Definition at line 166 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Goals, mic::environments::Environment::height, mic::environments::Environment::initial_position, moveAgentToPosition(), mic::environments::Pits, mic::environments::Walls, and mic::environments::Environment::width.
Referenced by initializeEnvironment().
void mic::environments::Gridworld::initExemplaryDQLGrid | ( | ) |
Method initializes the grid from Deep Q-Learning example.
[[' ',' ',' ',' '], [' ',' ',+10,' '], [' ','#',-10,' '], ['S',' ',' ',' ']]
Definition at line 299 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Goals, mic::environments::Environment::height, mic::environments::Environment::initial_position, moveAgentToPosition(), mic::environments::Pits, mic::environments::Walls, and mic::environments::Environment::width.
Referenced by initializeEnvironment().
void mic::environments::Gridworld::initExemplaryGrid | ( | ) |
Method initializes the exemplary grid.
[[' ',' ',' ',' '], ['S',-10,' ',' '], [' ',' ','#',' '], [' ',' ',' ',10]]
Definition at line 110 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Goals, mic::environments::Environment::height, mic::environments::Environment::initial_position, moveAgentToPosition(), mic::environments::Pits, mic::environments::Walls, and mic::environments::Environment::width.
Referenced by initializeEnvironment().
void mic::environments::Gridworld::initHardRandomGrid | ( | ) |
Generates a random grid of size (width x height), with a single goal, but several walls and pits.
Definition at line 540 of file Gridworld.cpp.
References mic::environments::Agent, mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Goals, mic::environments::Environment::height, mic::environments::Environment::initial_position, isGridTraversible(), moveAgentToPosition(), mic::environments::Walls, and mic::environments::Environment::width.
Referenced by initializeEnvironment().
void mic::environments::Gridworld::initializeEnvironment | ( | ) |
virtual |
(Re)initializes the environment - generates the gridworld of a required (defined by property) type, sets agent, goal etc.
Implements mic::environments::Environment.
Definition at line 81 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Environment::height, initBookGrid(), initBridgeGrid(), initClassicCliffGrid(), initDebug2x2Grid(), initDebug3x3Grid(), initDiscountGrid(), initExemplaryDQLGrid(), initExemplaryGrid(), initHardRandomGrid(), initMazeGrid(), initModifiedDQLGrid(), initSimpleRandomGrid(), mic::environments::Environment::observation_grid, mic::environments::Environment::pomdp_flag, mic::environments::Environment::roi_size, type, and mic::environments::Environment::width.
Referenced by mic::application::GridworldValueIteration::initializePropertyDependentVariables(), mic::application::GridworldQLearning::initializePropertyDependentVariables(), mic::application::GridworldDeepQLearning::initializePropertyDependentVariables(), mic::application::GridworldDRLExperienceReplay::initializePropertyDependentVariables(), mic::application::GridworldDRLExperienceReplayPOMDP::initializePropertyDependentVariables(), mic::application::GridworldQLearning::startNewEpisode(), mic::application::GridworldDeepQLearning::startNewEpisode(), mic::application::GridworldDRLExperienceReplay::startNewEpisode(), and mic::application::GridworldDRLExperienceReplayPOMDP::startNewEpisode().
void mic::environments::Gridworld::initializePropertyDependentVariables | ( | ) |
virtual |
Initializes all variables that are property-dependent.
Definition at line 77 of file Gridworld.cpp.
void mic::environments::Gridworld::initMazeGrid | ( | ) |
Initializes the classic maze gridworld.
[[' ',' ',' ',+1], ['#','#',' ','#'], [' ','#',' ',' '], [' ','#','#',' '], ['S',' ',' ',' ']]
Definition at line 266 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Goals, mic::environments::Environment::height, mic::environments::Environment::initial_position, moveAgentToPosition(), mic::environments::Walls, and mic::environments::Environment::width.
Referenced by initializeEnvironment().
void mic::environments::Gridworld::initModifiedDQLGrid | ( | ) |
Method initializes a slightly modified grid from Deep Q-Learning example.
[[' ',' ',' ',' '], [' ','#',+10,' '], [' ',' ',-10,' '], ['S',' ',' ',' ']]
Definition at line 330 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Goals, mic::environments::Environment::height, mic::environments::Environment::initial_position, moveAgentToPosition(), mic::environments::Pits, mic::environments::Walls, and mic::environments::Environment::width.
Referenced by initializeEnvironment().
void mic::environments::Gridworld::initSimpleRandomGrid | ( | ) |
Generates a random grid of size (width x height), with a single pit, goal and wall.
Definition at line 427 of file Gridworld.cpp.
References mic::environments::Agent, mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Goals, mic::environments::Environment::height, mic::environments::Environment::initial_position, isStateAllowed(), moveAgentToPosition(), mic::environments::Pits, mic::environments::Walls, and mic::environments::Environment::width.
Referenced by initializeEnvironment().
bool mic::environments::Gridworld::isGridTraversible | ( | long | x_, |
long | y_, | ||
mic::types::Matrix< bool > & | visited_ | ||
) |
A recursive method for checking whether the grid is traversable (i.e. there is a path from agent to goal).
x_ | Current x coordinate to check. |
y_ | Current y coordinate to check. |
visited_ | Matrix with visited states. |
Definition at line 509 of file Gridworld.cpp.
References mic::environments::Environment::environment_grid, mic::environments::Goals, isStateAllowed(), and mic::environments::Pits.
Referenced by initHardRandomGrid().
bool mic::environments::Gridworld::isStateAllowed | ( | mic::types::Position2D | pos_ | ) |
virtual |
Checks if position is allowed, i.e. within the gridworld boundaries and there is no wall at that place.
pos_ | Position to be checked. |
Implements mic::environments::Environment.
Definition at line 834 of file Gridworld.cpp.
References mic::environments::Environment::environment_grid, and mic::environments::Walls.
Referenced by mic::application::GridworldValueIteration::computeBestValue(), mic::application::GridworldQLearning::computeBestValue(), initSimpleRandomGrid(), isGridTraversible(), moveAgentToPosition(), mic::application::GridworldValueIteration::performSingleStep(), mic::application::GridworldDeepQLearning::streamNetworkResponseTable(), mic::application::GridworldDRLExperienceReplay::streamNetworkResponseTable(), mic::application::GridworldDRLExperienceReplayPOMDP::streamNetworkResponseTable(), and mic::application::GridworldQLearning::streamQStateTable().
bool mic::environments::Gridworld::isStateTerminal | ( | mic::types::Position2D | pos_ | ) |
virtual |
Checks if position is terminal, i.e. agent is standing in a pit or reached the goal. Returns true if the given state is terminal.
pos_ | Position (state) to be checked. |
Implements mic::environments::Environment.
Definition at line 849 of file Gridworld.cpp.
References mic::environments::Environment::environment_grid, and mic::environments::Goals.
Referenced by mic::application::GridworldQLearning::performSingleStep(), mic::application::GridworldValueIteration::performSingleStep(), mic::application::GridworldDeepQLearning::performSingleStep(), mic::application::GridworldDRLExperienceReplay::performSingleStep(), mic::application::GridworldDRLExperienceReplayPOMDP::performSingleStep(), mic::application::GridworldDeepQLearning::streamNetworkResponseTable(), mic::application::GridworldDRLExperienceReplay::streamNetworkResponseTable(), mic::application::GridworldDRLExperienceReplayPOMDP::streamNetworkResponseTable(), and mic::application::GridworldQLearning::streamQStateTable().
bool mic::environments::Gridworld::moveAgentToPosition | ( | mic::types::Position2D | pos_ | ) |
virtual |
Moves the agent to the position. Type of move (deterministic vs stochastic) depends on the environment (the same goes for e.g. the circular world assumption).
pos_ | Desired position of the agent. |
Implements mic::environments::Environment.
Definition at line 805 of file Gridworld.cpp.
References mic::environments::Agent, getAgentPosition(), and isStateAllowed().
Referenced by mic::application::GridworldDRLExperienceReplay::getPredictedRewardsForGivenState(), mic::application::GridworldDRLExperienceReplayPOMDP::getPredictedRewardsForGivenState(), initBookGrid(), initBridgeGrid(), initClassicCliffGrid(), initDebug2x2Grid(), initDebug3x3Grid(), initDiscountGrid(), initExemplaryDQLGrid(), initExemplaryGrid(), initHardRandomGrid(), initMazeGrid(), initModifiedDQLGrid(), initSimpleRandomGrid(), mic::application::GridworldDRLExperienceReplay::performSingleStep(), mic::application::GridworldDRLExperienceReplayPOMDP::performSingleStep(), mic::application::GridworldDeepQLearning::streamNetworkResponseTable(), mic::application::GridworldDRLExperienceReplay::streamNetworkResponseTable(), and mic::application::GridworldDRLExperienceReplayPOMDP::streamNetworkResponseTable().
std::string mic::environments::Gridworld::observationToString | ( | ) |
virtual |
Returns the current observation taken in the gridworld in the form of a string.
Implements mic::environments::Environment.
Definition at line 693 of file Gridworld.cpp.
References mic::environments::Environment::environment_grid, getObservation(), gridToString(), and mic::environments::Environment::pomdp_flag.
Referenced by mic::application::GridworldDRLExperienceReplayPOMDP::performSingleStep(), and mic::application::GridworldDRLExperienceReplayPOMDP::startNewEpisode().
mic::environments::Gridworld & mic::environments::Gridworld::operator= | ( | const mic::environments::Gridworld & | gw_ | ) |
Assignment operator. Copies the gridworld state along with its properties.
gw_ | Gridworld object whose values/properties will be copied. |
Definition at line 61 of file Gridworld.cpp.
References mic::environments::Environment::channels, mic::environments::Environment::environment_grid, mic::environments::Environment::height, mic::environments::Environment::initial_position, mic::environments::Environment::observation_grid, type, and mic::environments::Environment::width.
mic::configuration::Property<short> mic::environments::Gridworld::type |
protected |
Property: type of the generated gridworld. Currently available types: 0: the exemplary grid 4x4. 1: the classic cliff grid 5x3. 2: the classic discount grid 5x5. 3: the classic bridge grid 7x3. 4: the classic book grid 4x3. 5: the classic maze grid 4x5. 6: gridworld from DQL example 4x4. 7: slightly modified gridworld from DQL example 4x4. 8: debug grid 2x2. 9: debug grid 3x3. -1 (or else): random grid - all items (wall, goal, pit and agent) placed randomly. -2 (or else): random grid - all items (wall, goal, pit and agent) placed randomly, with multiple pits and walls.
Definition at line 294 of file Gridworld.hpp.
Referenced by Gridworld(), initializeEnvironment(), and operator=().