diff options
author | Neil Kollack <nkollack@gmail.com> | 2021-11-07 14:34:08 -0600 |
---|---|---|
committer | Neil Kollack <nkollack@gmail.com> | 2021-11-07 14:34:08 -0600 |
commit | 5b27e9d273c43fd59905a7f126ddf8edfab7fae7 (patch) | |
tree | 489011e86a50a7d7bd4fd0c1c7be09d634d1de45 /src/mdp.py | |
parent | 90d43312138b00ddbe547aef667869915fd10a0a (diff) |
initial commit
Diffstat (limited to 'src/mdp.py')
-rw-r--r-- | src/mdp.py | 67 |
1 file changed, 67 insertions, 0 deletions
# mdp.py
# ------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).


import random

class MarkovDecisionProcess:
    """
    Abstract base class describing the interface of a Markov Decision
    Process: states, actions, transition probabilities, rewards, and
    terminal states.

    Concrete MDPs (e.g. a Gridworld) must override every method below.
    Each stub raises NotImplementedError so that calling an unimplemented
    method fails loudly and explicitly, rather than with an accidental
    NameError from an undefined name.
    """

    def getStates(self):
        """
        Return a list of all states in the MDP.
        Not generally possible for large MDPs.
        """
        raise NotImplementedError

    def getStartState(self):
        """
        Return the start state of the MDP.
        """
        raise NotImplementedError

    def getPossibleActions(self, state):
        """
        Return list of possible actions from 'state'.
        """
        raise NotImplementedError

    def getTransitionStatesAndProbs(self, state, action):
        """
        Returns list of (nextState, prob) pairs
        representing the states reachable
        from 'state' by taking 'action' along
        with their transition probabilities.

        Note that in Q-Learning and reinforcement
        learning in general, we do not know these
        probabilities nor do we directly model them.
        """
        raise NotImplementedError

    def getReward(self, state, action, nextState):
        """
        Get the reward for the state, action, nextState transition.

        Not available in reinforcement learning.
        """
        raise NotImplementedError

    def isTerminal(self, state):
        """
        Returns true if the current state is a terminal state. By convention,
        a terminal state has zero future rewards. Sometimes the terminal state(s)
        may have no possible actions. It is also common to think of the terminal
        state as having a self-loop action 'pass' with zero reward; the formulations
        are equivalent.
        """
        raise NotImplementedError