author	Neil Kollack <nkollack@gmail.com>	2021-11-07 14:34:08 -0600
committer	Neil Kollack <nkollack@gmail.com>	2021-11-07 14:34:08 -0600
commit	5b27e9d273c43fd59905a7f126ddf8edfab7fae7 (patch)
tree	489011e86a50a7d7bd4fd0c1c7be09d634d1de45 /src/mdp.py
parent	90d43312138b00ddbe547aef667869915fd10a0a (diff)
initial commit
Diffstat (limited to 'src/mdp.py')
-rw-r--r--	src/mdp.py	67
1 file changed, 67 insertions(+), 0 deletions(-)
diff --git a/src/mdp.py b/src/mdp.py
new file mode 100644
index 0000000..0ce0c28
--- /dev/null
+++ b/src/mdp.py
@@ -0,0 +1,67 @@
+# mdp.py
+# ------
+# Licensing Information: You are free to use or extend these projects for
+# educational purposes provided that (1) you do not distribute or publish
+# solutions, (2) you retain this notice, and (3) you provide clear
+# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
+#
+# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
+# The core projects and autograders were primarily created by John DeNero
+# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
+# Student side autograding was added by Brad Miller, Nick Hay, and
+# Pieter Abbeel (pabbeel@cs.berkeley.edu).
+
+
+import random
+
+class MarkovDecisionProcess:
+
+ def getStates(self):
+ """
+ Return a list of all states in the MDP.
+ Not generally possible for large MDPs.
+ """
+        raise NotImplementedError
+
+ def getStartState(self):
+ """
+ Return the start state of the MDP.
+ """
+        raise NotImplementedError
+
+ def getPossibleActions(self, state):
+ """
+ Return list of possible actions from 'state'.
+ """
+        raise NotImplementedError
+
+ def getTransitionStatesAndProbs(self, state, action):
+ """
+ Returns list of (nextState, prob) pairs
+ representing the states reachable
+ from 'state' by taking 'action' along
+ with their transition probabilities.
+
+        Note that in Q-Learning and reinforcement
+ learning in general, we do not know these
+ probabilities nor do we directly model them.
+ """
+        raise NotImplementedError
+
+ def getReward(self, state, action, nextState):
+ """
+ Get the reward for the state, action, nextState transition.
+
+ Not available in reinforcement learning.
+ """
+        raise NotImplementedError
+
+ def isTerminal(self, state):
+ """
+        Returns True if 'state' is a terminal state. By convention,
+ a terminal state has zero future rewards. Sometimes the terminal state(s)
+ may have no possible actions. It is also common to think of the terminal
+ state as having a self-loop action 'pass' with zero reward; the formulations
+ are equivalent.
+ """
+        raise NotImplementedError
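
As a minimal sketch of how this interface is meant to be used (the CoinFlipMDP class, its 'start'/'done' states, and the 'flip' action are hypothetical and not part of the commit above), here is a concrete subclass that illustrates the (nextState, prob) return format of getTransitionStatesAndProbs and the zero-reward terminal convention described in isTerminal:

# Hypothetical illustration only; not from the Berkeley codebase.
class CoinFlipMDP(MarkovDecisionProcess):
    """From 'start', the single action 'flip' reaches the terminal
    state 'done' with probability 0.5 and stays in 'start' otherwise."""

    def getStates(self):
        return ['start', 'done']

    def getStartState(self):
        return 'start'

    def getPossibleActions(self, state):
        # No actions are available from the terminal state.
        return [] if self.isTerminal(state) else ['flip']

    def getTransitionStatesAndProbs(self, state, action):
        # Probabilities over next states sum to 1 for each (state, action).
        return [('done', 0.5), ('start', 0.5)]

    def getReward(self, state, action, nextState):
        # Reward 1 for entering the terminal state, 0 otherwise.
        return 1 if nextState == 'done' else 0

    def isTerminal(self, state):
        # By convention, the terminal state has zero future rewards.
        return state == 'done'

For example, CoinFlipMDP().getTransitionStatesAndProbs('start', 'flip') evaluates to [('done', 0.5), ('start', 0.5)].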