diff options
author | Neil Kollack <nkollack@gmail.com> | 2021-11-07 14:34:08 -0600 |
---|---|---|
committer | Neil Kollack <nkollack@gmail.com> | 2021-11-07 14:34:08 -0600 |
commit | 5b27e9d273c43fd59905a7f126ddf8edfab7fae7 (patch) | |
tree | 489011e86a50a7d7bd4fd0c1c7be09d634d1de45 /src/mdp.py | |
parent | 90d43312138b00ddbe547aef667869915fd10a0a (diff) |
initial commit
Diffstat (limited to 'src/mdp.py')
-rw-r--r-- | src/mdp.py | 67 |
1 file changed, 67 insertions, 0 deletions
# mdp.py
# ------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).


import random

class MarkovDecisionProcess:
    """
    Abstract base class describing the interface of a Markov Decision
    Process: states, actions, transition probabilities, rewards, and
    terminal states.

    Concrete MDPs (e.g. a Gridworld) must override every method below.
    Each stub raises NotImplementedError so that calling an unimplemented
    method fails loudly and explicitly, rather than with an accidental
    NameError from an undefined name.
    """

    def getStates(self):
        """
        Return a list of all states in the MDP.
        Not generally possible for large MDPs.
        """
        raise NotImplementedError

    def getStartState(self):
        """
        Return the start state of the MDP.
        """
        raise NotImplementedError

    def getPossibleActions(self, state):
        """
        Return list of possible actions from 'state'.
        """
        raise NotImplementedError

    def getTransitionStatesAndProbs(self, state, action):
        """
        Returns list of (nextState, prob) pairs
        representing the states reachable
        from 'state' by taking 'action' along
        with their transition probabilities.

        Note that in Q-Learning and reinforcement
        learning in general, we do not know these
        probabilities nor do we directly model them.
        """
        raise NotImplementedError

    def getReward(self, state, action, nextState):
        """
        Get the reward for the state, action, nextState transition.

        Not available in reinforcement learning.
        """
        raise NotImplementedError

    def isTerminal(self, state):
        """
        Returns true if the current state is a terminal state. By convention,
        a terminal state has zero future rewards. Sometimes the terminal state(s)
        may have no possible actions. It is also common to think of the terminal
        state as having a self-loop action 'pass' with zero reward; the formulations
        are equivalent.
        """
        raise NotImplementedError