Completed q8

author: Neil Kollack <nkollack@gmail.com> 2021-11-10 20:39:45 -0600
committer: Neil Kollack <nkollack@gmail.com> 2021-11-10 20:39:45 -0600
commit: dc4995eb3a8cd8c02aebe6ea49ac000cb33bc0dd (patch)
tree: ea05dc7e49e5678b223784225f146136e1a1271f
parent: 983238aad19ac7c76352ca4e7b7c55fd197af0cf (diff)
2 files changed, 72 insertions, 19 deletions
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 9108452..b1de93a 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -12,7 +12,7 @@
             "cwd": "${workspaceFolder}/src",
             "args": [
                 "-q",
-                "q5",
+                "q8",
                 // "--student-code=valueIterationAgents.py",
             ]
         },
@@ -20,26 +20,36 @@
             "name": "debug",
             "type": "python",
             "request": "launch",
-            "program": "${workspaceFolder}\\src\\gridworld.py",
+            "program": "pacman.py",
+            "cwd": "${workspaceFolder}/src",
             "args": [
-                "-a",
-                "q",
-                "-k",
+                "-p",
+                "ApproximateQAgent",
+                "-x",
                 "50",
                 "-n",
-                "0",
-                "-g",
-                "BridgeGrid",
-                "-e",
-                "${input:epsilon}",
+                "60",
                 "-l",
-                "${input:learning_rate}"
+                "mediumGrid",
+                "-a",
+                "extractor=${input:extractor}",
             ],
             "console": "integratedTerminal"
-        },
+        }
     ],
     "inputs": [
         {
+            "type": "pickString",
+            "id": "extractor",
+            "options": [
+                "IdentityExtractor",
+                "CoordinateExtractor",
+                "SimpleExtractor",
+            ],
+            "default": "SimpleExtractor",
+            "description": ""
+        },
+        {
             "type": "promptString",
             "id": "epsilon",
             "default": "0.5",
diff --git a/src/qlearningAgents.py b/src/qlearningAgents.py
index fda03eb..bc28c51 100644
--- a/src/qlearningAgents.py
+++ b/src/qlearningAgents.py
@@ -43,6 +43,7 @@ class QLearningAgent(ReinforcementAgent):
         ReinforcementAgent.__init__(self, **args)
 
         "*** YOUR CODE HERE ***"
+        # Initialize all Q_Values to 0
         self.qValues = util.Counter()
 
     def getQValue(self, state, action):
@@ -52,6 +53,7 @@ class QLearningAgent(ReinforcementAgent):
           or the Q node value otherwise
         """
         "*** YOUR CODE HERE ***"
+        # Get the current Q-Value
         return self.qValues[(state, action)]
 
 
@@ -65,12 +67,12 @@ class QLearningAgent(ReinforcementAgent):
         "*** YOUR CODE HERE ***"
         qValues = []
 
+        # get a list of the Q-Values from the legal actions
         for action in self.getLegalActions(state):
           qValues.append(self.getQValue(state, action))
 
-        if len(self.getLegalActions(state)) == 0:
-          return 0.0
-        return max(qValues)
+        # return the max Q-Value or 0 if there are no qValues
+        return 0.0 if len(qValues) == 0 else max(qValues)
 
     def computeActionFromQValues(self, state):
         """
@@ -79,15 +81,22 @@ class QLearningAgent(ReinforcementAgent):
           you should return None.
         """
         "*** YOUR CODE HERE ***"
+        # Computes the best action of a state using Q-Values
+
+        # initialize values
         maxAction = None
         maxQValue = None
 
+        # for every action
         for action in self.getLegalActions(state):
+          # get the Q-Value
           qValue = self.getQValue(state, action)
+          # If it is greater than the current max, set it as the new max
           if qValue > maxQValue:
             maxAction = action
             maxQValue = qValue
 
+        # return the action with the highest Q-value (None if there are no legal actions)
         return maxAction
 
 
@@ -105,10 +114,14 @@ class QLearningAgent(ReinforcementAgent):
         # Pick Action
         legalActions = self.getLegalActions(state)
         action = None
+
         "*** YOUR CODE HERE ***"
+        # Use Epsilon to determine whether to take a random action or the policy action
         if util.flipCoin(self.epsilon):
+          # A random action was selected
           action = random.choice(legalActions)
         else:
+          # The policy action was taken
           action = self.computeActionFromQValues(state)
 
         return action
@@ -123,16 +136,25 @@ class QLearningAgent(ReinforcementAgent):
           it will be called on your behalf
         """
         "*** YOUR CODE HERE ***"
-        sample = reward
+        # Update the stored Q-value for (state, action) using the Q-learning update rule
 
+        # initialize the sample with the base reward
+        sample = reward
+        
+        # if the state is not terminal
         if len(self.getLegalActions(nextState)) > 0:
+          # maxAction holds the best next-state Q-value (not an action); it starts as None, not 0
           maxAction = None
+          
+          # iterate through each legal action of nextState and keep the largest Q-value
           for nextAction in self.getLegalActions(nextState):
             maxAction = max(maxAction, self.getQValue(nextState, nextAction))
+            
+          # sample = reward + discount * (max Q-value over the legal actions of nextState)
           sample += self.discount * maxAction
 
+        # Blend the old estimate with the new sample: Q <- (1 - alpha) * Q + alpha * sample
         self.qValues[(state, action)] = ((1 - self.alpha) * self.getQValue(state,action)) + (self.alpha * sample)
-        
 
     def getPolicy(self, state):
         return self.computeActionFromQValues(state)
@@ -195,14 +217,33 @@ class ApproximateQAgent(PacmanQAgent):
           where * is the dotProduct operator
         """
         "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+        # In Approximate Q-Learning, Q Values are computed by linearly summing weight and feature pairs
+        
+        # get the features
+        feats = self.featExtractor.getFeatures(state, action)
+        # get the weights
+        weights = self.getWeights()
+        
+        # return the dot product of the weights and the featureVector
+        return weights * feats
 
     def update(self, state, action, nextState, reward):
         """
            Should update your weights based on transition
         """
         "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+        # Update the feature weights from this transition using the approximate Q-learning rule
+        
+        # get the features
+        feats = self.featExtractor.getFeatures(state, action)
+        # difference = (reward + discount * V(nextState)) - Q(state, action)
+        difference = reward + self.discount * self.getValue(nextState) - self.getQValue(state, action)
+
+        # updating of weights occurs here
+        for feat in feats:
+          # w_i <- w_i + alpha * difference * f_i(state, action)
+          self.weights[feat] += self.alpha * difference * feats[feat]
+
 
     def final(self, state):
         "Called at the end of each game."
@@ -213,4 +254,6 @@ class ApproximateQAgent(PacmanQAgent):
         if self.episodesSoFar == self.numTraining:
             # you might want to print your weights here for debugging
             "*** YOUR CODE HERE ***"
+            # print(self.weights)
+            # print("Num weights: " + str(len(self.weights)))
             pass
author	Neil Kollack <nkollack@gmail.com>	2021-11-10 20:39:45 -0600
committer	Neil Kollack <nkollack@gmail.com>	2021-11-10 20:39:45 -0600
commit	dc4995eb3a8cd8c02aebe6ea49ac000cb33bc0dd (patch)
tree	ea05dc7e49e5678b223784225f146136e1a1271f
parent	983238aad19ac7c76352ca4e7b7c55fd197af0cf (diff)