diff options
author | Neil Kollack <nkollack@gmail.com> | 2021-11-10 20:39:45 -0600 |
---|---|---|
committer | Neil Kollack <nkollack@gmail.com> | 2021-11-10 20:39:45 -0600 |
commit | dc4995eb3a8cd8c02aebe6ea49ac000cb33bc0dd (patch) | |
tree | ea05dc7e49e5678b223784225f146136e1a1271f | |
parent | 983238aad19ac7c76352ca4e7b7c55fd197af0cf (diff) |
Completed q8
-rw-r--r-- | .vscode/launch.json | 34 | ||||
-rw-r--r-- | src/qlearningAgents.py | 57 |
2 files changed, 72 insertions, 19 deletions
diff --git a/.vscode/launch.json b/.vscode/launch.json index 9108452..b1de93a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,7 +12,7 @@ "cwd": "${workspaceFolder}/src", "args": [ "-q", - "q5", + "q8", // "--student-code=valueIterationAgents.py", ] }, @@ -20,26 +20,36 @@ "name": "debug", "type": "python", "request": "launch", - "program": "${workspaceFolder}\\src\\gridworld.py", + "program": "pacman.py", + "cwd": "${workspaceFolder}/src", "args": [ - "-a", - "q", - "-k", + "-p", + "ApproximateQAgent", + "-x", "50", "-n", - "0", - "-g", - "BridgeGrid", - "-e", - "${input:epsilon}", + "60", "-l", - "${input:learning_rate}" + "mediumGrid", + "-a", + "extractor=${input:extractor}", ], "console": "integratedTerminal" - }, + } ], "inputs": [ { + "type": "pickString", + "id": "extractor", + "options": [ + "IdentityExtractor", + "CoordinateExtractor", + "SimpleExtractor", + ], + "default": "SimpleExtractor", + "description": "" + }, + { "type": "promptString", "id": "epsilon", "default": "0.5", diff --git a/src/qlearningAgents.py b/src/qlearningAgents.py index fda03eb..bc28c51 100644 --- a/src/qlearningAgents.py +++ b/src/qlearningAgents.py @@ -43,6 +43,7 @@ class QLearningAgent(ReinforcementAgent): ReinforcementAgent.__init__(self, **args) "*** YOUR CODE HERE ***" + # Initialize all Q_Values to 0 self.qValues = util.Counter() def getQValue(self, state, action): @@ -52,6 +53,7 @@ class QLearningAgent(ReinforcementAgent): or the Q node value otherwise """ "*** YOUR CODE HERE ***" + # Get the current Q-Value return self.qValues[(state, action)] @@ -65,12 +67,12 @@ class QLearningAgent(ReinforcementAgent): "*** YOUR CODE HERE ***" qValues = [] + # get a list of the Q-Values from the legal actions for action in self.getLegalActions(state): qValues.append(self.getQValue(state, action)) - if len(self.getLegalActions(state)) == 0: - return 0.0 - return max(qValues) + # return the max Q-Value or 0 if there are no qValues + return 0.0 if len(qValues) == 0 else max(qValues) def computeActionFromQValues(self, state): """ @@ -79,15 +81,22 @@ class QLearningAgent(ReinforcementAgent): you should return None. """ "*** YOUR CODE HERE ***" + # Computes the best action of a state using Q-Values + + # initialize values maxAction = None maxQValue = None + # for every action for action in self.getLegalActions(state): + # get the Q-Value qValue = self.getQValue(state, action) + # If it is greater than the current max, set it as the new max if qValue > maxQValue: maxAction = action maxQValue = qValue + # return the max return maxAction @@ -105,10 +114,14 @@ class QLearningAgent(ReinforcementAgent): # Pick Action legalActions = self.getLegalActions(state) action = None + "*** YOUR CODE HERE ***" + # Use Epsilon to determine whether to take a random action or the policy action if util.flipCoin(self.epsilon): + # A random action was selected action = random.choice(legalActions) else: + # The policy action was taken action = self.computeActionFromQValues(state) return action @@ -123,16 +136,25 @@ class QLearningAgent(ReinforcementAgent): it will be called on your behalf """ "*** YOUR CODE HERE ***" - sample = reward + # This function updates the Q-Values using Q-Value Iteration + # initialize the sample with the base reward + sample = reward + + # if the state is not terminal if len(self.getLegalActions(nextState)) > 0: + # initialize max action to None (converted to 0) maxAction = None + + # iterate through each legal action and find the max value action for nextAction in self.getLegalActions(nextState): maxAction = max(maxAction, self.getQValue(nextState, nextAction)) + + # This is the sample portion of the Q-Value Iteration function sample += self.discount * maxAction + # This is where the Q-Values are updated using the Q-Value Iteration function self.qValues[(state, action)] = ((1 - self.alpha) * self.getQValue(state,action)) + (self.alpha * sample) - def getPolicy(self, state): return self.computeActionFromQValues(state) @@ -195,14 +217,33 @@ class ApproximateQAgent(PacmanQAgent): where * is the dotProduct operator """ "*** YOUR CODE HERE ***" - util.raiseNotDefined() + # In Approximate Q-Learning, Q Values are computed by linearly summing weight and feature pairs + + # get the features + feats = self.featExtractor.getFeatures(state, action) + # get the weights + weights = self.getWeights() + + # return the dot product of the weights and the featureVector + return weights * feats def update(self, state, action, nextState, reward): """ Should update your weights based on transition """ "*** YOUR CODE HERE ***" - util.raiseNotDefined() + # This function is where the weights are updated using the Approximate Q-Learning linear-function + + # get the features + feats = self.featExtractor.getFeatures(state, action) + # this is the difference from the Approximate Q-Learning linear-function + difference = reward + self.discount * self.getValue(nextState) - self.getQValue(state, action) + + # updating of weights occurs here + for feat in feats: + # this is the Approximate Q-Learning linear-function + self.weights[feat] += self.alpha * difference * feats[feat] + def final(self, state): "Called at the end of each game." @@ -213,4 +254,6 @@ class ApproximateQAgent(PacmanQAgent): if self.episodesSoFar == self.numTraining: # you might want to print your weights here for debugging "*** YOUR CODE HERE ***" + # print(self.weights) + # print("Num weights: " + str(len(self.weights))) pass |