author     Neil Kollack <nkollack@gmail.com>   2021-11-10 20:39:45 -0600
committer  Neil Kollack <nkollack@gmail.com>   2021-11-10 20:39:45 -0600
commit     dc4995eb3a8cd8c02aebe6ea49ac000cb33bc0dd (patch)
tree       ea05dc7e49e5678b223784225f146136e1a1271f
parent     983238aad19ac7c76352ca4e7b7c55fd197af0cf (diff)
Completed q8
-rw-r--r--  .vscode/launch.json     | 34
-rw-r--r--  src/qlearningAgents.py  | 57
2 files changed, 72 insertions, 19 deletions
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 9108452..b1de93a 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -12,7 +12,7 @@
"cwd": "${workspaceFolder}/src",
"args": [
"-q",
- "q5",
+ "q8",
// "--student-code=valueIterationAgents.py",
]
},
@@ -20,26 +20,36 @@
"name": "debug",
"type": "python",
"request": "launch",
- "program": "${workspaceFolder}\\src\\gridworld.py",
+ "program": "pacman.py",
+ "cwd": "${workspaceFolder}/src",
"args": [
- "-a",
- "q",
- "-k",
+ "-p",
+ "ApproximateQAgent",
+ "-x",
"50",
"-n",
- "0",
- "-g",
- "BridgeGrid",
- "-e",
- "${input:epsilon}",
+ "60",
"-l",
- "${input:learning_rate}"
+ "mediumGrid",
+ "-a",
+ "extractor=${input:extractor}",
],
"console": "integratedTerminal"
- },
+ }
],
"inputs": [
{
+ "type": "pickString",
+ "id": "extractor",
+ "options": [
+ "IdentityExtractor",
+ "CoordinateExtractor",
+ "SimpleExtractor",
+ ],
+ "default": "SimpleExtractor",
+ "description": ""
+ },
+ {
"type": "promptString",
"id": "epsilon",
"default": "0.5",
diff --git a/src/qlearningAgents.py b/src/qlearningAgents.py
index fda03eb..bc28c51 100644
--- a/src/qlearningAgents.py
+++ b/src/qlearningAgents.py
@@ -43,6 +43,7 @@ class QLearningAgent(ReinforcementAgent):
ReinforcementAgent.__init__(self, **args)
"*** YOUR CODE HERE ***"
+ # Initialize all Q_Values to 0
self.qValues = util.Counter()
def getQValue(self, state, action):
@@ -52,6 +53,7 @@ class QLearningAgent(ReinforcementAgent):
or the Q node value otherwise
"""
"*** YOUR CODE HERE ***"
+ # Get the current Q-Value
return self.qValues[(state, action)]
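Note: self.qValues is a util.Counter, a dict subclass in the CS188 util module that returns 0 for missing keys, so every (state, action) pair implicitly starts at 0.0 and needs no explicit initialization. A minimal illustration (the state and action names are hypothetical):

    qValues = util.Counter()
    qValues[('someState', 'north')] += 1.0    # works even though the key was never set
    print(qValues[('otherState', 'south')])   # prints 0 for an unseen key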
@@ -65,12 +67,12 @@ class QLearningAgent(ReinforcementAgent):
"*** YOUR CODE HERE ***"
qValues = []
+ # get a list of the Q-Values from the legal actions
for action in self.getLegalActions(state):
qValues.append(self.getQValue(state, action))
- if len(self.getLegalActions(state)) == 0:
- return 0.0
- return max(qValues)
+ # return the max Q-Value or 0 if there are no qValues
+ return 0.0 if len(qValues) == 0 else max(qValues)
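Note: this implements V(s) = max_a Q(s, a) over the legal actions, with the convention that a terminal state (no legal actions) has value 0.0. Under Python 3 the same logic can be written as a one-liner (a sketch, not part of this commit):

    return max((self.getQValue(state, a) for a in self.getLegalActions(state)), default=0.0)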
def computeActionFromQValues(self, state):
"""
@@ -79,15 +81,22 @@ class QLearningAgent(ReinforcementAgent):
you should return None.
"""
"*** YOUR CODE HERE ***"
+ # Compute the best action for this state from the current Q-values
+
+ # initialize values
maxAction = None
maxQValue = None
+ # for every action
for action in self.getLegalActions(state):
+ # get the Q-Value
qValue = self.getQValue(state, action)
+ # If it is greater than the current max, set it as the new max
if qValue > maxQValue:
maxAction = action
maxQValue = qValue
+ # return the max
return maxAction
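Note: initializing maxQValue to None relies on Python 2 semantics, where any number compares greater than None; under Python 3 the comparison qValue > maxQValue would raise TypeError on the first iteration. A Python-3-safe sketch with the same behaviour (ties still resolve to the first maximal action encountered):

    legal = self.getLegalActions(state)
    if not legal:
        return None
    return max(legal, key=lambda a: self.getQValue(state, a))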
@@ -105,10 +114,14 @@ class QLearningAgent(ReinforcementAgent):
# Pick Action
legalActions = self.getLegalActions(state)
action = None
+
"*** YOUR CODE HERE ***"
+ # Use Epsilon to determine whether to take a random action or the policy action
if util.flipCoin(self.epsilon):
+ # take a random action (explore)
action = random.choice(legalActions)
else:
+ # take the greedy policy action (exploit)
action = self.computeActionFromQValues(state)
return action
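Note: this is the standard epsilon-greedy policy: with probability self.epsilon take a uniformly random legal action (explore), otherwise take the greedy action from the current Q-values (exploit). One caveat, sketched below: random.choice raises an error on an empty list, so a terminal state that falls into the exploration branch would crash; a small guard preserves the usual starter-code contract of returning None when there are no legal actions:

    if not legalActions:
        return None
    if util.flipCoin(self.epsilon):
        action = random.choice(legalActions)
    else:
        action = self.computeActionFromQValues(state)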
@@ -123,16 +136,25 @@ class QLearningAgent(ReinforcementAgent):
it will be called on your behalf
"""
"*** YOUR CODE HERE ***"
- sample = reward
+ # Update the Q-value for (state, action) using the standard Q-learning update rule
+ # initialize the sample with the base reward
+ sample = reward
+
+ # if the next state is not terminal (it still has legal actions)
if len(self.getLegalActions(nextState)) > 0:
+ # track the best next-state Q-value seen so far (despite its name, maxAction holds a Q-value, not an action)
maxAction = None
+
+ # iterate through the legal next actions and find the maximum next-state Q-value
for nextAction in self.getLegalActions(nextState):
maxAction = max(maxAction, self.getQValue(nextState, nextAction))
+
+ # add the discounted future value, completing the sample r + gamma * max_a' Q(s', a')
sample += self.discount * maxAction
+ # blend the old estimate with the sample: Q(s, a) = (1 - alpha) * Q(s, a) + alpha * sample
self.qValues[(state, action)] = ((1 - self.alpha) * self.getQValue(state,action)) + (self.alpha * sample)
-
def getPolicy(self, state):
return self.computeActionFromQValues(state)
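Note: this hunk implements the standard Q-learning (temporal-difference) update, Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a')), rather than value iteration. An equivalent, slightly more compact sketch that reuses the already-implemented computeValueFromQValues for the max over next-state actions (and avoids comparing None with numbers):

    sample = reward + self.discount * self.computeValueFromQValues(nextState)
    self.qValues[(state, action)] += self.alpha * (sample - self.getQValue(state, action))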
@@ -195,14 +217,33 @@ class ApproximateQAgent(PacmanQAgent):
where * is the dotProduct operator
"""
"*** YOUR CODE HERE ***"
- util.raiseNotDefined()
+ # In approximate Q-learning, Q(s, a) is a linear function of features: the dot product of the weight and feature vectors
+
+ # get the features
+ feats = self.featExtractor.getFeatures(state, action)
+ # get the weights
+ weights = self.getWeights()
+
+ # return the dot product of the weights and the featureVector
+ return weights * feats
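Note: util.Counter in the CS188 codebase defines * as the dot product of two counters, so weights * feats computes Q(s, a) = sum_i w_i * f_i(s, a). An explicit equivalent, shown only for clarity:

    total = 0.0
    for f in feats:
        total += weights[f] * feats[f]
    return total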
def update(self, state, action, nextState, reward):
"""
Should update your weights based on transition
"""
"*** YOUR CODE HERE ***"
- util.raiseNotDefined()
+ # Update the feature weights using the approximate Q-learning update rule
+
+ # get the features
+ feats = self.featExtractor.getFeatures(state, action)
+ # the difference (TD error) term: (reward + gamma * V(nextState)) - Q(state, action)
+ difference = reward + self.discount * self.getValue(nextState) - self.getQValue(state, action)
+
+ # updating of weights occurs here
+ for feat in feats:
+ # w_i <- w_i + alpha * difference * f_i(state, action)
+ self.weights[feat] += self.alpha * difference * feats[feat]
+
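Note: this is the approximate Q-learning weight update: difference = (reward + gamma * max_a' Q(s', a')) - Q(s, a), then w_i <- w_i + alpha * difference * f_i(s, a) for every feature. self.getValue(nextState), defined alongside getPolicy in the starter code, resolves to computeValueFromQValues, which calls the overridden getQValue, so the max is taken over the approximate (linear) Q-values. As a quick hypothetical sanity check: with reward = -1, discount = 0.8, value(nextState) = 2.5, and Q(s, a) = 1.0, the difference is -1 + 0.8 * 2.5 - 1.0 = 0.0, so no weights change on that transition.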
def final(self, state):
"Called at the end of each game."
@@ -213,4 +254,6 @@ class ApproximateQAgent(PacmanQAgent):
if self.episodesSoFar == self.numTraining:
# you might want to print your weights here for debugging
"*** YOUR CODE HERE ***"
+ # print(self.weights)
+ # print("Num weights: " + str(len(self.weights)))
pass