aboutsummaryrefslogtreecommitdiffstats
path: root/src/graphicsCrawlerDisplay.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/graphicsCrawlerDisplay.py')
-rw-r--r--src/graphicsCrawlerDisplay.py333
1 files changed, 333 insertions, 0 deletions
diff --git a/src/graphicsCrawlerDisplay.py b/src/graphicsCrawlerDisplay.py
new file mode 100644
index 0000000..4c9cf41
--- /dev/null
+++ b/src/graphicsCrawlerDisplay.py
@@ -0,0 +1,333 @@
+# graphicsCrawlerDisplay.py
+# -------------------------
+# Licensing Information: You are free to use or extend these projects for
+# educational purposes provided that (1) you do not distribute or publish
+# solutions, (2) you retain this notice, and (3) you provide clear
+# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
+#
+# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
+# The core projects and autograders were primarily created by John DeNero
+# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
+# Student side autograding was added by Brad Miller, Nick Hay, and
+# Pieter Abbeel (pabbeel@cs.berkeley.edu).
+
+
+# graphicsCrawlerDisplay.py
+# -------------------------
+# Licensing Information: Please do not distribute or publish solutions to this
+# project. You are free to use and extend these projects for educational
+# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
+# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
+# Student side autograding was added by Brad Miller, Nick Hay, and Pieter
+# Abbeel in Spring 2013.
+# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html
+
+import Tkinter
+import qlearningAgents
+import time
+import threading
+import sys
+import crawler
+#import pendulum
+import math
+from math import pi as PI
+
+robotType = 'crawler'
+
+class Application:
+
+ def sigmoid(self, x):
+ return 1.0 / (1.0 + 2.0 ** (-x))
+
+ def incrementSpeed(self, inc):
+ self.tickTime *= inc
+# self.epsilon = min(1.0, self.epsilon)
+# self.epsilon = max(0.0,self.epsilon)
+# self.learner.setSpeed(self.epsilon)
+ self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime)
+
+ def incrementEpsilon(self, inc):
+ self.ep += inc
+ self.epsilon = self.sigmoid(self.ep)
+ self.learner.setEpsilon(self.epsilon)
+ self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon)
+
+ def incrementGamma(self, inc):
+ self.ga += inc
+ self.gamma = self.sigmoid(self.ga)
+ self.learner.setDiscount(self.gamma)
+ self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma)
+
+ def incrementAlpha(self, inc):
+ self.al += inc
+ self.alpha = self.sigmoid(self.al)
+ self.learner.setLearningRate(self.alpha)
+ self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha)
+
+ def __initGUI(self, win):
+ ## Window ##
+ self.win = win
+
+ ## Initialize Frame ##
+ win.grid()
+ self.dec = -.5
+ self.inc = .5
+ self.tickTime = 0.1
+
+ ## Epsilon Button + Label ##
+ self.setupSpeedButtonAndLabel(win)
+
+ self.setupEpsilonButtonAndLabel(win)
+
+ ## Gamma Button + Label ##
+ self.setUpGammaButtonAndLabel(win)
+
+ ## Alpha Button + Label ##
+ self.setupAlphaButtonAndLabel(win)
+
+ ## Exit Button ##
+ #self.exit_button = Tkinter.Button(win,text='Quit', command=self.exit)
+ #self.exit_button.grid(row=0, column=9)
+
+ ## Simulation Buttons ##
+# self.setupSimulationButtons(win)
+
+ ## Canvas ##
+ self.canvas = Tkinter.Canvas(root, height=200, width=1000)
+ self.canvas.grid(row=2,columnspan=10)
+
+ def setupAlphaButtonAndLabel(self, win):
+ self.alpha_minus = Tkinter.Button(win,
+ text="-",command=(lambda: self.incrementAlpha(self.dec)))
+ self.alpha_minus.grid(row=1, column=3, padx=10)
+
+ self.alpha = self.sigmoid(self.al)
+ self.alpha_label = Tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha))
+ self.alpha_label.grid(row=1, column=4)
+
+ self.alpha_plus = Tkinter.Button(win,
+ text="+",command=(lambda: self.incrementAlpha(self.inc)))
+ self.alpha_plus.grid(row=1, column=5, padx=10)
+
+ def setUpGammaButtonAndLabel(self, win):
+ self.gamma_minus = Tkinter.Button(win,
+ text="-",command=(lambda: self.incrementGamma(self.dec)))
+ self.gamma_minus.grid(row=1, column=0, padx=10)
+
+ self.gamma = self.sigmoid(self.ga)
+ self.gamma_label = Tkinter.Label(win, text='Discount: %.3f' % (self.gamma))
+ self.gamma_label.grid(row=1, column=1)
+
+ self.gamma_plus = Tkinter.Button(win,
+ text="+",command=(lambda: self.incrementGamma(self.inc)))
+ self.gamma_plus.grid(row=1, column=2, padx=10)
+
+ def setupEpsilonButtonAndLabel(self, win):
+ self.epsilon_minus = Tkinter.Button(win,
+ text="-",command=(lambda: self.incrementEpsilon(self.dec)))
+ self.epsilon_minus.grid(row=0, column=3)
+
+ self.epsilon = self.sigmoid(self.ep)
+ self.epsilon_label = Tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon))
+ self.epsilon_label.grid(row=0, column=4)
+
+ self.epsilon_plus = Tkinter.Button(win,
+ text="+",command=(lambda: self.incrementEpsilon(self.inc)))
+ self.epsilon_plus.grid(row=0, column=5)
+
+ def setupSpeedButtonAndLabel(self, win):
+ self.speed_minus = Tkinter.Button(win,
+ text="-",command=(lambda: self.incrementSpeed(.5)))
+ self.speed_minus.grid(row=0, column=0)
+
+ self.speed_label = Tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime))
+ self.speed_label.grid(row=0, column=1)
+
+ self.speed_plus = Tkinter.Button(win,
+ text="+",command=(lambda: self.incrementSpeed(2)))
+ self.speed_plus.grid(row=0, column=2)
+
+
+
+
+
+
+
+ def skip5kSteps(self):
+ self.stepsToSkip = 5000
+
+ def __init__(self, win):
+
+ self.ep = 0
+ self.ga = 2
+ self.al = 2
+ self.stepCount = 0
+ ## Init Gui
+
+ self.__initGUI(win)
+
+ # Init environment
+ if robotType == 'crawler':
+ self.robot = crawler.CrawlingRobot(self.canvas)
+ self.robotEnvironment = crawler.CrawlingRobotEnvironment(self.robot)
+ elif robotType == 'pendulum':
+ self.robot = pendulum.PendulumRobot(self.canvas)
+ self.robotEnvironment = \
+ pendulum.PendulumRobotEnvironment(self.robot)
+ else:
+ raise "Unknown RobotType"
+
+ # Init Agent
+ simulationFn = lambda agent: \
+ simulation.SimulationEnvironment(self.robotEnvironment,agent)
+ actionFn = lambda state: \
+ self.robotEnvironment.getPossibleActions(state)
+ self.learner = qlearningAgents.QLearningAgent(actionFn=actionFn)
+
+ self.learner.setEpsilon(self.epsilon)
+ self.learner.setLearningRate(self.alpha)
+ self.learner.setDiscount(self.gamma)
+
+ # Start GUI
+ self.running = True
+ self.stopped = False
+ self.stepsToSkip = 0
+ self.thread = threading.Thread(target=self.run)
+ self.thread.start()
+
+
+ def exit(self):
+ self.running = False
+ for i in range(5):
+ if not self.stopped:
+ time.sleep(0.1)
+ try:
+ self.win.destroy()
+ except:
+ pass
+ sys.exit(0)
+
+ def step(self):
+
+ self.stepCount += 1
+
+ state = self.robotEnvironment.getCurrentState()
+ actions = self.robotEnvironment.getPossibleActions(state)
+ if len(actions) == 0.0:
+ self.robotEnvironment.reset()
+ state = self.robotEnvironment.getCurrentState()
+ actions = self.robotEnvironment.getPossibleActions(state)
+ print 'Reset!'
+ action = self.learner.getAction(state)
+ if action == None:
+ raise 'None action returned: Code Not Complete'
+ nextState, reward = self.robotEnvironment.doAction(action)
+ self.learner.observeTransition(state, action, nextState, reward)
+
+ def animatePolicy(self):
+ if robotType != 'pendulum':
+ raise 'Only pendulum can animatePolicy'
+
+
+ totWidth = self.canvas.winfo_reqwidth()
+ totHeight = self.canvas.winfo_reqheight()
+
+ length = 0.48 * min(totWidth, totHeight)
+ x,y = totWidth-length-30, length+10
+
+
+
+ angleMin, angleMax = self.robot.getMinAndMaxAngle()
+ velMin, velMax = self.robot.getMinAndMaxAngleVelocity()
+
+ if not 'animatePolicyBox' in dir(self):
+ self.canvas.create_line(x,y,x+length,y)
+ self.canvas.create_line(x+length,y,x+length,y-length)
+ self.canvas.create_line(x+length,y-length,x,y-length)
+ self.canvas.create_line(x,y-length,x,y)
+ self.animatePolicyBox = 1
+ self.canvas.create_text(x+length/2,y+10,text='angle')
+ self.canvas.create_text(x-30,y-length/2,text='velocity')
+ self.canvas.create_text(x-60,y-length/4,text='Blue = kickLeft')
+ self.canvas.create_text(x-60,y-length/4+20,text='Red = kickRight')
+ self.canvas.create_text(x-60,y-length/4+40,text='White = doNothing')
+
+
+
+ angleDelta = (angleMax-angleMin) / 100
+ velDelta = (velMax-velMin) / 100
+ for i in range(100):
+ angle = angleMin + i * angleDelta
+
+ for j in range(100):
+ vel = velMin + j * velDelta
+ state = self.robotEnvironment.getState(angle,vel)
+ max, argMax = None, None
+ if not self.learner.seenState(state):
+ argMax = 'unseen'
+ else:
+ for action in ('kickLeft','kickRight','doNothing'):
+ qVal = self.learner.getQValue(state, action)
+ if max == None or qVal > max:
+ max, argMax = qVal, action
+ if argMax != 'unseen':
+ if argMax == 'kickLeft':
+ color = 'blue'
+ elif argMax == 'kickRight':
+ color = 'red'
+ elif argMax == 'doNothing':
+ color = 'white'
+ dx = length / 100.0
+ dy = length / 100.0
+ x0, y0 = x+i*dx, y-j*dy
+ self.canvas.create_rectangle(x0,y0,x0+dx,y0+dy,fill=color)
+
+
+
+
+ def run(self):
+ self.stepCount = 0
+ self.learner.startEpisode()
+ while True:
+ minSleep = .01
+ tm = max(minSleep, self.tickTime)
+ time.sleep(tm)
+ self.stepsToSkip = int(tm / self.tickTime) - 1
+
+ if not self.running:
+ self.stopped = True
+ return
+ for i in range(self.stepsToSkip):
+ self.step()
+ self.stepsToSkip = 0
+ self.step()
+# self.robot.draw()
+ self.learner.stopEpisode()
+
+ def start(self):
+ self.win.mainloop()
+
+
+
+
+
+def run():
+ global root
+ root = Tkinter.Tk()
+ root.title( 'Crawler GUI' )
+ root.resizable( 0, 0 )
+
+# root.mainloop()
+
+
+ app = Application(root)
+ def update_gui():
+ app.robot.draw(app.stepCount, app.tickTime)
+ root.after(10, update_gui)
+ update_gui()
+
+ root.protocol( 'WM_DELETE_WINDOW', app.exit)
+ try:
+ app.start()
+ except:
+ app.exit()