diff options
Diffstat (limited to 'src/graphicsCrawlerDisplay.py')
-rw-r--r-- | src/graphicsCrawlerDisplay.py | 333 |
1 files changed, 333 insertions, 0 deletions
diff --git a/src/graphicsCrawlerDisplay.py b/src/graphicsCrawlerDisplay.py new file mode 100644 index 0000000..4c9cf41 --- /dev/null +++ b/src/graphicsCrawlerDisplay.py @@ -0,0 +1,333 @@ +# graphicsCrawlerDisplay.py +# ------------------------- +# Licensing Information: You are free to use or extend these projects for +# educational purposes provided that (1) you do not distribute or publish +# solutions, (2) you retain this notice, and (3) you provide clear +# attribution to UC Berkeley, including a link to http://ai.berkeley.edu. +# +# Attribution Information: The Pacman AI projects were developed at UC Berkeley. +# The core projects and autograders were primarily created by John DeNero +# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and +# Pieter Abbeel (pabbeel@cs.berkeley.edu). + + +# graphicsCrawlerDisplay.py +# ------------------------- +# Licensing Information: Please do not distribute or publish solutions to this +# project. You are free to use and extend these projects for educational +# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by +# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). +# Student side autograding was added by Brad Miller, Nick Hay, and Pieter +# Abbeel in Spring 2013. +# For more info, see http://inst.eecs.berkeley.edu/~cs188/pacman/pacman.html + +import Tkinter +import qlearningAgents +import time +import threading +import sys +import crawler +#import pendulum +import math +from math import pi as PI + +robotType = 'crawler' + +class Application: + + def sigmoid(self, x): + return 1.0 / (1.0 + 2.0 ** (-x)) + + def incrementSpeed(self, inc): + self.tickTime *= inc +# self.epsilon = min(1.0, self.epsilon) +# self.epsilon = max(0.0,self.epsilon) +# self.learner.setSpeed(self.epsilon) + self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime) + + def incrementEpsilon(self, inc): + self.ep += inc + self.epsilon = self.sigmoid(self.ep) + self.learner.setEpsilon(self.epsilon) + self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon) + + def incrementGamma(self, inc): + self.ga += inc + self.gamma = self.sigmoid(self.ga) + self.learner.setDiscount(self.gamma) + self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma) + + def incrementAlpha(self, inc): + self.al += inc + self.alpha = self.sigmoid(self.al) + self.learner.setLearningRate(self.alpha) + self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha) + + def __initGUI(self, win): + ## Window ## + self.win = win + + ## Initialize Frame ## + win.grid() + self.dec = -.5 + self.inc = .5 + self.tickTime = 0.1 + + ## Epsilon Button + Label ## + self.setupSpeedButtonAndLabel(win) + + self.setupEpsilonButtonAndLabel(win) + + ## Gamma Button + Label ## + self.setUpGammaButtonAndLabel(win) + + ## Alpha Button + Label ## + self.setupAlphaButtonAndLabel(win) + + ## Exit Button ## + #self.exit_button = Tkinter.Button(win,text='Quit', command=self.exit) + #self.exit_button.grid(row=0, column=9) + + ## Simulation Buttons ## +# self.setupSimulationButtons(win) + + ## Canvas ## + self.canvas = Tkinter.Canvas(root, height=200, width=1000) + self.canvas.grid(row=2,columnspan=10) + + def setupAlphaButtonAndLabel(self, win): + self.alpha_minus = Tkinter.Button(win, + text="-",command=(lambda: self.incrementAlpha(self.dec))) + self.alpha_minus.grid(row=1, column=3, padx=10) + + self.alpha = self.sigmoid(self.al) + self.alpha_label = Tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha)) + self.alpha_label.grid(row=1, column=4) + + self.alpha_plus = Tkinter.Button(win, + text="+",command=(lambda: self.incrementAlpha(self.inc))) + self.alpha_plus.grid(row=1, column=5, padx=10) + + def setUpGammaButtonAndLabel(self, win): + self.gamma_minus = Tkinter.Button(win, + text="-",command=(lambda: self.incrementGamma(self.dec))) + self.gamma_minus.grid(row=1, column=0, padx=10) + + self.gamma = self.sigmoid(self.ga) + self.gamma_label = Tkinter.Label(win, text='Discount: %.3f' % (self.gamma)) + self.gamma_label.grid(row=1, column=1) + + self.gamma_plus = Tkinter.Button(win, + text="+",command=(lambda: self.incrementGamma(self.inc))) + self.gamma_plus.grid(row=1, column=2, padx=10) + + def setupEpsilonButtonAndLabel(self, win): + self.epsilon_minus = Tkinter.Button(win, + text="-",command=(lambda: self.incrementEpsilon(self.dec))) + self.epsilon_minus.grid(row=0, column=3) + + self.epsilon = self.sigmoid(self.ep) + self.epsilon_label = Tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon)) + self.epsilon_label.grid(row=0, column=4) + + self.epsilon_plus = Tkinter.Button(win, + text="+",command=(lambda: self.incrementEpsilon(self.inc))) + self.epsilon_plus.grid(row=0, column=5) + + def setupSpeedButtonAndLabel(self, win): + self.speed_minus = Tkinter.Button(win, + text="-",command=(lambda: self.incrementSpeed(.5))) + self.speed_minus.grid(row=0, column=0) + + self.speed_label = Tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime)) + self.speed_label.grid(row=0, column=1) + + self.speed_plus = Tkinter.Button(win, + text="+",command=(lambda: self.incrementSpeed(2))) + self.speed_plus.grid(row=0, column=2) + + + + + + + + def skip5kSteps(self): + self.stepsToSkip = 5000 + + def __init__(self, win): + + self.ep = 0 + self.ga = 2 + self.al = 2 + self.stepCount = 0 + ## Init Gui + + self.__initGUI(win) + + # Init environment + if robotType == 'crawler': + self.robot = crawler.CrawlingRobot(self.canvas) + self.robotEnvironment = crawler.CrawlingRobotEnvironment(self.robot) + elif robotType == 'pendulum': + self.robot = pendulum.PendulumRobot(self.canvas) + self.robotEnvironment = \ + pendulum.PendulumRobotEnvironment(self.robot) + else: + raise "Unknown RobotType" + + # Init Agent + simulationFn = lambda agent: \ + simulation.SimulationEnvironment(self.robotEnvironment,agent) + actionFn = lambda state: \ + self.robotEnvironment.getPossibleActions(state) + self.learner = qlearningAgents.QLearningAgent(actionFn=actionFn) + + self.learner.setEpsilon(self.epsilon) + self.learner.setLearningRate(self.alpha) + self.learner.setDiscount(self.gamma) + + # Start GUI + self.running = True + self.stopped = False + self.stepsToSkip = 0 + self.thread = threading.Thread(target=self.run) + self.thread.start() + + + def exit(self): + self.running = False + for i in range(5): + if not self.stopped: + time.sleep(0.1) + try: + self.win.destroy() + except: + pass + sys.exit(0) + + def step(self): + + self.stepCount += 1 + + state = self.robotEnvironment.getCurrentState() + actions = self.robotEnvironment.getPossibleActions(state) + if len(actions) == 0.0: + self.robotEnvironment.reset() + state = self.robotEnvironment.getCurrentState() + actions = self.robotEnvironment.getPossibleActions(state) + print 'Reset!' + action = self.learner.getAction(state) + if action == None: + raise 'None action returned: Code Not Complete' + nextState, reward = self.robotEnvironment.doAction(action) + self.learner.observeTransition(state, action, nextState, reward) + + def animatePolicy(self): + if robotType != 'pendulum': + raise 'Only pendulum can animatePolicy' + + + totWidth = self.canvas.winfo_reqwidth() + totHeight = self.canvas.winfo_reqheight() + + length = 0.48 * min(totWidth, totHeight) + x,y = totWidth-length-30, length+10 + + + + angleMin, angleMax = self.robot.getMinAndMaxAngle() + velMin, velMax = self.robot.getMinAndMaxAngleVelocity() + + if not 'animatePolicyBox' in dir(self): + self.canvas.create_line(x,y,x+length,y) + self.canvas.create_line(x+length,y,x+length,y-length) + self.canvas.create_line(x+length,y-length,x,y-length) + self.canvas.create_line(x,y-length,x,y) + self.animatePolicyBox = 1 + self.canvas.create_text(x+length/2,y+10,text='angle') + self.canvas.create_text(x-30,y-length/2,text='velocity') + self.canvas.create_text(x-60,y-length/4,text='Blue = kickLeft') + self.canvas.create_text(x-60,y-length/4+20,text='Red = kickRight') + self.canvas.create_text(x-60,y-length/4+40,text='White = doNothing') + + + + angleDelta = (angleMax-angleMin) / 100 + velDelta = (velMax-velMin) / 100 + for i in range(100): + angle = angleMin + i * angleDelta + + for j in range(100): + vel = velMin + j * velDelta + state = self.robotEnvironment.getState(angle,vel) + max, argMax = None, None + if not self.learner.seenState(state): + argMax = 'unseen' + else: + for action in ('kickLeft','kickRight','doNothing'): + qVal = self.learner.getQValue(state, action) + if max == None or qVal > max: + max, argMax = qVal, action + if argMax != 'unseen': + if argMax == 'kickLeft': + color = 'blue' + elif argMax == 'kickRight': + color = 'red' + elif argMax == 'doNothing': + color = 'white' + dx = length / 100.0 + dy = length / 100.0 + x0, y0 = x+i*dx, y-j*dy + self.canvas.create_rectangle(x0,y0,x0+dx,y0+dy,fill=color) + + + + + def run(self): + self.stepCount = 0 + self.learner.startEpisode() + while True: + minSleep = .01 + tm = max(minSleep, self.tickTime) + time.sleep(tm) + self.stepsToSkip = int(tm / self.tickTime) - 1 + + if not self.running: + self.stopped = True + return + for i in range(self.stepsToSkip): + self.step() + self.stepsToSkip = 0 + self.step() +# self.robot.draw() + self.learner.stopEpisode() + + def start(self): + self.win.mainloop() + + + + + +def run(): + global root + root = Tkinter.Tk() + root.title( 'Crawler GUI' ) + root.resizable( 0, 0 ) + +# root.mainloop() + + + app = Application(root) + def update_gui(): + app.robot.draw(app.stepCount, app.tickTime) + root.after(10, update_gui) + update_gui() + + root.protocol( 'WM_DELETE_WINDOW', app.exit) + try: + app.start() + except: + app.exit() |