# This is code to implement the environment for the game Nim for 2 players
# In this game, a number of sticks are thrown down. Each player picks
# up 1, 2, or 3 sticks on his or her turn. If there are no sticks to
# pick up, that player loses.
# The state is simply the number of sticks remaining, and an indicator of
# whose turn it is.

# Environment functions
#   nimenvA - totally random opponent
#   nimenvB - opponent tries to take all the sticks, if not, does random
#   nimenvC - optimal opponent
#   nimenvD - optimal opponent with flaw

from math import *
from random import *


# Should these be available to the agent?

def nextState(state, move):
    """Return the state reached by taking `move` sticks from `state`.

    state is the number of sticks remaining;
    move is the number of sticks the player is going to take.
    """
    return state - move


def terminalState(state):
    """Detect end of game: the 'terminal' marker or no sticks left."""
    return state == 'terminal' or state == 0


def possibleMoves(state):
    """Return the list of legal moves (a subset of [1, 2, 3]).

    A player can't take away more sticks than there are, so moves larger
    than `state` are dropped; the list is empty when no sticks remain.
    """
    return [m for m in (1, 2, 3) if m <= state]


# Environment functions
# Each environment function consists of the following parts:
#   if there is no action, initialize the environment (start new game)
#   Otherwise, make the given agent move
#   if not end of game, make opponent move
#   return new state and reward

def randomMove(state):
    """Return a legal move selected at random, or -1 if there is none."""
    movesleft = possibleMoves(state)
    if not movesleft:
        return -1
    return movesleft[randrange(len(movesleft))]


def winLastMove(state):
    """Return a random move unless it can take all the remaining sticks."""
    if state <= 3:
        return state
    return randomMove(state)


def optimalMove(state):
    """Return the optimal move to make in this state."""
    if state <= 3:
        # Few enough sticks to take them all.
        return state
    if state % 4 == 0:
        # toasted anyway, play randomly
        return randomMove(state)
    # try to make remaining sticks a multiple of 4
    return state % 4


def flawedOptimalMove(state):
    """Play optimally except in one state (5 sticks), where it plays randomly."""
    if state == 5:
        return randomMove(state)
    return optimalMove(state)


def initNimEnv(sticks=16):
    """Start a new game by resetting the global state; return the new state.

    sticks is the number of sticks thrown down (16 by default).
    """
    global state
    state = sticks
    return state


def _nimStep(a, opponentMove):
    """Shared step logic for all nimenv* environments.

    a is the agent's action (None starts a new game); opponentMove is a
    function mapping the current stick count to the opponent's move.

    Returns the initial state alone when a is None, otherwise the pair
    (state, reward).  Reward is 1 only when the agent takes the last
    stick; the state becomes 'terminal' once the game is over.

    A game must have been started (call with no action) before any action
    is passed, because that is what creates the global state.
    """
    global state
    if a is None:  # start of new game
        return initNimEnv()

    # Not one of 1, 2, 3 -- or the game is already over, where the
    # arithmetic below would fail on the 'terminal' marker.
    if a not in (1, 2, 3) or terminalState(state):
        # Formatted so Python 2 and Python 3 print the same text.
        print("Illegal action %s" % (a,))
        return state, 0

    if a > state:
        # Taking more sticks than remain ends the game with no reward.
        # Handled explicitly so the stick count can never go negative,
        # which would leave the episode unable to terminate.
        state = 'terminal'
        return state, 0

    state = nextState(state, a)  # state after agent's move
    if terminalState(state):
        # Agent took the last stick: agent wins.
        state = 'terminal'
        return state, 1

    state = nextState(state, opponentMove(state))  # state after opponent's move
    if terminalState(state):
        # Opponent took the last stick: agent gets nothing.
        state = 'terminal'
    return state, 0


def nimenvA(a=None):
    """Environment for nim game, including random opponent."""
    return _nimStep(a, randomMove)


def nimenvB(a=None):
    """Environment for nim game, including opponent which plays random
    unless it can take all the sticks."""
    return _nimStep(a, winLastMove)


def nimenvC(a=None):
    """Environment for nim game, including opponent which plays optimally."""
    return _nimStep(a, optimalMove)


def nimenvD(a=None):
    """Environment for nim game, including opponent which plays optimally
    except for in one state."""
    return _nimStep(a, flawedOptimalMove)