"""Define the "party problem" as an MDP, a five-tuple (S, A, gamma, P, R).

    S      the set of states, as a list of integer state indices
    A      the sets of actions for each state, as a list of lists
    gamma  the discount rate
    P      the state transition probabilities, as a function -- P(s, a, sp)
    R      the expected rewards, as a function -- R(s, a, sp)

The five-tuple is exported as ``partyMDP``.  Every name in ``Snames`` and
``Anames`` is also defined as a module-level integer constant equal to its
position in that list (e.g. ``RU8p == 0``, ``party == 1``).
"""

#--------------------States-------------------
Snames = ['RU8p', 'TU10p', 'RU10p', 'RD10p', 'RU8a', 'RD8a',
          'TU10a', 'RU10a', 'RD10a', 'TD10a', 'terminal']
# R=Rested, T=Tired, D=Done(assignment done), U=Undone, 8p=8pm, etc.
n = len(Snames)  # number of states

# Materialized as a real list so that S matches the documented contract on
# Python 3 as well (a bare range() is not a list there).
S = list(range(n))

# Define state names as constants 0,1,2,...
# zip() pairs each name with its index; updating globals() in one call avoids
# leaving loop temporaries behind in the module namespace.
globals().update(zip(Snames, S))

#--------------------Actions-------------------
Anames = ['rest', 'party', 'study']
m = len(Anames)  # number of actions

# Define action names as constants 0,1,2,...
globals().update(zip(Anames, range(m)))

# A[s] is the list of actions available in state s.
A = [None] * n
A[RU8p] = [rest, party, study]
A[TU10p] = [rest, party]
A[RU10p] = [rest, party, study]
A[RD10p] = [rest, party]
A[RU8a] = [rest, party, study]
A[RD8a] = [rest, party]
A[TU10a] = [rest]
A[RU10a] = [rest]
A[RD10a] = [rest]
A[TD10a] = [rest]
A[terminal] = [rest]

#--------------------Gamma-------------------
gamma = 0.9

#--------------------Transition probabilities(P)-------------------
# P[s][a][sp] is the probability of landing in sp after taking a in s.
# Every entry starts at 0; only the transitions listed below are possible.
P = [[[0] * n for _ in range(m)] for _ in S]

P[RU8p][party][TU10p] = 1
P[RU8p][rest][RU10p] = 1
P[RU8p][study][RD10p] = 1

P[TU10p][party][RU10a] = 1
P[TU10p][rest][RU8a] = 1

P[RU10p][party][RU8a] = 0.5
P[RU10p][party][RU10a] = 0.5
P[RU10p][rest][RU8a] = 1
P[RU10p][study][RD8a] = 1

P[RD10p][party][RD8a] = 0.5
P[RD10p][party][RD10a] = 0.5
P[RD10p][rest][RD8a] = 1

P[RU8a][party][TU10a] = 1
P[RU8a][rest][RU10a] = 1
P[RU8a][study][RD10a] = 1

P[RD8a][party][TD10a] = 1
P[RD8a][rest][RD10a] = 1

P[TU10a][rest][terminal] = 1
P[RU10a][rest][terminal] = 1
P[RD10a][rest][terminal] = 1
P[TD10a][rest][terminal] = 1

# The terminal state is absorbing.
P[terminal][rest][terminal] = 1


def Pfunction(s, a, sp):
    """Return the probability of moving from state s to state sp under action a.

    s and sp are state indices (members of S); a is an action index in
    range(m).  Combinations that were never assigned in the table P,
    including actions not listed in A[s], yield 0.
    """
    return P[s][a][sp]


#--------------------Expected rewards(R)-------------------
# Default reward for each action, independent of the states involved.
_action_reward = [0] * m
_action_reward[party] = +2
_action_reward[study] = -1
_action_reward[rest] = 0

# R[s][a][sp] is the expected reward for the transition s --a--> sp.
R = [[[_action_reward[act]] * n for act in range(m)] for _ in S]

# Entering the terminal state overrides the default reward for 'rest'.
R[TU10a][rest][terminal] = -1
R[RU10a][rest][terminal] = 0
R[RD10a][rest][terminal] = +4
R[TD10a][rest][terminal] = +3


def Rfunction(s, a, sp):
    """Return the expected reward for moving from state s to sp under action a.

    s and sp are state indices (members of S); a is an action index in
    range(m).  The value is read straight from the table R.
    """
    return R[s][a][sp]


partyMDP = (S, A, gamma, Pfunction, Rfunction)