Temp
Jump to navigation
Jump to search
1 from machine import Pin
2 import utime as time
3 import urandom as random
4 from collections import deque
5
class State:
    """Discretised observation of the game, used as a key into the Q-table.

    Instances compare, hash and order by value (all five fields), so two
    observations with identical fields address the same Q-table row.
    Without value-based ``__eq__``/``__hash__`` every freshly built State
    would be a distinct dictionary key and learned values could never be
    looked up again.

    Instances are expected to be treated as immutable once they have been
    used as a dictionary key, because the hash is derived from the fields.
    """

    def __init__(self, da2, d2, f2, fl, p):
        self.danger = da2  # Binary encoding (front, left, right)
        self.dir = d2  # Direction
        self.food = f2  # Binary encoding (front, left, right)
        self.foodlarge = fl
        self.proximity = p

    def _key(self):
        """Return the tuple of fields that defines identity and ordering."""
        return (self.danger, self.dir, self.food, self.foodlarge, self.proximity)

    def __eq__(self, other):
        if not isinstance(other, State):
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        # Must agree with __eq__ so equal states land in the same dict slot.
        return hash(self._key())

    def __lt__(self, other):
        return self._key() < other._key()
17
class SnakeGame:
    """Snake on an ``n`` x ``m`` grid, played by a tabular Q-learning agent.

    Cells are addressed as ``(row, col)``. Absolute direction ``d`` (0..3)
    moves the head by ``(dx[d], dy[d])``. The agent picks one of three
    relative actions listed in ``FLR`` (front, left, right), which are added
    to the current heading modulo 4.

    Rewards returned by :meth:`move`: ``DANGER`` for leaving the grid or
    hitting the snake, ``EAT`` for eating food, otherwise ``2`` / ``-2`` /
    ``0`` for getting closer to / further from / no nearer the closest food.
    """

    def __init__(self, n=16, m=32):
        """Create an empty ``n``-row by ``m``-column board with a 1-cell snake."""
        self.width = m
        self.height = n
        self.grid = [[0] * m for _ in range(n)]
        self.snake = self._newSnake()
        self.walls = set()
        self.QTable = {}
        self.epsilon = 0.1  # exploration probability
        self.gamma = 0.7  # discount factor
        self.alpha = 0.7  # learning rate
        self.FOOD = 1
        self.WALL = 2
        self.dx = [1, 0, -1, 0]
        self.dy = [0, 1, 0, -1]
        self.dz = [(1, 0), (0, 1), (-1, 0), (0, -1)]
        self.FLR = [0, 1, 3]  # heading offsets for front, left, right
        self.prevmove = random.getrandbits(2)
        self.score = 0
        self.DANGER = -20
        self.EAT = 10

    def _newSnake(self):
        """Return a one-cell snake positioned at the centre of the grid."""
        # maxlen is passed explicitly because MicroPython's deque requires it
        # (CPython accepts it too). It is one larger than the number of cells
        # so the bound can never silently drop a segment.
        capacity = self.height * self.width + 1
        return deque([(self.height // 2, self.width // 2)], capacity)

    def getQ(self, state, action):
        """Return the stored Q-value for ``(state, action)``, or 0 if unseen."""
        return self.QTable.get(state, [0] * 4)[action]

    def setQ(self, state, action, value):
        """Store ``value`` as the Q-value for ``(state, action)``."""
        if state not in self.QTable:
            self.QTable[state] = [0] * 4
        self.QTable[state][action] = value

    def isInSnake(self, value):
        """Return True if cell ``value`` is occupied by the snake."""
        return value in self.snake

    def inBounds(self, p):
        """Return True if cell ``p`` lies inside the grid."""
        return 0 <= p[0] < self.height and 0 <= p[1] < self.width

    def danger(self, p):
        """Return True if moving onto ``p`` would be fatal (off-grid or snake)."""
        return not self.inBounds(p) or self.isInSnake(p)

    def _nearestFood(self, p):
        """Scan the grid for the food cell closest to ``p``.

        Returns ``(distance, cell)`` using Manhattan distance; ties go to the
        first cell in row-major order. When there is no food the result is
        ``(None, p)``.
        """
        best = None
        where = p
        for i in range(self.height):
            row = self.grid[i]
            for j in range(self.width):
                if row[j] != self.FOOD:
                    continue
                dist = abs(p[0] - i) + abs(p[1] - j)
                if best is None or dist < best:
                    best = dist
                    where = (i, j)
        return best, where

    def getClosestFood(self, p):
        """Return the food cell nearest to ``p``, or ``p`` itself if none exists."""
        return self._nearestFood(p)[1]

    def getClosestDist(self, p):
        """Return the Manhattan distance from ``p`` to the nearest food (0 if none)."""
        dist = self._nearestFood(p)[0]
        return 0 if dist is None else dist

    def placeFood(self):
        """Put one food item on a random free cell if the board has none."""
        if any(self.FOOD in row for row in self.grid):
            return
        # Snapshot the snake once so the free-cell scan is not O(len(snake))
        # per cell.
        occupied = set(self.snake)
        empties = [(i, j)
                   for i in range(self.height)
                   for j in range(self.width)
                   if self.grid[i][j] == 0 and (i, j) not in occupied]
        if empties:
            p = random.choice(empties)
            self.grid[p[0]][p[1]] = self.FOOD

    def getCurrentState(self):
        """Build the State observed from the snake's head.

        * ``danger`` / ``food``: bit ``i`` is set when the cell adjacent to the
          head in relative direction ``FLR[i]`` is dangerous / holds food.
        * ``foodlarge``: 1, 4 and 8 are added when the nearest food lies
          (strictly) towards the front, left and right respectively.
        * ``proximity`` is always stored as 0.
        """
        heading = self.prevmove
        pos = self.snake[0]
        target = self.getClosestFood(pos)
        # Offset from head to the nearest food; (0, 0) when there is no food.
        drow = target[0] - pos[0]
        dcol = target[1] - pos[1]

        dangers = 0
        foods = 0
        foodslarge = 0
        for i, weight in enumerate((1, 4, 8)):
            look = (heading + self.FLR[i]) % 4
            step_r = self.dx[look]
            step_c = self.dy[look]
            test = (pos[0] + step_r, pos[1] + step_c)
            if self.danger(test):
                dangers += (1 << i)
            if self.inBounds(test) and self.grid[test[0]][test[1]] == self.FOOD:
                foods += (1 << i)
            # Positive projection onto the look direction means the food is
            # somewhere on that side of the head.
            if drow * step_r + dcol * step_c > 0:
                foodslarge += weight
        return State(dangers, heading, foods, foodslarge, 0)

    def move(self, dir, DEBUG=False):
        """Advance the snake one cell in absolute direction ``dir``.

        Returns the reward for the move (see the class docstring). Leaving the
        grid returns ``DANGER`` without changing the snake or ``prevmove``.
        On a self-collision the tail has already been removed when ``DANGER``
        is returned; the caller is presumably expected to reset via
        :meth:`death`.
        """
        if DEBUG:
            print("MOVING IN DIRECTION", dir)
        self.placeFood()
        pos = self.snake[0]
        test = (pos[0] + self.dx[dir], pos[1] + self.dy[dir])
        if not self.inBounds(test):
            return self.DANGER
        self.prevmove = dir
        if self.grid[test[0]][test[1]] == self.FOOD:
            if self.danger(test):
                return self.DANGER
            # Growing: add the new head and keep the tail.
            self.snake.appendleft(test)
            self.grid[test[0]][test[1]] = 0
            self.score += 1
            return self.EAT
        # The tail vacates its cell first, so stepping onto it is legal.
        self.snake.pop()
        if self.danger(test):
            return self.DANGER
        self.snake.appendleft(test)
        # Each distance needs a full grid scan, so compute each only once.
        after = self.getClosestDist(test)
        before = self.getClosestDist(pos)
        if after < before:
            return 2
        if after > before:
            return -2
        return 0

    def act(self, state, DEBUG=False):
        """Choose an action for ``state``, play it, and update the Q-table.

        The action is epsilon-greedy over the three relative moves in ``FLR``
        (ties resolve to the earliest entry). The table is then updated with
        ``Q += alpha * (reward + gamma * max_a Q(next, a) - Q)``.
        Returns the reward of the move.
        """
        chosen = max(range(3), key=lambda i: self.getQ(state, self.FLR[i]))
        if random.random() < self.epsilon:
            # Uniform exploration; getrandbits(2) % 3 would pick index 0
            # twice as often as the others.
            chosen = random.randrange(3)
        action = self.FLR[chosen]
        blip = self.move((state.dir + action) % 4, DEBUG)
        curval = self.getQ(state, action)
        # Observe the successor state once and reuse it for all three lookups.
        nxt = self.getCurrentState()
        maxq = max(self.getQ(nxt, a) for a in self.FLR)
        newQ = curval + self.alpha * (blip + self.gamma * maxq - curval)
        self.setQ(state, action, newQ)
        return blip

    def iteration(self, DEBUG=False):
        """Run one learn-and-move step and return its reward."""
        # Food must exist before the state is observed.
        self.placeFood()
        return self.act(self.getCurrentState(), DEBUG)

    def dispGrid(self):
        """Print the board: ``#`` food, ``O`` snake, ``.`` empty, then a blank line."""
        body = set(self.snake)
        for i in range(self.height):
            cells = []
            for j in range(self.width):
                if self.grid[i][j] == self.FOOD:
                    cells.append("#")
                elif (i, j) in body:
                    cells.append("O")
                else:
                    cells.append(".")
            print("".join(cells))
        print()

    def dispQ(self):
        """Print every Q-table row as ``[dir danger food foodlarge proximity] values``."""
        for state, values in self.QTable.items():
            print(f"[{state.dir} {state.danger} {state.food} {state.foodlarge} {state.proximity}] {values}")

    def death(self):
        """Reset board, snake, heading and score; return the score just ended.

        The Q-table is kept so learning carries over between games.
        """
        score2 = self.score
        self.score = 0
        self.snake = self._newSnake()
        self.grid = [[0] * self.width for _ in range(self.height)]
        self.prevmove = random.getrandbits(2)
        return score2
205
206 def manual():
207 sg = SnakeGame()
208 sg.placeFood()
209 sg.dispGrid