# Temp
# Jump to navigation
# Jump to search
from machine import Pin
import utime as time
import urandom as random
from collections import deque
class State:
    """Discrete observation of the game used as a Q-table key.

    Instances compare, order and hash by value, so two observations built
    from the same field values address the same dictionary entry.
    """

    def __init__(self, da2, d2, f2, fl, p):
        """Store the five observation fields.

        Args:
            da2: Danger bitmask (front, left, right).
            d2: Current heading index.
            f2: Adjacent-food bitmask (front, left, right).
            fl: Coarse bitmask describing where the nearest food lies.
            p: Proximity value.
        """
        self.danger = da2  # Binary encoding (front, left, right)
        self.dir = d2  # Direction
        self.food = f2  # Binary encoding (front, left, right)
        self.foodlarge = fl
        self.proximity = p

    def _key(self):
        """Return the field tuple that defines identity and ordering."""
        return (self.danger, self.dir, self.food, self.foodlarge,
                self.proximity)

    def __eq__(self, other):
        """Return True when *other* is a State with identical fields."""
        return isinstance(other, State) and self._key() == other._key()

    def __hash__(self):
        """Hash by value so that equal states share one dict slot."""
        return hash(self._key())

    def __lt__(self, other):
        """Order states lexicographically by their field tuple."""
        return self._key() < other._key()
class SnakeGame:
    """Snake on a rectangular grid, steered by a tabular Q-learning agent.

    Cells are addressed as ``(row, col)``. The snake is a deque of cells
    with the head at index 0. Headings are indices 0..3 into ``dx``/``dy``.
    The agent chooses between three relative turns, stored in ``FLR`` as
    heading offsets and labelled front, left and right.

    ``QTable`` maps a state object to a list of four Q-values indexed by
    heading offset. Lookups use ordinary dict semantics, so learning only
    accumulates if state objects hash and compare by value.
    """

    def __init__(self, n=16, m=32):
        """Create an ``n``-row by ``m``-column game with a one-cell snake.

        Args:
            n: Number of grid rows.
            m: Number of grid columns.
        """
        self.width = m
        self.height = n
        self.grid = [[0] * m for _ in range(n)]
        # maxlen is passed explicitly: MicroPython's deque constructor
        # requires it, and the snake can never be longer than the grid.
        # NOTE(review): appendleft / indexing / iteration on deque need a
        # recent MicroPython firmware - confirm against the target board.
        self.snake = deque([(n // 2, m // 2)], n * m)
        self.walls = set()  # not referenced by any method of this class
        self.QTable = {}
        self.epsilon = 0.1  # probability of taking a random action
        self.gamma = 0.7  # discount applied to the best successor Q-value
        self.alpha = 0.7  # learning rate of the Q update
        self.FOOD = 1
        self.WALL = 2  # not referenced by any method of this class
        self.dx = [1, 0, -1, 0]  # row delta per heading
        self.dy = [0, 1, 0, -1]  # column delta per heading
        self.dz = [(1, 0), (0, 1), (-1, 0), (0, -1)]
        self.FLR = [0, 1, 3]  # heading offsets: front, left, right
        self.prevmove = random.getrandbits(2)
        self.score = 0
        self.DANGER = -20  # reward for hitting a wall or the snake
        self.EAT = 10  # reward for eating food

    def getQ(self, state, action):
        """Return the stored Q-value for ``(state, action)``, 0 if unseen."""
        row = self.QTable.get(state)
        return 0 if row is None else row[action]

    def setQ(self, state, action, value):
        """Store ``value`` for ``(state, action)``, creating the row if needed."""
        row = self.QTable.get(state)
        if row is None:
            row = [0] * 4
            self.QTable[state] = row
        row[action] = value

    def isInSnake(self, value):
        """Return True if cell ``value`` is occupied by the snake."""
        return value in self.snake

    def inBounds(self, p):
        """Return True if cell ``p`` lies inside the grid."""
        return 0 <= p[0] < self.height and 0 <= p[1] < self.width

    def danger(self, p):
        """Return True if ``p`` is off the grid or on the snake."""
        return not self.inBounds(p) or self.isInSnake(p)

    def _nearestFood(self, p):
        """Scan the grid once for the food cell nearest to ``p``.

        Distance is Manhattan distance. Ties keep the first cell found in
        row-major order.

        Returns:
            ``(distance, cell)``, or ``(None, p)`` when the grid has no food.
        """
        best_dist = None
        best_cell = p
        for i, row in enumerate(self.grid):
            for j, cell in enumerate(row):
                if cell != self.FOOD:
                    continue
                dist = abs(p[0] - i) + abs(p[1] - j)
                if best_dist is None or dist < best_dist:
                    best_dist = dist
                    best_cell = (i, j)
        return best_dist, best_cell

    def getClosestFood(self, p):
        """Return the food cell nearest to ``p``, or ``p`` if there is none."""
        return self._nearestFood(p)[1]

    def getClosestDist(self, p):
        """Return the Manhattan distance to the nearest food, 0 if none."""
        dist = self._nearestFood(p)[0]
        return 0 if dist is None else dist

    def placeFood(self):
        """Drop one food item on a random free cell if the grid has none."""
        if any(self.FOOD in row for row in self.grid):
            return
        empties = [(i, j)
                   for i in range(self.height)
                   for j in range(self.width)
                   if self.grid[i][j] == 0 and not self.danger((i, j))]
        if empties:
            spot = random.choice(empties)
            self.grid[spot[0]][spot[1]] = self.FOOD

    def getCurrentState(self):
        """Build the agent's observation of the current position.

        The observation holds:
          * a danger bitmask and a food bitmask over the three cells
            reachable by the ``FLR`` turns (bit ``i`` belongs to ``FLR[i]``);
          * a coarse food direction relative to the heading: +1 if the
            nearest food is ahead, +4 if it is on the left-turn side,
            +8 if it is on the right-turn side.

        The proximity field is always 0.
        """
        heading = self.prevmove
        head = self.snake[0]

        dangers = 0
        foods = 0
        for bit, turn in enumerate(self.FLR):
            look = (heading + turn) % 4
            cell = (head[0] + self.dx[look], head[1] + self.dy[look])
            if self.danger(cell):
                dangers += 1 << bit
            if self.inBounds(cell) and self.grid[cell[0]][cell[1]] == self.FOOD:
                foods += 1 << bit

        # Project the offset to the nearest food onto the heading and onto
        # the left-turn direction. The signs of the two projections
        # reproduce the per-heading comparisons of row and column.
        target = self.getClosestFood(head)
        d_row = target[0] - head[0]
        d_col = target[1] - head[1]
        left = (heading + 1) % 4
        ahead = d_row * self.dx[heading] + d_col * self.dy[heading]
        side = d_row * self.dx[left] + d_col * self.dy[left]

        foodslarge = 0
        if ahead > 0:
            foodslarge += 1
        if side > 0:
            foodslarge += 4
        elif side < 0:
            foodslarge += 8
        return State(dangers, heading, foods, foodslarge, 0)

    def move(self, dir, DEBUG=False):
        """Advance the snake one cell along heading ``dir``.

        Args:
            dir: Absolute heading index into ``dx``/``dy``.
            DEBUG: When true, print the heading before moving.

        Returns:
            ``DANGER`` if the move hits a wall or the snake (the snake is
            left unchanged), ``EAT`` if food was eaten (the snake grows by
            one cell), otherwise 2 / -2 / 0 when the head ends up closer
            to / farther from / as far from the nearest food as before.
        """
        if DEBUG:
            print("MOVING IN DIRECTION", dir)
        self.placeFood()
        head = self.snake[0]
        target = (head[0] + self.dx[dir], head[1] + self.dy[dir])
        if not self.inBounds(target):
            return self.DANGER
        self.prevmove = dir

        if self.grid[target[0]][target[1]] == self.FOOD:
            if self.danger(target):
                return self.DANGER
            self.snake.appendleft(target)
            self.grid[target[0]][target[1]] = 0
            self.score += 1
            return self.EAT

        # Drop the tail first so stepping into the cell it vacates is
        # legal, but put it back if the move turns out to be a collision:
        # a rejected move must not shorten the snake.
        tail = self.snake.pop()
        if self.danger(target):
            self.snake.append(tail)
            return self.DANGER
        self.snake.appendleft(target)

        # Each distance costs a full grid scan, so compute each only once.
        new_dist = self.getClosestDist(target)
        old_dist = self.getClosestDist(head)
        if new_dist < old_dist:
            return 2
        if new_dist > old_dist:
            return -2
        return 0

    def act(self, state, DEBUG=False):
        """Choose a turn epsilon-greedily, apply it, and update the Q-table.

        The greedy choice is the ``FLR`` turn with the highest Q-value;
        ties go to the earliest entry. With probability ``epsilon`` a
        uniformly random turn is taken instead.

        Args:
            state: Observation the decision is based on; its ``dir`` field
                supplies the current heading.
            DEBUG: Forwarded to ``move``.

        Returns:
            The reward produced by the move.
        """
        best = 0
        best_q = float('-inf')
        for i, turn in enumerate(self.FLR):
            q = self.getQ(state, turn)
            if q > best_q:
                best_q = q
                best = i
        if random.random() < self.epsilon:
            # randrange is uniform; getrandbits(2) % 3 would pick index 0
            # twice as often as the other two.
            best = random.randrange(len(self.FLR))
        action = self.FLR[best]

        reward = self.move((state.dir + action) % 4, DEBUG)

        old_q = self.getQ(state, action)
        # Build the successor observation once; it is the same for all
        # three candidate turns.
        successor = self.getCurrentState()
        future = max(self.getQ(successor, turn) for turn in self.FLR)
        new_q = old_q + self.alpha * (reward + self.gamma * future - old_q)
        self.setQ(state, action, new_q)
        return reward

    def iteration(self, DEBUG=False):
        """Run one agent step: ensure food exists, observe, act, learn."""
        self.placeFood()
        return self.act(self.getCurrentState(), DEBUG)

    def dispGrid(self):
        """Print the grid ('#' food, 'O' snake, '.' empty) and a blank line."""
        for i, row in enumerate(self.grid):
            chars = []
            for j, cell in enumerate(row):
                if cell == self.FOOD:
                    chars.append("#")
                elif (i, j) in self.snake:
                    chars.append("O")
                else:
                    chars.append(".")
            print("".join(chars))
        print()

    def dispQ(self):
        """Print every Q-table entry as ``[state fields] values``."""
        for state, values in self.QTable.items():
            print(f"[{state.dir} {state.danger} {state.food} {state.foodlarge} {state.proximity}] {values}")

    def death(self):
        """Reset score, snake and grid for a new episode; keep the Q-table.

        Returns:
            The score reached in the episode that just ended.
        """
        final_score = self.score
        self.score = 0
        self.snake = deque([(self.height // 2, self.width // 2)],
                           self.height * self.width)
        self.grid = [[0] * self.width for _ in range(self.height)]
        self.prevmove = random.getrandbits(2)
        return final_score
def manual():
sg = SnakeGame()
sg.placeFood()
sg.dispGrid