Temp

From Sketching with Hardware at LMU Wiki
Jump to navigation Jump to search
from machine import Pin
import utime as time
import urandom as random
from collections import deque

class State:
    """Discretised observation of the game, used as a key into the Q-table.

    Instances compare, order and hash by value (the tuple of all five
    fields), so two observations with identical fields address the same
    Q-table row even though they are distinct objects.

    Attributes:
        danger: Bit mask of blocked neighbouring cells (front, left, right).
        dir: Current heading, an index 0..3 into the direction tables.
        food: Bit mask of neighbouring cells holding food (front, left, right).
        foodlarge: Bit mask giving the coarse direction of the closest food.
        proximity: Distance component of the state.

    The fields take part in ``__hash__``; do not mutate an instance after it
    has been stored as a dictionary key.
    """

    def __init__(self, da2, d2, f2, fl, p):
        self.danger = da2  # Binary encoding (front, left, right)
        self.dir = d2  # Direction
        self.food = f2  # Binary encoding (front, left, right)
        self.foodlarge = fl
        self.proximity = p

    def _key(self):
        """Return the tuple of fields that defines identity and ordering."""
        return (self.danger, self.dir, self.food, self.foodlarge, self.proximity)

    def __eq__(self, other):
        # Without value equality every freshly built State would be a new
        # dictionary key and stored Q-values could never be found again.
        return isinstance(other, State) and self._key() == other._key()

    def __hash__(self):
        # Must agree with __eq__: equal states hash equally.
        return hash(self._key())

    def __lt__(self, other):
        return self._key() < other._key()

class SnakeGame:
    """Snake on an ``n`` x ``m`` grid, played by a tabular Q-learning agent.

    Cells are addressed as ``(row, col)`` tuples. ``snake`` is a deque of
    cells with the head at index 0. ``grid`` holds ``FOOD`` markers; the
    snake itself is tracked only in ``snake``. Headings are indices 0..3
    into ``dx``/``dy`` (0: row+1, 1: col+1, 2: row-1, 3: col-1). The agent
    only ever chooses among the three relative turns listed in ``FLR``
    (heading offsets 0, +1 and +3, labelled front/left/right in this file).
    """

    def __init__(self, n=16, m=32):
        """Create an ``n``-row by ``m``-column board with a one-cell snake.

        Args:
            n: Number of rows (board height).
            m: Number of columns (board width).
        """
        self.width = m
        self.height = n
        self.grid = [[0] * m for _ in range(n)]
        self.snake = self._newSnake()
        self.walls = set()
        self.QTable = {}
        self.epsilon = 0.1  # exploration probability
        self.gamma = 0.7  # discount factor
        self.alpha = 0.7  # learning rate
        self.FOOD = 1
        self.WALL = 2
        self.dx = [1, 0, -1, 0]
        self.dy = [0, 1, 0, -1]
        self.dz = [(1, 0), (0, 1), (-1, 0), (0, -1)]
        self.FLR = [0, 1, 3]
        self.prevmove = random.getrandbits(2)
        self.score = 0
        self.DANGER = -20  # reward for a fatal move
        self.EAT = 10  # reward for eating food

    def _newSnake(self):
        """Return a one-cell snake placed at the centre of the board."""
        # MicroPython's deque needs an explicit maxlen and CPython accepts
        # one; the snake can never be longer than the number of cells.
        return deque([(self.height // 2, self.width // 2)],
                     max(self.height * self.width, 1))

    def getQ(self, state, action):
        """Return the stored Q-value for ``(state, action)``, 0 if unseen."""
        return self.QTable.get(state, [0] * 4)[action]

    def setQ(self, state, action, value):
        """Store ``value`` for ``(state, action)``, creating the row if needed."""
        if state not in self.QTable:
            self.QTable[state] = [0] * 4
        self.QTable[state][action] = value

    def isInSnake(self, value):
        """Return True if cell ``value`` is occupied by the snake."""
        return value in self.snake

    def inBounds(self, p):
        """Return True if cell ``p`` lies on the board."""
        return 0 <= p[0] < self.height and 0 <= p[1] < self.width

    def danger(self, p):
        """Return True if cell ``p`` is off the board or part of the snake."""
        return not self.inBounds(p) or self.isInSnake(p)

    def _scanClosestFood(self, p):
        """Return ``(cell, distance)`` of the food nearest to ``p``.

        Distance is Manhattan distance. Ties go to the first food cell in
        row-major order. With no food on the board the result is
        ``(p, float('inf'))``.
        """
        closest = p
        mindist = float('inf')
        for i, row in enumerate(self.grid):
            for j, cell in enumerate(row):
                if cell != self.FOOD:
                    continue
                dist = abs(p[0] - i) + abs(p[1] - j)
                if dist < mindist:
                    closest = (i, j)
                    mindist = dist
        return closest, mindist

    def getClosestFood(self, p):
        """Return the food cell nearest to ``p``, or ``p`` if there is none."""
        return self._scanClosestFood(p)[0]

    def getClosestDist(self, p):
        """Return the Manhattan distance from ``p`` to the nearest food.

        Returns 0 when there is no food on the board.
        """
        mindist = self._scanClosestFood(p)[1]
        return 0 if mindist == float('inf') else mindist

    def placeFood(self):
        """Put one food item on a random free cell if the board has none."""
        if any(self.FOOD in row for row in self.grid):
            return
        # Build the occupied set once instead of searching the deque for
        # every cell of the board.
        occupied = set(self.snake)
        empties = [(i, j) for i in range(self.height) for j in range(self.width)
                   if self.grid[i][j] == 0 and (i, j) not in occupied]
        if empties:
            p = random.choice(empties)
            self.grid[p[0]][p[1]] = self.FOOD

    def _foodDirectionBits(self, pos, food, heading):
        """Encode where ``food`` lies relative to ``pos`` for a given heading.

        For each relative turn in ``FLR`` the matching bit (1, 4, 8) is set
        when the food has a positive displacement along that direction.

        Args:
            pos: Cell the snake's head is on.
            food: Cell of the food being aimed for.
            heading: Current heading, 0..3.

        Returns:
            The sum of the bits that apply; 0 when ``food == pos``.
        """
        drow = food[0] - pos[0]
        dcol = food[1] - pos[1]
        bits = 0
        for bit, turn in zip((1, 4, 8), self.FLR):
            look = (heading + turn) % 4
            # Dot product of the displacement with the unit step of `look`.
            if drow * self.dx[look] + dcol * self.dy[look] > 0:
                bits += bit
        return bits

    def getCurrentState(self):
        """Build the ``State`` observed from the head with the last heading.

        The three cells reached by the turns in ``FLR`` are checked for
        danger and for food, one bit per turn. The coarse direction of the
        closest food is added as ``foodlarge``. The proximity field is
        always 0.
        """
        heading = self.prevmove
        pos = self.snake[0]
        dangers = 0
        foods = 0
        for i, turn in enumerate(self.FLR):
            look = (heading + turn) % 4
            test = (pos[0] + self.dx[look], pos[1] + self.dy[look])
            if self.danger(test):
                dangers += (1 << i)
            if self.inBounds(test) and self.grid[test[0]][test[1]] == self.FOOD:
                foods += (1 << i)
        foodslarge = self._foodDirectionBits(pos, self.getClosestFood(pos), heading)
        return State(dangers, heading, foods, foodslarge, 0)

    def move(self, dir, DEBUG=False):
        """Advance the snake one cell in absolute direction ``dir``.

        Args:
            dir: Heading 0..3 to move in.
            DEBUG: When true, print the direction being taken.

        Returns:
            ``DANGER`` if the move leaves the board or hits the snake,
            ``EAT`` if food was eaten, otherwise 2, -2 or 0 depending on
            whether the head got closer to, further from, or stayed at the
            same distance from the nearest food.

        A move off the board changes nothing. A move into the snake's own
        body returns ``DANGER`` after the tail has already been removed and
        without advancing the head.
        """
        if DEBUG:
            print("MOVING IN DIRECTION", dir)
        self.placeFood()
        pos = self.snake[0]
        test = (pos[0] + self.dx[dir], pos[1] + self.dy[dir])
        if not self.inBounds(test):
            return self.DANGER
        self.prevmove = dir
        if self.grid[test[0]][test[1]] == self.FOOD:
            if self.danger(test):
                return self.DANGER
            # Eating: grow by keeping the tail in place.
            self.snake.appendleft(test)
            self.grid[test[0]][test[1]] = 0
            self.score += 1
            return self.EAT
        # Drop the tail first so stepping onto the cell it vacates is legal.
        self.snake.pop()
        if self.danger(test):
            return self.DANGER
        self.snake.appendleft(test)
        # The grid is unchanged between the two scans, so one scan each.
        before = self.getClosestDist(pos)
        after = self.getClosestDist(test)
        if after < before:
            return 2
        if after > before:
            return -2
        return 0

    def act(self, state, DEBUG=False):
        """Choose a turn epsilon-greedily, play it and update the Q-table.

        Args:
            state: The ``State`` observed before moving.
            DEBUG: Forwarded to ``move``.

        Returns:
            The reward returned by ``move``.
        """
        # Greedy choice; ties go to the earliest entry of FLR.
        chosen = 0
        maxq = self.getQ(state, self.FLR[0])
        for i in range(1, 3):
            q = self.getQ(state, self.FLR[i])
            if q > maxq:
                maxq = q
                chosen = i
        if random.random() < self.epsilon:
            # randrange gives each turn equal probability; getrandbits(2) % 3
            # would pick index 0 twice as often as the others.
            chosen = random.randrange(3)
        chosen = self.FLR[chosen]
        blip = self.move((state.dir + chosen) % 4, DEBUG)
        curval = self.getQ(state, chosen)
        # NOTE(review): this bootstraps from the post-move state even when
        # the move returned DANGER; confirm against the caller's reset logic.
        nextstate = self.getCurrentState()
        maxq = max(self.getQ(nextstate, turn) for turn in self.FLR)
        newQ = curval + self.alpha * (blip + self.gamma * maxq - curval)
        self.setQ(state, chosen, newQ)
        return blip

    def iteration(self, DEBUG=False):
        """Run one agent step: ensure food exists, observe, act, learn."""
        self.placeFood()
        return self.act(self.getCurrentState(), DEBUG)

    def dispGrid(self):
        """Print the board: ``#`` food, ``O`` snake, ``.`` empty cell."""
        body = set(self.snake)
        for i in range(self.height):
            cells = []
            for j in range(self.width):
                if self.grid[i][j] == self.FOOD:
                    cells.append("#")
                elif (i, j) in body:
                    cells.append("O")
                else:
                    cells.append(".")
            print("".join(cells))
        print()

    def dispQ(self):
        """Print every Q-table row as ``[dir danger food foodlarge proximity] values``."""
        for state, values in self.QTable.items():
            print(f"[{state.dir} {state.danger} {state.food} {state.foodlarge} {state.proximity}] {values}")

    def death(self):
        """Reset the board, snake, heading and score for a new episode.

        The Q-table is kept.

        Returns:
            The score reached in the episode that just ended.
        """
        score2 = self.score
        self.score = 0
        self.snake = self._newSnake()
        self.grid = [[0] * self.width for _ in range(self.height)]
        self.prevmove = random.getrandbits(2)
        return score2

def manual():
    sg = SnakeGame()
    sg.placeFood()
    sg.dispGrid