# multiAgents.py
# --------------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).


from util import manhattanDistance
from game import Directions
import random, util

from game import Agent
from pacman import GameState

class ReflexAgent(Agent):
    """
    A reflex agent chooses an action at each choice point by examining
    its alternatives via a state evaluation function.

    The code below is provided as a guide. You are welcome to change
    it in any way you see fit, so long as you don't touch our method
    headers.
    """

    def getAction(self, gameState: GameState):
        """
        You do not need to change this method, but you're welcome to.

        getAction chooses among the best options according to the evaluation function.

        Just like in the previous project, getAction takes a GameState and returns
        some Directions.X for some X in the set {NORTH, SOUTH, WEST, EAST, STOP}
        """
        # Collect legal moves and successor states
        legalMoves = gameState.getLegalActions()

        # Choose one of the best actions
        scores = [self.evaluationFunction(gameState, action) for action in legalMoves]
        bestScore = max(scores)
        bestIndices = [index for index in range(len(scores)) if scores[index] == bestScore]
        chosenIndex = random.choice(bestIndices) # Pick randomly among the best

        "Add more of your code here if you want to"

        return legalMoves[chosenIndex]

    def evaluationFunction(self, currentGameState: GameState, action):
        """
        Design a better evaluation function here.

        The evaluation function takes in the current and proposed successor
        GameStates (pacman.py) and returns a number, where higher numbers are better.

        The code below extracts some useful information from the state, like the
        remaining food (newFood) and Pacman position after moving (newPos).
        newScaredTimes holds the number of moves that each ghost will remain
        scared because of Pacman having eaten a power pellet.

        Print out these variables to see what you're getting, then combine them
        to create a masterful evaluation function.
        """
        # Useful information you can extract from a GameState (pacman.py)
        successorGameState = currentGameState.generatePacmanSuccessor(action)
        newPos = successorGameState.getPacmanPosition()
        newFood = successorGameState.getFood()
        newGhostStates = successorGameState.getGhostStates()
        newScaredTimes = [ghostState.scaredTimer for ghostState in newGhostStates]
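        # As the docstring suggests, printing the extracted features helps when
        # tuning weights (left commented out to keep autograder output clean):
        # print(newPos, newFood.asList(), newScaredTimes)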

        "*** YOUR CODE HERE ***"
        # Calculate the distance to the nearest food
        foodList = newFood.asList()
        if not foodList:
            return 999999

        minFoodDist = min([util.manhattanDistance(newPos, food) for food in foodList])

        # Calculate the distance to each ghost
        for i, ghostState in enumerate(newGhostStates):
            ghostPos = ghostState.getPosition()
            dist = util.manhattanDistance(newPos, ghostPos)
            # If a ghost is too close and not scared, run away!
            if dist < 2 and newScaredTimes[i] == 0:
                return -999999

        # Combine score and food distance.
        # We prioritize survival (handled above), then score, then getting closer to food.
        return successorGameState.getScore() + 1.0 / (minFoodDist + 1)
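
        # A quick sanity check of the shaping term (worked by hand): with the
        # nearest food 1 step away the bonus is 1/2; 4 steps away it is 1/5.
        # The +1 in the denominator avoids division by zero on a food square
        # and caps the bonus at 1, so it breaks ties between states with equal
        # getScore() without ever overriding the game score itself.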

def scoreEvaluationFunction(currentGameState: GameState):
    """
    This default evaluation function just returns the score of the state.
    The score is the same one displayed in the Pacman GUI.

    This evaluation function is meant for use with adversarial search agents
    (not reflex agents).
    """
    return currentGameState.getScore()

class MultiAgentSearchAgent(Agent):
    """
    This class provides some common elements to all of your
    multi-agent searchers. Any methods defined here will be available
    to the MinimaxPacmanAgent, AlphaBetaPacmanAgent & ExpectimaxPacmanAgent.

    You *do not* need to make any changes here, but you can if you want to
    add functionality to all your adversarial search agents. Please do not
    remove anything, however.

    Note: this is an abstract class: one that should not be instantiated. It's
    only partially specified, and designed to be extended. Agent (game.py)
    is another abstract class.
    """

    def __init__(self, evalFn = 'scoreEvaluationFunction', depth = '2'):
        self.index = 0 # Pacman is always agent index 0
        self.evaluationFunction = util.lookup(evalFn, globals())
        self.depth = int(depth)
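        # NOTE (a sketch, assuming the standard Berkeley Pacman command line):
        # evalFn and depth arrive as strings parsed from the -a flag, e.g.
        #   python pacman.py -p MinimaxAgent -a depth=3,evalFn=scoreEvaluationFunction
        # and util.lookup resolves the function name in this module's globals().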

class MinimaxAgent(MultiAgentSearchAgent):
    """
    Your minimax agent (question 2)
    """

    def getAction(self, gameState: GameState):
        """
        Returns the minimax action from the current gameState using self.depth
        and self.evaluationFunction.

        Here are some method calls that might be useful when implementing minimax.

        gameState.getLegalActions(agentIndex):
        Returns a list of legal actions for an agent
        agentIndex=0 means Pacman, ghosts are >= 1

        gameState.generateSuccessor(agentIndex, action):
        Returns the successor game state after an agent takes an action

        gameState.getNumAgents():
        Returns the total number of agents in the game

        gameState.isWin():
        Returns whether or not the game state is a winning state

        gameState.isLose():
        Returns whether or not the game state is a losing state
        """
        "*** YOUR CODE HERE ***"
        # Core minimax recursion.
        def minimax(agentIndex, depth, gameState):
            # Terminal condition: depth limit reached, or the game is won/lost.
            if gameState.isWin() or gameState.isLose() or depth == self.depth:
                return self.evaluationFunction(gameState)

            # Pacman (the MAX agent)
            if agentIndex == 0:
                return maxLevel(agentIndex, depth, gameState)
            # Ghosts (the MIN agents)
            else:
                return minLevel(agentIndex, depth, gameState)

        # MAX layer (Pacman)
        def maxLevel(agentIndex, depth, gameState):
            bestValue = float("-inf")
            # Try every legal Pacman action
            for action in gameState.getLegalActions(agentIndex):
                successorGameState = gameState.generateSuccessor(agentIndex, action)
                # The next agent to move is the first ghost (agentIndex + 1)
                value = minimax(agentIndex + 1, depth, successorGameState)
                bestValue = max(bestValue, value)
            return bestValue

        # MIN layer (ghosts)
        def minLevel(agentIndex, depth, gameState):
            bestValue = float("inf")
            # Try every legal ghost action
            for action in gameState.getLegalActions(agentIndex):
                successorGameState = gameState.generateSuccessor(agentIndex, action)
                # After the last ghost, play returns to Pacman and depth increases by 1
                if agentIndex == gameState.getNumAgents() - 1:
                    value = minimax(0, depth + 1, successorGameState)
                # Otherwise it is the next ghost's turn
                else:
                    value = minimax(agentIndex + 1, depth, successorGameState)
                bestValue = min(bestValue, value)
            return bestValue

        # Main getAction logic: choose the best root action for Pacman.
        bestAction = None
        bestValue = float("-inf")

        for action in gameState.getLegalActions(0):
            successorGameState = gameState.generateSuccessor(0, action)
            # Start the recursion with the first ghost
            value = minimax(1, 0, successorGameState)
            if value > bestValue:
                bestValue = value
                bestAction = action

        return bestAction
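
        # Depth convention used above (worked example): with self.depth == 2
        # and two ghosts, one "ply" is Pacman + ghost 1 + ghost 2 each moving
        # once, and depth only increments after the last ghost. The search
        # therefore looks ahead through two full rounds of moves before
        # calling self.evaluationFunction on the resulting states.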

class AlphaBetaAgent(MultiAgentSearchAgent):
    """
    Your minimax agent with alpha-beta pruning (question 3)
    """

    def getAction(self, gameState: GameState):
        """
        Returns the minimax action using self.depth and self.evaluationFunction
        """
        "*** YOUR CODE HERE ***"
        # Core alpha-beta recursion.
        def alphaBeta(agentIndex, depth, gameState, alpha, beta):
            # Terminal condition: depth limit reached, or the game is won/lost.
            if gameState.isWin() or gameState.isLose() or depth == self.depth:
                return self.evaluationFunction(gameState)

            if agentIndex == 0:
                return maxValue(agentIndex, depth, gameState, alpha, beta)
            else:
                return minValue(agentIndex, depth, gameState, alpha, beta)

        # MAX value (Pacman)
        def maxValue(agentIndex, depth, gameState, alpha, beta):
            v = float("-inf")
            for action in gameState.getLegalActions(agentIndex):
                successorGameState = gameState.generateSuccessor(agentIndex, action)
                v = max(v, alphaBeta(agentIndex + 1, depth, successorGameState, alpha, beta))
                # Prune: the MIN ancestor will never allow a value above beta
                if v > beta:
                    return v
                alpha = max(alpha, v)
            return v

        # MIN value (ghosts)
        def minValue(agentIndex, depth, gameState, alpha, beta):
            v = float("inf")
            for action in gameState.getLegalActions(agentIndex):
                successorGameState = gameState.generateSuccessor(agentIndex, action)
                if agentIndex == gameState.getNumAgents() - 1:
                    v = min(v, alphaBeta(0, depth + 1, successorGameState, alpha, beta))
                else:
                    v = min(v, alphaBeta(agentIndex + 1, depth, successorGameState, alpha, beta))
                # Prune: the MAX ancestor will never allow a value below alpha
                if v < alpha:
                    return v
                beta = min(beta, v)
            return v

        # Main getAction logic
        bestAction = None
        v = float("-inf")
        alpha = float("-inf")
        beta = float("inf")

        for action in gameState.getLegalActions(0):
            successorGameState = gameState.generateSuccessor(0, action)
            score = alphaBeta(1, 0, successorGameState, alpha, beta)

            if score > v:
                v = score
                bestAction = action

            # Update alpha at the root; in theory the root never prunes here
            # (beta stays +inf), but this keeps the logic consistent.
            if v > beta:
                return bestAction
            alpha = max(alpha, v)

        return bestAction
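
        # Note on the pruning tests above: the strict comparisons (v > beta,
        # v < alpha) deliberately do not prune on equality; this matches the
        # Berkeley project's stated convention (its autograder is believed to
        # check the exact set of expanded states). Threading alpha through the
        # root loop is what lets results from early root actions cut off work
        # in later sibling subtrees.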

class ExpectimaxAgent(MultiAgentSearchAgent):
    """
    Your expectimax agent (question 4)
    """

    def getAction(self, gameState: GameState):
        """
        Returns the expectimax action using self.depth and self.evaluationFunction

        All ghosts should be modeled as choosing uniformly at random from their
        legal moves.
        """
        "*** YOUR CODE HERE ***"
        # Core expectimax recursion.
        def expectimax(agentIndex, depth, gameState):
            if gameState.isWin() or gameState.isLose() or depth == self.depth:
                return self.evaluationFunction(gameState)

            if agentIndex == 0:
                return maxValue(agentIndex, depth, gameState)
            else:
                return expValue(agentIndex, depth, gameState)

        # MAX value (Pacman)
        def maxValue(agentIndex, depth, gameState):
            v = float("-inf")
            for action in gameState.getLegalActions(agentIndex):
                successorGameState = gameState.generateSuccessor(agentIndex, action)
                v = max(v, expectimax(agentIndex + 1, depth, successorGameState))
            return v

        # Expected value (ghosts choose uniformly at random)
        def expValue(agentIndex, depth, gameState):
            v = 0
            actions = gameState.getLegalActions(agentIndex)
            if not actions:
                return 0
            prob = 1.0 / len(actions)
            for action in actions:
                successorGameState = gameState.generateSuccessor(agentIndex, action)
                if agentIndex == gameState.getNumAgents() - 1:
                    v += prob * expectimax(0, depth + 1, successorGameState)
                else:
                    v += prob * expectimax(agentIndex + 1, depth, successorGameState)
            return v

        bestAction = None
        v = float("-inf")
        for action in gameState.getLegalActions(0):
            successorGameState = gameState.generateSuccessor(0, action)
            score = expectimax(1, 0, successorGameState)
            if score > v:
                v = score
                bestAction = action
        return bestAction
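
        # Minimax vs. expectimax on the same ghost node (worked example):
        # if a ghost's two legal moves lead to values 10 and 0, minLevel in
        # MinimaxAgent scores the node 0 (worst case), while expValue scores
        # it 0.5 * 10 + 0.5 * 0 = 5, reflecting the uniformly random ghost
        # model described in the docstring.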

def betterEvaluationFunction(currentGameState: GameState):
    """
    Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable
    evaluation function (question 5).

    DESCRIPTION: Start from the game score, then add shaped terms: a
    reciprocal-distance bonus toward the nearest food and nearest capsule,
    a large bonus for approaching scared (edible) ghosts, a heavy penalty
    for standing within 2 squares of an active ghost plus a mild proximity
    penalty otherwise, and linear penalties on the number of remaining food
    pellets and capsules to encourage finishing the level.
    """
    "*** YOUR CODE HERE ***"
    # Get useful information from the state
    newPos = currentGameState.getPacmanPosition()
    newFood = currentGameState.getFood()
    newGhostStates = currentGameState.getGhostStates()
    newScaredTimes = [ghostState.scaredTimer for ghostState in newGhostStates]
    newCapsules = currentGameState.getCapsules()

    # Base score
    score = currentGameState.getScore()

    # Food-distance term
    foodList = newFood.asList()
    if foodList:
        minFoodDist = min([util.manhattanDistance(newPos, food) for food in foodList])
        score += 10.0 / (minFoodDist + 1) # closer food earns a larger bonus

    # Capsule-distance term
    if newCapsules:
        minCapDist = min([util.manhattanDistance(newPos, cap) for cap in newCapsules])
        score += 20.0 / (minCapDist + 1)

    # Ghost-distance terms
    for i, ghostState in enumerate(newGhostStates):
        ghostPos = ghostState.getPosition()
        dist = util.manhattanDistance(newPos, ghostPos)
        if newScaredTimes[i] > 0:
            # A scared ghost is worth chasing (move toward it)
            score += 100.0 / (dist + 1)
        else:
            # Otherwise penalize proximity (keep away)
            if dist < 2:
                score -= 1000.0
            else:
                score -= 10.0 / (dist + 1) # mild pressure to keep a distance

    # Fewer remaining food pellets is better
    score -= len(foodList) * 4

    # Fewer remaining capsules is better
    score -= len(newCapsules) * 20

    return score

# Abbreviation
better = betterEvaluationFunction
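
# Typical invocations (a sketch, assuming the standard Berkeley project layout
# with pacman.py and autograder.py in the same directory):
#   python pacman.py -p ReflexAgent -l testClassic
#   python pacman.py -p MinimaxAgent -a depth=3 -l smallClassic
#   python pacman.py -p ExpectimaxAgent -a evalFn=better,depth=2 -l smallClassic
#   python autograder.py -q q2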