井字棋alpha-beta 剪枝算法python代码求解

这两段alpha-beta 剪枝算法python代码的实现效果为什么会不同

这段是正常的

def alphbetaSearch(board):
    global num
    a = actions(board)
    judge_for_player = player(board)
    if judge_for_player is X:
        v = float('-inf')
        for move in a:
            result_board = result(board, move)
            v_every = min_value_ab(result_board, float('-inf'), float('inf'))
            if v <= v_every:
                v = v_every
                marks = move
        print(marks, v,num)
    else:
        v = float('inf')
        for move in a:
            result_board = result(board, move)
            v_every = max_value_ab(result_board, float('-inf'), float('inf'))
            if v >= v_every:
                v = v_every
                marks = move
        print(marks, v,num)
    return marks


def max_value_ab(board, alpha, beta):
    global num
    num=num+1
    if terminal(board):
        return utility(board)
    v = float('-inf')
    for move in actions(board):
        result_board = result(board, move)
        v_every = min_value_ab(result_board, alpha, beta)
        v = max(v, v_every)
        if v >= beta: # alpha-beta pruning
            return v
        alpha = max(alpha, v)
    return v


def min_value_ab(board, alpha, beta):
    global num
    num=num+1
    if terminal(board):
        return utility(board)
    v = float('inf')
    for move in actions(board):
        result_board = result(board, move)
        v_every = max_value_ab(result_board, alpha, beta)
        v = min(v, v_every)
        if v <= alpha: # alpha-beta pruning
            return v
        beta = min(beta, v)
    return v

这段是有问题的,不能正确求出最优解


def alphbetaSearch(board):
    global num
    a = actions(board)
    v11 = float('-inf')
    v12 = float('inf')
    judge_for_player = player(board)
    if judge_for_player is X:
        for i11 in a:
            v1_every = min_value_ab(result(board, i11), v11, v12)
            if v11 <= v1_every:
                v11 = v1_every
                marks = i11
        print(num)
    else:
        for i12 in a:
            v2_every = max_value_ab(result(board, i12), v11, v12)
            if v12 >= v2_every:
                v12 = v2_every
                marks = i12
        print(num)
    return marks


def max_value_ab(board, alph, beta):
    global num
    num=num+1
    if terminal(board):
        return utility(board)
    v2 = float('-inf')
    for i2 in actions(board):
        result_board = result(board, i2)
        v_every = min_value_ab(result_board, alph, beta)
        v2 = max(v2, v_every)
        if v2 >= beta: # alpha-beta pruning
            return v2
        alph = max(alph, v2)
    return v2


def min_value_ab(board, alph, beta):
    global num
    num=num+1
    if terminal(board):
        return utility(board)
    v3 = float('inf')
    for i3 in actions(board):
        result_board = result(board, i3)
        v_every = min_value_ab(result_board, alph, beta)
        v3 = max(v3, v_every)
        if v3 >= beta: # alpha-beta pruning
            return v3
        beta = min(beta, v3)
    return v3

我看这两段代码似乎是等价的,不知道问题出在哪里

img


图示部分,右边永远是与无穷大无穷小进行比较,而左边的v11,v12会重新赋值,再比较就不是和无穷大无穷小比较了

不知道你这个问题是否已经解决, 如果还没有解决的话:

如果你已经解决了该问题, 非常希望你能够分享一下解决方案, 写成博客, 将相关链接放在评论区, 以帮助更多的人 ^-^