[python scipy] optimize fails with 'Linear search failed'

scipy.optimize does not work

I have recently been following Andrew Ng's machine learning course. While working through the logistic regression exercise, I ran into this problem: calling the minimize function returns a result whose result.x is identical to the initial theta, so I printed the whole result:

e:\FileData\xx\算法\machineLearning\machine-learning-andrew\exercise2\sigmoid.py:6: RuntimeWarning: overflow encountered in exp
  return 1/(1+np.exp(-data))
e:\FileData\xx\算法\machineLearning\machine-learning-andrew\exercise2\costFunction.py:11: RuntimeWarning: divide by zero encountered in log
  ln_h = np.log(h_x)  # np.log() is the Natural Log
     fun: array([[nan]])
     jac: array([ -0.6       , -44.83135362, -44.37384125])
 message: 'Linear search failed'
    nfev: 56
     nit: 0
  status: 4
 success: False
       x: array([ -0.1       , -12.00921659, -11.26284221])

My code:
main.py

import dataPlot
import numpy as np
import sigmoid
import costFunction
import scipy.optimize as op
#========================read data========================
with open('./ex2/ex2data1.txt','r') as file:
    lines = file.readlines()
    m = len(lines)
    feature_number = len(lines[0].strip().split(',')) - 1
    x_data = np.zeros((m,feature_number))
    y_data = np.zeros((m,1))
    for i in range(m):
        line_temp = lines[i].strip().split(',')
        for j in range(len(line_temp)):
            if j != len(line_temp) - 1:
                x_data[i,j] = line_temp[j]
            else:
                y_data[i,0] = line_temp[j]
        

# dataPlot.Plot(x_data, y_data)
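# side note: the read loop above can be replaced with a single call,
# e.g. (a sketch, assuming the comma-separated layout of ex2data1.txt):
# data = np.loadtxt('./ex2/ex2data1.txt', delimiter=',')
# x_data, y_data = data[:, :-1], data[:, -1:]
# m, feature_number = x_data.shape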
#========================initialize========================
x_data = np.column_stack((np.ones((m, 1)), x_data))
theta = np.zeros((feature_number+1, 1)).flatten()  # must be a vector

#print(sigmoid.hyposisFunction([1,2],[0,0]))
# ideal output 0.5
# mine is 0.5
#========================compute cost and gradient========================
cost = costFunction.cost_function(theta, x_data,y_data)
print("initial_theta cost is {} (approx)".format(cost))
print("expected cost is 0.693")
# my output
# initial_theta cost is [[0.69314718]] (approx)
# expected cost is 0.693

initial_theta = costFunction.gradient(theta, x_data, y_data)
print("initial_theta theta is {}".format(initial_theta))
# my output
# initial_theta theta is [ -0.1        -12.00921659 -11.26284221]

# print('Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628\n')
#========================Optimizing using fminunc(matlab)/scipy(python)========================
result = op.minimize(costFunction.cost_function, initial_theta.flatten(), args=(x_data, y_data), method="TNC", jac=costFunction.gradient)
print(result)
# result: the same failing output quoted at the top of the post

sigmoid.py

import numpy as np


# compute the sigmoid of data; data can be a vector, matrix, or scalar
def sigmoid(data):
    return 1/(1+np.exp(-data))
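
The overflow warning comes from np.exp(-data) when data contains a large negative value. Since SciPy is already a dependency here, one option (a sketch, not required for correctness) is scipy.special.expit, a numerically stable implementation of the same logistic function:

from scipy.special import expit


# expit(x) computes 1/(1+exp(-x)) without overflowing in either tail
def sigmoid(data):
    return expit(data)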

costFunction.py

import numpy as np
import sigmoid


# compute the cost of theta; x, y can be a vector, matrix, or scalar
def cost_function(theta, x_data, y_data):
    m = y_data.shape[0]
    theta = theta.reshape((-1, 1))
    # compute cost
    h_x = sigmoid.sigmoid(np.dot(x_data, theta))
    ln_h = np.log(h_x)  # np.log() is the Natural Log
    part1 = -np.dot(ln_h.T, y_data) / m
    ln_1h = np.log(1-h_x)
    part2 = -np.dot(ln_1h.T, 1-y_data) / m
    cost = part1+part2
    # the partial derivatives of theta are computed separately in gradient() below
    return cost


# because scipy.optimize.minimize is different from fminunc, we need a method that returns the gradient independently
def gradient(theta, x_data, y_data):
    theta = theta.reshape((-1, 1))
    m = y_data.shape[0]
    # compute the hypothesis
    h_x = sigmoid.sigmoid(np.dot(x_data, theta))
    # compute the partial derivatives of theta
    grad = np.dot(x_data.T, h_x - y_data) / m
    return grad.flatten()  # must be a vector
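
The divide-by-zero warning means the sigmoid saturated to exactly 0 or 1 on these unscaled features, so np.log produced -inf and the cost became nan. A second detail: minimize expects the objective to return a scalar, while cost_function returns a (1, 1) array, which is why the output shows fun: array([[nan]]). A sketch of cost_function with both points patched (the epsilon value is an arbitrary choice of mine):

def cost_function(theta, x_data, y_data):
    m = y_data.shape[0]
    theta = theta.reshape((-1, 1))
    h_x = sigmoid.sigmoid(np.dot(x_data, theta))
    h_x = np.clip(h_x, 1e-15, 1 - 1e-15)  # keep log() away from 0 and 1
    cost = (-np.dot(np.log(h_x).T, y_data)
            - np.dot(np.log(1 - h_x).T, 1 - y_data)) / m
    return cost.item()  # scalar, as minimize expects

A hand-written jac can also be checked against a finite-difference estimate with scipy.optimize.check_grad (run from main.py's context; it assumes the objective returns a scalar):

err = op.check_grad(costFunction.cost_function, costFunction.gradient, theta, x_data, y_data)
print(err)  # should be close to 0 if the gradient is correct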

Solved: on line 38 of main.py, the return value of the gradient call overwrote initial_theta, so minimize was started from the gradient values instead of the initial theta. That is why result.x in the output is exactly the gradient printed in the "compute cost and gradient" step.
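
For reference, a minimal fix (the name initial_gradient is just illustrative) is to store the gradient under its own name and pass the untouched theta to minimize as the starting point:

initial_gradient = costFunction.gradient(theta, x_data, y_data)
print("initial gradient is {}".format(initial_gradient))
result = op.minimize(costFunction.cost_function, theta, args=(x_data, y_data), method="TNC", jac=costFunction.gradient)

With the starting point corrected, TNC should iterate normally instead of failing its line search on a nan cost.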