import numpy as np

# seed NumPy's RNG; the original called random.seed(518), which only affects
# Python's random module and leaves np.random.randint unseeded
# (the unused "from sympy import symbols" is dropped)
np.random.seed(518)
data = np.random.randint(low=0, high=6, size=(500, 7))
weight = np.array([[0.2], [0.15], [0.1], [0.008], [0.23], [0.1], [0.14]])
y = np.matmul(data, weight)
# learning rate
lr = 0.0001
# initial parameters
w0 = w1 = w2 = w3 = w4 = w5 = w6 = 0
epochs = 50000
def compute_mse(w0, w1, w2, w3, w4, w5, w6, data, y):
    total_error = 0
    for i in range(len(data)):
        # accumulate the squared error; the column indices must run 0..6 in order
        # (the original used data[i, 3] for w2 and data[i, 4] for both w3 and w4,
        # skipping column 2 entirely)
        pred = (w0 * data[i, 0] + w1 * data[i, 1] + w2 * data[i, 2] +
                w3 * data[i, 3] + w4 * data[i, 4] + w5 * data[i, 5] +
                w6 * data[i, 6])
        total_error += (y[i, 0] - pred) ** 2
    mse_ = total_error / len(data) / 2
    return mse_
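
A quick sanity check one might add here (this call is illustrative, not part of the original): with all-zero weights the prediction is 0, so the loss should equal the mean of y squared, halved.

print(compute_mse(0, 0, 0, 0, 0, 0, 0, data, y))  # should match (y ** 2).mean() / 2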
def gradient_descent(data, y, w0, w1, w2, w3, w4, w5, w6, lr, epochs):
    # number of samples; keep it an int so range(m) works
    # (the original m = float(len(data)) makes range(m) raise a TypeError)
    m = len(data)
    for i in range(epochs):
        # reset the partial derivatives at the start of each epoch
        w0_grad = w1_grad = w2_grad = w3_grad = w4_grad = w5_grad = w6_grad = 0
        # sum the per-sample gradients, averaged over the m samples
        for j in range(m):
            # prediction error for sample j; computing it once avoids the
            # unbalanced parentheses in the original seven gradient lines
            error = (w0 * data[j, 0] + w1 * data[j, 1] + w2 * data[j, 2] +
                     w3 * data[j, 3] + w4 * data[j, 4] + w5 * data[j, 5] +
                     w6 * data[j, 6]) - y[j, 0]
            w0_grad += (1 / m) * error * data[j, 0]
            w1_grad += (1 / m) * error * data[j, 1]
            w2_grad += (1 / m) * error * data[j, 2]
            w3_grad += (1 / m) * error * data[j, 3]
            w4_grad += (1 / m) * error * data[j, 4]
            w5_grad += (1 / m) * error * data[j, 5]
            w6_grad += (1 / m) * error * data[j, 6]
        # update the weights themselves; the original wrote
        # w0_grad = w0 - (lr * w0_grad), so w0..w6 were never changed and the
        # function returned the initial zeros
        w0 = w0 - lr * w0_grad
        w1 = w1 - lr * w1_grad
        w2 = w2 - lr * w2_grad
        w3 = w3 - lr * w3_grad
        w4 = w4 - lr * w4_grad
        w5 = w5 - lr * w5_grad
        w6 = w6 - lr * w6_grad
    return w0, w1, w2, w3, w4, w5, w6
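
The same update in vectorized form, as a minimal sketch (gradient_descent_vec is my name, not from the original): the gradient for all seven weights at once is (1/m) * X.T @ (X @ w - y), and each epoch steps w <- w - lr * grad, identical to the per-weight loops above but much faster.

def gradient_descent_vec(X, y, w, lr, epochs):
    # grad = (1/m) * X^T (X w - y); one step per epoch over the full batch
    m = len(X)
    for _ in range(epochs):
        w = w - lr * (X.T @ (X @ w - y)) / m
    return w

w_fit = gradient_descent_vec(data.astype(float), y, np.zeros((7, 1)), lr, epochs)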
Is there something wrong with my formulas?
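For reference, the gradient the inner loop is meant to compute, given the loss J defined in compute_mse:

J(w) = \frac{1}{2m} \sum_{i=1}^{m} \Big( \sum_{k=0}^{6} w_k x_{ik} - y_i \Big)^2,
\qquad
\frac{\partial J}{\partial w_k} = \frac{1}{m} \sum_{i=1}^{m} \Big( \sum_{l=0}^{6} w_l x_{il} - y_i \Big) x_{ik}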
With this few parameters, wouldn't it be simpler to just solve the equations directly?
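
Indeed. A minimal sketch of the closed-form route, reusing the data and y defined above: np.linalg.lstsq solves the least-squares problem data @ w ≈ y, and since y here is noise-free it recovers the true weights up to floating-point error.

w, residuals, rank, sv = np.linalg.lstsq(data, y, rcond=None)
print(w.ravel())  # ~ [0.2, 0.15, 0.1, 0.008, 0.23, 0.1, 0.14]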