机器学习线性回归问题,Python代码不会

糖尿病数据集是Sklearn提供的一个标准数据集。它从442例糖尿病患者的资料中获取了10个特征--年龄、性别、体重指数(BMI)、血压和6个血清的测量值,以及这些患者在一年后疾病发展的病情量化值。糖尿病预测问题的任务是根据上述10个特征预测病情量化值。
加载数据的代码如下:
from sklearn.datasets import load_diabetes
# Load the dataset first; `diabetes` must exist before its fields are read.
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target
请用线性回归算法来完成糖尿病预测任务(病情量化值)

完整代码:

from sklearn import datasets
from sklearn import linear_model
import matplotlib.pyplot as plt
import numpy as np

# Split the dataset
diabetes = datasets.load_diabetes()                 # load the data
# Keep only the feature at column index 2 (bmi in scikit-learn's feature
# order) as a 2-D column, so the fitted model can be drawn as a line.
diabetes_x_temp = diabetes.data[:, np.newaxis, 2]
diabetes_x_train = diabetes_x_temp[:-20]            # training samples: everything except the last 20 rows
diabetes_x_test = diabetes_x_temp[-20:]             # test samples: the last 20 rows
diabetes_y_train = diabetes.target[:-20]            # training targets
diabetes_y_test = diabetes.target[-20:]             # targets the predictions are compared against


# Fit the regression and predict once; the result is reused below
clf = linear_model.LinearRegression()
clf.fit(diabetes_x_train, diabetes_y_train)
diabetes_y_pred = clf.predict(diabetes_x_test)


# Coefficients, mean squared error and score
print('Coefficients :\n', clf.coef_)
# np.mean of the squared residuals is the mean squared error, not the
# residual sum of squares, so the label says so.
print("Mean squared error: %.2f" % np.mean((diabetes_y_pred - diabetes_y_test) ** 2))
# LinearRegression.score returns the coefficient of determination (R^2).
print("variance score: %.2f" % clf.score(diabetes_x_test, diabetes_y_test))


# Plot
plt.title('LinearRegression Diabetes')   # title
plt.xlabel('Attributes')                 # x-axis label
plt.ylabel('Measure of disease')         # y-axis label


# Actual test points
plt.scatter(diabetes_x_test, diabetes_y_test, color='orange')


# Predictions, drawn as a straight line
plt.plot(diabetes_x_test, diabetes_y_pred, color='blue', linewidth=3)
plt.show()


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def makeLinearRegression(x):
    """Fit a linear regression on ``data.csv`` and predict for the rows of ``x``.

    Reads ``data.csv`` from the working directory, drops its first column,
    prints exploratory summaries, saves a pair plot to ``pairplot.png``, fits
    a ``LinearRegression`` on a random 80% of the rows, prints the held-out
    predictions next to the true values, and finally prints the prediction
    for ``x``.

    Args:
        x: 2-D array-like with one row per sample. Each row holds one value
            per feature, in the order of the first ten columns that remain
            after the first CSV column is dropped.

    Returns:
        None. All results are printed.

    Raises:
        ValueError: if the rows of ``x`` do not have as many values as there
            are feature columns (raised while building the input frame).
    """
    diabetes = pd.read_csv('data.csv')
    # Drop the first column. ``DataFrame.ix`` was removed in pandas 1.0, so
    # positional ``iloc`` is used instead.
    # NOTE(review): presumably the first column is a saved row index -
    # confirm against data.csv.
    diabetes = diabetes.iloc[:, 1:]
    print('head:', diabetes.head(), '\nShape:', diabetes.shape)

    # Descriptive statistics
    print(diabetes.describe())
    # Missing values per column. Masking the frame with ``isnull() == True``
    # and counting always gives 0, because the selected cells are NaN.
    print(diabetes.isnull().sum())
    # Correlation: 0-0.3 weak, 0.3-0.6 moderate, 0.6-1 strong
    print(diabetes.corr())

    # ``size`` was renamed to ``height`` in seaborn 0.9.
    sns.pairplot(diabetes, x_vars=['bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'], y_vars='y',
                 height=6, aspect=0.8, kind='reg')
    plt.savefig("pairplot.png")
    plt.show()

    features = diabetes.iloc[:, :10]
    target = diabetes['y']
    x_train, x_test, y_train, y_test = train_test_split(features, target, train_size=.80)

    model = LinearRegression()
    model.fit(x_train, y_train)

    # Evaluate on the held-out rows
    y_pred = model.predict(x_test)
    print('测试预测结果:', y_pred)
    print('测试真实结果:', y_test)

    # Predict for the caller's input rather than for the test split again.
    # Reusing the training column names keeps the input consistent with what
    # the model was fitted on.
    sample = pd.DataFrame(x, columns=features.columns)
    print("预测你的糖尿病指数:", model.predict(sample))


print("输入身体指数1:age、 sex 、bmi、 map、 tc、 ldl、 hdl、 tch、 ltg、 glu")
# age: age
# sex: sex
# bmi = body mass index: a key indicator of obesity and healthy weight;
#       BMI = weight (kg) / height (m) squared, ideal range 18.5-23.9
# bp = blood pressure
# s1, s2, s3, s4, s5, s6: six blood serum measurements
arr = input("")
# Parse as float: measurements such as BMI are not whole numbers, and
# int() would raise ValueError on input like "23.5".
num = [[float(n) for n in arr.split()]]
makeLinearRegression(num)

有帮助的话采纳一下哦!


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def makeLinearRegression(x):
    """Fit a linear regression on ``data.csv`` and predict for the rows of ``x``.

    Reads ``data.csv`` from the working directory, drops its first column,
    prints exploratory summaries, saves a pair plot to ``pairplot.png``, fits
    a ``LinearRegression`` on a random 80% of the rows, prints the held-out
    predictions next to the true values, and finally prints the prediction
    for ``x``.

    Args:
        x: 2-D array-like with one row per sample. Each row holds one value
            per feature, in the order of the first ten columns that remain
            after the first CSV column is dropped.

    Returns:
        None. All results are printed.

    Raises:
        ValueError: if the rows of ``x`` do not have as many values as there
            are feature columns (raised while building the input frame).
    """
    diabetes = pd.read_csv('data.csv')
    # Drop the first column. ``DataFrame.ix`` was removed in pandas 1.0, so
    # positional ``iloc`` is used instead.
    # NOTE(review): presumably the first column is a saved row index -
    # confirm against data.csv.
    diabetes = diabetes.iloc[:, 1:]
    print('head:', diabetes.head(), '\nShape:', diabetes.shape)

    # Descriptive statistics
    print(diabetes.describe())
    # Missing values per column. Masking the frame with ``isnull() == True``
    # and counting always gives 0, because the selected cells are NaN.
    print(diabetes.isnull().sum())
    # Correlation: 0-0.3 weak, 0.3-0.6 moderate, 0.6-1 strong
    print(diabetes.corr())

    # ``size`` was renamed to ``height`` in seaborn 0.9.
    sns.pairplot(diabetes, x_vars=['bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'], y_vars='y',
                 height=6, aspect=0.8, kind='reg')
    plt.savefig("pairplot.png")
    plt.show()

    features = diabetes.iloc[:, :10]
    target = diabetes['y']
    x_train, x_test, y_train, y_test = train_test_split(features, target, train_size=.80)

    model = LinearRegression()
    model.fit(x_train, y_train)

    # Evaluate on the held-out rows
    y_pred = model.predict(x_test)
    print('测试预测结果:', y_pred)
    print('测试真实结果:', y_test)

    # Predict for the caller's input rather than for the test split again.
    # Reusing the training column names keeps the input consistent with what
    # the model was fitted on.
    sample = pd.DataFrame(x, columns=features.columns)
    print("预测你的糖尿病指数:", model.predict(sample))


print("输入身体指数1:age、 sex 、bmi、 map、 tc、 ldl、 hdl、 tch、 ltg、 glu")
# age: age
# sex: sex
# bmi = body mass index: a key indicator of obesity and healthy weight;
#       BMI = weight (kg) / height (m) squared, ideal range 18.5-23.9
# bp = blood pressure
# s1, s2, s3, s4, s5, s6: six blood serum measurements
arr = input("")
# Parse as float: measurements such as BMI are not whole numbers, and
# int() would raise ValueError on input like "23.5".
num = [[float(n) for n in arr.split()]]
makeLinearRegression(num)