糖尿病数据集是 scikit-learn(sklearn)提供的一个标准数据集。它从442例糖尿病患者的资料中获取了10个特征——年龄、性别、体质指数(BMI)、血压和6个血清的测量值,以及这些患者在一年后疾病发展的病情量化值。糖尿病预测问题的任务是根据上述10个特征预测病情量化值。
加载数据的代码如下:
from sklearn.datasets import load_diabetes

# The dataset object must be created before it is used; without this call
# the two lines below fail with NameError because `diabetes` is undefined.
diabetes = load_diabetes()
X = diabetes.data    # feature matrix
y = diabetes.target  # disease-progression value to predict
请用线性回归算法来完成糖尿病预测任务(病情量化值)
完整代码:
from sklearn import datasets
from sklearn import linear_model
import matplotlib.pyplot as plt
import numpy as np
# --- Dataset split ---
diabetes = datasets.load_diabetes()  # load the data
# Keep only the feature at column index 2; np.newaxis keeps the result 2-D
# (n_samples, 1), which is the shape fit()/predict() are given below.
diabetes_x_temp = diabetes.data[:, np.newaxis, 2]
diabetes_x_train = diabetes_x_temp[:-20]  # training samples: all rows except the last 20
diabetes_x_test = diabetes_x_temp[-20:]   # test samples: the last 20 rows
diabetes_y_train = diabetes.target[:-20]  # training targets
diabetes_y_test = diabetes.target[-20:]   # targets used to evaluate the predictions

# --- Fit the regression and predict ---
clf = linear_model.LinearRegression()
clf.fit(diabetes_x_train, diabetes_y_train)
# Predict once and reuse the result for both the error metric and the plot.
diabetes_y_pred = clf.predict(diabetes_x_test)

# --- Coefficients, mean squared error, score ---
print('Coefficients :\n', clf.coef_)
# np.mean of the squared residuals is the mean squared error, not a residual
# sum of squares, so the label is corrected to match the computed value.
print("Mean squared error: %.2f" % np.mean((diabetes_y_pred - diabetes_y_test) ** 2))
# score() on a LinearRegression reports the coefficient of determination (R^2).
print("variance score: %.2f" % clf.score(diabetes_x_test, diabetes_y_test))

# --- Plot ---
plt.title('LinearRegression Diabetes')  # title
plt.xlabel(u'Attributes')               # x-axis label
plt.ylabel(u'Measure of disease')       # y-axis label
# Actual test points
plt.scatter(diabetes_x_test, diabetes_y_test, color='orange')
# Predictions, drawn as a line
plt.plot(diabetes_x_test, diabetes_y_pred, color='blue', linewidth=3)
plt.show()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
def makeLinearRegression(x):
    """Fit a linear regression on ``data.csv`` and print a prediction for ``x``.

    The function reads ``data.csv`` from the current directory, prints some
    exploratory summaries, saves a pair plot to ``pairplot.png``, fits a
    ``LinearRegression`` on a random 80% split and prints the predictions for
    the held-out rows followed by the prediction for ``x``.

    Args:
        x: 2-D array-like of feature rows to predict for, one row per sample,
            with the same number of columns as the training features
            (the caller in this file passes a single row of 10 values).

    Returns:
        None. All results are printed.
    """
    diabetes = pd.read_csv('data.csv')
    # Drop the first column (presumably a saved index column -- confirm
    # against data.csv). `.ix` was removed from pandas; `.iloc` is the
    # positional equivalent.
    diabetes = diabetes.iloc[:, 1:]
    print('head:', diabetes.head(), '\nShape:', diabetes.shape)
    # Descriptive statistics
    print(diabetes.describe())
    # Missing-value check: number of nulls per column. The previous
    # `df[df.isnull() == True].count()` always printed zeros.
    print(diabetes.isnull().sum())
    # Correlation matrix (rule of thumb: 0-0.3 weak, 0.3-0.6 moderate, 0.6-1 strong)
    print(diabetes.corr())
    # `size` was renamed to `height` in seaborn.
    sns.pairplot(diabetes, x_vars=['bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'], y_vars='y',
                 height=6, aspect=0.8, kind='reg')
    plt.savefig("pairplot.png")
    plt.show()
    # First 10 columns are the features, column `y` is the target.
    x_train, x_test, y_train, y_test = train_test_split(
        diabetes.iloc[:, :10], diabetes.y, train_size=.80)
    model = LinearRegression()
    model.fit(x_train, y_train)
    # Predictions on the held-out split
    y_pred = model.predict(x_test)
    print('测试预测结果:', y_pred)
    print('测试真实结果:', y_test)
    # Predict for the caller-supplied rows; the original ignored `x` and
    # printed the test-set predictions a second time. The rows are wrapped in
    # a frame with the training column names so the input matches what the
    # model was fitted on.
    x_frame = pd.DataFrame(np.asarray(x), columns=x_train.columns)
    print("预测你的糖尿病指数:", model.predict(x_frame))
print("输入身体指数1:age、 sex 、bmi、 map、 tc、 ldl、 hdl、 tch、 ltg、 glu")
# age: age
# sex: sex
# bmi = body mass index: weight (kg) / height squared (m^2); an indicator of
#       obesity and standard weight (ideal range quoted as 18.5-23.9)
# bp = blood pressure
# s1, s2, s3, s4, s5, s6: six blood serum measurements
arr = input("")
# Parse as float rather than int so that decimal feature values are accepted;
# whole numbers still parse as before.
num = [[float(n) for n in arr.split()]]
makeLinearRegression(num)
有帮助的话采纳一下哦!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
def makeLinearRegression(x):
    """Fit a linear regression on ``data.csv`` and print a prediction for ``x``.

    The function reads ``data.csv`` from the current directory, prints some
    exploratory summaries, saves a pair plot to ``pairplot.png``, fits a
    ``LinearRegression`` on a random 80% split and prints the predictions for
    the held-out rows followed by the prediction for ``x``.

    Args:
        x: 2-D array-like of feature rows to predict for, one row per sample,
            with the same number of columns as the training features
            (the caller in this file passes a single row of 10 values).

    Returns:
        None. All results are printed.
    """
    diabetes = pd.read_csv('data.csv')
    # Drop the first column (presumably a saved index column -- confirm
    # against data.csv). `.ix` was removed from pandas; `.iloc` is the
    # positional equivalent.
    diabetes = diabetes.iloc[:, 1:]
    print('head:', diabetes.head(), '\nShape:', diabetes.shape)
    # Descriptive statistics
    print(diabetes.describe())
    # Missing-value check: number of nulls per column. The previous
    # `df[df.isnull() == True].count()` always printed zeros.
    print(diabetes.isnull().sum())
    # Correlation matrix (rule of thumb: 0-0.3 weak, 0.3-0.6 moderate, 0.6-1 strong)
    print(diabetes.corr())
    # `size` was renamed to `height` in seaborn.
    sns.pairplot(diabetes, x_vars=['bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'], y_vars='y',
                 height=6, aspect=0.8, kind='reg')
    plt.savefig("pairplot.png")
    plt.show()
    # First 10 columns are the features, column `y` is the target.
    x_train, x_test, y_train, y_test = train_test_split(
        diabetes.iloc[:, :10], diabetes.y, train_size=.80)
    model = LinearRegression()
    model.fit(x_train, y_train)
    # Predictions on the held-out split
    y_pred = model.predict(x_test)
    print('测试预测结果:', y_pred)
    print('测试真实结果:', y_test)
    # Predict for the caller-supplied rows; the original ignored `x` and
    # printed the test-set predictions a second time. The rows are wrapped in
    # a frame with the training column names so the input matches what the
    # model was fitted on.
    x_frame = pd.DataFrame(np.asarray(x), columns=x_train.columns)
    print("预测你的糖尿病指数:", model.predict(x_frame))
print("输入身体指数1:age、 sex 、bmi、 map、 tc、 ldl、 hdl、 tch、 ltg、 glu")
# age: age
# sex: sex
# bmi = body mass index: weight (kg) / height squared (m^2); an indicator of
#       obesity and standard weight (ideal range quoted as 18.5-23.9)
# bp = blood pressure
# s1, s2, s3, s4, s5, s6: six blood serum measurements
arr = input("")
# Parse as float rather than int so that decimal feature values are accepted;
# whole numbers still parse as before.
num = [[float(n) for n in arr.split()]]
makeLinearRegression(num)