import numpy as np
import pandas as pd
from sklearn.linear_model import linearRegression
import matplotlib.pyplot as plt
x_train, x_test, y_train, y_test =
data=pd.read_csv('stock_data.csv')
print (data)
y_profit=data['profit']
x_marketing=data['R&D spend']
regressor = LinearRegression
# Training the model with Train_set
plt.scatter(color='pink', label= 'Actual Price') #plotting the initial datapoints
plt.plot(color='blue', linewidth=3, label = 'Predicted Price') #plotting the line made by linear regression
plt.title('Linear Regression | profit vs. marketing')
plt.legend()
plt.xlabel('R&D Spend')
plt.show()
# Predicting Price with Test_set
plt.scatter( color='pink', label= 'Actual Price') #plotting the initial datapoints
plt.plot(color='blue', linewidth=3, label = 'Predicted Profit') #plotting the line made by linear regression
plt.title('Linear Regression | marketing vs. Profit_test')
plt.legend()
plt.xlabel('R&D Spend')
plt.show()
不知道怎么改才能运行出来画出来图
根据下图csv文件分析出 R&D Spend 和 Profit的简单线性关系。
你设置的所有图表参数都是些辅助的元素, 并没有告诉程序散点图x y分别是哪个变量呢,试下这个
plt.scatter(x=data['R&D Spend'],y=data['Profit'], color='pink', label= 'Actual Price')
两个变量相关性可以用corr()计算:
df['a'].corr(df['b'],method ='spearman')