1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
   | import numpy as np import matplotlib as mpl import matplotlib.pyplot as plt import pandas as pd from sklearn.model_selection import train_test_split from sklearn.preprocessing import MinMaxScaler from sklearn.pipeline import Pipeline from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score from pprint import pprint if __name__ == '__main__':     path = './Advertising.csv'     data = pd.read_csv(path)          x = data[['TV', 'Radio']]          y = data[['Sales']]          plt.figure(facecolor='w', figsize=(9, 10))     plt.subplot(311)     plt.plot(data['TV'], y, 'ro', mec='k')     plt.title('TV')     plt.grid(b=True, ls=':')     plt.subplot(312)     plt.plot(data['Radio'], y, 'g^', mec='k')     plt.title('Radio')     plt.grid(b=True, ls=':')     plt.subplot(313)     plt.plot(data['Newspaper'], y, 'b*', mec='k')     plt.title('Newspaper')     plt.grid(b=True, ls=':')     plt.tight_layout(pad=2)          plt.show()          x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)     model = LinearRegression()     model.fit(x_train, y_train)     print(model.coef_, model.intercept_)     order = y_test.argsort_value(axis=0)     y_test = y_test.values[order]     x_test = x_test.values[order, :]     y_test_pred = model.predict(x_test)     mse = np.mean((y_test_pred - np.array(y_test)) ** 2)       rmse = np.sqrt(mse)       mse_sys = mean_squared_error(y_test, y_test_pred)     print('MSE = ', mse, end=' ')     print('MSE(System Function) = ', mse_sys, end=' ')     print('MAE = ', mean_absolute_error(y_test, y_test_pred))     print('RMSE = ', rmse)     print('Training R2 = ', model.score(x_train, y_train))     print('Training R2(System) = ', r2_score(y_train, model.predict(x_train)))     print('Test R2 = ', model.score(x_test, y_test))     error = y_test - y_test_pred     np.set_printoptions(suppress=True)     print('error = ', error)     plt.hist(error, bins=20, color='g', alpha=0.6, edgecolor='k')     plt.show()     plt.figure(facecolor='w')     t = np.arange(len(x_test))     plt.plot(t, y_test, 'r-', linewidth=2, label='真实数据')     plt.plot(t, y_test_pred, 'g-', linewidth=2, label='预测数据')     plt.legend(loc='upper left')     plt.title('线性回归预测销量', fontsize=18)     plt.grid(b=True, ls=':')     plt.show()
   |