Simple (univariate) linear regression for data prediction
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

house = tf.keras.datasets.boston_housing
(train_x, train_y), (test_x, test_y) = house.load_data()
print(train_x.shape, train_y.shape)
print(test_x.shape, test_y.shape)

x_train = train_x[:, 5]   # take the number-of-rooms feature (RM, column 5)
y_train = train_y
x_test = test_x[:, 5]
y_test = test_y

# hyperparameters
learn_rate = 0.04
iter = 2000
display_step = 100

# model parameters
np.random.seed(444)
w = tf.Variable(np.random.randn())
b = tf.Variable(np.random.randn())

mse_train = []
mse_test = []

# tf.reduce_mean computes the mean of a tensor along a given axis (one dimension
# of the tensor); it is mainly used to reduce dimensions or to average over a
# tensor (e.g. an image).
for i in range(0, iter+1):
    with tf.GradientTape() as tape:
        pred_train = w*x_train + b
        loss_train = 0.5*tf.reduce_mean(tf.square(y_train - pred_train))
        pred_test = w*x_test + b
        loss_test = 0.5*tf.reduce_mean(tf.square(y_test - pred_test))
    mse_test.append(loss_test)
    mse_train.append(loss_train)
    dl_dw, dl_db = tape.gradient(loss_train, [w, b])
    w.assign_sub(learn_rate*dl_dw)
    b.assign_sub(learn_rate*dl_db)
    if i % display_step == 0:
        print('i:%i, Train Loss:%f, Test Loss:%f' % (i, loss_train, loss_test))

# scatter plot of the data and the fitted line
plt.figure(figsize=(15, 10))
plt.subplot(2, 2, 1)
plt.scatter(x_train, y_train, color='blue', label='data')
plt.plot(x_train, pred_train, color='red', label='model')
plt.legend(loc='upper left')

plt.subplot(2, 2, 2)
plt.plot(mse_train[:100], color='blue', linewidth=2, label='train loss')
plt.plot(mse_test[:100], color='red', linewidth=1.5, label='test loss')
plt.legend(loc='upper right')

plt.subplot(2, 2, 3)
plt.plot(y_train, color='green', marker='o', label='true_price')
plt.plot(pred_train, color='red', marker='.', label='predict')
plt.legend()

plt.subplot(2, 2, 4)
plt.plot(y_test, color='green', marker='o', label='true_price')
plt.plot(pred_test, color='red', marker='.', label='predict')
plt.legend()
plt.show()

The resulting plots are shown below:
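As a quick follow-up (not part of the original script), the trained scalar parameters w and b can be used directly to price a hypothetical house; the room count 6.5 below is an arbitrary example value.

# A minimal sketch, assuming the training loop above has already run.
rooms = 6.5                                    # example room count (arbitrary value)
price = w.numpy() * rooms + b.numpy()          # y = w*x + b, price in $1000s
print('predicted price for %.1f rooms: %.2f (in $1000s)' % (rooms, price))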
Multivariate linear regression for data prediction
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np

# load the data
house = tf.keras.datasets.boston_housing
(train_x, train_y), (test_x, test_y) = house.load_data()
num_train = len(train_x)
num_test = len(test_x)

# the feature ranges differ greatly, which hurts gradient descent,
# so apply min-max normalization to each attribute
x_train = (train_x - train_x.min(axis=0)) / (train_x.max(axis=0) - train_x.min(axis=0))
y_train = train_y
x_test = (test_x - test_x.min(axis=0)) / (test_x.max(axis=0) - test_x.min(axis=0))
y_test = test_y

# prepend a column of ones so the bias term is folded into W
x0_train = np.ones(num_train).reshape(-1, 1)
x0_test = np.ones(num_test).reshape(-1, 1)
X_train = tf.cast(tf.concat([x0_train, x_train], axis=1), tf.float32)
Y_train = tf.constant(y_train.reshape(-1, 1), tf.float32)
X_test = tf.cast(tf.concat([x0_test, x_test], axis=1), tf.float32)
Y_test = tf.constant(y_test.reshape(-1, 1), tf.float32)

# hyperparameters
learn_rate = 0.01
iter = 2000
display_step = 200

# random initial model parameters (1 bias + 13 features)
np.random.seed(555)
W = tf.Variable(np.random.randn(14, 1), dtype=tf.float32)

mse_train = []
mse_test = []

for i in range(0, iter+1):
    with tf.GradientTape() as tape:
        pred_train = tf.matmul(X_train, W)   # predictions y_hat = X_train * W
        Loss_train = 0.5*tf.reduce_mean(tf.square(Y_train - pred_train))
        pred_test = tf.matmul(X_test, W)     # predictions y_hat = X_test * W
        Loss_test = 0.5*tf.reduce_mean(tf.square(Y_test - pred_test))
    mse_train.append(Loss_train)
    mse_test.append(Loss_test)
    dl_dw = tape.gradient(Loss_train, W)     # gradient of the training loss w.r.t. W
    W.assign_sub(learn_rate*dl_dw)           # update the model parameters
    if i % display_step == 0:
        print('i:%i, train loss:%f, test loss:%f' % (i, Loss_train, Loss_test))

# plot the loss curves
plt.figure()
plt.plot(mse_train, color='blue', linewidth=3, label="train_loss")
plt.plot(mse_test, color='red', linewidth=1.5, label="test_loss")
plt.legend()

# predictions vs. true prices on the training set
plt.figure()
plt.plot(y_train, color='blue', marker='o', label="train_result")
plt.plot(pred_train, color='red', marker='.', label="predict_result(train)")
plt.legend()

# predictions vs. true prices on the test set
plt.figure()
plt.plot(y_test, color='blue', marker='o', label="test_result")
plt.plot(pred_test, color='red', marker='.', label="predict_result(test)")
plt.legend()
plt.show()
# print(X_train.shape, X_test.shape)

The resulting plots are shown below:
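To make the final error easier to interpret, the test loss can be converted to a root mean squared error in the same units as the house prices. This short sketch is an assumed addition, not part of the original post; it reuses Y_test and pred_test from the loop above.

# A minimal sketch, assuming the training loop above has already run.
final_mse = tf.reduce_mean(tf.square(Y_test - pred_test))   # mean squared error
rmse = tf.sqrt(final_mse)                                   # root mean squared error, in $1000s
print('final test RMSE: %.3f (in $1000s)' % rmse.numpy())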
Points to note:
1- Overfitting: in the multivariate regression above, for example, raising iter to a much larger value such as 10000 will show the train loss falling steadily while the test loss, after reaching a minimum, starts to rise again (see the sketch after this list).
2- Underfitting: as the name suggests, the model has not learned enough; both the train loss and the test loss stay large, and the predictions deviate substantially from the true values.
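A common way to guard against the overfitting described in point 1 is to stop training once the held-out loss stops improving. The sketch below is an assumed illustration, not from the original post; it reuses the variables from the multivariate example (X_train, Y_train, X_test, Y_test, W, learn_rate), uses the test set as the held-out set just as the original code does, and the patience of 50 steps is an arbitrary choice.

# A minimal early-stopping sketch, assuming the multivariate setup above.
best_test_loss = float('inf')
patience, wait = 50, 0          # 'patience' is an arbitrary illustrative value
for i in range(10001):
    with tf.GradientTape() as tape:
        Loss_train = 0.5*tf.reduce_mean(tf.square(Y_train - tf.matmul(X_train, W)))
    Loss_test = 0.5*tf.reduce_mean(tf.square(Y_test - tf.matmul(X_test, W)))
    W.assign_sub(learn_rate*tape.gradient(Loss_train, W))
    if Loss_test < best_test_loss:
        best_test_loss, wait = float(Loss_test), 0   # held-out loss still improving
    else:
        wait += 1
        if wait >= patience:                         # no improvement for 'patience' steps
            print('early stop at iteration %i, best test loss %f' % (i, best_test_loss))
            break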