参考:

  1. 使用Keras中的 LSTM 模型进行时间序列预测

bike_blog.py 文件:

import matplotlib.pyplot as plt
import numpy as np
import time
import csv
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM, GRU
from keras.models import Sequential, load_model
import keras
from utils import LossHistory

# Seed NumPy's global RNG so the np.random.shuffle call in data_bike_num
# produces the same training-row order on every run.
# NOTE(review): this does not seed the Keras backend; weight initialisation
# and dropout presumably remain non-deterministic -- confirm if needed.
np.random.seed(2017)

def data_bike_num(path_to_dataset='./bike_rnn.csv',
                           sequence_length=20,
                           ratio=1.0):
    """Load a one-column bike-count CSV and build sliding-window train/test sets.

    The first column of every data row (the header row is skipped) is parsed
    as a float. Consecutive windows of ``sequence_length`` values are formed;
    within each window the first ``sequence_length - 1`` values are the model
    input and the last value is the regression target. Values are divided by
    45949 and then shifted by the global mean of the windowed array.

    Args:
        path_to_dataset: Path of the CSV file to read.
        sequence_length: Number of consecutive values per window (inputs plus
            one target).
        ratio: Fraction of 45949 used as the cap on the number of values read.

    Returns:
        A list ``[X_train, y_train, X_test, y_test, result_mean]`` where
        ``X_*`` have shape ``(n_windows, sequence_length - 1, 1)``, ``y_*``
        have shape ``(n_windows,)`` and ``result_mean`` is the shift that was
        subtracted after scaling. The first 95% of the windows (shuffled) are
        the training set, the remaining 5% (in order) are the test set.

    Raises:
        ValueError: If the file yields too few numeric values to form at
            least one window.
    """
    # 45949 is used both as the cap on rows read (scaled by ``ratio``) and as
    # the divisor that normalises the values.
    # NOTE(review): presumably this is the dataset's row count -- confirm.
    scale = 45949
    max_values = ratio * scale

    bikes = []
    # newline='' is what the csv module asks for when handing it a file object.
    with open(path_to_dataset, newline='') as f:
        data = csv.reader(f, delimiter=",")
        next(data, None)  # skip the header row
        for line in data:
            try:
                bikes.append(float(line[0]))
            except (ValueError, IndexError):
                # Skip non-numeric cells (ValueError) and blank rows, which
                # csv.reader yields as empty lists (IndexError).
                pass
            if len(bikes) >= max_values:
                break

    print ("Data loaded from csv. Formatting...")
    print(len(bikes))

    n_windows = len(bikes) - sequence_length
    if n_windows <= 0:
        raise ValueError(
            "Need more than {} numeric values to build windows of length {}, "
            "got {}".format(sequence_length, sequence_length, len(bikes)))

    # Row i holds bikes[i : i + sequence_length]; shape (n_windows, sequence_length).
    result = np.array([bikes[start: start + sequence_length]
                       for start in range(n_windows)])

    result /= scale

    # Centre the scaled data; the shift is returned so callers can undo it.
    result_mean = result.mean()
    result -= result_mean
    print("Shift: ", result_mean)
    print ("Data: ", result.shape)

    # Chronological 95% / 5% split; only the training rows are shuffled.
    row = int(round(0.95 * result.shape[0]))
    train = result[:row, :]
    np.random.shuffle(train)
    X_train = train[:, :-1]
    y_train = train[:, -1]
    X_test = result[row:, :-1]
    y_test = result[row:, -1]

    # Add a trailing feature axis of size 1: (samples, timesteps, features).
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    return [X_train, y_train, X_test, y_test, result_mean]


def build_model():
    """Create and compile the two-layer stacked LSTM regression network.

    The network takes sequences of any length with one feature per timestep
    and produces a single real-valued output. It is compiled with
    mean-squared-error loss and the Adam optimizer, and reports MAE and MAPE
    as additional metrics.

    Returns:
        The compiled ``Sequential`` model.
    """
    n_features, first_units, second_units, n_outputs = 1, 50, 100, 1

    stack = (
        # First LSTM emits the full sequence so the second LSTM can consume it.
        LSTM(first_units, input_shape=(None, n_features), return_sequences=True),
        Dropout(0.2),
        # Second LSTM keeps only its final output.
        LSTM(second_units, return_sequences=False),
        Dropout(0.2),
        Dense(n_outputs),
        # Linear (identity) output because this is a regression task.
        Activation("linear"),
    )

    model = Sequential()
    for layer in stack:
        model.add(layer)

    # Time the compile step only.
    started = time.time()
    model.compile(loss="mse",
                  optimizer=keras.optimizers.Adam(lr=0.001),
                  metrics=['mae', 'mape'])
    print ("Compilation Time : ", time.time() - started)
    return model


def run_network():
    """Train the LSTM on the bike dataset, evaluate it and plot predictions.

    Steps: load and window the data, build and fit the model (5% of the
    training data is held out for validation), plot the recorded loss curves,
    save the model to ``./bike.h5``, evaluate on the test set, then plot the
    real and predicted series in the original units and save the figure to
    ``./regression.png``.

    Returns:
        A tuple ``(model, y_test, predicted)`` where ``y_test`` and
        ``predicted`` have been mapped back to the original value scale.
    """
    epochs = 10
    ratio = 1
    sequence_length = 20
    path = './bike_rnn.csv'
    # Must match the divisor data_bike_num uses to normalise the values.
    scale = 45949

    print ('Loading data... ')
    X_train, y_train, X_test, y_test, result_mean = data_bike_num(path, sequence_length, ratio)
    print("X_train.shape:", X_train.shape)
    print("y_train.shape:", y_train.shape)
    print("X_test.shape:", X_test.shape)
    print("y_test.shape:", y_test.shape)

    print("X_test:", X_test)

    model = build_model()
    history = LossHistory()
    callback_list = [history]

    model.fit(X_train, y_train, batch_size=512, epochs=epochs,
              validation_split=0.05, callbacks=callback_list)

    history.acc_loss_plot('epoch')
    history.acc_loss_plot('batch')

    model.save('./bike.h5')
    predicted = model.predict(X_test)
    predicted = np.reshape(predicted, (predicted.size,))

    # Evaluate on the still-normalised test data; scores follow the order
    # [loss, mae, mape] given by the metrics passed to compile().
    scores = model.evaluate(X_test, y_test, batch_size=512)
    print("\nevaluate result: \nmse={:.6f}\nmae={:.6f}\nmape={:.6f}".format(scores[0], scores[1], scores[2]))

    # Undo the normalisation. The data was transformed as x / scale - mean,
    # so the inverse is (y + mean) * scale: add the shift back BEFORE
    # rescaling (the reverse order leaves the values offset).
    y_test = (y_test + result_mean) * scale
    predicted = (predicted + result_mean) * scale

    print("predicted:", predicted)

    # Draw the real and predicted series on the same axes.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(y_test, label="Real")
    ax.plot(predicted, label="Prediction")
    ax.legend(loc='upper left')
    plt.savefig("./regression.png")
    plt.show()

    return model, y_test, predicted

# Script entry point: run the full train/evaluate/plot pipeline only when
# this file is executed directly, not when it is imported.
if __name__ == "__main__":
    run_network()

utils.py 文件:

import keras
import matplotlib.pyplot as plt
import numpy as np
############## accuracy and loss #################

class LossHistory(keras.callbacks.Callback):
    """Keras callback that records loss values and plots them.

    After training starts, ``self.losses`` and ``self.val_loss`` are dicts
    with the keys ``'batch'`` and ``'epoch'``; each maps to a list that
    receives one entry per finished batch / epoch, taken from the ``logs``
    dict under ``'loss'`` and ``'val_loss'``. A key missing from ``logs`` is
    stored as ``None``.
    """

    def on_train_begin(self, logs=None):
        """Reset the recorded histories at the start of training."""
        self.losses = {'batch': [], 'epoch': []}
        self.val_loss = {'batch': [], 'epoch': []}

    def on_batch_end(self, batch, logs=None):
        """Record the loss values reported at the end of one batch."""
        logs = logs or {}
        self.losses['batch'].append(logs.get('loss'))
        # Validation runs once per epoch, so this entry is expected to be None.
        self.val_loss['batch'].append(logs.get('val_loss'))

    def on_epoch_end(self, batch, logs=None):
        """Record the loss values reported at the end of one epoch.

        The first positional argument keeps its original name ``batch`` for
        backward compatibility; it is not used here.
        """
        logs = logs or {}
        self.losses['epoch'].append(logs.get('loss'))
        self.val_loss['epoch'].append(logs.get('val_loss'))

    def _plot_loss(self, train_x, train_loss, val_x, val_losses, xlabel, filename):
        """Draw train and validation loss on a new figure and save it to *filename*."""
        font_size = 10
        font = {'family': 'Times New Roman', 'size': font_size}

        fig2, ax2 = plt.subplots()
        ax2.plot(train_x, train_loss, 'r', label='train loss')
        ax2.plot(val_x, val_losses, 'b', label='val_loss')
        ax2.grid(True)
        ax2.set_xlabel(xlabel, fontdict=font)
        ax2.set_ylabel('loss', fontdict=font)

        plt.yticks(fontproperties='Times New Roman', size=font_size)
        plt.xticks(fontproperties='Times New Roman', size=font_size)
        plt.legend(loc="upper right", prop=font)

        # Tick marks point inwards; shown on the bottom and left sides only.
        ax2.tick_params(direction='in', bottom=True, top=False)
        ax2.tick_params(direction='in', left=True, right=False)

        plt.tight_layout()
        plt.savefig(filename)

    def acc_loss_plot(self, loss_type):
        """Plot the recorded training and validation loss and save it as a PNG.

        Validation runs only once per epoch, so the per-batch history holds
        training loss only; the batch plot therefore places the per-epoch
        validation loss at evenly spaced positions along the batch axis.

        :param loss_type: ``'epoch'`` writes ``epoch_loss.png``; ``'batch'``
            writes ``batch_loss.png``. Any other value does nothing.
        :return: None
        """
        if loss_type == 'epoch':
            # Epoch numbers start at 1 rather than 0.
            epoch_numbers = np.arange(1, len(self.losses['epoch']) + 1)
            self._plot_loss(epoch_numbers, self.losses['epoch'],
                            epoch_numbers, self.val_loss['epoch'],
                            'epoch', 'epoch_loss.png')

        elif loss_type == 'batch':
            n_batches = len(self.losses['batch'])
            # Batch numbers start at 1 rather than 0.
            batch_numbers = np.arange(1, n_batches + 1)
            # Evenly spaced positions on the batch axis, one per epoch; the
            # leading 0 is dropped so each point sits at the end of its epoch.
            epoch_positions = np.linspace(0, n_batches,
                                          num=len(self.losses['epoch']) + 1)[1:]
            self._plot_loss(batch_numbers, self.losses['batch'],
                            epoch_positions, self.val_loss['epoch'],
                            'batch', 'batch_loss.png')