Reference:
bike_blog.py:
import matplotlib.pyplot as plt
import numpy as np
import time
import csv
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM, GRU
from keras.models import Sequential, load_model
import keras
from utils import LossHistory
np.random.seed(2017)
def data_bike_num(path_to_dataset='./bike_rnn.csv',
                  sequence_length=20,
                  ratio=1.0):
    max_values = ratio * 45949
    with open(path_to_dataset) as f:
        data = csv.reader(f, delimiter=",")
        next(data, None)  # skip the header row
        bikes = []
        nb_of_values = 0
        for line in data:
            try:
                bikes.append(float(line[0]))
                nb_of_values += 1
            except ValueError:
                pass
            if nb_of_values >= max_values:
                break
    print("Data loaded from csv. Formatting...")
    print(len(bikes))

    # Slide a window of length `sequence_length` over the series
    result = []
    for index in range(len(bikes) - sequence_length):
        result.append(bikes[index: index + sequence_length])
    result = np.array(result)  # shape: (len(bikes) - sequence_length, sequence_length)

    # Normalize: divide by the constant 45949, then subtract the mean
    result /= 45949
    result_mean = result.mean()
    result -= result_mean
    print("Shift: ", result_mean)
    print("Data: ", result.shape)

    # 95% / 5% train/test split; shuffle only the training windows
    row = int(round(0.95 * result.shape[0]))
    train = result[:row, :]
    np.random.shuffle(train)
    X_train = train[:, :-1]
    y_train = train[:, -1]
    X_test = result[row:, :-1]
    y_test = result[row:, -1]

    # Reshape to (samples, timesteps, features) for the LSTM
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
    return [X_train, y_train, X_test, y_test, result_mean]
def build_model():
    model = Sequential()
    layers = [1, 50, 100, 1]  # input dim, first LSTM units, second LSTM units, output dim
    model.add(LSTM(layers[1], input_shape=(None, layers[0]), return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(layers[2], return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(layers[3]))
    model.add(Activation("linear"))  # linear activation, since this is a regression task

    start = time.time()
    model.compile(loss="mse",
                  optimizer=keras.optimizers.Adam(lr=0.001),
                  metrics=['mae', 'mape'])  # rmsprop is the usual default for this kind of regression; Adam is used here
    print("Compilation Time : ", time.time() - start)
    return model
def run_network():
    epochs = 10
    ratio = 1
    sequence_length = 20
    # path = './bike_rnn_raw.csv'
    path = './bike_rnn.csv'

    print('Loading data... ')
    X_train, y_train, X_test, y_test, result_mean = data_bike_num(path, sequence_length, ratio)
    print("X_train.shape:", X_train.shape)  # (43633, 19, 1)
    print("y_train.shape:", y_train.shape)  # (43633,)
    print("X_test.shape:", X_test.shape)    # (2296, 19, 1)
    print("y_test.shape:", y_test.shape)    # (2296,)
    print("X_test:", X_test)

    model = build_model()
    history = LossHistory()
    callback_list = [history]
    model.fit(X_train, y_train, batch_size=512, epochs=epochs,
              validation_split=0.05, callbacks=callback_list)
    history.acc_loss_plot('epoch')
    history.acc_loss_plot('batch')
    model.save('./bike.h5')

    predicted = model.predict(X_test)
    predicted = np.reshape(predicted, (predicted.size,))

    # Evaluate
    scores = model.evaluate(X_test, y_test, batch_size=512)
    print("\nevaluate result: \nmse={:.6f}\nmae={:.6f}\nmape={:.6f}".format(scores[0], scores[1], scores[2]))

    # Undo the normalization (add the mean back first, then rescale) and draw the figure
    y_test += result_mean
    predicted += result_mean
    y_test *= 45949
    predicted *= 45949
    print("predicted:", predicted)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(y_test, label="Real")
    ax.plot(predicted, label="Prediction")
    ax.legend(loc='upper left')
    plt.savefig("./regression.png")
    plt.show()
    return model, y_test, predicted


if __name__ == '__main__':
    run_network()
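
As a side note (not part of the original script): load_model is imported above but never used. Assuming a previous run has written ./bike.h5 and that ./bike_rnn.csv is still in place, the saved model could be reloaded for inference roughly like this:

# Minimal reload-and-predict sketch (assumes ./bike.h5 and ./bike_rnn.csv exist;
# data_bike_num and the normalization constants are reused from bike_blog.py)
from keras.models import load_model
from bike_blog import data_bike_num

model = load_model('./bike.h5')
_, _, X_test, y_test, result_mean = data_bike_num('./bike_rnn.csv', sequence_length=20)
predicted = model.predict(X_test).reshape(-1)
# Undo the normalization: add the mean back, then rescale to bike counts
predicted = (predicted + result_mean) * 45949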
utils.py:
import keras
import matplotlib.pyplot as plt
import numpy as np
############## accuracy and loss #################
class LossHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.losses = {'batch': [], 'epoch': []}
        self.val_loss = {'batch': [], 'epoch': []}

    def on_batch_end(self, batch, logs={}):
        self.losses['batch'].append(logs.get('loss'))
        # val_loss is only reported at the end of an epoch, so this stays None per batch
        self.val_loss['batch'].append(logs.get('val_loss'))

    def on_epoch_end(self, epoch, logs={}):
        self.losses['epoch'].append(logs.get('loss'))
        self.val_loss['epoch'].append(logs.get('val_loss'))
    def acc_loss_plot(self, loss_type):
        """Validation only runs once per epoch, so the per-batch records contain
        only the training loss, never a validation loss.
        :param loss_type: 'epoch' or 'batch'
        :return:
        """
        font_size = 10
        if loss_type == 'epoch':
            num_epochs = np.array(range(len(self.losses['epoch']))) + 1  # epochs are numbered from 1, not 0
            fig2, ax2 = plt.subplots()
            ax2.plot(num_epochs, self.losses['epoch'], 'r', label='train loss')
            ax2.plot(num_epochs, self.val_loss['epoch'], 'b', label='val_loss')
            ax2.grid(True)
            ax2.set_xlabel('epoch', fontdict={'family': 'Times New Roman', 'size': font_size})
            ax2.set_ylabel('loss', fontdict={'family': 'Times New Roman', 'size': font_size})
            plt.yticks(fontproperties='Times New Roman', size=font_size)
            plt.xticks(fontproperties='Times New Roman', size=font_size)
            plt.legend(loc="upper right", prop={'family': 'Times New Roman', 'size': font_size})
            ax2.tick_params(direction='in', bottom=True, top=False)  # ticks on the bottom edge only, pointing inward
            ax2.tick_params(direction='in', left=True, right=False)  # ticks on the left edge only
            plt.tight_layout()
            plt.savefig('epoch_loss.png')
        elif loss_type == 'batch':
            # Validation only runs once per epoch, so there is no per-batch validation curve;
            # the per-epoch validation losses are plotted at the epoch boundaries instead.
            num_iters = np.array(range(len(self.losses['batch']))) + 1
            num_epochs = np.linspace(0, len(self.losses['batch']),
                                     num=len(self.losses['epoch']) + 1)  # epoch boundaries, evenly spaced over the batches
            # print("------------------------num_iters:", num_iters)
            # print("------------------------num_epochs:", num_epochs)
            # loss
            fig2, ax2 = plt.subplots()
            ax2.plot(num_iters, self.losses['batch'], 'r', label='train loss')
            ax2.plot(num_epochs[1:], self.val_loss['epoch'], 'b', label='val_loss')
            ax2.grid(True)
            ax2.set_xlabel('batch', fontdict={'family': 'Times New Roman', 'size': font_size})
            ax2.set_ylabel('loss', fontdict={'family': 'Times New Roman', 'size': font_size})
            plt.yticks(fontproperties='Times New Roman', size=font_size)
            plt.xticks(fontproperties='Times New Roman', size=font_size)
            plt.legend(loc="upper right", prop={'family': 'Times New Roman', 'size': font_size})
            ax2.tick_params(direction='in', bottom=True, top=False)  # ticks on the bottom edge only, pointing inward
            ax2.tick_params(direction='in', left=True, right=False)  # ticks on the left edge only
            plt.tight_layout()
            plt.savefig('batch_loss.png')
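
As a rough standalone sanity check of the callback (the synthetic data and the one-layer Dense model below are placeholders, not part of the bike project), something like the following should populate both the per-batch and per-epoch records and write the two PNG files:

# Toy sanity check for LossHistory (synthetic data; not part of the bike project)
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense
from utils import LossHistory

X = np.random.rand(1000, 8)
y = X.sum(axis=1)

model = Sequential()
model.add(Dense(1, input_shape=(8,)))
model.compile(loss='mse', optimizer='adam')

history = LossHistory()
model.fit(X, y, batch_size=64, epochs=3, validation_split=0.1, callbacks=[history])
history.acc_loss_plot('epoch')  # writes epoch_loss.png
history.acc_loss_plot('batch')  # writes batch_loss.png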