explore regularization techniques.

# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle

pickle_file = 'notMNIST.pickle'

# Load the three dataset splits from the pickled dictionary.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so this
# must only ever be pointed at a trusted, locally generated dataset.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset, train_labels = save['train_dataset'], save['train_labels']
    valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
    test_dataset, test_labels = save['test_dataset'], save['test_labels']
    # Drop the only reference to the container dict so it can be collected.
    del save
    # Report the shapes exactly as stored in the pickle.
    print('Training set', train_dataset.shape, train_labels.shape)
    print('Validation set', valid_dataset.shape, valid_labels.shape)
    print('Test set', test_dataset.shape, test_labels.shape)
image_size = 28
num_labels = 10

def reformat(dataset, labels):
    """Flatten the samples and one-hot encode the labels.

    Args:
        dataset: array that can be reshaped to rows of
            ``image_size * image_size`` values.
        labels: 1-D array of integer class indices.

    Returns:
        A ``(flat_dataset, one_hot_labels)`` pair, both float32. Label ``0``
        becomes ``[1.0, 0.0, 0.0, ...]``, label ``1`` becomes
        ``[0.0, 1.0, 0.0, ...]`` and so on, with ``num_labels`` columns.
    """
    pixels_per_image = image_size * image_size
    flat = dataset.reshape((-1, pixels_per_image)).astype(np.float32)
    # Broadcasting a column of labels against the row of class indices
    # yields a boolean one-hot matrix.
    class_ids = np.arange(num_labels)
    one_hot = (labels[:, None] == class_ids).astype(np.float32)
    return flat, one_hot
# Convert each split with reformat(), rebinding the module-level names one
# split at a time so the previous array can be released before the next
# conversion starts.
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
# Report the shapes after conversion.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)
Training set (200000, 784) (200000, 10)
Validation set (10000, 784) (10000, 10)
Test set (10000, 784) (10000, 10)


def accuracy(predictions, labels):
    """Return the percentage of rows whose predicted class matches the label.

    Args:
        predictions: 2-D array of per-class scores, one row per sample.
        labels: 2-D array of the same layout (e.g. one-hot rows).

    Returns:
        ``100 * correct / number_of_rows`` as a float.
    """
    # argmax along axis 1 picks the winning class within each row.
    predicted_classes = np.argmax(predictions, 1)
    true_classes = np.argmax(labels, 1)
    num_correct = np.sum(predicted_classes == true_classes)
    return 100.0 * num_correct / predictions.shape[0]

1. 加入对权重矩阵的惩罚项到loss



# Logistic regression with an L2 penalty on the weight matrix added to the loss.
batch_size = 128
regularation_param = 0.0001  # coefficient of the L2 penalty term

graph = tf.Graph()
with graph.as_default():
    # 1. Input data. Training minibatches are fed at run time through
    # placeholders (which only reserve a slot in the graph); the validation
    # and test sets are embedded as constants.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # 2. Variables.
    weights = tf.Variable(tf.truncated_normal([image_size * image_size, num_labels]))
    biases = tf.Variable(tf.zeros([num_labels]))
    # 3. Training computation.
    logits = tf.matmul(tf_train_dataset, weights) + biases
    # Keyword arguments are used on purpose: TensorFlow >= 1.0 rejects
    # positional calls to softmax_cross_entropy_with_logits, while the older
    # (logits, labels) signature accepts the same keywords.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits)
    loss = tf.reduce_mean(cross_entropy) + regularation_param * tf.nn.l2_loss(weights)
    # 4. Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    # 5. Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(tf.matmul(tf_valid_dataset, weights) + biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)

num_steps = 3001

with tf.Session(graph=graph) as session:
    # Variables must be initialized before the first training step; running
    # the optimizer on uninitialized variables raises an error.
    # (tf.global_variables_initializer replaces the deprecated
    # tf.initialize_all_variables.)
    tf.global_variables_initializer().run()
    for step in range(num_steps):
        # Pick an offset within the training data (assumed to be
        # pre-shuffled). Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Map each placeholder node of the graph to the numpy array that
        # should be fed into it for this step.
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(valid_prediction.eval(), valid_labels))
    # Final evaluation on the held-out test set after all steps have run.
    print("Test accuracy: %.1f%% with regularation_param = %f " %( accuracy(test_prediction.eval(), test_labels), regularation_param))
Minibatch loss at step 0: 46.886597
Minibatch accuracy: 11.7%
Validation accuracy: 11.4%
Minibatch loss at step 500: 0.940757
Minibatch accuracy: 77.3%
Validation accuracy: 80.3%
Minibatch loss at step 1000: 0.832657
Minibatch accuracy: 82.0%
Validation accuracy: 81.2%
Minibatch loss at step 1500: 0.707090
Minibatch accuracy: 85.2%
Validation accuracy: 81.1%
Minibatch loss at step 2000: 0.972838
Minibatch accuracy: 76.6%
Validation accuracy: 81.3%
Minibatch loss at step 2500: 0.756738
Minibatch accuracy: 80.5%
Validation accuracy: 81.0%
Minibatch loss at step 3000: 0.879621
Minibatch accuracy: 82.8%
Validation accuracy: 81.4%
Test accuracy: 87.9% with regularation_param = 0.010000

# The same L2-regularized model, implemented as a neural network with one
# hidden ReLU layer.

batch_size = 128
hiden_layer_node_num = 1024
regularation_param = 0.01  # coefficient of the L2 penalty term
graph = tf.Graph()
with graph.as_default():
    # 1. Input: minibatch placeholders plus constant validation/test sets.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # 2. Variables.
    weights1 = tf.Variable(tf.truncated_normal([image_size * image_size, hiden_layer_node_num]))
    biases1 = tf.Variable(tf.zeros([hiden_layer_node_num]))
    # The hidden layer outputs (batch_size, hiden_layer_node_num).
    weights2 = tf.Variable(tf.truncated_normal([hiden_layer_node_num, num_labels]))
    biases2 = tf.Variable(tf.zeros([num_labels]))
    # 3. Training computation.
    logits = tf.matmul(tf.nn.relu(tf.matmul(tf_train_dataset, weights1) + biases1), weights2) + biases2
    # Keyword arguments are used on purpose: TensorFlow >= 1.0 rejects
    # positional calls to softmax_cross_entropy_with_logits, while the older
    # (logits, labels) signature accepts the same keywords.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits)
    # Both weight matrices are penalized; the biases are not.
    loss = tf.reduce_mean(cross_entropy) + regularation_param * (tf.nn.l2_loss(weights1) + tf.nn.l2_loss(weights2))
    # 4. Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    # 5. Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_valid_dataset, weights1) + biases1), weights2) + biases2)
    test_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_test_dataset, weights1) + biases1), weights2) + biases2)
num_steps = 3001

with tf.Session(graph=graph) as session:
    # Variables must be initialized before the first training step; running
    # the optimizer on uninitialized variables raises an error.
    # (tf.global_variables_initializer replaces the deprecated
    # tf.initialize_all_variables.)
    tf.global_variables_initializer().run()
    for step in range(num_steps):
        # Pick an offset within the training data (assumed to be
        # pre-shuffled). Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Map each placeholder node of the graph to the numpy array that
        # should be fed into it for this step.
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(valid_prediction.eval(), valid_labels))
    # Final evaluation on the held-out test set after all steps have run.
    print("Test accuracy: %.1f%% with regularation_param = %f " %( accuracy(test_prediction.eval(), test_labels), regularation_param))
Minibatch loss at step 0: 337.164185
Minibatch accuracy: 14.8%
Validation accuracy: 23.4%
Minibatch loss at step 500: 45.676537
Minibatch accuracy: 80.5%
Validation accuracy: 80.5%
Minibatch loss at step 1000: 41.467537
Minibatch accuracy: 80.5%
Validation accuracy: 81.5%
Minibatch loss at step 1500: 34.926945
Minibatch accuracy: 83.6%
Validation accuracy: 80.7%
Minibatch loss at step 2000: 28.990902
Minibatch accuracy: 80.5%
Validation accuracy: 81.4%
Minibatch loss at step 2500: 25.874664
Minibatch accuracy: 83.6%
Validation accuracy: 80.9%
Minibatch loss at step 3000: 24.691601
Minibatch accuracy: 86.7%
Validation accuracy: 81.6%
Test accuracy: 88.7% with regularation_param = 0.000100 

2. overfitting




# Logistic regression, overfitting version (trained on a small subset).
batch_size = 128
regularation_param = 0.01  # coefficient of the L2 penalty term

graph = tf.Graph()
with graph.as_default():
    # 1. Input data. Training minibatches are fed at run time through
    # placeholders (which only reserve a slot in the graph); the validation
    # and test sets are embedded as constants.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # 2. Variables.
    weights = tf.Variable(tf.truncated_normal([image_size * image_size, num_labels]))
    biases = tf.Variable(tf.zeros([num_labels]))
    # 3. Training computation.
    logits = tf.matmul(tf_train_dataset, weights) + biases
    # Keyword arguments are used on purpose: TensorFlow >= 1.0 rejects
    # positional calls to softmax_cross_entropy_with_logits, while the older
    # (logits, labels) signature accepts the same keywords.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits)
    loss = tf.reduce_mean(cross_entropy) + regularation_param * tf.nn.l2_loss(weights)
    # 4. Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    # 5. Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(tf.matmul(tf_valid_dataset, weights) + biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)

num_steps = 3001
# Number of leading training samples the overfitting experiment may use.
small_train_size = 10000

with tf.Session(graph=graph) as session:
    # Variables must be initialized before the first training step; running
    # the optimizer on uninitialized variables raises an error.
    # (tf.global_variables_initializer replaces the deprecated
    # tf.initialize_all_variables.)
    tf.global_variables_initializer().run()
    for step in range(num_steps):
        # Restrict training to a smaller dataset by cycling through the first
        # `small_train_size` samples only. Resetting the offset to 0 whenever
        # it exceeded the bound would instead replay the very first batch for
        # most of the steps rather than train on the whole subset.
        offset = (step * batch_size) % (small_train_size - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Map each placeholder node of the graph to the numpy array that
        # should be fed into it for this step.
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(valid_prediction.eval(), valid_labels))
    # Final evaluation on the held-out test set after all steps have run.
    print("Test accuracy: %.1f%% with regularation_param = %f " %( accuracy(test_prediction.eval(), test_labels), regularation_param))
Minibatch loss at step 0: 49.519794
Minibatch accuracy: 11.7%
Validation accuracy: 13.5%
Minibatch loss at step 500: 0.310027
Minibatch accuracy: 100.0%
Validation accuracy: 73.7%
Minibatch loss at step 1000: 0.123203
Minibatch accuracy: 100.0%
Validation accuracy: 72.7%
Minibatch loss at step 1500: 0.121531
Minibatch accuracy: 100.0%
Validation accuracy: 72.7%
Minibatch loss at step 2000: 0.122027
Minibatch accuracy: 100.0%
Validation accuracy: 73.5%
Minibatch loss at step 2500: 0.121300
Minibatch accuracy: 100.0%
Validation accuracy: 72.7%
Minibatch loss at step 3000: 0.121102
Minibatch accuracy: 100.0%
Validation accuracy: 72.5%
Test accuracy: 79.1% with regularation_param = 0.010000 


# Overfitting demonstration, neural network (one hidden ReLU layer) version.
batch_size = 128
hiden_layer_node_num = 1024
regularation_param = 0.01  # coefficient of the L2 penalty term
num_steps = 3001

graph = tf.Graph()
with graph.as_default():
    # 1. Input: minibatch placeholders plus constant validation/test sets.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # 2. Variables.
    weights1 = tf.Variable(tf.truncated_normal([image_size * image_size, hiden_layer_node_num]))
    biases1 = tf.Variable(tf.zeros([hiden_layer_node_num]))
    # The hidden layer outputs (batch_size, hiden_layer_node_num).
    weights2 = tf.Variable(tf.truncated_normal([hiden_layer_node_num, num_labels]))
    biases2 = tf.Variable(tf.zeros([num_labels]))
    # 3. Training computation.
    logits = tf.matmul(tf.nn.relu(tf.matmul(tf_train_dataset, weights1) + biases1), weights2) + biases2
    # Keyword arguments are used on purpose: TensorFlow >= 1.0 rejects
    # positional calls to softmax_cross_entropy_with_logits, while the older
    # (logits, labels) signature accepts the same keywords.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits)
    # Both weight matrices are penalized; the biases are not.
    loss = tf.reduce_mean(cross_entropy) + regularation_param * (tf.nn.l2_loss(weights1) + tf.nn.l2_loss(weights2))
    # 4. Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    # 5. Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_valid_dataset, weights1) + biases1), weights2) + biases2)
    test_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_test_dataset, weights1) + biases1), weights2) + biases2)

# Number of leading training samples the overfitting experiment may use.
small_train_size = 1000

with tf.Session(graph=graph) as session:
    # Variables must be initialized before the first training step; running
    # the optimizer on uninitialized variables raises an error.
    # (tf.global_variables_initializer replaces the deprecated
    # tf.initialize_all_variables.)
    tf.global_variables_initializer().run()
    for step in range(num_steps):
        # Restrict training to a smaller dataset by cycling through the first
        # `small_train_size` samples only. Resetting the offset to 0 whenever
        # it exceeded the bound would instead replay the very first batch for
        # most of the steps rather than train on the whole subset.
        offset = (step * batch_size) % (small_train_size - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Map each placeholder node of the graph to the numpy array that
        # should be fed into it for this step.
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(valid_prediction.eval(), valid_labels))
    # Final evaluation on the held-out test set after all steps have run.
    print("Test accuracy: %.1f%% with regularation_param = %f " %( accuracy(test_prediction.eval(), test_labels), regularation_param))



Let's see docs of nn.dropout firstly.
tf.nn.dropout(x, keep_prob, noise_shape=None, seed=None, name=None)
With probability keep_prob, outputs the input element scaled up by
1 / keep_prob, otherwise outputs 0. The scaling is so that the expected
sum is unchanged.
x: A tensor.
keep_prob: A scalar Tensor with the same type as x. The probability that each element is kept.
noise_shape: A 1-D Tensor of type int32, representing the shape for randomly generated keep/drop flags.
seed: A Python integer. Used to create random seeds. See set_random_seed for behavior.
name: A name for this operation (optional).
A Tensor of the same shape of x.
ValueError: If keep_prob is not in (0, 1].


否则,你得到的validation 和 test 的准确度都是随机的。

# One-hidden-layer network with dropout on the hidden activations (training
# path only) in addition to the L2 penalty.
batch_size = 128
hiden_layer_node_num = 1024
regularation_param = 0.01  # coefficient of the L2 penalty term
num_steps = 3001

graph = tf.Graph()
with graph.as_default():
    # 1. Input: minibatch placeholders plus constant validation/test sets.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # 2. Variables.
    weights1 = tf.Variable(tf.truncated_normal([image_size * image_size, hiden_layer_node_num]))
    biases1 = tf.Variable(tf.zeros([hiden_layer_node_num]))
    # The hidden layer outputs (batch_size, hiden_layer_node_num).
    weights2 = tf.Variable(tf.truncated_normal([hiden_layer_node_num, num_labels]))
    biases2 = tf.Variable(tf.zeros([num_labels]))
    # 3. Training computation. Dropout (keep probability 0.8) is applied to
    # the hidden activations of the training path only.
    temp = tf.nn.dropout(tf.nn.relu(tf.matmul(tf_train_dataset, weights1) + biases1), 0.8)
    logits = tf.matmul(temp, weights2) + biases2
    # Keyword arguments are used on purpose: TensorFlow >= 1.0 rejects
    # positional calls to softmax_cross_entropy_with_logits, while the older
    # (logits, labels) signature accepts the same keywords.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits)
    loss = tf.reduce_mean(cross_entropy) + regularation_param * (tf.nn.l2_loss(weights1) + tf.nn.l2_loss(weights2))
    # 4. Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    # 5. Predictions for the training, validation, and test data. The
    # validation and test paths deliberately contain no dropout op, so their
    # outputs are deterministic.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_valid_dataset, weights1) + biases1), weights2) + biases2)
    test_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_test_dataset, weights1) + biases1), weights2) + biases2)

with tf.Session(graph=graph) as session:
    # Variables must be initialized before the first training step; running
    # the optimizer on uninitialized variables raises an error.
    # (tf.global_variables_initializer replaces the deprecated
    # tf.initialize_all_variables.)
    tf.global_variables_initializer().run()
    for step in range(num_steps):
        # Pick an offset within the training data (assumed to be
        # pre-shuffled). Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Map each placeholder node of the graph to the numpy array that
        # should be fed into it for this step.
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(valid_prediction.eval(), valid_labels))
    # Final evaluation on the held-out test set after all steps have run.
    print("Test accuracy: %.1f%% with regularation_param = %f " %( accuracy(test_prediction.eval(), test_labels), regularation_param))

Minibatch loss at step 0: 3401.901367
Minibatch accuracy: 16.4%
Validation accuracy: 18.4%
Minibatch loss at step 500: 21.367651
Minibatch accuracy: 82.0%
Validation accuracy: 84.1%
Minibatch loss at step 1000: 0.976428
Minibatch accuracy: 81.2%
Validation accuracy: 83.5%
Minibatch loss at step 1500: 0.671196
Minibatch accuracy: 86.7%
Validation accuracy: 82.8%
Minibatch loss at step 2000: 0.887370
Minibatch accuracy: 78.1%
Validation accuracy: 83.8%
Minibatch loss at step 2500: 0.701504
Minibatch accuracy: 85.9%
Validation accuracy: 83.2%
Minibatch loss at step 3000: 0.803447
Minibatch accuracy: 85.2%
Validation accuracy: 83.6%
Test accuracy: 89.9% with regularation_param = 0.010000





tf.train.exponential_decay(learning_rate, global_step, decay_steps, decay_rate, staircase=False, name=None)

decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)


global_step = tf.Variable(0) # count the number of steps taken.

# exponential_decay requires both decay_steps and decay_rate (see the
# signature above); neither has a default value.
learning_rate = tf.train.exponential_decay(0.5, global_step, decay_steps, decay_rate)

# Passing global_step lets the optimizer increment it on every update, which
# is what drives the decay.
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)



# without regularization
# test accuracy 96.3%
batch_size = 128

num_steps = 20001


def compute_logits(data, weightss, biasess, dropout_vals=None):
    """Build the logits of a fully connected ReLU network.

    Args:
        data: input tensor fed to the first layer.
        weightss: list of weight tensors, one per layer, in forward order.
        biasess: list of bias tensors matching ``weightss``.
        dropout_vals: optional sequence of keep probabilities, one per layer.
            Entry ``i`` is applied to the input of hidden layer ``i``; the
            last entry is not used because the output layer gets no dropout.
            When ``None`` or empty, no dropout ops are added.

    Returns:
        The logits tensor produced by the final (linear) layer.
    """
    temp = data
    if dropout_vals:
        # Training path: drop units on the input of every hidden layer.
        for w, b, d in zip(weightss[:-1], biasess[:-1], dropout_vals[:-1]):
            temp = tf.nn.relu_layer(tf.nn.dropout(temp, d), w, b)
    else:
        # Evaluation path: plain ReLU layers without dropout.
        for w, b in zip(weightss[:-1], biasess[:-1]):
            temp = tf.nn.relu_layer(temp, w, b)
    # The last layer is linear; softmax is applied by the caller.
    return tf.matmul(temp, weightss[-1]) + biasess[-1]


graph = tf.Graph()
with graph.as_default():
    # input
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size*image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # variables
    global_step = tf.Variable(0)  # incremented by the optimizer on each step
    # Learning rate starts at 0.08 and is multiplied by 0.98 every 200 steps.
    learning_rate = tf.train.exponential_decay(0.08, global_step, 200, 0.98)
    weights1 = tf.Variable(tf.truncated_normal([784, 1024], stddev = np.sqrt(2.0/1024)))
    biases1 = tf.Variable(tf.zeros([1024]))
    weights2 = tf.Variable(tf.truncated_normal([1024, 800], stddev = np.sqrt(2.0/800)))
    biases2 = tf.Variable(tf.zeros([800]))
    weights3 = tf.Variable(tf.truncated_normal([800, 512], stddev = np.sqrt(2.0/512)))
    biases3 = tf.Variable(tf.zeros([512]))
    weights4 = tf.Variable(tf.truncated_normal([512, 256], stddev = np.sqrt(2.0/256)))
    biases4 = tf.Variable(tf.zeros([256]))
    weights5 = tf.Variable(tf.truncated_normal([256, 10], stddev = np.sqrt(2.0/10)))
    biases5 = tf.Variable(tf.zeros([10]))
    # Shared by the training, validation and test paths.
    layer_weights = [weights1, weights2, weights3, weights4, weights5]
    layer_biases = [biases1, biases2, biases3, biases4, biases5]
    # NOTE(review): the fourth argument of the training call was lost from the
    # source (the line ended in a dangling ", \"). These keep probabilities
    # are inferred from the earlier hand-unrolled version, which used no
    # dropout on the first layer and 0.95 on the later hidden layers.
    # TODO: confirm the values. The last entry is unused by compute_logits.
    train_keep_probs = [1.0, 0.95, 0.95, 0.95, 1.0]
    logits = compute_logits(tf_train_dataset, layer_weights, layer_biases, train_keep_probs)
    # Keyword arguments are used on purpose: TensorFlow >= 1.0 rejects
    # positional calls to softmax_cross_entropy_with_logits, while the older
    # (logits, labels) signature accepts the same keywords.
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    # prediction (validation and test paths are built without dropout)
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(compute_logits(tf_valid_dataset, layer_weights, layer_biases))
    test_prediction = tf.nn.softmax(compute_logits(tf_test_dataset, layer_weights, layer_biases))

with tf.Session(graph=graph) as session:
    # Variables must be initialized before the first training step; running
    # the optimizer on uninitialized variables raises an error.
    # (tf.global_variables_initializer replaces the deprecated
    # tf.initialize_all_variables.)
    tf.global_variables_initializer().run()
    for step in range(num_steps):
        # Pick an offset within the training data (assumed to be
        # pre-shuffled). Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Map each placeholder node of the graph to the numpy array that
        # should be fed into it for this step.
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(valid_prediction.eval(), valid_labels))
    # Final evaluation on the held-out test set after all steps have run.
    print("Test accuracy: %.1f%% " %( accuracy(test_prediction.eval(), test_labels)))

Minibatch loss at step 19500: 0.203250
Minibatch accuracy: 94.5%
Validation accuracy: 91.3%
Minibatch loss at step 20000: 0.137203
Minibatch accuracy: 95.3%
Validation accuracy: 91.2%
Test accuracy: 96.1%