from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

mnist = input_data.read_data_sets("data/", one_hot=True)  # read the MNIST image data set from "data/" with one-hot labels
#### sess = tf.InteractiveSession() # create a session (disabled; the script opens `with tf.Session()` further down)

######################### 1,函数声明部分 ############################
def weight_variable(shape, n):
    """Create a float32 variable initialised from a truncated normal distribution.

    Args:
        shape: shape of the variable to create.
        n: standard deviation handed to ``tf.truncated_normal``.

    Returns:
        A ``tf.Variable`` whose initial value is the sampled tensor.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=n, dtype=tf.float32))
def bias_variable(shape):
    """Create a float32 variable of the given shape with every entry set to 0.1.

    Args:
        shape: shape of the variable to create.

    Returns:
        A ``tf.Variable`` initialised with the constant 0.1.
    """
    return tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=shape))
def conv2d(x, W):
    """Convolve ``x`` with filter ``W`` using stride 1 in every dimension.

    'SAME' padding is used, so the input is padded at the borders.

    Args:
        x: input tensor passed to ``tf.nn.conv2d``.
        W: filter tensor passed to ``tf.nn.conv2d``.

    Returns:
        The result of ``tf.nn.conv2d``.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x, name=None):
    """Apply 2x2 max pooling with stride 2 and 'SAME' padding.

    The 2x2 window moved with stride 2 halves each of the two middle
    dimensions of ``x`` (rounded up because of 'SAME' padding).

    Args:
        x: input tensor passed to ``tf.nn.max_pool``.
        name: optional name for the pooling op. It used to be accepted but
            silently ignored; it is now forwarded to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME', name=name)

####################### 2,搭建网络,定义算法公式,也就是forward时的计算 #########################
def niive_CNN(x_image, keep_prob):
    """Build a small CNN and return its unnormalised class scores (logits).

    Architecture: conv(5x5, 32) -> max-pool -> LRN -> conv(5x5, 64) ->
    max-pool -> LRN -> dense(1024) -> dropout -> dense(10).

    Args:
        x_image: image batch. The first filter has shape [5, 5, 1, 32] and the
            flatten step reshapes to 7 * 7 * 64 features, so a single-channel
            28x28 input is expected.
        keep_prob: keep probability forwarded to ``tf.nn.dropout`` on the
            first dense layer.

    Returns:
        The output of the last dense layer, with 10 columns. Softmax is
        deliberately NOT applied: ``tf.nn.softmax_cross_entropy_with_logits``
        applies it internally, so returning probabilities would apply softmax
        twice in the loss. ``tf.argmax`` over logits gives the same class as
        over probabilities; call ``tf.nn.softmax`` on the result if
        probabilities are needed.
    """
    def _weights(shape, stddev, name='weights'):
        # Exactly one variable per parameter tensor, truncated-normal init.
        initial = tf.truncated_normal(shape, stddev=stddev, dtype=tf.float32)
        return tf.Variable(initial, name=name)

    def _biases(shape):
        # Exactly one variable per bias vector, constant 0.1 init.
        initial = tf.constant(0.1, shape=shape, dtype=tf.float32)
        return tf.Variable(initial, name='biases')

    ## First convolution ##
    with tf.variable_scope('conv1'):
        # 5x5 kernels, 1 input channel, 32 output feature maps.
        W_conv1 = _weights([5, 5, 1, 32], 0.1)
        b_conv1 = _biases([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1, name='conv1')

    ## First pooling + local response normalisation ##
    with tf.variable_scope('pooling1_lrn'):
        pool1 = max_pool_2x2(h_conv1, 'pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')

    ## Second convolution ##
    with tf.variable_scope('conv2'):
        # 32 input channels, 64 output feature maps.
        w_conv2 = _weights([5, 5, 32, 64], 0.1)
        b_conv2 = _biases([64])
        h_conv2 = tf.nn.relu(conv2d(norm1, w_conv2) + b_conv2, name='conv2')

    ## Second pooling + local response normalisation ##
    with tf.variable_scope('pooling2_lrn'):
        pool2 = max_pool_2x2(h_conv2, 'pooling2')
        norm2 = tf.nn.lrn(pool2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')

    ## First fully connected layer ##
    with tf.variable_scope('fc1') as scope:
        # Flatten [n_samples, 7, 7, 64] -> [n_samples, 7*7*64].
        h_pool2_flat = tf.reshape(norm2, [-1, 7 * 7 * 64])
        W_fc1 = _weights([7 * 7 * 64, 1024], 0.005)
        b_fc1 = _biases([1024])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1, name=scope.name)

    # Dropout on the dense features; keep_prob is supplied by the caller.
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

    ## Output layer (linear, no softmax) ##
    with tf.variable_scope('softmax_linear'):
        # The weight variable keeps its historical name 'softmax_linear'.
        W_fc2 = _weights([1024, 10], 0.005, name='softmax_linear')
        b_fc2 = _biases([10])
        logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return logits

######################### 3,定义输入输出结构 #########################
# 声明一个占位符,None表示输入图片的数量不定,28*28图片分辨率
# Input placeholders: flattened 28*28 images, one-hot labels over 10 classes,
# and the dropout keep probability (set per run through feed_dict).
xs = tf.placeholder(tf.float32, [None, 28 * 28])
ys = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)
x_image = tf.reshape(xs, [-1, 28, 28, 1])

_pred = niive_CNN(x_image, keep_prob)

# Loss and optimiser.
# NOTE(review): softmax_cross_entropy_with_logits applies softmax internally,
# so `_pred` must be unnormalised logits - confirm niive_CNN does not already
# apply softmax to its output.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=ys, logits=_pred))
train_step = tf.train.AdamOptimizer(0.0001).minimize(cross_entropy)

# Accuracy: fraction of samples whose arg-max prediction matches the label.
correct_prediction = tf.equal(tf.argmax(_pred, 1), tf.argmax(ys, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Dropout is active only for the optimisation step; every evaluation feeds 1.0.
# Training previously fed 1 as well, which turned the dropout layer into a no-op.
TRAIN_KEEP_PROB = 0.5
EVAL_KEEP_PROB = 1.0

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for i in range(20000):
        batch_xs, batch_ys = mnist.train.next_batch(50)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={
                xs: batch_xs, ys: batch_ys, keep_prob: EVAL_KEEP_PROB})
            print('setp {},the train accuracy: {}'.format(i, train_accuracy))
        train_step.run(feed_dict={
            xs: batch_xs, ys: batch_ys, keep_prob: TRAIN_KEEP_PROB})
    # keep_prob must be fed here too: the placeholder has no default value, so
    # evaluating `accuracy` without it fails at run time.
    print("test accuracy %g" % accuracy.eval(feed_dict={
        xs: mnist.test.images, ys: mnist.test.labels, keep_prob: EVAL_KEEP_PROB}))

# Alternative implementation (另一种写法):

#====================================== Lesson 11: Convolutional Neural Network ===================================
#=============================================== Network ==========================================================
# Conv1: 64 filters of shape (3,3,1)    Conv2: 128 filters of shape (3,3,64)
# input(n,784)-------------> Conv1()--max_pool1(2,2)------------> Conv2()--max_pool2(2,2)--->Fc1(1024)-->Fc2(10)
#
#================================================================================================================

import numpy as np
import tensorflow as tf
import  matplotlib.pyplot as plt
import tensorflow.examples.tutorials.mnist.input_data as input_data   # 不采用别名 as input_data 会出错
# Load MNIST from 'data/' with one-hot labels and expose the train/test arrays.
mnist = input_data.read_data_sets('data/', one_hot=True)
trainimg, trainlabel = mnist.train.images, mnist.train.labels
testimg, testlabel = mnist.test.images, mnist.test.labels

# Report the shape of each array, in the same order as before.
for _label, _array in (
        ("trainimg.shape", trainimg),      # (55000, 784)
        ("trainlabel.shape", trainlabel),  # (55000, 10)
        ("testimg.shape", testimg),        # (10000, 784)
        ("testlabel.shape", testlabel),    # (10000, 10)
):
    print(_label, _array.shape)
print("MNIST ready")


n_input = 784
n_output = 10

# Shapes are listed in creation order; the order is kept so that the
# auto-generated variable names in the graph stay the same.
# Conv filters are [filter_height, filter_width, in_channels, out_channels].
_WEIGHT_SHAPES = (
    ('w_conv1', [3, 3, 1, 64]),          # 1st conv layer: 1 input channel, 64 filters
    ('w_conv2', [3, 3, 64, 128]),        # 2nd conv layer: 64 input channels, 128 filters
    ('w_fc1', [7 * 7 * 128, 1024]),      # 1st fully connected layer
    ('w_fc2', [1024, n_output]),         # 2nd fully connected layer (output)
)
_BIAS_SHAPES = (
    ('b_conv1', [64]),
    ('b_conv2', [128]),
    ('b_fc1', [1024]),
    ('b_fc2', [n_output]),
)

# Every parameter starts from a normal distribution with stddev 0.1.
Weights = {
    key: tf.Variable(tf.random_normal(shape, stddev=0.1))
    for key, shape in _WEIGHT_SHAPES
}

Biases = {
    key: tf.Variable(tf.random_normal(shape, stddev=0.1))
    for key, shape in _BIAS_SHAPES
}

# 前向传播
def conv_basic(_input, _W, _b, _keep_ratio):
    """Build the forward pass: 2 x (conv -> relu -> max-pool -> dropout), then 2 dense layers.

    The static shape of every intermediate tensor is printed while the graph
    is being built. The print labels are kept verbatim from the original code.

    Args:
        _input: input batch; it is reshaped to [-1, 28, 28, 1].
        _W: dict of weight tensors with keys 'w_conv1', 'w_conv2', 'w_fc1'
            and 'w_fc2'. The first dimension of 'w_fc1' sets the flatten size.
        _b: dict of bias tensors with keys 'b_conv1', 'b_conv2', 'b_fc1'
            and 'b_fc2'.
        _keep_ratio: keep probability forwarded to all three
            ``tf.nn.dropout`` calls.

    Returns:
        Dict of the intermediate tensors: 'input_r', 'conv1', 'pool1',
        'pool_dr1', 'conv2', 'pool2', 'pool_dr2', 'densel', 'fc1', 'fc_dr1'
        and 'out'. 'out' is the final linear layer (no softmax applied).
        The legacy key '_pool2' is kept as an alias of 'pool2' so existing
        callers keep working.
    """
    def _show(label, tensor):
        # Print the static shape of `tensor` as a plain list.
        print(label, tensor.get_shape().as_list())

    unit_strides = [1, 1, 1, 1]   # stride 1 over batch, height, width, channel
    pool_window = [1, 2, 2, 1]    # 2x2 window, also used as the pooling stride

    # Input: reshape to 4-D [batch, height, width, channel]; -1 infers the batch size.
    _input_r = tf.reshape(_input, shape=[-1, 28, 28, 1])
    _show("_input_r.shape", _input_r)                      # [None, 28, 28, 1]

    # ============================ Conv layer 1 ============================
    conv1_linear = tf.nn.conv2d(_input_r, _W['w_conv1'], strides=unit_strides, padding='SAME')
    _show("_conv1.shape before relu", conv1_linear)        # [None, 28, 28, 64]
    _conv1 = tf.nn.relu(tf.nn.bias_add(conv1_linear, _b['b_conv1']))
    _show("_conv1.shape after relu", _conv1)               # [None, 28, 28, 64]
    _pool1 = tf.nn.max_pool(_conv1, ksize=pool_window, strides=pool_window, padding='SAME')
    _show("_pool1.shape after relu", _pool1)               # [None, 14, 14, 64]
    _pool_dr1 = tf.nn.dropout(_pool1, _keep_ratio)
    _show("_pool_dr1.shape after relu", _pool_dr1)         # [None, 14, 14, 64]

    # ============================ Conv layer 2 ============================
    conv2_linear = tf.nn.conv2d(_pool_dr1, _W['w_conv2'], strides=unit_strides, padding='SAME')
    _show("_conv2.shape before relu", conv2_linear)        # [None, 14, 14, 128]
    _conv2 = tf.nn.relu(tf.nn.bias_add(conv2_linear, _b['b_conv2']))
    _show("_conv2.shape after relu", _conv2)               # [None, 14, 14, 128]
    _pool2 = tf.nn.max_pool(_conv2, ksize=pool_window, strides=pool_window, padding='SAME')
    _show("_pool2.shape ", _pool2)                         # [None, 7, 7, 128]
    _pool_dr2 = tf.nn.dropout(_pool2, _keep_ratio)
    _show("_pool_dr2.shape", _pool_dr2)                    # [None, 7, 7, 128]

    # Flatten: one row per sample, width taken from the first dim of w_fc1.
    flat_width = _W['w_fc1'].get_shape().as_list()[0]
    _densel = tf.reshape(_pool_dr2, [-1, flat_width])
    _show("_densel.shape ", _densel)                       # [None, 6272]

    # ======================= Fully connected layer 1 ======================
    Linear_oper = tf.add(tf.matmul(_densel, _W['w_fc1']), _b['b_fc1'])
    _show("Linear_oper.shape ", Linear_oper)               # [None, 1024]
    _fc1 = tf.nn.relu(Linear_oper)
    _show("_fc1.shape ", _fc1)                             # [None, 1024]
    _fc1_dr1 = tf.nn.dropout(_fc1, _keep_ratio)
    _show("_fc1_dr1.shape ", _fc1_dr1)                     # [None, 1024]

    # ======================= Fully connected layer 2 ======================
    # X*W + b (not W*X): the matmul operands must have matching inner dimensions.
    _out = tf.add(tf.matmul(_fc1_dr1, _W['w_fc2']), _b['b_fc2'])
    _show("_out_dr1.shape ", _out)                         # [None, 10]

    return {
        'input_r': _input_r,
        'conv1': _conv1,
        'pool1': _pool1,
        'pool_dr1': _pool_dr1,
        'conv2': _conv2,
        'pool2': _pool2,
        '_pool2': _pool2,   # legacy key, kept for backward compatibility
        'pool_dr2': _pool_dr2,
        'densel': _densel,
        'fc1': _fc1,
        'fc_dr1': _fc1_dr1,
        'out': _out,
    }
print("=============================================== CNN is Ready ==================================")
# Placeholders: flattened images, one-hot labels, and the dropout keep probability.
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_output])
keep_ratio = tf.placeholder(tf.float32)

# Prediction, loss, optimiser and accuracy ops.
# conv_basic's 'out' entry is the final linear layer, so it is passed as logits.
_pred = conv_basic(x, Weights, Biases, keep_ratio)['out']
print("_pred.shape ", _pred.get_shape().as_list())                         # [None, 10]
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=_pred))
optm = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

_corr = tf.equal(tf.argmax(_pred, 1), tf.argmax(y, 1))
accr = tf.reduce_mean(tf.cast(_corr, tf.float32))

# Saver
# Save a checkpoint every epoch. The value used to be 3, which contradicted
# this intent and, with 3 training epochs, wrote only "ckpt-0" while the
# restore branch of the script loads the checkpoint of the last epoch.
save_step = 1
saver = tf.train.Saver(max_to_keep=3)   # keep only the 3 most recent checkpoints (None keeps all)

print("=========================================== Graph is ready =======================================")
do_train = 1      # 1: train the model and save checkpoints
#do_train = 0 # 0: restore a saved model and evaluate it

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    training_epochs = 3
    batch_size      = 2  # normally this should be larger
    display_step    = 1
    ckpt_dir        = "save/nets"
    ckpt_prefix     = ckpt_dir + "/cnn_mnist_basic.ckpt-"

    if do_train == 1:
        # Make sure the checkpoint directory exists before the first save.
        tf.gfile.MakeDirs(ckpt_dir)
        for epoch in range(training_epochs):
            print("the length of mnist.train.num_examples is :", mnist.train.num_examples)
            print("The real num_iteation is :", int(mnist.train.num_examples/batch_size))
            #num_batch = int(mnist.train.num_examples/batch_size)
            num_batch = 10               # the real batch count is too slow for a demo run

            # Both running sums start at zero so the averages below are real means.
            avg_cost = 0.0
            train_acc = 0.0
            for _ in range(num_batch):
                batch_xs, batch_ys = mnist.train.next_batch(batch_size)
                # Optimisation step with dropout active.
                sess.run(optm, feed_dict={x: batch_xs, y: batch_ys, keep_ratio: 0.7})
                # Evaluate loss and accuracy in one run, with dropout disabled.
                batch_cost, batch_acc = sess.run(
                    [cost, accr],
                    feed_dict={x: batch_xs, y: batch_ys, keep_ratio: 1.0})
                avg_cost += batch_cost
                train_acc += batch_acc   # accumulate (this used to overwrite)
            avg_cost /= num_batch
            train_acc /= num_batch

            # Display logs per epoch step
            if epoch % display_step == 0:
                print("Epoch: %03d/%03d cost: %.9f" % (epoch, training_epochs, avg_cost))
                print("Training accuracy :%.3f" % (train_acc))
                #test_acc = sess.run(accr, feed_dict = {x: testimg, y: testlabel, keep_ratio: 1})
                #print("Test accuracy : %.3f"% (test_acc) )

            # Save every `save_step` epochs, and always after the final epoch so
            # the fully trained model can be restored later.
            is_last_epoch = (epoch == training_epochs - 1)
            if epoch % save_step == 0 or is_last_epoch:
                saver.save(sess, ckpt_prefix + str(epoch))

    if do_train == 0:
        # Prefer the newest checkpoint recorded in the directory; fall back to
        # the conventional last-epoch path when no checkpoint state is found.
        ckpt_path = tf.train.latest_checkpoint(ckpt_dir)
        if ckpt_path is None:
            ckpt_path = ckpt_prefix + str(training_epochs - 1)
        saver.restore(sess, ckpt_path)

        test_acc = sess.run(accr, feed_dict={x: testimg, y: testlabel, keep_ratio: 1.})
        print("Test accuracy: %.3f " % (test_acc))
print("============================Optimization Finished=================================")


"""
【关于训练集、验证集、测试集的使用】

1 训练集:
    拟合数据样本,进行学习,更新参数。

2 验证集: 用于调试超参数,使用多次,每几个 epoch 跑一次,!!!必须有!!!
    模型训练过程中单独留下的样本集,用于调整超参数和对模型进行初步评估。
    验证集可以用在训练的过程中,一般在训练时,几个 epoch 结束后跑一次验证集看看效果。
    (但是验证得太频繁会影响训练速度)
    优点:
    1) 可以及时发现模型或者参数问题,比如验证集发散、出现奇怪的值(无穷大)、准确率不增长或者很慢,
       此时可以及时终止训练,重新调参或者调整模型,而不需要等到训练结束。就是可以实时监控!
    2) 还有就是验证模型的泛化能力,如果验证集上的效果比训练集上差很多,就可以考虑模型是否过拟合。
       一旦 validation_data 的分类精度达到饱和,就停止训练。这种策略叫做提前终止(early stopping)。
    3) 可以通过验证集对比不同的模型。在一般的神经网络中,我们用验证集去寻找最优的网络深度(number of hidden layers),
       或者决定反向传播的停止点,或者在神经网络中选择隐藏神经元的个数。
    4) 交叉验证(Cross Validation): 就是把训练数据集本身再细分成不同的验证数据集。
    缺点:
    1) 模型在一次次手动调参并进行训练后逼近的验证集,可能只代表一部分非训练集,导致最终的模型泛化还不够好。

3 测试集: !!!可以没有,可以用验证集来代替!!!
    所有训练、验证、模型调整完毕以后,用整个测试集跑一次,看模型的泛化能力。
    不能作为调参、选择特征等算法相关的选择的依据。

4 验证集和测试集相互关系:
    验证集具有足够泛化性(一般来说,如果验证集足够大到包括大部分非训练集时,也等于具有足够泛化性了)。
    验证集具有足够泛化性时,测试集就没有存在的必要了。
    如果验证集具有足够泛化代表性,测试集是可以没有的,但验证集是必须有的。

PS:
    1) test_data 是模型出炉的最后一道检测工序,用 test_data 来防止过拟合。
       如果基于 test_data 的评估结果设置超参数,有可能我们的网络最后是对 test_data 过拟合。
       也就是说,我们或许只是找到了适合 test_data 具体特征的超参数,网络的性能不能推广到其它的数据集。
    2) 普通参数可以通过网络来更新,自动调参(训练集训练);超参数是人工手动"更新",手动调参(验证集也类似在训练),
       所以测试集有存在的必要!
"""


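# References (参考):
#
#   1. 完整实现利用tensorflow训练自己的图片数据集
#      (A complete walkthrough of training on your own image data set with TensorFlow)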