1. Project overview

The project's data directory contains the training and test data, the labels, and the results of data augmentation. The intermediate augmentation results are kept so that readers can get a more intuitive picture of what the augmentation operations do.

Now, on to the code.

2. The main function

from model import *
from data import *

# Choose which GPUs CUDA may use. With a single card there is only device "0".
# To use two cards, uncomment the line below and list both indices, e.g.
# os.environ["CUDA_VISIBLE_DEVICES"] = "0, 2" - the numbers are GPU indices,
# so on a machine with only two cards they would be "0, 1".

#os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Data augmentation parameters. The author uses rotation, shift, shear, zoom, and horizontal
# flip; see the Keras ImageDataGenerator documentation for the exact meaning of each option
data_gen_args = dict(rotation_range=0.2,
                    width_shift_range=0.05,
                    height_shift_range=0.05,
                    shear_range=0.05,
                    zoom_range=0.05,
                    horizontal_flip=True,
                    fill_mode='nearest')

# Create the training-data generator; trainGenerator is explained in the data.py section below
myGene = trainGenerator(2,'data/membrane/train','image','label',data_gen_args,save_to_dir = None)
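# Aside (my own sketch, not in the original main.py): to inspect the augmented
# image/mask pairs - the intermediate results mentioned in the project overview -
# point save_to_dir at an existing folder instead of None, for example:
# myGene = trainGenerator(2,'data/membrane/train','image','label',data_gen_args,
#                         save_to_dir = 'data/membrane/train/aug')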

# Build the network. unet() can load pretrained weights (explained in the model section),
# but since no pretrained_weights argument is passed here, nothing is preloaded
model = unet()
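# Aside (my own sketch): since unet() accepts a pretrained_weights path, a previously
# saved checkpoint such as the unet_membrane.hdf5 written below could be loaded with:
# model = unet(pretrained_weights = 'unet_membrane.hdf5')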

# Checkpoint callback for saving the trained model; see ModelCheckpoint for details.
# save_best_only=True keeps only the best model according to the monitored loss
model_checkpoint = ModelCheckpoint('unet_membrane.hdf5', monitor='loss',verbose=1, save_best_only=True)

# Train the model. fit_generator pulls one batch at a time from the generator; see the
# Keras fit_generator documentation for details. The arguments are, in order: the data
# generator, how many batches make up one epoch, how many epochs to train, and the
# checkpoint callback defined above, which saves the model during training.
model.fit_generator(myGene,steps_per_epoch=300,epochs=1,callbacks=[model_checkpoint])

# Create the test-data generator
testGene = testGenerator("data/membrane/test")

# Run inference; see model.predict_generator in the Keras docs.
# The 30 is the number of steps and matches testGenerator's default num_image
results = model.predict_generator(testGene,30,verbose=1)
# Save the results; saveResult is a custom function defined in data.py
saveResult("data/membrane/test",results)

3. data.py
This file covers how to read your own images and label images, how the dataset is built, and how the label image's colors map to classes. It also provides the training and test data generators, label visualization, and result saving.

from __future__ import print_function
from keras.preprocessing.image import ImageDataGenerator
import numpy as np 
import os
import glob
import skimage.io as io
import skimage.transform as trans

# Class color definitions: each color stands for one class. As far as I can tell,
# these are only used to colorize the predictions
Sky = [128,128,128]
Building = [128,0,0]
Pole = [192,192,128]
Road = [128,64,128]
Pavement = [60,40,222]
Tree = [128,128,0]
SignSymbol = [192,128,128]
Fence = [64,64,128]
Car = [64,0,128]
Pedestrian = [64,64,0]
Bicyclist = [0,128,192]
Unlabelled = [0,0,0]

COLOR_DICT = np.array([Sky, Building, Pole, Road, Pavement,
                          Tree, SignSymbol, Fence, Car, Pedestrian, Bicyclist, Unlabelled])

# adjustData normalizes the image (pixel values scaled to [0, 1]) and splits the label image
# into one binary map per class. For example, when detecting bicycles and pedestrians, the
# bicycle class gets its own map in which 1 marks bicycle pixels and 0 marks background, and
# the pedestrian label map works the same way. The per-class binary maps can be stored
# together as a single multi-channel image.
# Arguments, in order: input image, label image, multi-class flag, number of classes
def adjustData(img,mask,flag_multi_class,num_class):
    # Multi-label case
    if(flag_multi_class):
        # Normalize the image values first
        img = img / 255
        # Get the label image's shape (mainly its height and width); printing mask.shape
        # at runtime gives a clearer picture
        # print(mask.shape)
        mask = mask[:,:,:,0] if(len(mask.shape) == 4) else mask[:,:,0]

        # Create a new label volume of shape (H, W, num_class): each pixel now carries
        # num_class values, one per class, giving that class's value at that location
        new_mask = np.zeros(mask.shape + (num_class,))

        # For each class, find every position in the original label map whose value equals
        # that class id and set that position to 1 in the corresponding channel of new_mask.
        # Example: the label map  [[0,0,1],   produces the new label map  [[[1,0],[1,0],[0,1]],
        #                          [1,1,0]]                                [[0,1],[0,1],[1,0]]]
        # Position (0,0) holds 0 in the original, so the new map holds [1,0] there,
        # meaning that pixel belongs to class 0.
        for i in range(num_class):
            #for one pixel in the image, find the class in mask and convert it into one-hot vector
            #index = np.where(mask == i)
            #index_mask = (index[0],index[1],index[2],np.zeros(len(index[0]),dtype = np.int64) + i) if (len(mask.shape) == 4) else (index[0],index[1],np.zeros(len(index[0]),dtype = np.int64) + i)
            #new_mask[index_mask] = 1
            new_mask[mask == i,i] = 1

        # Flatten the one-hot mask from (N, H, W, C) to (N, H*W, C). Note the inner "else"
        # branch can never run: this line only executes when flag_multi_class is True
        new_mask = np.reshape(new_mask,(new_mask.shape[0],new_mask.shape[1]*new_mask.shape[2],new_mask.shape[3])) if flag_multi_class else np.reshape(new_mask,(new_mask.shape[0]*new_mask.shape[1],new_mask.shape[2]))
        mask = new_mask

    # Single-class case: the label is just foreground vs. background. Normalize to [0, 1],
    # then threshold: values above 0.5 are foreground, the rest background. This matches
    # the way the label images were generated.
    elif(np.max(img) > 1):
        img = img / 255
        mask = mask /255
        mask[mask > 0.5] = 1
        mask[mask <= 0.5] = 0
    return (img,mask)
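
As a quick sanity check of the one-hot step above, the two-class example from the comments can be reproduced in a standalone snippet (this is not part of data.py, just an illustration):

import numpy as np

mask = np.array([[0, 0, 1],
                 [1, 1, 0]])
new_mask = np.zeros(mask.shape + (2,))
for i in range(2):
    new_mask[mask == i, i] = 1
print(new_mask[0, 0])  # [1. 0.] -> class 0
print(new_mask[0, 2])  # [0. 1.] -> class 1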


# Training-data generator. The model's input images default to grayscale (image_color_mode = "grayscale"),
# and so do the label images (mask_color_mode = "grayscale").
# flag_multi_class = False, num_class = 2: one foreground class plus background makes two classes
def trainGenerator(batch_size,train_path,image_folder,mask_folder,aug_dict,image_color_mode = "grayscale",
                    mask_color_mode = "grayscale",image_save_prefix  = "image",mask_save_prefix  = "mask",
                    flag_multi_class = False,num_class = 2,save_to_dir = None,target_size = (256,256),seed = 1):
    '''
    can generate image and mask at the same time
    use the same seed for image_datagen and mask_datagen to ensure the transformation for image and mask is the same
    if you want to visualize the results of generator, set save_to_dir = "your path"
    '''
    # Define the augmentation for the input images and for the label images. Both must
    # receive exactly the same random transforms; the shared seed below guarantees this
    image_datagen = ImageDataGenerator(**aug_dict)
    mask_datagen = ImageDataGenerator(**aug_dict)

    # Apply the augmentation to your own images; see the Keras flow_from_directory documentation
    image_generator = image_datagen.flow_from_directory(
        train_path,   # root directory of the images
        classes = [image_folder], # list of subfolders; only these are traversed
        class_mode = None, # produce no class labels; the labels come from the mask images
        color_mode = image_color_mode, # RGB or grayscale
        target_size = target_size,
        batch_size = batch_size, # batch size
        save_to_dir = save_to_dir,
        save_prefix  = image_save_prefix,
        seed = seed) # random seed; must match the mask generator's seed or images and masks won't line up
    mask_generator = mask_datagen.flow_from_directory(
        train_path,
        classes = [mask_folder],
        class_mode = None,
        color_mode = mask_color_mode,
        target_size = target_size,
        batch_size = batch_size,
        save_to_dir = save_to_dir,
        save_prefix  = mask_save_prefix,
        seed = seed)
    # zip pairs the image generator with the mask generator
    train_generator = zip(image_generator, mask_generator)
    # Iterate over the paired batches (standard Python iterator usage)
    for (img,mask) in train_generator:
        # Normalize the image and convert the label as described above
        img,mask = adjustData(img,mask,flag_multi_class,num_class)
        # yield turns this function into a generator; see the Python docs on yield
        yield (img,mask)
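
To verify that the paired generator really yields matching, normalized batches, here is a standalone sanity check (my own snippet; it assumes data.py is importable and the membrane data is in place):

import numpy as np
from data import trainGenerator

gen = trainGenerator(2, 'data/membrane/train', 'image', 'label', dict())
img, mask = next(gen)
print(img.shape, mask.shape)       # expected: (2, 256, 256, 1) (2, 256, 256, 1)
print(img.max(), np.unique(mask))  # image scaled to [0, 1]; mask values only 0 and 1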


# Test-image generator: reads each test image, scales it to [0, 1], resizes it,
# and reshapes it for the model
def testGenerator(test_path,num_image = 30,target_size = (256,256),flag_multi_class = False,as_gray = True):
    for i in range(num_image):
        # Test images are named 0.png, 1.png, ...
        img = io.imread(os.path.join(test_path,"%d.png"%i),as_gray = as_gray)
        img = img / 255
        img = trans.resize(img,target_size)
        # Add the channel axis for the single-class case: (H, W) -> (H, W, 1)
        img = np.reshape(img,img.shape+(1,)) if (not flag_multi_class) else img
        # Add the batch axis: (H, W, 1) -> (1, H, W, 1)
        img = np.reshape(img,(1,)+img.shape)
        yield img

# Load images and masks into numpy arrays; this function appears to be deprecated and unused
def geneTrainNpy(image_path,mask_path,flag_multi_class = False,num_class = 2,image_prefix = "image",mask_prefix = "mask",image_as_gray = True,mask_as_gray = True):
    image_name_arr = glob.glob(os.path.join(image_path,"%s*.png"%image_prefix))
    image_arr = []
    mask_arr = []
    for index,item in enumerate(image_name_arr):
        img = io.imread(item,as_gray = image_as_gray)
        img = np.reshape(img,img.shape + (1,)) if image_as_gray else img
        mask = io.imread(item.replace(image_path,mask_path).replace(image_prefix,mask_prefix),as_gray = mask_as_gray)
        mask = np.reshape(mask,mask.shape + (1,)) if mask_as_gray else mask
        img,mask = adjustData(img,mask,flag_multi_class,num_class)
        image_arr.append(img)
        mask_arr.append(mask)
    image_arr = np.array(image_arr)
    mask_arr = np.array(mask_arr)
    return image_arr,mask_arr

# Label visualization: paint each class with its own color
def labelVisualize(num_class,color_dict,img):
    # Drop the channel axis if the prediction still has one
    img = img[:,:,0] if len(img.shape) == 3 else img
    img_out = np.zeros(img.shape + (3,))
    # Paint every pixel of class i with that class's color
    for i in range(num_class):
        img_out[img == i,:] = color_dict[i]
    return img_out / 255
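
A tiny standalone example of how labelVisualize is meant to be used (the 2x2 class map is made up for illustration):

pred = np.array([[0, 3],
                 [8, 11]])                  # class ids: Sky, Road, Car, Unlabelled
rgb = labelVisualize(12, COLOR_DICT, pred)  # shape (2, 2, 3), values scaled to [0, 1]
print(rgb[0, 0] * 255)                      # [128. 128. 128.] -> the Sky color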


# Save the colorized test predictions
def saveResult(save_path,npyfile,flag_multi_class = False,num_class = 2):
    for i,item in enumerate(npyfile):
        img = labelVisualize(num_class,COLOR_DICT,item) if flag_multi_class else item[:,:,0]
        io.imsave(os.path.join(save_path,"%d_predict.png"%i),img)
4. The model file model.py
This file needs little explanation; it is all model definition. Only the last few lines are worth walking through:

    # ... a long series of convolution / pooling / upsampling layers comes before this point

    conv9 = Conv2D(2, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9)
    # Output layer: 1 channel, 1x1 kernel. The labels here are grayscale, i.e. a single
    # class; for multi-class output, change the channel count to the number of classes.
    # The sigmoid squashes the output to [0, 1] probabilities for the cross-entropy loss.
    # One pitfall: in the multi-class case adjustData reshapes the labels, so their shape
    # is not (N, H, W, C) but (N, H*W, C). A multi-class model output must therefore also
    # be reshaped from (N, H, W, C) to (N, H*W, C).
    conv10 = Conv2D(1, 1, activation = 'sigmoid')(conv9)

    # Define the model's input and output, i.e. where data enters and results leave
    model = Model(input = inputs, output = conv10)
    # Set the optimizer and the loss. binary_crossentropy in Keras is computed per pixel,
    # which is why it also fits binary segmentation
    model.compile(optimizer = Adam(lr = 1e-4), loss = 'binary_crossentropy', metrics = ['accuracy'])

    #model.summary()
    # If pretrained weights were passed in, load them into the network
    if(pretrained_weights):
        model.load_weights(pretrained_weights)

    return model
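
For completeness, here is a hedged sketch of what a multi-class output head could look like, following the reshape pitfall noted above (num_class and the 256x256 input size are my assumptions; this is not code from the original model.py):

from keras.layers import Reshape

num_class = 12  # e.g. the 12 classes in COLOR_DICT
conv10 = Conv2D(num_class, 1, activation = 'softmax')(conv9)
# Flatten (N, 256, 256, num_class) to (N, 256*256, num_class) to match the reshaped labels
conv10 = Reshape((256*256, num_class))(conv10)
model = Model(input = inputs, output = conv10)
model.compile(optimizer = Adam(lr = 1e-4), loss = 'categorical_crossentropy', metrics = ['accuracy'])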