1. Project overview
The project's data directory contains the training and test data, the labels, and the intermediate results of data augmentation. The augmentation intermediates are saved so that readers can see more directly what the augmentation operations do.
Now, on to the code itself.
2. The main function
from model import *
from data import *
import os

# Configure which GPUs CUDA can see. With a single card there is only one id, "0".
# To use two cards, uncomment the first line below: "0, 2" are GPU ids, but if you
# only have two cards in total the ids would be "0, 1".
#os.environ["CUDA_VISIBLE_DEVICES"] = "0, 2"
#os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Data-augmentation parameters; pick the operations you need. The author uses rotation,
# shifting, shearing, zooming and horizontal flipping; see the Keras ImageDataGenerator
# documentation for the exact meaning of each parameter.
data_gen_args = dict(rotation_range=0.2,
                     width_shift_range=0.05,
                     height_shift_range=0.05,
                     shear_range=0.05,
                     zoom_range=0.05,
                     horizontal_flip=True,
                     fill_mode='nearest')

# Create the training data generator (explained in data.py below).
myGene = trainGenerator(2, 'data/membrane/train', 'image', 'label', data_gen_args, save_to_dir=None)

# Build the network. The model definition can load pretrained weights (explained in
# model.py below), but since no weights are passed here, nothing is preloaded.
model = unet()

# Save the trained model; see ModelCheckpoint for details. With save_best_only=True,
# only the best model is kept.
model_checkpoint = ModelCheckpoint('unet_membrane.hdf5', monitor='loss', verbose=1, save_best_only=True)

# Train with fit_generator, which draws one batch at a time. The arguments are, in order:
# the data generator, the number of batches per epoch, the number of epochs, and the
# checkpoint callback defined above. See the Keras fit_generator docs for details.
model.fit_generator(myGene, steps_per_epoch=300, epochs=1, callbacks=[model_checkpoint])

# Generate the test data.
testGene = testGenerator("data/membrane/test")

# Run inference; see the Keras model.predict_generator documentation.
results = model.predict_generator(testGene, 30, verbose=1)

# Save the results. This is a custom function defined in data.py.
saveResult("data/membrane/test", results)
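Once a checkpoint exists, the same entry point can be reused for inference only. A minimal sketch, assuming the unet_membrane.hdf5 file saved by the training run above and the pretrained_weights argument of unet() (shown in model.py below):

from model import *
from data import *

# Load the weights saved by ModelCheckpoint into a fresh network; unet() forwards
# pretrained_weights to model.load_weights, so training is skipped entirely.
model = unet(pretrained_weights='unet_membrane.hdf5')

testGene = testGenerator("data/membrane/test")
results = model.predict_generator(testGene, 30, verbose=1)
saveResult("data/membrane/test", results)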
3. data.py
This file mainly implements reading your own images and label images, building the dataset, and mapping label-image colors to classes. It also provides the data generators used during training and testing, label visualization, and result saving.
from __future__ import print_function
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
import os
import glob
import skimage.io as io
import skimage.transform as trans

# Class color definitions: each color represents one class. As far as I can tell,
# these are only used to colorize predictions at inference time.
Sky = [128,128,128]
Building = [128,0,0]
Pole = [192,192,128]
Road = [128,64,128]
Pavement = [60,40,222]
Tree = [128,128,0]
SignSymbol = [192,128,128]
Fence = [64,64,128]
Car = [64,0,128]
Pedestrian = [64,64,0]
Bicyclist = [0,128,192]
Unlabelled = [0,0,0]

COLOR_DICT = np.array([Sky, Building, Pole, Road, Pavement,
                       Tree, SignSymbol, Fence, Car, Pedestrian,
                       Bicyclist, Unlabelled])

# Data adjustment: normalize image pixel values to [0,1] and split the label image into
# one binary map per class. For example, when detecting bicycles and pedestrians, the
# bicycle class gets one map where 1 marks bicycle pixels and 0 marks background, and the
# pedestrian class gets another map of its own. Multiple classes thus yield multiple
# binary maps, which can be stored as one multi-channel image.
# Arguments, in order: input image, label image, whether it is multi-class, number of classes.
def adjustData(img,mask,flag_multi_class,num_class):
    # Multi-class labels
    if(flag_multi_class):
        # Normalize the input image first
        img = img / 255
        # Take the first channel of the mask, keeping height and width. Print mask.shape
        # here at runtime for a clearer picture of the dimensions.
        # print(mask.shape)
        mask = mask[:,:,:,0] if(len(mask.shape) == 4) else mask[:,:,0]
        # Create a new label volume of shape (height, width, num_class): every pixel gets
        # num_class values, one per class.
        new_mask = np.zeros(mask.shape + (num_class,))
        # For each class, find all positions in the original mask equal to that class id
        # and set the corresponding position in the new mask to 1.
        # Example: the label image [[0,0,1],   becomes   [[[1,0],[1,0],[0,1]],
        #                           [1,1,0]]              [[0,1],[0,1],[1,0]]]
        # Position (0,0) holds 0 in the old mask, so the new mask holds (1,0) there,
        # i.e. that pixel belongs to class 0.
        for i in range(num_class):
            #for one pixel in the image, find the class in mask and convert it into one-hot vector
            #index = np.where(mask == i)
            #index_mask = (index[0],index[1],index[2],np.zeros(len(index[0]),dtype = np.int64) + i) if (len(mask.shape) == 4) else (index[0],index[1],np.zeros(len(index[0]),dtype = np.int64) + i)
            #new_mask[index_mask] = 1
            new_mask[mask == i,i] = 1
        new_mask = np.reshape(new_mask,(new_mask.shape[0],new_mask.shape[1]*new_mask.shape[2],new_mask.shape[3])) if flag_multi_class else np.reshape(new_mask,(new_mask.shape[0]*new_mask.shape[1],new_mask.shape[2]))
        mask = new_mask
    # Single class: the label is split into foreground and background only. Normalize to
    # [0,1]; values above 0.5 are foreground, the rest background. This thresholding
    # matches how the label images were generated.
    elif(np.max(img) > 1):
        img = img / 255
        mask = mask / 255
        mask[mask > 0.5] = 1
        mask[mask <= 0.5] = 0
    return (img,mask)

# Training data generator. By default the author's input images are grayscale
# (image_color_mode = "grayscale"), and so are the labels (mask_color_mode = "grayscale").
# flag_multi_class = False, num_class = 2: one foreground class plus background.
def trainGenerator(batch_size,train_path,image_folder,mask_folder,aug_dict,image_color_mode = "grayscale",
                    mask_color_mode = "grayscale",image_save_prefix = "image",mask_save_prefix = "mask",
                    flag_multi_class = False,num_class = 2,save_to_dir = None,target_size = (256,256),seed = 1):
    '''
    can generate image and mask at the same time
    use the same seed for image_datagen and mask_datagen to ensure the transformation for image and mask is the same
    if you want to visualize the results of generator, set save_to_dir = "your path"
    '''
    # Define the augmentation. Inputs and labels must receive identical transformations
    # with identical random values; the shared seed below guarantees this.
    image_datagen = ImageDataGenerator(**aug_dict)
    mask_datagen = ImageDataGenerator(**aug_dict)
    # Apply the augmentation to your own images; see the Keras flow_from_directory
    # documentation for details.
    image_generator = image_datagen.flow_from_directory(
        train_path,                      # image directory
        classes = [image_folder],        # list of subfolders; only these are traversed
        class_mode = None,               # no class labels: labels come from the mask images
        color_mode = image_color_mode,   # RGB or grayscale
        target_size = target_size,
        batch_size = batch_size,         # batch size
        save_to_dir = save_to_dir,
        save_prefix = image_save_prefix,
        seed = seed)                     # same seed as the mask generator, so the two stay aligned
    mask_generator = mask_datagen.flow_from_directory(
        train_path,
        classes = [mask_folder],
        class_mode = None,
        color_mode = mask_color_mode,
        target_size = target_size,
        batch_size = batch_size,
        save_to_dir = save_to_dir,
        save_prefix = mask_save_prefix,
        seed = seed)
    # zip the two generators so each step yields an (image, mask) pair
    train_generator = zip(image_generator, mask_generator)
    # Iterate over the pairs (standard Python iterator usage, together with zip)
    for (img,mask) in train_generator:
        # Adjust the raw image and its label
        img,mask = adjustData(img,mask,flag_multi_class,num_class)
        # yield turns this function into a generator
        yield (img,mask)

# Test data generator: plain Python, reads the test images one by one.
def testGenerator(test_path,num_image = 30,target_size = (256,256),flag_multi_class = False,as_gray = True):
    for i in range(num_image):
        img = io.imread(os.path.join(test_path,"%d.png"%i),as_gray = as_gray)
        img = img / 255
        img = trans.resize(img,target_size)
        img = np.reshape(img,img.shape+(1,)) if (not flag_multi_class) else img
        img = np.reshape(img,(1,)+img.shape)
        yield img

# Load images and masks into numpy arrays; this appears to be deprecated and unused.
def geneTrainNpy(image_path,mask_path,flag_multi_class = False,num_class = 2,image_prefix = "image",mask_prefix = "mask",image_as_gray = True,mask_as_gray = True):
    image_name_arr = glob.glob(os.path.join(image_path,"%s*.png"%image_prefix))
    image_arr = []
    mask_arr = []
    for index,item in enumerate(image_name_arr):
        img = io.imread(item,as_gray = image_as_gray)
        img = np.reshape(img,img.shape + (1,)) if image_as_gray else img
        mask = io.imread(item.replace(image_path,mask_path).replace(image_prefix,mask_prefix),as_gray = mask_as_gray)
        mask = np.reshape(mask,mask.shape + (1,)) if mask_as_gray else mask
        img,mask = adjustData(img,mask,flag_multi_class,num_class)
        image_arr.append(img)
        mask_arr.append(mask)
    image_arr = np.array(image_arr)
    mask_arr = np.array(mask_arr)
    return image_arr,mask_arr

# Label visualization: paint each class with its own color.
def labelVisualize(num_class,color_dict,img):
    img = img[:,:,0] if len(img.shape) == 3 else img
    img_out = np.zeros(img.shape + (3,))
    for i in range(num_class):
        img_out[img == i,:] = color_dict[i]
    return img_out / 255

# Save the visualized test results.
def saveResult(save_path,npyfile,flag_multi_class = False,num_class = 2):
    for i,item in enumerate(npyfile):
        img = labelVisualize(num_class,COLOR_DICT,item) if flag_multi_class else item[:,:,0]
        io.imsave(os.path.join(save_path,"%d_predict.png"%i),img)
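The augmentation intermediates mentioned in the project overview can be reproduced by pointing save_to_dir at a folder, as the docstring in trainGenerator suggests. A minimal sketch; the aug_dir path is an assumption (the repo ships a similar "aug" folder), and any existing directory works:

import os
from data import trainGenerator

# Hypothetical folder for inspecting augmented pairs; Keras requires it to exist
# before flow_from_directory writes into it.
aug_dir = "data/membrane/train/aug"
os.makedirs(aug_dir, exist_ok=True)

data_gen_args = dict(rotation_range=0.2, width_shift_range=0.05, height_shift_range=0.05,
                     shear_range=0.05, zoom_range=0.05, horizontal_flip=True, fill_mode='nearest')

# Same call as in main, but with save_to_dir set: every batch drawn from the generator is
# also written to aug_dir, images prefixed "image" and masks prefixed "mask".
myGene = trainGenerator(2, 'data/membrane/train', 'image', 'label', data_gen_args,
                        save_to_dir=aug_dir)

# Generators are lazy (and this one is infinite), so pull a few batches to write something.
for i, (img, mask) in enumerate(myGene):
    if i >= 3:
        break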
4. The model file model.py
There is not much to explain in this file: it is all model definition. Only the last few lines deserve comment:

# ... a series of convolution, pooling and upsampling layers come before this point
conv9 = Conv2D(2, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv9)

# Output layer: 1 channel, 1x1 kernel. The author's labels are grayscale, i.e. a single
# foreground class; for multi-class output, change the channel count to the number of
# classes. The sigmoid squashes the output into [0,1] probabilities for the cross-entropy
# loss. One small pitfall: in the multi-class case, adjustData reshapes the label images,
# so the label shape is not (N, H, W, C) but (N, H*W, C). The model output must then also
# be reshaped from (N, H, W, C) to (N, H*W, C) to match.
conv10 = Conv2D(1, 1, activation = 'sigmoid')(conv9)

# Define the model's input and output, i.e. where data enters and results leave.
model = Model(input = inputs, output = conv10)

# Define the optimizer and the loss function; see the Keras documentation on
# binary_crossentropy, which suits this binary segmentation setup.
model.compile(optimizer = Adam(lr = 1e-4), loss = 'binary_crossentropy', metrics = ['accuracy'])

#model.summary()

# If pretrained weights are given, load them into the network.
if(pretrained_weights):
    model.load_weights(pretrained_weights)

return model
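To make the multi-class pitfall concrete, here is a minimal sketch of what the output head could look like for num_class classes. It is my own construction, not the author's code: the single-conv "body" stands in for the full U-Net, num_class = 5 and the 256x256 input size are assumptions, and the Reshape mirrors the (N, H*W, C) label layout produced by adjustData.

from keras.models import Model
from keras.layers import Input, Conv2D, Reshape
from keras.optimizers import Adam

num_class = 5  # assumed number of classes, including background

# Stand-in for the real U-Net body: a single conv layer, just to make this runnable.
inputs = Input((256, 256, 1))
features = Conv2D(2, 3, activation='relu', padding='same', kernel_initializer='he_normal')(inputs)

# One output channel per class, with softmax across channels instead of a single sigmoid.
conv10 = Conv2D(num_class, 1, activation='softmax')(features)

# Reshape (N, H, W, C) to (N, H*W, C) so the output matches the multi-class labels
# produced by adjustData.
out = Reshape((256 * 256, num_class))(conv10)

model = Model(inputs, out)
# categorical_crossentropy pairs with the one-hot labels and the softmax output.
model.compile(optimizer=Adam(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])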