Preface

Training U-Net under Keras was very convenient, but porting the model to other platforms and embedding it in C++ turned out to be a hard problem. I spent several days getting a Caffe version of U-Net to train on Windows and running forward inference through VS2015. Since I could not find any material on this process online, I want to share it here.

Steps

  1. Build Caffe for Windows together with its Python interface. You can use the windows branch of the BVLC caffe repository and build it with scripts/build_win.cmd. This requires CMake and a Python environment; for a GPU build you also need CUDA 8.0 and cuDNN 5.1. The environment setup is routine, so I won't walk through it here.
  2. Build the dataset. Prepare the original images and the mask images (use labelme, then convert its json files to png images). Note that because img.txt in the Python script stores only file names, each original image and its mask must share the same name and the same extension, so the png masks exported by labelme need to be converted to jpg. The finished dataset consists of three things: the original images, the mask images (named to match the originals), and img.txt, which lists one image name per line (e.g. 000001.jpg), with as many lines as there are images. You can write a simple script for this; a minimal sketch appears right after the data-layer code below.
  3. Download the Caffe version of the U-Net network definition from https://github.com/warden3344/unet and train with its train_val.prototxt. The only part of the prototxt that needs changing is the final class count; I have two classes, so num_output=2 on the score layer. The input layer is implemented in Python: the cloned repository contains mydatalayer.py, which defaults to a single class, so for our two-class case the code needs a few changes, and the three dataset paths must be updated to your own. The modified layer is shown below, and a sketch of how to launch training follows it:
import caffe
import numpy as np
import cv2
import numpy.random as random

class DataLayer(caffe.Layer):

    def setup(self, bottom, top):

        self.imgdir = "/home/pic/zxy-project/caffe-unet/data/image/"
        self.maskdir = "/home/pic/zxy-project/caffe-unet/data/mask_png/"
        self.imgtxt = "/home/pic/zxy-project/caffe-unet/data/img.txt"
        self.random = True
        self.seed = None

        if len(top) != 2:
            raise Exception("Need to define two tops: data and mask.")

        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        self.lines = open(self.imgtxt, 'r').readlines()
        self.idx = 0

        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.lines) - 1)

    def reshape(self, bottom, top):
        # load image + label image pair
        self.data = self.load_image(self.idx)
        self.mask = self.load_mask(self.idx)
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.mask.shape)

    def forward(self, bottom, top):
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.mask

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.lines) - 1)
        else:
            self.idx += 1
            if self.idx == len(self.lines):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        pass

    def load_image(self, idx):
        # strip() removes the trailing newline from the img.txt entry
        imname = self.imgdir + self.lines[idx].strip()
        im = cv2.imread(imname)
        im = cv2.resize(im, (512, 512))
        im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        # add the channel dimension: shape becomes (1, 512, 512)
        return im[np.newaxis, :]

    def load_mask(self, idx):
        imname = self.maskdir + self.lines[idx].strip()
        im = cv2.imread(imname, 1)
        # nearest-neighbour keeps the label values intact when resizing
        im = cv2.resize(im, (512, 512), interpolation=cv2.INTER_NEAREST)
        im = im[:, :, 0]
        # one-hot encode into (2, 512, 512): channel c is 1 where label == c
        seg_labels = np.zeros((2, 512, 512))
        for c in range(2):
            seg_labels[c, :, :] = (im == c).astype(int)
        return seg_labels
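As promised in step 2, here is a minimal sketch of the dataset helper script. It is only an illustration under my assumptions about the layout (data/image, data/mask_png and the file names are placeholders): it writes img.txt with one image name per line and converts each labelme PNG mask to a JPEG with a matching name.

import os
import cv2

image_dir = "data/image"     # original .jpg images
mask_dir = "data/mask_png"   # labelme masks, e.g. 000001.png

with open("data/img.txt", "w") as f:
    for name in sorted(os.listdir(image_dir)):
        if not name.lower().endswith(".jpg"):
            continue
        f.write(name + "\n")  # one name per line, e.g. 000001.jpg
        stem = os.path.splitext(name)[0]
        png_path = os.path.join(mask_dir, stem + ".png")
        if os.path.exists(png_path):
            mask = cv2.imread(png_path)
            # Caution: JPEG is lossy, so a strictly 0/1 mask can pick up
            # stray values; re-thresholding after loading is a safe guard.
            cv2.imwrite(os.path.join(mask_dir, stem + ".jpg"), mask)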
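To actually start training with this Python data layer, one option is pycaffe. This is a minimal sketch, assuming a solver.prototxt that points at train_val.prototxt (the solver file name is a placeholder) and that mydatalayer.py sits in the working directory:

import sys
sys.path.insert(0, '.')  # make mydatalayer.py importable for the Python layer
import caffe

caffe.set_mode_gpu()     # or caffe.set_mode_cpu() on a CPU-only build
solver = caffe.SGDSolver('solver.prototxt')
solver.solve()           # runs until max_iter in the solver file

The caffe.exe built in step 1 works too (caffe train --solver=solver.prototxt), as long as PYTHONPATH includes the directory holding mydatalayer.py.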
  4. Configure VS2015 for Caffe. We built and installed Caffe with CMake; now we set up the VS2015 include and library directories and the linker inputs, after which we can call the Caffe API.
VC++ include directories:
F:\caffe-enet-windows\include;
C:\Users\Administrator\.caffe\dependencies\libraries_v140_x64_py27_1.1.0\libraries\include;
F:\caffe-windows\scripts\build\include;

VC++ library directories:

C:\Users\Administrator\.caffe\dependencies\libraries_v140_x64_py27_1.1.0\libraries\lib;
D:\Anaconda2\libs;
C:\Users\Administrator\.caffe\dependencies\libraries_v140_x64_py27_1.1.0\libraries\x64\vc14\lib;
F:\caffe-windows\scripts\build\lib\Release;

Linker input (Additional Dependencies):

opencv_core310.lib
opencv_highgui310.lib
opencv_imgproc310.lib
opencv_imgcodecs310.lib
caffe.lib
caffeproto.lib
caffehdf5.lib
caffehdf5_hl.lib
gflags.lib
glog.lib
leveldb.lib
libprotobuf.lib
libopenblas.dll.a
lmdb.lib
boost_python-vc140-mt-1_61.lib
boost_thread-vc140-mt-1_61.lib

If you hit any other errors configuring VS2015 to use Caffe, you can search for them online or ask me in the comments.
  5. Write the U-Net forward-inference code and test segmentation on new images.

#pragma once
// CPU_ONLY must be defined before including caffe.hpp so the headers
// compile without the CUDA code paths.
#define CPU_ONLY 1
#include <caffe/caffe.hpp>
#include "head.h"   // project-local helper (typically forces Caffe layer registration)
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#include <algorithm>
#include <iomanip>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <iostream>
#include <time.h>
#include <tchar.h>

using namespace cv;
using namespace caffe;

// Segmentation wrapper around a Caffe network
class Segmentation {
public:
	Segmentation(void);
	~Segmentation(void);
	void Segmentinit(const string& model_file_seg,
		const string& weights_file_seg);

	bool Segment(const cv::Mat& img, cv::Mat& result_img);

private:

	void WrapInputLayer(std::vector<cv::Mat>* input_channels);

	void Preprocess(const cv::Mat& img,
		std::vector<cv::Mat>* input_channels);

private:
	shared_ptr<Net<float> > net_seg;
	cv::Size input_geometry_seg;
	int num_channels_seg;
	cv::Mat mean_seg;
};
Segmentation::Segmentation(void)
{
}

Segmentation::~Segmentation(void)
{
}

void Segmentation::Segmentinit(const string& model_file_seg,
	const string& weights_file_seg) {
#ifdef CPU_ONLY
	Caffe::set_mode(Caffe::CPU);
#else
	Caffe::set_mode(Caffe::GPU);
#endif

	/* Load the network. */
	net_seg.reset(new Net<float>(model_file_seg, TEST));
	net_seg->CopyTrainedLayersFrom(weights_file_seg);
	//CHECK_EQ(net_seg->num_inputs(), 1) << "Network should have exactly one input.";
	//CHECK_EQ(net_seg->num_outputs(), 1) << "Network should have exactly one output.";

	Blob<float>* input_layer = net_seg->input_blobs()[0];
	num_channels_seg = input_layer->channels();
	CHECK(num_channels_seg == 3 || num_channels_seg == 1)
		<< "Input layer should have 1 or 3 channels.";
	input_geometry_seg = cv::Size(input_layer->width(), input_layer->height());

	/* No mean file is used here; the network takes raw pixel values. */
}

bool Segmentation::Segment(const cv::Mat& img, cv::Mat& result_img) {
	Blob<float>* input_layer = net_seg->input_blobs()[0];
	input_layer->Reshape(1, num_channels_seg,
		input_geometry_seg.height, input_geometry_seg.width);
	/* Forward dimension change to all layers. */
	net_seg->Reshape();

	std::vector<cv::Mat> input_channels;
	WrapInputLayer(&input_channels);

	Preprocess(img, &input_channels);

	net_seg->Forward();

	/* Copy the output layer to a std::vector */
	Blob<float>* result_blob = net_seg->output_blobs()[0];
	const float* result = result_blob->cpu_data();

	// Read back the output blob dimensions so we can interpret the data
	const int h = result_blob->height();
	const int w = result_blob->width();
	const int channels = result_blob->channels();
	const int num = result_blob->num();
	std::cout << "output_blob(n,c,h,w) = " << num << ", " << channels << ", "
		<< h << ", " << w << std::endl;
	// (An earlier version converted channel 0 of the raw probability map into
	// a CV_8UC1 image here; the argmax approach below supersedes it.)
	// compute argmax
	cv::Mat class_each_row(channels, w * h, CV_32FC1, const_cast<float *>(result_blob->cpu_data()));
	class_each_row = class_each_row.t(); // transpose so each row holds the class probabilities of one pixel
	cv::Point maxId;    // location of the max value; maxId.x is the class index
	double maxValue;    // the max probability itself
	cv::Mat prediction_map(h, w, CV_8UC1);
	printf("rows: %d\n", class_each_row.rows);
	printf("cols: %d\n", class_each_row.cols);
	for (int i = 0; i<class_each_row.rows; i++) {
		minMaxLoc(class_each_row.row(i), 0, &maxValue, 0, &maxId);
		//printf("%.5f\n", maxValue);
		//if (maxValue > 0.6) prediction_map.at<uchar>(i) = 255;
		//else prediction_map.at<uchar>(i) = 0;
		//printf("%.5f %.5f\n", class_each_row.at<float>(i, 0), class_each_row.at<float>(i, 1));
		//printf("%d\n", maxId.x);
		prediction_map.at<uchar>(i) = maxId.x;
	}
	cv::cvtColor(prediction_map.clone(), prediction_map, CV_GRAY2BGR);
	cv::Mat label_colours = cv::imread("F:\\caffe-enet-windows\\cityscapes19.png", 1);
	cv::cvtColor(label_colours, label_colours, CV_RGB2BGR);
	cv::Mat output_image;
	LUT(prediction_map, label_colours, output_image);
	result_img = output_image;
	return true;
}


/* Wrap the input layer of the network in separate cv::Mat objects
* (one per channel). This way we save one memcpy operation and we
* don't need to rely on cudaMemcpy2D. The last preprocessing
* operation will write the separate channels directly to the input
* layer. */
void Segmentation::WrapInputLayer(std::vector<cv::Mat>* input_channels) {
	Blob<float>* input_layer = net_seg->input_blobs()[0];

	int width = input_layer->width();
	int height = input_layer->height();
	float* input_data = input_layer->mutable_cpu_data();
	for (int i = 0; i < input_layer->channels(); ++i) {
		cv::Mat channel(height, width, CV_32FC1, input_data);
		input_channels->push_back(channel);
		input_data += width * height;
	}
}

void Segmentation::Preprocess(const cv::Mat& img,
	std::vector<cv::Mat>* input_channels) {
	/* Convert the input image to the input image format of the network. */
	cv::Mat sample;
	if (img.channels() == 3 && num_channels_seg == 1)
		cv::cvtColor(img, sample, cv::COLOR_BGR2GRAY);
	else if (img.channels() == 4 && num_channels_seg == 1)
		cv::cvtColor(img, sample, cv::COLOR_BGRA2GRAY);
	else if (img.channels() == 4 && num_channels_seg == 3)
		cv::cvtColor(img, sample, cv::COLOR_BGRA2BGR);
	else if (img.channels() == 1 && num_channels_seg == 3)
		cv::cvtColor(img, sample, cv::COLOR_GRAY2BGR);
	else
		sample = img;
	//std::cout << num_channels_seg << std::endl;
	//std::cout << sample.channels() << std::endl;
	cv::Mat sample_resized;
	if (sample.size() != input_geometry_seg)
		cv::resize(sample, sample_resized, input_geometry_seg);
	else
		sample_resized = sample;

	cv::Mat sample_float;
	if (num_channels_seg == 3)
		sample_resized.convertTo(sample_float, CV_32FC3);
	else
		sample_resized.convertTo(sample_float, CV_32FC1);

	//cv::Mat sample_normalized;
	//cv::subtract(sample_float, mean_seg, sample_normalized);

	/* This operation will write the separate BGR planes directly to the
	* input layer of the network because it is wrapped by the cv::Mat
	* objects in input_channels. */
	//cv::split(sample_normalized, *input_channels);
	cv::split(sample_float, *input_channels);
	CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
		== net_seg->input_blobs()[0]->cpu_data())
		<< "Input channels are not wrapping the input layer of the network.";
}

// Helper that initializes the segmenter
int init_segment_model(Segmentation& segmentation, const string& model_path, const string& weights_path)
{
	const string model_file_seg = model_path;
	const string weights_file_seg = weights_path;

	// Initialize the unet_network.
	segmentation.Segmentinit(model_file_seg, weights_file_seg);
	return 1;
}

// Fixed-point grayscale conversion: gray = 0.299*R + 0.587*G + 0.114*B, with the
// weights scaled by 2^20 to stay in integer arithmetic (313524 + 615514 + 119538
// = 2^20 exactly). Note that OpenCV stores pixels as BGR, so channel [2] is R
// and channel [0] is B. (main() below uses cv::cvtColor instead; this is kept
// as a faster hand-rolled alternative.)
Mat speed_rgb2gray(Mat src) {
	Mat dst(src.rows, src.cols, CV_8UC1);
	for (int i = 0; i < src.rows; i++) {
		for (int j = 0; j < src.cols; j++) {
			const Vec3b& p = src.at<Vec3b>(i, j);
			dst.at<uchar>(i, j) =
				(uchar)((313524 * p[2] + 615514 * p[1] + 119538 * p[0]) >> 20);
		}
	}
	return dst;
}

int main(int argc, char** argv)
{
	const string model_file_seg = "F:\\caffe-unet\\train_val_test.prototxt";
	const string weights_file_seg = "F:\\caffe-unet\\zxy_iter_2000.caffemodel";
	Segmentation segmentation;
	int segment_init = init_segment_model(segmentation, model_file_seg, weights_file_seg);

	// Load a test image; check it loaded before converting it to the
	// single-channel input the network was trained on.
	std::string strOrgImg_Path = "F:\\make_data\\caffe-enet-data2\\image\\000001.jpg";
	cv::Mat img_seg = cv::imread(strOrgImg_Path);
	if (!img_seg.data)
	{
		return -1;
	}
	cvtColor(img_seg, img_seg, CV_BGR2GRAY);
	cv::Mat seg_result = cv::Mat(img_seg.rows, img_seg.cols, CV_8UC1);
	bool segment_flag = segmentation.Segment(img_seg, seg_result);
	if (segment_flag)
	{
		cv::namedWindow("unet_result");
		cv::imshow("unet_result", seg_result);
		cv::moveWindow("unet_result", 700, 0);
		cv::waitKey(0);
	}

	return 0;
}

My code uses cityscapes19.png as the colour lookup table for the display; when you run your own predictions you can change it to whatever colours you like. Here are the three images.
Original image:

Prediction result:
cityscapes19.png:

There is also another prototxt that drops the crop layers and runs slightly faster:

https://github.com/jolibrain/deepdetect/tree/master/templates/caffe/unet