Tensorflow 构建自己的目标检测与识别模型之数据增强(二)
上次的博客中对如何安装Tensorflow Object Detection API的步骤及所遇到的问题进行说明。见链接:https://blog.csdn.net/weixin_41644725/article/details/83007901
接下来,对图像数据进行图像增强。虽然在配置.config文件(后面会说到)时,其中会提到数据增强(data argumentation),但是若是想手动实现,可参考本文,若不想则跳过即可。
1.用labelImage工具生成.xml文件。
该工具的界面如图所示,关于如何安装labelImage,可参考网上的相关博客,在windows和Linux下都有相应的安装过程,此处不叙述安装过程。其中“Open Dir”为打开存储所有图像文件的文件夹。“Change Save Dir”为将生成的.xml文件存储在指定文件夹下面。“Save”表示保存当前的.xml文件。
xml文件的格式如下图所示:
2. xml 转成csv文件
(1)将xml文件转成csv文件代码如下:
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
def xml_to_csv(path):
xml_list = []
for xml_file in glob.glob(path + '/*.xml'):
tree = ET.parse(xml_file)
root = tree.getroot()
for member in root.findall('object'):
value = (root.find('filename').text,
int(root.find('size')[0].text),
int(root.find('size')[1].text),
member[0].text,
int(member[4][0].text),
int(member[4][1].text),
int(member[4][2].text),
int(member[4][3].text)
)
xml_list.append(value)
column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
xml_df = pd.DataFrame(xml_list, columns=column_name)
return xml_df
def main():
xml_path = './xml' #存储xml的文件夹
xml_df = xml_to_csv(xml_path)
xml_df.to_csv('./csv/class.csv', index=None) #生成csv文件并存储在该路径下
print('Successfully converted xml to csv.')
main()
(2)得到该图像中对应类的边界框(bounding box),代码如下:
import os
import cv2
import pandas as pd
import matplotlib.pyplot as plt
def get_bbox(image_name,csv_path):
full_labels = pd.read_csv(csv_path)
selected_value = full_labels[full_labels.filename == image_name]
images_bbox = []
img_class = ''
for index,row in selected_value.iterrows():
list_bbox = []
list_bbox.append(row['xmin'])
list_bbox.append(row['ymin'])
list_bbox.append(row['xmax'])
list_bbox.append(row['ymax'])
list_bbox.append(image_name)
img_class = row['class']
images_bbox.append(list_bbox)
return images_bbox,img_class
img_path = '023.jpg'
csv_path = ''./csv/class.csv''
img = cv2.imread(img_path)
b, g, r = cv2.split(img)
img = cv2.merge([r, g, b])
image = cv2.GaussianBlur(img, (3, 3), 0)
coords = get_bbox(img_path)
coords = [coord[:4] for coord in coords]
for i in range(len(coords)):
bbox = coords[i]
x_min = bbox[0]
y_min = bbox[1]
x_max = bbox[2]
y_max = bbox[3]
cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
plt.subplot(111), plt.imshow(image), plt.title('original', fontsize='medium')
plt.show()
输出结果如下:
3.图像数据增强
(1)调整图像亮度
代码如下:
import os
import cv2
import pandas as pd
import matplotlib.pyplot as plt
'''调整亮度'''
def changeLight(img,bboxes):
flag = random.uniform(1.5, 2) # flag>1为调暗,小于1为调亮
img = exposure.adjust_gamma(img, flag)
cv2.imwrite('./1.jpg', img)
img = cv2.imread('./1.jpg')
os.remove('./1.jpg')
for i in range(len(bboxes)):
bbox = bboxes[i]
x_min = bbox[0]
y_min = bbox[1]
x_max = bbox[2]
y_max = bbox[3]
cv2.rectangle(img, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
return img
img_path = '023.jpg'
img = cv2.imread(img_path)
b, g, r = cv2.split(img)
img = cv2.merge([r, g, b])
img = cv2.GaussianBlur(img, (3, 3), 0)
image = cv2.GaussianBlur(img, (3, 3), 0)
coords = get_bbox(img_path)
coords = [coord[:4] for coord in coords]
for i in range(len(coords)):
bbox = coords[i]
x_min = bbox[0]
y_min = bbox[1]
x_max = bbox[2]
y_max = bbox[3]
cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
'''调整亮度'''
change_light_img = changeLight(img=img, bboxes=coords)
plt.subplot(121), plt.imshow(image), plt.title('original', fontsize='medium')
plt.subplot(122), plt.imshow(change_light_img), plt.title('change light', fontsize='medium')
plt.show()
输出结果如下:
(2)cutout
代码如下:
'''cutout'''
def cutout(img, bboxes, length=100, n_holes=1, threshold=0.5):
'''
原版本:https://github.com/uoguelph-mlrg/Cutout/blob/master/util/cutout.py
Randomly mask out one or more patches from an image.
Args:
img : a 3D numpy array,(h,w,c)
bboxes : 框的坐标
n_holes (int): Number of patches to cut out of each image.
length (int): The length (in pixels) of each square patch.
'''
def cal_iou(boxA, boxB):
'''
boxA, boxB为两个框,返回iou
boxB为bouding box
'''
# determine the (x, y)-coordinates of the intersection rectangle
xA = max(boxA[0], boxB[0])
yA = max(boxA[1], boxB[1])
xB = min(boxA[2], boxB[2])
yB = min(boxA[3], boxB[3])
if xB <= xA or yB <= yA:
return 0.0
# compute the area of intersection rectangle
interArea = (xB - xA + 1) * (yB - yA + 1)
# compute the area of both the prediction and ground-truth
# rectangles
boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
# compute the intersection over union by taking the intersection
# area and dividing it by the sum of prediction + ground-truth
# areas - the interesection area
iou = interArea / float(boxAArea + boxBArea - interArea)
#iou = interArea / float(boxBArea)
# return the intersection over union value
return iou
# 得到h和w
if img.ndim == 3:
h, w, c = img.shape
else:
_, h, w, c = img.shape
mask = np.ones((h, w, c), np.float32)
for n in range(n_holes):
chongdie = True # 看切割的区域是否与box重叠太多
while chongdie:
y = np.random.randint(h)
x = np.random.randint(w)
y1 = np.clip(y - length // 2, 0,
h) # numpy.clip(a, a_min, a_max, out=None), clip这个函数将将数组中的元素限制在a_min, a_max之间,大于a_max的就使得它等于 a_max,小于a_min,的就使得它等于a_min
y2 = np.clip(y + length // 2, 0, h)
x1 = np.clip(x - length // 2, 0, w)
x2 = np.clip(x + length // 2, 0, w)
chongdie = False
for box in bboxes:
if cal_iou([x1, y1, x2, y2], box) > threshold:
chongdie = True
break
mask[y1: y2, x1: x2, :] = 0.
# mask = np.expand_dims(mask, axis=0)
img = img * mask
for i in range(len(bboxes)):
bbox = bboxes[i]
x_min = bbox[0]
y_min = bbox[1]
x_max = bbox[2]
y_max = bbox[3]
cv2.rectangle(img, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
cv2.imwrite('./1.jpg', img)
img = cv2.imread('./1.jpg')
os.remove('./1.jpg')
return img
img_path = '023.jpg'
img = cv2.imread(img_path)
b, g, r = cv2.split(img)
img = cv2.merge([r, g, b])
img = cv2.GaussianBlur(img, (3, 3), 0)
image = cv2.GaussianBlur(img, (3, 3), 0)
coords = get_bbox(img_path)
coords = [coord[:4] for coord in coords]
for i in range(len(coords)):
bbox = coords[i]
x_min = bbox[0]
y_min = bbox[1]
x_max = bbox[2]
y_max = bbox[3]
cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
'''调整亮度'''
cut_out_img = cutout(img=img, bboxes=coords)
plt.subplot(121), plt.imshow(image), plt.title('original', fontsize='medium')
plt.subplot(122), plt.imshow(cut_out_img), plt.title('cutout', fontsize='medium')
plt.show()
输出结果如下:
(3)旋转
代码如下:
'''旋转'''
def rotate_img_bbox(img, bboxes, angle=5, scale=1.):
'''
参考:https://blog.csdn.net/u014540717/article/details/53301195crop_rate
输入:
img:图像array,(h,w,c)
bboxes:该图像包含的所有boundingboxs,一个list,每个元素为[x_min, y_min, x_max, y_max],要确保是数值
angle:旋转角度
scale:默认1
输出:
rot_img:旋转后的图像array
rot_bboxes:旋转后的boundingbox坐标list
'''
# ---------------------- 旋转图像 ----------------------
w = img.shape[1]
h = img.shape[0]
# 角度变弧度
rangle = np.deg2rad(angle) # angle in radians
# now calculate new image width and height
nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale
# ask OpenCV for the rotation matrix
rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
# calculate the move from the old center to the new center combined
# with the rotation
rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
# the move only affects the translation, so update the translation
# part of the transform
rot_mat[0, 2] += rot_move[0]
rot_mat[1, 2] += rot_move[1]
# 仿射变换
rot_img = cv2.warpAffine(img, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))), flags=cv2.INTER_LANCZOS4)
# ---------------------- 矫正bbox坐标 ----------------------
# rot_mat是最终的旋转矩阵
# 获取原始bbox的四个中点,然后将这四个点转换到旋转后的坐标系下
rot_bboxes = list()
for bbox in bboxes:
xmin = bbox[0]
ymin = bbox[1]
xmax = bbox[2]
ymax = bbox[3]
point1 = np.dot(rot_mat, np.array([(xmin + xmax) / 2, ymin, 1]))
point2 = np.dot(rot_mat, np.array([xmax, (ymin + ymax) / 2, 1]))
point3 = np.dot(rot_mat, np.array([(xmin + xmax) / 2, ymax, 1]))
point4 = np.dot(rot_mat, np.array([xmin, (ymin + ymax) / 2, 1]))
# 合并np.array
concat = np.vstack((point1, point2, point3, point4))
# 改变array类型
concat = concat.astype(np.int32)
# 得到旋转后的坐标
rx, ry, rw, rh = cv2.boundingRect(concat)
rx_min = rx
ry_min = ry
rx_max = rx + rw
ry_max = ry + rh
# 加入list中
rot_bboxes.append([rx_min, ry_min, rx_max, ry_max])
for i in range(len(rot_bboxes)):
bbox = rot_bboxes[i]
x_min = bbox[0]
y_min = bbox[1]
x_max = bbox[2]
y_max = bbox[3]
cv2.rectangle(rot_img, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
cv2.imwrite('./1.jpg', rot_img)
rot_img = cv2.imread('./1.jpg')
os.remove('./1.jpg')
return rot_img
img_path = '023.jpg'
img = cv2.imread(img_path)
b, g, r = cv2.split(img)
img = cv2.merge([r, g, b])
img = cv2.GaussianBlur(img, (3, 3), 0)
image = cv2.GaussianBlur(img, (3, 3), 0)
coords = get_bbox(img_path)
coords = [coord[:4] for coord in coords]
for i in range(len(coords)):
bbox = coords[i]
x_min = bbox[0]
y_min = bbox[1]
x_max = bbox[2]
y_max = bbox[3]
cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
'''调整亮度'''
rotate_img = rotate_img_bbox(img=img, bboxes=coords)
plt.subplot(121), plt.imshow(image), plt.title('original', fontsize='medium')
plt.subplot(122), plt.imshow(rotate_img), plt.title('rotate', fontsize='medium')
plt.show()
输出结果如下:
(4)裁剪
代码如下:
'''裁剪'''
def crop_img_bboxes(img, bboxes):
'''
裁剪后的图片要包含所有的框
输入:
img:图像array
bboxes:该图像包含的所有boundingboxs,一个list,每个元素为[x_min, y_min, x_max, y_max],要确保是数值
输出:
crop_img:裁剪后的图像array
crop_bboxes:裁剪后的bounding box的坐标list
'''
# ---------------------- 裁剪图像 ----------------------
w = img.shape[1]
h = img.shape[0]
x_min = w # 裁剪后的包含所有目标框的最小的框
x_max = 0
y_min = h
y_max = 0
for bbox in bboxes:
x_min = min(x_min, bbox[0])
y_min = min(y_min, bbox[1])
x_max = max(x_max, bbox[2])
y_max = max(y_max, bbox[3])
d_to_left = x_min # 包含所有目标框的最小框到左边的距离
d_to_right = w - x_max # 包含所有目标框的最小框到右边的距离
d_to_top = y_min # 包含所有目标框的最小框到顶端的距离
d_to_bottom = h - y_max # 包含所有目标框的最小框到底部的距离
# 随机扩展这个最小框
crop_x_min = int(x_min - random.uniform(0, d_to_left))
crop_y_min = int(y_min - random.uniform(0, d_to_top))
crop_x_max = int(x_max + random.uniform(0, d_to_right))
crop_y_max = int(y_max + random.uniform(0, d_to_bottom))
# 随机扩展这个最小框 , 防止别裁的太小
# crop_x_min = int(x_min - random.uniform(d_to_left//2, d_to_left))
# crop_y_min = int(y_min - random.uniform(d_to_top//2, d_to_top))
# crop_x_max = int(x_max + random.uniform(d_to_right//2, d_to_right))
# crop_y_max = int(y_max + random.uniform(d_to_bottom//2, d_to_bottom))
# 确保不要越界
crop_x_min = max(0, crop_x_min)
crop_y_min = max(0, crop_y_min)
crop_x_max = min(w, crop_x_max)
crop_y_max = min(h, crop_y_max)
crop_img = img[crop_y_min:crop_y_max, crop_x_min:crop_x_max]
# ---------------------- 裁剪boundingbox ----------------------
# 裁剪后的boundingbox坐标计算
crop_bboxes = list()
for bbox in bboxes:
crop_bboxes.append([bbox[0] - crop_x_min, bbox[1] - crop_y_min, bbox[2] - crop_x_min, bbox[3] - crop_y_min])
for i in range(len(crop_bboxes)):
bbox = crop_bboxes[i]
x_min = bbox[0]
y_min = bbox[1]
x_max = bbox[2]
y_max = bbox[3]
cv2.rectangle(crop_img, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
cv2.imwrite('./1.jpg', crop_img)
crop_img = cv2.imread('./1.jpg')
os.remove('./1.jpg')
return crop_img
img_path = '023.jpg'
img = cv2.imread(img_path)
b, g, r = cv2.split(img)
img = cv2.merge([r, g, b])
img = cv2.GaussianBlur(img, (3, 3), 0)
image = cv2.GaussianBlur(img, (3, 3), 0)
coords = get_bbox(img_path)
coords = [coord[:4] for coord in coords]
for i in range(len(coords)):
bbox = coords[i]
x_min = bbox[0]
y_min = bbox[1]
x_max = bbox[2]
y_max = bbox[3]
cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
'''调整亮度'''
crop_img = crop_img_bboxes(img=img, bboxes=coords)
plt.subplot(121), plt.imshow(image), plt.title('original', fontsize='medium')
plt.subplot(122), plt.imshow(crop_img), plt.title('crop', fontsize='medium')
plt.show()
输出结果如下: