Preface:

In object detection, you sometimes want a small dataset just to see how your model performs. Building a dataset from scratch is tedious and time-consuming, so instead you can pull a single class out of VOC2007 or VOC2012. This post shows you how to extract exactly the classes you need!

1. Pull out the .xml files and images for the classes you need

Just run the code below. You only need to change the paths and the classes1 through classes5 variables to pick the classes you want; if you only need a single class, comment out the other classes checks inside the for k in range(0, len(ind_start)) loop.

import os
import shutil
ann_filepath='D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations/'
img_filepath='D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages/'
img_savepath='D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages_ssd/'
ann_savepath='D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations_ssd/'
if not os.path.exists(img_savepath):
    os.mkdir(img_savepath)
 
if not os.path.exists(ann_savepath):
    os.mkdir(ann_savepath)
names = {}  # dict holding each <object> block, keyed 'block0', 'block1', ...
classes = ['aeroplane','bicycle','bird', 'boat', 'bottle',
           'bus', 'car', 'cat', 'chair', 'cow','diningtable',
           'dog', 'horse', 'motorbike', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor', 'person']
 
 
for file in os.listdir(ann_filepath):
    print(file)
    fp = open(ann_filepath + file)
    ann_savefile=ann_savepath+file
    fp_w = open(ann_savefile, 'w')
    lines = fp.readlines()
 
    ind_start = []
    ind_end = []
    lines_id_start = lines[:]
    lines_id_end = lines[:]
 
    classes1 = '\t\t<name>bicycle</name>\n'
    classes2 = '\t\t<name>motorbike</name>\n'
    classes3 = '\t\t<name>bus</name>\n'
    classes4 = '\t\t<name>car</name>\n'
    classes5 = '\t\t<name>person</name>\n'
 
    #find the <object> blocks in the xml and record their start/end line indices
    while "\t<object>\n" in lines_id_start:
        a = lines_id_start.index("\t<object>\n")
        ind_start.append(a)
        lines_id_start[a] = "delete"
 
 
    while "\t</object>\n" in lines_id_end:
        b = lines_id_end.index("\t</object>\n")
        ind_end.append(b)
        lines_id_end[b] = "delete"
 
    #names holds every <object> block
    for k in range(0, len(ind_start)):
        names['block%d' % k] = []
        for j in range(0, len(classes)):
            if classes[j] in lines[ind_start[k] + 1]:
                a = ind_start[k]
                for o in range(ind_end[k] - ind_start[k] + 1):
                    names['block%d' % k].append(lines[a + o])
                break
        #print(names['block%d' % k])
 
 
    #everything before the first <object> (the xml header)
    string_start = lines[0:ind_start[0]]
    #the closing </annotation> line (the xml footer)
    string_end = [lines[len(lines) - 1]]
 
 
    #search the blocks for the chosen classes; if found, append the object block
    a = 0
    for k in range(0, len(ind_start)):
        if classes1 in names['block%d' % k]:
            a += 1
            string_start += names['block%d' % k]
        if classes2 in names['block%d' % k]:
            a += 1
            string_start += names['block%d' % k]
        if classes3 in names['block%d' % k]:
            a += 1
            string_start += names['block%d' % k]
        if classes4 in names['block%d' % k]:
            a += 1
            string_start += names['block%d' % k]
        if classes5 in names['block%d' % k]:
            a += 1
            string_start += names['block%d' % k]
    string_start += string_end
    for c in range(0, len(string_start)):
        fp_w.write(string_start[c])
    fp_w.close()
    #if none of the target classes were found, delete this xml; otherwise copy the image
    if a == 0:
        os.remove(ann_savepath+file)
    else:
        name_img = img_filepath + os.path.splitext(file)[0] + ".jpg"
        shutil.copy(name_img, img_savepath)
    fp.close()
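
The line-matching above relies on the exact tab indentation of the official VOC xml files. If your annotations are formatted differently, a sketch of the same filtering done with xml.etree.ElementTree (assuming the same paths and the same keep list as above; the _ssd folders must already exist) would be:

import os
import shutil
import xml.etree.ElementTree as ET

# same paths as in the script above
ann_filepath = 'D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations/'
img_filepath = 'D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages/'
img_savepath = 'D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages_ssd/'
ann_savepath = 'D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations_ssd/'

keep = {'bicycle', 'motorbike', 'bus', 'car', 'person'}  # the classes to extract

for file in os.listdir(ann_filepath):
    tree = ET.parse(ann_filepath + file)
    root = tree.getroot()
    # drop every <object> whose <name> is not in the keep set
    for obj in root.findall('object'):
        if obj.find('name').text.strip() not in keep:
            root.remove(obj)
    if root.findall('object'):  # any target objects left: save xml and copy image
        tree.write(ann_savepath + file)
        shutil.copy(img_filepath + os.path.splitext(file)[0] + '.jpg', img_savepath)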

You will then end up with two new folders (the _ssd ones): Annotations_ssd holding the filtered xml files and JPEGImages_ssd holding the matching images.


Next, under the same VOC2007 directory, create an ImageSets_ssd folder, and inside it create a Main folder (mirroring the original ImageSets/Main layout).

Then run the code below (after adjusting the paths):

import os
train_file=open('D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets_ssd/Main/train.txt','w')
test_file=open('D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets_ssd/Main/test.txt','w')
# note: train and test are both listed from the same filtered image folder here,
# so train.txt and test.txt end up identical
train_files = os.listdir('D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages_ssd')
test_files = os.listdir('D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages_ssd')

for file in train_files:
    print(file)
    train_file.write(file.split('.')[0]+'\n')

for file in test_files:
    test_file.write(file.split('.')[0]+'\n')

train_file.close()
test_file.close()
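
If you want train.txt and test.txt to be an actual split rather than two identical lists, a minimal sketch (the 80/20 ratio is an arbitrary choice) would be:

import os
import random

img_dir = 'D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages_ssd'
main_dir = 'D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets_ssd/Main'

ids = [os.path.splitext(f)[0] for f in os.listdir(img_dir)]
random.shuffle(ids)
split = int(len(ids) * 0.8)  # 80% train, 20% test

with open(os.path.join(main_dir, 'train.txt'), 'w') as f:
    f.write('\n'.join(ids[:split]) + '\n')
with open(os.path.join(main_dir, 'test.txt'), 'w') as f:
    f.write('\n'.join(ids[split:]) + '\n')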
  

Two files are generated in the Main folder: train.txt and test.txt.

These two files list the image IDs (filenames without the extension); they are needed later when converting the xml annotations into the .txt file.
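
For illustration, train.txt then looks something like this (the actual IDs depend on which images survived the filtering):

000005
000007
000009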

Under the root directory, also create a data\dataset\voc_train.txt file. (You can put this file wherever you like; just update the corresponding paths in the code.)
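
If you prefer to create this from Python, a minimal sketch (same root path as above):

import os

dataset_dir = 'D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/data/dataset'
os.makedirs(dataset_dir, exist_ok=True)                         # creates data/ and data/dataset/
open(os.path.join(dataset_dir, 'voc_train.txt'), 'a').close()   # empty placeholder file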

Finally, run the script below to convert the xml files into our txt annotation file:

import os
import argparse
import xml.etree.ElementTree as ET

def convert_voc_annotation(data_path, data_type, anno_path, use_difficult_bbox=True):

    classes = [ 'bird']  # change this to the classes you extracted in step 1
    img_inds_file = os.path.join(data_path, 'ImageSets_ssd', 'Main', data_type + '.txt')
    print(img_inds_file)
    with open(img_inds_file, 'r') as f:
        txt = f.readlines()
        image_inds = [line.strip() for line in txt]

    with open(anno_path, 'a') as f:
        for image_ind in image_inds:
            image_path = os.path.join(data_path, 'JPEGImages_ssd', image_ind + '.jpg')
            annotation = image_path
            label_path = os.path.join(data_path, 'Annotations_ssd', image_ind + '.xml')
            root = ET.parse(label_path).getroot()
            objects = root.findall('object')
            for obj in objects:
                difficult = obj.find('difficult').text.strip()
                if (not use_difficult_bbox) and (int(difficult) == 1):
                    continue
                bbox = obj.find('bndbox')
                class_ind = classes.index(obj.find('name').text.lower().strip())
                xmin = bbox.find('xmin').text.strip()
                xmax = bbox.find('xmax').text.strip()
                ymin = bbox.find('ymin').text.strip()
                ymax = bbox.find('ymax').text.strip()
                annotation += ' ' + ','.join([xmin, ymin, xmax, ymax, str(class_ind)])
            print(annotation)
            f.write(annotation + "\n")
    return len(image_inds)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_path", default="D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/")
    parser.add_argument("--train_annotation", default="D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/data/dataset/voc_train.txt")
    parser.add_argument("--test_annotation",  default="D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/data/dataset/voc_test.txt")
    flags = parser.parse_args()

    if os.path.exists(flags.train_annotation):os.remove(flags.train_annotation)
    if os.path.exists(flags.test_annotation):os.remove(flags.test_annotation)

    num1 = convert_voc_annotation(os.path.join(flags.data_path), 'train', flags.train_annotation, False)
    # num2 = convert_voc_annotation(os.path.join(flags.data_path, 'train/VOCdevkit/VOC2012'), 'trainval', flags.train_annotation, False)
    # num3 = convert_voc_annotation(os.path.join(flags.data_path, 'test/VOCdevkit/VOC2007'), 'test', flags.test_annotation, False)
    # print('=> The number of image for train is: %d\tThe number of image for test is:%d' %(num1 + num2, num3))

That completes the conversion. The generated voc_train.txt sits under data/dataset in the common directory. Each line contains the image path followed by one xmin,ymin,xmax,ymax,class_id group per box.
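
For illustration, a line of voc_train.txt looks like this (image ID and box coordinates are made up; the format comes straight from the script above):

D:/tensorflow-yolov3/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages_ssd/000005.jpg 263,211,324,339,0 165,264,253,372,0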

2. Replacing giou_loss with a squared-error loss

Just copy the code below into yolov3.py. The functional change is in loss_layer: the original GIoU box-loss term is replaced by squared errors on the grid-relative x/y offsets and on the log-space w/h ratios.

#! /usr/bin/env python
# coding=utf-8

import numpy as np
import tensorflow as tf
import core.utils as utils
import core.common as common
import core.backbone as backbone
from core.config import cfg


class YOLOV3(object):
    """Implement tensoflow yolov3 here"""

    def __init__(self, input_data, trainable):

        self.trainable = trainable
        self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
        self.num_class = len(self.classes)
        self.strides = np.array(cfg.YOLO.STRIDES)
        self.anchors = utils.get_anchors(cfg.YOLO.ANCHORS)
        self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
        self.iou_loss_thresh = cfg.YOLO.IOU_LOSS_THRESH
        self.upsample_method = cfg.YOLO.UPSAMPLE_METHOD

        try:
            self.conv_lbbox, self.conv_mbbox, self.conv_sbbox = self.__build_network(input_data)
        except:
            raise NotImplementedError("Can not build up yolov3 network!")

        with tf.variable_scope('pred_sbbox'):
            self.pred_sbbox_offset, self.pred_sbbox = self.decode(self.conv_sbbox, self.anchors[0], self.strides[0])

        with tf.variable_scope('pred_mbbox'):
            self.pred_mbbox_offset, self.pred_mbbox = self.decode(self.conv_mbbox, self.anchors[1], self.strides[1])

        with tf.variable_scope('pred_lbbox'):
            self.pred_lbbox_offset, self.pred_lbbox = self.decode(self.conv_lbbox, self.anchors[2], self.strides[2])

        """
        with tf.variable_scope('pred_multi_scale'):
            self.pred_multi_scale = tf.concat([tf.reshape(self.pred_sbbox, [-1, 85]),
                                               tf.reshape(self.pred_mbbox, [-1, 85]),
                                               tf.reshape(self.pred_lbbox, [-1, 85])], axis=0, name='concat')
        """
        # hand-coded the dimensions: if 608, use 19; if 416, use 13
        with tf.variable_scope('pred_multi_scale'):
            self.pred_multi_scale = tf.concat([tf.reshape(self.pred_sbbox, [-1, 13, 13, 6]),
                                               tf.reshape(self.pred_mbbox, [-1, 13, 13, 6]),
                                               tf.reshape(self.pred_lbbox, [-1, 13, 13, 6])], axis=0, name='concat')
        # note: when training on your own dataset, change 85 to (number of classes + 5) before converting the model
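        # e.g. with a single class: 5 + 1 = 6 values per anchor, which is why the
        # reshape above uses [-1, 13, 13, 6]; with 80 classes it would be 85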

    def __build_network(self, input_data):

        route_1, route_2, input_data = backbone.darknet53(input_data, self.trainable)

        # input_data is -1*13*13*1024
        input_data = common.convolutional(input_data, (1, 1, 1024, 512), self.trainable, 'conv52')
        input_data = common.convolutional(input_data, (3, 3, 512, 1024), self.trainable, 'conv53')
        input_data = common.convolutional(input_data, (1, 1, 1024, 512), self.trainable, 'conv54')
        input_data = common.convolutional(input_data, (3, 3, 512, 1024), self.trainable, 'conv55')
        input_data = common.convolutional(input_data, (1, 1, 1024, 512), self.trainable, 'conv56')

        conv_lobj_branch = common.convolutional(input_data, (3, 3, 512, 1024), self.trainable, name='conv_lobj_branch')
        conv_lbbox = common.convolutional(conv_lobj_branch, (1, 1, 1024, 3 * (self.num_class + 5)),
                                          trainable=self.trainable, name='conv_lbbox', activate=False, bn=False)

        input_data = common.convolutional(input_data, (1, 1, 512, 256), self.trainable, 'conv57')

        # upsample input_data (1/32) to match route_2 (1/16); result is -1*26*26*256
        input_data = common.upsample(input_data, name='upsample0', method=self.upsample_method)

        with tf.variable_scope('route_1'):
            # route_2 is -1*26*26*512, so after concat input_data is -1*26*26*768
            input_data = tf.concat([input_data, route_2], axis=-1)

        input_data = common.convolutional(input_data, (1, 1, 768, 256), self.trainable, 'conv58')
        input_data = common.convolutional(input_data, (3, 3, 256, 512), self.trainable, 'conv59')
        input_data = common.convolutional(input_data, (1, 1, 512, 256), self.trainable, 'conv60')
        input_data = common.convolutional(input_data, (3, 3, 256, 512), self.trainable, 'conv61')
        input_data = common.convolutional(input_data, (1, 1, 512, 256), self.trainable, 'conv62')

        conv_mobj_branch = common.convolutional(input_data, (3, 3, 256, 512), self.trainable, name='conv_mobj_branch')
        conv_mbbox = common.convolutional(conv_mobj_branch, (1, 1, 512, 3 * (self.num_class + 5)),
                                          trainable=self.trainable, name='conv_mbbox', activate=False, bn=False)

        input_data = common.convolutional(input_data, (1, 1, 256, 128), self.trainable, 'conv63')
        input_data = common.upsample(input_data, name='upsample1', method=self.upsample_method)

        with tf.variable_scope('route_2'):
            input_data = tf.concat([input_data, route_1], axis=-1)

        input_data = common.convolutional(input_data, (1, 1, 384, 128), self.trainable, 'conv64')
        input_data = common.convolutional(input_data, (3, 3, 128, 256), self.trainable, 'conv65')
        input_data = common.convolutional(input_data, (1, 1, 256, 128), self.trainable, 'conv66')
        input_data = common.convolutional(input_data, (3, 3, 128, 256), self.trainable, 'conv67')
        input_data = common.convolutional(input_data, (1, 1, 256, 128), self.trainable, 'conv68')

        conv_sobj_branch = common.convolutional(input_data, (3, 3, 128, 256), self.trainable, name='conv_sobj_branch')
        conv_sbbox = common.convolutional(conv_sobj_branch, (1, 1, 256, 3 * (self.num_class + 5)),
                                          trainable=self.trainable, name='conv_sbbox', activate=False, bn=False)

        # output dims: -1*13*13*C, -1*26*26*C, -1*52*52*C with C = 3*(num_class+5), e.g. 255 for 80 classes
        return conv_lbbox, conv_mbbox, conv_sbbox

    def decode(self, conv_output, anchors, stride):
        """
        return tensor of shape [batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes]
               contains (x, y, w, h, score, probability)
        """

        conv_shape = tf.shape(conv_output)
        batch_size = conv_shape[0]
        output_size = conv_shape[1]
        # number of anchors
        anchor_per_scale = len(anchors)

        conv_output = tf.reshape(conv_output,
                                 (batch_size, output_size, output_size, anchor_per_scale, 5 + self.num_class))

        conv_raw_dxdy = conv_output[:, :, :, :, 0:2]
        conv_raw_dwdh = conv_output[:, :, :, :, 2:4]
        conv_raw_conf = conv_output[:, :, :, :, 4:5]
        conv_raw_prob = conv_output[:, :, :, :, 5:]

        # tf.tile creates a new tensor by replicating the input m times
        y = tf.tile(tf.range(output_size, dtype=tf.int32)[:, tf.newaxis], [1, output_size])
        x = tf.tile(tf.range(output_size, dtype=tf.int32)[tf.newaxis, :], [output_size, 1])

        xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1)
        xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, anchor_per_scale, 1])
        xy_grid = tf.cast(xy_grid, tf.float32)

        # tf.sigmoid(dxdy) gives the relative position within a grid cell. Adding the position of the cell (xy_grid)
        # multiplying stride scales the relative positions to the original image
        pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * stride
        # tf.exp() scales the anchors larger or smaller or changes the shape
        pred_wh = (tf.exp(conv_raw_dwdh) * anchors) * stride
        pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)

        pred_conf = tf.sigmoid(conv_raw_conf)
        pred_prob = tf.sigmoid(conv_raw_prob)

        return xy_grid, tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)

    def focal(self, target, actual, alpha=1, gamma=2):
        focal_loss = alpha * tf.pow(tf.abs(target - actual), gamma)
        return focal_loss

    def bbox_giou(self, boxes1, boxes2):

        boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                            boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
        boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                            boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)

        boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]),
                            tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1)
        boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]),
                            tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1)

        boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
        boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

        left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
        right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])

        inter_section = tf.maximum(right_down - left_up, 0.0)
        inter_area = inter_section[..., 0] * inter_section[..., 1]
        union_area = boxes1_area + boxes2_area - inter_area
        iou = inter_area / tf.maximum(union_area, 1e-12)  # avoid NaN when the learning rate is set too high

        enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2])
        enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:])
        enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0)
        enclose_area = enclose[..., 0] * enclose[..., 1]
        giou = iou - 1.0 * (enclose_area - union_area) / tf.maximum(enclose_area, 1e-12)
        # avoid NaN when the learning rate is set too high
        return giou
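    # for reference: GIoU = IoU - (enclose_area - union_area) / enclose_area,
    # which stays informative (goes negative) even when the boxes do not overlap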

    def bbox_iou(self, boxes1, boxes2):

        boxes1_area = boxes1[..., 2] * boxes1[..., 3]
        boxes2_area = boxes2[..., 2] * boxes2[..., 3]

        boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                            boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
        boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                            boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)

        left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
        right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])

        inter_section = tf.maximum(right_down - left_up, 0.0)
        inter_area = inter_section[..., 0] * inter_section[..., 1]
        union_area = boxes1_area + boxes2_area - inter_area
        iou = inter_area / tf.maximum(union_area, 1e-12)  # avoid NaN when the learning rate is set too high

        return iou

    def loss_layer(self, x_y_offset, conv, pred, label, bboxes, anchors, stride):

        conv_shape = tf.shape(conv)
        batch_size = conv_shape[0]
        output_size = conv_shape[1]
        input_size = stride * output_size
        # conv is reshaped here to 5 dimensions
        conv = tf.reshape(conv, (batch_size, output_size, output_size,
                                 self.anchor_per_scale, 5 + self.num_class))
        # this is the logit before going into sigmoid functions
        conv_raw_conf = conv[:, :, :, :, 4:5]
        conv_raw_prob = conv[:, :, :, :, 5:]

        pred_xywh = pred[:, :, :, :, 0:4]
        pred_conf = pred[:, :, :, :, 4:5]

        # true coordinates (x, y, w, h)
        label_xywh = label[:, :, :, :, 0:4]
        # respond_bbox is the objectness mask: 1 where a ground-truth box is assigned to this anchor, else 0
        respond_bbox = label[:, :, :, :, 4:5]
        # true probabilities
        label_prob = label[:, :, :, :, 5:]

        # the original GIoU term (kept for reference) is replaced below by squared errors
        # giou = tf.expand_dims(self.bbox_giou(pred_xywh, label_xywh), axis=-1)
        true_xy = label_xywh[..., 0:2] / stride - x_y_offset
        pred_xy = pred_xywh[..., 0:2] / stride - x_y_offset

        true_tw_th = label_xywh[..., 2:4] / anchors
        pred_tw_th = pred_xywh[..., 2:4] / anchors

        true_tw_th = tf.where(condition=tf.equal(true_tw_th, 0),
                              x=tf.ones_like(true_tw_th), y=true_tw_th)
        pred_tw_th = tf.where(condition=tf.equal(pred_tw_th, 0),
                              x=tf.ones_like(pred_tw_th), y=pred_tw_th)
        input_size = tf.cast(input_size, tf.float32)

        true_tw_th = tf.log(tf.clip_by_value(true_tw_th, 1e-9, 1e9))
        pred_tw_th = tf.log(tf.clip_by_value(pred_tw_th, 1e-9, 1e9))

        bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
        # giou_loss = respond_bbox * bbox_loss_scale * (1- giou)

        xy_loss = tf.reduce_sum(tf.square(true_xy - pred_xy) * respond_bbox * bbox_loss_scale)
        wh_loss = tf.reduce_sum(tf.square(true_tw_th - pred_tw_th) * respond_bbox * bbox_loss_scale)
        # giou_loss = xy_loss + wh_loss

        # bboxes (true_bboxes) and pred_xywh are used to compute iou
        iou = self.bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :], bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
        max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)

        respond_bgd = (1.0 - respond_bbox) * tf.cast(max_iou < self.iou_loss_thresh, tf.float32)

        conf_focal = self.focal(respond_bbox, pred_conf)

        conf_loss = conf_focal * (
                respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
                +
                respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
        )

        # cross-entropy for classifications
        prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob)

        giou_loss = xy_loss + wh_loss  # both are already scalars, summed over the whole batch above
        conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1, 2, 3, 4]))
        prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1, 2, 3, 4]))

        return giou_loss, conf_loss, prob_loss

    def compute_loss(self, label_sbbox, label_mbbox, label_lbbox, true_sbbox, true_mbbox, true_lbbox):

        with tf.name_scope('smaller_box_loss'):
            loss_sbbox = self.loss_layer(self.pred_sbbox_offset, self.conv_sbbox, self.pred_sbbox, label_sbbox,
                                         true_sbbox,
                                         anchors=self.anchors[0], stride=self.strides[0])

        with tf.name_scope('medium_box_loss'):
            loss_mbbox = self.loss_layer(self.pred_mbbox_offset, self.conv_mbbox, self.pred_mbbox, label_mbbox,
                                         true_mbbox,
                                         anchors=self.anchors[1], stride=self.strides[1])

        with tf.name_scope('bigger_box_loss'):
            loss_lbbox = self.loss_layer(self.pred_lbbox_offset, self.conv_lbbox, self.pred_lbbox, label_lbbox,
                                         true_lbbox,
                                         anchors=self.anchors[2], stride=self.strides[2])

        with tf.name_scope('giou_loss'):
            giou_loss = loss_sbbox[0] + loss_mbbox[0] + loss_lbbox[0]

        with tf.name_scope('conf_loss'):
            conf_loss = loss_sbbox[1] + loss_mbbox[1] + loss_lbbox[1]

        with tf.name_scope('prob_loss'):
            prob_loss = loss_sbbox[2] + loss_mbbox[2] + loss_lbbox[2]

        return giou_loss, conf_loss, prob_loss
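
As a sanity check of the modified box loss outside the TensorFlow graph, here is a minimal NumPy sketch of the same arithmetic for a single assigned box (all numbers are made up; names mirror loss_layer, and respond_bbox is 1 for this box):

import numpy as np

stride = 32.0
anchor = np.array([3.6, 2.8])             # one anchor (w, h); units as in your anchor file
x_y_offset = np.array([6.0, 6.0])         # grid cell holding the box center
input_size = 416.0

label_xywh = np.array([208.0, 208.0, 64.0, 96.0])   # ground truth (x, y, w, h), illustrative
pred_xywh  = np.array([214.0, 202.0, 70.0, 90.0])   # prediction, illustrative

# grid-relative center offsets, as in loss_layer
true_xy = label_xywh[0:2] / stride - x_y_offset
pred_xy = pred_xywh[0:2] / stride - x_y_offset

# log-space w/h ratios against the anchor (the zero-guard tf.where step is
# skipped here because the example widths/heights are nonzero)
true_tw_th = np.log(np.clip(label_xywh[2:4] / anchor, 1e-9, 1e9))
pred_tw_th = np.log(np.clip(pred_xywh[2:4] / anchor, 1e-9, 1e9))

# small boxes get up-weighted, exactly as in the graph code
bbox_loss_scale = 2.0 - label_xywh[2] * label_xywh[3] / input_size ** 2

xy_loss = np.sum(np.square(true_xy - pred_xy) * bbox_loss_scale)
wh_loss = np.sum(np.square(true_tw_th - pred_tw_th) * bbox_loss_scale)
print(xy_loss + wh_loss)   # the squared-error box loss contributed by this box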