本文暂时只介绍 image-mask 型数据集, 以人手分割数据集 EGTEA Gaze+ 为例.
准备数据文件夹
- 需要将 Image 和 Mask 分开存放, 对应文件的文件名 (不含扩展名) 必须保持一致. 提醒: Mask 图像一般为 png 单通道.
- EGTEA Gaze+ 数据集下载解压后即得到如下的目录, 无需处理.
hand14k
┣━ Images
┃ ┣━ OP01-R01-PastaSalad_000014.jpg
┃ ┣━ OP01-R01-PastaSalad_000015.jpg
┃ ┣━ OP01-R01-PastaSalad_000016.jpg
┃ ┗━ ···
┗━ Masks
  ┣━ OP01-R01-PastaSalad_000014.png
  ┣━ OP01-R01-PastaSalad_000015.png
  ┣━ OP01-R01-PastaSalad_000016.png
  ┗━ ···
生成路径文件, 划分数据集
脚本如下:
import cv2 as cv
import numpy as np
import PIL.Image as Image
import os
# Fix NumPy's global RNG seed so the np.random calls below give the same results on every run.
np.random.seed(42)
def split_dataset():
    """Pair every image with its mask and write the train/test path files.

    Lists ``./Images/`` and ``./Masks/`` and pairs files whose names are
    equal once the extension is removed. Each pair is randomly assigned to
    ``./train.data`` (probability ``split_ratio``) or ``./test.data``.
    Every output line holds the image path, a tab, the mask path and a
    newline, encoded as UTF-8.

    Images without a matching mask (and masks without an image) are
    skipped. Nothing is written when both path files already exist.
    """
    # os.listdir returns entries in arbitrary order; sort so that the
    # seeded random split is reproducible.
    images_path = "./Images/"
    images_list = sorted(os.listdir(images_path))

    # Index the masks by file stem. Pairing by name instead of by position
    # means a single missing or extra file cannot misalign all later pairs.
    labels_path = "./Masks/"
    masks_by_stem = {
        os.path.splitext(name)[0]: name
        for name in sorted(os.listdir(labels_path))
    }

    train_file = "./train.data"
    test_file = "./test.data"
    if os.path.isfile(train_file) and os.path.isfile(test_file):
        return

    split_ratio = 0.8
    # Binary mode keeps the line endings identical on every operating system;
    # the with-statement closes both files even if a write fails.
    with open(train_file, "wb") as train_fp, open(test_file, "wb") as test_fp:
        for image_name in images_list:
            label_name = masks_by_stem.get(os.path.splitext(image_name)[0])
            if label_name is None:
                continue
            image = os.path.join(images_path, image_name)
            label = os.path.join(labels_path, label_name)
            target = train_fp if np.random.rand() < split_ratio else test_fp
            target.write((image + "\t" + label + "\n").encode("utf-8"))
    print("成功划分数据集!")
def read_image(path):
    """Load the image at *path* as a NumPy array.

    A two-dimensional result is replicated into three identical channels;
    any other array is returned unchanged.
    """
    pixels = np.array(Image.open(path))
    if pixels.ndim != 2:
        return pixels
    return cv.merge([pixels, pixels, pixels])
def test_read():
    """Show one randomly chosen image/mask pair listed in ``./test.data``.

    Each line of the path file is expected to hold an image path and a mask
    path separated by a tab. The pair is displayed in two OpenCV windows
    until a key is pressed.

    Raises:
        ValueError: if the path file contains no entries.
    """
    data_file = "./test.data"
    datalist = []
    with open(data_file, 'rb') as f:
        for raw in f:
            line = raw.decode('utf-8').rstrip('\r\n')
            if line:  # tolerate blank lines
                datalist.append(tuple(line.split('\t')))
    if not datalist:
        raise ValueError("no image/mask pairs found in " + data_file)
    # Bound the index by the real number of entries; a hard-coded bound
    # fails on small sets and never samples the tail of large ones.
    item = datalist[np.random.randint(len(datalist))]
    image = read_image(item[0])
    mask = read_image(item[1])
    cv.imshow("image", image)
    cv.imshow("mask", mask)
    cv.waitKey(0)
    cv.destroyAllWindows()
if __name__ == "__main__":
    # Write the path files first, then preview one sample read back from them.
    split_dataset()
    test_read()
派生 Dataset 类
class MyDataset(Dataset):
    """Image/mask dataset driven by a tab-separated path file."""

    def __init__(
        self, data_file, data_dir, transform_trn=None, transform_val=None
    ):
        """Read the list of (image, mask) path pairs.

        Args:
            data_file (string): Path to the data file with annotations; one
                "image<TAB>mask" pair per line, UTF-8 encoded.
            data_dir (string): Directory that the listed paths are joined to.
            transform_{trn, val} (callable, optional): Optional transform
                applied to a sample in the 'train' / 'val' stage.
        """
        pairs = []
        with open(data_file, 'rb') as f:
            for raw_line in f.readlines():
                img_path, msk_path = (
                    raw_line.decode('utf-8').strip('\n').split('\t')
                )
                pairs.append((img_path, msk_path))
        self.datalist = pairs
        self.root_dir = data_dir
        self.transform_trn = transform_trn
        self.transform_val = transform_val
        # A new dataset starts in the training stage.
        self.stage = 'train'

    def set_stage(self, stage):
        """Select which transform __getitem__ applies ('train' or 'val')."""
        self.stage = stage

    def __len__(self):
        """Return the number of (image, mask) pairs."""
        return len(self.datalist)

    @staticmethod
    def _load_rgb(path):
        """Load an image; a 2-D (grayscale) array is tiled to three channels."""
        pixels = np.array(Image.open(path))
        if len(pixels.shape) != 2:
            return pixels
        return np.tile(pixels, [3, 1, 1]).transpose(1, 2, 0)

    def __getitem__(self, idx):
        """Return {'image': ..., 'mask': ...} for entry *idx*, transformed
        according to the current stage."""
        rel_image, rel_mask = self.datalist[idx][0], self.datalist[idx][1]
        img_name = os.path.join(self.root_dir, rel_image)
        msk_name = os.path.join(self.root_dir, rel_mask)

        image = self._load_rgb(img_name)
        mask = np.array(Image.open(msk_name))
        # The 2-D check is skipped only when image and mask are the same file.
        assert img_name == msk_name or len(mask.shape) == 2, \
            'Masks must be encoded without colourmap'

        sample = {'image': image, 'mask': mask}
        if self.stage == 'train':
            transform = self.transform_trn
        elif self.stage == 'val':
            transform = self.transform_val
        else:
            transform = None
        return transform(sample) if transform else sample
构造DataLoader
# Define the transforms: augmentation for training, normalisation only for
# validation.
composed_trn = transforms.Compose([ResizeShorterScale(shorter_side, low_scale, high_scale),
                                   Pad(crop_size, [123.675, 116.28, 103.53], ignore_label),
                                   RandomMirror(),
                                   RandomCrop(crop_size),
                                   Normalise(*normalise_params),
                                   ToTensor()])
composed_val = transforms.Compose([Normalise(*normalise_params),
                                   ToTensor()])
# Build the datasets.
trainset = MyDataset(data_file=train_list,
                     data_dir=train_dir,
                     transform_trn=composed_trn,
                     transform_val=composed_val)
valset = MyDataset(data_file=val_list,
                   data_dir=val_dir,
                   transform_trn=None,
                   transform_val=composed_val)
# MyDataset starts in the 'train' stage, where valset has no transform;
# switch it to 'val' so that composed_val is actually applied.
valset.set_stage('val')
# Build the loaders.
train_loader = DataLoader(trainset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=num_workers,
                          pin_memory=True,
                          drop_last=True)
val_loader = DataLoader(valset,
                        batch_size=1,
                        shuffle=False,
                        num_workers=num_workers,
                        pin_memory=True)
训练
for i, sample in enumerate(train_loader):
    # Move the batch to the GPU.
    image = sample['image'].cuda()
    target = sample['mask'].cuda()
    # torch.autograd.Variable has been a deprecated no-op wrapper since
    # PyTorch 0.4; plain tensors are cast directly instead.
    image_var = image.float()
    target_var = target.long()
    # Compute output
    output = net(image_var)
    ...