numpy高效实现双线性插值

如题,双线性插值算法在计算机视觉中运用广泛。
无论是传统的CV算法还是深度学习,都会用到。
例如图像的缩放、RoI Align、Deformable Convolution。

准备面试时发现网上虽然有双线性插值的图像缩放实现,但是大部分都是纯 Python 循环版,没有用广播加速,需要在 Python 层面逐像素循环 O(nm) 次(n、m 分别是图像的高和宽),效率很低。
于是我写了一份,以供大家参考。
这份代码做了两件事:

  1. 高效广播
  2. 解决几何中心点不重叠问题
import cv2
import numpy as np

def np_repeat_2d(a, repeats, axis=0):
    """Stack ``repeats`` copies of a 2-D array along a new leading axis.

    Parameters
    ---------
    a : numpy.ndarray. shape = (rows, cols)
    repeats : int. number of copies to stack
    axis : ignored; the copies are always stacked along a new axis 0.
        The parameter is accepted only so existing calls keep working.
    Returns
    -------
    numpy.ndarray. shape = (repeats, rows, cols)
    """
    assert a.ndim == 2
    # Prepend a length-1 axis, then tile only along that axis.
    return np.tile(a[np.newaxis, ...], (repeats, 1, 1))

def generate_coords(batch_size, input_height, input_width, output_height, output_width):
    """Build the input-image sampling position of every output pixel.

    Output pixel centres are mapped onto the input grid with the
    half-pixel rule ``src = (dst + 0.5) / dst_size * src_size - 0.5`` so the
    geometric centres of both images coincide. The resulting positions may
    fall slightly outside ``[0, size - 1]``; they are not clamped here.

    Parameters
    ---------
    batch_size : int. number of times the coordinate grid is repeated
    input_height, input_width : int. size of the source image
    output_height, output_width : int. size of the target image
    Returns
    -------
    numpy.ndarray. shape = (batch_size, output_height * output_width, 2)
        Last axis is (row, col); points are ordered row-major over the
        output grid.
    """
    # Compute the 1-D source coordinates first and mesh them afterwards.
    # The per-element arithmetic is the same as meshing first, but the
    # meshgrid result is never assigned into, so this works whether
    # np.meshgrid returns a list or a tuple.
    src_rows = (np.arange(output_height) + 0.5) / output_height * input_height - 0.5
    src_cols = (np.arange(output_width) + 0.5) / output_width * input_width - 0.5
    row_grid, col_grid = np.meshgrid(src_rows, src_cols, indexing='ij')

    points = np.stack((row_grid, col_grid), axis=-1).reshape(-1, 2)
    return np_repeat_2d(points, batch_size)

def map_coordinates(input, coords):
    """Sample ``input`` at fractional positions using bilinear interpolation.

    Parameters
    ---------
    input : numpy.ndarray. shape = (batch_size, h, w)
    coords : numpy.ndarray. shape = (batch_size, n_points, 2)
        ``coords[..., 0]`` is the row (y) position and ``coords[..., 1]`` the
        column (x) position. Positions outside the image are clamped to the
        nearest border pixel. A ``coords`` of shape (n_points, 2) or
        (1, n_points, 2) is broadcast over the batch.
    Returns
    -------
    numpy.ndarray. shape = (batch_size, n_points)
    """
    batch_size, input_height, input_width = input.shape

    # Clamp so that both the floor and the ceil neighbour are valid indices.
    rows = np.clip(coords[..., 0], 0, input_height - 1)
    cols = np.clip(coords[..., 1], 0, input_width - 1)

    # Integer neighbours around each sample point. np.intp is the native
    # indexing dtype, so large inputs cannot overflow the way a hand-built
    # int32 flat index would.
    row_lo = np.floor(rows).astype(np.intp)
    row_hi = np.ceil(rows).astype(np.intp)
    col_lo = np.floor(cols).astype(np.intp)
    col_hi = np.ceil(cols).astype(np.intp)

    # Column vector of batch indices; broadcasts against (batch, n_points).
    batch_idx = np.arange(batch_size)[:, np.newaxis]

    # Gather the four neighbours directly, without flattening (copying) input.
    val_lo_lo = input[batch_idx, row_lo, col_lo]
    val_hi_lo = input[batch_idx, row_hi, col_lo]
    val_lo_hi = input[batch_idx, row_lo, col_hi]
    val_hi_hi = input[batch_idx, row_hi, col_hi]

    # Fractional distance from the floor neighbour along each axis.
    row_frac = rows - row_lo
    col_frac = cols - col_lo

    # Interpolate along rows at the two neighbouring columns, then blend
    # those two results along the column axis.
    at_col_lo = row_frac * val_hi_lo + (1 - row_frac) * val_lo_lo
    at_col_hi = row_frac * val_hi_hi + (1 - row_frac) * val_lo_hi

    return col_frac * at_col_hi + (1 - col_frac) * at_col_lo

def bi_linear_interpolate(input, output_height, output_width):
    """Resize every 2-D slice of ``input`` to the target size.

    The sampling positions come from ``generate_coords`` and the values
    from ``map_coordinates``; this function only wires the two together
    and restores the image layout.

    Parameters
    ---------
    input : numpy.ndarray. shape = (batch_size, height, width)
    output_height: int. value > 0
    output_width: int. value > 0
    Returns
    -------
    numpy.ndarray. shape = (batch_size, output_height, output_width)
    """
    n_maps, src_height, src_width = input.shape

    sample_points = generate_coords(
        n_maps, src_height, src_width, output_height, output_width
    )
    # map_coordinates returns one flat row of samples per slice; fold each
    # row back into an (output_height, output_width) image.
    resized = map_coordinates(input, sample_points)
    return resized.reshape((n_maps, output_height, output_width))

# input numpy.ndarray.shape (h, w, 3)
img = cv2.imread("./Lenna_(test_image).png")
input = img.transpose(2, 0, 1)
output = bi_linear_interpolate(input, 400, 800).transpose((1, 2, 0))

import matplotlib.pyplot as plt
%matplotlib inline
plt.subplot(1, 2, 1)
plt.imshow(img[..., ::-1].astype(np.uint8))
plt.subplot(1, 2, 2)
plt.imshow(output[..., ::-1].astype(np.uint8))
cv2.imwrite('./scaled_lena_numpy.png', mapped_vals)

运行结果

参考资料

  1. deformable convolution 内部实现
  2. deformable convolution API实现