numpy高效实现双线性插值
如题,双线性插值算法在计算机视觉中运用广泛。
无论是传统的CV算法还是深度学习,都会用到。
例如图像的缩放、RoI Align、Deformable Convolution。
准备面试时发现网上虽然有双线性插值的图像缩放实现,但是大部分都是纯 Python 循环版,没有用 NumPy 广播加速,需要在解释器里逐像素执行 O(nm) 次循环(n、m 分别是图像的高和宽),效率很低。
于是我写了一份,以供大家参考。
这份代码做了两件事:
- 高效广播
- 解决几何中心点不重合问题(坐标映射时加 0.5 像素偏移,使源图像与目标图像的几何中心对齐)
import numpy as np


def np_repeat_2d(a, repeats, axis=0):
    """Stack ``repeats`` copies of a 2-D array along a new leading axis.

    Parameters
    ---------
    a : numpy.ndarray. shape = (rows, cols)
    repeats : int. number of copies to stack
    axis : ignored. Kept only so existing call sites keep working; the
        copies are always stacked along a new axis 0.

    Returns
    -------
    numpy.ndarray. shape = (repeats, rows, cols)

    Raises
    ------
    ValueError
        If ``a`` is not 2-D.
    """
    a = np.asarray(a)
    if a.ndim != 2:
        raise ValueError(
            "np_repeat_2d expects a 2-D array, got %d-D" % a.ndim
        )
    return np.tile(a[np.newaxis], (repeats, 1, 1))


def generate_coords(batch_size, input_height, input_width,
                    output_height, output_width):
    """Map every output pixel to fractional (row, col) source coordinates.

    Pixel centres are aligned: output index ``i`` maps to
    ``(i + 0.5) / output_size * input_size - 0.5`` so that the geometric
    centres of the source and the target grids coincide.

    Parameters
    ---------
    batch_size : int. number of copies of the coordinate grid
    input_height, input_width : int. size of the source grid
    output_height, output_width : int. size of the target grid

    Returns
    -------
    numpy.ndarray. shape = (batch_size, output_height * output_width, 2)
        Last axis is (row, col); points are in row-major order. Values may
        fall slightly outside the source grid (e.g. -0.25).
    """
    rows = (np.arange(output_height) + 0.5) / output_height * input_height - 0.5
    cols = (np.arange(output_width) + 0.5) / output_width * input_width - 0.5
    # Unpack instead of assigning into the meshgrid result: NumPy >= 2.0
    # returns a tuple, which does not support item assignment.
    grid_rows, grid_cols = np.meshgrid(rows, cols, indexing='ij')
    coords = np.stack([grid_rows, grid_cols], axis=-1).reshape(-1, 2)
    return np_repeat_2d(coords, batch_size)


def map_coordinates(input, coords):
    """Sample ``input`` at fractional coordinates by bilinear interpolation.

    Parameters
    ---------
    input : numpy.ndarray. shape = (batch_size, h, w)
    coords : numpy.ndarray. shape = (batch_size, n_points, 2)
        Last axis is (row, col). Coordinates outside the image are clipped
        to the border before sampling.

    Returns
    -------
    numpy.ndarray. shape = (batch_size, n_points), floating point
    """
    input = np.asarray(input)
    coords = np.asarray(coords)
    batch_size, input_height, input_width = input.shape

    rows = np.clip(coords[..., 0], 0, input_height - 1)
    cols = np.clip(coords[..., 1], 0, input_width - 1)

    # Integer neighbours surrounding each sample point.
    top = np.floor(rows).astype(np.intp)
    bottom = np.ceil(rows).astype(np.intp)
    left = np.floor(cols).astype(np.intp)
    right = np.ceil(cols).astype(np.intp)

    # Column vector of batch indices broadcasts against (batch, n_points)
    # index arrays, gathering all points in one indexing operation without
    # building flat offsets (which could overflow 32-bit integers).
    batch_idx = np.arange(batch_size)[:, np.newaxis]
    vals_tl = input[batch_idx, top, left]
    vals_tr = input[batch_idx, top, right]
    vals_bl = input[batch_idx, bottom, left]
    vals_br = input[batch_idx, bottom, right]

    # Fractional distance from the top-left neighbour. When a coordinate is
    # integral, floor == ceil and the weight is 0, so the result is exact.
    dy = rows - top
    dx = cols - left

    # Blend vertically along the left and right columns, then horizontally.
    vals_left = dy * vals_bl + (1 - dy) * vals_tl
    vals_right = dy * vals_br + (1 - dy) * vals_tr
    return dx * vals_right + (1 - dx) * vals_left


def bi_linear_interpolate(input, output_height, output_width):
    """rescale feature to target size

    Parameters
    ---------
    input : numpy.ndarray. shape = (batch_size, height, width)
    output_height: int. value > 0
    output_width: int. value > 0

    Returns
    -------
    numpy.ndarray. shape = (batch_size, output_height, output_width)
        Floating point, regardless of the dtype of ``input``.
    """
    input = np.asarray(input)
    n_maps, input_height, input_width = input.shape
    coords = generate_coords(n_maps, input_height, input_width,
                             output_height, output_width)
    mapped_vals = map_coordinates(input, coords)
    return mapped_vals.reshape((n_maps, output_height, output_width))


def main():
    """Demo: rescale the Lenna test image to 400x800, show it and save it."""
    # Imported here so the interpolation functions above stay importable on
    # machines without OpenCV or matplotlib installed.
    import cv2
    import matplotlib.pyplot as plt

    # cv2.imread yields an array of shape (h, w, 3) in BGR channel order,
    # or None when the file cannot be read.
    img = cv2.imread("./Lenna_(test_image).png")
    if img is None:
        raise FileNotFoundError("./Lenna_(test_image).png")

    # Treat the colour channels as the batch axis: (h, w, 3) -> (3, h, w).
    channels_first = img.transpose(2, 0, 1)
    output = bi_linear_interpolate(channels_first, 400, 800)
    output = output.transpose((1, 2, 0))
    # The interpolation result is floating point; convert back to 8-bit
    # before displaying and writing.
    output = np.clip(np.rint(output), 0, 255).astype(np.uint8)

    cv2.imwrite('./scaled_lena_numpy.png', output)

    # In a Jupyter notebook, run `%matplotlib inline` before calling main().
    plt.subplot(1, 2, 1)
    plt.imshow(img[..., ::-1])  # BGR -> RGB for matplotlib
    plt.subplot(1, 2, 2)
    plt.imshow(output[..., ::-1])
    plt.show()


if __name__ == "__main__":
    main()