题解 | #人脸关键点对齐#

题目链接

人脸关键点对齐

题目描述

给定一个输入图像矩阵 $A$ ，一个 $2 \times 3$ 的仿射矩阵 $M$ ，以及目标图像的尺寸，通过前向映射的方式对图像进行仿射变换。

变换公式：对于源图像中的像素点 $(x, y)$ （ $x$ 为列坐标, $y$ 为行坐标），其在新坐标系中的位置 $(x', y')$ 计算如下： $x' = a \cdot x + b \cdot y + t_x$ $y' = c \cdot x + d \cdot y + t_y$ 其中 $M = \begin{pmatrix} a & b & t_x \\ c & d & t_y \end{pmatrix}$ 。
映射规则：
- 遍历源图像的每个像素，计算其新坐标。
- 如果新坐标在目标图像范围内，则将源像素的值赋给目标图像的对应位置。
- 如果新坐标超出目标图像范围，则丢弃该像素。
- 目标图像中未被任何源像素覆盖的位置，其值保持为 0。
输出：将最终生成的目标图像按行展开（行优先），输出为一行由空格分隔的数字。

解题思路

这是一个纯粹的模拟题，核心是精确地实现前向映射的仿射变换过程。

我们可以将整个流程分解为以下几个步骤：

输入解析与初始化：
- 读取首行的三个整数：源图像的行数 source_height、仿射矩阵的行数（恒为 2）以及目标尺寸所占的行数（恒为 1）。随后读取 source_height 行像素数据，存入一个二维数组 source_image。在读取第一行像素时，可以确定源图像的列数 source_width。
- 读取 $2 \times 3$ 的仿射矩阵 M。
- 读取目标图像的尺寸 out_height 和 out_width。
- 创建一个大小为 out_height x out_width 的二维数组 target_image，并将其所有元素初始化为 0。
执行前向映射：
- 这是算法的核心。我们需要遍历源图像的每一个像素。
- 使用两层嵌套循环，外层循环遍历源图像的行 y (从 0 到 source_height - 1)，内层循环遍历源图像的列 x (从 0 到 source_width - 1)。
计算新坐标并赋值：
- 在循环体内，对于每个源像素坐标 (y, x)，我们从仿射矩阵中取出六个参数 a, b, tx, c, d, ty。
- 应用变换公式计算出新的坐标 (y', x')。
  - new_x = a*x + b*y + tx
  - new_y = c*x + d*y + ty
- 边界检查：判断新坐标 (new_y, new_x) 是否在目标图像的有效范围内。
  - 0 <= new_y < out_height
  - 0 <= new_x < out_width
- 如果检查通过，就执行赋值操作：
  - target_image[new_y][new_x] = source_image[y][x]
- 如果检查不通过，则不执行任何操作，该源像素被丢弃。
格式化输出：
- 当遍历完所有源像素后，target_image 就构建完成了。
- 最后，我们需要将这个二维矩阵“压平”成一行。
- 再次使用两层嵌套循环遍历 target_image，按行优先的顺序将每个像素值拼接成一个字符串或直接打印，并用空格分隔。

代码

cpp
java
python

#include <iostream>
#include <vector>
#include <string>
#include <sstream>

using namespace std;

// Reads an image, a 2x3 affine matrix and a target size from stdin, forward-maps
// every source pixel into the target image, and prints the target row by row
// as a single space-separated line.
//
// Input layout:
//   line 1      : a_rows m_rows o_rows  (rows of the image, matrix and size sections)
//   a_rows lines: one image row each; the width is taken from the first row
//   m_rows lines: three integers each; rows 0 and 1 are (a b tx) and (c d ty)
//   last line   : out_height out_width
int main() {
    ios::sync_with_stdio(false);
    cin.tie(NULL);

    int a_rows = 0, m_rows = 0, o_rows = 0;
    cin >> a_rows >> m_rows >> o_rows;
    (void)o_rows; // read only to consume the token
    if (a_rows < 0) {
        a_rows = 0; // a negative count would turn into a huge vector size
    }

    vector<vector<int>> source_image(a_rows);
    int source_cols = 0;
    string line;
    getline(cin, line); // Consume the rest of the first line

    for (int i = 0; i < a_rows; ++i) {
        getline(cin, line);
        stringstream ss(line);
        int pixel;
        while (ss >> pixel) {
            source_image[i].push_back(pixel);
        }
        if (i == 0) {
            source_cols = static_cast<int>(source_image[i].size());
        }
    }

    // Always keep at least two rows so that m[0] and m[1] are valid even when
    // the header announces fewer matrix rows; missing rows stay zero.
    const int m_alloc = m_rows < 2 ? 2 : m_rows;
    vector<vector<int>> m(m_alloc, vector<int>(3, 0));
    for (int i = 0; i < m_rows; ++i) {
        cin >> m[i][0] >> m[i][1] >> m[i][2];
    }

    int out_height = 0, out_width = 0;
    cin >> out_height >> out_width;
    if (out_height < 0) {
        out_height = 0;
    }
    if (out_width < 0) {
        out_width = 0;
    }

    vector<vector<int>> target_image(out_height, vector<int>(out_width, 0));

    // Coefficients are widened to 64 bits: int * int can overflow (undefined
    // behaviour) and a wrapped value could pass the range check below.
    const long long aff_a = m[0][0], b = m[0][1], tx = m[0][2];
    const long long c = m[1][0], d = m[1][1], ty = m[1][2];

    for (int y = 0; y < a_rows; ++y) {
        const vector<int>& row = source_image[y];
        // A row shorter than the first one only contributes the pixels it has.
        const int row_size = static_cast<int>(row.size());
        const int usable = row_size < source_cols ? row_size : source_cols;

        for (int x = 0; x < usable; ++x) {
            const long long new_x = aff_a * x + b * y + tx;
            const long long new_y = c * x + d * y + ty;

            if (new_x >= 0 && new_x < out_width && new_y >= 0 && new_y < out_height) {
                target_image[new_y][new_x] = row[x];
            }
        }
    }

    // Row-major flattening; a space follows every value except the very last.
    for (int i = 0; i < out_height; ++i) {
        for (int j = 0; j < out_width; ++j) {
            cout << target_image[i][j] << (j == out_width - 1 && i == out_height - 1 ? "" : " ");
        }
    }
    cout << "\n";

    return 0;
}

import java.util.Scanner;
import java.util.ArrayList;
import java.util.List;

/**
 * Forward-mapping affine warp of an integer image, driven by standard input.
 *
 * <p>Input layout: a header line with three integers (image rows, matrix rows,
 * size rows), then the image rows, then the six integers of the 2x3 matrix
 * {@code (a b tx / c d ty)}, then {@code outHeight outWidth}. The image width
 * is taken from the first image row.
 */
public class Main {
    /**
     * Reads the input, maps every source pixel {@code (x, y)} to
     * {@code (a*x + b*y + tx, c*x + d*y + ty)}, writes it into the target image
     * when that position lies inside it, and prints the target image row by row
     * as one space-separated line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);

        int aRows = sc.nextInt();
        sc.nextInt(); // m, always 2
        sc.nextInt(); // o, always 1
        sc.nextLine(); // consume the rest of the header line

        int[][] sourceImage = new int[Math.max(aRows, 0)][];
        for (int i = 0; i < sourceImage.length; i++) {
            sourceImage[i] = parseRow(sc.nextLine());
        }
        int sourceCols = sourceImage.length > 0 ? sourceImage[0].length : 0;

        // Stored as long so the coordinate arithmetic below cannot wrap around.
        long[][] m = new long[2][3];
        for (int i = 0; i < 2; i++) {
            for (int j = 0; j < 3; j++) {
                m[i][j] = sc.nextInt();
            }
        }

        int outHeight = sc.nextInt();
        int outWidth = sc.nextInt();

        int[][] targetImage = new int[outHeight][outWidth];

        long a = m[0][0], b = m[0][1], tx = m[0][2];
        long c = m[1][0], d = m[1][1], ty = m[1][2];

        for (int y = 0; y < sourceImage.length; y++) {
            for (int x = 0; x < sourceCols; x++) {
                long newX = a * x + b * y + tx;
                long newY = c * x + d * y + ty;

                if (newX >= 0 && newX < outWidth && newY >= 0 && newY < outHeight) {
                    // The range check guarantees both values fit in an int.
                    targetImage[(int) newY][(int) newX] = sourceImage[y][x];
                }
            }
        }

        // Row-major flattening with single spaces between values.
        StringBuilder sb = new StringBuilder();
        for (int[] row : targetImage) {
            for (int value : row) {
                if (sb.length() > 0) {
                    sb.append(' ');
                }
                sb.append(value);
            }
        }
        System.out.println(sb.toString());
    }

    /**
     * Parses one line of whitespace-separated integers.
     *
     * <p>Leading/trailing whitespace and runs of spaces or tabs are tolerated;
     * a blank line yields an empty row.
     *
     * @param line raw text of one image row
     * @return the integers found on the line, in order
     * @throws NumberFormatException if a token is not a valid {@code int}
     */
    private static int[] parseRow(String line) {
        String trimmed = line.trim();
        if (trimmed.isEmpty()) {
            return new int[0];
        }
        String[] parts = trimmed.split("\\s+");
        int[] row = new int[parts.length];
        for (int i = 0; i < parts.length; i++) {
            row[i] = Integer.parseInt(parts[i]);
        }
        return row;
    }
}

def main():
    """Forward-map an image through a 2x3 affine matrix and print the result.

    Reads from stdin: a header line with three integers (image rows, matrix
    rows, size rows), the image rows, the matrix rows ``a b tx`` / ``c d ty``,
    and finally ``out_height out_width``. Prints the target image flattened in
    row-major order as one space-separated line.
    """
    a_rows, m_rows, _size_rows = (int(tok) for tok in input().split())

    source_image = [[int(tok) for tok in input().split()] for _ in range(a_rows)]
    # The image width is defined by the first row only.
    source_cols = len(source_image[0]) if source_image else 0

    m = []
    for _ in range(m_rows):
        m.append([int(tok) for tok in input().split()])

    out_height, out_width = (int(tok) for tok in input().split())

    target_image = [[0 for _ in range(out_width)] for _ in range(out_height)]

    a, b, tx = m[0]
    c, d, ty = m[1]

    # Visit every source pixel and push it to its transformed position.
    for y in range(a_rows):
        src_row = source_image[y]
        for x in range(source_cols):
            col = a * x + b * y + tx
            line = c * x + d * y + ty

            # Pixels that land outside the target image are dropped.
            if col < 0 or col >= out_width or line < 0 or line >= out_height:
                continue
            target_image[line][col] = src_row[x]

    # Flatten row by row and emit a single line.
    print(" ".join(str(value) for row in target_image for value in row))


if __name__ == "__main__":
    main()

算法及复杂度

算法: 模拟
时间复杂度: $\mathcal{O}(H_{in} \cdot W_{in} + H_{out} \cdot W_{out})$ ，其中 $H_{in}$ 和 $W_{in}$ 分别是输入图像的高度和宽度， $H_{out}$ 和 $W_{out}$ 分别是目标图像的高度和宽度。算法的主体是遍历输入图像的每一个像素并进行一次常量时间的计算和赋值；此外，目标图像的初始化和最终输出还需要访问它的每一个位置。
空间复杂度: $\mathcal{O}(H_{out} \cdot W_{out})$ ，主要用于存储 out_height $\times$ out_width 大小的目标图像矩阵。