4个4bit先行进位加法器(CLA)级联组成16bit加法器(每个4bit块内部为先行进位,块与块之间的进位逐级串行传递)
adder.v

`timescale 1ns / 1ps
// 4-bit carry-lookahead adder (CLA) slice.
//
// Ports:
//   A, B : 4-bit operands
//   C0   : carry into bit 0
//   S    : 4-bit sum
//   C4   : carry out of bit 3
//
// Each carry is written as a flat sum-of-products of the per-bit
// generate/propagate terms and C0, so no carry expression refers to
// another carry.
module adder (
    input  wire [3:0] A,
    input  wire [3:0] B,
    input  wire       C0,   // carry in
    output wire [3:0] S,
    output wire       C4    // carry out
);
    // Per-bit generate (both operand bits set) and propagate (exactly one set).
    wire [3:0] gen  = A & B;
    wire [3:0] prop = A ^ B;

    // Carry into bits 1..3. A reduction-AND over a propagate slice stands for
    // "every bit in this range propagates".
    wire c1 = gen[0]
            | (prop[0] & C0);

    wire c2 = gen[1]
            | (prop[1] & gen[0])
            | ((&prop[1:0]) & C0);

    wire c3 = gen[2]
            | (prop[2] & gen[1])
            | ((&prop[2:1]) & gen[0])
            | ((&prop[2:0]) & C0);

    // Carry out of the slice.
    assign C4 = gen[3]
              | (prop[3] & gen[2])
              | ((&prop[3:2]) & gen[1])
              | ((&prop[3:1]) & gen[0])
              | ((&prop[3:0]) & C0);

    // Sum bit i = A[i] ^ B[i] ^ (carry into bit i).
    assign S = prop ^ {c3, c2, c1, C0};
endmodule

adder_display.v

`timescale 1ns / 1ps
// 16-bit adder assembled from four 4-bit `adder` slices.
//
// Ports:
//   A, B : 16-bit operands
//   C0   : carry into bit 0
//   S    : 16-bit sum
//   C4   : carry out of bit 15
//
// The carry-out of each slice drives the carry-in of the next more
// significant slice.
module adder_display (
    input  wire [15:0] A,
    input  wire [15:0] B,
    input  wire        C0,
    output wire [15:0] S,
    output wire        C4
);
    // Carries handed from one slice to the next (into bits 4, 8 and 12).
    wire carry_4;
    wire carry_8;
    wire carry_12;

    // Bits 3:0
    adder part_1 (
        .A  (A[3:0]),
        .B  (B[3:0]),
        .C0 (C0),
        .S  (S[3:0]),
        .C4 (carry_4)
    );

    // Bits 7:4
    adder part_2 (
        .A  (A[7:4]),
        .B  (B[7:4]),
        .C0 (carry_4),
        .S  (S[7:4]),
        .C4 (carry_8)
    );

    // Bits 11:8
    adder part_3 (
        .A  (A[11:8]),
        .B  (B[11:8]),
        .C0 (carry_8),
        .S  (S[11:8]),
        .C4 (carry_12)
    );

    // Bits 15:12; this slice's carry-out is the adder's carry-out.
    adder part_4 (
        .A  (A[15:12]),
        .B  (B[15:12]),
        .C0 (carry_12),
        .S  (S[15:12]),
        .C4 (C4)
    );
endmodule

testbench.v

`timescale 1ns / 1ps //仿真时单位时间1ns,精度1ps
// Self-checking random testbench for the 16-bit `adder_display`.
//
// Starting from the all-zero vector, a new random (A, B, cin) is applied
// every 10 ns. Just before each new vector is applied, the outputs produced
// by the previous vector are compared with the 17-bit reference A + B + cin.
// After NUM_VECTORS random vectors the error count is printed and the
// simulation ends.
//
// Parameters:
//   NUM_VECTORS : number of random vectors applied after the initial
//                 all-zero vector (default 1000).
module testbench;
    parameter NUM_VECTORS = 1000;

    reg  [15:0] A;
    reg  [15:0] B;
    reg         cin;
    wire [15:0] S;
    wire        cout;

    integer errors;

    adder_display test (
        .A  (A),
        .B  (B),
        .C0 (cin),
        .S  (S),
        .C4 (cout)
    );

    // Reference result. Operands are zero-extended to 17 bits so the carry
    // out of bit 15 is kept.
    wire [16:0] expected = {1'b0, A} + {1'b0, B} + cin;

    // Compare the DUT outputs for the currently applied vector with the
    // reference and report any mismatch.
    task check_outputs;
        begin
            if ({cout, S} !== expected) begin
                errors = errors + 1;
                $display("ERROR @%0t: A=%h B=%h cin=%b -> cout=%b S=%h (expected cout=%b S=%h)",
                         $time, A, B, cin, cout, S, expected[16], expected[15:0]);
            end
        end
    endtask

    initial begin
        errors = 0;
        A      = 0;
        B      = 0;
        cin    = 0;

        repeat (NUM_VECTORS) begin
            #10;                     // let the current vector settle for 10 ns
            check_outputs;
            // All stimulus is drawn in one process so the order of the
            // $random calls is fixed. $random returns a 32-bit value;
            // only the low 16 bits are kept by the assignment to A and B.
            A   = $random;
            B   = $random;
            cin = {$random} % 2;     // concatenation makes the value unsigned
        end

        #10;
        check_outputs;               // check the last vector applied

        if (errors == 0)
            $display("PASS: %0d random vectors matched A + B + cin", NUM_VECTORS);
        else
            $display("FAIL: %0d mismatching vector(s)", errors);
        $finish;                     // end the run instead of simulating forever
    end
endmodule