DPC的算法讲解和MATLAB仿真参考上一节:
ISP算法之坏点校正DPC(一):MATLAB仿真验证-CSDN博客
本节讲解Verilog的硬件实现与仿真
行缓存设计
DPC算法是基于窗口邻域的像素级别算法,因此需要对实时到来的视频流进行行缓存,行缓存的设计参考:Verilog实现图像处理的行缓存Line Buffer_verilog行缓冲-CSDN博客
行缓存会并行输出N行同一列位置的数据,在算法模块实例化行缓存模块并对其并行输出打拍寄存就可以得到一定大小的窗口数据。
Pipeline设计
硬件设计的难点是拆分算法逻辑并对齐时序,将算法实现的步骤按照Pipeline的方式进行分解,在每个时钟周期完成一个步骤并寄存,经过一定延时后,每个时钟周期完成处理并输出一个像素。
例如在下列pipeline框图中,一个实现被拆分成了3个时钟周期ABC,每个时钟周期实现一部分组合逻辑(A、B、C),延时一段时间后(3个时钟周期),每个时钟周期都能处理完ABC三个过程,这样的设计其实是面积换速度策略,通过添加寄存器存储中间值来实现Pipeline。
DPC硬件设计框架
按照Pipeline的设计方式,DPC的数据流与算法硬件pipeline设计框架图如下所示:
每个时钟周期对应的处理步骤已经用T1~T10表示。
1、通过行缓存得到并行的四行相同列的数据,同时输入第5行数据,此过程需要对行缓存和输入的像素进行打拍得到窗口大小的像素区域。
2、得到5x5窗口区域后,根据当前Bayer像素的格式R/B或G得到邻域区域的9个像素点(包含中心像素),详细查看上一节中的MATLAB仿真验证。
3、接下来充分利用硬件的并行性,同时进行中值的求取和坏点的检测过程,最后根据是否是坏点来输出中值或原值。每个过程按照Pipeline拆分为子过程。
计算中值
这里中值的获取采用三分法的原理,可以参考:
3x3开窗中值滤波器的FPGA硬件实现 - olivermahout - 博客园
(该方法的证明可以参考网络上的博客,这里会使用即可)
通过此方法无需对9个像素进行复杂的排序操作,只需要三三比较即可。
坏点检测
由于坏点检测当中涉及到了减法,因此需要对参与计算的数据扩展一位位宽用于符号判断(0正1负),如上述算法框图中过程6。计算得到的差值通过最高位来判断正负。
如果全为正或全为负且差值的绝对值超过一定阈值则判断为坏点,绝对值的计算使用有符号的0减去为负的数值。详细查看代码中的注释
t4_diff1 <= t3_diff1[BITS] ? 1'sd0 - t3_diff1 : t3_diff1;
对齐控制时序
随数据输入的还有HREF和VSYNC控制信号,需要对这些信号进行时序对齐,由于整个算法Pipeline过程延时为10个时钟周期,所以对这些信号也统一打拍10个时钟周期。
verilog硬件设计源码
`timescale 1 ns / 1 ns
/*
* ISP - Defective Pixel Correction
*/
/*
* bayer 5x5邻域内同一颜色通道相对于中心像素都有8个临近像素。校正按以下步骤操作:
* 计算中心像素与周围八个像素值的差;
* 判断八个差值是否都为正值或者都为负值;
* 如果有的为正有的为负,那么就为正常值,否则进行下一步;
* 设置一个阈值,如果八个差值的绝对值都超过阈值,那么就判断为坏点;
* 判断为坏点后就用八个临近的像素值的中位值来替换当前的像素值;
*/
// ISP defective pixel correction (DPC).
//
// A 5x5 window is assembled from a 4-line buffer plus the live input line.
// Nine same-colour pixels (centre + 8 neighbours) are picked from the window
// according to the Bayer colour of the centre pixel. Two paths then run in
// parallel on those nine pixels:
//   * median path  : 3-stage min/med/max network producing the 9-pixel median
//   * detect path  : centre-minus-neighbour differences, sign test and
//                    absolute-value threshold test
// If all eight differences share the same sign and every absolute difference
// exceeds `threshold`, the centre pixel is replaced by the median; otherwise
// the original centre value is passed through.
module isp_dpc
#(
    parameter BITS   = 8,    // bit width of one raw pixel
    parameter WIDTH  = 1280, // pixels per line, forwarded to the line buffer
    parameter HEIGHT = 960,  // lines per frame (not referenced inside this module)
    parameter BAYER  = 0     // 0:RGGB 1:GRBG 2:GBRG 3:BGGR
)
(
    input             pclk,
    input             rst_n,      // asynchronous reset, active low
    input  [BITS-1:0] threshold,  // smaller threshold -> looser test -> more pixels flagged as defective
    input             in_href,
    input             in_vsync,
    input  [BITS-1:0] in_raw,
    output            out_href,
    output            out_vsync,
    output [BITS-1:0] out_raw
);

    // ------------------------------------------------------------------
    // Line buffer: holds 4 lines, the 5th line is the live input stream.
    // shift_register is defined outside this file; its taps are assumed to
    // deliver the same column of the four previous lines -- TODO confirm.
    // ------------------------------------------------------------------
    wire [BITS-1:0] shiftout;
    wire [BITS-1:0] tap3x, tap2x, tap1x, tap0x;
    shift_register #(BITS, WIDTH, 4) linebuffer(pclk, in_href, in_raw, shiftout, {tap3x, tap2x, tap1x, tap0x});

    // ------------------------------------------------------------------
    // 5x5 window registers. Each row is a 5-deep shift chain; new data
    // enters at column 5 and moves towards column 1.
    // ------------------------------------------------------------------
    reg [BITS-1:0] in_raw_r;
    reg [BITS-1:0] p11, p12, p13, p14, p15; // 5x5 window
    reg [BITS-1:0] p21, p22, p23, p24, p25;
    reg [BITS-1:0] p31, p32, p33, p34, p35;
    reg [BITS-1:0] p41, p42, p43, p44, p45;
    reg [BITS-1:0] p51, p52, p53, p54, p55;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n) begin
            in_raw_r <= 0;
            p11 <= 0; p12 <= 0; p13 <= 0; p14 <= 0; p15 <= 0;
            p21 <= 0; p22 <= 0; p23 <= 0; p24 <= 0; p25 <= 0;
            p31 <= 0; p32 <= 0; p33 <= 0; p34 <= 0; p35 <= 0;
            p41 <= 0; p42 <= 0; p43 <= 0; p44 <= 0; p45 <= 0;
            p51 <= 0; p52 <= 0; p53 <= 0; p54 <= 0; p55 <= 0;
        end
        else begin
            in_raw_r <= in_raw;
            p11 <= p12; p12 <= p13; p13 <= p14; p14 <= p15; p15 <= tap3x;
            p21 <= p22; p22 <= p23; p23 <= p24; p24 <= p25; p25 <= tap2x;
            p31 <= p32; p32 <= p33; p33 <= p34; p34 <= p35; p35 <= tap1x;
            p41 <= p42; p42 <= p43; p43 <= p44; p44 <= p45; p45 <= tap0x;
            p51 <= p52; p52 <= p53; p53 <= p54; p54 <= p55; p55 <= in_raw_r;
        end
    end

    // ------------------------------------------------------------------
    // Column parity: cleared while in_href is low, toggles on every clock
    // while in_href is high.
    // ------------------------------------------------------------------
    reg odd_pix;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n)
            odd_pix <= 0;
        else if (!in_href)
            odd_pix <= 0;
        else
            odd_pix <= ~odd_pix;
    end
    wire odd_pix_sync_shift = odd_pix;

    // in_href delayed by one clock, used to detect its falling edge.
    reg prev_href;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n)
            prev_href <= 0;
        else
            prev_href <= in_href;
    end

    // ------------------------------------------------------------------
    // Line parity: cleared while in_vsync is high, toggles on every
    // falling edge of in_href (end of a line).
    // ------------------------------------------------------------------
    reg odd_line;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n)
            odd_line <= 0;
        else if (in_vsync)
            odd_line <= 0;
        else if (prev_href & (~in_href)) // falling edge of in_href
            odd_line <= ~odd_line;
        else
            odd_line <= odd_line;
    end
    wire odd_line_sync_shift = odd_line;

    // Bayer colour of the window centre, derived from the configured pattern
    // and the line/column parity.
    // pixel format 0:[R]GGB 1:R[G]GB 2:RG[G]B 3:RGG[B]
    wire [1:0] p33_fmt = BAYER[1:0] ^ {odd_line_sync_shift, odd_pix_sync_shift};

    // ------------------------------------------------------------------
    // T1: pick the nine same-colour pixels (centre is t1_p5).
    // ------------------------------------------------------------------
    reg [BITS-1:0] t1_p1, t1_p2, t1_p3;
    reg [BITS-1:0] t1_p4, t1_p5, t1_p6;
    reg [BITS-1:0] t1_p7, t1_p8, t1_p9;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n) begin
            t1_p1 <= 0; t1_p2 <= 0; t1_p3 <= 0;
            t1_p4 <= 0; t1_p5 <= 0; t1_p6 <= 0;
            t1_p7 <= 0; t1_p8 <= 0; t1_p9 <= 0;
        end
        else begin
            case (p33_fmt)
                2'd0, 2'd3: begin // R / B centre: same-colour pixels sit on every other row and column
                    t1_p1 <= p11; t1_p2 <= p13; t1_p3 <= p15;
                    t1_p4 <= p31; t1_p5 <= p33; t1_p6 <= p35;
                    t1_p7 <= p51; t1_p8 <= p53; t1_p9 <= p55;
                end
                2'd1, 2'd2: begin // Gr / Gb centre: four diagonal neighbours plus four at distance two
                    t1_p1 <= p22; t1_p2 <= p13; t1_p3 <= p24;
                    t1_p4 <= p31; t1_p5 <= p33; t1_p6 <= p35;
                    t1_p7 <= p42; t1_p8 <= p53; t1_p9 <= p44;
                end
                default: begin
                    t1_p1 <= 0; t1_p2 <= 0; t1_p3 <= 0;
                    t1_p4 <= 0; t1_p5 <= 0; t1_p6 <= 0;
                    t1_p7 <= 0; t1_p8 <= 0; t1_p9 <= 0;
                end
            endcase
        end
    end

    // ------------------------------------------------------------------
    // Median path, step 1 (T2): min / med / max of each group of three.
    // ------------------------------------------------------------------
    reg [BITS-1:0] t2_min1, t2_med1, t2_max1;
    reg [BITS-1:0] t2_min2, t2_med2, t2_max2;
    reg [BITS-1:0] t2_min3, t2_med3, t2_max3;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n) begin
            t2_min1 <= 0; t2_med1 <= 0; t2_max1 <= 0;
            t2_min2 <= 0; t2_med2 <= 0; t2_max2 <= 0;
            t2_min3 <= 0; t2_med3 <= 0; t2_max3 <= 0;
        end
        else begin
            t2_min1 <= min(t1_p1, t1_p2, t1_p3);
            t2_med1 <= med(t1_p1, t1_p2, t1_p3);
            t2_max1 <= max(t1_p1, t1_p2, t1_p3);
            t2_min2 <= min(t1_p4, t1_p5, t1_p6);
            t2_med2 <= med(t1_p4, t1_p5, t1_p6);
            t2_max2 <= max(t1_p4, t1_p5, t1_p6);
            t2_min3 <= min(t1_p7, t1_p8, t1_p9);
            t2_med3 <= med(t1_p7, t1_p8, t1_p9);
            t2_max3 <= max(t1_p7, t1_p8, t1_p9);
        end
    end

    // Median path, step 2 (T3): max of mins, med of meds, min of maxes.
    reg [BITS-1:0] t3_max_of_min, t3_med_of_med, t3_min_of_max;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n) begin
            t3_max_of_min <= 0; t3_med_of_med <= 0; t3_min_of_max <= 0;
        end
        else begin
            t3_max_of_min <= max(t2_min1, t2_min2, t2_min3);
            t3_med_of_med <= med(t2_med1, t2_med2, t2_med3);
            t3_min_of_max <= min(t2_max1, t2_max2, t2_max3);
        end
    end

    // Median path, step 3 (T4): the median of the three step-2 values is
    // the median of the nine selected pixels.
    reg [BITS-1:0] t4_medium;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n) begin
            t4_medium <= 0;
        end
        else begin
            t4_medium <= med(t3_max_of_min, t3_med_of_med, t3_min_of_max);
        end
    end

    // One extra register stage so the median lines up with t5_defective_pix.
    reg [BITS-1:0] t5_medium;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n) begin
            t5_medium <= 0;
        end
        else begin
            t5_medium <= t4_medium;
        end
    end

    // ------------------------------------------------------------------
    // Detect path, step 1 (T2): zero-extend by one bit so the following
    // subtraction has room for a sign bit.
    // ------------------------------------------------------------------
    reg signed [BITS:0] t2_p1, t2_p2, t2_p3;
    reg signed [BITS:0] t2_p4, t2_p5, t2_p6;
    reg signed [BITS:0] t2_p7, t2_p8, t2_p9;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n) begin
            t2_p1 <= 0; t2_p2 <= 0; t2_p3 <= 0;
            t2_p4 <= 0; t2_p5 <= 0; t2_p6 <= 0;
            t2_p7 <= 0; t2_p8 <= 0; t2_p9 <= 0;
        end
        else begin
            t2_p1 <= {1'b0,t1_p1}; t2_p2 <= {1'b0,t1_p2}; t2_p3 <= {1'b0,t1_p3};
            t2_p4 <= {1'b0,t1_p4}; t2_p5 <= {1'b0,t1_p5}; t2_p6 <= {1'b0,t1_p6};
            t2_p7 <= {1'b0,t1_p7}; t2_p8 <= {1'b0,t1_p8}; t2_p9 <= {1'b0,t1_p9};
        end
    end

    // ------------------------------------------------------------------
    // Detect path, step 2 (T3): centre minus each of the eight neighbours.
    // Results are two's complement; bit [BITS] is the sign.
    // ------------------------------------------------------------------
    reg        [BITS-1:0] t3_center; // only the BITS pixel bits are carried forward
    reg signed [BITS:0]   t3_diff1, t3_diff2, t3_diff3, t3_diff4, t3_diff5, t3_diff6, t3_diff7, t3_diff8;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n) begin
            t3_center <= 0;
            t3_diff1 <= 0; t3_diff2 <= 0;
            t3_diff3 <= 0; t3_diff4 <= 0;
            t3_diff5 <= 0; t3_diff6 <= 0;
            t3_diff7 <= 0; t3_diff8 <= 0;
        end
        else begin
            t3_center <= t2_p5[BITS-1:0];
            t3_diff1 <= t2_p5 - t2_p1;
            t3_diff2 <= t2_p5 - t2_p2;
            t3_diff3 <= t2_p5 - t2_p3;
            t3_diff4 <= t2_p5 - t2_p4;
            t3_diff5 <= t2_p5 - t2_p6;
            t3_diff6 <= t2_p5 - t2_p7;
            t3_diff7 <= t2_p5 - t2_p8;
            t3_diff8 <= t2_p5 - t2_p9;
        end
    end

    // Sign bits of the eight differences (0 = non-negative, 1 = negative).
    wire [7:0] t3_sign_bits = {t3_diff1[BITS], t3_diff2[BITS], t3_diff3[BITS], t3_diff4[BITS],
                               t3_diff5[BITS], t3_diff6[BITS], t3_diff7[BITS], t3_diff8[BITS]};

    // ------------------------------------------------------------------
    // Detect path, step 3 (T4): check that all signs agree and take the
    // absolute value of every difference.
    // ------------------------------------------------------------------
    reg            t4_defective_pix;
    reg [BITS-1:0] t4_center;
    reg [BITS-1:0] t4_diff1, t4_diff2, t4_diff3, t4_diff4, t4_diff5, t4_diff6, t4_diff7, t4_diff8;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n) begin
            t4_defective_pix <= 0;
            t4_center <= 0;
            t4_diff1 <= 0; t4_diff2 <= 0;
            t4_diff3 <= 0; t4_diff4 <= 0;
            t4_diff5 <= 0; t4_diff6 <= 0;
            t4_diff7 <= 0; t4_diff8 <= 0;
        end
        else begin
            t4_center <= t3_center;
            // Candidate only if every difference is non-negative or every one is negative.
            t4_defective_pix <= (t3_sign_bits == 8'b0000_0000) || (t3_sign_bits == 8'b1111_1111);
            // Negative values are negated (0 - x) to obtain the magnitude.
            t4_diff1 <= t3_diff1[BITS] ? 1'sd0 - t3_diff1 : t3_diff1;
            t4_diff2 <= t3_diff2[BITS] ? 1'sd0 - t3_diff2 : t3_diff2;
            t4_diff3 <= t3_diff3[BITS] ? 1'sd0 - t3_diff3 : t3_diff3;
            t4_diff4 <= t3_diff4[BITS] ? 1'sd0 - t3_diff4 : t3_diff4;
            t4_diff5 <= t3_diff5[BITS] ? 1'sd0 - t3_diff5 : t3_diff5;
            t4_diff6 <= t3_diff6[BITS] ? 1'sd0 - t3_diff6 : t3_diff6;
            t4_diff7 <= t3_diff7[BITS] ? 1'sd0 - t3_diff7 : t3_diff7;
            t4_diff8 <= t3_diff8[BITS] ? 1'sd0 - t3_diff8 : t3_diff8;
        end
    end

    // ------------------------------------------------------------------
    // Detect path, step 4 (T5): the pixel is defective when the signs agree
    // and every absolute difference is strictly greater than the threshold.
    // ------------------------------------------------------------------
    reg            t5_defective_pix;
    reg [BITS-1:0] t5_center;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n) begin
            t5_defective_pix <= 0;
            t5_center <= 0;
        end
        else begin
            t5_center <= t4_center;
            t5_defective_pix <= t4_defective_pix && t4_diff1 > threshold && t4_diff2 > threshold && t4_diff3 > threshold && t4_diff4 > threshold &&
                                t4_diff5 > threshold && t4_diff6 > threshold && t4_diff7 > threshold && t4_diff8 > threshold;
        end
    end

    // ------------------------------------------------------------------
    // Step 5 (T6): defective -> output the median, otherwise the original
    // centre value.
    // ------------------------------------------------------------------
    reg [BITS-1:0] t6_center;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n) begin
            t6_center <= 0;
        end
        else begin
            t6_center <= t5_defective_pix ? t5_medium : t5_center;
        end
    end

    // ------------------------------------------------------------------
    // Delay the control signals so they line up with the processed data.
    // NOTE(review): the real data latency also depends on the read latency
    // of shift_register, which is not visible here -- confirm that 10 clocks
    // matches it.
    // ------------------------------------------------------------------
    localparam DLY_CLK = 10;
    reg [DLY_CLK-1:0] href_dly;
    reg [DLY_CLK-1:0] vsync_dly;
    always @ (posedge pclk or negedge rst_n) begin
        if (!rst_n) begin
            href_dly <= 0;
            vsync_dly <= 0;
        end
        else begin
            href_dly <= {href_dly[DLY_CLK-2:0], in_href};
            vsync_dly <= {vsync_dly[DLY_CLK-2:0], in_vsync};
        end
    end

    // Aligned outputs; the data bus is forced to zero while out_href is low.
    assign out_href  = href_dly[DLY_CLK-1];
    assign out_vsync = vsync_dly[DLY_CLK-1];
    assign out_raw   = out_href ? t6_center : {BITS{1'b0}};

    // Smallest of three unsigned values.
    function [BITS-1:0] min;
        input [BITS-1:0] a, b, c;
        begin
            min = (a < b) ? ((a < c) ? a : c) : ((b < c) ? b : c);
        end
    endfunction

    // Middle value of three unsigned values.
    function [BITS-1:0] med;
        input [BITS-1:0] a, b, c;
        begin
            med = (a < b) ? ((b < c) ? b : (a < c ? c : a)) : ((b > c) ? b : (a > c ? c : a));
        end
    endfunction

    // Largest of three unsigned values.
    function [BITS-1:0] max;
        input [BITS-1:0] a, b, c;
        begin
            max = (a > b) ? ((a > c) ? a : c) : ((b > c) ? b : c);
        end
    endfunction

endmodule
Verilog仿真验证
(验证框架的搭建查看往期博客)
编写testbench,实例化算法模块,代码如下:
`timescale 1ns / 1ns
// Testbench for isp_dpc: a raw file is streamed in through FILE_TO_DVP,
// processed by the DPC module, and the result is written back to a file by
// DVP_TO_FILE. FILE_TO_DVP and DVP_TO_FILE are defined outside this file.
module tb_dpc;

    // Free-running source clock: toggles every 5 time units.
    reg xclk = 0;
    always #5 xclk <= ~xclk;

    // Active-low reset, released after 100 time units.
    reg rst_n = 0;
    initial begin
        rst_n <= 0;
        #100 rst_n <= 1;
    end

    localparam BAYER  = 3;
    localparam BITS   = 10;
    localparam WIDTH  = 2592;
    localparam HEIGHT = 1944;
    localparam IN_FILE  = "E:/ISP/tb_dpc_2592x1944_16.raw";
    localparam OUT_FILE = "E:/ISP/tb_dpc_2592x1944_16_verilogout.raw";

    // DPC threshold fed to the device under test.
    reg [BITS-1:0] dpc_thresh=100;

    // ---------------------------- FILE TO DVP ----------------------------
    // hsync_in is produced by the source but not consumed by anything below.
    wire pclk_in, href_in, vsyn_in,hsync_in;
    wire [BITS-1:0] data_in;
    FILE_TO_DVP #(
        .FILE(IN_FILE),
        .BITS(BITS),
        .H_DISP(WIDTH),
        .V_DISP(HEIGHT)
    )
    file_to_dvp_inst
    (
        .xclk(xclk),
        .rst_n(rst_n),
        .pclk(pclk_in),
        .href(href_in),
        .hsync(hsync_in),
        .vsync(vsyn_in),
        .data(data_in)
    );

    // --------------------------- ISP algorithm ---------------------------
    wire [BITS-1:0] data_o;
    wire href_o,vsyn_o;
    isp_dpc #(
        .BITS(BITS),
        .WIDTH(WIDTH),
        .HEIGHT(HEIGHT),
        .BAYER(BAYER)
    ) dpc_inst(
        .pclk(pclk_in),
        .rst_n(rst_n),
        .threshold(dpc_thresh),
        .in_href(href_in),
        .in_vsync(vsyn_in),
        .in_raw(data_in),
        .out_href(href_o),
        .out_vsync(vsyn_o),
        .out_raw(data_o)
    );

    // ---------------------------- DVP to FILE ----------------------------
    DVP_TO_FILE #(
        .FILE(OUT_FILE),
        .BITS(BITS)
    )
    dvp_to_file_inst
    (
        .pclk(pclk_in),
        .rst_n(rst_n),
        .href(href_o),
        .vsync(vsyn_o),
        .data(data_o)
    );

endmodule
输入raw图像为:
(测试图像的生成参考往期博客)
得到的raw仅进行demosaic后得到如下:
可以看到坏点基本被消除,与MATLAB仿真得到的结果基本一致。