FPGA滤波器实施概述

我协助我的一位研究生进行了一些有关FPGA滤波器实现的定向研究。她的任务是了解并实现FPGA硬件上的几种类型的过滤器。设计的所有滤波器均为15阶滤波器,并使用16位定点数学运算。然后检查了过滤器的性能和资源使用情况。她关于研究的最终演讲可以在下面查看:

FPGA滤波器实现

研究项目期间创建的Verilog源文件如下。

FIR滤波器

FIR滤波器是四个滤波器中最简单,最快的。它利用了预加器的对称性。而且,使用加法器树来最小化组合路径延迟。

FIR_Filter.v
`define FILT_LENGTH 16
 
module FIR_Filter(
    input  clk,
    input  en,
    input  [15:0]din,
    output [15:0]dout
    );
 
    reg signed [15:0]coeff[`FILT_LENGTH/2-1:0]; //Filter coefficients.
    reg signed [15:0]delay_line[`FILT_LENGTH-1:0]; //Input delay line.
    wire [31:0]accum; //Accumulator for output filter calculation.    
    integer i; //Initialization integer.
    genvar c;  //Delay line generation variable.
 
    reg signed [16:0]preadd_regs[7:0]; //Save calc after preadd.
    reg signed [31:0]mult_regs[7:0];   //Save calc after multiplication.
    reg signed [31:0]tree1_regs[3:0];  //Save calc after first layer of adder tree.
    reg signed [31:0]tree2_regs[1:0];  //Save calc after first layer of adder tree.
    reg signed [31:0]treeout_reg;      //Save calc after complete.
 
    //assign dout = treeout_reg[31:16];
 
    //Calculate value in the accumulator.
    assign accum = (delay_line[0] + delay_line[15]) * coeff[0] +
                   (delay_line[1] + delay_line[14]) * coeff[1] +
                   (delay_line[2] + delay_line[13]) * coeff[2] +
                   (delay_line[3] + delay_line[12]) * coeff[3] +
                   (delay_line[4] + delay_line[11]) * coeff[4] +
                   (delay_line[5] + delay_line[10]) * coeff[5] +
                   (delay_line[6] + delay_line[9])  * coeff[6] +
                   (delay_line[7] + delay_line[8])  * coeff[7];
 
    //Assign upper 16-bits to output.
    assign dout = accum[31:16];
 
    initial begin   
        //Load the filter coefficients.
        coeff[0] = 16'd2320;
        coeff[1] = 16'd4143;
        coeff[2] = 16'd4592;
        coeff[3] = 16'd7278;
        coeff[4] = 16'd8423;
        coeff[5] = 16'd10389;
        coeff[6] = 16'd11269;
        coeff[7] = 16'd12000;
 
        //Initialize delay line.
        for(i = 0; i < `FILT_LENGTH; i = i+1'b1) begin
            delay_line[i] = 16'd0;
        end
 
        //Initialize the preadder regs.
        for(i = 0; i < 8; i = i+1'b1) begin
            preadd_regs[i] = 17'd0;
        end
 
        //Initialize the multiplier regs.
        for(i = 0; i < 8; i = i+1'b1) begin
            mult_regs[i] = 32'd0;
        end
 
        //Initialize the first layer of the adder tree regs.
        for(i = 0; i < 4; i = i+1'b1) begin
            tree1_regs[i] = 32'd0;
        end
 
        //Initialize the second layer of the adder tree regs.
        for(i = 0; i < 2; i = i+1'b1) begin
            tree2_regs[i] = 32'd0;
        end        
 
        //Initialize the adder tree output reg.
        treeout_reg = 32'd0;
    end
 
    //Advance the data through the delay line every clock cycle.
    generate
        for (c = `FILT_LENGTH-1; c > 0; c = c - 1) begin: inc_delay
            always @(posedge clk) begin
                if(en) delay_line[c] <= delay_line[c-1];
            end
        end
    endgenerate
 
    //Update with input data.
    always @(posedge clk) begin
        if(en) delay_line[0] <= din;
    end
endmodule


filter_top.v
module filter_top(
    input clk,
    input [7:0]sw,
    output [7:0]JA
    );
 
    wire [15:0]tout; //The output of the tone lookup table.
    wire [15:0]fout; //The output of the FIR filter.
    wire [15:0]mout; //Selected output.
    wire en;         //Clock enble.
    wire clk_out;    //264MHz System clock.
 
    assign en = 1'b1;
 
    clk_wiz_0 cw (.clk_in1(clk), .clk_out1(clk_out));
    //clk_en ce(.clk(clk_out), .en(en));
    ToneGen tg(.clk(clk_out), .en(en), .dout(tout));
    FIR_Filter ff(.clk(clk_out), .en(en), .din(tout), .dout(fout));
 
    //Choose filtered or unfiltered output based on sw0.
    assign mout = sw[0] ? fout : tout;
 
    //Take only the upper 8 bits and make it unsigned.
    //assign JA = mout[15:8] + 8'd128;
    assign {JA[3:0], JA[7:4]} = mout[15:8] + 8'd128;
 
endmodule


ToneGen.v
`timescale 1ns/1ps
 
module ToneGen(
    input clk,
    input en,
    output [15:0]dout
    );
 
    //Tone generation data.  At a 44KHz sampling rate, the lookup table
    //generates a 440Hz and 4.4KHz tone mixed together.
    reg [15:0]tonetbl[99:0];
 
    //Pointer into the tone table.
    reg [6:0]toneptr = 7'h00;
 
    initial begin
        tonetbl[0] = 10970;
        tonetbl[1] = 18151;
        tonetbl[2] = 19197;
        tonetbl[3] = 14105;
        tonetbl[4] = 5211;
        tonetbl[5] = -3704;
        tonetbl[6] = -8858;
        tonetbl[7] = -7914;
        tonetbl[8] = -876;
        tonetbl[9] = 9912;
        tonetbl[10] = 20660;
        tonetbl[11] = 27581;
        tonetbl[12] = 28330;
        tonetbl[13] = 22905;
        tonetbl[14] = 13642;
        tonetbl[15] = 4326;
        tonetbl[16] = -1260;
        tonetbl[17] = -780;
        tonetbl[18] = 5767;
        tonetbl[19] = 16037;
        tonetbl[20] = 26244;
        tonetbl[21] = 32601;
        tonetbl[22] = 32767;
        tonetbl[23] = 26741;
        tonetbl[24] = 16863;
        tonetbl[25] = 6918;
        tonetbl[26] = 692;
        tonetbl[27] = 527;
        tonetbl[28] = 6421;
        tonetbl[29] = 16037;
        tonetbl[30] = 25590;
        tonetbl[31] = 31295;
        tonetbl[32] = 30814;
        tonetbl[33] = 24149;
        tonetbl[34] = 13642;
        tonetbl[35] = 3081;
        tonetbl[36] = -3745;
        tonetbl[37] = -4494;
        tonetbl[38] = 837;
        tonetbl[39] = 9912;
        tonetbl[40] = 18947;
        tonetbl[41] = 24161;
        tonetbl[42] = 23217;
        tonetbl[43] = 16119;
        tonetbl[44] = 5211;
        tonetbl[45] = -5718;
        tonetbl[46] = -12878;
        tonetbl[47] = -13924;
        tonetbl[48] = -8853;
        tonetbl[49] = 0;
        tonetbl[50] = 8853;
        tonetbl[51] = 13924;
        tonetbl[52] = 12878;
        tonetbl[53] = 5718;
        tonetbl[54] = -5211;
        tonetbl[55] = -16119;
        tonetbl[56] = -23217;
        tonetbl[57] = -24161;
        tonetbl[58] = -18947;
        tonetbl[59] = -9912;
        tonetbl[60] = -837;
        tonetbl[61] = 4494;
        tonetbl[62] = 3745;
        tonetbl[63] = -3081;
        tonetbl[64] = -13642;
        tonetbl[65] = -24149;
        tonetbl[66] = -30814;
        tonetbl[67] = -31295;
        tonetbl[68] = -25590;
        tonetbl[69] = -16037;
        tonetbl[70] = -6421;
        tonetbl[71] = -527;
        tonetbl[72] = -692;
        tonetbl[73] = -6918;
        tonetbl[74] = -16863;
        tonetbl[75] = -26741;
        tonetbl[76] = -32767;
        tonetbl[77] = -32601;
        tonetbl[78] = -26244;
        tonetbl[79] = -16037;
        tonetbl[80] = -5767;
        tonetbl[81] = 780;
        tonetbl[82] = 1260;
        tonetbl[83] = -4326;
        tonetbl[84] = -13642;
        tonetbl[85] = -22905;
        tonetbl[86] = -28330;
        tonetbl[87] = -27581;
        tonetbl[88] = -20660;
        tonetbl[89] = -9912;
        tonetbl[90] = 876;
        tonetbl[91] = 7914;
        tonetbl[92] = 8858;
        tonetbl[93] = 3704;
        tonetbl[94] = -5211;
        tonetbl[95] = -14105;
        tonetbl[96] = -19197;
        tonetbl[97] = -18151;
        tonetbl[98] = -10970;
        tonetbl[99] = 0;
    end
 
    //Assign the output to the value in the lookup table
    //pointed to by toneptr.
    assign dout = tonetbl[toneptr];
 
    //Increment through the tone lookup table and wrap back
    //to zero when at the end.
    always @(posedge clk) begin
        if(en) begin
            if(toneptr == 7'd99) begin
                toneptr <= 7'd0;
            end
            else begin
                toneptr <= toneptr + 1'b1;
            end
        end
    end
endmodule


clk_en.v
module clk_en(
    input clk,
    output reg en = 1'b0
    );
 
    reg [15:0]en_counter = 16'h0;
 
    always @(posedge clk) begin
        if(en_counter == 16'd5999) begin
            en_counter <= 16'h0;
            en <= 1'b1;
        end
        else begin
            en_counter <= en_counter + 1'b1;
            en <= 1'b0;
        end
    end
 
endmodule


TB_ToneGen.v
module TB_ToneGen;
 
    reg clk = 1'b0;
    reg sw0 = 1'b1;
    wire [7:0]JA;
 
    wire en;    
    //wire [15:0]cout;
 
    filter_top uut(.clk(clk), .sw({15'h00,sw0}), .JA({JA[3:0], JA[7:4]}));
 
    assign en = uut.en;
    //assign cout = uut.ce.en_counter;
 
    //initial begin
    //     #600000 sw0 = 1'b1;
    //end
 
    //100MHz clock
    always #5 clk = ~clk;
endmodule



LMS自适应滤波器

LMS自适应滤波器在单个时钟周期内完成其输出计算和权重更新。由于其单周期运行,因此存在大量的组合延迟。该滤波器只能以FIR滤波器速度的一小部分运行。

LMS_Adapt.v
`define ADAPT_FILT_LENGTH 16
module LMS_Adapt(
    input  clk,
    input  en,
    input  [15:0]din,
    input  signed [15:0]desired,
    output signed [15:0]dout,
    output signed [15:0]err
    );
 
    parameter MU = 16'd32767;
 
    reg  signed [15:0]mu = MU;
    reg  signed [15:0]coeff[`ADAPT_FILT_LENGTH-1:0]; //Filter coefficients.
    reg  signed [15:0]delay_line[`ADAPT_FILT_LENGTH-1:0]; //Input delay line.
    wire signed [31:0]accum; //Accumulator for output filter calculation.
    wire signed [31:0]stepped; //Error after step sized applied.    
    integer i; //Initialization integer.
    genvar c;  //Delay line generation variable.
 
    //Calculate the error and multiply it by the step size.
    assign err = desired - dout;   
    //assign stepped = err * mu;
    assign stepped = err << 15;
 
    //Calculate value in the accumulator.
    assign accum = delay_line[0]*coeff[0] +
                   delay_line[1]*coeff[1] +
                   delay_line[2]*coeff[2] +
                   delay_line[3]*coeff[3] +
                   delay_line[4]*coeff[4] +
                   delay_line[5]*coeff[5] +
                   delay_line[6]*coeff[6] +
                   delay_line[7]*coeff[7] +
                   delay_line[8]*coeff[8] +
                   delay_line[9]*coeff[9] +
                   delay_line[10]*coeff[10] +
                   delay_line[11]*coeff[11] +
                   delay_line[12]*coeff[12] +
                   delay_line[13]*coeff[13] +
                   delay_line[14]*coeff[14] +
                   delay_line[15]*coeff[15];
 
    //Assign upper 16-bits to output.
    assign dout = accum[31:16];
 
    initial begin   
        //Load the filter coefficients.
        coeff[0]  = 20'd0;
        coeff[1]  = 20'd0;
        coeff[2]  = 20'd0;
        coeff[3]  = 20'd0;
        coeff[4]  = 20'd0;
        coeff[5]  = 20'd0;
        coeff[6]  = 20'd0;
        coeff[7]  = 20'd0;
        coeff[8]  = 20'd0;
        coeff[9]  = 20'd0;
        coeff[10] = 20'd0;
        coeff[11] = 20'd0;
        coeff[12] = 20'd0;
        coeff[13] = 20'd0;
        coeff[14] = 20'd0;
        coeff[15] = 20'd0;
 
        //Initialize delay line.
        for(i = 0; i < `ADAPT_FILT_LENGTH; i = i+1'b1) begin
            delay_line[i] = 16'd0;
        end
    end
 
    //Advance the data through the delay line every clock cycle.
    generate
        for (c = `ADAPT_FILT_LENGTH-1; c > 0; c = c - 1) begin: inc_delay
            always @(posedge clk) begin
                if(en) delay_line[c] <= delay_line[c-1];
            end
        end
    endgenerate
 
    //Update with input data.
    always @(posedge clk) begin
        if(en) delay_line[0] <= din;
    end
 
    wire signed [31:0]num2[`ADAPT_FILT_LENGTH-1:0];
    wire signed [15:0]n1[`ADAPT_FILT_LENGTH-1:0];
    wire signed [15:0]n2[`ADAPT_FILT_LENGTH-1:0];
 
    //Update the coefficients with the LMS algorithm.
    generate
        for (c = `ADAPT_FILT_LENGTH; c > 0; c = c - 1) begin: adapt_update
            assign n1[c-1] = stepped[31:16];
 
            assign num2[c-1] = n1[c-1]*delay_line[c-1];
            assign n2[c-1] = num2[c-1][31:16];
 
            always @(posedge clk) begin
                if(en) coeff[c-1] <= coeff[c-1] + n2[c-1];
            end
        end
    endgenerate
endmodule


filter_top.v
module filter_top(
    input clk,
    input [7:0]sw,
    output [7:0]JA
    );
 
    wire signed [15:0]tout;           //Output of the tone lookup table.
    wire [15:0]fout;                  //Output of the FIR filter.
    wire [15:0]aout;                  //Output of the adaptive filter.
    wire [15:0]err;                   //Adaptive filter error.
    wire [15:0]lfsr_out;              //16-bit Random noise variable.
    wire en;                          //Clock enble.
    wire [9:0]rnd;                    //10-bit noise.
    wire signed [15:0]tone_and_noise; //Combination of tone and noise.
    wire signed [15:0]delay_out;      //Output of the delay line.    
    wire [15:0]mout;                  //Selected desired output.
    wire clk19_8MHz;                  //19.8MHz system clock.
 
    assign rnd = lfsr_out[15:6];
    assign tone_and_noise = tout + rnd;
    //assign en = 1'b1;
 
    clk_wiz_0 cw(.clk_in1(clk), .clk_out1(clk19_8MHz), .reset(1'b0));
    ToneGen tg(.clk(clk19_8MHz), .en(en), .dout(tout));
    FIR_Filter ff(.clk(clk19_8MHz), .en(en), .din(tone_and_noise), .dout(fout));
    LMS_Adapt af(.clk(clk19_8MHz), .en(en), .din(tone_and_noise), .desired(mout), .dout(aout), .err(err));
    lfsr sr(.clk(clk19_8MHz), .en(en), .lfsr_out(lfsr_out));
    clk_en ce(.clk(clk19_8MHz), .en(en));
 
    //Choose filtered or unfiltered output based on sw0.
    assign mout = sw[0] ? fout : tone_and_noise;//delay_out;
 
    //Take only the upper 8 bits and make it unsigned.
    assign {JA[3:0], JA[7:4]} = aout[15:8] + 8'd128;
    //assign JA[7:0] = aout[15:8] + 8'd128;
 
    //assign {JA[3:0], JA[7:4]} = sw[7:0];   
endmodule


ToneGen.v
`timescale 1ns/1ps
 
module ToneGen(
    input clk,
    input en,
    output [15:0]dout
    );
 
    //Tone generation data.  At a 44KHz sampling rate, the lookup table
    //generates a 440Hz and 4.4KHz tone mixed together.
    reg [15:0]tonetbl[99:0];
 
    //Pointer into the tone table.
    reg [6:0]toneptr = 7'h00;
 
    initial begin
        tonetbl[0] = 10970;
        tonetbl[1] = 18151;
        tonetbl[2] = 19197;
        tonetbl[3] = 14105;
        tonetbl[4] = 5211;
        tonetbl[5] = -3704;
        tonetbl[6] = -8858;
        tonetbl[7] = -7914;
        tonetbl[8] = -876;
        tonetbl[9] = 9912;
        tonetbl[10] = 20660;
        tonetbl[11] = 27581;
        tonetbl[12] = 28330;
        tonetbl[13] = 22905;
        tonetbl[14] = 13642;
        tonetbl[15] = 4326;
        tonetbl[16] = -1260;
        tonetbl[17] = -780;
        tonetbl[18] = 5767;
        tonetbl[19] = 16037;
        tonetbl[20] = 26244;
        tonetbl[21] = 32601;
        tonetbl[22] = 32767;
        tonetbl[23] = 26741;
        tonetbl[24] = 16863;
        tonetbl[25] = 6918;
        tonetbl[26] = 692;
        tonetbl[27] = 527;
        tonetbl[28] = 6421;
        tonetbl[29] = 16037;
        tonetbl[30] = 25590;
        tonetbl[31] = 31295;
        tonetbl[32] = 30814;
        tonetbl[33] = 24149;
        tonetbl[34] = 13642;
        tonetbl[35] = 3081;
        tonetbl[36] = -3745;
        tonetbl[37] = -4494;
        tonetbl[38] = 837;
        tonetbl[39] = 9912;
        tonetbl[40] = 18947;
        tonetbl[41] = 24161;
        tonetbl[42] = 23217;
        tonetbl[43] = 16119;
        tonetbl[44] = 5211;
        tonetbl[45] = -5718;
        tonetbl[46] = -12878;
        tonetbl[47] = -13924;
        tonetbl[48] = -8853;
        tonetbl[49] = 0;
        tonetbl[50] = 8853;
        tonetbl[51] = 13924;
        tonetbl[52] = 12878;
        tonetbl[53] = 5718;
        tonetbl[54] = -5211;
        tonetbl[55] = -16119;
        tonetbl[56] = -23217;
        tonetbl[57] = -24161;
        tonetbl[58] = -18947;
        tonetbl[59] = -9912;
        tonetbl[60] = -837;
        tonetbl[61] = 4494;
        tonetbl[62] = 3745;
        tonetbl[63] = -3081;
        tonetbl[64] = -13642;
        tonetbl[65] = -24149;
        tonetbl[66] = -30814;
        tonetbl[67] = -31295;
        tonetbl[68] = -25590;
        tonetbl[69] = -16037;
        tonetbl[70] = -6421;
        tonetbl[71] = -527;
        tonetbl[72] = -692;
        tonetbl[73] = -6918;
        tonetbl[74] = -16863;
        tonetbl[75] = -26741;
        tonetbl[76] = -32767;
        tonetbl[77] = -32601;
        tonetbl[78] = -26244;
        tonetbl[79] = -16037;
        tonetbl[80] = -5767;
        tonetbl[81] = 780;
        tonetbl[82] = 1260;
        tonetbl[83] = -4326;
        tonetbl[84] = -13642;
        tonetbl[85] = -22905;
        tonetbl[86] = -28330;
        tonetbl[87] = -27581;
        tonetbl[88] = -20660;
        tonetbl[89] = -9912;
        tonetbl[90] = 876;
        tonetbl[91] = 7914;
        tonetbl[92] = 8858;
        tonetbl[93] = 3704;
        tonetbl[94] = -5211;
        tonetbl[95] = -14105;
        tonetbl[96] = -19197;
        tonetbl[97] = -18151;
        tonetbl[98] = -10970;
        tonetbl[99] = 0;
    end
 
    //Assign the output to the value in the lookup table
    //pointed to by toneptr.
    assign dout = tonetbl[toneptr];
 
    //Increment through the tone lookup table and wrap back
    //to zero when at the end.
    always @(posedge clk) begin
        if(en) begin
            if(toneptr == 7'd99) begin
                toneptr <= 7'd0;
            end
            else begin
                toneptr <= toneptr + 1'b1;
            end
        end
    end
endmodule


FIR_Filter.v
`define FILT_LENGTH 16
 
module FIR_Filter(
    input  clk,
    input  en,
    input  [15:0]din,
    output [15:0]dout
    );
 
    reg signed [15:0]coeff[`FILT_LENGTH/2-1:0]; //Filter coefficients.
    reg signed [15:0]delay_line[`FILT_LENGTH-1:0]; //Input delay line.
    wire [31:0]accum; //Accumulator for output filter calculation.    
    integer i; //Initialization integer.
    genvar c;  //Delay line generation variable.
 
    //Calculate value in the accumulator.
    assign accum = (delay_line[0] + delay_line[15]) * coeff[0] +
                   (delay_line[1] + delay_line[14]) * coeff[1] +
                   (delay_line[2] + delay_line[13]) * coeff[2] +
                   (delay_line[3] + delay_line[12]) * coeff[3] +
                   (delay_line[4] + delay_line[11]) * coeff[4] +
                   (delay_line[5] + delay_line[10]) * coeff[5] +
                   (delay_line[6] + delay_line[9])  * coeff[6] +
                   (delay_line[7] + delay_line[8])  * coeff[7];
 
    //Assign upper 16-bits to output.
    assign dout = accum[31:16];
 
    initial begin   
        //Load the filter coefficients.
        coeff[0] = 16'd2552;
        coeff[1] = 16'd4557;
        coeff[2] = 16'd5051;
        coeff[3] = 16'd8006;
        coeff[4] = 16'd9265;
        coeff[5] = 16'd11427;
        coeff[6] = 16'd12396;
        coeff[7] = 16'd13200;
 
        //Initialize delay line.
        for(i = 0; i < `FILT_LENGTH; i = i+1'b1) begin
            delay_line[i] = 16'd0;
        end
    end
 
    //Advance the data through the delay line every clock cycle.
    generate
        for (c = `FILT_LENGTH-1; c > 0; c = c - 1) begin: inc_delay
            always @(posedge clk) begin
                if(en) delay_line[c] <= delay_line[c-1];
            end
        end
    endgenerate
 
    //Update with input data.
    always @(posedge clk) begin
        if(en) delay_line[0] <= din;
    end
endmodule


clk_en.v
module clk_en(
    input clk,
    output reg en = 1'b0
    );
 
    reg [15:0]en_counter = 16'h0;
 
    always @(posedge clk) begin
        if(en_counter == 16'd449) begin
            en_counter <= 16'h0;
            en <= 1'b1;
        end
        else begin
            en_counter <= en_counter + 1'b1;
            en <= 1'b0;
        end
    end
 
endmodule


lfsr.v
`timescale 1ns / 1ps
 
module lfsr(
    input clk,
    input en,
    input rst,
    output [15:0]lfsr_out
    );
 
    //Create a 16-bit linear feedback shift register with
    //maximal polynomial x^16 + x^14 + x^13 + x^11 + 1.
    reg [15:0]lfsr = 16'd1;
    wire feedback;
 
    assign feedback = ((lfsr[15] ^ lfsr[13]) ^ lfsr[12]) ^ lfsr[10];
    assign lfsr_out = lfsr;
 
    //Update the linear feedback shift register.
    always @(posedge clk) begin
        if(rst) lfsr <= 16'd1;
        else if(en) lfsr <= {lfsr[14:0], feedback};
    end
endmodule


TB_LMS_Adapt.v
module TB_LMS_Adapt;
 
    reg  clk = 1'b0;
    reg  [7:0]sw = 1'b1;
    wire [7:0]JA;
 
    wire [15:0]d;
    wire [15:0]y;
    wire [15:0]e;
    wire [15:0]n1;
 
    filter_top uut(.clk(clk), .sw(sw), .JA(JA));
 
    assign d = uut.fout;
    assign y = uut.aout;
    assign e = uut.err;
    assign n1 = uut.af.n1[0];
 
    always #5 clk = ~clk;
 
    initial begin
        //#20000000 sw[0] = 1'b1;
        //#40000 sw[0] = 1'b1;
    end
endmodule


块FIR滤波器

块FIR滤波器的创建是对块概念的初步测试,因为其计算块比LMS滤波器计算块简单得多。滤波器的核心由四个计算模块组成,每个时钟周期对四个数据样本进行操作。

Block_FIR.v
`timescale 1ns / 1ps
 
module Block_FIR(
    input  clk,
    input  en,
    input  signed [15:0]x0_in,
    input  signed [15:0]x1_in,
    input  signed [15:0]x2_in,
    input  signed [15:0]x3_in,
    output reg signed [15:0]x0_out = 16'd0,
    output reg signed [15:0]x1_out = 16'd0,
    output reg signed [15:0]x2_out = 16'd0,
    output reg signed [15:0]x3_out = 16'd0,
    output signed [15:0]u0,
    output signed [15:0]u1,
    output signed [15:0]u2,
    output signed [15:0]u3
    );
 
    //Filter coefficients.
    parameter b0 = 16'd1;
    parameter b1 = 16'd1;
    parameter b2 = 16'd1;
    parameter b3 = 16'd1;
 
    //Coefficient registers.
    reg signed [15:0]_b0 = b0;
    reg signed [15:0]_b1 = b1;
    reg signed [15:0]_b2 = b2;
    reg signed [15:0]_b3 = b3;
 
    //Register unit.
    always @(posedge clk) begin
        if(en) begin
            x0_out <= x0_in;
            x1_out <= x1_in;
            x2_out <= x2_in;
            x3_out <= x3_in;
        end
    end
 
    /******************************IPC1******************************/   
    //Multiplier outputs.
    wire signed [31:0]IPC1_mult0;
    wire signed [31:0]IPC1_mult1;
    wire signed [31:0]IPC1_mult2;
    wire signed [31:0]IPC1_mult3;
 
    //Adder outputs.
    wire signed [31:0]IPC1_add0;
    wire signed [31:0]IPC1_add1;
    wire signed [31:0]IPC1_out;
 
    //Multiply inputs with coefficients.
    assign IPC1_mult0  = x3_in * _b0;
    assign IPC1_mult1  = x2_in * _b1;
    assign IPC1_mult2  = x1_in * _b2;
    assign IPC1_mult3  = x0_in * _b3;
 
    //Add multiplied values together.
    assign IPC1_add0 = IPC1_mult0 + IPC1_mult1;
    assign IPC1_add1 = IPC1_mult2 + IPC1_mult3;
    assign IPC1_out  = IPC1_add0  + IPC1_add1;
 
    //Assign output.
    assign u3 = IPC1_out[31:16];
 
    /******************************IPC2******************************/   
    //Multiplier outputs.
    wire signed [31:0]IPC2_mult0;
    wire signed [31:0]IPC2_mult1;
    wire signed [31:0]IPC2_mult2;
    wire signed [31:0]IPC2_mult3;
 
    //Adder outputs.
    wire signed [31:0]IPC2_add0;
    wire signed [31:0]IPC2_add1;
    wire signed [31:0]IPC2_out;
 
    //Multiply inputs with coefficients.
    assign IPC2_mult0  = x2_in *  _b0;
    assign IPC2_mult1  = x1_in *  _b1;
    assign IPC2_mult2  = x0_in *  _b2;
    assign IPC2_mult3  = x3_out * _b3;
 
    //Add multiplied values together.
    assign IPC2_add0 = IPC2_mult0 + IPC2_mult1;
    assign IPC2_add1 = IPC2_mult2 + IPC2_mult3;
    assign IPC2_out  = IPC2_add0  + IPC2_add1;
 
    //Assign output.
    assign u2 = IPC2_out[31:16];
 
    /******************************IPC3******************************/   
    //Multiplier outputs.
    wire signed [31:0]IPC3_mult0;
    wire signed [31:0]IPC3_mult1;
    wire signed [31:0]IPC3_mult2;
    wire signed [31:0]IPC3_mult3;
 
    //Adder outputs.
    wire signed [31:0]IPC3_add0;
    wire signed [31:0]IPC3_add1;
    wire signed [31:0]IPC3_out;
 
    //Multiply inputs with coefficients.
    assign IPC3_mult0  = x1_in *  _b0;
    assign IPC3_mult1  = x0_in *  _b1;
    assign IPC3_mult2  = x3_out * _b2;
    assign IPC3_mult3  = x2_out * _b3;
 
    //Add multiplied values together.
    assign IPC3_add0 = IPC3_mult0 + IPC3_mult1;
    assign IPC3_add1 = IPC3_mult2 + IPC3_mult3;
    assign IPC3_out  = IPC3_add0  + IPC3_add1;
 
    //Assign output.
    assign u1 = IPC3_out[31:16];
 
    /******************************IPC4******************************/   
    //Multiplier outputs.
    wire signed [31:0]IPC4_mult0;
    wire signed [31:0]IPC4_mult1;
    wire signed [31:0]IPC4_mult2;
    wire signed [31:0]IPC4_mult3;
 
    //Adder outputs.
    wire signed [31:0]IPC4_add0;
    wire signed [31:0]IPC4_add1;
    wire signed [31:0]IPC4_out;
 
    //Multiply inputs with coefficients.
    assign IPC4_mult0  = x0_in *  _b0;
    assign IPC4_mult1  = x3_out * _b1;
    assign IPC4_mult2  = x2_out * _b2;
    assign IPC4_mult3  = x1_out * _b3;
 
    //Add multiplied values together.
    assign IPC4_add0 = IPC4_mult0 + IPC4_mult1;
    assign IPC4_add1 = IPC4_mult2 + IPC4_mult3;
    assign IPC4_out  = IPC4_add0  + IPC4_add1;
 
    //Assign output.
    assign u0 = IPC4_out[31:16];
endmodule


Block_FIR_Top.v
`timescale 1ns / 1ps
 
module Block_FIR_Top(
    input  clk,
    input  [7:0]sw,
    output [7:0]JA
    );
 
    wire clk_100MHz;
    wire clk_25MHz;
    wire en;
 
    wire signed [15:0]tonegen_out;
 
    //Data divider outputs.
    wire en_out;
    wire signed [15:0]d0_out;
    wire signed [15:0]d1_out;
    wire signed [15:0]d2_out;
    wire signed [15:0]d3_out;
 
    //FIR f1 outputs
    wire signed [15:0]f1_x0_out;
    wire signed [15:0]f1_x1_out;
    wire signed [15:0]f1_x2_out;
    wire signed [15:0]f1_x3_out;
    wire signed [15:0]f1_u0;
    wire signed [15:0]f1_u1;
    wire signed [15:0]f1_u2;
    wire signed [15:0]f1_u3;
 
    //FIR f2 outputs
    wire signed [15:0]f2_x0_out;
    wire signed [15:0]f2_x1_out;
    wire signed [15:0]f2_x2_out;
    wire signed [15:0]f2_x3_out;
    wire signed [15:0]f2_u0;
    wire signed [15:0]f2_u1;
    wire signed [15:0]f2_u2;
    wire signed [15:0]f2_u3;
 
    //FIR f3 outputs
    wire signed [15:0]f3_x0_out;
    wire signed [15:0]f3_x1_out;
    wire signed [15:0]f3_x2_out;
    wire signed [15:0]f3_x3_out;
    wire signed [15:0]f3_u0;
    wire signed [15:0]f3_u1;
    wire signed [15:0]f3_u2;
    wire signed [15:0]f3_u3;
 
    //FIR f4 outputs
    wire signed [15:0]f4_x0_out;
    wire signed [15:0]f4_x1_out;
    wire signed [15:0]f4_x2_out;
    wire signed [15:0]f4_x3_out;
    wire signed [15:0]f4_u0;
    wire signed [15:0]f4_u1;
    wire signed [15:0]f4_u2;
    wire signed [15:0]f4_u3;
 
    //Block filter outputs.
    wire signed [15:0]y0;
    wire signed [15:0]y1;
    wire signed [15:0]y2;
    wire signed [15:0]y3;
 
    wire [15:0]serial_dout; //Serialized output.
    wire [15:0]mout;        //Selected output.
 
    assign en = 1'b1;
 
    clk_wiz_0 cw(.clk_in1(clk), .clk_out1(clk_100MHz), .clk_out2(clk_25MHz));   
    ToneGen tg(.clk(clk_100MHz), .en(en), .dout(tonegen_out));
    Data_Div4 dd(.clk(clk_100MHz), .en(en), .din(tonegen_out), .en_out(en_out),
                 .d0_out(d0_out), .d1_out(d1_out), .d2_out(d2_out), .d3_out(d3_out));
 
    //Build 16-tap block FIR filter.
    Block_FIR #(.b0(16'd2320), .b1(16'd4143), .b2(16'd4592), .b3(16'd7278))f1(.clk(clk_25MHz), .en(en),
                 .x0_in(d0_out), .x1_in(d1_out), .x2_in(d2_out), .x3_in(d3_out),
                 .x0_out(f1_x0_out), .x1_out(f1_x1_out), .x2_out(f1_x2_out), .x3_out(f1_x3_out),
                 .u0(f1_u0), .u1(f1_u1), .u2(f1_u2), .u3(f1_u3));
 
    Block_FIR #(.b0(16'd8423), .b1(16'd10389), .b2(16'd11269), .b3(16'd12000))f2(.clk(clk_25MHz), .en(en),
                 .x0_in(f1_x0_out), .x1_in(f1_x1_out), .x2_in(f1_x2_out), .x3_in(f1_x3_out),
                 .x0_out(f2_x0_out), .x1_out(f2_x1_out), .x2_out(f2_x2_out), .x3_out(f2_x3_out),
                 .u0(f2_u0), .u1(f2_u1), .u2(f2_u2), .u3(f2_u3));
 
    Block_FIR #(.b0(16'd12000), .b1(16'd11269), .b2(16'd10389), .b3(16'd8423))f3(.clk(clk_25MHz), .en(en),
                 .x0_in(f2_x0_out), .x1_in(f2_x1_out), .x2_in(f2_x2_out), .x3_in(f2_x3_out),
                 .x0_out(f3_x0_out), .x1_out(f3_x1_out), .x2_out(f3_x2_out), .x3_out(f3_x3_out),
                 .u0(f3_u0), .u1(f3_u1), .u2(f3_u2), .u3(f3_u3));
 
    Block_FIR #(.b0(16'd7278), .b1(16'd4592), .b2(16'd4143), .b3(16'd2320))f4(.clk(clk_25MHz), .en(en),
                 .x0_in(f3_x0_out), .x1_in(f3_x1_out), .x2_in(f3_x2_out), .x3_in(f3_x3_out),
                 .x0_out(f4_x0_out), .x1_out(f4_x1_out), .x2_out(f4_x2_out), .x3_out(f4_x3_out),
                 .u0(f4_u0), .u1(f4_u1), .u2(f4_u2), .u3(f4_u3));
 
    //Add outputs together.
    assign y0 = f1_u0 + f2_u0 + f3_u0 + f4_u0;
    assign y1 = f1_u1 + f2_u1 + f3_u1 + f4_u1;
    assign y2 = f1_u2 + f2_u2 + f3_u2 + f4_u2;
    assign y3 = f1_u3 + f2_u3 + f3_u3 + f4_u3;
 
    //Serialize the output data.
    Data_Mult4 dm(.clk(clk_100MHz), .en(en), .d0_in(y0), .d1_in(y1), .d2_in(y2), .d3_in(y3), .dout(serial_dout));
 
    //Choose filtered or unfiltered output based on sw0.
    assign mout = sw[0] ? serial_dout : tonegen_out;
 
    //Take only the upper 8 bits and make it unsigned.
    //assign JA = mout[15:8] + 8'd128;
    assign {JA[3:0], JA[7:4]} = mout[15:8] + 8'd128;
endmodule


ToneGen.v
`timescale 1ns/1ps
 
module ToneGen(
    input clk,
    input en,
    output [15:0]dout
    );
 
    //Tone generation data.  At a 44KHz sampling rate, the lookup table
    //generates a 440Hz and 4.4KHz tone mixed together.
    reg [15:0]tonetbl[99:0];
 
    //Pointer into the tone table.
    reg [6:0]toneptr = 7'h00;
 
    initial begin
        tonetbl[0] = 10970;
        tonetbl[1] = 18151;
        tonetbl[2] = 19197;
        tonetbl[3] = 14105;
        tonetbl[4] = 5211;
        tonetbl[5] = -3704;
        tonetbl[6] = -8858;
        tonetbl[7] = -7914;
        tonetbl[8] = -876;
        tonetbl[9] = 9912;
        tonetbl[10] = 20660;
        tonetbl[11] = 27581;
        tonetbl[12] = 28330;
        tonetbl[13] = 22905;
        tonetbl[14] = 13642;
        tonetbl[15] = 4326;
        tonetbl[16] = -1260;
        tonetbl[17] = -780;
        tonetbl[18] = 5767;
        tonetbl[19] = 16037;
        tonetbl[20] = 26244;
        tonetbl[21] = 32601;
        tonetbl[22] = 32767;
        tonetbl[23] = 26741;
        tonetbl[24] = 16863;
        tonetbl[25] = 6918;
        tonetbl[26] = 692;
        tonetbl[27] = 527;
        tonetbl[28] = 6421;
        tonetbl[29] = 16037;
        tonetbl[30] = 25590;
        tonetbl[31] = 31295;
        tonetbl[32] = 30814;
        tonetbl[33] = 24149;
        tonetbl[34] = 13642;
        tonetbl[35] = 3081;
        tonetbl[36] = -3745;
        tonetbl[37] = -4494;
        tonetbl[38] = 837;
        tonetbl[39] = 9912;
        tonetbl[40] = 18947;
        tonetbl[41] = 24161;
        tonetbl[42] = 23217;
        tonetbl[43] = 16119;
        tonetbl[44] = 5211;
        tonetbl[45] = -5718;
        tonetbl[46] = -12878;
        tonetbl[47] = -13924;
        tonetbl[48] = -8853;
        tonetbl[49] = 0;
        tonetbl[50] = 8853;
        tonetbl[51] = 13924;
        tonetbl[52] = 12878;
        tonetbl[53] = 5718;
        tonetbl[54] = -5211;
        tonetbl[55] = -16119;
        tonetbl[56] = -23217;
        tonetbl[57] = -24161;
        tonetbl[58] = -18947;
        tonetbl[59] = -9912;
        tonetbl[60] = -837;
        tonetbl[61] = 4494;
        tonetbl[62] = 3745;
        tonetbl[63] = -3081;
        tonetbl[64] = -13642;
        tonetbl[65] = -24149;
        tonetbl[66] = -30814;
        tonetbl[67] = -31295;
        tonetbl[68] = -25590;
        tonetbl[69] = -16037;
        tonetbl[70] = -6421;
        tonetbl[71] = -527;
        tonetbl[72] = -692;
        tonetbl[73] = -6918;
        tonetbl[74] = -16863;
        tonetbl[75] = -26741;
        tonetbl[76] = -32767;
        tonetbl[77] = -32601;
        tonetbl[78] = -26244;
        tonetbl[79] = -16037;
        tonetbl[80] = -5767;
        tonetbl[81] = 780;
        tonetbl[82] = 1260;
        tonetbl[83] = -4326;
        tonetbl[84] = -13642;
        tonetbl[85] = -22905;
        tonetbl[86] = -28330;
        tonetbl[87] = -27581;
        tonetbl[88] = -20660;
        tonetbl[89] = -9912;
        tonetbl[90] = 876;
        tonetbl[91] = 7914;
        tonetbl[92] = 8858;
        tonetbl[93] = 3704;
        tonetbl[94] = -5211;
        tonetbl[95] = -14105;
        tonetbl[96] = -19197;
        tonetbl[97] = -18151;
        tonetbl[98] = -10970;
        tonetbl[99] = 0;
    end
 
    //Assign the output to the value in the lookup table
    //pointed to by toneptr.
    assign dout = tonetbl[toneptr];
 
    //Increment through the tone lookup table and wrap back
    //to zero when at the end.
    always @(posedge clk) begin
        if(en) begin
            if(toneptr == 7'd99) begin
                toneptr <= 7'd0;
            end
            else begin
                toneptr <= toneptr + 1'b1;
            end
        end
    end
endmodule


Data_Div4.v
`timescale 1ns / 1ps
 
module Data_Div4(
    input clk,
    input en,
    input [15:0]din,
    output reg en_out = 1'b0,
    output reg [15:0]d0_out = 16'd0,
    output reg [15:0]d1_out = 16'd0,
    output reg [15:0]d2_out = 16'd0,
    output reg [15:0]d3_out = 16'd0
    );
 
    reg [1:0]count = 2'd0;
 
    //Internal registers for holding data.
    reg [15:0]d0 = 16'd0;
    reg [15:0]d1 = 16'd0;
    reg [15:0]d2 = 16'd0;
 
    always @(posedge clk) begin
        if(en) begin
            count <= count + 1'b1;
 
            if(count == 2'b00) begin
                d0 <= din;
                en_out <= 1'b0;
            end
            else if(count == 2'b01) begin
                d1 <= din;
                en_out <= 1'b0;
            end
            else if(count == 2'b10) begin
                d2 <= din;
                en_out <= 1'b0;
            end
            else begin
                d0_out <= d0;
                d1_out <= d1;
                d2_out <= d2;
                d3_out <= din;
                en_out <= 1'b1;
            end
        end
    end
endmodule


Data_Mult4.v
`timescale 1ns / 1ps
 
module Data_Mult4(
    input clk,
    input en,
    input [15:0]d0_in,
    input [15:0]d1_in,
    input [15:0]d2_in,
    input [15:0]d3_in,
    output reg [15:0]dout = 16'd0
    );
 
    reg [1:0]count = 2'd0;
 
    //Internal registers for holding data.
    reg [15:0]d0 = 16'd0;
    reg [15:0]d1 = 16'd0;
    reg [15:0]d2 = 16'd0;
    reg [15:0]d3 = 16'd0;
 
    always @(posedge clk) begin
        if(en) begin
            count <= count + 1'b1;
 
            if(count == 2'b00) begin
                dout <= d0;
            end
            else if(count == 2'b01) begin
                dout <= d1;  
            end
            else if(count == 2'b10) begin
                dout <= d2;
            end
            else begin
                d0 <= d0_in;
                d1 <= d1_in;
                d2 <= d2_in;
                d3 <= d3_in;
                dout <= d3;      
            end
        end
    end
 
endmodule


Block_FIR_TB.v
`timescale 1ns / 1ps
 
module Block_FIR_TB;
    reg  clk = 1'b0;
    reg  [7:0]sw = 8'd0;
    wire [7:0]JA;
 
    Block_FIR_Top uut(.clk(clk), .sw(sw), .JA(JA));
 
    initial begin
        #50000 sw = 8'd1;
    end
 
    always #5 clk = ~clk;   
endmodule