fix(fpga): PR-O.8 — cfg_tdata 24->16 for Pipelined Streaming I/O

PR-O in 8541443 packed cfg_tdata using PG109 Burst I/O semantics (22-bit
SCALE_SCH, 24-bit total). The xfft_2048 IP we instantiate is Pipelined
Streaming I/O — that arch has SCALE_SCH width = 2*ceil(NFFT_MAX/2) = 12
bits, cfg_tdata = 16 bits. Mismatch surfaced when the Vivado-regenerated
.xci reported C_S_AXIS_CONFIG_TDATA_WIDTH=16. Realigns wrappers + TBs.

Total /N scaling preserved: 22'h155555 (/N as 11 stages of >>1) becomes
12'hAA9 (stage 1 alone >>1 + stages 2-11 grouped as 5 pairs of >>2 each).
Iverilog fft_engine.v fallback unchanged — applies fixed >>>1 per stage.

Verified: tb_fft_engine_axi_bridge 4/4, tb_matched_filter_processing_chain
40/40. Vivado .dcp / .veo regenerated from .xci; gitignored as usual.
This commit is contained in:
Jason
2026-05-02 10:08:00 +05:45
parent 8541443c64
commit af64b0952e
6 changed files with 62 additions and 69 deletions

View File

@@ -20,10 +20,13 @@
// LogiCORE Pipelined Streaming ~N + ~150-cycle pipeline. Functional behavior
// is identical from the chain's view.
//
// AUDIT-C10/C-8: cfg_tdata carries SCALE_SCH+FWD/INV in scaled mode (24 bits).
// Schedule = `RP_FFT_SCALE_SCH (radar_params.vh) = >>1 per stage = total /N.
// Both the LogiCORE path and the iverilog fft_engine fallback honor the same
// schedule, so absolute output magnitudes match between sim and silicon.
// AUDIT-C10/C-8: cfg_tdata carries SCALE_SCH+FWD/INV in scaled mode. Layout
// is 16 bits per PG109 Pipelined Streaming I/O (12-bit SCALE_SCH + 1-bit
// FWD/INV + 3-bit padding); see radar_params.vh `RP_FFT_CFG_TDATA_W.
// Schedule = `RP_FFT_SCALE_SCH = total /N (LogiCORE pair-grouped 2'b10s on
// stages 2-11 + 2'b01 on stage 1; iverilog fft_engine applies >>>1 per stage
// for the same total). Absolute output magnitudes match between sim and
// silicon.
//
// PR-O.7 (2026-05-02): bridge widened to DATA_W=32 default and AXIS-data
// 64-bit packed {Q[31:0], I[31:0]}. The matched-filter chain feeds the
@@ -63,7 +66,7 @@ module fft_engine_axi_bridge #(
// ============================================================================
localparam AXIS_W = 2 * DATA_W; // 64 when DATA_W=32
reg [`RP_FFT_CFG_TDATA_W-1:0] cfg_tdata; // 24 bits: {pad, SCALE_SCH, FWD/INV}
reg [`RP_FFT_CFG_TDATA_W-1:0] cfg_tdata; // 16 bits: {3'b pad, 12'b SCALE_SCH, 1'b FWD/INV}
reg cfg_tvalid;
wire cfg_tready;
@@ -155,8 +158,9 @@ always @(posedge clk or negedge reset_n) begin
skid_valid <= 1'b0;
if (start) begin
inverse_latched <= inverse;
// {pad[0], SCALE_SCH[21:0], FWD/INV[0]}; ~inverse so FWD=1.
cfg_tdata <= {1'b0, `RP_FFT_SCALE_SCH, ~inverse};
// {pad[2:0], SCALE_SCH[11:0], FWD/INV[0]}; ~inverse so FWD=1.
// PG109 Pipelined Streaming I/O cfg_tdata = 16 bits total.
cfg_tdata <= {3'b0, `RP_FFT_SCALE_SCH, ~inverse};
cfg_tvalid <= 1'b1;
in_count <= 0;
accept_count <= 0;

View File

@@ -40,11 +40,11 @@
"model_parameters": {
"C_XDEVICEFAMILY": [ { "value": "artix7", "resolve_type": "generated", "usage": "all" } ],
"C_PART": [ { "value": "xc7a50tftg256-2", "resolve_type": "generated", "usage": "all" } ],
"C_S_AXIS_CONFIG_TDATA_WIDTH": [ { "value": "24", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_S_AXIS_CONFIG_TDATA_WIDTH": [ { "value": "16", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_S_AXIS_DATA_TDATA_WIDTH": [ { "value": "64", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_M_AXIS_DATA_TDATA_WIDTH": [ { "value": "64", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_M_AXIS_DATA_TUSER_WIDTH": [ { "value": "8", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_M_AXIS_STATUS_TDATA_WIDTH": [ { "value": "8", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_M_AXIS_DATA_TUSER_WIDTH": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_M_AXIS_STATUS_TDATA_WIDTH": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_THROTTLE_SCHEME": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_NSSR": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_CHANNELS": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
@@ -103,7 +103,7 @@
"boundary": {
"ports": {
"aclk": [ { "direction": "in", "driver_value": "0x1" } ],
"s_axis_config_tdata": [ { "direction": "in", "size_left": "23", "size_right": "0" } ],
"s_axis_config_tdata": [ { "direction": "in", "size_left": "15", "size_right": "0" } ],
"s_axis_config_tvalid": [ { "direction": "in" } ],
"s_axis_config_tready": [ { "direction": "out" } ],
"s_axis_data_tdata": [ { "direction": "in", "size_left": "63", "size_right": "0" } ],
@@ -111,13 +111,9 @@
"s_axis_data_tready": [ { "direction": "out" } ],
"s_axis_data_tlast": [ { "direction": "in" } ],
"m_axis_data_tdata": [ { "direction": "out", "size_left": "63", "size_right": "0" } ],
"m_axis_data_tuser": [ { "direction": "out", "size_left": "7", "size_right": "0" } ],
"m_axis_data_tvalid": [ { "direction": "out" } ],
"m_axis_data_tready": [ { "direction": "in", "driver_value": "0x1" } ],
"m_axis_data_tlast": [ { "direction": "out" } ],
"m_axis_status_tdata": [ { "direction": "out", "size_left": "7", "size_right": "0" } ],
"m_axis_status_tvalid": [ { "direction": "out" } ],
"m_axis_status_tready": [ { "direction": "in", "driver_value": "0x1" } ],
"event_frame_started": [ { "direction": "out", "driver_value": "0x0" } ],
"event_tlast_unexpected": [ { "direction": "out", "driver_value": "0x0" } ],
"event_tlast_missing": [ { "direction": "out", "driver_value": "0x0" } ],
@@ -269,31 +265,6 @@
"POLARITY": [ { "value": "ACTIVE_HIGH", "value_src": "constant", "usage": "all" } ]
}
},
"M_AXIS_STATUS": {
"vlnv": "xilinx.com:interface:axis:1.0",
"abstraction_type": "xilinx.com:interface:axis_rtl:1.0",
"mode": "master",
"parameters": {
"TDATA_NUM_BYTES": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TDEST_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TID_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TUSER_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TREADY": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TSTRB": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TKEEP": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TLAST": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"FREQ_HZ": [ { "value": "100000000", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"PHASE": [ { "value": "0.0", "resolve_type": "generated", "format": "float", "is_ips_inferred": true, "is_static_object": false } ],
"CLK_DOMAIN": [ { "value": "", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ],
"LAYERED_METADATA": [ { "value": "undef", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ],
"INSERT_VIP": [ { "value": "0", "resolve_type": "user", "format": "long", "usage": "simulation.rtl", "is_ips_inferred": true, "is_static_object": false } ]
},
"port_maps": {
"TDATA": [ { "physical_name": "m_axis_status_tdata" } ],
"TREADY": [ { "physical_name": "m_axis_status_tready" } ],
"TVALID": [ { "physical_name": "m_axis_status_tvalid" } ]
}
},
"M_AXIS_DATA": {
"vlnv": "xilinx.com:interface:axis:1.0",
"abstraction_type": "xilinx.com:interface:axis_rtl:1.0",
@@ -302,7 +273,7 @@
"TDATA_NUM_BYTES": [ { "value": "8", "value_src": "auto", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TDEST_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TID_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TUSER_WIDTH": [ { "value": "8", "value_src": "auto", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TUSER_WIDTH": [ { "value": "0", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TREADY": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TSTRB": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TKEEP": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
@@ -317,7 +288,6 @@
"TDATA": [ { "physical_name": "m_axis_data_tdata" } ],
"TLAST": [ { "physical_name": "m_axis_data_tlast" } ],
"TREADY": [ { "physical_name": "m_axis_data_tready" } ],
"TUSER": [ { "physical_name": "m_axis_data_tuser" } ],
"TVALID": [ { "physical_name": "m_axis_data_tvalid" } ]
}
},
@@ -326,7 +296,7 @@
"abstraction_type": "xilinx.com:interface:axis_rtl:1.0",
"mode": "slave",
"parameters": {
"TDATA_NUM_BYTES": [ { "value": "3", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TDATA_NUM_BYTES": [ { "value": "2", "value_src": "auto", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TDEST_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TID_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TUSER_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
@@ -349,5 +319,5 @@
}
}
},
"checksum": "6bf94ec5"
"checksum": "b2c20790"
}

View File

@@ -95,18 +95,33 @@
// chain output (FFT·conj(FFT)·IFFT) is /N², predictable and per-frame
// constant, so CFAR alpha calibrated in iverilog matches silicon counts.
//
// cfg_tdata layout per PG109 (1 channel, no CP, fixed NFFT, scaled):
// bit 0 = FWD/INV (1 = forward, 0 = inverse)
// bits[22:1] = SCALE_SCH (22 bits)
// bit 23 = byte-align padding (0)
// Total cfg_tdata width = 24 bits.
// cfg_tdata layout per PG109 (1 channel, no CP, fixed NFFT, scaled,
// Pipelined Streaming I/O architecture). The IP groups radix-2 stages
// into radix-4-style pairs for scheduling — each 2-bit field covers a
// pair of stages, so SCALE_SCH width = 2 * ceil(NFFT_MAX/2) = 12 bits
// for NFFT_MAX = 11 (= log2(2048)). (PR-O.2 originally used the 22-bit
// Radix-2 Burst-I/O layout, i.e. 2 bits per stage x 11 stages — wrong
// for our Pipelined Streaming arch; corrected in the PR-O.8 commit after
// the Vivado IP regen reported a 16-bit cfg_tdata.)
//
// The same schedule is replicated in fft_engine.v (iverilog fallback) by
// applying convergent-rounding >>>1 at every BF_WRITE stage so absolute
// counts agree between sim and silicon.
`define RP_FFT_CFG_TDATA_W 24
`define RP_FFT_SCALE_SCH_W 22
`define RP_FFT_SCALE_SCH 22'h155555 // [01,01,01,01,01,01,01,01,01,01,01]
// bit 0 = FWD/INV (1 = forward, 0 = inverse)
// bits[12:1] = SCALE_SCH (12 bits, LSB = stage 1 alone, then 5 pairs)
// bits[15:13] = byte-align padding (0)
// Total cfg_tdata width = 16 bits.
//
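// SCALE_SCH = 12'hAA9 = 12'b10_10_10_10_10_01. Bit ranges in this table
// are SCALE_SCH-relative; add 1 for the position inside cfg_tdata, whose
// bit 0 is FWD/INV:
// stage 1 alone bits[1:0] = 2'b01 → >>1
// stages 2..3 bits[3:2] = 2'b10 → >>2 (/4 across pair)
// stages 4..5 bits[5:4] = 2'b10
// stages 6..7 bits[7:6] = 2'b10
// stages 8..9 bits[9:8] = 2'b10
// stages 10..11 bits[11:10] = 2'b10
// Total shift = 1 + 5*2 = 11 = /N. The iverilog fft_engine.v fallback
// applies >>>1 at every BF_WRITE (= /N total too) so absolute output
// magnitudes match between sim and silicon for any /N-equivalent
// schedule.
// NOTE(review): PG109 appears to order the Pipelined Streaming schedule
// from the two LSBs (first stage pair) upward and, when N is not a power
// of 4, to allow only 2'b00/2'b01 in the two MSBs because the final stage
// is a lone radix-2 stage. Under that reading the unpaired stage sits at
// bits[11:10] and the same /N total is 12'b01_10_10_10_10_10 = 12'h6AA,
// while 12'hAA9 puts 2'b10 in the MSBs. TODO confirm the stage-to-bit
// mapping against PG109 before relying on 12'hAA9 in silicon.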
`define RP_FFT_CFG_TDATA_W 16
`define RP_FFT_SCALE_SCH_W 12
`define RP_FFT_SCALE_SCH 12'hAA9
// 3-ladder waveform identity (replaces 1-bit use_long_chirp rail in PR-C onward)
// `define RP_WAVE_<NAME> values are 2-bit waveform selectors carried on

View File

@@ -319,14 +319,15 @@ endmodule
// ============================================================================
// Stub xfft_2048 replaces the production wrapper for this TB.
// AUDIT-C10/C-8: cfg_tdata is 24-bit in scaled mode; tuser dropped with BFP.
// AUDIT-C10/C-8 + PR-O.8: cfg_tdata is 16-bit in scaled mode (PG109 Pipelined
// Streaming I/O); tuser dropped with BFP.
// PR-O.7: AXIS data widened to 64-bit packed {Q[31:0], I[31:0]} so the IFFT
// can carry the conjugate-mult Q30 product end-to-end.
// ============================================================================
module xfft_2048 (
input wire aclk,
input wire aresetn,
input wire [23:0] s_axis_config_tdata,
input wire [15:0] s_axis_config_tdata,
input wire s_axis_config_tvalid,
output wire s_axis_config_tready,
input wire [63:0] s_axis_data_tdata,

View File

@@ -32,10 +32,12 @@ module tb_xfft_2048_xsim;
reg aclk = 0;
reg aresetn = 0;
// AUDIT-C10/C-8: cfg_tdata widened to 24 bits (scaled mode SCALE_SCH+FWD/INV).
// PR-O.7: data AXIS widened to 64-bit packed {Q[31:0], I[31:0]}
// matches the regenerated xfft_2048_ip with input_width=32.
reg [23:0] cfg_tdata;
// AUDIT-C10/C-8 + PR-O.8: cfg_tdata is 16 bits in scaled mode with
// Pipelined Streaming I/O: 12-bit SCALE_SCH (2*ceil(NFFT_MAX/2)) + 1-bit
// FWD/INV + 3 bits padding. PR-O.7: data AXIS widened to 64-bit
// packed {Q[31:0], I[31:0]} to match the Vivado-regenerated
// xfft_2048_ip with input_width=32.
reg [15:0] cfg_tdata;
reg cfg_tvalid;
wire cfg_tready;
@@ -101,8 +103,8 @@ module tb_xfft_2048_xsim;
input fwd;
begin
@(posedge aclk);
// {pad[0], SCALE_SCH[21:0], FWD/INV[0]} see radar_params.vh
cfg_tdata <= {1'b0, `RP_FFT_SCALE_SCH, fwd};
// {pad[2:0], SCALE_SCH[11:0], FWD/INV[0]} — see radar_params.vh
cfg_tdata <= {3'b0, `RP_FFT_SCALE_SCH, fwd};
cfg_tvalid <= 1'b1;
@(posedge aclk);
while (!cfg_tready) @(posedge aclk);

View File

@@ -25,11 +25,11 @@
// >>15+saturate that crushed chirp/DC/impulse autocorrelations to zero under
// deterministic /N scaling see project_mf_chain_dynrange_defect_2026-05-02.
//
// Config tdata layout (24-bit, scaled mode see AUDIT-C10/C-8 in
// Config tdata layout (16-bit, scaled mode — see AUDIT-C10/C-8 in
// radar_params.vh `RP_FFT_SCALE_SCH):
// bit 0 = FWD/INV (1 = forward, 0 = inverse)
// bits[22:1] = SCALE_SCH (22 bits, fixed schedule from RP_FFT_SCALE_SCH)
// bit 23 = byte-align padding
// bits[12:1] = SCALE_SCH (12 bits, fixed schedule from RP_FFT_SCALE_SCH)
// bits[15:13]= byte-align padding
//
// Scaled mode replaces the previous Block-Floating-Point setting. BFP returned
// a per-frame BLK_EXP on m_axis_data_tuser that the bridge dropped sim and
@@ -42,9 +42,10 @@ module xfft_2048 (
input wire aclk,
input wire aresetn,
// Configuration channel (AXI-Stream slave). 24-bit tdata carries
// {pad, SCALE_SCH[21:0], FWD/INV}.
input wire [23:0] s_axis_config_tdata,
// Configuration channel (AXI-Stream slave). 16-bit tdata carries
// {pad[2:0], SCALE_SCH[11:0], FWD/INV} per PG109 Pipelined Streaming I/O
// (PR-O.8: SCALE_SCH width is 2*ceil(NFFT_MAX/2)=12, not 2*NFFT_MAX).
input wire [15:0] s_axis_config_tdata,
input wire s_axis_config_tvalid,
output wire s_axis_config_tready,