feat(fpga): swap matched-filter chain to Xilinx LogiCORE FFT v9.1 IP

Replaces the in-house iterative fft_engine.v in the matched-filter chain
with the Pipelined Streaming Xilinx FFT IP, closing RX-NEW-3 (FFT chain
~11x too slow vs PRI budget).

Components:
  * ip/xfft_2048_ip/xfft_2048_ip.xci — committed IP definition
    (16-bit fixed point, BFP scaling, convergent rounding, natural order,
    pipelined-streaming, BRAM data/reorder/phase factors). Vivado
    regenerates .dcp / sim-netlist from this on each build.
  * scripts/50t/gen_xfft_2048_ip.tcl — IP-Catalog generation script
  * scripts/50t/run_xfft_xsim.sh — XSim batch runner for tb_xfft_2048_xsim
  * xfft_2048.v — AXI-Stream wrapper. FFT_USE_XILINX_IP define routes to
    real LogiCORE for synth/XSim; falls back to fft_engine batched
    one-shot for iverilog (unit coverage only).
  * fft_engine_axi_bridge.v — exposes legacy fft_engine port surface on
    top of the xfft_2048 AXI wrapper, so the chain swap is a 1-line
    module-name change.
  * matched_filter_processing_chain.v — fft_engine -> fft_engine_axi_bridge
  * scripts/50t/build_50t.tcl — read_ip + generate_target + synth_ip;
    adds FFT_USE_XILINX_IP to verilog defines.
  * tb/tb_xfft_2048_xsim.v — XSim verification (DC, impulse, tone bin 128).
    All 5 assertions PASS on remote with the real IP; tuser=0x0a (BLK_EXP=10)
    confirms BFP scaling working.

Local iverilog regression: 32/34 PASS — identical to baseline. Same two
RX-NEW-3 failures (Receiver Integration, Matched Filter Chain) — these
only resolve in remote XSim with the real IP, since iverilog uses the
fft_engine fallback inside xfft_2048 (~150K cycles/pass, not the
~2200-cycle Pipelined Streaming throughput). MF cosim 4/4 PASS confirms
bridge bit-exact in fallback mode.

Pending: remote XSim of tb_radar_receiver_final to demonstrate Doppler
frames produced within PRI budget; remote synth to confirm DSP/timing
post-IP.
This commit is contained in:
Jason
2026-04-23 12:39:33 +05:45
parent cc6691dec9
commit 5c8cc8c96a
10 changed files with 1256 additions and 6 deletions

View File

@@ -17,3 +17,9 @@ tb/cosim/rx_final_doppler_out.csv
*.str
*.bit
*.ltx
# Vivado IP build artifacts (regenerated from .xci by gen_xfft_2048_ip.tcl).
# Only the .xci is committed — Vivado reproduces .dcp / sim-netlist / etc.
ip/xfft_2048_ip/*
!ip/xfft_2048_ip/xfft_2048_ip.xci
build_xsim_xfft/

View File

@@ -0,0 +1,177 @@
`timescale 1ns / 1ps
// ============================================================================
// fft_engine_axi_bridge — drop-in fft_engine replacement backed by xfft_2048
// ============================================================================
// Port list mirrors fft_engine.v exactly, so call sites in
// matched_filter_processing_chain.v swap with a single module-name change
// (`fft_engine` -> `fft_engine_axi_bridge`). Internally instantiates the
// xfft_2048 AXI-Stream wrapper, which routes to either the LogiCORE FFT v9.1
// (synth/XSim, when FFT_USE_XILINX_IP is defined) or the in-house batched
// fft_engine (iverilog fallback). Either way the legacy interface is the same.
//
// Behavior contract preserved from fft_engine:
// - `start` pulse begins a frame; `inverse` selects FWD/INV
// - feed N samples on din_re/im with din_valid (any spacing OK)
// - dout_re/im pulse out with dout_valid for N samples
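// - `done` is a registered one-cycle pulse, asserted on the clock edge that
//   samples the last output beat (tvalid && tlast)
// - `busy` is a registered flag: set on the clock edge that samples `start`,
//   cleared on the same edge that raises `done`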
//
// Latency: replaces fft_engine's ~150-180K-cycle iterative compute with the
// LogiCORE Pipelined Streaming ~N + ~150-cycle pipeline. Functional behavior
// is identical from the chain's view.
// ============================================================================
module fft_engine_axi_bridge #(
    parameter N            = 2048,
    parameter LOG2N        = 11,
    parameter DATA_W       = 16,
    // The three parameters below are not referenced inside this module; they
    // are kept so the parameter list matches the legacy fft_engine call site.
    parameter INTERNAL_W   = 32,
    parameter TWIDDLE_W    = 16,
    parameter TWIDDLE_FILE = "fft_twiddle_2048.mem"
) (
    input  wire                     clk,
    input  wire                     reset_n,    // active-low, asynchronous for this FSM
    input  wire                     start,      // sampled in S_IDLE only
    input  wire                     inverse,    // sampled together with `start`
    input  wire signed [DATA_W-1:0] din_re,
    input  wire signed [DATA_W-1:0] din_im,
    input  wire                     din_valid,  // honoured in S_FEED only
    output wire signed [DATA_W-1:0] dout_re,
    output wire signed [DATA_W-1:0] dout_im,
    output wire                     dout_valid,
    output reg                      busy,
    output reg                      done
);

    // ========================================================================
    // AXI-Stream signals to/from xfft_2048
    // ========================================================================
    reg  [7:0]  cfg_tdata;
    reg         cfg_tvalid;
    wire        cfg_tready;

    reg  [31:0] axi_din_tdata;
    reg         axi_din_tvalid;
    reg         axi_din_tlast;
    wire        axi_din_tready;

    wire [31:0] axi_dout_tdata;
    // NOTE(review): tuser is not consumed anywhere in this module. If it
    // carries the block-floating-point exponent, per-frame output scaling is
    // not compensated here -- confirm the chain tolerates that.
    wire [7:0]  axi_dout_tuser;
    wire        axi_dout_tvalid;
    wire        axi_dout_tlast;

    // xfft_2048 wrapper. The output side is always accepted
    // (m_axis_data_tready tied high): the legacy dout interface has no way to
    // apply backpressure.
    xfft_2048 u_xfft (
        .aclk                 (clk),
        .aresetn              (reset_n),
        .s_axis_config_tdata  (cfg_tdata),
        .s_axis_config_tvalid (cfg_tvalid),
        .s_axis_config_tready (cfg_tready),
        .s_axis_data_tdata    (axi_din_tdata),
        .s_axis_data_tvalid   (axi_din_tvalid),
        .s_axis_data_tlast    (axi_din_tlast),
        .s_axis_data_tready   (axi_din_tready),
        .m_axis_data_tdata    (axi_dout_tdata),
        .m_axis_data_tuser    (axi_dout_tuser),
        .m_axis_data_tvalid   (axi_dout_tvalid),
        .m_axis_data_tlast    (axi_dout_tlast),
        .m_axis_data_tready   (1'b1)
    );

    // Output mapping: AXI {Q,I} 32-bit word -> fft_engine-style separate re/im.
    // The fixed [15:0]/[31:16] slices (and the 32-bit pack in S_FEED) assume
    // DATA_W == 16.
    assign dout_re    = $signed(axi_dout_tdata[15:0]);
    assign dout_im    = $signed(axi_dout_tdata[31:16]);
    assign dout_valid = axi_dout_tvalid;

    // ========================================================================
    // Bridge FSM
    // ========================================================================
    //   S_IDLE  : wait for `start`; drive the config word and raise `busy`.
    //   S_CFG   : hold cfg_tvalid until the config channel handshakes.
    //   S_FEED  : register each din_valid sample onto the AXI data channel,
    //             asserting tlast on the Nth sample.
    //   S_DRAIN : wait for the output tlast, pulse `done`, drop `busy`.
    //
    // NOTE(review): samples presented while the FSM is in S_IDLE or S_CFG are
    // not captured -- confirm the caller does not drive din_valid before the
    // config handshake has completed.
    // ========================================================================
    localparam [1:0] S_IDLE  = 2'd0,
                     S_CFG   = 2'd1,
                     S_FEED  = 2'd2,
                     S_DRAIN = 2'd3;

    reg [1:0]     state;
    reg [LOG2N:0] in_count;   // input beats loaded onto the AXI data channel (0..N)

    // High while the currently presented input beat has not been accepted.
    // AXI4-Stream requires tvalid/tdata/tlast to stay stable in that case.
    wire din_beat_stalled = axi_din_tvalid && !axi_din_tready;

    always @(posedge clk or negedge reset_n) begin
        if (!reset_n) begin
            state          <= S_IDLE;
            cfg_tdata      <= 8'd0;
            cfg_tvalid     <= 1'b0;
            axi_din_tdata  <= 32'd0;
            axi_din_tvalid <= 1'b0;
            axi_din_tlast  <= 1'b0;
            in_count       <= 0;
            busy           <= 1'b0;
            done           <= 1'b0;
        end else begin
            // Default: `done` is a one-cycle pulse.
            done <= 1'b0;

            case (state)
                S_IDLE: begin
                    axi_din_tvalid <= 1'b0;
                    axi_din_tlast  <= 1'b0;
                    cfg_tvalid     <= 1'b0;
                    if (start) begin
                        // tdata[0] = 1 -> forward, 0 -> inverse.
                        cfg_tdata  <= {7'd0, ~inverse};
                        cfg_tvalid <= 1'b1;
                        in_count   <= 0;
                        busy       <= 1'b1;
                        state      <= S_CFG;
                    end
                end

                S_CFG: begin
                    // Hold cfg_tvalid until the slave accepts (tready), then
                    // open the data path.
                    if (cfg_tready) begin
                        cfg_tvalid <= 1'b0;
                        state      <= S_FEED;
                    end
                end

                S_FEED: begin
                    if (din_beat_stalled) begin
                        // Slave has not taken the presented beat yet: keep
                        // tdata/tvalid/tlast unchanged so it is not lost.
                        // A din_valid sample arriving during a stall cannot
                        // be stored (single register, and the legacy input
                        // interface has no backpressure).
                    end else if (din_valid && (in_count < N)) begin
                        // Forward din_valid -> AXI tvalid, packing {Q,I}.
                        axi_din_tdata  <= {din_im, din_re};
                        axi_din_tvalid <= 1'b1;
                        axi_din_tlast  <= (in_count == N - 1);   // Nth input
                        in_count       <= in_count + 1;
                    end else begin
                        axi_din_tvalid <= 1'b0;
                        axi_din_tlast  <= 1'b0;
                    end

                    // Leave S_FEED only once all N beats have been loaded and
                    // the final one is no longer waiting on tready.
                    if ((in_count == N) && !din_beat_stalled) begin
                        state <= S_DRAIN;
                    end
                end

                S_DRAIN: begin
                    // Wait for tlast on the output, then return to idle.
                    if (axi_dout_tvalid && axi_dout_tlast) begin
                        done  <= 1'b1;
                        busy  <= 1'b0;
                        state <= S_IDLE;
                    end
                end

                default: state <= S_IDLE;
            endcase
        end
    end

endmodule

View File

@@ -0,0 +1,353 @@
{
"schema": "xilinx.com:schema:json_instance:1.0",
"ip_inst": {
"xci_name": "xfft_2048_ip",
"component_reference": "xilinx.com:ip:xfft:9.1",
"ip_revision": "15",
"gen_directory": ".",
"parameters": {
"component_parameters": {
"Component_Name": [ { "value": "xfft_2048_ip", "resolve_type": "user", "usage": "all" } ],
"channels": [ { "value": "1", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"transform_length": [ { "value": "2048", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"target_clock_frequency": [ { "value": "100", "value_src": "user", "resolve_type": "user", "format": "long", "usage": "all" } ],
"implementation_options": [ { "value": "pipelined_streaming_io", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"target_data_throughput": [ { "value": "50", "value_src": "user", "resolve_type": "user", "format": "long", "usage": "all" } ],
"run_time_configurable_transform_length": [ { "value": "false", "resolve_type": "user", "format": "bool", "usage": "all" } ],
"data_format": [ { "value": "fixed_point", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"input_width": [ { "value": "16", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"phase_factor_width": [ { "value": "16", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"scaling_options": [ { "value": "block_floating_point", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"rounding_modes": [ { "value": "convergent_rounding", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"aclken": [ { "value": "false", "resolve_type": "user", "format": "bool", "usage": "all" } ],
"aresetn": [ { "value": "false", "resolve_type": "user", "format": "bool", "usage": "all" } ],
"ovflo": [ { "value": "false", "resolve_type": "user", "format": "bool", "usage": "all" } ],
"xk_index": [ { "value": "false", "resolve_type": "user", "format": "bool", "usage": "all" } ],
"throttle_scheme": [ { "value": "nonrealtime", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"output_ordering": [ { "value": "natural_order", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"cyclic_prefix_insertion": [ { "value": "false", "value_src": "user", "resolve_type": "user", "format": "bool", "usage": "all" } ],
"memory_options_data": [ { "value": "block_ram", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"memory_options_phase_factors": [ { "value": "block_ram", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"memory_options_reorder": [ { "value": "block_ram", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"number_of_stages_using_block_ram_for_data_and_phase_factors": [ { "value": "4", "resolve_type": "user", "usage": "all" } ],
"memory_options_hybrid": [ { "value": "false", "value_src": "user", "resolve_type": "user", "format": "bool", "usage": "all" } ],
"complex_mult_type": [ { "value": "use_mults_resources", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"butterfly_type": [ { "value": "use_xtremedsp_slices", "value_src": "user", "resolve_type": "user", "usage": "all" } ],
"super_sample_rates": [ { "value": "1", "resolve_type": "user", "usage": "all" } ],
"systolicfft_inv": [ { "value": "false", "resolve_type": "user", "format": "bool", "usage": "all" } ],
"blocking_run_time_configuration": [ { "value": "false", "resolve_type": "user", "format": "bool", "enabled": false, "usage": "all" } ]
},
"model_parameters": {
"C_XDEVICEFAMILY": [ { "value": "artix7", "resolve_type": "generated", "usage": "all" } ],
"C_PART": [ { "value": "xc7a50tftg256-2", "resolve_type": "generated", "usage": "all" } ],
"C_S_AXIS_CONFIG_TDATA_WIDTH": [ { "value": "8", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_S_AXIS_DATA_TDATA_WIDTH": [ { "value": "32", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_M_AXIS_DATA_TDATA_WIDTH": [ { "value": "32", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_M_AXIS_DATA_TUSER_WIDTH": [ { "value": "8", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_M_AXIS_STATUS_TDATA_WIDTH": [ { "value": "8", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_THROTTLE_SCHEME": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_NSSR": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_CHANNELS": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_NFFT_MAX": [ { "value": "11", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_ARCH": [ { "value": "3", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_HAS_NFFT": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_USE_FLT_PT": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_INPUT_WIDTH": [ { "value": "16", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_TWIDDLE_WIDTH": [ { "value": "16", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_OUTPUT_WIDTH": [ { "value": "16", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_HAS_SCALING": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_HAS_BFP": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_HAS_ROUNDING": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_HAS_ACLKEN": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_HAS_ARESETN": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_HAS_OVFLO": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_HAS_NATURAL_INPUT": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_HAS_NATURAL_OUTPUT": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_HAS_CYCLIC_PREFIX": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_HAS_XK_INDEX": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_DATA_MEM_TYPE": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_TWIDDLE_MEM_TYPE": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_BRAM_STAGES": [ { "value": "4", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_REORDER_MEM_TYPE": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_USE_HYBRID_RAM": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_OPTIMIZE_GOAL": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_CMPY_TYPE": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_BFLY_TYPE": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_SYSTOLICFFT_INV": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ],
"C_IS_BLOCKING_RUNTIME_CONF": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ]
},
"project_parameters": {
"ARCHITECTURE": [ { "value": "artix7", "resolve_type": "generated", "usage": "all" } ],
"BASE_BOARD_PART": [ { "value": "", "resolve_type": "generated", "usage": "all" } ],
"BOARD_CONNECTIONS": [ { "value": "", "resolve_type": "generated", "usage": "all" } ],
"DEVICE": [ { "value": "xc7a50t", "resolve_type": "generated", "usage": "all" } ],
"PACKAGE": [ { "value": "ftg256", "resolve_type": "generated", "usage": "all" } ],
"PREFHDL": [ { "value": "VERILOG", "resolve_type": "generated", "usage": "all" } ],
"SILICON_REVISION": [ { "value": "", "resolve_type": "generated", "usage": "all" } ],
"SIMULATOR_LANGUAGE": [ { "value": "MIXED", "resolve_type": "generated", "usage": "all" } ],
"SPEEDGRADE": [ { "value": "-2", "resolve_type": "generated", "usage": "all" } ],
"STATIC_POWER": [ { "value": "", "resolve_type": "generated", "usage": "all" } ],
"TEMPERATURE_GRADE": [ { "value": "", "resolve_type": "generated", "usage": "all" } ]
},
"runtime_parameters": {
"IPCONTEXT": [ { "value": "IP_Flow" } ],
"IPREVISION": [ { "value": "15" } ],
"MANAGED": [ { "value": "TRUE" } ],
"OUTPUTDIR": [ { "value": "." } ],
"SELECTEDSIMMODEL": [ { "value": "" } ],
"SHAREDDIR": [ { "value": "." } ],
"SWVERSION": [ { "value": "2025.2" } ],
"SYNTHESISFLOW": [ { "value": "OUT_OF_CONTEXT" } ]
}
},
"boundary": {
"ports": {
"aclk": [ { "direction": "in", "driver_value": "0x1" } ],
"s_axis_config_tdata": [ { "direction": "in", "size_left": "7", "size_right": "0" } ],
"s_axis_config_tvalid": [ { "direction": "in" } ],
"s_axis_config_tready": [ { "direction": "out" } ],
"s_axis_data_tdata": [ { "direction": "in", "size_left": "31", "size_right": "0" } ],
"s_axis_data_tvalid": [ { "direction": "in" } ],
"s_axis_data_tready": [ { "direction": "out" } ],
"s_axis_data_tlast": [ { "direction": "in" } ],
"m_axis_data_tdata": [ { "direction": "out", "size_left": "31", "size_right": "0" } ],
"m_axis_data_tuser": [ { "direction": "out", "size_left": "7", "size_right": "0" } ],
"m_axis_data_tvalid": [ { "direction": "out" } ],
"m_axis_data_tready": [ { "direction": "in", "driver_value": "0x1" } ],
"m_axis_data_tlast": [ { "direction": "out" } ],
"m_axis_status_tdata": [ { "direction": "out", "size_left": "7", "size_right": "0" } ],
"m_axis_status_tvalid": [ { "direction": "out" } ],
"m_axis_status_tready": [ { "direction": "in", "driver_value": "0x1" } ],
"event_frame_started": [ { "direction": "out", "driver_value": "0x0" } ],
"event_tlast_unexpected": [ { "direction": "out", "driver_value": "0x0" } ],
"event_tlast_missing": [ { "direction": "out", "driver_value": "0x0" } ],
"event_status_channel_halt": [ { "direction": "out", "driver_value": "0x0" } ],
"event_data_in_channel_halt": [ { "direction": "out", "driver_value": "0x0" } ],
"event_data_out_channel_halt": [ { "direction": "out", "driver_value": "0x0" } ]
},
"interfaces": {
"event_frame_started_intf": {
"vlnv": "xilinx.com:signal:interrupt:1.0",
"abstraction_type": "xilinx.com:signal:interrupt_rtl:1.0",
"mode": "master",
"parameters": {
"SENSITIVITY": [ { "value": "EDGE_RISING", "value_src": "constant", "usage": "all" } ],
"PortWidth": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ]
},
"port_maps": {
"INTERRUPT": [ { "physical_name": "event_frame_started" } ]
}
},
"event_tlast_unexpected_intf": {
"vlnv": "xilinx.com:signal:interrupt:1.0",
"abstraction_type": "xilinx.com:signal:interrupt_rtl:1.0",
"mode": "master",
"parameters": {
"SENSITIVITY": [ { "value": "EDGE_RISING", "value_src": "constant", "usage": "all" } ],
"PortWidth": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ]
},
"port_maps": {
"INTERRUPT": [ { "physical_name": "event_tlast_unexpected" } ]
}
},
"event_tlast_missing_intf": {
"vlnv": "xilinx.com:signal:interrupt:1.0",
"abstraction_type": "xilinx.com:signal:interrupt_rtl:1.0",
"mode": "master",
"parameters": {
"SENSITIVITY": [ { "value": "EDGE_RISING", "value_src": "constant", "usage": "all" } ],
"PortWidth": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ]
},
"port_maps": {
"INTERRUPT": [ { "physical_name": "event_tlast_missing" } ]
}
},
"event_fft_overflow_intf": {
"vlnv": "xilinx.com:signal:interrupt:1.0",
"abstraction_type": "xilinx.com:signal:interrupt_rtl:1.0",
"mode": "master",
"parameters": {
"SENSITIVITY": [ { "value": "EDGE_RISING", "value_src": "constant", "usage": "all" } ],
"PortWidth": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ]
}
},
"event_status_channel_halt_intf": {
"vlnv": "xilinx.com:signal:interrupt:1.0",
"abstraction_type": "xilinx.com:signal:interrupt_rtl:1.0",
"mode": "master",
"parameters": {
"SENSITIVITY": [ { "value": "EDGE_RISING", "value_src": "constant", "usage": "all" } ],
"PortWidth": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ]
},
"port_maps": {
"INTERRUPT": [ { "physical_name": "event_status_channel_halt" } ]
}
},
"event_data_in_channel_halt_intf": {
"vlnv": "xilinx.com:signal:interrupt:1.0",
"abstraction_type": "xilinx.com:signal:interrupt_rtl:1.0",
"mode": "master",
"parameters": {
"SENSITIVITY": [ { "value": "EDGE_RISING", "value_src": "constant", "usage": "all" } ],
"PortWidth": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ]
},
"port_maps": {
"INTERRUPT": [ { "physical_name": "event_data_in_channel_halt" } ]
}
},
"event_data_out_channel_halt_intf": {
"vlnv": "xilinx.com:signal:interrupt:1.0",
"abstraction_type": "xilinx.com:signal:interrupt_rtl:1.0",
"mode": "master",
"parameters": {
"SENSITIVITY": [ { "value": "EDGE_RISING", "value_src": "constant", "usage": "all" } ],
"PortWidth": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ]
},
"port_maps": {
"INTERRUPT": [ { "physical_name": "event_data_out_channel_halt" } ]
}
},
"S_AXIS_DATA": {
"vlnv": "xilinx.com:interface:axis:1.0",
"abstraction_type": "xilinx.com:interface:axis_rtl:1.0",
"mode": "slave",
"parameters": {
"TDATA_NUM_BYTES": [ { "value": "4", "value_src": "auto", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TDEST_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TID_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TUSER_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TREADY": [ { "value": "1", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TSTRB": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TKEEP": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TLAST": [ { "value": "1", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"FREQ_HZ": [ { "value": "100000000", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"PHASE": [ { "value": "0.0", "resolve_type": "generated", "format": "float", "is_ips_inferred": true, "is_static_object": false } ],
"CLK_DOMAIN": [ { "value": "", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ],
"LAYERED_METADATA": [ { "value": "undef", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ],
"INSERT_VIP": [ { "value": "0", "resolve_type": "user", "format": "long", "usage": "simulation.rtl", "is_ips_inferred": true, "is_static_object": false } ]
},
"port_maps": {
"TDATA": [ { "physical_name": "s_axis_data_tdata" } ],
"TLAST": [ { "physical_name": "s_axis_data_tlast" } ],
"TREADY": [ { "physical_name": "s_axis_data_tready" } ],
"TVALID": [ { "physical_name": "s_axis_data_tvalid" } ]
}
},
"aclk_intf": {
"vlnv": "xilinx.com:signal:clock:1.0",
"abstraction_type": "xilinx.com:signal:clock_rtl:1.0",
"mode": "slave",
"parameters": {
"ASSOCIATED_BUSIF": [ { "value": "S_AXIS_CONFIG:M_AXIS_DATA:M_AXIS_STATUS:S_AXIS_DATA", "value_src": "constant", "usage": "all" } ],
"ASSOCIATED_RESET": [ { "value": "aresetn", "value_src": "constant", "usage": "all" } ],
"ASSOCIATED_CLKEN": [ { "value": "aclken", "value_src": "constant", "usage": "all" } ],
"FREQ_HZ": [ { "value": "100000000", "resolve_type": "user", "format": "long", "usage": "all" } ],
"FREQ_TOLERANCE_HZ": [ { "value": "0", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"PHASE": [ { "value": "0.0", "resolve_type": "generated", "format": "float", "is_ips_inferred": true, "is_static_object": false } ],
"CLK_DOMAIN": [ { "value": "", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ],
"ASSOCIATED_PORT": [ { "value": "", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ],
"INSERT_VIP": [ { "value": "0", "resolve_type": "user", "format": "long", "usage": "simulation.rtl", "is_ips_inferred": true, "is_static_object": false } ]
},
"port_maps": {
"CLK": [ { "physical_name": "aclk" } ]
}
},
"aresetn_intf": {
"vlnv": "xilinx.com:signal:reset:1.0",
"abstraction_type": "xilinx.com:signal:reset_rtl:1.0",
"mode": "slave",
"parameters": {
"POLARITY": [ { "value": "ACTIVE_LOW", "value_src": "constant", "usage": "all" } ],
"INSERT_VIP": [ { "value": "0", "resolve_type": "user", "format": "long", "usage": "simulation.rtl", "is_ips_inferred": true, "is_static_object": false } ]
}
},
"aclken_intf": {
"vlnv": "xilinx.com:signal:clockenable:1.0",
"abstraction_type": "xilinx.com:signal:clockenable_rtl:1.0",
"mode": "slave",
"parameters": {
"POLARITY": [ { "value": "ACTIVE_HIGH", "value_src": "constant", "usage": "all" } ]
}
},
"M_AXIS_STATUS": {
"vlnv": "xilinx.com:interface:axis:1.0",
"abstraction_type": "xilinx.com:interface:axis_rtl:1.0",
"mode": "master",
"parameters": {
"TDATA_NUM_BYTES": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TDEST_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TID_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TUSER_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TREADY": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TSTRB": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TKEEP": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TLAST": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"FREQ_HZ": [ { "value": "100000000", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"PHASE": [ { "value": "0.0", "resolve_type": "generated", "format": "float", "is_ips_inferred": true, "is_static_object": false } ],
"CLK_DOMAIN": [ { "value": "", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ],
"LAYERED_METADATA": [ { "value": "undef", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ],
"INSERT_VIP": [ { "value": "0", "resolve_type": "user", "format": "long", "usage": "simulation.rtl", "is_ips_inferred": true, "is_static_object": false } ]
},
"port_maps": {
"TDATA": [ { "physical_name": "m_axis_status_tdata" } ],
"TREADY": [ { "physical_name": "m_axis_status_tready" } ],
"TVALID": [ { "physical_name": "m_axis_status_tvalid" } ]
}
},
"M_AXIS_DATA": {
"vlnv": "xilinx.com:interface:axis:1.0",
"abstraction_type": "xilinx.com:interface:axis_rtl:1.0",
"mode": "master",
"parameters": {
"TDATA_NUM_BYTES": [ { "value": "4", "value_src": "auto", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TDEST_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TID_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TUSER_WIDTH": [ { "value": "8", "value_src": "auto", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TREADY": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TSTRB": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TKEEP": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TLAST": [ { "value": "1", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"FREQ_HZ": [ { "value": "100000000", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"PHASE": [ { "value": "0.0", "resolve_type": "generated", "format": "float", "is_ips_inferred": true, "is_static_object": false } ],
"CLK_DOMAIN": [ { "value": "", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ],
"LAYERED_METADATA": [ { "value": "undef", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ],
"INSERT_VIP": [ { "value": "0", "resolve_type": "user", "format": "long", "usage": "simulation.rtl", "is_ips_inferred": true, "is_static_object": false } ]
},
"port_maps": {
"TDATA": [ { "physical_name": "m_axis_data_tdata" } ],
"TLAST": [ { "physical_name": "m_axis_data_tlast" } ],
"TREADY": [ { "physical_name": "m_axis_data_tready" } ],
"TUSER": [ { "physical_name": "m_axis_data_tuser" } ],
"TVALID": [ { "physical_name": "m_axis_data_tvalid" } ]
}
},
"S_AXIS_CONFIG": {
"vlnv": "xilinx.com:interface:axis:1.0",
"abstraction_type": "xilinx.com:interface:axis_rtl:1.0",
"mode": "slave",
"parameters": {
"TDATA_NUM_BYTES": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TDEST_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TID_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"TUSER_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TREADY": [ { "value": "1", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TSTRB": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TKEEP": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"HAS_TLAST": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"FREQ_HZ": [ { "value": "100000000", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ],
"PHASE": [ { "value": "0.0", "resolve_type": "generated", "format": "float", "is_ips_inferred": true, "is_static_object": false } ],
"CLK_DOMAIN": [ { "value": "", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ],
"LAYERED_METADATA": [ { "value": "undef", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ],
"INSERT_VIP": [ { "value": "0", "resolve_type": "user", "format": "long", "usage": "simulation.rtl", "is_ips_inferred": true, "is_static_object": false } ]
},
"port_maps": {
"TDATA": [ { "physical_name": "s_axis_config_tdata" } ],
"TREADY": [ { "physical_name": "s_axis_config_tready" } ],
"TVALID": [ { "physical_name": "s_axis_config_tvalid" } ]
}
}
}
}
},
"checksum": "6bf94ec5"
}

View File

@@ -151,7 +151,14 @@ wire fft_dout_valid;
wire fft_busy;
wire fft_done;
fft_engine #(
// xfft_2048 (Xilinx LogiCORE FFT v9.1) via fft_engine_axi_bridge preserves
// the legacy fft_engine port surface so this call site stays a 1-line swap.
// In synth + remote XSim: real Pipelined Streaming IP (~N + 150 cycles/pass,
// closes RX-NEW-3 PRI budget). In iverilog: bridge falls through to the
// in-house fft_engine batched fallback inside xfft_2048.v (~150K cycles/pass,
// for unit coverage only; receiver-integration timing is meaningful only in
// XSim with the real IP).
fft_engine_axi_bridge #(
.N(FFT_SIZE),
.LOG2N(ADDR_BITS),
.DATA_W(16),

View File

@@ -69,6 +69,8 @@ PROD_RTL=(
doppler_processor.v
xfft_16.v
fft_engine.v
xfft_2048.v
fft_engine_axi_bridge.v
frequency_matched_filter.v
usb_data_interface.v
usb_data_interface_ft2232h.v
@@ -102,6 +104,7 @@ RECEIVER_RTL=(
chirp_memory_loader_param.v latency_buffer.v
matched_filter_multi_segment.v matched_filter_processing_chain.v
range_bin_decimator.v doppler_processor.v xfft_16.v fft_engine.v
xfft_2048.v fft_engine_axi_bridge.v
frequency_matched_filter.v
rx_gain_control.v mti_canceller.v
)
@@ -282,7 +285,7 @@ run_mf_cosim() {
if [[ -n "$define" ]]; then
cmd="$cmd $define"
fi
cmd="$cmd -o $vvp tb/tb_mf_cosim.v matched_filter_processing_chain.v fft_engine.v frequency_matched_filter.v chirp_memory_loader_param.v"
cmd="$cmd -o $vvp tb/tb_mf_cosim.v matched_filter_processing_chain.v fft_engine.v xfft_2048.v fft_engine_axi_bridge.v frequency_matched_filter.v chirp_memory_loader_param.v"
if ! eval "$cmd" 2>/tmp/iverilog_err_$$; then
echo -e "${RED}COMPILE FAIL${NC}"
@@ -640,7 +643,8 @@ run_test "FIR Lowpass" \
run_test --timeout=600 "Matched Filter Chain" \
tb/tb_mf_reg.vvp \
tb/tb_matched_filter_processing_chain.v matched_filter_processing_chain.v \
fft_engine.v chirp_memory_loader_param.v frequency_matched_filter.v
fft_engine.v xfft_2048.v fft_engine_axi_bridge.v \
chirp_memory_loader_param.v frequency_matched_filter.v
# RX-B regression coverage: chain pipeline depth + full-chain
# autocorrelation peak position. Both run the production fft_engine
@@ -649,12 +653,13 @@ run_test --timeout=600 "Matched Filter Chain" \
run_test --timeout=120 "RX-B Chain Pipeline Latency (tb_rxb_latency_measure)" \
tb/tb_rxb_lat_reg.vvp \
tb/tb_rxb_latency_measure.v matched_filter_processing_chain.v \
fft_engine.v frequency_matched_filter.v
fft_engine.v xfft_2048.v fft_engine_axi_bridge.v frequency_matched_filter.v
run_test --timeout=600 "RX-B Full-Chain Autocorrelation (tb_rxb_fullchain_latency)" \
tb/tb_rxb_fc_reg.vvp \
tb/tb_rxb_fullchain_latency.v matched_filter_multi_segment.v \
matched_filter_processing_chain.v fft_engine.v frequency_matched_filter.v \
matched_filter_processing_chain.v fft_engine.v xfft_2048.v \
fft_engine_axi_bridge.v frequency_matched_filter.v \
chirp_memory_loader_param.v
echo ""

View File

@@ -43,7 +43,22 @@ foreach f [glob -directory $rtl_dir *.v] {
}
set_property top $top_module [current_fileset]
set_property verilog_define {FFT_XPM_BRAM} [current_fileset]
# FFT_USE_XILINX_IP routes xfft_2048.v's wrapper to the LogiCORE FFT v9.1 IP
# (xfft_2048_ip) instead of the in-house fft_engine fallback. The IP closes
# RX-NEW-3 (~6600-cycle 3-FFT chain budget vs 16700-cycle PRI).
set_property verilog_define {FFT_XPM_BRAM FFT_USE_XILINX_IP} [current_fileset]
# ===== IP CATALOG =====
# Read the pre-generated xfft_2048_ip XCI (produced by gen_xfft_2048_ip.tcl).
# generate_target + synth_ip prepare its OOC netlist before launch_runs.
set xci_path [file join $project_root "ip" "xfft_2048_ip" "xfft_2048_ip.xci"]
if {![file exists $xci_path]} {
puts "ERROR: $xci_path missing run scripts/50t/gen_xfft_2048_ip.tcl first."
exit 1
}
read_ip $xci_path
generate_target {synthesis simulation instantiation_template} [get_ips xfft_2048_ip]
synth_ip [get_ips xfft_2048_ip]
# Constraints — 50T XDC + MMCM supplement
add_files -fileset constrs_1 -norecurse [file join $project_root "constraints" "xc7a50t_ftg256.xdc"]

View File

@@ -0,0 +1,75 @@
################################################################################
# gen_xfft_2048_ip.tcl — Generate Xilinx LogiCORE FFT (xfft_v9_1) for AERIS-10
#
# Produces ip/xfft_2048_ip/xfft_2048_ip.xci configured for the matched-filter chain:
# - Transform Length: 2048
# - Architecture: Pipelined Streaming I/O
# - Data Format: Fixed Point
# - Scaling: Block Floating Point (run-time auto-scale)
# - Rounding: Convergent (round-to-even)
# - Input Width: 16-bit per real/imag (matches DDC output, DATA_W in chain)
# - Phase Width: 16-bit
# - Output Ordering: Natural Order
# - Throttle Scheme: Non Real Time (allows downstream backpressure)
# - Memory: Block RAM for data, reorder, phase factors
#
# Usage (run on remote Vivado box):
# cd ~/PLFM_RADAR_work/PLFM_RADAR/9_Firmware/9_2_FPGA
# vivado -mode batch -source scripts/50t/gen_xfft_2048_ip.tcl
#
# Output: ip/xfft_2048_ip/xfft_2048_ip.xci (committed; build_50t.tcl reads this)
# Note: IP module is named xfft_2048_ip to avoid collision with the wrapper
# module xfft_2048 in xfft_2048.v.
################################################################################
# ---- Paths -------------------------------------------------------------------
# Everything is resolved relative to this script's own location, so the script
# can be sourced from any working directory.
set script_dir   [file dirname [file normalize [info script]]]
set project_root [file normalize [file join $script_dir .. ..]]
set ip_dir       [file join $project_root "ip"]
set fpga_part    "xc7a50tftg256-2"

file mkdir $ip_dir

# ---- Scratch project ---------------------------------------------------------
# An in-memory project is enough: nothing but the IP output products is kept.
create_project -in_memory -part $fpga_part
set_property ip_repo_paths $ip_dir [current_project]

# ---- Instantiate the FFT core ------------------------------------------------
# -force replaces any xfft_2048_ip left over from a previous run.
create_ip -name xfft -vendor xilinx.com -library ip -version 9.1 \
    -module_name xfft_2048_ip -dir $ip_dir -force
set ip [get_ips xfft_2048_ip]

# ---- Core configuration ------------------------------------------------------
# Property/value pairs, applied in a single set_property -dict call.
set xfft_config {
    CONFIG.transform_length             {2048}
    CONFIG.implementation_options       {pipelined_streaming_io}
    CONFIG.channels                     {1}
    CONFIG.data_format                  {fixed_point}
    CONFIG.scaling_options              {block_floating_point}
    CONFIG.rounding_modes               {convergent_rounding}
    CONFIG.input_width                  {16}
    CONFIG.phase_factor_width           {16}
    CONFIG.output_ordering              {natural_order}
    CONFIG.cyclic_prefix_insertion      {false}
    CONFIG.throttle_scheme              {nonrealtime}
    CONFIG.target_clock_frequency       {100}
    CONFIG.target_data_throughput       {50}
    CONFIG.complex_mult_type            {use_mults_resources}
    CONFIG.butterfly_type               {use_xtremedsp_slices}
    CONFIG.memory_options_data          {block_ram}
    CONFIG.memory_options_reorder       {block_ram}
    CONFIG.memory_options_phase_factors {block_ram}
    CONFIG.memory_options_hybrid        {false}
}
set_property -dict $xfft_config $ip

# ---- Output products ---------------------------------------------------------
# Synthesis, simulation and instantiation-template targets are all generated,
# then the core is synthesized out-of-context.
generate_target {synthesis simulation instantiation_template} $ip
synth_ip $ip

# ---- Report and leave --------------------------------------------------------
puts "================================================================"
puts " xfft_2048_ip IP generation complete"
puts " XCI: $ip_dir/xfft_2048_ip/xfft_2048_ip.xci"
puts " DCP: [get_property IP_OUTPUT_DIR $ip]/xfft_2048_ip.dcp"
puts "================================================================"
close_project
exit 0

View File

@@ -0,0 +1,47 @@
#!/usr/bin/env bash
# ============================================================================
# run_xfft_xsim.sh — Compile + run xfft_2048 wrapper testbench in Vivado XSim
#
# Verifies the wrapper with the real LogiCORE FFT v9.1 (xfft_2048_ip).
# Cannot run in iverilog because the IP uses Xilinx primitives.
#
# Usage (on remote Vivado box):
# cd ~/PLFM_RADAR_work/PLFM_RADAR/9_Firmware/9_2_FPGA
# bash scripts/50t/run_xfft_xsim.sh
#
# Output: /tmp/xfft_xsim.log (look for "ALL TESTS PASSED")
# ============================================================================
# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -euo pipefail

PROJ_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
IP_NETLIST="$PROJ_ROOT/ip/xfft_2048_ip/xfft_2048_ip_sim_netlist.v"
WRAPPER="$PROJ_ROOT/xfft_2048.v"
TB="$PROJ_ROOT/tb/tb_xfft_2048_xsim.v"
WORK_DIR="$PROJ_ROOT/build_xsim_xfft"
SIM_LOG="/tmp/xfft_xsim.log"

# Fail early with an actionable message instead of an opaque xvlog error when
# an input is missing (the IP netlist in particular is a generated file).
for src in "$WRAPPER" "$TB" "$IP_NETLIST"; do
    if [[ ! -f "$src" ]]; then
        echo "ERROR: required source not found: $src" >&2
        echo "       (IP outputs come from scripts/50t/gen_xfft_2048_ip.tcl)" >&2
        exit 1
    fi
done

# Drop any log left by a previous run so a stale "ALL TESTS PASSED" can never
# be mistaken for the result of this run.
rm -f "$SIM_LOG"

mkdir -p "$WORK_DIR"
cd "$WORK_DIR"

echo "===== Compiling Verilog sources ====="
# Wrapper + testbench with the IP-on define
xvlog -d FFT_USE_XILINX_IP "$WRAPPER" "$TB"
# IP simulation netlist — references unisim primitives
xvlog "$IP_NETLIST"
# fft_engine etc. NOT needed because FFT_USE_XILINX_IP routes around it,
# but the wrapper still must compile cleanly under both branches; if xvlog
# complains about an unresolved fft_engine reference (it shouldn't because
# the `else` branch is hidden by the define), include it here:
# xvlog "$PROJ_ROOT/fft_engine.v"

echo "===== Elaborating ====="
# `glbl` is a Vivado-supplied module that Xilinx primitives (FDRE etc.)
# reference for the global GSR/GTS signals. Elaborating it as a second top
# satisfies the unresolved-reference error xelab raises for the IP netlist.
xelab -L unisims_ver -L secureip --debug typical \
    tb_xfft_2048_xsim glbl -snapshot tb_xfft_2048_snap

echo "===== Running simulation ====="
xsim tb_xfft_2048_snap --runall --log "$SIM_LOG"

echo "===== Done. Tail of log: ====="
tail -40 "$SIM_LOG"

# xsim exits 0 whenever the testbench reaches $finish, even if assertions
# failed or the testbench's global timeout fired. Derive the script's exit
# status from the summary banner the testbench prints on full success.
if grep -q "ALL TESTS PASSED" "$SIM_LOG"; then
    echo "===== RESULT: PASS ====="
else
    echo "===== RESULT: FAIL (no \"ALL TESTS PASSED\" in $SIM_LOG) =====" >&2
    exit 1
fi

View File

@@ -0,0 +1,283 @@
`timescale 1ns / 1ps
// ============================================================================
// tb_xfft_2048_xsim.v — XSim verification of xfft_2048 wrapper with real IP
// ============================================================================
// Compiled with `+define+FFT_USE_XILINX_IP` so the wrapper instantiates the
// LogiCORE FFT v9.1 (xfft_2048_ip). Cannot run in iverilog because that path
// uses Xilinx primitives (DSP48E1, BRAM18). For iverilog, leave the define
// off and the wrapper falls back to the fft_engine batched implementation.
//
// Three minimal stimuli:
// 1. DC (re=10000, im=0) -> peak bin = 0 with large magnitude;
// all other bins near zero.
// 2. Impulse (single sample (10000,0)) -> output magnitude flat across all bins
// (DFT of a delta = constant).
// 3. Tone (cos+jsin at bin K=128) -> peak bin = K with large magnitude;
// all other bins near zero.
//
// PASS criteria:
// - peak bin matches expected
// - peak magnitude > 8× mean of non-peak bins (analogous to receiver-chain
// SNR check that's been used elsewhere in this codebase)
// ============================================================================
module tb_xfft_2048_xsim;

    // ----------------------------------------------------------------
    // Parameters
    // ----------------------------------------------------------------
    localparam CLK_PERIOD = 10.0; // 100 MHz
    localparam N          = 2048; // samples per frame (transform length)
    localparam LOG2N      = 11;   // log2(N); informational only

    // ----------------------------------------------------------------
    // Clock / reset
    // ----------------------------------------------------------------
    reg aclk    = 0;
    reg aresetn = 0;

    // ----------------------------------------------------------------
    // DUT AXI-Stream signals
    // ----------------------------------------------------------------
    // Config channel
    reg  [7:0]  cfg_tdata;
    reg         cfg_tvalid;
    wire        cfg_tready;
    // Data-in channel; tdata packs {imag[15:0], real[15:0]}
    reg  [31:0] din_tdata;
    reg         din_tvalid;
    reg         din_tlast;
    wire        din_tready;
    // Data-out channel; same {imag, real} packing
    wire [31:0] dout_tdata;
    wire [7:0]  dout_tuser;
    wire        dout_tvalid;
    wire        dout_tlast;
    reg         dout_tready;

    // ----------------------------------------------------------------
    // Scoreboard
    // ----------------------------------------------------------------
    integer pass_count = 0;
    integer fail_count = 0;
    integer test_num   = 0;

    // Results of the most recent analyze_frame call
    integer peak_bin;
    integer peak_mag;
    integer mean_others;

    // Capture the entire output frame
    reg signed [15:0] out_re [0:N-1];
    reg signed [15:0] out_im [0:N-1];
    integer out_collected; // number of output beats stored so far

    always #(CLK_PERIOD/2) aclk = ~aclk;

    xfft_2048 dut (
        .aclk                 (aclk),
        .aresetn              (aresetn),
        .s_axis_config_tdata  (cfg_tdata),
        .s_axis_config_tvalid (cfg_tvalid),
        .s_axis_config_tready (cfg_tready),
        .s_axis_data_tdata    (din_tdata),
        .s_axis_data_tvalid   (din_tvalid),
        .s_axis_data_tlast    (din_tlast),
        .s_axis_data_tready   (din_tready),
        .m_axis_data_tdata    (dout_tdata),
        .m_axis_data_tuser    (dout_tuser),
        .m_axis_data_tvalid   (dout_tvalid),
        .m_axis_data_tlast    (dout_tlast),
        .m_axis_data_tready   (dout_tready)
    );

    // Continuously capture output frame: store one beat per accepted
    // (tvalid && tready) output transfer until N beats have been stored.
    always @(posedge aclk) begin
        if (aresetn && dout_tvalid && dout_tready && out_collected < N) begin
            out_re[out_collected] <= $signed(dout_tdata[15:0]);
            out_im[out_collected] <= $signed(dout_tdata[31:16]);
            out_collected         <= out_collected + 1;
        end
    end

    // ----------------------------------------------------------------
    // Send config (FWD = bit 0 = 1)
    //
    // Drives exactly one beat on the config channel. cfg_tvalid is
    // dropped on the same clock edge at which the handshake
    // (tvalid && tready) is observed, so the DUT never sees tvalid high
    // on the following edge and cannot accept a second, unintended
    // config beat.
    // ----------------------------------------------------------------
    task send_config;
        input fwd;
        begin
            @(posedge aclk);
            cfg_tdata  <= {7'b0, fwd};
            cfg_tvalid <= 1'b1;
            // First edge at which the DUT can see tvalid; keep waiting
            // while tready was low at the sampled edge.
            @(posedge aclk);
            while (!cfg_tready) @(posedge aclk);
            cfg_tvalid <= 1'b0;
        end
    endtask

    // ----------------------------------------------------------------
    // Stream N samples; src=0 DC, 1 impulse, 2 tone (bin K=128)
    //
    // Resets the capture counter, then presents one sample per accepted
    // beat, holding each sample until din_tready is seen high at a clock
    // edge. din_tlast accompanies the final sample.
    // ----------------------------------------------------------------
    task stream_frame;
        input integer src;
        integer i;
        real    arg;
        integer re16, im16;
        begin
            out_collected = 0;
            @(posedge aclk);
            din_tvalid <= 1'b1;
            for (i = 0; i < N; i = i + 1) begin
                case (src)
                    0: begin re16 = 10000; im16 = 0; end
                    1: begin re16 = (i == 0) ? 10000 : 0; im16 = 0; end
                    2: begin
                        // 2*pi*128*i/N: complex exponential on bin 128
                        arg  = 6.2831853 * 128.0 * i / N;
                        re16 = $rtoi(10000.0 * $cos(arg));
                        im16 = $rtoi(10000.0 * $sin(arg));
                    end
                    default: begin re16 = 0; im16 = 0; end
                endcase
                din_tdata <= {im16[15:0], re16[15:0]};
                din_tlast <= (i == N-1);
                @(posedge aclk);
                while (!din_tready) @(posedge aclk);
            end
            din_tvalid <= 1'b0;
            din_tlast  <= 1'b0;
        end
    endtask

    // ----------------------------------------------------------------
    // Wait until the full output frame has been captured (out_collected == N)
    // or a deadline elapses.
    //
    // A timeout is scored as one failed test: both test_num and
    // fail_count advance, so the final "FAILED: x / y" summary can never
    // report more failures than tests.
    // ----------------------------------------------------------------
    task wait_frame;
        input integer max_cycles;
        integer t;
        begin
            t = 0;
            while (out_collected < N && t < max_cycles) begin
                @(posedge aclk);
                t = t + 1;
            end
            if (out_collected < N) begin
                $display("[FAIL] Timed out collecting frame: got %0d / %0d after %0d cycles",
                         out_collected, N, t);
                test_num   = test_num + 1;
                fail_count = fail_count + 1;
            end
        end
    endtask

    // ----------------------------------------------------------------
    // Locate peak |Re|+|Im| bin in captured frame
    //
    // Outputs:
    //   pk_bin     - index of the first bin holding the largest |Re|+|Im|
    //   pk_mag     - that bin's |Re|+|Im|
    //   mean_other - integer mean of |Re|+|Im| over the remaining N-1 bins
    // ----------------------------------------------------------------
    task analyze_frame;
        output integer pk_bin;
        output integer pk_mag;
        output integer mean_other;
        integer i, mag, sum;
        begin
            pk_bin = 0;
            pk_mag = 0;
            sum    = 0;
            for (i = 0; i < N; i = i + 1) begin
                mag = (out_re[i] < 0 ? -out_re[i] : out_re[i])
                    + (out_im[i] < 0 ? -out_im[i] : out_im[i]);
                if (mag > pk_mag) begin
                    pk_mag = mag;
                    pk_bin = i;
                end
                sum = sum + mag;
            end
            mean_other = (sum - pk_mag) / (N - 1);
        end
    endtask

    // ----------------------------------------------------------------
    // Score one assertion: prints [PASS]/[FAIL] with a running test
    // number and updates the pass/fail counters.
    // ----------------------------------------------------------------
    task check;
        input         cond;
        input [511:0] label;
        begin
            test_num = test_num + 1;
            if (cond) begin
                $display("[PASS] T%0d: %0s", test_num, label);
                pass_count = pass_count + 1;
            end else begin
                $display("[FAIL] T%0d: %0s", test_num, label);
                fail_count = fail_count + 1;
            end
        end
    endtask

    // ----------------------------------------------------------------
    // Main stimulus sequence
    // ----------------------------------------------------------------
    initial begin
        $dumpfile("tb_xfft_2048_xsim.vcd");
        $dumpvars(0, tb_xfft_2048_xsim);

        cfg_tdata     = 0;
        cfg_tvalid    = 0;
        din_tdata     = 0;
        din_tvalid    = 0;
        din_tlast     = 0;
        dout_tready   = 1; // Always accept output
        out_collected = 0;

        repeat (10) @(posedge aclk);
        aresetn = 1'b1;
        repeat (10) @(posedge aclk);

        // ============================================================
        // T1: DC stimulus -> expect peak at bin 0
        // ============================================================
        $display("\n--- DC stimulus ---");
        send_config(1'b1);
        stream_frame(0);
        wait_frame(20000);
        analyze_frame(peak_bin, peak_mag, mean_others);
        $display(" peak_bin=%0d peak_mag=%0d mean_others=%0d tuser=0x%h",
                 peak_bin, peak_mag, mean_others, dout_tuser);
        check(peak_bin == 0, "DC -> peak at bin 0");
        check(peak_mag > 8 * mean_others + 1, "DC -> peak/mean > 8x");

        // ============================================================
        // T2: Impulse -> expect roughly flat magnitude
        // ============================================================
        $display("\n--- Impulse stimulus ---");
        send_config(1'b1);
        stream_frame(1);
        wait_frame(20000);
        analyze_frame(peak_bin, peak_mag, mean_others);
        $display(" peak_bin=%0d peak_mag=%0d mean_others=%0d tuser=0x%h",
                 peak_bin, peak_mag, mean_others, dout_tuser);
        // For an impulse at sample 0, |X[k]| is constant; peak/mean ratio
        // close to 1. Allow up to 3x to account for bit-width quantization.
        check(peak_mag < 3 * mean_others + 100,
              "Impulse -> flat spectrum (peak < 3x mean)");

        // ============================================================
        // T3: Complex tone at bin 128 -> expect peak at bin 128
        // ============================================================
        $display("\n--- Tone (bin 128) stimulus ---");
        send_config(1'b1);
        stream_frame(2);
        wait_frame(20000);
        analyze_frame(peak_bin, peak_mag, mean_others);
        $display(" peak_bin=%0d peak_mag=%0d mean_others=%0d tuser=0x%h",
                 peak_bin, peak_mag, mean_others, dout_tuser);
        check(peak_bin == 128, "Tone -> peak at bin 128");
        check(peak_mag > 8 * mean_others + 1, "Tone -> peak/mean > 8x");

        $display("");
        $display("============================================");
        $display(" XFFT_2048 (Xilinx LogiCORE) XSim RESULTS");
        $display(" PASSED: %0d / %0d", pass_count, test_num);
        $display(" FAILED: %0d / %0d", fail_count, test_num);
        if (fail_count == 0)
            $display(" ** ALL TESTS PASSED **");
        else
            $display(" ** %0d TEST(S) FAILED **", fail_count);
        $display("============================================");
        #100;
        $finish;
    end

    // Global timeout: never let the sim run forever
    initial begin
        #2000000; // 2 ms
        $display("[FAIL] Global timeout @ 2 ms");
        $finish;
    end

endmodule

View File

@@ -0,0 +1,282 @@
`timescale 1ns / 1ps
// ============================================================================
// xfft_2048.v — 2048-point FFT wrapper (Xilinx LogiCORE for synth/XSim,
// in-house fft_engine fallback for iverilog)
// ============================================================================
// AXI-Stream port list mirrors Xilinx LogiCORE Fast Fourier Transform v9.1
// (PG109). Two implementation branches selected by `FFT_USE_XILINX_IP`:
//
// `define FFT_USE_XILINX_IP -> instantiates xfft_2048_ip (LogiCORE FFT v9.1)
// Pipelined Streaming I/O, BFP scaling, 16-bit.
// Use for: Vivado synth, remote XSim sim.
//
// `undef FFT_USE_XILINX_IP -> instantiates fft_engine batched one-shot
// (collect N -> compute -> drain N).
// Use for: iverilog local sim only.
//
// Throughput on production silicon (Xilinx IP path): ~N + ~150 cycles per
// transform with full overlap, i.e. ~6600 cycles for 3 sequential transforms in
// the matched-filter chain, vs the 16700-cycle PRI budget. Closes RX-NEW-3.
//
// Data format: {Q[15:0], I[15:0]} packed 32-bit on s_axis/m_axis_data_tdata.
// Config tdata[0]: 1 = forward FFT, 0 = inverse FFT (matches PG109 convention).
//
// Block-FP scaling (Xilinx path only): per-frame BLK_EXP returned via
// m_axis_data_tuser[7:0] so chain-level normalization can rescale before
// magnitude compute. Sim path always returns tuser = 0 (no BFP).
// ============================================================================
// xfft_2048: AXI-Stream 2048-point FFT wrapper. The port list is common to
// both implementation branches; FFT_USE_XILINX_IP selects which body is built.
module xfft_2048 (
input wire aclk,
// Active-low reset. Used by the fallback branch only; it is not connected to
// xfft_2048_ip in the Xilinx branch below.
input wire aresetn,
// Configuration channel (AXI-Stream slave). 8-bit tdata; only bit 0
// (FWD/INV) is decoded by the IP in BFP mode (no scale schedule).
input wire [7:0] s_axis_config_tdata,
input wire s_axis_config_tvalid,
output wire s_axis_config_tready,
// Data input channel (AXI-Stream slave)
input wire [31:0] s_axis_data_tdata,
input wire s_axis_data_tvalid,
input wire s_axis_data_tlast,
output wire s_axis_data_tready,
// Data output channel (AXI-Stream master)
output wire [31:0] m_axis_data_tdata,
output wire [7:0] m_axis_data_tuser, // BLK_EXP[7:0] (Xilinx path); 0 (sim)
output wire m_axis_data_tvalid,
output wire m_axis_data_tlast,
input wire m_axis_data_tready
);
`ifdef FFT_USE_XILINX_IP
// ============================================================================
// XILINX LOGICORE FFT v9.1 - production / XSim path
// ============================================================================
// Side-channels (status/event) are tied off here; if downstream needs them
// (e.g. for pipeline-stall debug), surface them through this wrapper.
wire [7:0] xfft_status_tdata;
wire xfft_status_tvalid;
// Config and data channels pass straight through to the wrapper ports.
// The status channel is always accepted (tready tied high) and its payload is
// left unused; all event_* outputs are left unconnected.
xfft_2048_ip u_xfft (
.aclk (aclk),
.s_axis_config_tdata (s_axis_config_tdata),
.s_axis_config_tvalid (s_axis_config_tvalid),
.s_axis_config_tready (s_axis_config_tready),
.s_axis_data_tdata (s_axis_data_tdata),
.s_axis_data_tvalid (s_axis_data_tvalid),
.s_axis_data_tready (s_axis_data_tready),
.s_axis_data_tlast (s_axis_data_tlast),
.m_axis_data_tdata (m_axis_data_tdata),
.m_axis_data_tuser (m_axis_data_tuser),
.m_axis_data_tvalid (m_axis_data_tvalid),
.m_axis_data_tready (m_axis_data_tready),
.m_axis_data_tlast (m_axis_data_tlast),
.m_axis_status_tdata (xfft_status_tdata),
.m_axis_status_tvalid (xfft_status_tvalid),
.m_axis_status_tready (1'b1),
.event_frame_started (),
.event_tlast_unexpected (),
.event_tlast_missing (),
.event_status_channel_halt (),
.event_data_in_channel_halt (),
.event_data_out_channel_halt ()
);
`else
// ============================================================================
// FALLBACK - fft_engine batched one-shot (iverilog path only)
// ============================================================================
// Collect N samples -> kick fft_engine -> drain N samples. Throughput is
// ~N (collect) + ~160 K (compute) + ~N (drain). NOT representative of the
// real LogiCORE; used only for unit-level iverilog regression coverage.
// ============================================================================
localparam N = 2048;
localparam LOG2N = 11;
// Counters are one bit wider than an address so they can hold the value N.
localparam CNT_W = LOG2N + 1;
// FSM states: wait for config, collect input, run the core, drain output.
localparam [2:0] S_IDLE = 3'd0,
S_FEED = 3'd1,
S_RUN = 3'd2,
S_OUTPUT = 3'd3;
reg [2:0] state;
// Direction latched from the config beat: 1 = inverse, 0 = forward.
reg inverse_reg;
// Frame buffers: in_buf_* holds the collected input frame, out_buf_* holds the
// fft_engine result until it is streamed out.
(* ram_style = "block" *) reg signed [15:0] in_buf_re [0:N-1];
(* ram_style = "block" *) reg signed [15:0] in_buf_im [0:N-1];
(* ram_style = "block" *) reg signed [15:0] out_buf_re [0:N-1];
(* ram_style = "block" *) reg signed [15:0] out_buf_im [0:N-1];
reg [CNT_W-1:0] in_count; // input samples accepted from s_axis_data
reg [CNT_W-1:0] feed_count; // samples forwarded from in_buf to fft_engine
reg [CNT_W-1:0] out_total; // fft_engine outputs captured into out_buf
reg [CNT_W-1:0] out_count; // samples loaded into the output register
// fft_engine control and data
reg fft_start;
reg fft_inverse;
reg signed [15:0] fft_din_re, fft_din_im;
reg fft_din_valid;
wire signed [15:0] fft_dout_re, fft_dout_im;
wire fft_dout_valid;
wire fft_busy;
wire fft_done;
// Registered write ports: the FSM sets we/addr/data in one cycle and the
// memory itself is written on the following clock edge.
reg in_buf_we;
reg [LOG2N-1:0] in_buf_waddr;
reg signed [15:0] in_buf_wdata_re, in_buf_wdata_im;
reg out_buf_we;
reg [LOG2N-1:0] out_buf_waddr;
reg signed [15:0] out_buf_wdata_re, out_buf_wdata_im;
// Output holding register driving m_axis_data_*
reg signed [15:0] out_rd_re, out_rd_im;
reg out_rd_valid;
fft_engine #(
.N(N), .LOG2N(LOG2N), .DATA_W(16), .INTERNAL_W(32),
.TWIDDLE_W(16), .TWIDDLE_FILE("fft_twiddle_2048.mem")
) fft_core (
.clk(aclk), .reset_n(aresetn),
.start(fft_start), .inverse(fft_inverse),
.din_re(fft_din_re), .din_im(fft_din_im), .din_valid(fft_din_valid),
.dout_re(fft_dout_re), .dout_im(fft_dout_im), .dout_valid(fft_dout_valid),
.busy(fft_busy), .done(fft_done)
);
// A config beat is accepted only while idle, and input data is accepted only
// after that, so this branch needs one config beat before every frame.
assign s_axis_config_tready = (state == S_IDLE);
assign s_axis_data_tready = (state == S_FEED) && (in_count < N);
assign m_axis_data_tdata = {out_rd_im, out_rd_re};
assign m_axis_data_tuser = 8'h00; // No BFP in fallback path
assign m_axis_data_tvalid = out_rd_valid;
// out_count is incremented as each sample is loaded into the output register,
// so it equals N exactly while the last sample (index N-1) is being presented.
assign m_axis_data_tlast = out_rd_valid && (out_count == N);
// Buffer write ports (synchronous, no reset).
always @(posedge aclk) begin
if (in_buf_we) begin
in_buf_re[in_buf_waddr] <= in_buf_wdata_re;
in_buf_im[in_buf_waddr] <= in_buf_wdata_im;
end
if (out_buf_we) begin
out_buf_re[out_buf_waddr] <= out_buf_wdata_re;
out_buf_im[out_buf_waddr] <= out_buf_wdata_im;
end
end
// Control FSM, asynchronously reset by aresetn.
always @(posedge aclk or negedge aresetn) begin
if (!aresetn) begin
state <= S_IDLE;
inverse_reg <= 1'b0;
in_count <= 0;
feed_count <= 0;
out_total <= 0;
out_count <= 0;
fft_start <= 1'b0;
fft_inverse <= 1'b0;
fft_din_re <= 0;
fft_din_im <= 0;
fft_din_valid <= 1'b0;
in_buf_we <= 1'b0;
in_buf_waddr <= 0;
in_buf_wdata_re <= 0;
in_buf_wdata_im <= 0;
out_buf_we <= 1'b0;
out_buf_waddr <= 0;
out_buf_wdata_re <= 0;
out_buf_wdata_im <= 0;
out_rd_re <= 0;
out_rd_im <= 0;
out_rd_valid <= 1'b0;
end else begin
// Defaults: these strobes are high for one cycle only when a state below
// sets them in that cycle.
fft_start <= 1'b0;
fft_din_valid <= 1'b0;
in_buf_we <= 1'b0;
out_buf_we <= 1'b0;
case (state)
// Clear all counters and wait for a config beat.
S_IDLE: begin
in_count <= 0;
feed_count <= 0;
out_total <= 0;
out_count <= 0;
out_rd_valid <= 1'b0;
if (s_axis_config_tvalid) begin
// Config bit 0 is 1 for forward, so the inverse flag is its complement.
inverse_reg <= ~s_axis_config_tdata[0];
state <= S_FEED;
end
end
// Collect N input samples. s_axis_data_tlast is not consulted here; the
// frame boundary is determined purely by counting to N.
S_FEED: begin
if (in_count < N) begin
if (s_axis_data_tvalid) begin
in_buf_we <= 1'b1;
in_buf_waddr <= in_count[LOG2N-1:0];
in_buf_wdata_re <= s_axis_data_tdata[15:0];
in_buf_wdata_im <= s_axis_data_tdata[31:16];
in_count <= in_count + 1;
end
end else begin
// All N samples accepted: pulse start and move on to feeding the core.
fft_start <= 1'b1;
fft_inverse <= inverse_reg;
feed_count <= 0;
out_total <= 0;
state <= S_RUN;
end
end
// Feed the buffered frame to fft_engine one sample per cycle and capture
// its outputs into out_buf as they appear.
S_RUN: begin
if (feed_count < N) begin
fft_din_re <= in_buf_re[feed_count[LOG2N-1:0]];
fft_din_im <= in_buf_im[feed_count[LOG2N-1:0]];
fft_din_valid <= 1'b1;
feed_count <= feed_count + 1;
end
if (fft_dout_valid && out_total < N) begin
out_buf_we <= 1'b1;
out_buf_waddr <= out_total[LOG2N-1:0];
out_buf_wdata_re <= fft_dout_re;
out_buf_wdata_im <= fft_dout_im;
out_total <= out_total + 1;
end
// NOTE(review): this needs fft_done to be high in a cycle where out_total
// already reads N. If fft_done were a one-cycle pulse coincident with the
// final dout_valid, out_total would still read N-1 and the FSM would stay
// in S_RUN. Confirm against fft_engine's done timing.
if (fft_done && out_total >= N) begin
state <= S_OUTPUT;
out_count <= 0;
out_rd_valid <= 1'b0;
end
end
// Drain out_buf through the output register. A new sample is loaded only
// when the register is empty or the current one is being accepted, so each
// sample is held until m_axis_data_tready is high.
S_OUTPUT: begin
if (m_axis_data_tready || !out_rd_valid) begin
if (out_count < N) begin
out_rd_re <= out_buf_re[out_count[LOG2N-1:0]];
out_rd_im <= out_buf_im[out_count[LOG2N-1:0]];
out_rd_valid <= 1'b1;
out_count <= out_count + 1;
end else begin
// Last sample has been accepted: drop valid and return to idle.
out_rd_valid <= 1'b0;
state <= S_IDLE;
end
end
end
default: state <= S_IDLE;
endcase
end
end
`ifdef SIMULATION
// Zero both frame buffers at time 0 when SIMULATION is defined.
integer init_k;
initial begin
for (init_k = 0; init_k < N; init_k = init_k + 1) begin
in_buf_re[init_k] = 0;
in_buf_im[init_k] = 0;
out_buf_re[init_k] = 0;
out_buf_im[init_k] = 0;
end
end
`endif
`endif // FFT_USE_XILINX_IP
endmodule