diff --git a/9_Firmware/9_2_FPGA/.gitignore b/9_Firmware/9_2_FPGA/.gitignore index 752802f..404e048 100644 --- a/9_Firmware/9_2_FPGA/.gitignore +++ b/9_Firmware/9_2_FPGA/.gitignore @@ -17,3 +17,9 @@ tb/cosim/rx_final_doppler_out.csv *.str *.bit *.ltx + +# Vivado IP build artifacts (regenerated from .xci by gen_xfft_2048_ip.tcl). +# Only the .xci is committed — Vivado reproduces .dcp / sim-netlist / etc. +ip/xfft_2048_ip/* +!ip/xfft_2048_ip/xfft_2048_ip.xci +build_xsim_xfft/ diff --git a/9_Firmware/9_2_FPGA/fft_engine_axi_bridge.v b/9_Firmware/9_2_FPGA/fft_engine_axi_bridge.v new file mode 100644 index 0000000..c208707 --- /dev/null +++ b/9_Firmware/9_2_FPGA/fft_engine_axi_bridge.v @@ -0,0 +1,177 @@ +`timescale 1ns / 1ps +// ============================================================================ +// fft_engine_axi_bridge — drop-in fft_engine replacement backed by xfft_2048 +// ============================================================================ +// Port list mirrors fft_engine.v exactly, so call sites in +// matched_filter_processing_chain.v swap with a single module-name change +// (`fft_engine` → `fft_engine_axi_bridge`). Internally instantiates the +// xfft_2048 AXI-Stream wrapper, which routes to either the LogiCORE FFT v9.1 +// (synth/XSim, when FFT_USE_XILINX_IP is defined) or the in-house batched +// fft_engine (iverilog fallback). Either way the legacy interface is the same. +// +// Behavior contract preserved from fft_engine: +// - `start` pulse begins a frame; `inverse` selects FWD/INV +// - feed N samples on din_re/im with din_valid (any spacing OK) +// - dout_re/im pulse out with dout_valid for N samples +// - `done` pulses on the last output sample (tlast) +// - `busy` is high from start through done +// +// Latency: replaces fft_engine's ~150-180K-cycle iterative compute with the +// LogiCORE Pipelined Streaming ~N + ~150-cycle pipeline. Functional behavior +// is identical from the chain's view. 
+// ============================================================================
+
+// ----------------------------------------------------------------------------
+// fft_engine_axi_bridge
+//
+// Adapts a start/din_valid/dout_valid/done style FFT interface onto the
+// AXI-Stream ports of the xfft_2048 wrapper instantiated below.
+//
+// Parameters
+//   N, LOG2N      Frame length and its log2; used to size/compare in_count.
+//   DATA_W        Width of din_*/dout_*. The AXI data word is fixed at
+//                 2 x 16 bits, so samples are sign-resized to 16 bits before
+//                 packing (a no-op at the default DATA_W = 16).
+//   INTERNAL_W, TWIDDLE_W, TWIDDLE_FILE
+//                 Not referenced inside this module; kept so the parameter
+//                 list stays compatible with existing instantiations.
+//
+// Ports
+//   start         Sampled in S_IDLE only; begins a frame.
+//   inverse       Sampled together with start; 0 = forward, 1 = inverse.
+//   din_*         Captured only while the FSM is in S_FEED. Samples presented
+//                 in S_IDLE / S_CFG / S_DRAIN are ignored.
+//   dout_*        Combinational view of the AXI master data channel.
+//   busy          Set when start is accepted, cleared together with done.
+//   done          Registered one-cycle pulse; it asserts on the clock edge
+//                 AFTER the output beat carrying tlast is observed.
+//
+// NOTE(review): axi_dout_tuser is not used. xfft_2048_ip.xci in this change
+// selects block_floating_point scaling; the per-frame block exponent is
+// presumably reported on m_axis_data_tuser (see PG109) and is not applied
+// here - confirm whether xfft_2048.v or the downstream chain compensates.
+// ----------------------------------------------------------------------------
+module fft_engine_axi_bridge #(
+    parameter N = 2048,
+    parameter LOG2N = 11,
+    parameter DATA_W = 16,
+    parameter INTERNAL_W = 32,
+    parameter TWIDDLE_W = 16,
+    parameter TWIDDLE_FILE = "fft_twiddle_2048.mem"
+) (
+    input wire clk,
+    input wire reset_n,
+    input wire start,
+    input wire inverse,
+
+    input wire signed [DATA_W-1:0] din_re,
+    input wire signed [DATA_W-1:0] din_im,
+    input wire din_valid,
+
+    output wire signed [DATA_W-1:0] dout_re,
+    output wire signed [DATA_W-1:0] dout_im,
+    output wire dout_valid,
+
+    output reg busy,
+    output reg done
+);
+
+// ============================================================================
+// AXI-Stream signals to/from xfft_2048
+// ============================================================================
+reg  [7:0]  cfg_tdata;
+reg         cfg_tvalid;
+wire        cfg_tready;
+
+reg  [31:0] axi_din_tdata;
+reg         axi_din_tvalid;
+reg         axi_din_tlast;
+wire        axi_din_tready;
+
+wire [31:0] axi_dout_tdata;
+wire [7:0]  axi_dout_tuser;
+wire        axi_dout_tvalid;
+wire        axi_dout_tlast;
+
+// Input samples resized to the fixed 16-bit AXI lanes. Assigning a signed
+// port to a signed net sign-extends (DATA_W < 16) or truncates (DATA_W > 16);
+// at DATA_W = 16 this is a plain copy.
+wire signed [15:0] din_re_s16 = din_re;
+wire signed [15:0] din_im_s16 = din_im;
+
+// The input beat register may be (re)loaded when it holds no beat, or when the
+// beat it holds is being accepted by the slave in this cycle.
+wire din_slot_free = !axi_din_tvalid || axi_din_tready;
+
+// xfft_2048 wrapper. The output channel is always accepted (tready tied high);
+// the input channel honours s_axis_data_tready (see S_FEED below).
+xfft_2048 u_xfft (
+    .aclk                 (clk),
+    .aresetn              (reset_n),
+    .s_axis_config_tdata  (cfg_tdata),
+    .s_axis_config_tvalid (cfg_tvalid),
+    .s_axis_config_tready (cfg_tready),
+    .s_axis_data_tdata    (axi_din_tdata),
+    .s_axis_data_tvalid   (axi_din_tvalid),
+    .s_axis_data_tlast    (axi_din_tlast),
+    .s_axis_data_tready   (axi_din_tready),
+    .m_axis_data_tdata    (axi_dout_tdata),
+    .m_axis_data_tuser    (axi_dout_tuser),
+    .m_axis_data_tvalid   (axi_dout_tvalid),
+    .m_axis_data_tlast    (axi_dout_tlast),
+    .m_axis_data_tready   (1'b1)
+);
+
+// Output mapping: AXI {Q,I} 32-bit → fft_engine-style separate re/im
+assign dout_re    = $signed(axi_dout_tdata[15:0]);
+assign dout_im    = $signed(axi_dout_tdata[31:16]);
+assign dout_valid = axi_dout_tvalid;
+
+// ============================================================================
+// Bridge FSM
+// ============================================================================
+// S_IDLE  : wait for `start`; load the config word and raise busy.
+// S_CFG   : hold cfg_tvalid until the slave asserts cfg_tready.
+// S_FEED  : register each din_valid sample into an AXI beat, asserting tlast
+//           on the Nth one. A presented beat is held until it is accepted.
+// S_DRAIN : wait for the output beat with tlast, then pulse done.
+// ============================================================================
+localparam [1:0] S_IDLE  = 2'd0,
+                 S_CFG   = 2'd1,
+                 S_FEED  = 2'd2,
+                 S_DRAIN = 2'd3;
+
+reg [1:0]     state;
+reg [LOG2N:0] in_count;   // number of input samples captured into AXI beats
+
+always @(posedge clk or negedge reset_n) begin
+    if (!reset_n) begin
+        state          <= S_IDLE;
+        cfg_tdata      <= 8'd0;
+        cfg_tvalid     <= 1'b0;
+        axi_din_tdata  <= 32'd0;
+        axi_din_tvalid <= 1'b0;
+        axi_din_tlast  <= 1'b0;
+        in_count       <= 0;
+        busy           <= 1'b0;
+        done           <= 1'b0;
+    end else begin
+        // Defaults — pulses
+        done <= 1'b0;
+
+        case (state)
+            S_IDLE: begin
+                axi_din_tvalid <= 1'b0;
+                axi_din_tlast  <= 1'b0;
+                cfg_tvalid     <= 1'b0;
+                if (start) begin
+                    cfg_tdata  <= {7'd0, ~inverse};  // tdata[0]=1 → FWD
+                    cfg_tvalid <= 1'b1;
+                    in_count   <= 0;
+                    busy       <= 1'b1;
+                    state      <= S_CFG;
+                end
+            end
+
+            S_CFG: begin
+                // Hold cfg_tvalid until IP accepts (tready). Then open data path.
+                if (cfg_tready) begin
+                    cfg_tvalid <= 1'b0;
+                    state      <= S_FEED;
+                end
+            end
+
+            S_FEED: begin
+                // AXI4-Stream requires TVALID/TDATA/TLAST to stay stable once
+                // asserted until TREADY completes the transfer, so the beat
+                // register is only touched when the slot is free. With TREADY
+                // permanently high this is cycle-identical to unconditional
+                // forwarding.
+                //
+                // Limitation: this interface has no ready signal back to the
+                // producer, so a din_valid sample that arrives while a beat is
+                // stalled is neither buffered nor counted.
+                if (din_slot_free) begin
+                    if (din_valid && (in_count < N)) begin
+                        axi_din_tdata  <= {din_im_s16, din_re_s16};  // {Q,I}
+                        axi_din_tvalid <= 1'b1;
+                        axi_din_tlast  <= (in_count == N - 1);
+                        in_count       <= in_count + 1;
+                    end else begin
+                        axi_din_tvalid <= 1'b0;
+                        axi_din_tlast  <= 1'b0;
+                    end
+                    if (in_count == N) begin
+                        // Nth beat has been accepted (or is being accepted
+                        // now); await output drain.
+                        state <= S_DRAIN;
+                    end
+                end
+            end
+
+            S_DRAIN: begin
+                // Wait for tlast on output, then return to idle.
+                if (axi_dout_tvalid && axi_dout_tlast) begin
+                    done  <= 1'b1;
+                    busy  <= 1'b0;
+                    state <= S_IDLE;
+                end
+            end
+
+            default: state <= S_IDLE;
+        endcase
+    end
+end
+
+endmodule
diff --git a/9_Firmware/9_2_FPGA/ip/xfft_2048_ip/xfft_2048_ip.xci b/9_Firmware/9_2_FPGA/ip/xfft_2048_ip/xfft_2048_ip.xci new file mode 100644 index 0000000..efea278 --- /dev/null +++ b/9_Firmware/9_2_FPGA/ip/xfft_2048_ip/xfft_2048_ip.xci @@ -0,0 +1,353 @@ +{ + "schema": "xilinx.com:schema:json_instance:1.0", + "ip_inst": { + "xci_name": "xfft_2048_ip", + "component_reference": "xilinx.com:ip:xfft:9.1", + "ip_revision": "15", + "gen_directory": ".", + "parameters": { + "component_parameters": { + "Component_Name": [ { "value": "xfft_2048_ip", "resolve_type": "user", "usage": "all" } ], + "channels": [ { "value": "1", "value_src": "user", "resolve_type": "user", "usage": "all" } ], + "transform_length": [ { "value": "2048", "value_src": "user", "resolve_type": "user", "usage": "all" } ], + "target_clock_frequency": [ { "value": "100", "value_src": "user", "resolve_type": "user", "format": "long", "usage": "all" } ], + "implementation_options": [ { "value": "pipelined_streaming_io", "value_src": "user", "resolve_type": "user", "usage": "all" } ], + "target_data_throughput": [ { "value": "50", "value_src": "user", "resolve_type": "user", "format": "long", "usage": "all" } ], + "run_time_configurable_transform_length": [ { "value": "false", "resolve_type": "user", "format": "bool", "usage": "all" } ], + "data_format": [ { "value": "fixed_point", "value_src": "user", "resolve_type": "user", "usage": "all" } ], + "input_width": [ { "value": "16", "value_src": "user", "resolve_type": "user", "usage": "all" } ], + "phase_factor_width": [ { "value": "16", "value_src": "user", "resolve_type": "user", "usage": "all" } ], + "scaling_options": [ { "value": "block_floating_point", "value_src": "user", "resolve_type": "user", "usage": "all" } ], + "rounding_modes": [ { "value": "convergent_rounding", 
"value_src": "user", "resolve_type": "user", "usage": "all" } ], + "aclken": [ { "value": "false", "resolve_type": "user", "format": "bool", "usage": "all" } ], + "aresetn": [ { "value": "false", "resolve_type": "user", "format": "bool", "usage": "all" } ], + "ovflo": [ { "value": "false", "resolve_type": "user", "format": "bool", "usage": "all" } ], + "xk_index": [ { "value": "false", "resolve_type": "user", "format": "bool", "usage": "all" } ], + "throttle_scheme": [ { "value": "nonrealtime", "value_src": "user", "resolve_type": "user", "usage": "all" } ], + "output_ordering": [ { "value": "natural_order", "value_src": "user", "resolve_type": "user", "usage": "all" } ], + "cyclic_prefix_insertion": [ { "value": "false", "value_src": "user", "resolve_type": "user", "format": "bool", "usage": "all" } ], + "memory_options_data": [ { "value": "block_ram", "value_src": "user", "resolve_type": "user", "usage": "all" } ], + "memory_options_phase_factors": [ { "value": "block_ram", "value_src": "user", "resolve_type": "user", "usage": "all" } ], + "memory_options_reorder": [ { "value": "block_ram", "value_src": "user", "resolve_type": "user", "usage": "all" } ], + "number_of_stages_using_block_ram_for_data_and_phase_factors": [ { "value": "4", "resolve_type": "user", "usage": "all" } ], + "memory_options_hybrid": [ { "value": "false", "value_src": "user", "resolve_type": "user", "format": "bool", "usage": "all" } ], + "complex_mult_type": [ { "value": "use_mults_resources", "value_src": "user", "resolve_type": "user", "usage": "all" } ], + "butterfly_type": [ { "value": "use_xtremedsp_slices", "value_src": "user", "resolve_type": "user", "usage": "all" } ], + "super_sample_rates": [ { "value": "1", "resolve_type": "user", "usage": "all" } ], + "systolicfft_inv": [ { "value": "false", "resolve_type": "user", "format": "bool", "usage": "all" } ], + "blocking_run_time_configuration": [ { "value": "false", "resolve_type": "user", "format": "bool", "enabled": false, "usage": 
"all" } ] + }, + "model_parameters": { + "C_XDEVICEFAMILY": [ { "value": "artix7", "resolve_type": "generated", "usage": "all" } ], + "C_PART": [ { "value": "xc7a50tftg256-2", "resolve_type": "generated", "usage": "all" } ], + "C_S_AXIS_CONFIG_TDATA_WIDTH": [ { "value": "8", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_S_AXIS_DATA_TDATA_WIDTH": [ { "value": "32", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_M_AXIS_DATA_TDATA_WIDTH": [ { "value": "32", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_M_AXIS_DATA_TUSER_WIDTH": [ { "value": "8", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_M_AXIS_STATUS_TDATA_WIDTH": [ { "value": "8", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_THROTTLE_SCHEME": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_NSSR": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_CHANNELS": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_NFFT_MAX": [ { "value": "11", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_ARCH": [ { "value": "3", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_HAS_NFFT": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_USE_FLT_PT": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_INPUT_WIDTH": [ { "value": "16", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_TWIDDLE_WIDTH": [ { "value": "16", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_OUTPUT_WIDTH": [ { "value": "16", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_HAS_SCALING": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_HAS_BFP": [ { "value": "1", "resolve_type": 
"generated", "format": "long", "usage": "all" } ], + "C_HAS_ROUNDING": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_HAS_ACLKEN": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_HAS_ARESETN": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_HAS_OVFLO": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_HAS_NATURAL_INPUT": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_HAS_NATURAL_OUTPUT": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_HAS_CYCLIC_PREFIX": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_HAS_XK_INDEX": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_DATA_MEM_TYPE": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_TWIDDLE_MEM_TYPE": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_BRAM_STAGES": [ { "value": "4", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_REORDER_MEM_TYPE": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_USE_HYBRID_RAM": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_OPTIMIZE_GOAL": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_CMPY_TYPE": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_BFLY_TYPE": [ { "value": "1", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_SYSTOLICFFT_INV": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ], + "C_IS_BLOCKING_RUNTIME_CONF": [ { "value": "0", "resolve_type": "generated", "format": "long", "usage": "all" } ] + }, + "project_parameters": 
{ + "ARCHITECTURE": [ { "value": "artix7", "resolve_type": "generated", "usage": "all" } ], + "BASE_BOARD_PART": [ { "value": "", "resolve_type": "generated", "usage": "all" } ], + "BOARD_CONNECTIONS": [ { "value": "", "resolve_type": "generated", "usage": "all" } ], + "DEVICE": [ { "value": "xc7a50t", "resolve_type": "generated", "usage": "all" } ], + "PACKAGE": [ { "value": "ftg256", "resolve_type": "generated", "usage": "all" } ], + "PREFHDL": [ { "value": "VERILOG", "resolve_type": "generated", "usage": "all" } ], + "SILICON_REVISION": [ { "value": "", "resolve_type": "generated", "usage": "all" } ], + "SIMULATOR_LANGUAGE": [ { "value": "MIXED", "resolve_type": "generated", "usage": "all" } ], + "SPEEDGRADE": [ { "value": "-2", "resolve_type": "generated", "usage": "all" } ], + "STATIC_POWER": [ { "value": "", "resolve_type": "generated", "usage": "all" } ], + "TEMPERATURE_GRADE": [ { "value": "", "resolve_type": "generated", "usage": "all" } ] + }, + "runtime_parameters": { + "IPCONTEXT": [ { "value": "IP_Flow" } ], + "IPREVISION": [ { "value": "15" } ], + "MANAGED": [ { "value": "TRUE" } ], + "OUTPUTDIR": [ { "value": "." } ], + "SELECTEDSIMMODEL": [ { "value": "" } ], + "SHAREDDIR": [ { "value": "." 
} ], + "SWVERSION": [ { "value": "2025.2" } ], + "SYNTHESISFLOW": [ { "value": "OUT_OF_CONTEXT" } ] + } + }, + "boundary": { + "ports": { + "aclk": [ { "direction": "in", "driver_value": "0x1" } ], + "s_axis_config_tdata": [ { "direction": "in", "size_left": "7", "size_right": "0" } ], + "s_axis_config_tvalid": [ { "direction": "in" } ], + "s_axis_config_tready": [ { "direction": "out" } ], + "s_axis_data_tdata": [ { "direction": "in", "size_left": "31", "size_right": "0" } ], + "s_axis_data_tvalid": [ { "direction": "in" } ], + "s_axis_data_tready": [ { "direction": "out" } ], + "s_axis_data_tlast": [ { "direction": "in" } ], + "m_axis_data_tdata": [ { "direction": "out", "size_left": "31", "size_right": "0" } ], + "m_axis_data_tuser": [ { "direction": "out", "size_left": "7", "size_right": "0" } ], + "m_axis_data_tvalid": [ { "direction": "out" } ], + "m_axis_data_tready": [ { "direction": "in", "driver_value": "0x1" } ], + "m_axis_data_tlast": [ { "direction": "out" } ], + "m_axis_status_tdata": [ { "direction": "out", "size_left": "7", "size_right": "0" } ], + "m_axis_status_tvalid": [ { "direction": "out" } ], + "m_axis_status_tready": [ { "direction": "in", "driver_value": "0x1" } ], + "event_frame_started": [ { "direction": "out", "driver_value": "0x0" } ], + "event_tlast_unexpected": [ { "direction": "out", "driver_value": "0x0" } ], + "event_tlast_missing": [ { "direction": "out", "driver_value": "0x0" } ], + "event_status_channel_halt": [ { "direction": "out", "driver_value": "0x0" } ], + "event_data_in_channel_halt": [ { "direction": "out", "driver_value": "0x0" } ], + "event_data_out_channel_halt": [ { "direction": "out", "driver_value": "0x0" } ] + }, + "interfaces": { + "event_frame_started_intf": { + "vlnv": "xilinx.com:signal:interrupt:1.0", + "abstraction_type": "xilinx.com:signal:interrupt_rtl:1.0", + "mode": "master", + "parameters": { + "SENSITIVITY": [ { "value": "EDGE_RISING", "value_src": "constant", "usage": "all" } ], + "PortWidth": [ { 
"value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ] + }, + "port_maps": { + "INTERRUPT": [ { "physical_name": "event_frame_started" } ] + } + }, + "event_tlast_unexpected_intf": { + "vlnv": "xilinx.com:signal:interrupt:1.0", + "abstraction_type": "xilinx.com:signal:interrupt_rtl:1.0", + "mode": "master", + "parameters": { + "SENSITIVITY": [ { "value": "EDGE_RISING", "value_src": "constant", "usage": "all" } ], + "PortWidth": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ] + }, + "port_maps": { + "INTERRUPT": [ { "physical_name": "event_tlast_unexpected" } ] + } + }, + "event_tlast_missing_intf": { + "vlnv": "xilinx.com:signal:interrupt:1.0", + "abstraction_type": "xilinx.com:signal:interrupt_rtl:1.0", + "mode": "master", + "parameters": { + "SENSITIVITY": [ { "value": "EDGE_RISING", "value_src": "constant", "usage": "all" } ], + "PortWidth": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ] + }, + "port_maps": { + "INTERRUPT": [ { "physical_name": "event_tlast_missing" } ] + } + }, + "event_fft_overflow_intf": { + "vlnv": "xilinx.com:signal:interrupt:1.0", + "abstraction_type": "xilinx.com:signal:interrupt_rtl:1.0", + "mode": "master", + "parameters": { + "SENSITIVITY": [ { "value": "EDGE_RISING", "value_src": "constant", "usage": "all" } ], + "PortWidth": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ] + } + }, + "event_status_channel_halt_intf": { + "vlnv": "xilinx.com:signal:interrupt:1.0", + "abstraction_type": "xilinx.com:signal:interrupt_rtl:1.0", + "mode": "master", + "parameters": { + "SENSITIVITY": [ { "value": "EDGE_RISING", "value_src": "constant", "usage": "all" } ], + "PortWidth": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, 
"is_static_object": false } ] + }, + "port_maps": { + "INTERRUPT": [ { "physical_name": "event_status_channel_halt" } ] + } + }, + "event_data_in_channel_halt_intf": { + "vlnv": "xilinx.com:signal:interrupt:1.0", + "abstraction_type": "xilinx.com:signal:interrupt_rtl:1.0", + "mode": "master", + "parameters": { + "SENSITIVITY": [ { "value": "EDGE_RISING", "value_src": "constant", "usage": "all" } ], + "PortWidth": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ] + }, + "port_maps": { + "INTERRUPT": [ { "physical_name": "event_data_in_channel_halt" } ] + } + }, + "event_data_out_channel_halt_intf": { + "vlnv": "xilinx.com:signal:interrupt:1.0", + "abstraction_type": "xilinx.com:signal:interrupt_rtl:1.0", + "mode": "master", + "parameters": { + "SENSITIVITY": [ { "value": "EDGE_RISING", "value_src": "constant", "usage": "all" } ], + "PortWidth": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ] + }, + "port_maps": { + "INTERRUPT": [ { "physical_name": "event_data_out_channel_halt" } ] + } + }, + "S_AXIS_DATA": { + "vlnv": "xilinx.com:interface:axis:1.0", + "abstraction_type": "xilinx.com:interface:axis_rtl:1.0", + "mode": "slave", + "parameters": { + "TDATA_NUM_BYTES": [ { "value": "4", "value_src": "auto", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "TDEST_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "TID_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "TUSER_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TREADY": [ { "value": "1", 
"value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TSTRB": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TKEEP": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TLAST": [ { "value": "1", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "FREQ_HZ": [ { "value": "100000000", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "PHASE": [ { "value": "0.0", "resolve_type": "generated", "format": "float", "is_ips_inferred": true, "is_static_object": false } ], + "CLK_DOMAIN": [ { "value": "", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ], + "LAYERED_METADATA": [ { "value": "undef", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ], + "INSERT_VIP": [ { "value": "0", "resolve_type": "user", "format": "long", "usage": "simulation.rtl", "is_ips_inferred": true, "is_static_object": false } ] + }, + "port_maps": { + "TDATA": [ { "physical_name": "s_axis_data_tdata" } ], + "TLAST": [ { "physical_name": "s_axis_data_tlast" } ], + "TREADY": [ { "physical_name": "s_axis_data_tready" } ], + "TVALID": [ { "physical_name": "s_axis_data_tvalid" } ] + } + }, + "aclk_intf": { + "vlnv": "xilinx.com:signal:clock:1.0", + "abstraction_type": "xilinx.com:signal:clock_rtl:1.0", + "mode": "slave", + "parameters": { + "ASSOCIATED_BUSIF": [ { "value": "S_AXIS_CONFIG:M_AXIS_DATA:M_AXIS_STATUS:S_AXIS_DATA", "value_src": "constant", "usage": "all" } ], + "ASSOCIATED_RESET": [ { "value": "aresetn", "value_src": "constant", "usage": "all" } ], + "ASSOCIATED_CLKEN": [ { "value": "aclken", "value_src": 
"constant", "usage": "all" } ], + "FREQ_HZ": [ { "value": "100000000", "resolve_type": "user", "format": "long", "usage": "all" } ], + "FREQ_TOLERANCE_HZ": [ { "value": "0", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "PHASE": [ { "value": "0.0", "resolve_type": "generated", "format": "float", "is_ips_inferred": true, "is_static_object": false } ], + "CLK_DOMAIN": [ { "value": "", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ], + "ASSOCIATED_PORT": [ { "value": "", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ], + "INSERT_VIP": [ { "value": "0", "resolve_type": "user", "format": "long", "usage": "simulation.rtl", "is_ips_inferred": true, "is_static_object": false } ] + }, + "port_maps": { + "CLK": [ { "physical_name": "aclk" } ] + } + }, + "aresetn_intf": { + "vlnv": "xilinx.com:signal:reset:1.0", + "abstraction_type": "xilinx.com:signal:reset_rtl:1.0", + "mode": "slave", + "parameters": { + "POLARITY": [ { "value": "ACTIVE_LOW", "value_src": "constant", "usage": "all" } ], + "INSERT_VIP": [ { "value": "0", "resolve_type": "user", "format": "long", "usage": "simulation.rtl", "is_ips_inferred": true, "is_static_object": false } ] + } + }, + "aclken_intf": { + "vlnv": "xilinx.com:signal:clockenable:1.0", + "abstraction_type": "xilinx.com:signal:clockenable_rtl:1.0", + "mode": "slave", + "parameters": { + "POLARITY": [ { "value": "ACTIVE_HIGH", "value_src": "constant", "usage": "all" } ] + } + }, + "M_AXIS_STATUS": { + "vlnv": "xilinx.com:interface:axis:1.0", + "abstraction_type": "xilinx.com:interface:axis_rtl:1.0", + "mode": "master", + "parameters": { + "TDATA_NUM_BYTES": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "TDEST_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, 
"is_static_object": false } ], + "TID_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "TUSER_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TREADY": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TSTRB": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TKEEP": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TLAST": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "FREQ_HZ": [ { "value": "100000000", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "PHASE": [ { "value": "0.0", "resolve_type": "generated", "format": "float", "is_ips_inferred": true, "is_static_object": false } ], + "CLK_DOMAIN": [ { "value": "", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ], + "LAYERED_METADATA": [ { "value": "undef", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ], + "INSERT_VIP": [ { "value": "0", "resolve_type": "user", "format": "long", "usage": "simulation.rtl", "is_ips_inferred": true, "is_static_object": false } ] + }, + "port_maps": { + "TDATA": [ { "physical_name": "m_axis_status_tdata" } ], + "TREADY": [ { "physical_name": "m_axis_status_tready" } ], + "TVALID": [ { "physical_name": "m_axis_status_tvalid" } ] + } + }, + "M_AXIS_DATA": { + "vlnv": "xilinx.com:interface:axis:1.0", + "abstraction_type": "xilinx.com:interface:axis_rtl:1.0", + "mode": 
"master", + "parameters": { + "TDATA_NUM_BYTES": [ { "value": "4", "value_src": "auto", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "TDEST_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "TID_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "TUSER_WIDTH": [ { "value": "8", "value_src": "auto", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TREADY": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TSTRB": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TKEEP": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TLAST": [ { "value": "1", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "FREQ_HZ": [ { "value": "100000000", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "PHASE": [ { "value": "0.0", "resolve_type": "generated", "format": "float", "is_ips_inferred": true, "is_static_object": false } ], + "CLK_DOMAIN": [ { "value": "", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ], + "LAYERED_METADATA": [ { "value": "undef", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ], + "INSERT_VIP": [ { "value": "0", "resolve_type": "user", "format": "long", "usage": "simulation.rtl", "is_ips_inferred": true, "is_static_object": false } ] + }, + "port_maps": { + 
"TDATA": [ { "physical_name": "m_axis_data_tdata" } ], + "TLAST": [ { "physical_name": "m_axis_data_tlast" } ], + "TREADY": [ { "physical_name": "m_axis_data_tready" } ], + "TUSER": [ { "physical_name": "m_axis_data_tuser" } ], + "TVALID": [ { "physical_name": "m_axis_data_tvalid" } ] + } + }, + "S_AXIS_CONFIG": { + "vlnv": "xilinx.com:interface:axis:1.0", + "abstraction_type": "xilinx.com:interface:axis_rtl:1.0", + "mode": "slave", + "parameters": { + "TDATA_NUM_BYTES": [ { "value": "1", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "TDEST_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "TID_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "TUSER_WIDTH": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TREADY": [ { "value": "1", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TSTRB": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TKEEP": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "HAS_TLAST": [ { "value": "0", "value_src": "constant", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "FREQ_HZ": [ { "value": "100000000", "resolve_type": "generated", "format": "long", "is_ips_inferred": true, "is_static_object": false } ], + "PHASE": [ { "value": "0.0", "resolve_type": "generated", "format": "float", "is_ips_inferred": true, "is_static_object": 
false } ], + "CLK_DOMAIN": [ { "value": "", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ], + "LAYERED_METADATA": [ { "value": "undef", "resolve_type": "generated", "is_ips_inferred": true, "is_static_object": false } ], + "INSERT_VIP": [ { "value": "0", "resolve_type": "user", "format": "long", "usage": "simulation.rtl", "is_ips_inferred": true, "is_static_object": false } ] + }, + "port_maps": { + "TDATA": [ { "physical_name": "s_axis_config_tdata" } ], + "TREADY": [ { "physical_name": "s_axis_config_tready" } ], + "TVALID": [ { "physical_name": "s_axis_config_tvalid" } ] + } + } + } + } + }, + "checksum": "6bf94ec5" +} \ No newline at end of file diff --git a/9_Firmware/9_2_FPGA/matched_filter_processing_chain.v b/9_Firmware/9_2_FPGA/matched_filter_processing_chain.v index 3ce273e..c0f3581 100644 --- a/9_Firmware/9_2_FPGA/matched_filter_processing_chain.v +++ b/9_Firmware/9_2_FPGA/matched_filter_processing_chain.v @@ -151,7 +151,14 @@ wire fft_dout_valid; wire fft_busy; wire fft_done; -fft_engine #( +// xfft_2048 (Xilinx LogiCORE FFT v9.1) via fft_engine_axi_bridge — preserves +// the legacy fft_engine port surface so this call site stays a 1-line swap. +// In synth + remote XSim: real Pipelined Streaming IP (~N + 150 cycles/pass, +// closes RX-NEW-3 PRI budget). In iverilog: bridge falls through to the +// in-house fft_engine batched fallback inside xfft_2048.v (~150K cycles/pass, +// for unit coverage only — receiver-integration timing is meaningful only in +// XSim with the real IP). 
+fft_engine_axi_bridge #( .N(FFT_SIZE), .LOG2N(ADDR_BITS), .DATA_W(16), diff --git a/9_Firmware/9_2_FPGA/run_regression.sh b/9_Firmware/9_2_FPGA/run_regression.sh index c2ecd03..da3bc86 100755 --- a/9_Firmware/9_2_FPGA/run_regression.sh +++ b/9_Firmware/9_2_FPGA/run_regression.sh @@ -69,6 +69,8 @@ PROD_RTL=( doppler_processor.v xfft_16.v fft_engine.v + xfft_2048.v + fft_engine_axi_bridge.v frequency_matched_filter.v usb_data_interface.v usb_data_interface_ft2232h.v @@ -102,6 +104,7 @@ RECEIVER_RTL=( chirp_memory_loader_param.v latency_buffer.v matched_filter_multi_segment.v matched_filter_processing_chain.v range_bin_decimator.v doppler_processor.v xfft_16.v fft_engine.v + xfft_2048.v fft_engine_axi_bridge.v frequency_matched_filter.v rx_gain_control.v mti_canceller.v ) @@ -282,7 +285,7 @@ run_mf_cosim() { if [[ -n "$define" ]]; then cmd="$cmd $define" fi - cmd="$cmd -o $vvp tb/tb_mf_cosim.v matched_filter_processing_chain.v fft_engine.v frequency_matched_filter.v chirp_memory_loader_param.v" + cmd="$cmd -o $vvp tb/tb_mf_cosim.v matched_filter_processing_chain.v fft_engine.v xfft_2048.v fft_engine_axi_bridge.v frequency_matched_filter.v chirp_memory_loader_param.v" if ! eval "$cmd" 2>/tmp/iverilog_err_$$; then echo -e "${RED}COMPILE FAIL${NC}" @@ -640,7 +643,8 @@ run_test "FIR Lowpass" \ run_test --timeout=600 "Matched Filter Chain" \ tb/tb_mf_reg.vvp \ tb/tb_matched_filter_processing_chain.v matched_filter_processing_chain.v \ - fft_engine.v chirp_memory_loader_param.v frequency_matched_filter.v + fft_engine.v xfft_2048.v fft_engine_axi_bridge.v \ + chirp_memory_loader_param.v frequency_matched_filter.v # RX-B regression coverage: chain pipeline depth + full-chain # autocorrelation peak position. 
Both run the production fft_engine @@ -649,12 +653,13 @@ run_test --timeout=600 "Matched Filter Chain" \ run_test --timeout=120 "RX-B Chain Pipeline Latency (tb_rxb_latency_measure)" \ tb/tb_rxb_lat_reg.vvp \ tb/tb_rxb_latency_measure.v matched_filter_processing_chain.v \ - fft_engine.v frequency_matched_filter.v + fft_engine.v xfft_2048.v fft_engine_axi_bridge.v frequency_matched_filter.v run_test --timeout=600 "RX-B Full-Chain Autocorrelation (tb_rxb_fullchain_latency)" \ tb/tb_rxb_fc_reg.vvp \ tb/tb_rxb_fullchain_latency.v matched_filter_multi_segment.v \ - matched_filter_processing_chain.v fft_engine.v frequency_matched_filter.v \ + matched_filter_processing_chain.v fft_engine.v xfft_2048.v \ + fft_engine_axi_bridge.v frequency_matched_filter.v \ chirp_memory_loader_param.v echo "" diff --git a/9_Firmware/9_2_FPGA/scripts/50t/build_50t.tcl b/9_Firmware/9_2_FPGA/scripts/50t/build_50t.tcl index 51a0e5e..40c2ed3 100644 --- a/9_Firmware/9_2_FPGA/scripts/50t/build_50t.tcl +++ b/9_Firmware/9_2_FPGA/scripts/50t/build_50t.tcl @@ -43,7 +43,22 @@ foreach f [glob -directory $rtl_dir *.v] { } set_property top $top_module [current_fileset] -set_property verilog_define {FFT_XPM_BRAM} [current_fileset] +# FFT_USE_XILINX_IP routes xfft_2048.v's wrapper to the LogiCORE FFT v9.1 IP +# (xfft_2048_ip) instead of the in-house fft_engine fallback. The IP closes +# RX-NEW-3 (~6600-cycle 3-FFT chain budget vs 16700-cycle PRI). +set_property verilog_define {FFT_XPM_BRAM FFT_USE_XILINX_IP} [current_fileset] + +# ===== IP CATALOG ===== +# Read the pre-generated xfft_2048_ip XCI (produced by gen_xfft_2048_ip.tcl). +# generate_target + synth_ip prepare its OOC netlist before launch_runs. +set xci_path [file join $project_root "ip" "xfft_2048_ip" "xfft_2048_ip.xci"] +if {![file exists $xci_path]} { + puts "ERROR: $xci_path missing — run scripts/50t/gen_xfft_2048_ip.tcl first." 
+ exit 1 +} +read_ip $xci_path +generate_target {synthesis simulation instantiation_template} [get_ips xfft_2048_ip] +synth_ip [get_ips xfft_2048_ip] # Constraints — 50T XDC + MMCM supplement add_files -fileset constrs_1 -norecurse [file join $project_root "constraints" "xc7a50t_ftg256.xdc"] diff --git a/9_Firmware/9_2_FPGA/scripts/50t/gen_xfft_2048_ip.tcl b/9_Firmware/9_2_FPGA/scripts/50t/gen_xfft_2048_ip.tcl new file mode 100644 index 0000000..d484459 --- /dev/null +++ b/9_Firmware/9_2_FPGA/scripts/50t/gen_xfft_2048_ip.tcl @@ -0,0 +1,75 @@ +################################################################################ +# gen_xfft_2048_ip.tcl — Generate Xilinx LogiCORE FFT (xfft_v9_1) for AERIS-10 +# +# Produces ip/xfft_2048_ip/xfft_2048_ip.xci configured for the matched-filter chain: +# - Transform Length: 2048 +# - Architecture: Pipelined Streaming I/O +# - Data Format: Fixed Point +# - Scaling: Block Floating Point (run-time auto-scale) +# - Rounding: Convergent (round-to-even) +# - Input Width: 16-bit per real/imag (matches DDC output, DATA_W in chain) +# - Phase Width: 16-bit +# - Output Ordering: Natural Order +# - Throttle Scheme: Non Real Time (allows downstream backpressure) +# - Memory: Block RAM for data, reorder, phase factors +# +# Usage (run on remote Vivado box): +# cd ~/PLFM_RADAR_work/PLFM_RADAR/9_Firmware/9_2_FPGA +# vivado -mode batch -source scripts/50t/gen_xfft_2048_ip.tcl +# +# Output: ip/xfft_2048_ip/xfft_2048_ip.xci (committed; build_50t.tcl reads this) +# Note: IP module is named xfft_2048_ip to avoid collision with the wrapper +# module xfft_2048 in xfft_2048.v. +################################################################################ + +set script_dir [file dirname [file normalize [info script]]] +set project_root [file normalize [file join $script_dir "../.."]] +set ip_dir [file join $project_root "ip"] +set fpga_part "xc7a50tftg256-2" + +file mkdir $ip_dir + +# Spin up a throwaway in-memory project just for IP generation. 
+create_project -in_memory -part $fpga_part +set_property ip_repo_paths $ip_dir [current_project] + +# Create the IP. Any prior version is overwritten via -force. +create_ip -name xfft -vendor xilinx.com -library ip \ + -version 9.1 -module_name xfft_2048_ip -dir $ip_dir -force + +set ip [get_ips xfft_2048_ip] + +set_property -dict [list \ + CONFIG.transform_length {2048} \ + CONFIG.implementation_options {pipelined_streaming_io} \ + CONFIG.channels {1} \ + CONFIG.data_format {fixed_point} \ + CONFIG.scaling_options {block_floating_point} \ + CONFIG.rounding_modes {convergent_rounding} \ + CONFIG.input_width {16} \ + CONFIG.phase_factor_width {16} \ + CONFIG.output_ordering {natural_order} \ + CONFIG.cyclic_prefix_insertion {false} \ + CONFIG.throttle_scheme {nonrealtime} \ + CONFIG.target_clock_frequency {100} \ + CONFIG.target_data_throughput {50} \ + CONFIG.complex_mult_type {use_mults_resources} \ + CONFIG.butterfly_type {use_xtremedsp_slices} \ + CONFIG.memory_options_data {block_ram} \ + CONFIG.memory_options_reorder {block_ram} \ + CONFIG.memory_options_phase_factors {block_ram} \ + CONFIG.memory_options_hybrid {false} \ +] $ip + +# Generate synthesis + simulation targets so XSim and Vivado synth both work. 
+generate_target {synthesis simulation instantiation_template} $ip +synth_ip $ip + +puts "================================================================" +puts " xfft_2048_ip IP generation complete" +puts " XCI: $ip_dir/xfft_2048_ip/xfft_2048_ip.xci" +puts " DCP: [get_property IP_OUTPUT_DIR $ip]/xfft_2048_ip.dcp" +puts "================================================================" + +close_project +exit 0 diff --git a/9_Firmware/9_2_FPGA/scripts/50t/run_xfft_xsim.sh b/9_Firmware/9_2_FPGA/scripts/50t/run_xfft_xsim.sh new file mode 100644 index 0000000..b943c86 --- /dev/null +++ b/9_Firmware/9_2_FPGA/scripts/50t/run_xfft_xsim.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# ============================================================================ +# run_xfft_xsim.sh — Compile + run xfft_2048 wrapper testbench in Vivado XSim +# +# Verifies the wrapper with the real LogiCORE FFT v9.1 (xfft_2048_ip). +# Cannot run in iverilog because the IP uses Xilinx primitives. +# +# Usage (on remote Vivado box): +# cd ~/PLFM_RADAR_work/PLFM_RADAR/9_Firmware/9_2_FPGA +# bash scripts/50t/run_xfft_xsim.sh +# +# Output: /tmp/xfft_xsim.log (look for "ALL TESTS PASSED") +# ============================================================================ +set -e + +PROJ_ROOT="$(cd "$(dirname "$0")/../.." && pwd)" +IP_NETLIST="$PROJ_ROOT/ip/xfft_2048_ip/xfft_2048_ip_sim_netlist.v" +WRAPPER="$PROJ_ROOT/xfft_2048.v" +TB="$PROJ_ROOT/tb/tb_xfft_2048_xsim.v" + +WORK_DIR="$PROJ_ROOT/build_xsim_xfft" +mkdir -p "$WORK_DIR" +cd "$WORK_DIR" + +echo "===== Compiling Verilog sources =====" +# Wrapper + testbench with the IP-on define +xvlog -d FFT_USE_XILINX_IP "$WRAPPER" "$TB" +# IP simulation netlist — references unisim primitives +xvlog "$IP_NETLIST" +# fft_engine etc. 
NOT needed because FFT_USE_XILINX_IP routes around it, +# but the wrapper still must compile cleanly under both branches; if xvlog +# complains about an unresolved fft_engine reference (it shouldn't because +# the `else` branch is hidden by the define), include it here: +# xvlog "$PROJ_ROOT/fft_engine.v" + +echo "===== Elaborating =====" +# `glbl` is a Vivado-supplied module that Xilinx primitives (FDRE etc.) +# reference for the global GSR/GTS signals. Elaborating it as a second top +# satisfies the unresolved-reference error xelab raises for the IP netlist. +xelab -L unisims_ver -L secureip --debug typical \ + tb_xfft_2048_xsim glbl -snapshot tb_xfft_2048_snap + +echo "===== Running simulation =====" +xsim tb_xfft_2048_snap --runall --log /tmp/xfft_xsim.log + +echo "===== Done. Tail of log: =====" +tail -40 /tmp/xfft_xsim.log diff --git a/9_Firmware/9_2_FPGA/tb/tb_xfft_2048_xsim.v b/9_Firmware/9_2_FPGA/tb/tb_xfft_2048_xsim.v new file mode 100644 index 0000000..e7f91e0 --- /dev/null +++ b/9_Firmware/9_2_FPGA/tb/tb_xfft_2048_xsim.v @@ -0,0 +1,283 @@ +`timescale 1ns / 1ps +// ============================================================================ +// tb_xfft_2048_xsim.v — XSim verification of xfft_2048 wrapper with real IP +// ============================================================================ +// Compiled with `+define+FFT_USE_XILINX_IP` so the wrapper instantiates the +// LogiCORE FFT v9.1 (xfft_2048_ip). Cannot run in iverilog because that path +// uses Xilinx primitives (DSP48E1, BRAM18). For iverilog, leave the define +// off and the wrapper falls back to the fft_engine batched implementation. +// +// Three minimal stimuli: +// 1. DC (re=10000, im=0) → peak bin = 0 with large magnitude; +// all other bins near zero. +// 2. Impulse (single sample (10000,0)) → output magnitude flat across all bins +// (DFT of a delta = constant). +// 3. Tone (cos+jsin at bin K=128) → peak bin = K with large magnitude; +// all other bins near zero. 
+// +// PASS criteria: +// - peak bin matches expected +// - peak magnitude > 8× mean of non-peak bins (analogous to receiver-chain +// SNR check that's been used elsewhere in this codebase) +// ============================================================================ + +module tb_xfft_2048_xsim; + + localparam CLK_PERIOD = 10.0; // 100 MHz + localparam N = 2048; + localparam LOG2N = 11; + + reg aclk = 0; + reg aresetn = 0; + + reg [7:0] cfg_tdata; + reg cfg_tvalid; + wire cfg_tready; + + reg [31:0] din_tdata; + reg din_tvalid; + reg din_tlast; + wire din_tready; + + wire [31:0] dout_tdata; + wire [7:0] dout_tuser; + wire dout_tvalid; + wire dout_tlast; + reg dout_tready; + + integer pass_count = 0; + integer fail_count = 0; + integer test_num = 0; + + integer k; + integer out_idx; + integer peak_bin; + integer peak_mag; + integer mean_others; + integer mag_sum_others; + integer this_mag; + integer cur_re, cur_im; + + // Capture the entire output frame + reg signed [15:0] out_re [0:N-1]; + reg signed [15:0] out_im [0:N-1]; + integer out_collected; + + always #(CLK_PERIOD/2) aclk = ~aclk; + + xfft_2048 dut ( + .aclk (aclk), + .aresetn (aresetn), + .s_axis_config_tdata (cfg_tdata), + .s_axis_config_tvalid (cfg_tvalid), + .s_axis_config_tready (cfg_tready), + .s_axis_data_tdata (din_tdata), + .s_axis_data_tvalid (din_tvalid), + .s_axis_data_tlast (din_tlast), + .s_axis_data_tready (din_tready), + .m_axis_data_tdata (dout_tdata), + .m_axis_data_tuser (dout_tuser), + .m_axis_data_tvalid (dout_tvalid), + .m_axis_data_tlast (dout_tlast), + .m_axis_data_tready (dout_tready) + ); + + // Continuously capture output frame + always @(posedge aclk) begin + if (aresetn && dout_tvalid && dout_tready && out_collected < N) begin + out_re[out_collected] <= $signed(dout_tdata[15:0]); + out_im[out_collected] <= $signed(dout_tdata[31:16]); + out_collected <= out_collected + 1; + end + end + + // ---------------------------------------------------------------- + // Send config (FWD = 
bit 0 = 1) + // ---------------------------------------------------------------- + task send_config; + input fwd; + begin + @(posedge aclk); + cfg_tdata <= {7'b0, fwd}; + cfg_tvalid <= 1'b1; + @(posedge aclk); + while (!cfg_tready) @(posedge aclk); + @(posedge aclk); + cfg_tvalid <= 1'b0; + end + endtask + + // ---------------------------------------------------------------- + // Stream N samples; src=0 DC, 1 impulse, 2 tone (bin K=128) + // ---------------------------------------------------------------- + task stream_frame; + input integer src; + integer i; + real arg; + integer re16, im16; + begin + out_collected = 0; + @(posedge aclk); + din_tvalid <= 1'b1; + for (i = 0; i < N; i = i + 1) begin + case (src) + 0: begin re16 = 10000; im16 = 0; end + 1: begin re16 = (i == 0) ? 10000 : 0; im16 = 0; end + 2: begin + arg = 6.2831853 * 128.0 * i / N; + re16 = $rtoi(10000.0 * $cos(arg)); + im16 = $rtoi(10000.0 * $sin(arg)); + end + default: begin re16 = 0; im16 = 0; end + endcase + din_tdata <= {im16[15:0], re16[15:0]}; + din_tlast <= (i == N-1); + @(posedge aclk); + while (!din_tready) @(posedge aclk); + end + din_tvalid <= 1'b0; + din_tlast <= 1'b0; + end + endtask + + // ---------------------------------------------------------------- + // Wait until the full output frame has been captured (out_collected == N) + // or a deadline elapses. 
+ // ---------------------------------------------------------------- + task wait_frame; + input integer max_cycles; + integer t; + begin + t = 0; + while (out_collected < N && t < max_cycles) begin + @(posedge aclk); + t = t + 1; + end + if (out_collected < N) begin + $display("[FAIL] Timed out collecting frame: got %0d / %0d after %0d cycles", + out_collected, N, t); + fail_count = fail_count + 1; + end + end + endtask + + // ---------------------------------------------------------------- + // Locate peak |Re|+|Im| bin in captured frame + // ---------------------------------------------------------------- + task analyze_frame; + output integer pk_bin; + output integer pk_mag; + output integer mean_other; + integer i, mag, sum; + begin + pk_bin = 0; + pk_mag = 0; + sum = 0; + for (i = 0; i < N; i = i + 1) begin + mag = (out_re[i] < 0 ? -out_re[i] : out_re[i]) + + (out_im[i] < 0 ? -out_im[i] : out_im[i]); + if (mag > pk_mag) begin + pk_mag = mag; + pk_bin = i; + end + sum = sum + mag; + end + mean_other = (sum - pk_mag) / (N - 1); + end + endtask + + task check; + input cond; + input [511:0] label; + begin + test_num = test_num + 1; + if (cond) begin + $display("[PASS] T%0d: %0s", test_num, label); + pass_count = pass_count + 1; + end else begin + $display("[FAIL] T%0d: %0s", test_num, label); + fail_count = fail_count + 1; + end + end + endtask + + initial begin + $dumpfile("tb_xfft_2048_xsim.vcd"); + $dumpvars(0, tb_xfft_2048_xsim); + + cfg_tdata = 0; + cfg_tvalid = 0; + din_tdata = 0; + din_tvalid = 0; + din_tlast = 0; + dout_tready = 1; // Always accept output + out_collected = 0; + + repeat (10) @(posedge aclk); + aresetn = 1'b1; + repeat (10) @(posedge aclk); + + // ============================================================ + // T1: DC stimulus → expect peak at bin 0 + // ============================================================ + $display("\n--- DC stimulus ---"); + send_config(1'b1); + stream_frame(0); + wait_frame(20000); + analyze_frame(peak_bin, 
peak_mag, mean_others); + $display(" peak_bin=%0d peak_mag=%0d mean_others=%0d tuser=0x%h", + peak_bin, peak_mag, mean_others, dout_tuser); + check(peak_bin == 0, "DC -> peak at bin 0"); + check(peak_mag > 8 * mean_others + 1, "DC -> peak/mean > 8x"); + + // ============================================================ + // T2: Impulse → expect roughly flat magnitude + // ============================================================ + $display("\n--- Impulse stimulus ---"); + send_config(1'b1); + stream_frame(1); + wait_frame(20000); + analyze_frame(peak_bin, peak_mag, mean_others); + $display(" peak_bin=%0d peak_mag=%0d mean_others=%0d tuser=0x%h", + peak_bin, peak_mag, mean_others, dout_tuser); + // For an impulse at sample 0, |X[k]| is constant; peak/mean ratio + // close to 1. Allow up to 3x to account for bit-width quantization. + check(peak_mag < 3 * mean_others + 100, + "Impulse -> flat spectrum (peak < 3x mean)"); + + // ============================================================ + // T3: Complex tone at bin 128 → expect peak at bin 128 + // ============================================================ + $display("\n--- Tone (bin 128) stimulus ---"); + send_config(1'b1); + stream_frame(2); + wait_frame(20000); + analyze_frame(peak_bin, peak_mag, mean_others); + $display(" peak_bin=%0d peak_mag=%0d mean_others=%0d tuser=0x%h", + peak_bin, peak_mag, mean_others, dout_tuser); + check(peak_bin == 128, "Tone -> peak at bin 128"); + check(peak_mag > 8 * mean_others + 1, "Tone -> peak/mean > 8x"); + + $display(""); + $display("============================================"); + $display(" XFFT_2048 (Xilinx LogiCORE) XSim RESULTS"); + $display(" PASSED: %0d / %0d", pass_count, test_num); + $display(" FAILED: %0d / %0d", fail_count, test_num); + if (fail_count == 0) + $display(" ** ALL TESTS PASSED **"); + else + $display(" ** %0d TEST(S) FAILED **", fail_count); + $display("============================================"); + + #100; + $finish; + end + + // Global timeout 
— never let the sim run forever + initial begin + #2000000; // 2 ms + $display("[FAIL] Global timeout @ 2 ms"); + $finish; + end + +endmodule diff --git a/9_Firmware/9_2_FPGA/xfft_2048.v b/9_Firmware/9_2_FPGA/xfft_2048.v new file mode 100644 index 0000000..ad2eaea --- /dev/null +++ b/9_Firmware/9_2_FPGA/xfft_2048.v @@ -0,0 +1,282 @@ +`timescale 1ns / 1ps +// ============================================================================ +// xfft_2048.v — 2048-point FFT wrapper (Xilinx LogiCORE for synth/XSim, +// in-house fft_engine fallback for iverilog) +// ============================================================================ +// AXI-Stream port list mirrors Xilinx LogiCORE Fast Fourier Transform v9.1 +// (PG109). Two implementation branches selected by `FFT_USE_XILINX_IP`: +// +// `define FFT_USE_XILINX_IP → instantiates xfft_2048_ip (LogiCORE FFT v9.1) +// Pipelined Streaming I/O, BFP scaling, 16-bit. +// Use for: Vivado synth, remote XSim sim. +// +// `undef FFT_USE_XILINX_IP → instantiates fft_engine batched one-shot +// (collect N → compute → drain N). +// Use for: iverilog local sim only. +// +// Throughput on production silicon (Xilinx IP path): ~N + ~150 cycles per +// transform with full overlap → ~6600 cycles for 3 sequential transforms in +// the matched-filter chain, vs the 16700-cycle PRI budget. Closes RX-NEW-3. +// +// Data format: {Q[15:0], I[15:0]} packed 32-bit on s_axis/m_axis_data_tdata. +// Config tdata[0]: 1 = forward FFT, 0 = inverse FFT (matches PG109 convention). +// +// Block-FP scaling (Xilinx path only): per-frame BLK_EXP returned via +// m_axis_data_tuser[7:0] so chain-level normalization can rescale before +// magnitude compute. Sim path always returns tuser = 0 (no BFP). +// ============================================================================ + +module xfft_2048 ( + input wire aclk, + input wire aresetn, + + // Configuration channel (AXI-Stream slave). 
8-bit tdata; only bit 0 + // (FWD/INV) is decoded by the IP in BFP mode (no scale schedule). + input wire [7:0] s_axis_config_tdata, + input wire s_axis_config_tvalid, + output wire s_axis_config_tready, + + // Data input channel (AXI-Stream slave) + input wire [31:0] s_axis_data_tdata, + input wire s_axis_data_tvalid, + input wire s_axis_data_tlast, + output wire s_axis_data_tready, + + // Data output channel (AXI-Stream master) + output wire [31:0] m_axis_data_tdata, + output wire [7:0] m_axis_data_tuser, // BLK_EXP[7:0] (Xilinx path); 0 (sim) + output wire m_axis_data_tvalid, + output wire m_axis_data_tlast, + input wire m_axis_data_tready +); + +`ifdef FFT_USE_XILINX_IP +// ============================================================================ +// XILINX LOGICORE FFT v9.1 — production / XSim path +// ============================================================================ +// Side-channels (status/event) are tied off here; if downstream needs them +// (e.g. for pipeline-stall debug), surface them through this wrapper. 
+ +wire [7:0] xfft_status_tdata; +wire xfft_status_tvalid; + +xfft_2048_ip u_xfft ( + .aclk (aclk), + .s_axis_config_tdata (s_axis_config_tdata), + .s_axis_config_tvalid (s_axis_config_tvalid), + .s_axis_config_tready (s_axis_config_tready), + .s_axis_data_tdata (s_axis_data_tdata), + .s_axis_data_tvalid (s_axis_data_tvalid), + .s_axis_data_tready (s_axis_data_tready), + .s_axis_data_tlast (s_axis_data_tlast), + .m_axis_data_tdata (m_axis_data_tdata), + .m_axis_data_tuser (m_axis_data_tuser), + .m_axis_data_tvalid (m_axis_data_tvalid), + .m_axis_data_tready (m_axis_data_tready), + .m_axis_data_tlast (m_axis_data_tlast), + .m_axis_status_tdata (xfft_status_tdata), + .m_axis_status_tvalid (xfft_status_tvalid), + .m_axis_status_tready (1'b1), + .event_frame_started (), + .event_tlast_unexpected (), + .event_tlast_missing (), + .event_status_channel_halt (), + .event_data_in_channel_halt (), + .event_data_out_channel_halt () +); + +`else +// ============================================================================ +// FALLBACK — fft_engine batched one-shot (iverilog path only) +// ============================================================================ +// Collect N samples → kick fft_engine → drain N samples. Throughput is +// ~N (collect) + ~160 K (compute) + ~N (drain). NOT representative of the +// real LogiCORE — used only for unit-level iverilog regression coverage. 
+// ============================================================================ + +localparam N = 2048; +localparam LOG2N = 11; +localparam CNT_W = LOG2N + 1; + +localparam [2:0] S_IDLE = 3'd0, + S_FEED = 3'd1, + S_RUN = 3'd2, + S_OUTPUT = 3'd3; + +reg [2:0] state; +reg inverse_reg; + +(* ram_style = "block" *) reg signed [15:0] in_buf_re [0:N-1]; +(* ram_style = "block" *) reg signed [15:0] in_buf_im [0:N-1]; +(* ram_style = "block" *) reg signed [15:0] out_buf_re [0:N-1]; +(* ram_style = "block" *) reg signed [15:0] out_buf_im [0:N-1]; + +reg [CNT_W-1:0] in_count; +reg [CNT_W-1:0] feed_count; +reg [CNT_W-1:0] out_total; +reg [CNT_W-1:0] out_count; + +reg fft_start; +reg fft_inverse; +reg signed [15:0] fft_din_re, fft_din_im; +reg fft_din_valid; +wire signed [15:0] fft_dout_re, fft_dout_im; +wire fft_dout_valid; +wire fft_busy; +wire fft_done; + +reg in_buf_we; +reg [LOG2N-1:0] in_buf_waddr; +reg signed [15:0] in_buf_wdata_re, in_buf_wdata_im; +reg out_buf_we; +reg [LOG2N-1:0] out_buf_waddr; +reg signed [15:0] out_buf_wdata_re, out_buf_wdata_im; + +reg signed [15:0] out_rd_re, out_rd_im; +reg out_rd_valid; + +fft_engine #( + .N(N), .LOG2N(LOG2N), .DATA_W(16), .INTERNAL_W(32), + .TWIDDLE_W(16), .TWIDDLE_FILE("fft_twiddle_2048.mem") +) fft_core ( + .clk(aclk), .reset_n(aresetn), + .start(fft_start), .inverse(fft_inverse), + .din_re(fft_din_re), .din_im(fft_din_im), .din_valid(fft_din_valid), + .dout_re(fft_dout_re), .dout_im(fft_dout_im), .dout_valid(fft_dout_valid), + .busy(fft_busy), .done(fft_done) +); + +assign s_axis_config_tready = (state == S_IDLE); +assign s_axis_data_tready = (state == S_FEED) && (in_count < N); +assign m_axis_data_tdata = {out_rd_im, out_rd_re}; +assign m_axis_data_tuser = 8'h00; // No BFP in fallback path +assign m_axis_data_tvalid = out_rd_valid; +assign m_axis_data_tlast = out_rd_valid && (out_count == N); + +always @(posedge aclk) begin + if (in_buf_we) begin + in_buf_re[in_buf_waddr] <= in_buf_wdata_re; + in_buf_im[in_buf_waddr] <= 
in_buf_wdata_im; + end + if (out_buf_we) begin + out_buf_re[out_buf_waddr] <= out_buf_wdata_re; + out_buf_im[out_buf_waddr] <= out_buf_wdata_im; + end +end + +always @(posedge aclk or negedge aresetn) begin + if (!aresetn) begin + state <= S_IDLE; + inverse_reg <= 1'b0; + in_count <= 0; + feed_count <= 0; + out_total <= 0; + out_count <= 0; + fft_start <= 1'b0; + fft_inverse <= 1'b0; + fft_din_re <= 0; + fft_din_im <= 0; + fft_din_valid <= 1'b0; + in_buf_we <= 1'b0; + in_buf_waddr <= 0; + in_buf_wdata_re <= 0; + in_buf_wdata_im <= 0; + out_buf_we <= 1'b0; + out_buf_waddr <= 0; + out_buf_wdata_re <= 0; + out_buf_wdata_im <= 0; + out_rd_re <= 0; + out_rd_im <= 0; + out_rd_valid <= 1'b0; + end else begin + fft_start <= 1'b0; + fft_din_valid <= 1'b0; + in_buf_we <= 1'b0; + out_buf_we <= 1'b0; + + case (state) + S_IDLE: begin + in_count <= 0; + feed_count <= 0; + out_total <= 0; + out_count <= 0; + out_rd_valid <= 1'b0; + if (s_axis_config_tvalid) begin + inverse_reg <= ~s_axis_config_tdata[0]; + state <= S_FEED; + end + end + + S_FEED: begin + if (in_count < N) begin + if (s_axis_data_tvalid) begin + in_buf_we <= 1'b1; + in_buf_waddr <= in_count[LOG2N-1:0]; + in_buf_wdata_re <= s_axis_data_tdata[15:0]; + in_buf_wdata_im <= s_axis_data_tdata[31:16]; + in_count <= in_count + 1; + end + end else begin + fft_start <= 1'b1; + fft_inverse <= inverse_reg; + feed_count <= 0; + out_total <= 0; + state <= S_RUN; + end + end + + S_RUN: begin + if (feed_count < N) begin + fft_din_re <= in_buf_re[feed_count[LOG2N-1:0]]; + fft_din_im <= in_buf_im[feed_count[LOG2N-1:0]]; + fft_din_valid <= 1'b1; + feed_count <= feed_count + 1; + end + if (fft_dout_valid && out_total < N) begin + out_buf_we <= 1'b1; + out_buf_waddr <= out_total[LOG2N-1:0]; + out_buf_wdata_re <= fft_dout_re; + out_buf_wdata_im <= fft_dout_im; + out_total <= out_total + 1; + end + if (fft_done && out_total >= N) begin + state <= S_OUTPUT; + out_count <= 0; + out_rd_valid <= 1'b0; + end + end + + S_OUTPUT: begin + if 
(m_axis_data_tready || !out_rd_valid) begin + if (out_count < N) begin + out_rd_re <= out_buf_re[out_count[LOG2N-1:0]]; + out_rd_im <= out_buf_im[out_count[LOG2N-1:0]]; + out_rd_valid <= 1'b1; + out_count <= out_count + 1; + end else begin + out_rd_valid <= 1'b0; + state <= S_IDLE; + end + end + end + + default: state <= S_IDLE; + endcase + end +end + +`ifdef SIMULATION +integer init_k; +initial begin + for (init_k = 0; init_k < N; init_k = init_k + 1) begin + in_buf_re[init_k] = 0; + in_buf_im[init_k] = 0; + out_buf_re[init_k] = 0; + out_buf_im[init_k] = 0; + end +end +`endif + +`endif // FFT_USE_XILINX_IP + +endmodule