diff --git a/9_Firmware/9_2_FPGA/tb/cosim/compare_independent.py b/9_Firmware/9_2_FPGA/tb/cosim/compare_independent.py new file mode 100644 index 0000000..89c9bf0 --- /dev/null +++ b/9_Firmware/9_2_FPGA/tb/cosim/compare_independent.py @@ -0,0 +1,416 @@ +#!/usr/bin/env python3 +""" +T-6 drift cosim — twin (fpga_model.py, RTL-mirroring) vs reference +(fpga_reference.py, numpy-based). + +Designed to catch the bug class where a transcription error exists +identically in BOTH the bit-exact Python twin and the RTL (e.g. a +hand-copied LUT entry, a regenerated .mem with wrong arithmetic, a +window coefficient typo). The existing cosim suite cannot detect that +class because both sides of the comparison are computing the same +algorithm; this script gives an independent third leg. + +Strategy (from highest-value to lowest): + + 1. Bytewise LUT spot-checks — every entry of every LUT/ROM compared + to its analytical Q15 value. A single corrupted entry fails here. + * NCO_SINE_LUT (64 entries, sin(pi*k/128) Q15) + * Twiddle ROMs (.mem files) for N=16, N=2048 + * HAMMING_WINDOW (16 entries, sym Hamming N=16 Q15) + + 2. End-to-end peak-position invariants — feed canonical inputs + through both twin and reference; the peak bin/index must match. + + 3. Roundtrip / structural invariants — FFT->IFFT recovers input, + NCO output stays on the unit circle, etc. + +We intentionally avoid Q15-saturated magnitude comparisons; those are +where the twin's bit-exact Q15 saturation differs from numpy's float +output, and they don't represent a real drift. +""" + +from __future__ import annotations + +import math +import sys +from pathlib import Path + +import numpy as np + +# Make local imports work when invoked from anywhere +THIS_DIR = Path(__file__).resolve().parent +sys.path.insert(0, str(THIS_DIR)) + +import fpga_reference as ref # noqa: E402 +from fpga_model import ( # noqa: E402 + HAMMING_WINDOW, + NCO, + DopplerProcessor, + FFTEngine, + MatchedFilterChain, + NCO_SINE_LUT, + load_twiddle_rom, +) + + +# ============================================================================= +# Tolerances +# ============================================================================= + +TOL_NCO_LUT_LSB = 1 # NCO_SINE_LUT: tightest possible +TOL_TWIDDLE_LSB = 1 # twiddle ROMs: same — quarter-wave Q15 cosine +TOL_HAMMING_LSB = 4 # 4 LSB ≈ 1.2e-4 rounding budget on Q15 round +TOL_NCO_MAG_REL = 0.04 # quarter-wave LUT artifact at quadrant edges +TOL_FFT_ROUNDTRIP_LSB = 60 # 11 stages × Q15 noise on 2048-pt; empirical + + +# ============================================================================= +# Result tracker +# ============================================================================= + +class CheckResult: + def __init__(self): + self.pass_count = 0 + self.fail_count = 0 + + def check(self, ok: bool, label: str, detail: str = ""): + tag = "PASS" if ok else "FAIL" + msg = f" [{tag}] {label}" + if detail: + msg += f" — {detail}" + print(msg) + if ok: + self.pass_count += 1 + else: + self.fail_count += 1 + + def info(self, label: str, detail: str = ""): + print(f" [INFO] {label}" + (f" — {detail}" if detail else "")) + + def summary(self): + total = self.pass_count + self.fail_count + print(f"\n RESULT: {self.pass_count}/{total} drift checks passed") + return self.fail_count == 0 + + +# ============================================================================= +# Section 1 — bytewise LUT spot-checks +# ============================================================================= + +def check_nco_lut(result: CheckResult): + print("\n--- NCO_SINE_LUT bytewise check ---") + max_dev = 0 + bad = [] + for k in range(64): + ideal = round(32767.0 * math.sin(math.pi * k / 128.0)) + dev = abs(NCO_SINE_LUT[k] - ideal) + if dev > max_dev: + max_dev = dev + if dev > TOL_NCO_LUT_LSB: + bad.append((k, NCO_SINE_LUT[k], ideal, dev)) + result.check( + max_dev <= TOL_NCO_LUT_LSB, + f"NCO_SINE_LUT: all 64 entries match sin(pi*k/128) Q15 (tol {TOL_NCO_LUT_LSB} LSB)", + f"max |LUT - ideal| = {max_dev} LSB" + ( + f"; {len(bad)} entries off, e.g. k={bad[0][0]}: LUT={bad[0][1]}, " + f"ideal={bad[0][2]}" if bad else "" + ) + ) + + +def check_twiddle_rom(result: CheckResult, n: int, mem_filename: str): + print(f"\n--- Twiddle ROM ({mem_filename}, N={n}) bytewise check ---") + rom_path = Path(__file__).resolve().parents[2] / mem_filename + if not rom_path.exists(): + result.check(False, f"Twiddle ROM file present at {rom_path}", "missing") + return + cos_rom = load_twiddle_rom(str(rom_path), n=n) + expected_entries = n // 4 + if len(cos_rom) != expected_entries: + result.check( + False, + f"Twiddle ROM length = N/4 = {expected_entries}", + f"got {len(cos_rom)}" + ) + return + + max_dev = 0 + bad = [] + for k in range(expected_entries): + ideal = round(32767.0 * math.cos(2.0 * math.pi * k / n)) + # Q15 representation tops at 32767 — ideal cos(0)=32768 rounds down. + if ideal == 32768: + ideal = 32767 + dev = abs(cos_rom[k] - ideal) + if dev > max_dev: + max_dev = dev + if dev > TOL_TWIDDLE_LSB: + bad.append((k, cos_rom[k], ideal, dev)) + result.check( + max_dev <= TOL_TWIDDLE_LSB, + f"{mem_filename}: all {expected_entries} entries match cos(2pi*k/{n}) Q15 (tol {TOL_TWIDDLE_LSB} LSB)", + f"max |ROM - ideal| = {max_dev} LSB" + ( + f"; {len(bad)} bad, e.g. k={bad[0][0]}: ROM={bad[0][1]}, ideal={bad[0][2]}" + if bad else "" + ) + ) + + +def check_hamming_lut(result: CheckResult): + print("\n--- HAMMING_WINDOW bytewise check ---") + win_lut = np.array(HAMMING_WINDOW, dtype=np.int64) + win_ref = np.round(ref.hamming_16_ideal()).astype(np.int64) + diff = np.abs(win_lut - win_ref) + max_dev = int(diff.max()) + worst_idx = int(np.argmax(diff)) + bad = [(int(i), int(win_lut[i]), int(win_ref[i]), int(diff[i])) + for i in range(16) if diff[i] > TOL_HAMMING_LSB] + result.check( + max_dev <= TOL_HAMMING_LSB, + f"HAMMING_WINDOW: all 16 entries match 0.54-0.46*cos(2pi*n/15) Q15 (tol {TOL_HAMMING_LSB} LSB)", + f"max |LUT - ideal| = {max_dev} LSB at n={worst_idx} " + f"(LUT={int(win_lut[worst_idx])}, ideal={int(win_ref[worst_idx])})" + ) + if bad: + result.info( + "HAMMING_WINDOW drift detected — both fpga_model.py and " + "doppler_processor.v contain values that diverge from the " + "documented formula. Resolution: either update the comments to " + "reflect the actual values, or regen the LUTs to match the " + "formula and re-bless the affected goldens." + ) + + +# ============================================================================= +# Section 2 — end-to-end peak / structural invariants +# ============================================================================= + +def check_nco_invariants(result: CheckResult): + """NCO output should stay on unit circle and have the right frequency.""" + print("\n--- NCO output invariants (unit-circle, frequency) ---") + ftw = 0x4CCCCCCD # 120 MHz at 400 MSPS = 0.30 cycles/sample + n_capture = 1024 + + nco = NCO() + cos_stream: list[int] = [] + sin_stream: list[int] = [] + for _ in range(n_capture + 64): + s, c, ready = nco.step(ftw) + if ready: + cos_stream.append(c) + sin_stream.append(s) + if len(cos_stream) >= n_capture: + break + cos_arr = np.array(cos_stream, dtype=np.float64) + sin_arr = np.array(sin_stream, dtype=np.float64) + + # Unit-circle invariant: cos² + sin² ≈ Q15_max². The quarter-wave LUT + # uses sin(k*pi/128) for k=0..63 — at the right edge of each quadrant + # it falls back to LUT[0]=0 instead of cos(63*pi/128)≈sin(pi/128), so + # the magnitude can dip by up to ~3 % of full scale. Tolerance 4 % covers + # this expected architectural artifact. + mag_ratio = (cos_arr ** 2 + sin_arr ** 2) / (32767.0 ** 2) + max_mag_dev = float(np.max(np.abs(mag_ratio - 1.0))) + result.check( + max_mag_dev <= TOL_NCO_MAG_REL, + "NCO output on unit circle (cos²+sin² ≈ 1)", + f"max |mag²-1| = {max_mag_dev * 100:.2f} % (tol {TOL_NCO_MAG_REL * 100:.0f} %)" + ) + + # Frequency invariant: dominant FFT bin of cos+j*sin should equal + # round(ftw / 2^32 * N). + z = cos_arr + 1j * sin_arr + Z = np.fft.fft(z) + peak_bin = int(np.argmax(np.abs(Z))) + expected_bin = int(round(ftw / (1 << 32) * n_capture)) + result.check( + abs(peak_bin - expected_bin) <= 1, + f"NCO dominant frequency at FTW = {ftw:08X} (expected bin {expected_bin})", + f"got bin {peak_bin}" + ) + + +def check_fft_invariants(result: CheckResult): + """FFT structural sanity: impulse, roundtrip, peak position.""" + print("\n--- FFT-2048 invariants (peak position, roundtrip, impulse) ---") + n = 2048 + fft = FFTEngine(n=n) + + # Impulse → flat spectrum at amplitude (no saturation; amp < 32767/N is overkill). + in_re = [1000] + [0] * (n - 1) + in_im = [0] * n + twin_re, twin_im = fft.compute(in_re, in_im, inverse=False) + flat_max = max(max(twin_re) - 1000, 1000 - min(twin_re), + max(twin_im), -min(twin_im)) + result.check( + flat_max <= 5, + "FFT-2048(impulse): all bins ≈ amplitude (1000)", + f"max |bin - 1000| = {flat_max}" + ) + + # Single COMPLEX tone (cos + j*sin) → single peak at bin_k (no conjugate + # at N-bin_k as you'd get with a real cosine). Amp small enough that + # peak = amp*N stays below Q15 saturation (32767). + bin_k = 137 + amp = 15 + in_re = [int(round(amp * math.cos(2 * math.pi * bin_k * i / n))) for i in range(n)] + in_im = [int(round(amp * math.sin(2 * math.pi * bin_k * i / n))) for i in range(n)] + twin_re, twin_im = fft.compute(in_re, in_im, inverse=False) + ref_re, ref_im = ref.fft_reference(in_re, in_im, n=n) + twin_mag2 = np.array(twin_re) ** 2 + np.array(twin_im) ** 2 + ref_mag2 = np.asarray(ref_re) ** 2 + np.asarray(ref_im) ** 2 + twin_peak = int(np.argmax(twin_mag2)) + ref_peak = int(np.argmax(ref_mag2)) + result.check( + twin_peak == ref_peak == bin_k, + f"FFT-2048(complex tone): peak at bin {bin_k}", + f"twin={twin_peak}, ref={ref_peak}" + ) + + # Roundtrip — small amplitude (peak = amp*N/2 ≤ 32767 → amp ≤ 32) so the + # forward FFT does not saturate, then IFFT should recover input within + # 11×Q15 butterfly noise. + rt_amp = 30 + in_re = [int(rt_amp * math.sin(2 * math.pi * 73 * i / n)) for i in range(n)] + in_im = [0] * n + fwd_re, fwd_im = fft.compute(in_re, in_im, inverse=False) + rt_re, _ = fft.compute(fwd_re, fwd_im, inverse=True) + rt_max_err = max(abs(rt_re[i] - in_re[i]) for i in range(n)) + result.check( + rt_max_err <= TOL_FFT_ROUNDTRIP_LSB, + f"FFT-2048(roundtrip, amp={rt_amp}): FFT->IFFT recovers input within {TOL_FFT_ROUNDTRIP_LSB} LSB", + f"max |rt - in| = {rt_max_err}" + ) + + +def check_mf_invariants(result: CheckResult): + """MF: peak must land at the injected delay; both twin and ref must agree.""" + print("\n--- Matched filter invariants (peak position) ---") + n = 2048 + mf = MatchedFilterChain(fft_size=n) + + delay = 100 + bin_k = 17 + amp = 200 + sig_re = [0] * n + sig_im = [0] * n + ref_re_in = [0] * n + ref_im_in = [0] * n + pulse_len = 256 + for i in range(pulse_len): + ref_re_in[i] = int(round(amp * math.cos(2 * math.pi * bin_k * i / pulse_len))) + ref_im_in[i] = int(round(amp * math.sin(2 * math.pi * bin_k * i / pulse_len))) + sig_re[i + delay] = ref_re_in[i] + sig_im[i + delay] = ref_im_in[i] + + twin_re, twin_im = mf.process(sig_re, sig_im, ref_re_in, ref_im_in) + ref_real, ref_imag = ref.matched_filter_reference( + sig_re, sig_im, ref_re_in, ref_im_in, fft_size=n + ) + twin_mag = np.sqrt(np.array(twin_re) ** 2 + np.array(twin_im) ** 2) + ref_mag = np.sqrt(np.asarray(ref_real) ** 2 + np.asarray(ref_imag) ** 2) + twin_peak = int(np.argmax(twin_mag)) + ref_peak = int(np.argmax(ref_mag)) + result.check( + twin_peak == ref_peak == delay, + f"MF: peak at injected delay (bin {delay})", + f"twin={twin_peak}, ref={ref_peak}" + ) + + # Sidelobe behaviour: peak should be N×stronger than median. + twin_peak_val = float(twin_mag[delay]) + twin_median = float(np.median(twin_mag)) + pk_ratio = twin_peak_val / max(twin_median, 1.0) + result.check( + pk_ratio >= 5.0, + "MF peak-to-median ratio ≥ 5", + f"got ratio {pk_ratio:.2f}" + ) + + +def check_doppler_invariants(result: CheckResult): + """Doppler: peak per sub-frame must be at the injected Doppler bin.""" + print("\n--- Doppler invariants (peak per sub-frame) ---") + chirps_per_frame = 48 + range_bins = 16 # keep small; algorithm identical at any size + num_subframes = 3 + chirps_per_subframe = 16 + + inject = [(0, 5), (1, 11), (2, 3)] + target_rbin = 10 + amp = 1500 # avoid Q15 saturation: peak = amp * N/2 = 12000 < 32767 + chirp_i = np.zeros((chirps_per_frame, range_bins), dtype=np.int64) + chirp_q = np.zeros((chirps_per_frame, range_bins), dtype=np.int64) + for sf, dop_bin in inject: + for c in range(chirps_per_subframe): + chirp_idx = sf * chirps_per_subframe + c + phase = 2 * math.pi * dop_bin * c / chirps_per_subframe + chirp_i[chirp_idx, target_rbin] = int(round(amp * math.cos(phase))) + chirp_q[chirp_idx, target_rbin] = int(round(amp * math.sin(phase))) + + dop = DopplerProcessor(num_subframes=num_subframes, + chirps_per_frame=chirps_per_frame) + dop.RANGE_BINS = range_bins + twin_dop_i, twin_dop_q = dop.process_frame( + chirp_i.tolist(), chirp_q.tolist() + ) + ref_re, ref_im = ref.doppler_reference( + chirp_i, chirp_q, + num_subframes=num_subframes, + chirps_per_subframe=chirps_per_subframe, + range_bins=range_bins, + ) + + twin_mag = np.sqrt(np.array(twin_dop_i) ** 2 + np.array(twin_dop_q) ** 2) + ref_mag = np.sqrt(ref_re ** 2 + ref_im ** 2) + expected_bins = [sf * chirps_per_subframe + b for sf, b in inject] + + sf_peaks_twin = [] + sf_peaks_ref = [] + for sf in range(num_subframes): + lo = sf * chirps_per_subframe + hi = lo + chirps_per_subframe + sf_peaks_twin.append(lo + int(np.argmax(twin_mag[target_rbin, lo:hi]))) + sf_peaks_ref.append(lo + int(np.argmax(ref_mag[target_rbin, lo:hi]))) + + result.check( + sf_peaks_twin == expected_bins, + f"Doppler twin: peaks per sub-frame at {expected_bins}", + f"got {sf_peaks_twin}" + ) + result.check( + sf_peaks_ref == expected_bins, + f"Doppler reference: peaks per sub-frame at {expected_bins}", + f"got {sf_peaks_ref}" + ) + + +# ============================================================================= +# Main +# ============================================================================= + +def main(): + print("============================================================") + print(" T-6 INDEPENDENT REFERENCE DRIFT COSIM") + print(" fpga_model.py (twin) vs fpga_reference.py (numpy ideal)") + print("============================================================") + result = CheckResult() + + # 1. LUT bytewise spot-checks (highest-value transcription detector) + check_nco_lut(result) + check_twiddle_rom(result, n=16, mem_filename="fft_twiddle_16.mem") + check_twiddle_rom(result, n=2048, mem_filename="fft_twiddle_2048.mem") + check_hamming_lut(result) + + # 2/3. End-to-end invariants + check_nco_invariants(result) + check_fft_invariants(result) + check_mf_invariants(result) + check_doppler_invariants(result) + + ok = result.summary() + return 0 if ok else 1 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/9_Firmware/9_2_FPGA/tb/cosim/fpga_reference.py b/9_Firmware/9_2_FPGA/tb/cosim/fpga_reference.py new file mode 100644 index 0000000..c913a75 --- /dev/null +++ b/9_Firmware/9_2_FPGA/tb/cosim/fpga_reference.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python3 +r""" +Independent floating-point reference for AERIS-10 signal processing chain. + +Unlike fpga_model.py (which is a bit-exact PORT of the RTL — same NCO LUT, +same twiddle ROMs, same Q15 quantization), this module computes the +algorithm using ideal numpy/scipy primitives. It is the second leg of the +T-6 three-way triangulation: + + fpga_model.py (twin) --bit-exact-- RTL simulation + \ / + \ / + \-- within tolerance --> / + fpga_reference.py + (this file) + +Drift signal interpretation: + * twin == reference (within tol) and twin == RTL (bit-exact) + -> chain healthy + * twin == reference (within tol) but twin != RTL (bit-exact) + -> RTL diverged from spec (real RTL bug) + * twin != reference (outside tol) but twin == RTL (bit-exact) + -> twin and RTL share a transcription error + (e.g. wrong NCO_SINE_LUT entry, wrong twiddle ROM, wrong window + coefficients). This is the bug class T-6 was opened to catch. + +Coverage in this revision (highest transcription risk first): + * NCO — numpy.cos/sin vs the 64-entry quarter-wave NCO_SINE_LUT + * FFT — numpy.fft.fft/ifft vs Q15 twiddle ROM butterfly + * MF — numpy ifft(fft(sig) * conj(fft(ref))) vs RTL pipeline + * Doppler — numpy.fft + ideal Hamming window vs Q15 window LUT + RTL FFT + +Out of scope here (lower transcription risk, deferred): + * CIC / FIR — coefficient files are derived from independent generators + * Mixer / RangeBinDecimator — trivially correct in both +""" + +from __future__ import annotations + +import numpy as np + + +# ============================================================================= +# NCO reference — ideal complex sinusoid +# ============================================================================= + +def nco_reference(num_samples: int, ftw: int, fs: float = 400e6, + phase_offset_deg: float = 0.0): + """Ideal floating-point NCO output, scaled to match Q15 fpga_model. + + The fpga_model.NCO uses a 32-bit phase accumulator stepped by ftw, with + a 64-entry quarter-wave cos LUT scaled to ±0x7FFF. This reference uses + numpy.cos/sin directly with no LUT. + + Args: + num_samples: number of samples to produce + ftw: 32-bit unsigned phase increment per sample + fs: sample rate (Hz) + phase_offset_deg: phase offset (degrees), default 0 + + Returns: + (cos_q15, sin_q15) — float arrays of length num_samples, in Q15 scale + """ + ftw = int(ftw) & 0xFFFFFFFF + n = np.arange(num_samples, dtype=np.float64) + # Phase accumulator semantics: phase[k] = (k * ftw) mod 2^32 + # Convert to radians: 2*pi * phase / 2^32 + phase_rad = 2.0 * np.pi * (n * ftw) / (1 << 32) + np.deg2rad(phase_offset_deg) + cos_q15 = np.cos(phase_rad) * 32767.0 + sin_q15 = np.sin(phase_rad) * 32767.0 + return cos_q15, sin_q15 + + +# ============================================================================= +# FFT reference — numpy.fft.fft / ifft +# ============================================================================= + +def fft_reference(in_re, in_im, n: int = 2048, inverse: bool = False): + """Ideal floating-point FFT. + + Scaling matches the RTL convention: + forward: y[k] = sum_n x[n] * exp(-j*2*pi*k*n/N) (no 1/N) + inverse: y[n] = (1/N) * sum_k X[k] * exp(+j*2*pi*k*n/N) (1/N applied) + + The RTL fft_engine implements >>>LOG2N before output saturation when + inverse=1, which is the same 1/N. numpy.fft.ifft already includes the + 1/N factor, so we use it directly with no rescaling. + + Args: + in_re/in_im: length-N int or float sequences + n: FFT size (16, 1024, 2048) + inverse: True for IFFT + + Returns: + (out_re, out_im) — float arrays, no Q15 saturation + """ + re = np.asarray(in_re, dtype=np.float64) + im = np.asarray(in_im, dtype=np.float64) + if len(re) != n or len(im) != n: + raise ValueError(f"input length {len(re)} != N={n}") + x = re + 1j * im + y = np.fft.ifft(x) if inverse else np.fft.fft(x) + return y.real.copy(), y.imag.copy() + + +# ============================================================================= +# Matched filter reference — IFFT(FFT(sig) * conj(FFT(ref))) +# ============================================================================= + +def matched_filter_reference(sig_re, sig_im, ref_re, ref_im, fft_size: int = 2048): + """Ideal range profile via FFT-domain matched filter. + + range_profile = IFFT( FFT(sig) * conj(FFT(ref)) ) + + The RTL pipeline does the same operation but with Q15 twiddles, 32-bit + accumulators, and Q15 round-and-saturate after the conjugate multiply. + This reference is unquantized. + + Args: + sig_re/im, ref_re/im: length-fft_size sequences (int or float) + fft_size: matched filter FFT size (default 2048) + + Returns: + (range_re, range_im) — float arrays, no Q15 saturation + """ + sig_re = np.asarray(sig_re, dtype=np.float64) + sig_im = np.asarray(sig_im, dtype=np.float64) + ref_re = np.asarray(ref_re, dtype=np.float64) + ref_im = np.asarray(ref_im, dtype=np.float64) + s = sig_re + 1j * sig_im + r = ref_re + 1j * ref_im + S = np.fft.fft(s, n=fft_size) + R = np.fft.fft(r, n=fft_size) + P = S * np.conj(R) + p = np.fft.ifft(P) + return p.real.copy(), p.imag.copy() + + +# ============================================================================= +# Doppler reference — Hamming-windowed per-sub-frame 16-pt FFT +# ============================================================================= + +def hamming_16_ideal(): + """Ideal Hamming(16) coefficients, scaled to Q15. + + fpga_model.HAMMING_WINDOW is a hard-coded Q15 LUT. If a coefficient was + transcribed wrong, this catches it. + + Definition (matches scipy.signal.windows.hamming(16, sym=False) is + DIFFERENT — that's a periodic Hamming. The RTL/fpga_model use the + classic symmetric form: w[n] = 0.54 - 0.46*cos(2*pi*n/(N-1)). + """ + n = np.arange(16, dtype=np.float64) + w = 0.54 - 0.46 * np.cos(2.0 * np.pi * n / 15.0) + return w * 32767.0 + + +def doppler_reference(chirp_data_i, chirp_data_q, + num_subframes: int = 3, + chirps_per_subframe: int = 16, + range_bins: int = 512): + """Ideal Doppler map using ideal Hamming + numpy.fft, no Q15 quantization. + + Args: + chirp_data_i/q: 2D arrays [chirps_per_frame][range_bins], int or float + num_subframes: number of independent 16-pt FFTs per range bin (default 3) + chirps_per_subframe: 16 + range_bins: 512 + + Returns: + (doppler_map_re, doppler_map_im) — 2D float arrays + shape [range_bins][num_subframes * chirps_per_subframe] + Sub-frame s occupies output bins [s*16 .. s*16+15]. + """ + chirp_data_i = np.asarray(chirp_data_i, dtype=np.float64) + chirp_data_q = np.asarray(chirp_data_q, dtype=np.float64) + chirps_per_frame = num_subframes * chirps_per_subframe + if chirp_data_i.shape != (chirps_per_frame, range_bins): + raise ValueError( + f"chirp_data_i shape {chirp_data_i.shape} != " + f"({chirps_per_frame}, {range_bins})" + ) + + win = hamming_16_ideal() # Q15-scaled + total_bins = num_subframes * chirps_per_subframe + out_re = np.zeros((range_bins, total_bins), dtype=np.float64) + out_im = np.zeros((range_bins, total_bins), dtype=np.float64) + + for rbin in range(range_bins): + for sf in range(num_subframes): + start = sf * chirps_per_subframe + stop = start + chirps_per_subframe + offset = sf * chirps_per_subframe + + # Apply Hamming and divide by 32768 to undo the Q15 scaling so + # the comparison is to ideal floating-point amplitudes (the RTL + # rounds (data*win + 1<<14) >> 15 which is an approximate /32768). + x_re = chirp_data_i[start:stop, rbin] * win / 32768.0 + x_im = chirp_data_q[start:stop, rbin] * win / 32768.0 + x = x_re + 1j * x_im + + X = np.fft.fft(x) + out_re[rbin, offset:offset + chirps_per_subframe] = X.real + out_im[rbin, offset:offset + chirps_per_subframe] = X.imag + + return out_re, out_im + + +# ============================================================================= +# Self-test (sanity checks against numpy's own analytical answers) +# ============================================================================= + +def _self_test(): + """Quick sanity checks.""" + # NCO: at FTW = 0x4CCCCCCD, frequency = 0.3 * fs = 120 MHz at 400 MSPS. + cos_q15, sin_q15 = nco_reference(8, 0x4CCCCCCD, fs=400e6) + # First sample should be cos(0)=1, sin(0)=0 in Q15 + assert abs(cos_q15[0] - 32767.0) < 1.0, f"NCO[0].cos = {cos_q15[0]}" + assert abs(sin_q15[0]) < 1.0, f"NCO[0].sin = {sin_q15[0]}" + + # FFT: impulse -> all bins = amplitude + in_re = [1000] + [0] * 15 + in_im = [0] * 16 + out_re, out_im = fft_reference(in_re, in_im, n=16) + for k in range(16): + assert abs(out_re[k] - 1000.0) < 1e-9, f"FFT impulse bin {k}: {out_re[k]}" + + # Doppler: zero input -> zero output + z_i = np.zeros((48, 512)) + z_q = np.zeros((48, 512)) + d_re, d_im = doppler_reference(z_i, z_q, num_subframes=3, + chirps_per_subframe=16, range_bins=512) + assert np.max(np.abs(d_re)) < 1e-9 + assert np.max(np.abs(d_im)) < 1e-9 + + # Hamming: peak at center, edges match formula + w = hamming_16_ideal() + assert abs(w[0] - 0.08 * 32767.0) < 0.5 + assert abs(w[7] - (0.54 - 0.46 * np.cos(2 * np.pi * 7 / 15)) * 32767.0) < 0.5 + + print("fpga_reference self-test: OK") + + +if __name__ == '__main__': + _self_test()