From d3972369a0dae4b9b61fa709683bc54d49c7d2ef Mon Sep 17 00:00:00 2001 From: Ernest Zaslavsky Date: Thu, 26 Mar 2026 16:47:01 +0200 Subject: [PATCH] scripts: add compare_build_systems.py to compare ninja build files Add a script that compares configure.py and CMake build systems by parsing their generated build.ninja files. The script checks: - Per-file compilation flags (defines, warnings, optimization) - Link target sets (detect missing/extra targets) - Per-target linker flags and libraries configure.py is treated as the baseline. CMake should match it. Both systems are always configured into a temporary directory so the user's build tree is never touched. Usage: scripts/compare_build_systems.py -m dev # single mode scripts/compare_build_systems.py # all modes scripts/compare_build_systems.py --ci # CI mode (strict) --- scripts/compare_build_systems.py | 1442 ++++++++++++++++++++++++++++++ 1 file changed, 1442 insertions(+) create mode 100755 scripts/compare_build_systems.py diff --git a/scripts/compare_build_systems.py b/scripts/compare_build_systems.py new file mode 100755 index 0000000000..062207ad58 --- /dev/null +++ b/scripts/compare_build_systems.py @@ -0,0 +1,1442 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright (C) 2026-present ScyllaDB +# + +# +# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 +# + +""" +Compare configure.py and CMake build systems by parsing their ninja build files. + +Checks three things: + 1. Per-file compilation flags — are the same source files compiled with + the same defines, warnings, optimization, and language flags? + 2. Link targets — do both systems produce the same set of + executables? + 3. Per-target linker settings — are link flags and libraries identical for + every common executable? + +configure.py is treated as the baseline. CMake should match it. 
+ +Exit codes: + 0 All checked modes match + 1 Differences found + 2 Configuration failed + +Both build systems are always configured into a temporary directory — +the user's build tree is never touched. + +Examples: + # Compare dev mode + scripts/compare_build_systems.py -m dev + + # Compare all modes + scripts/compare_build_systems.py + + # CI mode: quiet, strict, all modes + scripts/compare_build_systems.py --ci + + # Verbose output showing every flag + scripts/compare_build_systems.py -m debug -v +""" + +import argparse +import concurrent.futures +import os +import re +import shlex +import shutil +import subprocess +import sys +import tempfile +from collections import defaultdict +from pathlib import Path + + +# ═══════════════════════════════════════════════════════════════════════════ +# Constants +# ═══════════════════════════════════════════════════════════════════════════ + +MODE_TO_CMAKE = { + "debug": "Debug", + "dev": "Dev", + "release": "RelWithDebInfo", + "sanitize": "Sanitize", + "coverage": "Coverage", +} +ALL_MODES = list(MODE_TO_CMAKE.keys()) + + +# Per-component Boost defines that CMake's imported targets add. +# configure.py uses the single BOOST_ALL_DYN_LINK instead. +_BOOST_PER_COMPONENT_DEFINES = re.compile( + r"-DBOOST_\w+_(DYN_LINK|NO_LIB)$") + +# Internal Scylla/Seastar/Abseil library targets that CMake creates as +# intermediate static/shared libraries. configure.py links .o files +# directly. These are structural differences, not bugs. +def _collect_internal_lib_names(*build_lists): + """Auto-detect internal library names from ninja build outputs. + + Any .a or .so file that is a build output (not a system library) + is an internal project library. Returns normalized library names. + This replaces a hardcoded list — new libraries added to either + build system are picked up automatically. 
def parse_ninja(filepath):
    """Parse a ninja build file into (variables, rules, builds).

    ``subninja``/``include`` directives are followed; a relative include
    path is resolved against the directory of the including file, and a
    file that does not exist is silently skipped.  ``$``-newline line
    continuations are joined before any statement is parsed.

    Args:
        filepath: path (str or path-like) of the top-level ninja file.

    Returns:
        variables: dict[str, str] — top-level variable assignments
                   (raw, unexpanded values)
        rules:     dict[str, dict] — rule name → {command, ...}
        builds:    list[dict] — one dict per build statement with the
                   keys outputs, rule, inputs, implicit, vars.
                   ``inputs`` holds only the explicit inputs and
                   ``implicit`` only the ``|`` dependencies; order-only
                   (``||``) dependencies and ``|@`` validations are
                   dropped.
    """
    variables = {}
    rules = {}
    builds = []
    binding_re = re.compile(r"(\S+)\s*=\s*(.*)")

    def _logical_lines(path):
        # Read the file and fold "$<newline>" continuations into single
        # logical lines.  An odd number of trailing '$' means the last
        # one escapes the newline; an even number is just escaped
        # dollars ("$$") and ends the line normally.
        with open(path, encoding="utf-8", errors="replace") as f:
            physical = [raw.rstrip("\n") for raw in f]
        logical = []
        carry = None
        for text in physical:
            if carry is not None:
                # Leading whitespace of a continued line is not part of
                # the value.
                text = carry + text.lstrip()
                carry = None
            dollars = len(text) - len(text.rstrip("$"))
            if dollars % 2:
                carry = text[:-1]
            else:
                logical.append(text)
        if carry is not None:
            logical.append(carry)
        return logical

    def _read_bindings(lines, start):
        # Collect the indented "key = value" lines that follow a rule or
        # build statement; return them with the index of the first line
        # that is not part of the indented body.
        bindings = {}
        pos = start
        while pos < len(lines) and lines[pos].startswith(" "):
            found = binding_re.match(lines[pos].strip())
            if found:
                bindings[found.group(1)] = found.group(2)
            pos += 1
        return bindings, pos

    def _parse(path):
        base_dir = os.path.dirname(path)
        try:
            lines = _logical_lines(path)
        except FileNotFoundError:
            # Best effort: a missing (sub)file contributes nothing.
            return

        i = 0
        while i < len(lines):
            line = lines[i]
            i += 1

            if not line or line.startswith("#"):
                continue

            # subninja / include
            m = re.match(r"(subninja|include)\s+(.+)", line)
            if m:
                inc_path = m.group(2).strip()
                if not os.path.isabs(inc_path):
                    inc_path = os.path.join(base_dir, inc_path)
                _parse(inc_path)
                continue

            # Rule definition
            m = re.match(r"rule\s+(\S+)", line)
            if m:
                rule_vars, i = _read_bindings(lines, i)
                rules[m.group(1)] = rule_vars
                continue

            # Top-level variable (the pattern cannot match an indented
            # line, so no extra indentation check is needed)
            m = re.match(r"([a-zA-Z_][a-zA-Z0-9_.]*)\s*=\s*(.*)", line)
            if m:
                variables[m.group(1)] = m.group(2)
                continue

            # Build statement
            m = re.match(r"build\s+(.+?):\s+(\S+)\s*(.*)", line)
            if m:
                build_vars, i = _read_bindings(lines, i)

                # Dependency layout: explicit | implicit || order-only,
                # optionally followed by "|@ validations".  Cut the
                # trailing sections off first so that an order-only
                # dependency is never mistaken for an implicit one when
                # the "|" section is absent.
                deps = m.group(3).split("|@", 1)[0]
                deps = deps.partition("||")[0]
                explicit, _, implicit = deps.partition("|")

                builds.append({
                    "outputs": m.group(1).strip(),
                    "rule": m.group(2),
                    "inputs": explicit.strip(),
                    "implicit": implicit.strip(),
                    "vars": build_vars,
                })

    _parse(str(filepath))
    return variables, rules, builds
+ """ + try: + tokens = shlex.split(command_str) + except ValueError: + tokens = command_str.split() + + flags = { + "defines": set(), + "warnings": set(), + "f_flags": set(), + "opt_flags": set(), + "arch_flags": set(), + "std_flags": set(), + } + + skip_next = False + for tok in tokens: + if skip_next: + skip_next = False + continue + + if tok.startswith("-D"): + if _BOOST_PER_COMPONENT_DEFINES.match(tok): + continue + # Normalize version defines that contain git hashes + if tok.startswith("-DSCYLLA_RELEASE="): + tok = "-DSCYLLA_RELEASE=" + elif tok.startswith("-DSCYLLA_VERSION="): + tok = "-DSCYLLA_VERSION=" + flags["defines"].add(tok) + elif tok.startswith("-W"): + if tok == "-Winvalid-pch": + continue + # -Wno-backend-plugin is added by configure.py when a PGO + # profile is available. CMake handles PGO separately. + if tok == "-Wno-backend-plugin": + continue + flags["warnings"].add(tok) + elif tok.startswith("-f"): + if "-ffile-prefix-map=" in tok: + continue + # LTO and PGO flags are configuration-dependent options + # (--lto, --pgo, --use-profile for configure.py; + # Scylla_PROFDATA_FILE for CMake), not mode-inherent. 
def normalize_lib_name(token):
    """Extract the canonical library name from a -l, .a, or .so token.

    Args:
        token: a linker argument or path, e.g. ``-lfoo``,
            ``path/libfoo.a`` or ``/usr/lib64/libfoo.so.1.2``.

    Returns:
        For ``-l<name>`` the text after ``-l``.  Otherwise, if the file
        name has the form ``lib<name>.a`` or ``lib<name>.so[.<version>]``,
        ``<name>``; in every other case ``None``.
    """
    if token.startswith("-l"):
        return token[2:]
    # The whole file name must match.  With an unanchored match the
    # lazy group stops at the first ".a"/".so" it sees, so a name such
    # as "libclang_rt.asan-x86_64.a" would be cut down to "clang_rt".
    m = re.fullmatch(r"lib(.+?)\.(?:a|so(?:\.\S*)?)",
                     os.path.basename(token))
    return m.group(1) if m else None
def _is_scylla_source(rel_path):
    """Tell whether *rel_path* names a Scylla-owned C++ source file.

    Paths under ``seastar/`` or ``abseil/``, paths starting with
    ``build`` or ``..``, absolute paths, and the two special files
    ``tools/patchelf.cc`` and ``exported_templates.cc`` are rejected.
    Anything left is accepted only if it ends in ``.cc`` or ``.cpp``.
    """
    # NOTE(review): the "build" prefix also rejects a root-level source
    # such as "build_info.cc"; confirm whether "build/" was intended.
    if rel_path.startswith(("seastar/", "abseil/", "build", "..")):
        return False
    if os.path.isabs(rel_path):
        return False
    if rel_path in ("tools/patchelf.cc", "exported_templates.cc"):
        return False
    return rel_path.endswith((".cc", ".cpp"))
+ """ + entries = {} + mode_prefix = f"build/{mode}/" + + # Find compile rules for this mode + compile_rules = {} + for name, rvars in rules.items(): + if (name.startswith(f"cxx.{mode}") + or name.startswith(f"cxx_with_pch.{mode}")): + compile_rules[name] = rvars + + if not compile_rules: + return entries + + for b in builds: + if b["rule"] not in compile_rules: + continue + + output = b["outputs"] + output = output.replace("$builddir/", "build/") + if not output.startswith(mode_prefix): + continue + + # Get source file from inputs + src_tokens = b["inputs"].strip().split() + if not src_tokens: + continue + src = src_tokens[0] + src = src.replace("$builddir/", "build/") + + # Make source path relative + if os.path.isabs(src): + try: + rel_src = os.path.relpath(src, source_dir) + except ValueError: + rel_src = src + else: + rel_src = src + + if not _is_scylla_source(rel_src): + continue + + # Build effective command by resolving variables. + # Ninja scoping: build-statement variable VALUES are resolved + # against the enclosing (file-level) scope, NOT against themselves. + rule_def = compile_rules[b["rule"]] + outer_scope = dict(variables) + outer_scope.update(rule_def) + resolved_build_vars = {} + for k, v in b["vars"].items(): + resolved_build_vars[k] = resolve_var(v, outer_scope) + + merged = dict(variables) + merged.update(rule_def) + merged.update(resolved_build_vars) + merged["in"] = b["inputs"] + merged["out"] = b["outputs"] + + command = rule_def.get("command", "") + resolved = resolve_var(command, merged) + + entries[rel_src] = categorize_compile_flags(resolved) + + return entries + + +def extract_cmake_compile_entries(builds, source_dir): + """Extract per-source-file flags from CMake build.ninja. + + Returns dict: relative_source_path → categorized flags dict. 
def _is_link_rule(rule):
    """Tell whether *rule* names an executable link rule.

    The check is case-insensitive: the name must contain ``link`` and
    none of ``static``, ``shared``, ``module`` or ``stripped``.  The
    last exclusion drops link_stripped rules, which only produce
    stripped copies of targets that are already covered.
    """
    lowered = rule.lower()
    if "link" not in lowered:
        return False
    excluded = ("static", "shared", "module", "stripped")
    return not any(word in lowered for word in excluded)
+ outer_scope = dict(variables) + outer_scope.update(rule_def) + resolved_build_vars = {} + for k, v in build["vars"].items(): + resolved_build_vars[k] = resolve_var(v, outer_scope) + + merged = dict(variables) + merged.update(rule_def) + merged.update(resolved_build_vars) + merged["in"] = build["inputs"] + merged["out"] = build["outputs"] + + # Resolve command from the rule template (configure.py style) + command_template = rule_def.get("command", "") + command = resolve_var(command_template, merged) + + # For CMake, also look at explicit LINK_FLAGS and LINK_LIBRARIES vars + link_flags_var = build["vars"].get("LINK_FLAGS", "") + link_libs_var = build["vars"].get("LINK_LIBRARIES", "") + + linker_flags = set() + libraries = set() + + # Parse from resolved command (for configure.py) + if command_template: + try: + tokens = shlex.split(command) + except ValueError: + tokens = command.split() + + skip = False + for tok in tokens: + if skip: + skip = False + continue + if tok in ("-o", "-MF", "-MT"): + skip = True + continue + # Skip LTO/PGO linker flags — configuration-dependent + if (tok.startswith("-flto") or tok == "-fno-lto" + or tok == "-ffat-lto-objects" + or tok.startswith("-fprofile-use=") + or tok.startswith("-fprofile-generate")): + continue + if tok.startswith("-fsanitize") or tok.startswith("-fno-sanitize"): + linker_flags.update(normalize_linker_flag(tok)) + elif tok.startswith("-fuse-ld=") or tok.startswith("--ld-path="): + linker_flags.update(normalize_linker_flag(tok)) + elif tok.startswith("-Wl,"): + linker_flags.update(normalize_linker_flag(tok)) + elif tok == "-static-libstdc++": + linker_flags.add(tok) + elif tok == "-s": + linker_flags.add("-Wl,--strip-all") + + # Libraries + if tok.startswith("-l"): + lib = tok[2:] + libraries.add(lib) + elif tok.endswith(".o"): + continue + elif tok.endswith(".a") or ".so" in tok: + name = normalize_lib_name(tok) + if name: + libraries.add(name) + + # Parse from explicit LINK_FLAGS/LINK_LIBRARIES (CMake style) + 
if link_flags_var: + for tok in tokenize(link_flags_var): + # Skip LTO/PGO linker flags — configuration-dependent + if (tok.startswith("-flto") or tok == "-fno-lto" + or tok == "-ffat-lto-objects" + or tok.startswith("-fprofile-use=") + or tok.startswith("-fprofile-generate")): + continue + if tok.startswith("-fsanitize") or tok.startswith("-fno-sanitize"): + linker_flags.update(normalize_linker_flag(tok)) + elif tok.startswith("-fuse-ld=") or tok.startswith("--ld-path="): + linker_flags.update(normalize_linker_flag(tok)) + elif tok.startswith("-Wl,") or tok.startswith("-Xlinker "): + linker_flags.update(normalize_linker_flag(tok)) + elif tok == "-s": + linker_flags.add("-Wl,--strip-all") + + if link_libs_var: + for tok in tokenize(link_libs_var): + if tok.startswith("-fsanitize") or tok.startswith("-fno-sanitize"): + linker_flags.update(normalize_linker_flag(tok)) + elif tok.startswith("-Wl,") or tok.startswith("-Xlinker "): + linker_flags.update(normalize_linker_flag(tok)) + elif tok.startswith("-l"): + libraries.add(tok[2:]) + elif tok.endswith(".o"): + continue + elif tok.endswith(".a") or ".so" in tok: + name = normalize_lib_name(tok) + if name: + libraries.add(name) + + # Also extract libraries from build inputs (implicit deps) + all_inputs = build["inputs"] + " " + build.get("implicit", "") + all_inputs = resolve_var(all_inputs, merged) + for tok in all_inputs.split(): + if tok.endswith(".o"): + continue + if tok.endswith(".a") or ".so" in tok: + name = normalize_lib_name(tok) + if name: + libraries.add(name) + + return {"linker_flags": linker_flags, "libraries": libraries} + + +def extract_configure_link_targets(variables, rules, builds, mode): + """Extract link targets from configure.py build.ninja. + + Returns dict: target_name → {linker_flags, libraries}. 
def extract_cmake_link_targets(variables, rules, builds, mode):
    """Collect executable link targets from a parsed CMake build.ninja.

    Args:
        variables: top-level ninja variables of the CMake build file.
        rules: rule name → rule variables of the CMake build file.
        builds: list of parsed build statements.
        mode: configure.py mode name; mapped through MODE_TO_CMAKE to
            the CMake build type whose ``<Type>/`` prefix is removed
            from target names.

    Returns:
        dict: target_name → {linker_flags, libraries}, as produced by
        _extract_link_info for each accepted build statement.
    """
    skipped_suffixes = (".stripped", ".debug", ".so", ".a")
    vendored_prefixes = ("seastar/", "abseil/")
    build_type = MODE_TO_CMAKE.get(mode, "")
    type_prefix = f"{build_type}/" if build_type else ""

    targets = {}
    for stmt in builds:
        if not _is_link_rule(stmt["rule"]):
            continue

        name = stmt["outputs"]
        # Only real executables of the project itself are of interest.
        if name.endswith(skipped_suffixes):
            continue
        if name.startswith(vendored_prefixes):
            continue

        # e.g. "Dev/scylla" → "scylla"
        if type_prefix and name.startswith(type_prefix):
            name = name[len(type_prefix):]

        targets[name] = _extract_link_info(stmt, variables, rules)

    return targets
def _find_ninja():
    """Return the path of the ninja executable.

    ``ninja`` is looked up on PATH first, then ``ninja-build``.  If
    neither is found the bare name ``"ninja"`` is returned.
    """
    lookups = (shutil.which(exe) for exe in ("ninja", "ninja-build"))
    return next((found for found in lookups if found), "ninja")
def compare_flag_sets(label, set_a, set_b):
    """Describe how two flag sets differ.

    Args:
        label: category name used as the prefix of each message.
        set_a: flags from configure.py.
        set_b: flags from CMake.

    Returns:
        A list with at most two strings: one for the flags found only
        in *set_a* and one for those found only in *set_b* (each listed
        in sorted order).  The list is empty when the sets are equal.
    """
    conf_extra = sorted(set_a - set_b)
    cmake_extra = sorted(set_b - set_a)

    report = []
    if conf_extra:
        report.append(f"{label}: only in configure.py: {conf_extra}")
    if cmake_extra:
        report.append(f"{label}: only in CMake: {cmake_extra}")
    return report
+ """ + common = sorted(set(conf_entries) & set(cmake_entries)) + only_conf = sorted(set(conf_entries) - set(cmake_entries)) + only_cmake = sorted(set(cmake_entries) - set(conf_entries)) + + if not quiet: + print(f"\n Source files in both: {len(common)}") + print(f" Source files only in configure.py: {len(only_conf)}") + print(f" Source files only in CMake: {len(only_cmake)}") + + if only_conf: + print("\n Files only in configure.py:") + for f in only_conf: + print(f" {f}") + if only_cmake: + print("\n Files only in CMake:") + for f in only_cmake: + print(f" {f}") + + files_with_diffs = 0 + aggregate = defaultdict(int) + + for src in common: + conf_flags = conf_entries[src] + cmake_flags = cmake_entries[src] + + file_diffs = [] + for cat in ("defines", "warnings", "f_flags", "opt_flags", + "arch_flags", "std_flags"): + d = compare_flag_sets(cat, conf_flags[cat], cmake_flags[cat]) + file_diffs.extend(d) + for flag in conf_flags[cat] - cmake_flags[cat]: + aggregate[f"only-configure.py {cat}: {flag}"] += 1 + for flag in cmake_flags[cat] - conf_flags[cat]: + aggregate[f"only-cmake {cat}: {flag}"] += 1 + + if file_diffs: + files_with_diffs += 1 + if verbose and not quiet: + print(f"\n DIFF {src}:") + for d in file_diffs: + print(f" {d}") + + if not quiet: + print(f"\n Files with flag differences: " + f"{files_with_diffs} / {len(common)}") + if aggregate: + print("\n Aggregate flag diffs (flag → # files):") + for key, cnt in sorted(aggregate.items(), key=lambda x: -x[1]): + print(f" {key} ({cnt} files)") + + ok = files_with_diffs == 0 and not only_conf and not only_cmake + return ok, { + "common": len(common), + "only_conf": only_conf, + "only_cmake": only_cmake, + "files_with_diffs": files_with_diffs, + "aggregate": dict(aggregate), + } + + +def compare_link_target_sets(conf_targets, cmake_targets, verbose=False, + quiet=False): + """Compare which targets exist in both systems. + + Returns (ok, summary_dict). 
+ """ + conf_set = set(conf_targets) + cmake_set = set(cmake_targets) + + only_conf = sorted(conf_set - cmake_set) + only_cmake = sorted(cmake_set - conf_set) + + if not quiet: + print(f"\n Targets in configure.py: {len(conf_set)}") + print(f" Targets in CMake: {len(cmake_set)}") + + if only_conf: + print(f"\n ✗ Only in configure.py ({len(only_conf)}):") + for t in only_conf: + print(f" {t}") + if only_cmake: + print(f"\n ✗ Only in CMake ({len(only_cmake)}):") + for t in only_cmake: + print(f" {t}") + + ok = not only_conf and not only_cmake + if ok and not quiet: + print(" ✓ All targets match!") + + return ok, { + "only_conf": only_conf, + "only_cmake": only_cmake, + } + + +def compare_link_settings(conf_targets, cmake_targets, internal_libs, + verbose=False, quiet=False): + """Compare linker flags and libraries for common targets. + + Args: + internal_libs: set of library names that are build outputs of the + project (auto-detected). These are filtered from both sides + before comparison. + + Returns (ok, summary_dict). 
+ """ + common = sorted(set(conf_targets) & set(cmake_targets)) + + # Standalone tools that have known structural differences + _CPP_APPS = {"patchelf"} + + flag_diffs = 0 + lib_diffs = 0 + flag_agg_conf = defaultdict(int) + flag_agg_cmake = defaultdict(int) + lib_agg_conf = defaultdict(int) + lib_agg_cmake = defaultdict(int) + + for target in common: + conf = conf_targets[target] + cmake = cmake_targets[target] + + # Linker flags + only_conf_flags = conf["linker_flags"] - cmake["linker_flags"] + only_cmake_flags = cmake["linker_flags"] - conf["linker_flags"] + + # Known exception: standalone tools don't get -fno-lto in configure.py + target_base = target.rsplit("/", 1)[-1] if "/" in target else target + if target_base in _CPP_APPS: + only_cmake_flags.discard("-fno-lto") + + if only_conf_flags or only_cmake_flags: + flag_diffs += 1 + for f in only_conf_flags: + flag_agg_conf[f] += 1 + for f in only_cmake_flags: + flag_agg_cmake[f] += 1 + if verbose and not quiet: + print(f"\n {target}:") + if only_conf_flags: + print(f" Linker flags only in configure.py: " + f"{sorted(only_conf_flags)}") + if only_cmake_flags: + print(f" Linker flags only in CMake: " + f"{sorted(only_cmake_flags)}") + + # Libraries + conf_libs = conf["libraries"] - internal_libs + cmake_libs = cmake["libraries"] - internal_libs + only_conf_libs = conf_libs - cmake_libs + only_cmake_libs = cmake_libs - conf_libs + + # Subtract known transitive-resolution asymmetries + for lib, expected_side in _KNOWN_LIB_ASYMMETRIES.items(): + if expected_side == "conf": + only_conf_libs.discard(lib) + elif expected_side == "cmake": + only_cmake_libs.discard(lib) + if only_conf_libs or only_cmake_libs: + lib_diffs += 1 + for lib in only_conf_libs: + lib_agg_conf[lib] += 1 + for lib in only_cmake_libs: + lib_agg_cmake[lib] += 1 + if verbose and not quiet: + print(f"\n {target}:") + if only_conf_libs: + print(f" Libs only in configure.py: " + f"{sorted(only_conf_libs)}") + if only_cmake_libs: + print(f" Libs only in 
# ═══════════════════════════════════════════════════════════════════════════
# Mode-level comparison orchestrator
# ═══════════════════════════════════════════════════════════════════════════

# NOTE(review): this section was reconstructed from a copy in which runs of
# whitespace had been collapsed. Padding inside output strings (indent widths,
# leading spaces) and the layout of the --help epilog should be confirmed
# against the intended output before merging.


def _print_section(title):
    """Print the three-line banner that introduces one comparison stage."""
    rule = "─" * 56
    print(f"\n {rule}")
    print(f" {title}")
    print(f" {rule}")


def compare_mode(mode, repo_root, conf_parsed, cmake_parsed,
                 verbose=False, quiet=False):
    """Run all comparisons for one mode.

    Three stages run in order: per-file compilation flags, the set of link
    targets, and per-target linker flags/libraries. Every stage always runs,
    even when an earlier one reported differences.

    Args:
        mode: configure.py mode name (e.g. "dev").
        repo_root: Repository root; its string form is passed to the
            compile-entry extractors as the source directory.
        conf_parsed: Parsed configure.py build.ninja (variables, rules, builds).
        cmake_parsed: Parsed CMake build.ninja (variables, rules, builds).
        verbose: Forwarded to the individual comparison functions.
        quiet: Suppress section banners and the missing-mode warning; also
            forwarded to the individual comparison functions.

    Returns:
        (status, details) where:
          status: True=match, False=mismatch, None=skipped
          details: dict with compile/targets/linker summaries, or None
    """
    source_dir = str(repo_root)

    conf_vars, conf_rules, conf_builds = conf_parsed
    cmake_vars, cmake_rules, cmake_builds = cmake_parsed

    # The mode is considered present in configure.py's build.ninja when at
    # least one rule name carries the ".<mode>" suffix.
    if not any(rule.endswith(f".{mode}") for rule in conf_rules):
        if not quiet:
            print(f" ⚠ configure.py build.ninja doesn't contain mode '{mode}'")
        return None, None

    # ── 1. Per-file compilation flags ─────────────────────────────
    if not quiet:
        _print_section("Compilation flags (per-file)")

    conf_entries = extract_configure_compile_entries(
        conf_vars, conf_rules, conf_builds, mode, source_dir)
    cmake_entries = extract_cmake_compile_entries(
        cmake_builds, source_dir)

    flags_ok, compile_summary = compare_compile_entries(
        conf_entries, cmake_entries, verbose, quiet)

    # ── 2. Link targets ───────────────────────────────────────────
    if not quiet:
        _print_section("Link targets")

    conf_link = extract_configure_link_targets(
        conf_vars, conf_rules, conf_builds, mode)
    cmake_link = extract_cmake_link_targets(
        cmake_vars, cmake_rules, cmake_builds, mode)

    targets_ok, targets_summary = compare_link_target_sets(
        conf_link, cmake_link, verbose, quiet)

    # ── 3. Linker flags & libraries ───────────────────────────────
    if not quiet:
        _print_section("Linker flags & libraries")

    # Auto-detect internal library names from build outputs of both
    # systems, so we don't need a hardcoded list.
    internal_libs = _collect_internal_lib_names(conf_builds, cmake_builds)

    linker_ok, linker_summary = compare_link_settings(
        conf_link, cmake_link, internal_libs, verbose, quiet)

    details = {
        "compile": compile_summary,
        "targets": targets_summary,
        "linker": linker_summary,
    }

    # all() yields a real bool; main() tests the status with `is True` /
    # `is False`, so a merely truthy value would be misclassified.
    return all((flags_ok, targets_ok, linker_ok)), details


# ═══════════════════════════════════════════════════════════════════════════
# Summary formatting
# ═══════════════════════════════════════════════════════════════════════════

_MAX_AGGREGATE_ITEMS = 5

# (summary key, label) pairs for the linker aggregates, in display order.
_LINKER_AGGREGATES = (
    ("flag_agg_conf", "flag only in configure.py"),
    ("flag_agg_cmake", "flag only in CMake"),
    ("lib_agg_conf", "lib only in configure.py"),
    ("lib_agg_cmake", "lib only in CMake"),
)


def _by_count_desc(counts):
    """Return (key, count) pairs sorted by descending count (stable on ties)."""
    return sorted(counts.items(), key=lambda item: -item[1])


def _capped(entries, limit, indent):
    """Return the first *limit* entries plus a "... and N more" line if cut."""
    limit = max(limit, 0)
    shown = list(entries[:limit])
    hidden = len(entries) - limit
    if hidden > 0:
        shown.append(f"{indent} ... and {hidden} more")
    return shown


def _format_mode_details(details, quiet=False, max_items=None):
    """Format comparison details for inline display in the summary.

    Args:
        details: dict that may contain "compile", "targets" and "linker"
            sub-dicts (as assembled by compare_mode). A missing or None
            sub-dict is treated as empty.
        quiet: Selects the indent prefix used for every returned line.
        max_items: Maximum number of aggregate entries listed per section
            before the rest is folded into a "... and N more" line.
            Defaults to _MAX_AGGREGATE_ITEMS.

    Returns:
        List of formatted lines; empty when no section reports a difference.
    """
    if max_items is None:
        max_items = _MAX_AGGREGATE_ITEMS

    lines = []
    indent = " " if quiet else " "
    compile_info = details.get("compile") or {}
    targets_info = details.get("targets") or {}
    linker_info = details.get("linker") or {}

    # Compilation flags
    files_diff = compile_info.get("files_with_diffs", 0)
    only_conf = compile_info.get("only_conf", [])
    only_cmake = compile_info.get("only_cmake", [])
    if files_diff or only_conf or only_cmake:
        parts = []
        if files_diff:
            parts.append(f"{files_diff} files with flag diffs")
        if only_conf:
            parts.append(f"{len(only_conf)} sources only in configure.py")
        if only_cmake:
            parts.append(f"{len(only_cmake)} sources only in CMake")
        lines.append(f"{indent}Compilation: {', '.join(parts)}")

        aggregate = compile_info.get("aggregate") or {}
        entries = [f"{indent} {key} ({cnt} files)"
                   for key, cnt in _by_count_desc(aggregate)]
        lines.extend(_capped(entries, max_items, indent))

    # Link targets
    t_only_conf = targets_info.get("only_conf", [])
    t_only_cmake = targets_info.get("only_cmake", [])
    if t_only_conf or t_only_cmake:
        parts = []
        if t_only_conf:
            parts.append(f"{len(t_only_conf)} only in configure.py")
        if t_only_cmake:
            parts.append(f"{len(t_only_cmake)} only in CMake")
        lines.append(f"{indent}Link targets: {', '.join(parts)}")

    # Linker settings
    flag_diffs = linker_info.get("flag_diffs", 0)
    lib_diffs = linker_info.get("lib_diffs", 0)
    if flag_diffs or lib_diffs:
        parts = []
        if flag_diffs:
            parts.append(f"{flag_diffs} targets with flag diffs")
        if lib_diffs:
            parts.append(f"{lib_diffs} targets with lib diffs")
        lines.append(f"{indent}Linker: {', '.join(parts)}")

        # The cap applies to the four categories combined, in table order;
        # each category is sorted by descending target count on its own.
        entries = [
            f"{indent} {label}: {key} ({cnt} targets)"
            for summary_key, label in _LINKER_AGGREGATES
            for key, cnt in _by_count_desc(linker_info.get(summary_key) or {})
        ]
        lines.extend(_capped(entries, max_items, indent))

    return lines


def _configure_and_compare(repo_root, mode, conf_parsed, tmpdir, verbose):
    """Configure cmake and compare a single mode.

    Runs quietly — intended for parallel execution.

    Returns:
        The (ok, details) tuple produced by compare_mode, or
        (None, "cmake configuration failed") when run_cmake_configure
        returns None.
    """
    cmake_ninja = run_cmake_configure(repo_root, mode, tmpdir, quiet=True)
    if cmake_ninja is None:
        return None, "cmake configuration failed"
    cmake_parsed = parse_ninja(cmake_ninja)
    return compare_mode(
        mode, repo_root, conf_parsed=conf_parsed,
        cmake_parsed=cmake_parsed, verbose=verbose, quiet=True)


# ═══════════════════════════════════════════════════════════════════════════
# CLI
# ═══════════════════════════════════════════════════════════════════════════

def parse_args(argv=None):
    """Parse command-line options.

    Args:
        argv: Argument list to parse; None (the default) makes argparse read
            sys.argv[1:].

    Returns:
        argparse.Namespace with mode, verbose, quiet, ci and source_dir.
        --ci forces quiet to True.
    """
    parser = argparse.ArgumentParser(
        prog="compare_build_systems.py",
        # RawDescriptionHelpFormatter does not re-wrap the description, so
        # the line breaks are explicit.
        description=(
            "Compare configure.py and CMake build systems by parsing their\n"
            "ninja build files. Both systems are always configured into a\n"
            "temporary directory — the user's build tree is never touched."),
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""\
examples:
  # Compare dev mode
  %(prog)s -m dev

  # Compare all modes
  %(prog)s

  # CI mode: quiet, strict (exit 1 on any diff)
  %(prog)s --ci

  # Verbose output showing per-file differences
  %(prog)s -m debug -v

mode mapping:
  configure.py    CMake
  ──────────────  ──────────────
  debug           Debug
  dev             Dev
  release         RelWithDebInfo
  sanitize        Sanitize
  coverage        Coverage
""")
    parser.add_argument(
        "-m", "--mode",
        choices=ALL_MODES + ["all"],
        default="all",
        help="Build mode to compare (default: all)")
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Show per-file/per-target differences")
    parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="Minimal output — only summary and errors")
    parser.add_argument(
        "--ci",
        action="store_true",
        help="CI mode: quiet + strict (exit 1 on any diff)")
    parser.add_argument(
        "--source-dir",
        type=Path, default=None,
        help="Repository root directory (default: auto-detect)")

    args = parser.parse_args(argv)
    if args.ci:
        args.quiet = True
    return args


def _compare_in_foreground(repo_root, mode, label, conf_parsed, tmpdir,
                           verbose, quiet):
    """Configure CMake for *mode* and compare it, printing progress.

    *label* is the text shown in the "cmake (...)" progress heading.

    Returns:
        The (ok, details) tuple from compare_mode, or None when CMake
        configuration failed.
    """
    if not quiet:
        print(f"\n─── cmake ({label}) ───")
    cmake_ninja = run_cmake_configure(repo_root, mode, tmpdir, quiet)
    if cmake_ninja is None:
        return None
    cmake_parsed = parse_ninja(cmake_ninja)

    if not quiet:
        print(f"\n{'═' * 70}")
        print(f"Mode: {mode} (CMake: {MODE_TO_CMAKE[mode]})")
        print(f"{'═' * 70}")

    return compare_mode(
        mode, repo_root,
        conf_parsed=conf_parsed,
        cmake_parsed=cmake_parsed,
        verbose=verbose, quiet=quiet)


def _compare_in_parallel(repo_root, modes, conf_parsed, tmpdir, verbose):
    """Run _configure_and_compare for every mode, one worker thread each.

    Returns:
        dict mapping mode → (ok, details). A mode whose worker raised is
        recorded as (None, "exception: ...") and reported on stderr.
    """
    results = {}
    if not modes:
        # ThreadPoolExecutor rejects max_workers=0.
        return results

    with concurrent.futures.ThreadPoolExecutor(
            max_workers=len(modes)) as executor:
        futures = {
            executor.submit(
                _configure_and_compare, repo_root, m,
                conf_parsed, tmpdir, verbose): m
            for m in modes
        }
        for future in concurrent.futures.as_completed(futures):
            m = futures[future]
            try:
                ok, details = future.result()
            except Exception as e:
                # Top-level boundary for worker failures: report the mode as
                # skipped instead of aborting the other modes.
                print(f"ERROR: mode '{m}' raised: {e}",
                      file=sys.stderr)
                results[m] = (None, f"exception: {e}")
            else:
                results[m] = (ok, details)
    return results


def _status_label(ok, details):
    """Return the summary label for one mode's (ok, details) result."""
    if ok is None:
        if isinstance(details, str):
            return f"⚠ SKIPPED ({details})"
        return "⚠ SKIPPED"
    return "✓ MATCH" if ok else "✗ MISMATCH"


def _print_summary(modes, results, quiet):
    """Print one status line per mode, plus detail lines for mismatches."""
    if not quiet:
        print(f"\n{'═' * 70}")
        print("Summary")
        print(f"{'═' * 70}")

    for mode in modes:
        ok, details = results[mode]
        status = _status_label(ok, details)

        if quiet:
            print(f"{mode}: {status}")
        else:
            print(f" {mode:10s} (CMake: {MODE_TO_CMAKE[mode]:15s}): {status}")

        if ok is False and details and isinstance(details, dict):
            for line in _format_mode_details(details, quiet):
                print(line)


# Closing message per exit code (printed only when not quiet).
_EXIT_MESSAGES = {
    0: "\n✓ All modes match!",
    1: "\n✗ Some modes have differences.",
    2: "\n✗ Some modes could not be compared.",
}


def _overall_exit_code(results):
    """Map per-mode results to the process exit code.

    Returns 1 if any mode mismatched, 0 if every mode matched, and 2
    otherwise (at least one mode was skipped and none mismatched).
    """
    statuses = [ok for ok, _ in results.values()]
    if any(ok is False for ok in statuses):
        return 1
    if all(ok is True for ok in statuses):
        return 0
    return 2


def main(argv=None):
    """Entry point: configure both build systems and compare them.

    Args:
        argv: Optional argument list forwarded to parse_args; None means
            sys.argv[1:].

    Returns:
        Exit code: 0 when all checked modes match, 1 when differences were
        found, 2 when configuration failed or a mode could not be compared.
    """
    args = parse_args(argv)
    repo_root = args.source_dir or find_repo_root()
    modes = ALL_MODES if args.mode == "all" else [args.mode]
    quiet = args.quiet

    if not quiet:
        print("=" * 70)
        print("Build System Comparison: configure.py vs CMake")
        print("=" * 70)

    # Everything runs in a temporary directory so we never touch the
    # user's build tree.
    with tempfile.TemporaryDirectory(prefix="scylla-cmp-") as tmpdir_str:
        tmpdir = Path(tmpdir_str)

        # ── 1. Run configure.py (all modes at once) ──────────────
        if not quiet:
            print("\n─── configure.py ───")
        conf_ninja = run_configure_py(repo_root, modes, tmpdir, quiet)
        if conf_ninja is None:
            return 2

        if not quiet:
            print("\nParsing configure.py build.ninja...")
        conf_parsed = parse_ninja(conf_ninja)

        # results: mode → (ok, details)
        results = {}

        if len(modes) > 1:
            # ── 2. Canary mode for fail-fast ──────────────────────
            # One mode is compared first; the others are only configured
            # (in parallel) if the canary did not report a mismatch.
            canary = "dev" if "dev" in modes else modes[0]
            remaining = [m for m in modes if m != canary]

            outcome = _compare_in_foreground(
                repo_root, canary, f"canary: {canary}", conf_parsed,
                tmpdir, args.verbose, quiet)
            if outcome is None:
                return 2
            results[canary] = outcome

            if outcome[0] is False:
                if not quiet:
                    print(f"\n ✗ Canary mode '{canary}' has differences "
                          f"— skipping {len(remaining)} remaining modes")
                for m in remaining:
                    results[m] = (None, f"canary '{canary}' failed")
            else:
                if not quiet:
                    print(f"\n─── cmake + compare "
                          f"({len(remaining)} remaining modes "
                          f"in parallel) ───")
                results.update(_compare_in_parallel(
                    repo_root, remaining, conf_parsed, tmpdir,
                    args.verbose))
        else:
            # Single mode
            mode = modes[0]
            outcome = _compare_in_foreground(
                repo_root, mode, mode, conf_parsed, tmpdir,
                args.verbose, quiet)
            if outcome is None:
                return 2
            results[mode] = outcome

    # ── Summary ───────────────────────────────────────────────────
    _print_summary(modes, results, quiet)

    exit_code = _overall_exit_code(results)
    if not quiet:
        print(_EXIT_MESSAGES[exit_code])
    return exit_code


if __name__ == "__main__":
    sys.exit(main())