#!/usr/bin/env python3 # -*- coding: utf-8 -*- # # Copyright (C) 2026-present ScyllaDB # # # SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1 # """ Compare configure.py and CMake build systems by parsing their ninja build files. Checks three things: 1. Per-file compilation flags — are the same source files compiled with the same defines, warnings, optimization, and language flags? 2. Link targets — do both systems produce the same set of executables? 3. Per-target linker settings — are link flags and libraries identical for every common executable? configure.py is treated as the baseline. CMake should match it. Exit codes: 0 All checked modes match 1 Differences found 2 Configuration failed Both build systems are always configured into a temporary directory — the user's build tree is never touched. Examples: # Compare dev mode scripts/compare_build_systems.py -m dev # Compare all modes scripts/compare_build_systems.py # CI mode: quiet, strict, all modes scripts/compare_build_systems.py --ci # Verbose output showing every flag scripts/compare_build_systems.py -m debug -v """ import argparse import concurrent.futures import os import re import shlex import shutil import subprocess import sys import tempfile from collections import defaultdict from pathlib import Path # ═══════════════════════════════════════════════════════════════════════════ # Constants # ═══════════════════════════════════════════════════════════════════════════ MODE_TO_CMAKE = { "debug": "Debug", "dev": "Dev", "release": "RelWithDebInfo", "sanitize": "Sanitize", "coverage": "Coverage", } ALL_MODES = list(MODE_TO_CMAKE.keys()) # Per-component Boost defines that CMake's imported targets add. # configure.py uses the single BOOST_ALL_DYN_LINK instead. _BOOST_PER_COMPONENT_DEFINES = re.compile( r"-DBOOST_\w+_(DYN_LINK|NO_LIB)$") # Internal Scylla/Seastar/Abseil library targets that CMake creates as # intermediate static/shared libraries. configure.py links .o files # directly. These are structural differences, not bugs. def _collect_internal_lib_names(*build_lists): """Auto-detect internal library names from ninja build outputs. Any .a or .so file that is a build output (not a system library) is an internal project library. Returns normalized library names. This replaces a hardcoded list — new libraries added to either build system are picked up automatically. """ names = set() for builds in build_lists: for b in builds: for out in b["outputs"].split(): if out.endswith(".a") or ".so" in out: name = normalize_lib_name(out) if name: names.add(name) return names # Libraries that are known to appear on only one side due to how each # build system resolves transitive dependencies. Value is the side # where the library is expected to appear ("conf" or "cmake"). # A library present on BOTH sides always matches and is not checked here. # A library absent from both sides is irrelevant. # Only asymmetric presence is checked against this table. _KNOWN_LIB_ASYMMETRIES = { # configure.py links these explicitly; CMake resolves them # transitively through imported targets (Seastar, GnuTLS, etc.) "stdc++fs": "conf", "pthread": "conf", "atomic": "conf", "boost_date_time": "conf", "ubsan": "conf", # Lua transitive deps — configure.py gets them via pkg-config "m": "conf", # GnuTLS transitive deps — configure.py links explicitly "tasn1": "conf", "idn2": "conf", "unistring": "conf", "gmp": "conf", "nettle": "conf", "hogweed": "conf", "p11-kit": "conf", # Seastar transitive deps — configure.py links explicitly "uring": "conf", "hwloc": "conf", "sctp": "conf", "udev": "conf", "protobuf": "conf", "jsoncpp": "conf", "fmt": "conf", # CMake resolves these transitively through Boost imported targets "boost_atomic": "cmake", # CMake links ssl explicitly for encryption targets "ssl": "cmake", # Linked transitively via Seastar's rt::rt imported target "rt": "cmake", } # ═══════════════════════════════════════════════════════════════════════════ # Ninja file parsing # ═══════════════════════════════════════════════════════════════════════════ def parse_ninja(filepath): """Parse a ninja build file into (variables, rules, builds). Follows subninja/include directives. Returns: variables: dict[str, str] — top-level variable assignments rules: dict[str, dict] — rule name → {command, ...} builds: list[dict] — build statements with outputs, rule, inputs, implicit, vars """ variables = {} builds = [] rules = {} def _parse(path, into_vars, into_builds, into_rules): base_dir = os.path.dirname(path) try: with open(path) as f: lines = f.readlines() except FileNotFoundError: return i = 0 while i < len(lines): line = lines[i].rstrip("\n") if not line or line.startswith("#"): i += 1 continue # subninja / include m = re.match(r"^(subninja|include)\s+(.+)", line) if m: inc_path = m.group(2).strip() if not os.path.isabs(inc_path): inc_path = os.path.join(base_dir, inc_path) _parse(inc_path, into_vars, into_builds, into_rules) i += 1 continue # Rule definition m = re.match(r"^rule\s+(\S+)", line) if m: rule_name = m.group(1) rule_vars = {} i += 1 while i < len(lines) and lines[i].startswith(" "): rline = lines[i].strip() rm = re.match(r"(\S+)\s*=\s*(.*)", rline) if rm: rule_vars[rm.group(1)] = rm.group(2) i += 1 into_rules[rule_name] = rule_vars continue # Top-level variable m = re.match(r"^([a-zA-Z_][a-zA-Z0-9_.]*)\s*=\s*(.*)", line) if m and not line.startswith(" "): into_vars[m.group(1)] = m.group(2) i += 1 continue # Build statement m = re.match(r"^build\s+(.+?):\s+(\S+)\s*(.*)", line) if m: outputs_str = m.group(1) rule = m.group(2) rest = m.group(3) i += 1 build_vars = {} while i < len(lines) and lines[i].startswith(" "): bline = lines[i].strip() bm = re.match(r"(\S+)\s*=\s*(.*)", bline) if bm: build_vars[bm.group(1)] = bm.group(2) i += 1 parts = re.split(r"\s*\|\|\s*|\s*\|\s*", rest) explicit = parts[0].strip() if parts else "" implicit = parts[1].strip() if len(parts) > 1 else "" into_builds.append({ "outputs": outputs_str.strip(), "rule": rule, "inputs": explicit, "implicit": implicit, "vars": build_vars, }) continue i += 1 _parse(str(filepath), variables, builds, rules) return variables, rules, builds def resolve_var(value, variables, depth=0): """Recursively resolve $var and ${var} references.""" if depth > 10 or "$" not in value: return value def _repl(m): name = m.group(1) or m.group(2) return variables.get(name, "") result = re.sub(r"\$\{(\w+)\}|\$(\w+)", _repl, value) if "$" in result and result != value: return resolve_var(result, variables, depth + 1) return result # ═══════════════════════════════════════════════════════════════════════════ # Flag extraction helpers # ═══════════════════════════════════════════════════════════════════════════ def tokenize(flags_str): """Split a flags string into tokens, joining multi-word flags.""" tokens = [] parts = flags_str.split() i = 0 while i < len(parts): if (parts[i] in ("-Xclang", "-mllvm", "--param", "-Xlinker") and i + 1 < len(parts)): tokens.append(f"{parts[i]} {parts[i+1]}") i += 2 else: tokens.append(parts[i]) i += 1 return tokens def categorize_compile_flags(command_str): """Extract and categorize compilation flags from a command string. Returns dict with keys: defines, warnings, f_flags, opt_flags, arch_flags, std_flags. """ try: tokens = shlex.split(command_str) except ValueError: tokens = command_str.split() flags = { "defines": set(), "warnings": set(), "f_flags": set(), "opt_flags": set(), "arch_flags": set(), "std_flags": set(), } skip_next = False for tok in tokens: if skip_next: skip_next = False continue if tok.startswith("-D"): if _BOOST_PER_COMPONENT_DEFINES.match(tok): continue # Normalize version defines that contain git hashes if tok.startswith("-DSCYLLA_RELEASE="): tok = "-DSCYLLA_RELEASE=" elif tok.startswith("-DSCYLLA_VERSION="): tok = "-DSCYLLA_VERSION=" flags["defines"].add(tok) elif tok.startswith("-W"): if tok == "-Winvalid-pch": continue # -Wno-backend-plugin is added by configure.py when a PGO # profile is available. CMake handles PGO separately. if tok == "-Wno-backend-plugin": continue flags["warnings"].add(tok) elif tok.startswith("-f"): if "-ffile-prefix-map=" in tok: continue # LTO and PGO flags are configuration-dependent options # (--lto, --pgo, --use-profile for configure.py; # Scylla_PROFDATA_FILE for CMake), not mode-inherent. if (tok.startswith("-flto") or tok == "-ffat-lto-objects" or tok == "-fno-lto" or tok.startswith("-fprofile-use=") or tok.startswith("-fprofile-generate") or tok == "-fpch-validate-input-files-content"): continue flags["f_flags"].add(tok) elif tok.startswith("-O"): flags["opt_flags"].add(tok) elif tok.startswith("-march="): flags["arch_flags"].add(tok) elif tok.startswith("-std="): flags["std_flags"].add(tok) elif tok in ("-o", "-MT", "-MF", "-Xclang"): skip_next = True elif tok in ("-include-pch", "-include"): skip_next = True elif tok.startswith(("-I", "-iquote", "-isystem")): if tok in ("-I", "-iquote", "-isystem"): skip_next = True continue return flags def normalize_lib_name(token): """Extract canonical library name from -l, .a, or .so tokens.""" if token.startswith("-l"): return token[2:] basename = os.path.basename(token) m = re.match(r"lib(.+?)\.(?:a|so(?:\.\S*)?)", basename) return m.group(1) if m else None def normalize_linker_flag(tok): """Normalize a linker flag to a canonical comparable form.""" if tok.startswith("-Wl,"): parts = tok[4:].split(",") result = set() for part in parts: if "--dynamic-linker" in part: result.add("-Wl,--dynamic-linker=") elif "-rpath" in part: result.add("-Wl,-rpath=") elif "--build-id" in part: result.add(f"-Wl,{part}") elif part in ("--push-state", "--pop-state", "--whole-archive", "--no-whole-archive", "-Bstatic", "-Bdynamic"): continue elif "--strip" in part: result.add(f"-Wl,{part}") elif part and not part.startswith("/"): # Skip bare paths (rpath values, library search paths) result.add(f"-Wl,{part}") return result if tok.startswith("-Xlinker "): arg = tok.split(" ", 1)[1] if "--dynamic-linker" in arg: return {"-Wl,--dynamic-linker="} if "--build-id" in arg: return {f"-Wl,{arg}"} if "-rpath" in arg: return {"-Wl,-rpath="} if "--dependency-file" in arg: return set() if arg in ("--push-state", "--pop-state", "--whole-archive", "--no-whole-archive", "-Bstatic", "-Bdynamic"): return set() return {f"-Wl,{arg}"} if tok.startswith("-fuse-ld=") or tok.startswith("--ld-path="): name = tok.split("=", 1)[1] name = os.path.basename(name) if name.startswith("ld."): name = name[3:] return {f"linker={name}"} return {tok} # ═══════════════════════════════════════════════════════════════════════════ # Source file extraction from ninja builds # ═══════════════════════════════════════════════════════════════════════════ def _is_scylla_source(rel_path): """True if this is a Scylla-owned source file (not seastar/abseil).""" return (not rel_path.startswith("seastar/") and not rel_path.startswith("abseil/") and not rel_path.startswith("build") and not rel_path.startswith("..") and not os.path.isabs(rel_path) and rel_path != "tools/patchelf.cc" and rel_path != "exported_templates.cc" and (rel_path.endswith(".cc") or rel_path.endswith(".cpp"))) def extract_configure_compile_entries(variables, rules, builds, mode, source_dir): """Extract per-source-file flags from configure.py build.ninja. Returns dict: relative_source_path → categorized flags dict. """ entries = {} mode_prefix = f"build/{mode}/" # Find compile rules for this mode compile_rules = {} for name, rvars in rules.items(): if (name.startswith(f"cxx.{mode}") or name.startswith(f"cxx_with_pch.{mode}")): compile_rules[name] = rvars if not compile_rules: return entries for b in builds: if b["rule"] not in compile_rules: continue output = b["outputs"] output = output.replace("$builddir/", "build/") if not output.startswith(mode_prefix): continue # Get source file from inputs src_tokens = b["inputs"].strip().split() if not src_tokens: continue src = src_tokens[0] src = src.replace("$builddir/", "build/") # Make source path relative if os.path.isabs(src): try: rel_src = os.path.relpath(src, source_dir) except ValueError: rel_src = src else: rel_src = src if not _is_scylla_source(rel_src): continue # Build effective command by resolving variables. # Ninja scoping: build-statement variable VALUES are resolved # against the enclosing (file-level) scope, NOT against themselves. rule_def = compile_rules[b["rule"]] outer_scope = dict(variables) outer_scope.update(rule_def) resolved_build_vars = {} for k, v in b["vars"].items(): resolved_build_vars[k] = resolve_var(v, outer_scope) merged = dict(variables) merged.update(rule_def) merged.update(resolved_build_vars) merged["in"] = b["inputs"] merged["out"] = b["outputs"] command = rule_def.get("command", "") resolved = resolve_var(command, merged) entries[rel_src] = categorize_compile_flags(resolved) return entries def extract_cmake_compile_entries(builds, source_dir): """Extract per-source-file flags from CMake build.ninja. Returns dict: relative_source_path → categorized flags dict. """ entries = {} for b in builds: if "CXX_COMPILER" not in b["rule"]: continue # Get source file from inputs src_tokens = b["inputs"].strip().split() if not src_tokens: continue src = src_tokens[0] if os.path.isabs(src): try: rel_src = os.path.relpath(src, source_dir) except ValueError: rel_src = src else: rel_src = src if not _is_scylla_source(rel_src): continue # Build a pseudo-command from DEFINES + FLAGS defines = b["vars"].get("DEFINES", "") flags = b["vars"].get("FLAGS", "") pseudo_cmd = f"{defines} {flags}" entries[rel_src] = categorize_compile_flags(pseudo_cmd) return entries # ═══════════════════════════════════════════════════════════════════════════ # Link target extraction from ninja builds # ═══════════════════════════════════════════════════════════════════════════ def _is_link_rule(rule): """True if the rule is a link rule (executable linker). Excludes link_stripped rules which are just stripped copies of the unstripped targets (configure.py creates both variants). """ rl = rule.lower() return ("link" in rl and "static" not in rl and "shared" not in rl and "module" not in rl and "stripped" not in rl) def _extract_link_info(build, variables, rules): """Extract linker flags and libraries from a link build statement.""" rule_def = rules.get(build["rule"], {}) # Resolve build variable values against the outer scope first # (ninja scoping: build var RHS is evaluated in file scope). outer_scope = dict(variables) outer_scope.update(rule_def) resolved_build_vars = {} for k, v in build["vars"].items(): resolved_build_vars[k] = resolve_var(v, outer_scope) merged = dict(variables) merged.update(rule_def) merged.update(resolved_build_vars) merged["in"] = build["inputs"] merged["out"] = build["outputs"] # Resolve command from the rule template (configure.py style) command_template = rule_def.get("command", "") command = resolve_var(command_template, merged) # For CMake, also look at explicit LINK_FLAGS and LINK_LIBRARIES vars link_flags_var = build["vars"].get("LINK_FLAGS", "") link_libs_var = build["vars"].get("LINK_LIBRARIES", "") linker_flags = set() libraries = set() # Parse from resolved command (for configure.py) if command_template: try: tokens = shlex.split(command) except ValueError: tokens = command.split() skip = False for tok in tokens: if skip: skip = False continue if tok in ("-o", "-MF", "-MT"): skip = True continue # Skip LTO/PGO linker flags — configuration-dependent if (tok.startswith("-flto") or tok == "-fno-lto" or tok == "-ffat-lto-objects" or tok.startswith("-fprofile-use=") or tok.startswith("-fprofile-generate")): continue if tok.startswith("-fsanitize") or tok.startswith("-fno-sanitize"): linker_flags.update(normalize_linker_flag(tok)) elif tok.startswith("-fuse-ld=") or tok.startswith("--ld-path="): linker_flags.update(normalize_linker_flag(tok)) elif tok.startswith("-Wl,"): linker_flags.update(normalize_linker_flag(tok)) elif tok == "-static-libstdc++": linker_flags.add(tok) elif tok == "-s": linker_flags.add("-Wl,--strip-all") # Libraries if tok.startswith("-l"): lib = tok[2:] libraries.add(lib) elif tok.endswith(".o"): continue elif tok.endswith(".a") or ".so" in tok: name = normalize_lib_name(tok) if name: libraries.add(name) # Parse from explicit LINK_FLAGS/LINK_LIBRARIES (CMake style) if link_flags_var: for tok in tokenize(link_flags_var): # Skip LTO/PGO linker flags — configuration-dependent if (tok.startswith("-flto") or tok == "-fno-lto" or tok == "-ffat-lto-objects" or tok.startswith("-fprofile-use=") or tok.startswith("-fprofile-generate")): continue if tok.startswith("-fsanitize") or tok.startswith("-fno-sanitize"): linker_flags.update(normalize_linker_flag(tok)) elif tok.startswith("-fuse-ld=") or tok.startswith("--ld-path="): linker_flags.update(normalize_linker_flag(tok)) elif tok.startswith("-Wl,") or tok.startswith("-Xlinker "): linker_flags.update(normalize_linker_flag(tok)) elif tok == "-s": linker_flags.add("-Wl,--strip-all") if link_libs_var: for tok in tokenize(link_libs_var): if tok.startswith("-fsanitize") or tok.startswith("-fno-sanitize"): linker_flags.update(normalize_linker_flag(tok)) elif tok.startswith("-Wl,") or tok.startswith("-Xlinker "): linker_flags.update(normalize_linker_flag(tok)) elif tok.startswith("-l"): libraries.add(tok[2:]) elif tok.endswith(".o"): continue elif tok.endswith(".a") or ".so" in tok: name = normalize_lib_name(tok) if name: libraries.add(name) # Also extract libraries from build inputs (implicit deps) all_inputs = build["inputs"] + " " + build.get("implicit", "") all_inputs = resolve_var(all_inputs, merged) for tok in all_inputs.split(): if tok.endswith(".o"): continue if tok.endswith(".a") or ".so" in tok: name = normalize_lib_name(tok) if name: libraries.add(name) return {"linker_flags": linker_flags, "libraries": libraries} def extract_configure_link_targets(variables, rules, builds, mode): """Extract link targets from configure.py build.ninja. Returns dict: target_name → {linker_flags, libraries}. """ result = {} mode_prefix = f"build/{mode}/" for b in builds: if not _is_link_rule(b["rule"]): continue if not b["rule"].endswith(f".{mode}"): continue target = b["outputs"].replace("$builddir/", "build/") if not target.startswith(mode_prefix): continue target = target[len(mode_prefix):] if (target.endswith(".stripped") or target.endswith(".debug") or target.endswith(".so") or target.endswith(".a")): continue if target.startswith("seastar/") or target.startswith("abseil/"): continue # Strip _g suffix (unstripped variant) if target.endswith("_g"): target = target[:-2] result[target] = _extract_link_info(b, variables, rules) return result def extract_cmake_link_targets(variables, rules, builds, mode): """Extract link targets from CMake build.ninja. Returns dict: target_name → {linker_flags, libraries}. """ result = {} cmake_type = MODE_TO_CMAKE.get(mode, "") for b in builds: if not _is_link_rule(b["rule"]): continue target = b["outputs"] # Skip non-executable link rules if (target.endswith(".stripped") or target.endswith(".debug") or target.endswith(".so") or target.endswith(".a")): continue if target.startswith("seastar/") or target.startswith("abseil/"): continue # Strip cmake type prefix if present (e.g., "Dev/scylla" → "scylla") if cmake_type and target.startswith(f"{cmake_type}/"): target = target[len(cmake_type) + 1:] result[target] = _extract_link_info(b, variables, rules) return result # ═══════════════════════════════════════════════════════════════════════════ # Configuration helpers # ═══════════════════════════════════════════════════════════════════════════ def find_repo_root(): """Find the repository root by looking for configure.py.""" candidate = Path(__file__).resolve().parent.parent if (candidate / "configure.py").exists(): return candidate candidate = Path.cwd() if (candidate / "configure.py").exists(): return candidate sys.exit("ERROR: Cannot find repository root (no configure.py found)") def _find_ninja(): """Find the ninja executable.""" for name in ("ninja", "ninja-build"): path = shutil.which(name) if path: return path return "ninja" def run_configure_py(repo_root, modes, tmpdir, quiet=False): """Run configure.py into a temporary directory. Uses --out and --build-dir so the user's build tree is never touched. Returns the path to the generated build.ninja, or None on failure. """ ninja_file = tmpdir / "build.ninja" build_dir = tmpdir / "conf-build" mode_args = [] for m in modes: mode_args.extend(["--mode", m]) cmd = [ sys.executable, str(repo_root / "configure.py"), "--out", str(ninja_file), "--build-dir", str(build_dir), ] + mode_args if not quiet: print(f" $ {' '.join(cmd)}") result = subprocess.run(cmd, cwd=str(repo_root), capture_output=quiet, text=True) if result.returncode != 0: print(f"ERROR: configure.py failed (exit {result.returncode})", file=sys.stderr) if quiet and result.stderr: print(result.stderr, file=sys.stderr) return None return ninja_file def run_cmake_configure(repo_root, mode, tmpdir, quiet=False): """Run cmake into a temporary directory. Returns the path to the generated build.ninja, or None on failure. """ cmake_type = MODE_TO_CMAKE[mode] build_dir = tmpdir / f"cmake-{mode}" ninja = _find_ninja() cmd = [ "cmake", f"-DCMAKE_BUILD_TYPE={cmake_type}", f"-DCMAKE_MAKE_PROGRAM={ninja}", "-DCMAKE_C_COMPILER=clang", "-DCMAKE_CXX_COMPILER=clang++", "-G", "Ninja", "-S", str(repo_root), "-B", str(build_dir), ] if not quiet: print(f" $ {' '.join(cmd)}") result = subprocess.run(cmd, cwd=str(repo_root), capture_output=quiet, text=True) if result.returncode != 0: print(f"ERROR: cmake failed for mode '{mode}' " f"(exit {result.returncode})", file=sys.stderr) if quiet and result.stderr: print(result.stderr, file=sys.stderr) return None return build_dir / "build.ninja" # ═══════════════════════════════════════════════════════════════════════════ # Comparison logic # ═══════════════════════════════════════════════════════════════════════════ def compare_flag_sets(label, set_a, set_b): """Compare two sets, return list of difference strings.""" only_a = set_a - set_b only_b = set_b - set_a diffs = [] if only_a: diffs.append(f"{label}: only in configure.py: {sorted(only_a)}") if only_b: diffs.append(f"{label}: only in CMake: {sorted(only_b)}") return diffs def compare_compile_entries(conf_entries, cmake_entries, verbose=False, quiet=False): """Compare per-file compilation flags. Returns (ok, summary_dict). """ common = sorted(set(conf_entries) & set(cmake_entries)) only_conf = sorted(set(conf_entries) - set(cmake_entries)) only_cmake = sorted(set(cmake_entries) - set(conf_entries)) if not quiet: print(f"\n Source files in both: {len(common)}") print(f" Source files only in configure.py: {len(only_conf)}") print(f" Source files only in CMake: {len(only_cmake)}") if only_conf: print("\n Files only in configure.py:") for f in only_conf: print(f" {f}") if only_cmake: print("\n Files only in CMake:") for f in only_cmake: print(f" {f}") files_with_diffs = 0 aggregate = defaultdict(int) for src in common: conf_flags = conf_entries[src] cmake_flags = cmake_entries[src] file_diffs = [] for cat in ("defines", "warnings", "f_flags", "opt_flags", "arch_flags", "std_flags"): d = compare_flag_sets(cat, conf_flags[cat], cmake_flags[cat]) file_diffs.extend(d) for flag in conf_flags[cat] - cmake_flags[cat]: aggregate[f"only-configure.py {cat}: {flag}"] += 1 for flag in cmake_flags[cat] - conf_flags[cat]: aggregate[f"only-cmake {cat}: {flag}"] += 1 if file_diffs: files_with_diffs += 1 if verbose and not quiet: print(f"\n DIFF {src}:") for d in file_diffs: print(f" {d}") if not quiet: print(f"\n Files with flag differences: " f"{files_with_diffs} / {len(common)}") if aggregate: print("\n Aggregate flag diffs (flag → # files):") for key, cnt in sorted(aggregate.items(), key=lambda x: -x[1]): print(f" {key} ({cnt} files)") ok = files_with_diffs == 0 and not only_conf and not only_cmake return ok, { "common": len(common), "only_conf": only_conf, "only_cmake": only_cmake, "files_with_diffs": files_with_diffs, "aggregate": dict(aggregate), } def compare_link_target_sets(conf_targets, cmake_targets, verbose=False, quiet=False): """Compare which targets exist in both systems. Returns (ok, summary_dict). """ conf_set = set(conf_targets) cmake_set = set(cmake_targets) only_conf = sorted(conf_set - cmake_set) only_cmake = sorted(cmake_set - conf_set) if not quiet: print(f"\n Targets in configure.py: {len(conf_set)}") print(f" Targets in CMake: {len(cmake_set)}") if only_conf: print(f"\n ✗ Only in configure.py ({len(only_conf)}):") for t in only_conf: print(f" {t}") if only_cmake: print(f"\n ✗ Only in CMake ({len(only_cmake)}):") for t in only_cmake: print(f" {t}") ok = not only_conf and not only_cmake if ok and not quiet: print(" ✓ All targets match!") return ok, { "only_conf": only_conf, "only_cmake": only_cmake, } def compare_link_settings(conf_targets, cmake_targets, internal_libs, verbose=False, quiet=False): """Compare linker flags and libraries for common targets. Args: internal_libs: set of library names that are build outputs of the project (auto-detected). These are filtered from both sides before comparison. Returns (ok, summary_dict). """ common = sorted(set(conf_targets) & set(cmake_targets)) # Standalone tools that have known structural differences _CPP_APPS = {"patchelf"} flag_diffs = 0 lib_diffs = 0 flag_agg_conf = defaultdict(int) flag_agg_cmake = defaultdict(int) lib_agg_conf = defaultdict(int) lib_agg_cmake = defaultdict(int) for target in common: conf = conf_targets[target] cmake = cmake_targets[target] # Linker flags only_conf_flags = conf["linker_flags"] - cmake["linker_flags"] only_cmake_flags = cmake["linker_flags"] - conf["linker_flags"] # Known exception: standalone tools don't get -fno-lto in configure.py target_base = target.rsplit("/", 1)[-1] if "/" in target else target if target_base in _CPP_APPS: only_cmake_flags.discard("-fno-lto") if only_conf_flags or only_cmake_flags: flag_diffs += 1 for f in only_conf_flags: flag_agg_conf[f] += 1 for f in only_cmake_flags: flag_agg_cmake[f] += 1 if verbose and not quiet: print(f"\n {target}:") if only_conf_flags: print(f" Linker flags only in configure.py: " f"{sorted(only_conf_flags)}") if only_cmake_flags: print(f" Linker flags only in CMake: " f"{sorted(only_cmake_flags)}") # Libraries conf_libs = conf["libraries"] - internal_libs cmake_libs = cmake["libraries"] - internal_libs only_conf_libs = conf_libs - cmake_libs only_cmake_libs = cmake_libs - conf_libs # Subtract known transitive-resolution asymmetries for lib, expected_side in _KNOWN_LIB_ASYMMETRIES.items(): if expected_side == "conf": only_conf_libs.discard(lib) elif expected_side == "cmake": only_cmake_libs.discard(lib) if only_conf_libs or only_cmake_libs: lib_diffs += 1 for lib in only_conf_libs: lib_agg_conf[lib] += 1 for lib in only_cmake_libs: lib_agg_cmake[lib] += 1 if verbose and not quiet: print(f"\n {target}:") if only_conf_libs: print(f" Libs only in configure.py: " f"{sorted(only_conf_libs)}") if only_cmake_libs: print(f" Libs only in CMake: " f"{sorted(only_cmake_libs)}") if not quiet: print(f"\n Linker flag differences: {flag_diffs} / {len(common)}") if flag_agg_conf or flag_agg_cmake: print("\n Aggregate linker flag diffs:") for f, c in sorted(flag_agg_conf.items(), key=lambda x: -x[1]): print(f" only-configure.py {f} ({c} targets)") for f, c in sorted(flag_agg_cmake.items(), key=lambda x: -x[1]): print(f" only-cmake {f} ({c} targets)") print(f"\n Library differences: {lib_diffs} / {len(common)}") if lib_agg_conf or lib_agg_cmake: print("\n Aggregate library diffs:") for lib, c in sorted(lib_agg_conf.items(), key=lambda x: -x[1]): print(f" only-configure.py {lib} ({c} targets)") for lib, c in sorted(lib_agg_cmake.items(), key=lambda x: -x[1]): print(f" only-cmake {lib} ({c} targets)") ok = flag_diffs == 0 and lib_diffs == 0 if ok and not quiet: print(" ✓ Linker flags and libraries match for all common targets!") return ok, { "flag_diffs": flag_diffs, "lib_diffs": lib_diffs, "flag_agg_conf": dict(flag_agg_conf), "flag_agg_cmake": dict(flag_agg_cmake), "lib_agg_conf": dict(lib_agg_conf), "lib_agg_cmake": dict(lib_agg_cmake), } # ═══════════════════════════════════════════════════════════════════════════ # Mode-level comparison orchestrator # ═══════════════════════════════════════════════════════════════════════════ def compare_mode(mode, repo_root, conf_parsed, cmake_parsed, verbose=False, quiet=False): """Run all comparisons for one mode. Args: conf_parsed: Parsed configure.py build.ninja (variables, rules, builds). cmake_parsed: Parsed CMake build.ninja (variables, rules, builds). Returns: (status, details) where: status: True=match, False=mismatch, None=skipped details: dict with compile/targets/linker summaries, or None """ source_dir = str(repo_root) conf_vars, conf_rules, conf_builds = conf_parsed cmake_vars, cmake_rules, cmake_builds = cmake_parsed # Check that configure.py build.ninja has this mode has_mode = any(r.endswith(f".{mode}") for r in conf_rules) if not has_mode: if not quiet: print(f" ⚠ configure.py build.ninja doesn't contain mode '{mode}'") return None, None all_ok = True # ── 1. Per-file compilation flags ───────────────────────────── if not quiet: print(f"\n {'─'*56}") print(f" Compilation flags (per-file)") print(f" {'─'*56}") conf_entries = extract_configure_compile_entries( conf_vars, conf_rules, conf_builds, mode, source_dir) cmake_entries = extract_cmake_compile_entries( cmake_builds, source_dir) flags_ok, compile_summary = compare_compile_entries( conf_entries, cmake_entries, verbose, quiet) if not flags_ok: all_ok = False # ── 2. Link targets ─────────────────────────────────────────── if not quiet: print(f"\n {'─'*56}") print(f" Link targets") print(f" {'─'*56}") conf_link = extract_configure_link_targets( conf_vars, conf_rules, conf_builds, mode) cmake_link = extract_cmake_link_targets( cmake_vars, cmake_rules, cmake_builds, mode) targets_ok, targets_summary = compare_link_target_sets( conf_link, cmake_link, verbose, quiet) if not targets_ok: all_ok = False # ── 3. Linker flags & libraries ─────────────────────────────── if not quiet: print(f"\n {'─'*56}") print(f" Linker flags & libraries") print(f" {'─'*56}") # Auto-detect internal library names from build outputs of both # systems, so we don't need a hardcoded list. internal_libs = _collect_internal_lib_names(conf_builds, cmake_builds) linker_ok, linker_summary = compare_link_settings( conf_link, cmake_link, internal_libs, verbose, quiet) if not linker_ok: all_ok = False details = { "compile": compile_summary, "targets": targets_summary, "linker": linker_summary, } return all_ok, details # ═══════════════════════════════════════════════════════════════════════════ # Summary formatting # ═══════════════════════════════════════════════════════════════════════════ _MAX_AGGREGATE_ITEMS = 5 def _format_mode_details(details, quiet=False): """Format comparison details for inline display in the summary.""" lines = [] indent = " " if quiet else " " compile_info = details.get("compile", {}) targets_info = details.get("targets", {}) linker_info = details.get("linker", {}) # Compilation flags files_diff = compile_info.get("files_with_diffs", 0) only_conf = compile_info.get("only_conf", []) only_cmake = compile_info.get("only_cmake", []) aggregate = compile_info.get("aggregate", {}) if files_diff or only_conf or only_cmake: parts = [] if files_diff: parts.append(f"{files_diff} files with flag diffs") if only_conf: parts.append(f"{len(only_conf)} sources only in configure.py") if only_cmake: parts.append(f"{len(only_cmake)} sources only in CMake") lines.append(f"{indent}Compilation: {', '.join(parts)}") if aggregate: top = sorted(aggregate.items(), key=lambda x: -x[1]) for key, cnt in top[:_MAX_AGGREGATE_ITEMS]: lines.append(f"{indent} {key} ({cnt} files)") if len(top) > _MAX_AGGREGATE_ITEMS: lines.append(f"{indent} ... and {len(top) - _MAX_AGGREGATE_ITEMS} more") # Link targets t_only_conf = targets_info.get("only_conf", []) t_only_cmake = targets_info.get("only_cmake", []) if t_only_conf or t_only_cmake: parts = [] if t_only_conf: parts.append(f"{len(t_only_conf)} only in configure.py") if t_only_cmake: parts.append(f"{len(t_only_cmake)} only in CMake") lines.append(f"{indent}Link targets: {', '.join(parts)}") # Linker settings flag_diffs = linker_info.get("flag_diffs", 0) lib_diffs = linker_info.get("lib_diffs", 0) if flag_diffs or lib_diffs: parts = [] if flag_diffs: parts.append(f"{flag_diffs} targets with flag diffs") if lib_diffs: parts.append(f"{lib_diffs} targets with lib diffs") lines.append(f"{indent}Linker: {', '.join(parts)}") agg_items = [] for key, cnt in sorted(linker_info.get("flag_agg_conf", {}).items(), key=lambda x: -x[1]): agg_items.append(f"{indent} flag only in configure.py: {key} ({cnt} targets)") for key, cnt in sorted(linker_info.get("flag_agg_cmake", {}).items(), key=lambda x: -x[1]): agg_items.append(f"{indent} flag only in CMake: {key} ({cnt} targets)") for key, cnt in sorted(linker_info.get("lib_agg_conf", {}).items(), key=lambda x: -x[1]): agg_items.append(f"{indent} lib only in configure.py: {key} ({cnt} targets)") for key, cnt in sorted(linker_info.get("lib_agg_cmake", {}).items(), key=lambda x: -x[1]): agg_items.append(f"{indent} lib only in CMake: {key} ({cnt} targets)") for item in agg_items[:_MAX_AGGREGATE_ITEMS]: lines.append(item) if len(agg_items) > _MAX_AGGREGATE_ITEMS: lines.append(f"{indent} ... and {len(agg_items) - _MAX_AGGREGATE_ITEMS} more") return lines def _configure_and_compare(repo_root, mode, conf_parsed, tmpdir, verbose): """Configure cmake and compare a single mode. Runs quietly — intended for parallel execution. Returns (ok, details) tuple. """ cmake_ninja = run_cmake_configure(repo_root, mode, tmpdir, quiet=True) if cmake_ninja is None: return None, "cmake configuration failed" cmake_parsed = parse_ninja(cmake_ninja) return compare_mode( mode, repo_root, conf_parsed=conf_parsed, cmake_parsed=cmake_parsed, verbose=verbose, quiet=True) # ═══════════════════════════════════════════════════════════════════════════ # CLI # ═══════════════════════════════════════════════════════════════════════════ def parse_args(): parser = argparse.ArgumentParser( prog="compare_build_systems.py", description=( "Compare configure.py and CMake build systems by parsing their " "ninja build files. Both systems are always configured into a " "temporary directory — the user's build tree is never touched."), formatter_class=argparse.RawDescriptionHelpFormatter, epilog="""\ examples: # Compare dev mode %(prog)s -m dev # Compare all modes %(prog)s # CI mode: quiet, strict (exit 1 on any diff) %(prog)s --ci # Verbose output showing per-file differences %(prog)s -m debug -v mode mapping: configure.py CMake ────────────── ────────────── debug Debug dev Dev release RelWithDebInfo sanitize Sanitize coverage Coverage """) parser.add_argument( "-m", "--mode", choices=ALL_MODES + ["all"], default="all", help="Build mode to compare (default: all)") parser.add_argument( "-v", "--verbose", action="store_true", help="Show per-file/per-target differences") parser.add_argument( "-q", "--quiet", action="store_true", help="Minimal output — only summary and errors") parser.add_argument( "--ci", action="store_true", help="CI mode: quiet + strict (exit 1 on any diff)") parser.add_argument( "--source-dir", type=Path, default=None, help="Repository root directory (default: auto-detect)") args = parser.parse_args() if args.ci: args.quiet = True return args def main(): args = parse_args() repo_root = args.source_dir or find_repo_root() modes = ALL_MODES if args.mode == "all" else [args.mode] quiet = args.quiet if not quiet: print("=" * 70) print("Build System Comparison: configure.py vs CMake") print("=" * 70) # Everything runs in a temporary directory so we never touch the # user's build tree. with tempfile.TemporaryDirectory(prefix="scylla-cmp-") as tmpdir_str: tmpdir = Path(tmpdir_str) # ── 1. Run configure.py (all modes at once) ────────────── if not quiet: print("\n─── configure.py ───") conf_ninja = run_configure_py(repo_root, modes, tmpdir, quiet) if conf_ninja is None: return 2 if not quiet: print("\nParsing configure.py build.ninja...") conf_parsed = parse_ninja(conf_ninja) # results: mode → (ok, details) results = {} # ── 2. Canary mode for fail-fast ────────────────────────── if len(modes) > 1: canary = "dev" if "dev" in modes else modes[0] remaining = [m for m in modes if m != canary] if not quiet: print(f"\n─── cmake (canary: {canary}) ───") cmake_ninja = run_cmake_configure( repo_root, canary, tmpdir, quiet) if cmake_ninja is None: return 2 cmake_parsed = parse_ninja(cmake_ninja) cmake_mode = MODE_TO_CMAKE[canary] if not quiet: print(f"\n{'═' * 70}") print(f"Mode: {canary} (CMake: {cmake_mode})") print(f"{'═' * 70}") canary_ok, canary_details = compare_mode( canary, repo_root, conf_parsed=conf_parsed, cmake_parsed=cmake_parsed, verbose=args.verbose, quiet=quiet) results[canary] = (canary_ok, canary_details) if canary_ok is False: if not quiet: print(f"\n ✗ Canary mode '{canary}' has differences " f"— skipping {len(remaining)} remaining modes") for m in remaining: results[m] = (None, f"canary '{canary}' failed") else: if not quiet: print(f"\n─── cmake + compare " f"({len(remaining)} remaining modes " f"in parallel) ───") with concurrent.futures.ThreadPoolExecutor( max_workers=len(remaining)) as executor: futures = { executor.submit( _configure_and_compare, repo_root, m, conf_parsed, tmpdir, args.verbose): m for m in remaining } for future in concurrent.futures.as_completed(futures): m = futures[future] try: ok, details = future.result() results[m] = (ok, details) except Exception as e: print(f"ERROR: mode '{m}' raised: {e}", file=sys.stderr) results[m] = (None, f"exception: {e}") else: # Single mode mode = modes[0] if not quiet: print(f"\n─── cmake ({mode}) ───") cmake_ninja = run_cmake_configure( repo_root, mode, tmpdir, quiet) if cmake_ninja is None: return 2 cmake_parsed = parse_ninja(cmake_ninja) cmake_mode = MODE_TO_CMAKE[mode] if not quiet: print(f"\n{'═' * 70}") print(f"Mode: {mode} (CMake: {cmake_mode})") print(f"{'═' * 70}") ok, details = compare_mode( mode, repo_root, conf_parsed=conf_parsed, cmake_parsed=cmake_parsed, verbose=args.verbose, quiet=quiet) results[mode] = (ok, details) # ── Summary ─────────────────────────────────────────────────── if not quiet: print(f"\n{'═' * 70}") print("Summary") print(f"{'═' * 70}") for mode in modes: ok, details = results[mode] cmake_mode = MODE_TO_CMAKE[mode] if ok is None: if isinstance(details, str): status = f"⚠ SKIPPED ({details})" else: status = "⚠ SKIPPED" elif ok: status = "✓ MATCH" else: status = "✗ MISMATCH" if quiet: print(f"{mode}: {status}") else: print(f" {mode:10s} (CMake: {cmake_mode:15s}): {status}") if ok is False and details and isinstance(details, dict): for line in _format_mode_details(details, quiet): print(line) has_failures = any(v[0] is False for v in results.values()) all_pass = all(v[0] is True for v in results.values()) if has_failures: if not quiet: print("\n✗ Some modes have differences.") return 1 elif all_pass: if not quiet: print("\n✓ All modes match!") return 0 else: if not quiet: print("\n✗ Some modes could not be compared.") return 2 if __name__ == "__main__": sys.exit(main())