Files
scylladb/scripts/compare_build_systems.py
Ernest Zaslavsky 1d779804a0 scripts: remove lua library rename workaround from comparison script
Now that cmake/FindLua.cmake uses pkg-config (matching configure.py),
both build systems resolve to the same 'lua' library name.  Remove the
lua/lua-5.4 entries from _KNOWN_LIB_ASYMMETRIES and add 'm' (math
library) as a known transitive dependency that configure.py gets via
pkg-config for lua.
2026-03-29 16:17:45 +03:00

1442 lines
53 KiB
Python
Executable File

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2026-present ScyllaDB
#
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
#
"""
Compare configure.py and CMake build systems by parsing their ninja build files.
Checks three things:
1. Per-file compilation flags — are the same source files compiled with
the same defines, warnings, optimization, and language flags?
2. Link targets — do both systems produce the same set of
executables?
3. Per-target linker settings — are link flags and libraries identical for
every common executable?
configure.py is treated as the baseline. CMake should match it.
Exit codes:
0 All checked modes match
1 Differences found
2 Configuration failed
Both build systems are always configured into a temporary directory —
the user's build tree is never touched.
Examples:
# Compare dev mode
scripts/compare_build_systems.py -m dev
# Compare all modes
scripts/compare_build_systems.py
# CI mode: quiet, strict, all modes
scripts/compare_build_systems.py --ci
# Verbose output showing every flag
scripts/compare_build_systems.py -m debug -v
"""
import argparse
import concurrent.futures
import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile
from collections import defaultdict
from pathlib import Path
# ═══════════════════════════════════════════════════════════════════════════
# Constants
# ═══════════════════════════════════════════════════════════════════════════
MODE_TO_CMAKE = {
"debug": "Debug",
"dev": "Dev",
"release": "RelWithDebInfo",
"sanitize": "Sanitize",
"coverage": "Coverage",
}
ALL_MODES = list(MODE_TO_CMAKE.keys())
# Per-component Boost defines that CMake's imported targets add.
# configure.py uses the single BOOST_ALL_DYN_LINK instead.
_BOOST_PER_COMPONENT_DEFINES = re.compile(
r"-DBOOST_\w+_(DYN_LINK|NO_LIB)$")
# Internal Scylla/Seastar/Abseil library targets that CMake creates as
# intermediate static/shared libraries. configure.py links .o files
# directly. These are structural differences, not bugs.
def _collect_internal_lib_names(*build_lists):
"""Auto-detect internal library names from ninja build outputs.
Any .a or .so file that is a build output (not a system library)
is an internal project library. Returns normalized library names.
This replaces a hardcoded list — new libraries added to either
build system are picked up automatically.
"""
names = set()
for builds in build_lists:
for b in builds:
for out in b["outputs"].split():
if out.endswith(".a") or ".so" in out:
name = normalize_lib_name(out)
if name:
names.add(name)
return names
# Libraries that are known to appear on only one side due to how each
# build system resolves transitive dependencies. Value is the side
# where the library is expected to appear ("conf" or "cmake").
# A library present on BOTH sides always matches and is not checked here.
# A library absent from both sides is irrelevant.
# Only asymmetric presence is checked against this table.
_KNOWN_LIB_ASYMMETRIES = {
# configure.py links these explicitly; CMake resolves them
# transitively through imported targets (Seastar, GnuTLS, etc.)
"stdc++fs": "conf",
"pthread": "conf",
"atomic": "conf",
"boost_date_time": "conf",
"ubsan": "conf",
# Lua transitive deps — configure.py gets them via pkg-config
"m": "conf",
# GnuTLS transitive deps — configure.py links explicitly
"tasn1": "conf",
"idn2": "conf",
"unistring": "conf",
"gmp": "conf",
"nettle": "conf",
"hogweed": "conf",
"p11-kit": "conf",
# Seastar transitive deps — configure.py links explicitly
"uring": "conf",
"hwloc": "conf",
"sctp": "conf",
"udev": "conf",
"protobuf": "conf",
"jsoncpp": "conf",
"fmt": "conf",
# CMake resolves these transitively through Boost imported targets
"boost_atomic": "cmake",
# CMake links ssl explicitly for encryption targets
"ssl": "cmake",
# Linked transitively via Seastar's rt::rt imported target
"rt": "cmake",
}
# ═══════════════════════════════════════════════════════════════════════════
# Ninja file parsing
# ═══════════════════════════════════════════════════════════════════════════
def parse_ninja(filepath):
"""Parse a ninja build file into (variables, rules, builds).
Follows subninja/include directives. Returns:
variables: dict[str, str] — top-level variable assignments
rules: dict[str, dict] — rule name → {command, ...}
builds: list[dict] — build statements with outputs,
rule, inputs, implicit, vars
"""
variables = {}
builds = []
rules = {}
def _parse(path, into_vars, into_builds, into_rules):
base_dir = os.path.dirname(path)
try:
with open(path) as f:
lines = f.readlines()
except FileNotFoundError:
return
i = 0
while i < len(lines):
line = lines[i].rstrip("\n")
if not line or line.startswith("#"):
i += 1
continue
# subninja / include
m = re.match(r"^(subninja|include)\s+(.+)", line)
if m:
inc_path = m.group(2).strip()
if not os.path.isabs(inc_path):
inc_path = os.path.join(base_dir, inc_path)
_parse(inc_path, into_vars, into_builds, into_rules)
i += 1
continue
# Rule definition
m = re.match(r"^rule\s+(\S+)", line)
if m:
rule_name = m.group(1)
rule_vars = {}
i += 1
while i < len(lines) and lines[i].startswith(" "):
rline = lines[i].strip()
rm = re.match(r"(\S+)\s*=\s*(.*)", rline)
if rm:
rule_vars[rm.group(1)] = rm.group(2)
i += 1
into_rules[rule_name] = rule_vars
continue
# Top-level variable
m = re.match(r"^([a-zA-Z_][a-zA-Z0-9_.]*)\s*=\s*(.*)", line)
if m and not line.startswith(" "):
into_vars[m.group(1)] = m.group(2)
i += 1
continue
# Build statement
m = re.match(r"^build\s+(.+?):\s+(\S+)\s*(.*)", line)
if m:
outputs_str = m.group(1)
rule = m.group(2)
rest = m.group(3)
i += 1
build_vars = {}
while i < len(lines) and lines[i].startswith(" "):
bline = lines[i].strip()
bm = re.match(r"(\S+)\s*=\s*(.*)", bline)
if bm:
build_vars[bm.group(1)] = bm.group(2)
i += 1
parts = re.split(r"\s*\|\|\s*|\s*\|\s*", rest)
explicit = parts[0].strip() if parts else ""
implicit = parts[1].strip() if len(parts) > 1 else ""
into_builds.append({
"outputs": outputs_str.strip(),
"rule": rule,
"inputs": explicit,
"implicit": implicit,
"vars": build_vars,
})
continue
i += 1
_parse(str(filepath), variables, builds, rules)
return variables, rules, builds
def resolve_var(value, variables, depth=0):
"""Recursively resolve $var and ${var} references."""
if depth > 10 or "$" not in value:
return value
def _repl(m):
name = m.group(1) or m.group(2)
return variables.get(name, "")
result = re.sub(r"\$\{(\w+)\}|\$(\w+)", _repl, value)
if "$" in result and result != value:
return resolve_var(result, variables, depth + 1)
return result
# ═══════════════════════════════════════════════════════════════════════════
# Flag extraction helpers
# ═══════════════════════════════════════════════════════════════════════════
def tokenize(flags_str):
"""Split a flags string into tokens, joining multi-word flags."""
tokens = []
parts = flags_str.split()
i = 0
while i < len(parts):
if (parts[i] in ("-Xclang", "-mllvm", "--param", "-Xlinker")
and i + 1 < len(parts)):
tokens.append(f"{parts[i]} {parts[i+1]}")
i += 2
else:
tokens.append(parts[i])
i += 1
return tokens
def categorize_compile_flags(command_str):
"""Extract and categorize compilation flags from a command string.
Returns dict with keys: defines, warnings, f_flags, opt_flags,
arch_flags, std_flags.
"""
try:
tokens = shlex.split(command_str)
except ValueError:
tokens = command_str.split()
flags = {
"defines": set(),
"warnings": set(),
"f_flags": set(),
"opt_flags": set(),
"arch_flags": set(),
"std_flags": set(),
}
skip_next = False
for tok in tokens:
if skip_next:
skip_next = False
continue
if tok.startswith("-D"):
if _BOOST_PER_COMPONENT_DEFINES.match(tok):
continue
# Normalize version defines that contain git hashes
if tok.startswith("-DSCYLLA_RELEASE="):
tok = "-DSCYLLA_RELEASE=<release>"
elif tok.startswith("-DSCYLLA_VERSION="):
tok = "-DSCYLLA_VERSION=<version>"
flags["defines"].add(tok)
elif tok.startswith("-W"):
if tok == "-Winvalid-pch":
continue
# -Wno-backend-plugin is added by configure.py when a PGO
# profile is available. CMake handles PGO separately.
if tok == "-Wno-backend-plugin":
continue
flags["warnings"].add(tok)
elif tok.startswith("-f"):
if "-ffile-prefix-map=" in tok:
continue
# LTO and PGO flags are configuration-dependent options
# (--lto, --pgo, --use-profile for configure.py;
# Scylla_PROFDATA_FILE for CMake), not mode-inherent.
if (tok.startswith("-flto")
or tok == "-ffat-lto-objects"
or tok == "-fno-lto"
or tok.startswith("-fprofile-use=")
or tok.startswith("-fprofile-generate")
or tok == "-fpch-validate-input-files-content"):
continue
flags["f_flags"].add(tok)
elif tok.startswith("-O"):
flags["opt_flags"].add(tok)
elif tok.startswith("-march="):
flags["arch_flags"].add(tok)
elif tok.startswith("-std="):
flags["std_flags"].add(tok)
elif tok in ("-o", "-MT", "-MF", "-Xclang"):
skip_next = True
elif tok in ("-include-pch", "-include"):
skip_next = True
elif tok.startswith(("-I", "-iquote", "-isystem")):
if tok in ("-I", "-iquote", "-isystem"):
skip_next = True
continue
return flags
def normalize_lib_name(token):
"""Extract canonical library name from -l, .a, or .so tokens."""
if token.startswith("-l"):
return token[2:]
basename = os.path.basename(token)
m = re.match(r"lib(.+?)\.(?:a|so(?:\.\S*)?)", basename)
return m.group(1) if m else None
def normalize_linker_flag(tok):
"""Normalize a linker flag to a canonical comparable form."""
if tok.startswith("-Wl,"):
parts = tok[4:].split(",")
result = set()
for part in parts:
if "--dynamic-linker" in part:
result.add("-Wl,--dynamic-linker=<padded>")
elif "-rpath" in part:
result.add("-Wl,-rpath=<paths>")
elif "--build-id" in part:
result.add(f"-Wl,{part}")
elif part in ("--push-state", "--pop-state",
"--whole-archive", "--no-whole-archive",
"-Bstatic", "-Bdynamic"):
continue
elif "--strip" in part:
result.add(f"-Wl,{part}")
elif part and not part.startswith("/"):
# Skip bare paths (rpath values, library search paths)
result.add(f"-Wl,{part}")
return result
if tok.startswith("-Xlinker "):
arg = tok.split(" ", 1)[1]
if "--dynamic-linker" in arg:
return {"-Wl,--dynamic-linker=<padded>"}
if "--build-id" in arg:
return {f"-Wl,{arg}"}
if "-rpath" in arg:
return {"-Wl,-rpath=<paths>"}
if "--dependency-file" in arg:
return set()
if arg in ("--push-state", "--pop-state",
"--whole-archive", "--no-whole-archive",
"-Bstatic", "-Bdynamic"):
return set()
return {f"-Wl,{arg}"}
if tok.startswith("-fuse-ld=") or tok.startswith("--ld-path="):
name = tok.split("=", 1)[1]
name = os.path.basename(name)
if name.startswith("ld."):
name = name[3:]
return {f"linker={name}"}
return {tok}
# ═══════════════════════════════════════════════════════════════════════════
# Source file extraction from ninja builds
# ═══════════════════════════════════════════════════════════════════════════
def _is_scylla_source(rel_path):
"""True if this is a Scylla-owned source file (not seastar/abseil)."""
return (not rel_path.startswith("seastar/")
and not rel_path.startswith("abseil/")
and not rel_path.startswith("build")
and not rel_path.startswith("..")
and not os.path.isabs(rel_path)
and rel_path != "tools/patchelf.cc"
and rel_path != "exported_templates.cc"
and (rel_path.endswith(".cc") or rel_path.endswith(".cpp")))
def extract_configure_compile_entries(variables, rules, builds,
mode, source_dir):
"""Extract per-source-file flags from configure.py build.ninja.
Returns dict: relative_source_path → categorized flags dict.
"""
entries = {}
mode_prefix = f"build/{mode}/"
# Find compile rules for this mode
compile_rules = {}
for name, rvars in rules.items():
if (name.startswith(f"cxx.{mode}")
or name.startswith(f"cxx_with_pch.{mode}")):
compile_rules[name] = rvars
if not compile_rules:
return entries
for b in builds:
if b["rule"] not in compile_rules:
continue
output = b["outputs"]
output = output.replace("$builddir/", "build/")
if not output.startswith(mode_prefix):
continue
# Get source file from inputs
src_tokens = b["inputs"].strip().split()
if not src_tokens:
continue
src = src_tokens[0]
src = src.replace("$builddir/", "build/")
# Make source path relative
if os.path.isabs(src):
try:
rel_src = os.path.relpath(src, source_dir)
except ValueError:
rel_src = src
else:
rel_src = src
if not _is_scylla_source(rel_src):
continue
# Build effective command by resolving variables.
# Ninja scoping: build-statement variable VALUES are resolved
# against the enclosing (file-level) scope, NOT against themselves.
rule_def = compile_rules[b["rule"]]
outer_scope = dict(variables)
outer_scope.update(rule_def)
resolved_build_vars = {}
for k, v in b["vars"].items():
resolved_build_vars[k] = resolve_var(v, outer_scope)
merged = dict(variables)
merged.update(rule_def)
merged.update(resolved_build_vars)
merged["in"] = b["inputs"]
merged["out"] = b["outputs"]
command = rule_def.get("command", "")
resolved = resolve_var(command, merged)
entries[rel_src] = categorize_compile_flags(resolved)
return entries
def extract_cmake_compile_entries(builds, source_dir):
"""Extract per-source-file flags from CMake build.ninja.
Returns dict: relative_source_path → categorized flags dict.
"""
entries = {}
for b in builds:
if "CXX_COMPILER" not in b["rule"]:
continue
# Get source file from inputs
src_tokens = b["inputs"].strip().split()
if not src_tokens:
continue
src = src_tokens[0]
if os.path.isabs(src):
try:
rel_src = os.path.relpath(src, source_dir)
except ValueError:
rel_src = src
else:
rel_src = src
if not _is_scylla_source(rel_src):
continue
# Build a pseudo-command from DEFINES + FLAGS
defines = b["vars"].get("DEFINES", "")
flags = b["vars"].get("FLAGS", "")
pseudo_cmd = f"{defines} {flags}"
entries[rel_src] = categorize_compile_flags(pseudo_cmd)
return entries
# ═══════════════════════════════════════════════════════════════════════════
# Link target extraction from ninja builds
# ═══════════════════════════════════════════════════════════════════════════
def _is_link_rule(rule):
"""True if the rule is a link rule (executable linker).
Excludes link_stripped rules which are just stripped copies of the
unstripped targets (configure.py creates both variants).
"""
rl = rule.lower()
return ("link" in rl and "static" not in rl and "shared" not in rl
and "module" not in rl and "stripped" not in rl)
def _extract_link_info(build, variables, rules):
"""Extract linker flags and libraries from a link build statement."""
rule_def = rules.get(build["rule"], {})
# Resolve build variable values against the outer scope first
# (ninja scoping: build var RHS is evaluated in file scope).
outer_scope = dict(variables)
outer_scope.update(rule_def)
resolved_build_vars = {}
for k, v in build["vars"].items():
resolved_build_vars[k] = resolve_var(v, outer_scope)
merged = dict(variables)
merged.update(rule_def)
merged.update(resolved_build_vars)
merged["in"] = build["inputs"]
merged["out"] = build["outputs"]
# Resolve command from the rule template (configure.py style)
command_template = rule_def.get("command", "")
command = resolve_var(command_template, merged)
# For CMake, also look at explicit LINK_FLAGS and LINK_LIBRARIES vars
link_flags_var = build["vars"].get("LINK_FLAGS", "")
link_libs_var = build["vars"].get("LINK_LIBRARIES", "")
linker_flags = set()
libraries = set()
# Parse from resolved command (for configure.py)
if command_template:
try:
tokens = shlex.split(command)
except ValueError:
tokens = command.split()
skip = False
for tok in tokens:
if skip:
skip = False
continue
if tok in ("-o", "-MF", "-MT"):
skip = True
continue
# Skip LTO/PGO linker flags — configuration-dependent
if (tok.startswith("-flto") or tok == "-fno-lto"
or tok == "-ffat-lto-objects"
or tok.startswith("-fprofile-use=")
or tok.startswith("-fprofile-generate")):
continue
if tok.startswith("-fsanitize") or tok.startswith("-fno-sanitize"):
linker_flags.update(normalize_linker_flag(tok))
elif tok.startswith("-fuse-ld=") or tok.startswith("--ld-path="):
linker_flags.update(normalize_linker_flag(tok))
elif tok.startswith("-Wl,"):
linker_flags.update(normalize_linker_flag(tok))
elif tok == "-static-libstdc++":
linker_flags.add(tok)
elif tok == "-s":
linker_flags.add("-Wl,--strip-all")
# Libraries
if tok.startswith("-l"):
lib = tok[2:]
libraries.add(lib)
elif tok.endswith(".o"):
continue
elif tok.endswith(".a") or ".so" in tok:
name = normalize_lib_name(tok)
if name:
libraries.add(name)
# Parse from explicit LINK_FLAGS/LINK_LIBRARIES (CMake style)
if link_flags_var:
for tok in tokenize(link_flags_var):
# Skip LTO/PGO linker flags — configuration-dependent
if (tok.startswith("-flto") or tok == "-fno-lto"
or tok == "-ffat-lto-objects"
or tok.startswith("-fprofile-use=")
or tok.startswith("-fprofile-generate")):
continue
if tok.startswith("-fsanitize") or tok.startswith("-fno-sanitize"):
linker_flags.update(normalize_linker_flag(tok))
elif tok.startswith("-fuse-ld=") or tok.startswith("--ld-path="):
linker_flags.update(normalize_linker_flag(tok))
elif tok.startswith("-Wl,") or tok.startswith("-Xlinker "):
linker_flags.update(normalize_linker_flag(tok))
elif tok == "-s":
linker_flags.add("-Wl,--strip-all")
if link_libs_var:
for tok in tokenize(link_libs_var):
if tok.startswith("-fsanitize") or tok.startswith("-fno-sanitize"):
linker_flags.update(normalize_linker_flag(tok))
elif tok.startswith("-Wl,") or tok.startswith("-Xlinker "):
linker_flags.update(normalize_linker_flag(tok))
elif tok.startswith("-l"):
libraries.add(tok[2:])
elif tok.endswith(".o"):
continue
elif tok.endswith(".a") or ".so" in tok:
name = normalize_lib_name(tok)
if name:
libraries.add(name)
# Also extract libraries from build inputs (implicit deps)
all_inputs = build["inputs"] + " " + build.get("implicit", "")
all_inputs = resolve_var(all_inputs, merged)
for tok in all_inputs.split():
if tok.endswith(".o"):
continue
if tok.endswith(".a") or ".so" in tok:
name = normalize_lib_name(tok)
if name:
libraries.add(name)
return {"linker_flags": linker_flags, "libraries": libraries}
def extract_configure_link_targets(variables, rules, builds, mode):
"""Extract link targets from configure.py build.ninja.
Returns dict: target_name → {linker_flags, libraries}.
"""
result = {}
mode_prefix = f"build/{mode}/"
for b in builds:
if not _is_link_rule(b["rule"]):
continue
if not b["rule"].endswith(f".{mode}"):
continue
target = b["outputs"].replace("$builddir/", "build/")
if not target.startswith(mode_prefix):
continue
target = target[len(mode_prefix):]
if (target.endswith(".stripped") or target.endswith(".debug")
or target.endswith(".so") or target.endswith(".a")):
continue
if target.startswith("seastar/") or target.startswith("abseil/"):
continue
# Strip _g suffix (unstripped variant)
if target.endswith("_g"):
target = target[:-2]
result[target] = _extract_link_info(b, variables, rules)
return result
def extract_cmake_link_targets(variables, rules, builds, mode):
"""Extract link targets from CMake build.ninja.
Returns dict: target_name → {linker_flags, libraries}.
"""
result = {}
cmake_type = MODE_TO_CMAKE.get(mode, "")
for b in builds:
if not _is_link_rule(b["rule"]):
continue
target = b["outputs"]
# Skip non-executable link rules
if (target.endswith(".stripped") or target.endswith(".debug")
or target.endswith(".so") or target.endswith(".a")):
continue
if target.startswith("seastar/") or target.startswith("abseil/"):
continue
# Strip cmake type prefix if present (e.g., "Dev/scylla" → "scylla")
if cmake_type and target.startswith(f"{cmake_type}/"):
target = target[len(cmake_type) + 1:]
result[target] = _extract_link_info(b, variables, rules)
return result
# ═══════════════════════════════════════════════════════════════════════════
# Configuration helpers
# ═══════════════════════════════════════════════════════════════════════════
def find_repo_root():
"""Find the repository root by looking for configure.py."""
candidate = Path(__file__).resolve().parent.parent
if (candidate / "configure.py").exists():
return candidate
candidate = Path.cwd()
if (candidate / "configure.py").exists():
return candidate
sys.exit("ERROR: Cannot find repository root (no configure.py found)")
def _find_ninja():
"""Find the ninja executable."""
for name in ("ninja", "ninja-build"):
path = shutil.which(name)
if path:
return path
return "ninja"
def run_configure_py(repo_root, modes, tmpdir, quiet=False):
"""Run configure.py into a temporary directory.
Uses --out and --build-dir so the user's build tree is never touched.
Returns the path to the generated build.ninja, or None on failure.
"""
ninja_file = tmpdir / "build.ninja"
build_dir = tmpdir / "conf-build"
mode_args = []
for m in modes:
mode_args.extend(["--mode", m])
cmd = [
sys.executable, str(repo_root / "configure.py"),
"--out", str(ninja_file),
"--build-dir", str(build_dir),
] + mode_args
if not quiet:
print(f" $ {' '.join(cmd)}")
result = subprocess.run(cmd, cwd=str(repo_root),
capture_output=quiet, text=True)
if result.returncode != 0:
print(f"ERROR: configure.py failed (exit {result.returncode})",
file=sys.stderr)
if quiet and result.stderr:
print(result.stderr, file=sys.stderr)
return None
return ninja_file
def run_cmake_configure(repo_root, mode, tmpdir, quiet=False):
"""Run cmake into a temporary directory.
Returns the path to the generated build.ninja, or None on failure.
"""
cmake_type = MODE_TO_CMAKE[mode]
build_dir = tmpdir / f"cmake-{mode}"
ninja = _find_ninja()
cmd = [
"cmake",
f"-DCMAKE_BUILD_TYPE={cmake_type}",
f"-DCMAKE_MAKE_PROGRAM={ninja}",
"-DCMAKE_C_COMPILER=clang",
"-DCMAKE_CXX_COMPILER=clang++",
"-G", "Ninja",
"-S", str(repo_root),
"-B", str(build_dir),
]
if not quiet:
print(f" $ {' '.join(cmd)}")
result = subprocess.run(cmd, cwd=str(repo_root),
capture_output=quiet, text=True)
if result.returncode != 0:
print(f"ERROR: cmake failed for mode '{mode}' "
f"(exit {result.returncode})", file=sys.stderr)
if quiet and result.stderr:
print(result.stderr, file=sys.stderr)
return None
return build_dir / "build.ninja"
# ═══════════════════════════════════════════════════════════════════════════
# Comparison logic
# ═══════════════════════════════════════════════════════════════════════════
def compare_flag_sets(label, set_a, set_b):
"""Compare two sets, return list of difference strings."""
only_a = set_a - set_b
only_b = set_b - set_a
diffs = []
if only_a:
diffs.append(f"{label}: only in configure.py: {sorted(only_a)}")
if only_b:
diffs.append(f"{label}: only in CMake: {sorted(only_b)}")
return diffs
def compare_compile_entries(conf_entries, cmake_entries, verbose=False,
quiet=False):
"""Compare per-file compilation flags.
Returns (ok, summary_dict).
"""
common = sorted(set(conf_entries) & set(cmake_entries))
only_conf = sorted(set(conf_entries) - set(cmake_entries))
only_cmake = sorted(set(cmake_entries) - set(conf_entries))
if not quiet:
print(f"\n Source files in both: {len(common)}")
print(f" Source files only in configure.py: {len(only_conf)}")
print(f" Source files only in CMake: {len(only_cmake)}")
if only_conf:
print("\n Files only in configure.py:")
for f in only_conf:
print(f" {f}")
if only_cmake:
print("\n Files only in CMake:")
for f in only_cmake:
print(f" {f}")
files_with_diffs = 0
aggregate = defaultdict(int)
for src in common:
conf_flags = conf_entries[src]
cmake_flags = cmake_entries[src]
file_diffs = []
for cat in ("defines", "warnings", "f_flags", "opt_flags",
"arch_flags", "std_flags"):
d = compare_flag_sets(cat, conf_flags[cat], cmake_flags[cat])
file_diffs.extend(d)
for flag in conf_flags[cat] - cmake_flags[cat]:
aggregate[f"only-configure.py {cat}: {flag}"] += 1
for flag in cmake_flags[cat] - conf_flags[cat]:
aggregate[f"only-cmake {cat}: {flag}"] += 1
if file_diffs:
files_with_diffs += 1
if verbose and not quiet:
print(f"\n DIFF {src}:")
for d in file_diffs:
print(f" {d}")
if not quiet:
print(f"\n Files with flag differences: "
f"{files_with_diffs} / {len(common)}")
if aggregate:
print("\n Aggregate flag diffs (flag → # files):")
for key, cnt in sorted(aggregate.items(), key=lambda x: -x[1]):
print(f" {key} ({cnt} files)")
ok = files_with_diffs == 0 and not only_conf and not only_cmake
return ok, {
"common": len(common),
"only_conf": only_conf,
"only_cmake": only_cmake,
"files_with_diffs": files_with_diffs,
"aggregate": dict(aggregate),
}
def compare_link_target_sets(conf_targets, cmake_targets, verbose=False,
quiet=False):
"""Compare which targets exist in both systems.
Returns (ok, summary_dict).
"""
conf_set = set(conf_targets)
cmake_set = set(cmake_targets)
only_conf = sorted(conf_set - cmake_set)
only_cmake = sorted(cmake_set - conf_set)
if not quiet:
print(f"\n Targets in configure.py: {len(conf_set)}")
print(f" Targets in CMake: {len(cmake_set)}")
if only_conf:
print(f"\n ✗ Only in configure.py ({len(only_conf)}):")
for t in only_conf:
print(f" {t}")
if only_cmake:
print(f"\n ✗ Only in CMake ({len(only_cmake)}):")
for t in only_cmake:
print(f" {t}")
ok = not only_conf and not only_cmake
if ok and not quiet:
print(" ✓ All targets match!")
return ok, {
"only_conf": only_conf,
"only_cmake": only_cmake,
}
def compare_link_settings(conf_targets, cmake_targets, internal_libs,
verbose=False, quiet=False):
"""Compare linker flags and libraries for common targets.
Args:
internal_libs: set of library names that are build outputs of the
project (auto-detected). These are filtered from both sides
before comparison.
Returns (ok, summary_dict).
"""
common = sorted(set(conf_targets) & set(cmake_targets))
# Standalone tools that have known structural differences
_CPP_APPS = {"patchelf"}
flag_diffs = 0
lib_diffs = 0
flag_agg_conf = defaultdict(int)
flag_agg_cmake = defaultdict(int)
lib_agg_conf = defaultdict(int)
lib_agg_cmake = defaultdict(int)
for target in common:
conf = conf_targets[target]
cmake = cmake_targets[target]
# Linker flags
only_conf_flags = conf["linker_flags"] - cmake["linker_flags"]
only_cmake_flags = cmake["linker_flags"] - conf["linker_flags"]
# Known exception: standalone tools don't get -fno-lto in configure.py
target_base = target.rsplit("/", 1)[-1] if "/" in target else target
if target_base in _CPP_APPS:
only_cmake_flags.discard("-fno-lto")
if only_conf_flags or only_cmake_flags:
flag_diffs += 1
for f in only_conf_flags:
flag_agg_conf[f] += 1
for f in only_cmake_flags:
flag_agg_cmake[f] += 1
if verbose and not quiet:
print(f"\n {target}:")
if only_conf_flags:
print(f" Linker flags only in configure.py: "
f"{sorted(only_conf_flags)}")
if only_cmake_flags:
print(f" Linker flags only in CMake: "
f"{sorted(only_cmake_flags)}")
# Libraries
conf_libs = conf["libraries"] - internal_libs
cmake_libs = cmake["libraries"] - internal_libs
only_conf_libs = conf_libs - cmake_libs
only_cmake_libs = cmake_libs - conf_libs
# Subtract known transitive-resolution asymmetries
for lib, expected_side in _KNOWN_LIB_ASYMMETRIES.items():
if expected_side == "conf":
only_conf_libs.discard(lib)
elif expected_side == "cmake":
only_cmake_libs.discard(lib)
if only_conf_libs or only_cmake_libs:
lib_diffs += 1
for lib in only_conf_libs:
lib_agg_conf[lib] += 1
for lib in only_cmake_libs:
lib_agg_cmake[lib] += 1
if verbose and not quiet:
print(f"\n {target}:")
if only_conf_libs:
print(f" Libs only in configure.py: "
f"{sorted(only_conf_libs)}")
if only_cmake_libs:
print(f" Libs only in CMake: "
f"{sorted(only_cmake_libs)}")
if not quiet:
print(f"\n Linker flag differences: {flag_diffs} / {len(common)}")
if flag_agg_conf or flag_agg_cmake:
print("\n Aggregate linker flag diffs:")
for f, c in sorted(flag_agg_conf.items(), key=lambda x: -x[1]):
print(f" only-configure.py {f} ({c} targets)")
for f, c in sorted(flag_agg_cmake.items(), key=lambda x: -x[1]):
print(f" only-cmake {f} ({c} targets)")
print(f"\n Library differences: {lib_diffs} / {len(common)}")
if lib_agg_conf or lib_agg_cmake:
print("\n Aggregate library diffs:")
for lib, c in sorted(lib_agg_conf.items(), key=lambda x: -x[1]):
print(f" only-configure.py {lib} ({c} targets)")
for lib, c in sorted(lib_agg_cmake.items(), key=lambda x: -x[1]):
print(f" only-cmake {lib} ({c} targets)")
ok = flag_diffs == 0 and lib_diffs == 0
if ok and not quiet:
print(" ✓ Linker flags and libraries match for all common targets!")
return ok, {
"flag_diffs": flag_diffs,
"lib_diffs": lib_diffs,
"flag_agg_conf": dict(flag_agg_conf),
"flag_agg_cmake": dict(flag_agg_cmake),
"lib_agg_conf": dict(lib_agg_conf),
"lib_agg_cmake": dict(lib_agg_cmake),
}
# ═══════════════════════════════════════════════════════════════════════════
# Mode-level comparison orchestrator
# ═══════════════════════════════════════════════════════════════════════════
def compare_mode(mode, repo_root, conf_parsed, cmake_parsed,
verbose=False, quiet=False):
"""Run all comparisons for one mode.
Args:
conf_parsed: Parsed configure.py build.ninja (variables, rules, builds).
cmake_parsed: Parsed CMake build.ninja (variables, rules, builds).
Returns:
(status, details) where:
status: True=match, False=mismatch, None=skipped
details: dict with compile/targets/linker summaries, or None
"""
source_dir = str(repo_root)
conf_vars, conf_rules, conf_builds = conf_parsed
cmake_vars, cmake_rules, cmake_builds = cmake_parsed
# Check that configure.py build.ninja has this mode
has_mode = any(r.endswith(f".{mode}") for r in conf_rules)
if not has_mode:
if not quiet:
print(f" ⚠ configure.py build.ninja doesn't contain mode '{mode}'")
return None, None
all_ok = True
# ── 1. Per-file compilation flags ─────────────────────────────
if not quiet:
print(f"\n {''*56}")
print(f" Compilation flags (per-file)")
print(f" {''*56}")
conf_entries = extract_configure_compile_entries(
conf_vars, conf_rules, conf_builds, mode, source_dir)
cmake_entries = extract_cmake_compile_entries(
cmake_builds, source_dir)
flags_ok, compile_summary = compare_compile_entries(
conf_entries, cmake_entries, verbose, quiet)
if not flags_ok:
all_ok = False
# ── 2. Link targets ───────────────────────────────────────────
if not quiet:
print(f"\n {''*56}")
print(f" Link targets")
print(f" {''*56}")
conf_link = extract_configure_link_targets(
conf_vars, conf_rules, conf_builds, mode)
cmake_link = extract_cmake_link_targets(
cmake_vars, cmake_rules, cmake_builds, mode)
targets_ok, targets_summary = compare_link_target_sets(
conf_link, cmake_link, verbose, quiet)
if not targets_ok:
all_ok = False
# ── 3. Linker flags & libraries ───────────────────────────────
if not quiet:
print(f"\n {''*56}")
print(f" Linker flags & libraries")
print(f" {''*56}")
# Auto-detect internal library names from build outputs of both
# systems, so we don't need a hardcoded list.
internal_libs = _collect_internal_lib_names(conf_builds, cmake_builds)
linker_ok, linker_summary = compare_link_settings(
conf_link, cmake_link, internal_libs, verbose, quiet)
if not linker_ok:
all_ok = False
details = {
"compile": compile_summary,
"targets": targets_summary,
"linker": linker_summary,
}
return all_ok, details
# ═══════════════════════════════════════════════════════════════════════════
# Summary formatting
# ═══════════════════════════════════════════════════════════════════════════
_MAX_AGGREGATE_ITEMS = 5
def _format_mode_details(details, quiet=False):
"""Format comparison details for inline display in the summary."""
lines = []
indent = " " if quiet else " "
compile_info = details.get("compile", {})
targets_info = details.get("targets", {})
linker_info = details.get("linker", {})
# Compilation flags
files_diff = compile_info.get("files_with_diffs", 0)
only_conf = compile_info.get("only_conf", [])
only_cmake = compile_info.get("only_cmake", [])
aggregate = compile_info.get("aggregate", {})
if files_diff or only_conf or only_cmake:
parts = []
if files_diff:
parts.append(f"{files_diff} files with flag diffs")
if only_conf:
parts.append(f"{len(only_conf)} sources only in configure.py")
if only_cmake:
parts.append(f"{len(only_cmake)} sources only in CMake")
lines.append(f"{indent}Compilation: {', '.join(parts)}")
if aggregate:
top = sorted(aggregate.items(), key=lambda x: -x[1])
for key, cnt in top[:_MAX_AGGREGATE_ITEMS]:
lines.append(f"{indent} {key} ({cnt} files)")
if len(top) > _MAX_AGGREGATE_ITEMS:
lines.append(f"{indent} ... and {len(top) - _MAX_AGGREGATE_ITEMS} more")
# Link targets
t_only_conf = targets_info.get("only_conf", [])
t_only_cmake = targets_info.get("only_cmake", [])
if t_only_conf or t_only_cmake:
parts = []
if t_only_conf:
parts.append(f"{len(t_only_conf)} only in configure.py")
if t_only_cmake:
parts.append(f"{len(t_only_cmake)} only in CMake")
lines.append(f"{indent}Link targets: {', '.join(parts)}")
# Linker settings
flag_diffs = linker_info.get("flag_diffs", 0)
lib_diffs = linker_info.get("lib_diffs", 0)
if flag_diffs or lib_diffs:
parts = []
if flag_diffs:
parts.append(f"{flag_diffs} targets with flag diffs")
if lib_diffs:
parts.append(f"{lib_diffs} targets with lib diffs")
lines.append(f"{indent}Linker: {', '.join(parts)}")
agg_items = []
for key, cnt in sorted(linker_info.get("flag_agg_conf", {}).items(),
key=lambda x: -x[1]):
agg_items.append(f"{indent} flag only in configure.py: {key} ({cnt} targets)")
for key, cnt in sorted(linker_info.get("flag_agg_cmake", {}).items(),
key=lambda x: -x[1]):
agg_items.append(f"{indent} flag only in CMake: {key} ({cnt} targets)")
for key, cnt in sorted(linker_info.get("lib_agg_conf", {}).items(),
key=lambda x: -x[1]):
agg_items.append(f"{indent} lib only in configure.py: {key} ({cnt} targets)")
for key, cnt in sorted(linker_info.get("lib_agg_cmake", {}).items(),
key=lambda x: -x[1]):
agg_items.append(f"{indent} lib only in CMake: {key} ({cnt} targets)")
for item in agg_items[:_MAX_AGGREGATE_ITEMS]:
lines.append(item)
if len(agg_items) > _MAX_AGGREGATE_ITEMS:
lines.append(f"{indent} ... and {len(agg_items) - _MAX_AGGREGATE_ITEMS} more")
return lines
def _configure_and_compare(repo_root, mode, conf_parsed, tmpdir, verbose):
"""Configure cmake and compare a single mode.
Runs quietly — intended for parallel execution.
Returns (ok, details) tuple.
"""
cmake_ninja = run_cmake_configure(repo_root, mode, tmpdir, quiet=True)
if cmake_ninja is None:
return None, "cmake configuration failed"
cmake_parsed = parse_ninja(cmake_ninja)
return compare_mode(
mode, repo_root, conf_parsed=conf_parsed,
cmake_parsed=cmake_parsed, verbose=verbose, quiet=True)
# ═══════════════════════════════════════════════════════════════════════════
# CLI
# ═══════════════════════════════════════════════════════════════════════════
def parse_args():
parser = argparse.ArgumentParser(
prog="compare_build_systems.py",
description=(
"Compare configure.py and CMake build systems by parsing their "
"ninja build files. Both systems are always configured into a "
"temporary directory — the user's build tree is never touched."),
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""\
examples:
# Compare dev mode
%(prog)s -m dev
# Compare all modes
%(prog)s
# CI mode: quiet, strict (exit 1 on any diff)
%(prog)s --ci
# Verbose output showing per-file differences
%(prog)s -m debug -v
mode mapping:
configure.py CMake
────────────── ──────────────
debug Debug
dev Dev
release RelWithDebInfo
sanitize Sanitize
coverage Coverage
""")
parser.add_argument(
"-m", "--mode",
choices=ALL_MODES + ["all"],
default="all",
help="Build mode to compare (default: all)")
parser.add_argument(
"-v", "--verbose",
action="store_true",
help="Show per-file/per-target differences")
parser.add_argument(
"-q", "--quiet",
action="store_true",
help="Minimal output — only summary and errors")
parser.add_argument(
"--ci",
action="store_true",
help="CI mode: quiet + strict (exit 1 on any diff)")
parser.add_argument(
"--source-dir",
type=Path, default=None,
help="Repository root directory (default: auto-detect)")
args = parser.parse_args()
if args.ci:
args.quiet = True
return args
def main():
args = parse_args()
repo_root = args.source_dir or find_repo_root()
modes = ALL_MODES if args.mode == "all" else [args.mode]
quiet = args.quiet
if not quiet:
print("=" * 70)
print("Build System Comparison: configure.py vs CMake")
print("=" * 70)
# Everything runs in a temporary directory so we never touch the
# user's build tree.
with tempfile.TemporaryDirectory(prefix="scylla-cmp-") as tmpdir_str:
tmpdir = Path(tmpdir_str)
# ── 1. Run configure.py (all modes at once) ──────────────
if not quiet:
print("\n─── configure.py ───")
conf_ninja = run_configure_py(repo_root, modes, tmpdir, quiet)
if conf_ninja is None:
return 2
if not quiet:
print("\nParsing configure.py build.ninja...")
conf_parsed = parse_ninja(conf_ninja)
# results: mode → (ok, details)
results = {}
# ── 2. Canary mode for fail-fast ──────────────────────────
if len(modes) > 1:
canary = "dev" if "dev" in modes else modes[0]
remaining = [m for m in modes if m != canary]
if not quiet:
print(f"\n─── cmake (canary: {canary}) ───")
cmake_ninja = run_cmake_configure(
repo_root, canary, tmpdir, quiet)
if cmake_ninja is None:
return 2
cmake_parsed = parse_ninja(cmake_ninja)
cmake_mode = MODE_TO_CMAKE[canary]
if not quiet:
print(f"\n{'' * 70}")
print(f"Mode: {canary} (CMake: {cmake_mode})")
print(f"{'' * 70}")
canary_ok, canary_details = compare_mode(
canary, repo_root,
conf_parsed=conf_parsed,
cmake_parsed=cmake_parsed,
verbose=args.verbose, quiet=quiet)
results[canary] = (canary_ok, canary_details)
if canary_ok is False:
if not quiet:
print(f"\n ✗ Canary mode '{canary}' has differences "
f"— skipping {len(remaining)} remaining modes")
for m in remaining:
results[m] = (None, f"canary '{canary}' failed")
else:
if not quiet:
print(f"\n─── cmake + compare "
f"({len(remaining)} remaining modes "
f"in parallel) ───")
with concurrent.futures.ThreadPoolExecutor(
max_workers=len(remaining)) as executor:
futures = {
executor.submit(
_configure_and_compare, repo_root, m,
conf_parsed, tmpdir, args.verbose): m
for m in remaining
}
for future in concurrent.futures.as_completed(futures):
m = futures[future]
try:
ok, details = future.result()
results[m] = (ok, details)
except Exception as e:
print(f"ERROR: mode '{m}' raised: {e}",
file=sys.stderr)
results[m] = (None, f"exception: {e}")
else:
# Single mode
mode = modes[0]
if not quiet:
print(f"\n─── cmake ({mode}) ───")
cmake_ninja = run_cmake_configure(
repo_root, mode, tmpdir, quiet)
if cmake_ninja is None:
return 2
cmake_parsed = parse_ninja(cmake_ninja)
cmake_mode = MODE_TO_CMAKE[mode]
if not quiet:
print(f"\n{'' * 70}")
print(f"Mode: {mode} (CMake: {cmake_mode})")
print(f"{'' * 70}")
ok, details = compare_mode(
mode, repo_root,
conf_parsed=conf_parsed,
cmake_parsed=cmake_parsed,
verbose=args.verbose, quiet=quiet)
results[mode] = (ok, details)
# ── Summary ───────────────────────────────────────────────────
if not quiet:
print(f"\n{'' * 70}")
print("Summary")
print(f"{'' * 70}")
for mode in modes:
ok, details = results[mode]
cmake_mode = MODE_TO_CMAKE[mode]
if ok is None:
if isinstance(details, str):
status = f"⚠ SKIPPED ({details})"
else:
status = "⚠ SKIPPED"
elif ok:
status = "✓ MATCH"
else:
status = "✗ MISMATCH"
if quiet:
print(f"{mode}: {status}")
else:
print(f" {mode:10s} (CMake: {cmake_mode:15s}): {status}")
if ok is False and details and isinstance(details, dict):
for line in _format_mode_details(details, quiet):
print(line)
has_failures = any(v[0] is False for v in results.values())
all_pass = all(v[0] is True for v in results.values())
if has_failures:
if not quiet:
print("\n✗ Some modes have differences.")
return 1
elif all_pass:
if not quiet:
print("\n✓ All modes match!")
return 0
else:
if not quiet:
print("\n✗ Some modes could not be compared.")
return 2
if __name__ == "__main__":
sys.exit(main())