test/pylib_test: add skip_bug CI validations and fixed-issue guards

Problem

After introducing strict skip_bug metadata, CI still lacked broad guards against metadata regressions (invalid links, stale references, and references to already-fixed issues).

What changed

- Added CI-oriented pylib tests that validate skip_bug link correctness.

- Added guards for PR/message references that point to fixed issues.

- Extracted shared AST file-scan helpers for consistent and maintainable checks.

Usage examples

Running test/pylib_test now catches malformed skip_bug links and stale/fixed issue references before they land.
This commit is contained in:
Artsiom Mishuta
2026-06-01 12:27:35 +02:00
parent db21aabfb9
commit 296471ee74
5 changed files with 311 additions and 23 deletions

View File

@@ -25,16 +25,25 @@ Usage::
from enum import StrEnum
import re
import pytest
from test.pylib.skip_reason_plugin import skip
_GITHUB_ISSUE_LINK_RE = re.compile(r"^https://github\.com/[^/\s]+/[^/\s]+/issues/\d+/?$")
_JIRA_LINK_RE = re.compile(r"^https://scylladb\.atlassian\.net/browse/[A-Z][A-Z0-9]+-\d+$")
def _is_valid_skip_bug_link(link: str) -> bool:
"""Return True if *link* is a supported skip_bug issue URL."""
return bool(_GITHUB_ISSUE_LINK_RE.fullmatch(link) or _JIRA_LINK_RE.fullmatch(link))
# ScyllaDB-specific skip categories.
# Each member name (lowercased) is the pytest marker, e.g.
# ``SKIP_BUG`` → ``@pytest.mark.skip_bug``, tag ``"bug"``.
class SkipType(StrEnum):
SKIP_BUG = "bug"
SKIP_NOT_IMPLEMENTED = "not_implemented"
SKIP_SLOW = "slow"
@@ -45,11 +54,9 @@ class SkipType(StrEnum):
"""Validate and format skip_bug arguments."""
link = link.strip()
reason = reason.strip()
_GITHUB_ISSUE_LINK_RE = re.compile(r"^https://github\.com/[^/\s]+/[^/\s]+/issues/\d+/?$")
_JIRA_LINK_RE = re.compile(r"^https://scylladb\.atlassian\.net/browse/[A-Z][A-Z0-9]+-\d+$")
if not link:
raise pytest.UsageError(f"{context}: 'link' is required.")
if not (_GITHUB_ISSUE_LINK_RE.fullmatch(link) or _JIRA_LINK_RE.fullmatch(link)):
if not _is_valid_skip_bug_link(link):
raise pytest.UsageError(
f"{context}: invalid 'link' value {link!r}. "
"Expected a GitHub issue URL or a ScyllaDB Jira URL.",

View File

@@ -0,0 +1,48 @@
#
# Copyright (C) 2026-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
#
"""Shared helpers for AST-based guard tests in ``pylib_test``.
Centralizes the boilerplate (test-file enumeration + safe AST parsing)
shared by ``test_no_bare_skips``, ``test_skip_bug_links`` and
``test_pr_message_bug_references``.
"""
import ast
import pathlib
from collections.abc import Iterator
from test import TEST_DIR
# Directories under test/ that AST guard tests should not scan:
# - 'pylib_test' contains the guards themselves and their fixtures
# (which intentionally embed example skip patterns).
# - 'pylib' is shared framework code, not test code.
FRAMEWORK_DIRS = frozenset({"pylib", "pylib_test"})
def iter_test_py_files(*, exclude: frozenset[str] = FRAMEWORK_DIRS) -> Iterator[pathlib.Path]:
"""Yield Python files under ``test/``, skipping ``__pycache__`` and *exclude* dirs."""
for path in sorted(TEST_DIR.rglob("*.py")):
rel = path.relative_to(TEST_DIR)
parts = rel.parts
if "__pycache__" in parts:
continue
if exclude and parts[0] in exclude:
continue
yield path
def parse_python_file(path: pathlib.Path) -> ast.AST | None:
"""Return the parsed AST of *path* or ``None`` on read/syntax errors."""
try:
source = path.read_text()
except (OSError, UnicodeDecodeError):
return None
try:
return ast.parse(source, filename=str(path))
except SyntaxError:
return None

View File

@@ -13,32 +13,19 @@ Any new skip must use the typed markers or the typed skip() helper.
import ast
import os
import pathlib
import subprocess
import sys
_TEST_ROOT = pathlib.Path(__file__).resolve().parent.parent
def _iter_test_py_files():
"""Yield all .py files under test/ excluding pylib_test/, pylib/ and __pycache__."""
for p in sorted(_TEST_ROOT.rglob("*.py")):
rel = p.relative_to(_TEST_ROOT)
parts = rel.parts
if "__pycache__" in parts:
continue
if parts[0] in ("pylib_test", "pylib"):
continue
yield p
from test import TEST_DIR
from test.pylib_test._scan_py_files import iter_test_py_files, parse_python_file
def test_no_bare_pytest_skip_calls_in_codebase():
"""Verify no test files use bare pytest.skip() (must use typed skip() helper)."""
violations = []
for path in _iter_test_py_files():
source = path.read_text()
try:
tree = ast.parse(source, filename=str(path))
except SyntaxError:
for path in iter_test_py_files():
tree = parse_python_file(path)
if tree is None:
continue
for node in ast.walk(tree):
if not isinstance(node, ast.Call):
@@ -78,7 +65,7 @@ def test_no_bare_skip_markers_in_collection():
"--ignore=unit",
"-p", "no:sugar"],
capture_output=True, text=True,
cwd=str(_TEST_ROOT),
cwd=str(TEST_DIR),
env=env,
)
# If a bare skip exists, plugin raises UsageError → non-zero exit.

View File

@@ -0,0 +1,187 @@
#
# Copyright (C) 2026-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
#
"""Guard fixed issues from staying in skip_bug references.
CI provides PR_MESSAGE, which may contain lines like::
Fixes: SCYLLADB-123 # JIRA key
Fixes: https://scylladb.atlassian.net/browse/SCYLLADB-123
Fixes: 9999 # GitHub issue in scylladb/scylladb
Fixes: #9999 # same, with leading '#'
Fixes: scylladb/scylla-enterprise#9999 # cross-repo GitHub issue
Fixes: https://github.com/scylladb/scylla-enterprise/issues/9999
For each fixed issue listed in PR_MESSAGE, this test verifies no Python
test still references it via ``skip_bug()`` / ``@pytest.mark.skip_bug(...)``.
CI runs this with::
./tools/toolchain/dbuild -e PR_MESSAGE='<commit message>' -- \\
pytest test/pylib_test --junit-xml=testlog/framework_tests/framework_test.xml
"""
import ast
import os
import pathlib
import re
from collections.abc import Iterator
import pytest
from test.pylib_test._scan_py_files import iter_test_py_files, parse_python_file
_FIXES_PREFIX = "fixes:"
# Bare 'Fixes: 9999' / 'Fixes: #9999' refer to the main scylladb/scylladb repo.
_DEFAULT_GH_REPO = "scylladb/scylladb"
# Recognized GitHub issue URL forms in PR_MESSAGE 'Fixes:' tokens,
# including an optional trailing slash.
_GH_URL_RE = re.compile(
r"^https?://github\.com/(?P<owner>[^/\s]+)/(?P<repo>[^/\s]+)/issues/(?P<num>\d+)/?$",
)
# owner/repo#123 cross-repo shorthand.
_GH_SHORTHAND_RE = re.compile(
r"^(?P<owner>[^/\s#]+)/(?P<repo>[^/\s#]+)#(?P<num>\d+)$",
re.IGNORECASE,
)
# Full JIRA browse URL.
_JIRA_URL_RE = re.compile(
r"^https?://scylladb\.atlassian\.net/browse/(?P<key>[A-Z][A-Z0-9]+-\d+)$",
re.IGNORECASE,
)
# Bare JIRA key like SCYLLADB-123.
_JIRA_KEY_RE = re.compile(r"^[A-Z][A-Z0-9]+-\d+$")
def _parse_fixes_token(token: str) -> tuple | None:
"""Classify a ``Fixes:`` token.
Returns one of:
* ``('jira', KEY)`` — JIRA key (matched as substring).
* ``('gh', 'owner/repo', NUM)`` — GitHub issue. Bare numbers default
to the main scylladb/scylladb repo.
* ``None`` — unrecognized token.
"""
token = token.strip()
if not token:
return None
if (m := _JIRA_URL_RE.match(token)):
return ("jira", m.group("key").upper())
if (m := _GH_URL_RE.match(token)):
return ("gh", f"{m.group('owner')}/{m.group('repo')}".lower(), m.group("num"))
if (m := _GH_SHORTHAND_RE.match(token)):
return ("gh", f"{m.group('owner')}/{m.group('repo')}".lower(), m.group("num"))
bare = token.lstrip("#").upper()
if _JIRA_KEY_RE.match(bare):
return ("jira", bare)
if bare.isdigit():
return ("gh", _DEFAULT_GH_REPO, bare)
return None
def _extract_fixed_issues(pr_message: str) -> list[tuple]:
"""Extract issue references from ``Fixes:`` lines in PR_MESSAGE."""
issues: list[tuple] = []
for line in pr_message.splitlines():
stripped = line.strip()
if not stripped.lower().startswith(_FIXES_PREFIX):
continue
value = stripped[len(_FIXES_PREFIX):].strip()
if not value:
continue
parsed = _parse_fixes_token(value.split()[0])
if parsed is not None and parsed not in issues:
issues.append(parsed)
return issues
def _is_skip_bug_call(node: ast.Call) -> bool:
"""Match ``skip_bug(...)`` and ``<anything>.skip_bug(...)`` calls."""
func = node.func
if isinstance(func, ast.Name) and func.id == "skip_bug":
return True
if isinstance(func, ast.Attribute) and func.attr == "skip_bug":
return True
return False
def _iter_string_literals(node: ast.AST) -> Iterator[str]:
for child in ast.walk(node):
if isinstance(child, ast.Constant) and isinstance(child.value, str):
yield child.value
def _text_references_issue(text: str, issue: tuple) -> bool:
"""Return True if ``text`` references the given issue.
* JIRA: case-insensitive substring of the key.
* GitHub: substring of '/owner/repo/issues/NUM' (case-insensitive).
"""
kind = issue[0]
if kind == "jira":
return issue[1] in text.upper()
repo, num = issue[1], issue[2]
return f"/{repo}/issues/{num}" in text.lower()
def _format_issue(issue: tuple) -> str:
if issue[0] == "jira":
return issue[1]
repo, num = issue[1], issue[2]
return f"{repo}#{num}"
def _collect_violations(path: pathlib.Path, fixed_issues: list[tuple]) -> list[str]:
"""Collect violations only from skip_bug(...) call sites."""
tree = parse_python_file(path)
if tree is None:
return []
violations: list[str] = []
for node in ast.walk(tree):
if not isinstance(node, ast.Call) or not _is_skip_bug_call(node):
continue
for text in _iter_string_literals(node):
for issue in fixed_issues:
if _text_references_issue(text, issue):
violations.append(
f" {path}:{node.lineno} references fixed issue "
f"{_format_issue(issue)}: {text.strip()}"
)
return violations
def test_fixed_issues_not_referenced_by_skip_bug():
"""Verify fixed issues from PR_MESSAGE are not referenced in skip_bug usage."""
pr_message = os.environ.get("PR_MESSAGE")
if not pr_message:
pytest.skip(
"PR_MESSAGE environment variable is not set. "
"This test is intended to run in CI with PR_MESSAGE provided."
)
fixed_issues = _extract_fixed_issues(pr_message)
if not fixed_issues:
pytest.skip(
"PR_MESSAGE has no recognized 'Fixes:' entry "
"(SCYLLADB-NNN, NNN, #NNN, owner/repo#NNN, or full GitHub/JIRA URL)"
)
violations: list[str] = []
for path in iter_test_py_files():
violations.extend(_collect_violations(path, fixed_issues))
assert not violations, (
"Found references to issue(s) marked as fixed in PR_MESSAGE. "
"Remove/adjust those references before merging:\n"
+ "\n".join(violations)
)

View File

@@ -0,0 +1,59 @@
#
# Copyright (C) 2026-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
#
"""Codebase guard test: validate links used by @pytest.mark.skip_bug.
The link validator is intentionally imported from
``test.pylib.skip_types`` so this CI guard and marker validation logic
cannot drift apart.
"""
import ast
from test.pylib.skip_types import _is_valid_skip_bug_link
from test.pylib_test._scan_py_files import iter_test_py_files, parse_python_file
def _is_skip_bug_marker(node: ast.Call) -> bool:
func = node.func
if not isinstance(func, ast.Attribute) or func.attr != "skip_bug":
return False
mark_attr = func.value
if not isinstance(mark_attr, ast.Attribute) or mark_attr.attr != "mark":
return False
base = mark_attr.value
return isinstance(base, ast.Name) and base.id == "pytest"
def test_skip_bug_markers_use_valid_links():
violations: list[str] = []
for path in iter_test_py_files():
tree = parse_python_file(path)
if tree is None:
continue
for node in ast.walk(tree):
if not isinstance(node, ast.Call) or not _is_skip_bug_marker(node):
continue
link_kw = next((kw for kw in node.keywords if kw.arg == "link"), None)
if link_kw is None:
violations.append(f" {path}:{node.lineno} missing link=...")
continue
if not isinstance(link_kw.value, ast.Constant) or not isinstance(link_kw.value.value, str):
violations.append(f" {path}:{node.lineno} link must be a string literal")
continue
link = link_kw.value.value
if not _is_valid_skip_bug_link(link):
violations.append(
f" {path}:{node.lineno} invalid link {link!r} (expected GitHub issue URL or ScyllaDB Jira URL)"
)
assert not violations, "Invalid @pytest.mark.skip_bug link(s) found:\n" + "\n".join(violations)