Files
scylladb/scripts/get_description.py
David Garcia d9593732b1 docs: add strict mode to control metrics validation behavior
The metrics extension now includes validation to detect missing metrics. This validation caused failures during multiversion publication because older versions did not generate all required properties.

Instead of fixing each branch, a strict mode flag was introduced to control when validation should run.

Strict mode is enabled in the workflow that validates pull requests, ensuring that new changes pass the expected metrics validation.

During multiversion builds, validation errors are now logged but do not raise exceptions, which prevents build failures while still providing visibility into missing data.

docs: verbose mode

docs: verbose mode

Closes scylladb/scylladb#27402
2025-12-03 14:09:08 +03:00

406 lines
17 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import re
import yaml
import json
import inspect
from pathlib import Path
from encodings import undefined
# --- Regular expressions used to scan C++ sources for seastar metrics ---
# add_group(...) call (via '.' or '->'); group(2) = text after the '('.
gr = re.compile(r'.*(\.|->)add_group\(\s*(.*)')
# <ns>::descrs("...") call; group(1) = quoted name, group(2) = the rest.
# NOTE(review): appears unused in the visible code — confirm before removing.
desc = re.compile(r'.*..::descrs\( *("[^"]+")(.*)')
# First comma-terminated token, e.g. an identifier used as a group name.
alternative_name = re.compile(r'([^,]*),')
# make_<type>(...) metric factory call; group(1) = metric type,
# group(2) = the argument text after the '('.
metric = re.compile(r'.*..::make_(absolute|counter|current|derive|gauge|histogram|queue|summary|total|total_operations|queue_length|total_bytes|current_bytes)\((.*)')
# A leading quoted string followed by a comma; group(1) = string content.
string_content = re.compile(r'\s*"([^"]+)"\s*,.*')
# Any quoted string; also used with re.sub to blank strings out before
# counting parentheses.
string_match = re.compile(r'"([^"]+)"')
# sstring("...") wrapper; group(1) = the quoted literal (quotes included).
sstring_match = re.compile(r'\s*sstring\(\s*("[^"]+")\s*\)\s*')
# '@metrics NAME = [json-list]' source comment; feeds param_mapping.
metrics_directive = re.compile(r'.*@metrics\s*([^=]+)\s*=\s*(\[[^\]]*\]).*')
# (seastar::)format("fmt", args...); group(1) = format string, group(2) = args.
format_match = re.compile(r'\s*(?:seastar::)?format\(\s*"([^"]+)"\s*,\s*(.*)\s*')
def handle_error(message, strict=True, verbose_mode=False):
    """Report a validation problem.

    In strict mode the message is printed as an error and the process
    exits with a non-zero status.  In non-strict mode the message is
    printed as a warning when verbose_mode is set, otherwise dropped.
    """
    if not strict:
        if verbose_mode:
            print(f"[WARNING] {message}")
        return
    print(f"[ERROR] {message}")
    exit(-1)
def verbose(verb, *arg):
    """Print *arg only when the verbosity flag *verb* is truthy."""
    if not verb:
        return
    print(*arg)
def get_end_part(text):
    """Return the prefix of *text* up to the first top-level comma.

    A comma is "top-level" when it is not nested inside parentheses, so
    commas inside call arguments such as ``f(a, b)`` are skipped.
    Returns None when no top-level comma exists (including empty input).

    The parameter was renamed from ``str`` to avoid shadowing the builtin.
    """
    depth = 0
    for idx, ch in enumerate(text):
        # Check the comma before updating depth, matching the original scan order.
        if ch == ',' and depth == 0:
            return text[:idx]
        if ch == ')':
            depth -= 1
        elif ch == '(':
            depth += 1
    return None
def find_end_parenthes(text, pc):
    """Return the prefix of *text* up to the parenthesis that balances *pc*.

    *pc* is the number of already-open parentheses (callers pass 1).  The
    returned slice excludes the closing ')' itself.  If the text ends
    before the balance reaches zero, the whole text is returned.

    The first parameter was renamed from ``str`` to avoid shadowing the builtin.
    """
    for idx, ch in enumerate(text):
        if ch == ')':
            pc -= 1
        elif ch == '(':
            pc += 1
        if pc == 0:
            return text[:idx]
    return text
def split_paterns(text):
    """Split a C++ concatenation expression on top-level '+' signs.

    Parenthesized sub-expressions are copied verbatim, so '+' signs
    inside them do not split the expression.  Whitespace is preserved.
    A trailing empty part is dropped; an empty part created by a leading
    '+' is kept.

    Changes from the original: removed the unused local ``b`` and renamed
    the parameter from ``str`` to avoid shadowing the builtin.
    """
    res = []
    pos = 0
    cur_str = ""
    while pos < len(text):
        ch = text[pos]
        if ch == '+':
            # Top-level '+': close the current part.
            res.append(cur_str)
            cur_str = ""
        elif ch == '(':
            # Copy the whole parenthesized group without splitting it.
            inner = find_end_parenthes(text[pos + 1:], 1)
            cur_str += "(" + inner + ")"
            pos += len(inner) + 1  # land on the ')'; the +1 below steps past it
        else:
            cur_str += ch
        pos += 1
    if cur_str:
        res.append(cur_str)
    return res
def validate_parameter(txt, param_mapping, err="", strict=True, verbose_mode=False):
    """Check that every name in *txt* resolves to a non-empty mapping entry.

    *txt* may be a single parameter name or a list of them.  On success
    the names are returned as a list; on the first unresolved or empty
    entry the problem is reported via handle_error() and None is returned
    (in strict mode handle_error() exits instead).
    """
    items = [txt] if isinstance(txt, str) else txt
    for entry in items:
        if entry not in param_mapping:
            handle_error(f"Could not resolve param {err} {entry}", strict, verbose_mode)
            return None
        if not param_mapping[entry]:
            handle_error(f"Could not resolve param is empty {err} {items}", strict, verbose_mode)
            return None
    return items
def sort_by_index(arr, ind):
    """Pick elements of *arr* in the order given by the index list *ind*."""
    return list(map(arr.__getitem__, ind))
def make_name_list(names, err, param_mapping, verb=None, strict=True):
    """Expand a parsed metric-name (or description) expression to strings.

    *names* is a list of parts: quoted literals ('"..."'), parameter
    names resolved through *param_mapping*, or dicts produced by
    clear_string() for format() calls ({'str': fmt, 'param': name-or-list}).
    *err* is location context for error messages.

    Returns the list of expanded strings, or None on failure (in strict
    mode handle_error() exits instead of returning).
    """
    param = []
    format_string = ""
    for txt in names:
        if isinstance(txt, dict):
            # A format() expression: keep its format string, collect its params.
            format_string += txt['str']
            param = param + ([txt['param']] if isinstance(txt['param'], str) else txt['param'])
        else:
            if txt[0] == '"':
                # Quoted literal: copied into the format string verbatim.
                format_string += txt[1:-1]
            else:
                validated = validate_parameter(txt, param_mapping, "(make_name_list:" + str(inspect.getframeinfo(inspect.currentframe()).lineno) + ")" + err, strict, verb)
                # Fix: in non-strict mode validate_parameter() returns None on
                # failure; concatenating it to a list raised a TypeError here.
                if validated is None:
                    return None
                param = param + validated
                format_string += "{}"
                if not param:
                    handle_error(f"make_name_list:{inspect.getframeinfo(inspect.currentframe()).lineno} {names}", strict, verb)
                    return None
    if not param:
        # Pure literal name: nothing to expand.
        return [format_string]
    # The config keys are the sorted, ';'-joined parameter names; remember
    # the permutation so expansion values can be applied in the order the
    # parameters appear in the format string.
    order_params = list(enumerate(param))
    sorted_indexed_array = sorted(order_params, key=lambda x: x[1])
    indexed_array = [index for index, value in sorted_indexed_array]
    verbose(verb, "make_name_list", param)
    param.sort()
    param_keys = ';'.join(param)
    if param_keys not in param_mapping:
        handle_error(f"Parameter not found: {param_keys} {err}", strict)
        return None
    for p in param_mapping[param_keys]:
        if not p:
            handle_error(f"empty (make_name_list:{inspect.getframeinfo(inspect.currentframe()).lineno}){err} {param}", strict)
            return None
    # Compute the expansion once (the original built it twice: once for the
    # verbose trace and once for the return value).
    result = [format_string.format(p) for p in param_mapping[param[0]]] if len(param) == 1 else [format_string.format(*sort_by_index(p, indexed_array)) for p in param_mapping[param_keys]]
    verbose(verb, "make_name_list", result)
    return result
def get_decription(text):
    """Extract the argument of the first ``::description(...)`` call in *text*.

    The scan is parenthesis-aware and jumps over quoted string literals so
    parentheses inside quotes do not affect the nesting count (escaped
    quotes are expected to have been masked out by the caller).  Returns
    the raw argument text (quotes included, the call's own parentheses
    excluded), or None when no description call or no balanced close is
    found.

    Function name kept for backward compatibility ('decription' is a
    historical typo).  Fix: a missing '::description(' marker previously
    produced a bogus start offset (find() returning -1); it now returns
    None explicitly.
    """
    marker = '::description('
    start = text.find(marker)
    if start < 0:
        return None
    b = start + len(marker)
    p = b
    parenthes_count = 1
    while p < len(text):
        if text[p] == '"':
            # Skip to the closing quote; bail out on an unterminated literal.
            p = text.find('"', p + 1)
            if p < 0:
                break
        if text[p] == '(':
            parenthes_count += 1
        if text[p] == ')':
            parenthes_count -= 1
            if parenthes_count == 0:
                return text[b:p]
        p = p + 1
    return None
def merge_strings(str, str2):
    """Join two consecutive source lines into one logical line.

    When the first line ends with '"' and the second begins with '"',
    the two adjacent C++ string literals are fused into one (the touching
    quotes are dropped).  Otherwise the lines are joined with one space.
    """
    fuse = bool(str) and str.rstrip()[-1] == '"' and bool(str2) and str2.lstrip()[0] == '"'
    if fuse:
        return str.rstrip()[:-1] + str2.lstrip()[1:]
    return str.rstrip() + ' ' + str2.lstrip()
def clear_string(str):
    """Normalize one expression fragment from a metric name/description.

    sstring("...")      -> the inner quoted literal (quotes kept)
    [seastar::]format() -> {'str': format-string, 'param': arg or [args]}
    anything else       -> the fragment, stripped of surrounding whitespace
    """
    sm = sstring_match.match(str)
    if sm:
        return sm.group(1)
    fm = format_match.match(str)
    if fm:
        # Trim the argument text at the call's closing parenthesis, then
        # split the individual arguments.
        args = find_end_parenthes(fm.group(2), 1)
        params = [a.strip() for a in args.split(',')]
        single = len(params) == 1
        return {'str': fm.group(1), 'param': params[0] if single else params}
    return str.strip()
def get_metrics_information(config_file):
    """Load the YAML config that supplements what is parsed from sources."""
    with open(config_file, 'r') as cfg:
        data = yaml.safe_load(cfg)
    return data
def get_metrics_from_file(file_name, prefix, metrics_information, verb=None, strict=True):
    """Scan one C++ source file for seastar metric definitions.

    Returns a dict mapping the full metric name (prefix + group + "_" +
    base name) to [type, description, group, base_name, "file:line"].

    *metrics_information* is the parsed YAML config; the entry for this
    file may carry "params" (parameter expansions), "groups" (group names
    keyed by line number as a string) or "skip".  *verb* enables verbose
    tracing, *strict* makes parse errors fatal (see handle_error).
    """
    current_group = ""
    # Normalize path for cross-platform compatibility
    # Convert absolute paths to relative and backslashes to forward slashes
    clean_name = file_name
    if file_name.startswith('./'):
        clean_name = file_name[2:]
    else:
        # Handle absolute paths (Windows: C:\..., Unix: /...)
        file_path = Path(file_name)
        if file_path.is_absolute():
            try:
                # Try to make it relative to current directory
                clean_name = str(file_path.relative_to(Path.cwd()))
            except ValueError:
                # If not relative to cwd, just use the name as-is
                clean_name = file_name
    # Normalize backslashes to forward slashes for config file compatibility
    clean_name = clean_name.replace('\\', '/')
    param_mapping = {}
    groups = {}
    if clean_name in metrics_information:
        # "skip" marks files deliberately excluded from parsing.
        # NOTE(review): exit(0) ends the whole process, which also aborts a
        # --validate run early — confirm this is intended.
        if (isinstance(metrics_information[clean_name], str) and metrics_information[clean_name] == "skip") or "skip" in metrics_information[clean_name]:
            exit(0)
        param_mapping = metrics_information[clean_name]["params"] if clean_name in metrics_information and "params" in metrics_information[clean_name] else {}
        groups = metrics_information[clean_name]["groups"] if clean_name in metrics_information and "groups" in metrics_information[clean_name] else {}
    metrics = {}
    multi_line = False  # NOTE(review): appears unused in this function
    names = undefined  # sentinel module; replaced once a metric's names are parsed
    typ = undefined
    line_number = 0;
    current_metric = ""  # metric call text accumulated across continuation lines
    parenthes_count = 0  # parenthesis balance of current_metric (<= 0 => call complete)
    serching_group = False  # add_group( seen with its name on a following line
    with open(file_name) as file:
        for line in file:
            # A group name can be forced from the config, keyed by line number.
            if str(line_number) in groups:
                current_group = groups[str(line_number)]
                verbose(verb, "found group from config ", groups[str(line_number)])
            if serching_group:
                # Waiting for the group name that follows an empty add_group(.
                m = string_content.match(line)
                if not m:
                    line_number += 1
                    continue
                current_group = m.group(1)
                serching_group = False
                verbose(verb, "group found on new line", current_group)
            m = metric.match(line)
            # Check if add_group and metric are on the same line
            if m and not current_group:
                gr_match = gr.match(line)
                if gr_match:
                    # Extract group from add_group on the same line
                    current_group = gr_match.group(2)
                    m_str = string_content.match(current_group)
                    if m_str:
                        current_group = m_str.group(1)
                    else:
                        # Not a literal: try to resolve an identifier through the config.
                        m_alt = alternative_name.match(current_group)
                        if m_alt:
                            current_group = param_mapping[m_alt.group(1)] if m_alt.group(1) in param_mapping else m_alt.group(1)
                    verbose(verb, "group found on same line as metric", current_group)
                else:
                    handle_error(f"new name found with no group {file_name} {line_number} {line}", strict)
                    # NOTE(review): in non-strict mode this continue skips the
                    # line_number increment at the loop's end, so reported line
                    # numbers drift — confirm.
                    continue
            if current_metric or m:
                # Only error if add_group appears after we've started processing a metric
                if current_metric and gr.match(line):
                    handle_error(f"add group found unexpectedly {file_name} {line_number} {line}", strict)
                    continue
                if current_metric and m:
                    handle_error(f"new metrics was found while parsing the previous one {file_name} {line_number} {line}", strict)
                    continue
                # Escaped quotes are masked with '#' so the string regexes and
                # parenthesis counting are not confused; '#' is turned back
                # into a quote when the description is emitted below.
                ln = line.replace('\\"','#').rstrip()
                current_metric = merge_strings(current_metric, ln)
                # Count parentheses outside string literals to find the end of the call.
                no_string = re.sub(string_match, '', ln)
                parenthes_count += no_string.count('(')
                parenthes_count -= no_string.count(')')
                if parenthes_count <= 0:
                    # The whole make_...() call has been accumulated; parse it.
                    verbose(verb, current_metric, file_name, line_number)
                    m = metric.match(current_metric)
                    typ = m.group(1) # type is taken from the make_metrics part
                    prt = m.group(2)
                    m = string_content.match(m.group(2))
                    if not m:
                        # Name is not a plain literal: take everything up to the
                        # first top-level comma and split the '+'-concatenation.
                        multi_part_name = get_end_part(prt)
                        #m = alternative_name.match(prt)
                        verbose(verb, "multi part name ", multi_part_name)
                        if multi_part_name:
                            names = [clear_string(s) for s in multi_part_name.split('+')]
                        else:
                            handle_error(f"names not found {file_name} {line_number} {line} {current_metric}", strict)
                            continue
                    else:
                        names = ['"' + m.group(1) + '"']
                    desc_str = get_decription(current_metric)
                    if desc_str:
                        m = string_match.match(desc_str)
                        if m:
                            descrs = [desc_str]
                        else:
                            descrs = [clear_string(s) for s in split_paterns(desc_str)]
                    else:
                        handle_error(f"description not found {file_name} {line_number} {line} {current_metric}", strict)
                        continue
                    name_list = make_name_list(names, file_name+" "+str(line_number), param_mapping, verb, strict)
                    if not name_list:
                        handle_error(f"no name list {current_metric}", strict)
                        continue
                    description_list = make_name_list(descrs, file_name+" "+str(line_number), param_mapping, verb, strict)
                    current_groups = current_group if isinstance(current_group, list) else [current_group]
                    for cg in current_groups:
                        for idx, base_name in enumerate(name_list):
                            name = prefix + cg + "_" + base_name
                            # A single description is shared by all expanded names.
                            # NOTE(review): the single path restores '#' to '"',
                            # the per-name path restores it to an escaped quote
                            # '\\"' — confirm the asymmetry is intended.
                            description = description_list[0].replace('#','"') if len(description_list) == 1 else description_list[idx].replace('#','\\"')
                            if name in metrics:
                                # Keep the first definition seen.
                                if description != metrics[name][1]:
                                    verbose(verb, f'Note: Multiple descriptions found for {name}, using first one from {metrics[name][4]}')
                                continue
                            metrics[name] = [typ, description, cg, base_name, file_name + ":" + str(line_number)]
                    current_metric = ""
                    parenthes_count = 0
            else:
                # Not inside a metric call: look for group changes and directives.
                m = gr.match(line)
                if m:
                    current_group = m.group(2)
                    if not current_group:
                        # add_group( with its name on a following line.
                        verbose(verb, "empty group found")
                        serching_group = True
                    m = string_content.match(current_group)
                    if m:
                        current_group = m.group(1)
                    else:
                        m = alternative_name.match(current_group)
                        if m:
                            current_group = param_mapping[m.group(1)] if m.group(1) in param_mapping else m.group(1)
                            verbose(verb, "Alternative group", file_name, line_number, current_group)
                # '@metrics NAME = [json-list]' comments add parameter expansions.
                m = metrics_directive.match(line)
                if m:
                    param_mapping[m.group(1).strip()] = json.loads(m.group(2))
            line_number += 1
    return metrics
def write_metrics_to_file(out_file, metrics, fmt="pipe"):
    """Append the collected metrics to *out_file*.

    fmt == "yml":  dump the whole dict as YAML (insertion order kept).
    fmt == "pipe": one '|'-separated line per metric, with '-' replaced
    by '_' in the metric name.
    """
    with open(out_file, "a") as out:
        if fmt == "yml":
            yaml.dump(metrics, out, sort_keys=False)
        if fmt == "pipe":
            for key in metrics:
                out.write('|'.join([key.replace('-', '_')] + metrics[key]) + '\n')
def validate_all_metrics(prefix, config_file, verbose=False, strict=True):
    """Validate all metrics files and report issues"""
    # NOTE(review): the ``verbose`` parameter shadows the module-level
    # verbose() helper inside this function.
    print("Validating all metrics files...")
    # Use pathlib to find all .cc files (cross-platform)
    try:
        current_dir = Path('.')
        all_cc_files = list(current_dir.rglob('*.cc'))
        # Filter files that contain '::description'
        metric_files = []
        for cc_file in all_cc_files:
            try:
                with open(cc_file, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
                    if '::description' in content:
                        # Normalize path to use forward slashes for cross-platform compatibility
                        normalized_path = str(cc_file).replace('\\', '/')
                        metric_files.append(normalized_path)
            except Exception as e:
                # Unreadable files are skipped, not fatal.
                if verbose:
                    print(f"[WARN] Could not read {cc_file}: {e}")
                continue
    except Exception as e:
        print(f"[ERROR] Error finding metrics files: {e}")
        return False
    total_files = len(metric_files)
    print(f"Found {total_files} files with metrics")
    failed_files = []
    total_metrics = 0
    metrics_info = get_metrics_information(config_file)
    for file_path in metric_files:
        try:
            # NOTE(review): in strict mode parse failures call exit() inside
            # handle_error(), and "skip" config entries call exit(0); SystemExit
            # is not a subclass of Exception, so those terminate the whole run
            # instead of being collected below — confirm this is intended.
            metrics = get_metrics_from_file(file_path, prefix, metrics_info, verbose, strict)
            metrics_count = len(metrics)
            total_metrics += metrics_count
            if verbose:
                print(f"[OK] {file_path} - {metrics_count} metrics")
            else:
                print(f"[OK] {file_path}")
        except Exception as e:
            print(f"[ERROR] {file_path}")
            print(f" Error: {str(e)}")
            failed_files.append((file_path, str(e)))
    if failed_files:
        print("\n[ERROR] METRICS VALIDATION FAILED")
        print("Failed files:")
        for file_path, error in failed_files:
            print(f" - {file_path}: {error}")
        print(f"\nAdd missing parameters to {config_file}")
        return False
    else:
        # NOTE(review): failed_files is empty on this branch, so
        # working_files == total_files and the coverage is always 100%.
        working_files = total_files - len(failed_files)
        coverage_pct = (working_files * 100 // total_files) if total_files > 0 else 0
        print(f"\n[SUCCESS] All metrics files validated successfully")
        print(f" Files: {working_files}/{total_files} working ({coverage_pct}% coverage)")
        print(f" Metrics: {total_metrics} total processed")
        return True
if __name__ == '__main__':
    # Command-line driver: parse a single source file, or check every
    # metrics-bearing file in the tree when --validate is given.
    parser = argparse.ArgumentParser(description='get metrics descriptions from file', conflict_handler="resolve")
    parser.add_argument('-p', '--prefix', default="scylla_", help='the prefix added to the metrics names')
    parser.add_argument('-o', '--out-file', default="metrics.out", help='The output file')
    parser.add_argument('-c', '--config-file', default="metrics-config.yml", help='The configuration file used to add extra data missing in the code')
    parser.add_argument('-v', '--verbose', action='store_true', default=False, help='When set prints verbose information')
    parser.add_argument('-F', '--format', default="pipe", help='Set the output format, can be pipe, or yml')
    parser.add_argument('--validate', action='store_true', help='Validate all metrics files instead of processing single file')
    parser.add_argument('--non-strict', action='store_true', help='Continue with warnings instead of failing on errors (useful for multiversion builds)')
    parser.add_argument('file', nargs='?', help='the file to parse (not needed with --validate)')
    opts = parser.parse_args()
    # Strict unless --non-strict was requested.
    strict_mode = not opts.non_strict
    if opts.validate:
        ok = validate_all_metrics(opts.prefix, opts.config_file, opts.verbose, strict_mode)
        exit(0 if ok else 1)
    if not opts.file:
        parser.error('file argument is required when not using --validate')
    config = get_metrics_information(opts.config_file)
    parsed = get_metrics_from_file(opts.file, opts.prefix, config, opts.verbose, strict_mode)
    write_metrics_to_file(opts.out_file, parsed, opts.format)