The metrics extension now includes validation to detect missing metrics. This validation caused failures during multiversion publication because older versions did not generate all required properties. Instead of fixing each branch, a strict mode flag was introduced to control when validation should run. Strict mode is enabled in the workflow that validates pull requests, ensuring that new changes meet the expected metrics. During multiversion builds, validation errors are now logged but do not raise exceptions, which prevents build failures while still providing visibility into missing data. docs: verbose mode docs: verbose mode Closes scylladb/scylladb#27402
406 lines
17 KiB
Python
Executable File
406 lines
17 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import argparse
|
|
import re
|
|
import yaml
|
|
import json
|
|
import inspect
|
|
from pathlib import Path
|
|
|
|
from encodings import undefined
|
|
|
|
gr = re.compile(r'.*(\.|->)add_group\(\s*(.*)')
|
|
desc = re.compile(r'.*..::descrs\( *("[^"]+")(.*)')
|
|
alternative_name = re.compile(r'([^,]*),')
|
|
metric = re.compile(r'.*..::make_(absolute|counter|current|derive|gauge|histogram|queue|summary|total|total_operations|queue_length|total_bytes|current_bytes)\((.*)')
|
|
string_content = re.compile(r'\s*"([^"]+)"\s*,.*')
|
|
string_match = re.compile(r'"([^"]+)"')
|
|
sstring_match = re.compile(r'\s*sstring\(\s*("[^"]+")\s*\)\s*')
|
|
metrics_directive = re.compile(r'.*@metrics\s*([^=]+)\s*=\s*(\[[^\]]*\]).*')
|
|
format_match = re.compile(r'\s*(?:seastar::)?format\(\s*"([^"]+)"\s*,\s*(.*)\s*')
|
|
|
|
def handle_error(message, strict=True, verbose_mode=False):
|
|
if strict:
|
|
print(f"[ERROR] {message}")
|
|
exit(-1)
|
|
elif verbose_mode:
|
|
print(f"[WARNING] {message}")
|
|
|
|
def verbose(verb, *arg):
|
|
if verb:
|
|
print(*arg)
|
|
|
|
def get_end_part(str):
|
|
parenthes_count = 0
|
|
for idx, c in enumerate(str):
|
|
if c == ',' and parenthes_count == 0:
|
|
return str[:idx]
|
|
if c ==')':
|
|
parenthes_count -= 1
|
|
if c =='(':
|
|
parenthes_count += 1
|
|
return None
|
|
|
|
def find_end_parenthes(str, pc):
|
|
for idx, c in enumerate(str):
|
|
if c ==')':
|
|
pc -= 1
|
|
if c =='(':
|
|
pc += 1
|
|
if pc == 0:
|
|
return str[:idx]
|
|
return str
|
|
|
|
def split_paterns(str):
|
|
res = []
|
|
pos = 0
|
|
b = 0
|
|
cur_str = ""
|
|
while pos < len(str):
|
|
if str[pos] == '+':
|
|
res.append(cur_str)
|
|
cur_str = ""
|
|
elif str[pos] == '(':
|
|
s = find_end_parenthes(str[pos+1:], 1)
|
|
cur_str += "(" +s +")"
|
|
pos += len(s) + 1
|
|
else:
|
|
cur_str += str[pos]
|
|
pos += 1
|
|
if cur_str:
|
|
res.append(cur_str)
|
|
return res
|
|
|
|
def validate_parameter(txt, param_mapping, err="", strict=True, verbose_mode=False):
|
|
if isinstance(txt, str):
|
|
txt = [txt]
|
|
for t in txt:
|
|
if t not in param_mapping:
|
|
handle_error(f"Could not resolve param {err} {t}", strict, verbose_mode)
|
|
return None
|
|
if not param_mapping[t]:
|
|
handle_error(f"Could not resolve param is empty {err} {txt}", strict, verbose_mode)
|
|
return None
|
|
return txt
|
|
def sort_by_index(arr,ind):
|
|
return [arr[i] for i in ind]
|
|
|
|
def make_name_list(names, err, param_mapping, verb=None, strict=True):
|
|
param = []
|
|
format_string = ""
|
|
for txt in names:
|
|
if isinstance(txt, dict):
|
|
format_string += txt['str']
|
|
param = param + ([txt['param']] if isinstance(txt['param'], str) else txt['param'])
|
|
else:
|
|
if txt[0] == '"':
|
|
format_string += txt[1:-1]
|
|
else:
|
|
param = param + validate_parameter(txt, param_mapping, "(make_name_list:"+ str(inspect.getframeinfo(inspect.currentframe()).lineno) +")"+err, strict, verb)
|
|
format_string += "{}"
|
|
if not param:
|
|
handle_error(f"make_name_list:{inspect.getframeinfo(inspect.currentframe()).lineno} {names}", strict, verb)
|
|
return None
|
|
if not param:
|
|
return [format_string]
|
|
order_params = list(enumerate(param))
|
|
sorted_indexed_array = sorted(order_params, key=lambda x: x[1])
|
|
indexed_array = [index for index, value in sorted_indexed_array]
|
|
verbose(verb, "make_name_list", param)
|
|
param.sort()
|
|
param_keys = ';'.join(param)
|
|
if param_keys not in param_mapping:
|
|
handle_error(f"Parameter not found: {param_keys} {err}", strict)
|
|
return None
|
|
for p in param_mapping[param_keys]:
|
|
if not p:
|
|
handle_error(f"empty (make_name_list:{inspect.getframeinfo(inspect.currentframe()).lineno}){err} {param}", strict)
|
|
return None
|
|
verbose(verb, "make_name_list", [format_string.format(p) for p in param_mapping[param[0]]] if len(param) == 1 else [format_string.format(*sort_by_index(p,indexed_array)) for p in param_mapping[param_keys]])
|
|
return [format_string.format(p) for p in param_mapping[param[0]]] if len(param) == 1 else [format_string.format(*sort_by_index(p,indexed_array)) for p in param_mapping[param_keys]]
|
|
|
|
def get_decription(str):
|
|
b = str.find('::description(') + len('::description(')
|
|
p = b
|
|
parenthes_count = 1
|
|
while p < len(str):
|
|
if str[p] == '"':
|
|
p = str.find('"',p+1)
|
|
if p <0:
|
|
break
|
|
if str[p] == '(':
|
|
parenthes_count += 1
|
|
if str[p] == ')':
|
|
parenthes_count -= 1
|
|
if parenthes_count == 0:
|
|
return str[b:p]
|
|
p = p + 1
|
|
return None
|
|
|
|
def merge_strings(str, str2):
|
|
if str and str.rstrip()[-1] == '"':
|
|
if str2 and str2.lstrip()[0] == '"':
|
|
return str.rstrip()[:-1] + str2.lstrip()[1:]
|
|
return str.rstrip() + ' ' + str2.lstrip()
|
|
|
|
def clear_string(str):
|
|
m = sstring_match.match(str)
|
|
if m:
|
|
return m.group(1)
|
|
m = format_match.match(str)
|
|
if m:
|
|
params = [p.strip() for p in find_end_parenthes(m.group(2), 1).split(',')]
|
|
return {'str': m.group(1), 'param': params[0] if len(params) == 1 else params}
|
|
return str.strip()
|
|
|
|
def get_metrics_information(config_file):
|
|
with open(config_file, 'r') as file:
|
|
return yaml.safe_load(file)
|
|
|
|
def get_metrics_from_file(file_name, prefix, metrics_information, verb=None, strict=True):
|
|
current_group = ""
|
|
# Normalize path for cross-platform compatibility
|
|
# Convert absolute paths to relative and backslashes to forward slashes
|
|
clean_name = file_name
|
|
if file_name.startswith('./'):
|
|
clean_name = file_name[2:]
|
|
else:
|
|
# Handle absolute paths (Windows: C:\..., Unix: /...)
|
|
file_path = Path(file_name)
|
|
if file_path.is_absolute():
|
|
try:
|
|
# Try to make it relative to current directory
|
|
clean_name = str(file_path.relative_to(Path.cwd()))
|
|
except ValueError:
|
|
# If not relative to cwd, just use the name as-is
|
|
clean_name = file_name
|
|
# Normalize backslashes to forward slashes for config file compatibility
|
|
clean_name = clean_name.replace('\\', '/')
|
|
param_mapping = {}
|
|
groups = {}
|
|
if clean_name in metrics_information:
|
|
if (isinstance(metrics_information[clean_name], str) and metrics_information[clean_name] == "skip") or "skip" in metrics_information[clean_name]:
|
|
exit(0)
|
|
param_mapping = metrics_information[clean_name]["params"] if clean_name in metrics_information and "params" in metrics_information[clean_name] else {}
|
|
groups = metrics_information[clean_name]["groups"] if clean_name in metrics_information and "groups" in metrics_information[clean_name] else {}
|
|
|
|
metrics = {}
|
|
multi_line = False
|
|
names = undefined
|
|
typ = undefined
|
|
line_number = 0;
|
|
current_metric = ""
|
|
parenthes_count = 0
|
|
serching_group = False
|
|
with open(file_name) as file:
|
|
for line in file:
|
|
if str(line_number) in groups:
|
|
current_group = groups[str(line_number)]
|
|
verbose(verb, "found group from config ", groups[str(line_number)])
|
|
if serching_group:
|
|
m = string_content.match(line)
|
|
if not m:
|
|
line_number += 1
|
|
continue
|
|
current_group = m.group(1)
|
|
serching_group = False
|
|
verbose(verb, "group found on new line", current_group)
|
|
m = metric.match(line)
|
|
# Check if add_group and metric are on the same line
|
|
if m and not current_group:
|
|
gr_match = gr.match(line)
|
|
if gr_match:
|
|
# Extract group from add_group on the same line
|
|
current_group = gr_match.group(2)
|
|
m_str = string_content.match(current_group)
|
|
if m_str:
|
|
current_group = m_str.group(1)
|
|
else:
|
|
m_alt = alternative_name.match(current_group)
|
|
if m_alt:
|
|
current_group = param_mapping[m_alt.group(1)] if m_alt.group(1) in param_mapping else m_alt.group(1)
|
|
verbose(verb, "group found on same line as metric", current_group)
|
|
else:
|
|
handle_error(f"new name found with no group {file_name} {line_number} {line}", strict)
|
|
continue
|
|
if current_metric or m:
|
|
# Only error if add_group appears after we've started processing a metric
|
|
if current_metric and gr.match(line):
|
|
handle_error(f"add group found unexpectedly {file_name} {line_number} {line}", strict)
|
|
continue
|
|
if current_metric and m:
|
|
handle_error(f"new metrics was found while parsing the previous one {file_name} {line_number} {line}", strict)
|
|
continue
|
|
ln = line.replace('\\"','#').rstrip()
|
|
current_metric = merge_strings(current_metric, ln)
|
|
no_string = re.sub(string_match, '', ln)
|
|
parenthes_count += no_string.count('(')
|
|
parenthes_count -= no_string.count(')')
|
|
if parenthes_count <= 0:
|
|
verbose(verb, current_metric, file_name, line_number)
|
|
m = metric.match(current_metric)
|
|
typ = m.group(1) # type is taken from the make_metrics part
|
|
prt = m.group(2)
|
|
m = string_content.match(m.group(2))
|
|
if not m:
|
|
multi_part_name = get_end_part(prt)
|
|
#m = alternative_name.match(prt)
|
|
verbose(verb, "multi part name ", multi_part_name)
|
|
if multi_part_name:
|
|
names = [clear_string(s) for s in multi_part_name.split('+')]
|
|
else:
|
|
handle_error(f"names not found {file_name} {line_number} {line} {current_metric}", strict)
|
|
continue
|
|
else:
|
|
names = ['"' + m.group(1) + '"']
|
|
desc_str = get_decription(current_metric)
|
|
if desc_str:
|
|
m = string_match.match(desc_str)
|
|
if m:
|
|
descrs = [desc_str]
|
|
else:
|
|
descrs = [clear_string(s) for s in split_paterns(desc_str)]
|
|
else:
|
|
handle_error(f"description not found {file_name} {line_number} {line} {current_metric}", strict)
|
|
continue
|
|
name_list = make_name_list(names, file_name+" "+str(line_number), param_mapping, verb, strict)
|
|
if not name_list:
|
|
handle_error(f"no name list {current_metric}", strict)
|
|
continue
|
|
description_list = make_name_list(descrs, file_name+" "+str(line_number), param_mapping, verb, strict)
|
|
current_groups = current_group if isinstance(current_group, list) else [current_group]
|
|
for cg in current_groups:
|
|
for idx, base_name in enumerate(name_list):
|
|
name = prefix + cg + "_" + base_name
|
|
description = description_list[0].replace('#','"') if len(description_list) == 1 else description_list[idx].replace('#','\\"')
|
|
if name in metrics:
|
|
if description != metrics[name][1]:
|
|
verbose(verb, f'Note: Multiple descriptions found for {name}, using first one from {metrics[name][4]}')
|
|
continue
|
|
metrics[name] = [typ, description, cg, base_name, file_name + ":" + str(line_number)]
|
|
current_metric = ""
|
|
parenthes_count = 0
|
|
else:
|
|
m = gr.match(line)
|
|
if m:
|
|
current_group = m.group(2)
|
|
if not current_group:
|
|
verbose(verb, "empty group found")
|
|
serching_group = True
|
|
m = string_content.match(current_group)
|
|
if m:
|
|
current_group = m.group(1)
|
|
else:
|
|
m = alternative_name.match(current_group)
|
|
if m:
|
|
current_group = param_mapping[m.group(1)] if m.group(1) in param_mapping else m.group(1)
|
|
verbose(verb, "Alternative group", file_name, line_number, current_group)
|
|
m = metrics_directive.match(line)
|
|
if m:
|
|
param_mapping[m.group(1).strip()] = json.loads(m.group(2))
|
|
line_number += 1
|
|
return metrics
|
|
|
|
def write_metrics_to_file(out_file, metrics, fmt="pipe"):
|
|
with open(out_file, "a") as fo:
|
|
if fmt == "yml":
|
|
yaml.dump(metrics,fo,sort_keys=False)
|
|
if fmt == "pipe":
|
|
for l in metrics.keys():
|
|
fo.write(l.replace('-','_')+'|' +'|'.join(metrics[l])+ '\n')
|
|
|
|
|
|
def validate_all_metrics(prefix, config_file, verbose=False, strict=True):
|
|
"""Validate all metrics files and report issues"""
|
|
|
|
print("Validating all metrics files...")
|
|
|
|
# Use pathlib to find all .cc files (cross-platform)
|
|
try:
|
|
current_dir = Path('.')
|
|
all_cc_files = list(current_dir.rglob('*.cc'))
|
|
|
|
# Filter files that contain '::description'
|
|
metric_files = []
|
|
for cc_file in all_cc_files:
|
|
try:
|
|
with open(cc_file, 'r', encoding='utf-8', errors='ignore') as f:
|
|
content = f.read()
|
|
if '::description' in content:
|
|
# Normalize path to use forward slashes for cross-platform compatibility
|
|
normalized_path = str(cc_file).replace('\\', '/')
|
|
metric_files.append(normalized_path)
|
|
except Exception as e:
|
|
if verbose:
|
|
print(f"[WARN] Could not read {cc_file}: {e}")
|
|
continue
|
|
|
|
except Exception as e:
|
|
print(f"[ERROR] Error finding metrics files: {e}")
|
|
return False
|
|
|
|
|
|
total_files = len(metric_files)
|
|
|
|
print(f"Found {total_files} files with metrics")
|
|
|
|
failed_files = []
|
|
total_metrics = 0
|
|
metrics_info = get_metrics_information(config_file)
|
|
|
|
for file_path in metric_files:
|
|
try:
|
|
metrics = get_metrics_from_file(file_path, prefix, metrics_info, verbose, strict)
|
|
metrics_count = len(metrics)
|
|
total_metrics += metrics_count
|
|
if verbose:
|
|
print(f"[OK] {file_path} - {metrics_count} metrics")
|
|
else:
|
|
print(f"[OK] {file_path}")
|
|
except Exception as e:
|
|
print(f"[ERROR] {file_path}")
|
|
print(f" Error: {str(e)}")
|
|
failed_files.append((file_path, str(e)))
|
|
|
|
if failed_files:
|
|
print("\n[ERROR] METRICS VALIDATION FAILED")
|
|
print("Failed files:")
|
|
for file_path, error in failed_files:
|
|
print(f" - {file_path}: {error}")
|
|
print(f"\nAdd missing parameters to {config_file}")
|
|
return False
|
|
else:
|
|
working_files = total_files - len(failed_files)
|
|
coverage_pct = (working_files * 100 // total_files) if total_files > 0 else 0
|
|
|
|
print(f"\n[SUCCESS] All metrics files validated successfully")
|
|
print(f" Files: {working_files}/{total_files} working ({coverage_pct}% coverage)")
|
|
print(f" Metrics: {total_metrics} total processed")
|
|
return True
|
|
|
|
if __name__ == '__main__':
|
|
parser = argparse.ArgumentParser(description='get metrics descriptions from file', conflict_handler="resolve")
|
|
parser.add_argument('-p', '--prefix', default="scylla_", help='the prefix added to the metrics names')
|
|
parser.add_argument('-o', '--out-file', default="metrics.out", help='The output file')
|
|
parser.add_argument('-c', '--config-file', default="metrics-config.yml", help='The configuration file used to add extra data missing in the code')
|
|
parser.add_argument('-v', '--verbose', action='store_true', default=False, help='When set prints verbose information')
|
|
parser.add_argument('-F', '--format', default="pipe", help='Set the output format, can be pipe, or yml')
|
|
parser.add_argument('--validate', action='store_true', help='Validate all metrics files instead of processing single file')
|
|
parser.add_argument('--non-strict', action='store_true', help='Continue with warnings instead of failing on errors (useful for multiversion builds)')
|
|
parser.add_argument('file', nargs='?', help='the file to parse (not needed with --validate)')
|
|
|
|
args = parser.parse_args()
|
|
|
|
# Determine strict mode
|
|
strict_mode = not args.non_strict
|
|
|
|
if args.validate:
|
|
success = validate_all_metrics(args.prefix, args.config_file, args.verbose, strict_mode)
|
|
exit(0 if success else 1)
|
|
|
|
if not args.file:
|
|
parser.error('file argument is required when not using --validate')
|
|
|
|
metrics = get_metrics_from_file(args.file, args.prefix, get_metrics_information(args.config_file), args.verbose, strict_mode)
|
|
write_metrics_to_file(args.out_file, metrics, args.format)
|