Files
scylladb/scripts/get_description.py
Amnon Heiman 09fa625672 scripts/get_description.py: param_mapping was missing
get_description.py was moved from a standalone script to a library.
During the transition, param_mapping was not included in the script
option.

This patch makes it possible to use the file as a standalone script
again.
2024-10-18 08:58:04 +03:00

289 lines
12 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import re
import yaml
import json
import inspect
from encodings import undefined
# Line containing `.add_group(` or `->add_group(`; group 2 is whatever follows
# the opening parenthesis (possibly empty when the group name is on the next line).
gr = re.compile(r'.*(\.|->)add_group\(\s*(.*)')
# `<xx>::descrs("text" ...`; group 1 is the quoted text (quotes included),
# group 2 the remainder. Not referenced by the code visible in this file.
desc = re.compile(r'.*..::descrs\( *("[^"]+")(.*)')
# Text before the first comma (used with .match, so anchored at the start).
alternative_name = re.compile(r'([^,]*),')
# `<xx>::make_<type>(`; group 1 is the metric type, group 2 the rest of the line
# after the opening parenthesis.
metric = re.compile(r'.*..::make_(absolute|counter|current|derive|gauge|histogram|queue|summary|total|total_operations|queue_length|total_bytes|current_bytes)\((.*)')
# A leading double-quoted string followed by a comma; group 1 is the content
# without the quotes.
string_content = re.compile(r'\s*"([^"]+)"\s*,.*')
# A non-empty double-quoted string; group 1 is the content without the quotes.
string_match = re.compile(r'"([^"]+)"')
# `sstring("text")`; group 1 is the quoted text, quotes included.
sstring_match = re.compile(r'\s*sstring\(\s*("[^"]+")\s*\)\s*')
# In-source directive `@metrics <name> = [ ... ]`; group 1 is the name,
# group 2 the bracketed list (parsed as JSON by get_metrics_from_file).
metrics_directive = re.compile(r'.*@metrics\s*([^=]+)\s*=\s*(\[[^\]]*\]).*')
# `format("fmt", args...` with an optional `seastar::` prefix; group 1 is the
# format string without quotes, group 2 everything after the first comma.
format_match = re.compile(r'\s*(?:seastar::)?format\(\s*"([^"]+)"\s*,\s*(.*)\s*')
def verbose(verb, *arg):
    """Print ``arg`` exactly as ``print(*arg)`` would, but only if ``verb`` is truthy."""
    if not verb:
        return
    print(*arg)
def get_end_part(str):
    """Return the text preceding the first comma that is not nested in parentheses.

    The nesting depth goes up on '(' and down on ')'. A comma only terminates
    the scan when the depth is exactly zero. Returns None when no such comma
    exists.
    """
    depth = 0
    for pos, ch in enumerate(str):
        if ch == ',':
            if depth == 0:
                return str[:pos]
        elif ch == ')':
            depth -= 1
        elif ch == '(':
            depth += 1
    return None
def find_end_parenthes(str, pc):
    """Return the prefix of ``str`` that ends where the parenthesis depth reaches zero.

    ``pc`` is the depth at the start of ``str`` (e.g. 1 when the caller has
    already consumed an opening parenthesis). The character that brings the
    depth to zero is not included. If the depth never reaches zero the whole
    string is returned.
    """
    depth_change = {'(': 1, ')': -1}
    for pos, ch in enumerate(str):
        pc += depth_change.get(ch, 0)
        if pc == 0:
            return str[:pos]
    return str
def split_paterns(str):
    """Split ``str`` on '+' characters that are outside parentheses.

    A parenthesised span (located with find_end_parenthes) is copied into the
    current piece as "(" + content + ")" and never split. Pieces are returned
    as-is (not stripped); a trailing empty piece is dropped.
    """
    pieces = []
    current = ""
    i = 0
    total = len(str)
    while i < total:
        ch = str[i]
        if ch == '+':
            pieces.append(current)
            current = ""
        elif ch == '(':
            inner = find_end_parenthes(str[i + 1:], 1)
            current = current + "(" + inner + ")"
            # Jump to the closing parenthesis; the increment below steps past it.
            i += len(inner) + 1
        else:
            current = current + ch
        i += 1
    if current:
        pieces.append(current)
    return pieces
def validate_parameter(txt, param_mapping, err=""):
    """Check that every parameter name has a non-empty entry in ``param_mapping``.

    ``txt`` is a single name or a list of names; a single name is wrapped in
    a list. Returns that list when every name resolves. Otherwise prints a
    diagnostic (tagged with ``err``) and returns None.
    """
    candidates = [txt] if isinstance(txt, str) else txt
    for key in candidates:
        if key in param_mapping:
            if param_mapping[key]:
                continue
            print("Could not resolve param is empty", err, candidates)
        else:
            print("Could not resolve param", err, key)
        return None
    return candidates
def sort_by_index(arr,ind):
    """Return a new list holding ``arr[i]`` for each index ``i`` in ``ind``, in that order."""
    picked = []
    for i in ind:
        picked.append(arr[i])
    return picked
def make_name_list(names, err, param_mapping, verb=None):
    """Expand a list of name (or description) fragments into concrete strings.

    Each fragment in ``names`` is one of:
      * a dict ``{'str': fmt, 'param': p}`` (the shape clear_string returns
        for a ``format(...)`` call): ``fmt`` is appended to the format string
        and ``p`` (a name or a list of names) to the parameter list;
      * a string starting with a double quote: its content, without the
        surrounding quotes, is appended verbatim;
      * any other string: a parameter name that must resolve through
        validate_parameter; it contributes a ``{}`` placeholder.

    With no parameters the result is the single literal string. With one
    parameter, the format string is expanded once per value in
    ``param_mapping[name]``. With several, the lookup key is the sorted
    names joined by ';' and each mapped entry is a sequence of values.

    Args:
        names: list of fragments as described above.
        err: context text (file and line) added to diagnostics.
        param_mapping: parameter name (or ';'-joined names) -> list of values.
        verb: when truthy, print progress information.

    Returns:
        A list of expanded strings, or None when a bare parameter name cannot
        be resolved. Exits the process with -1 when the (joined) parameter
        key is missing from ``param_mapping`` or one of its entries is empty.
    """
    param = []
    format_string = ""
    for txt in names:
        if isinstance(txt, dict):
            format_string += txt['str']
            param = param + ([txt['param']] if isinstance(txt['param'], str) else txt['param'])
        elif txt.startswith('"'):
            format_string += txt[1:-1]
        else:
            resolved = validate_parameter(txt, param_mapping, "(make_name_list:"+ str(inspect.getframeinfo(inspect.currentframe()).lineno) +")"+err)
            if resolved is None:
                # validate_parameter already printed the reason; report the
                # fragment list instead of crashing on `list + None`.
                print("make_name_list:"+ str(inspect.getframeinfo(inspect.currentframe()).lineno), names)
                return None
            param = param + resolved
            format_string += "{}"
    if not param:
        return [format_string]

    # The placeholders appear in `param` order, while the lookup key below is
    # built from the sorted names. rank[i] is the position of the i-th
    # placeholder's parameter within that sorted key, so picking entry[rank[i]]
    # for each i puts the values back in placeholder order.
    # NOTE(review): this assumes each mapped entry lists its values in
    # sorted-name order, matching the key - confirm against the config file.
    by_name = sorted(range(len(param)), key=lambda i: param[i])
    rank = [0] * len(param)
    for sorted_pos, original_pos in enumerate(by_name):
        rank[original_pos] = sorted_pos

    verbose(verb, "make_name_list", param)
    param.sort()
    param_keys = ';'.join(param)
    if param_keys not in param_mapping:
        print("Parameter not found", param_keys, err)
        exit(-1)
    values = param_mapping[param_keys]
    for p in values:
        if not p:
            print("empty (make_name_list:"+ str(inspect.getframeinfo(inspect.currentframe()).lineno) +")"+err, param)
            exit(-1)
    if len(param) == 1:
        result = [format_string.format(p) for p in values]
    else:
        result = [format_string.format(*sort_by_index(p, rank)) for p in values]
    verbose(verb, "make_name_list", result)
    return result
def get_decription(str):
    """Return the argument text of the first ``::description(...)`` call in ``str``.

    The scan starts right after ``::description(`` and stops at the matching
    closing parenthesis. Double-quoted spans are skipped so that parentheses
    inside string literals are not counted.

    Returns:
        The text between the parentheses (not stripped), or None when there is
        no ``::description(`` call, a string literal is unterminated, or the
        closing parenthesis is missing.
    """
    marker = '::description('
    found = str.find(marker)
    if found < 0:
        # Without this check the scan would start at an arbitrary offset
        # (-1 + len(marker)) and could return an unrelated slice.
        return None
    b = found + len(marker)
    p = b
    depth = 1
    while p < len(str):
        c = str[p]
        if c == '"':
            # Jump to the closing quote of this literal.
            p = str.find('"', p + 1)
            if p < 0:
                break
        elif c == '(':
            depth += 1
        elif c == ')':
            depth -= 1
            if depth == 0:
                return str[b:p]
        p += 1
    return None
def merge_strings(str, str2):
    """Join two source fragments, fusing adjacent string literals.

    Trailing whitespace of ``str`` and leading whitespace of ``str2`` are
    dropped. If the first fragment then ends with a double quote and the
    second starts with one, the two quotes are removed so the literals become
    a single one. Otherwise the fragments are joined with a single space.

    Empty or whitespace-only arguments are handled without indexing into an
    empty string.
    """
    left = str.rstrip()
    right = str2.lstrip()
    if left.endswith('"') and right.startswith('"'):
        return left[:-1] + right[1:]
    return left + ' ' + right
def clear_string(str):
    """Normalise one fragment of a metric name or description expression.

    * ``sstring("text")`` -> ``"text"`` (quotes kept).
    * ``format("fmt", a, b)`` (optionally ``seastar::format``) -> a dict
      ``{'str': fmt, 'param': ...}`` where ``param`` is the single argument
      name, or a list of names when there are several. Arguments are taken
      up to the closing parenthesis and split on commas.
    * anything else -> the fragment with surrounding whitespace removed.
    """
    as_sstring = sstring_match.match(str)
    if as_sstring is not None:
        return as_sstring.group(1)

    as_format = format_match.match(str)
    if as_format is None:
        return str.strip()

    arguments = find_end_parenthes(as_format.group(2), 1)
    params = [piece.strip() for piece in arguments.split(',')]
    if len(params) == 1:
        param_value = params[0]
    else:
        param_value = params
    return {'str': as_format.group(1), 'param': param_value}
def get_metrics_information(config_file):
    """Load the YAML configuration at ``config_file`` and return the parsed content."""
    with open(config_file, 'r') as config_stream:
        parsed = yaml.safe_load(config_stream)
    return parsed
def get_metrics_from_file(file_name, prefix, metrics_information, verb=None):
    """Scan a source file for metric registrations and collect their descriptions.

    The file is read line by line. ``add_group(`` calls set the current
    group, ``make_<type>(`` calls start a metric definition that may span
    several lines, and ``@metrics name = [...]`` directives add entries to
    the parameter mapping.

    Args:
        file_name: path of the source file to parse.
        prefix: text prepended to every metric name.
        metrics_information: per-file configuration, keyed by file name with
            a leading './' removed. The entry may be the string "skip" or
            contain the keys "skip", "allowmismatch", "params" and "groups".
        verb: when truthy, print progress information.

    Returns:
        A dict mapping the full metric name to
        ``[type, description, group, base_name, "file_name:line_number"]``.
        Line numbers are counted from 0.

    The process exits with status 0 when the file is marked "skip", and with
    -1 on any parse error (metric without a group, missing name or
    description, conflicting descriptions, ...).
    """
    current_group = ""
    clean_name = file_name[2:] if file_name.startswith('./') else file_name
    param_mapping = {}
    groups = {}
    allowmismatch = False
    if clean_name in metrics_information:
        if (isinstance(metrics_information[clean_name], str) and metrics_information[clean_name] == "skip") or "skip" in metrics_information[clean_name]:
            exit(0)
        if "allowmismatch" in metrics_information[clean_name]:
            allowmismatch = metrics_information[clean_name]["allowmismatch"]
        param_mapping = metrics_information[clean_name]["params"] if clean_name in metrics_information and "params" in metrics_information[clean_name] else {}
        groups = metrics_information[clean_name]["groups"] if clean_name in metrics_information and "groups" in metrics_information[clean_name] else {}
    metrics = {}
    # Assigned here but not read anywhere in this function.
    multi_line = False
    # Placeholders (the imported `undefined` object) until the first metric is parsed.
    names = undefined
    typ = undefined
    line_number = 0;
    # Text of the metric definition accumulated so far; empty when not inside one.
    current_metric = ""
    # Open-parenthesis balance of the accumulated definition, string literals excluded.
    parenthes_count = 0
    # True after an add_group( whose name was not on the same line.
    serching_group = False
    with open(file_name) as file:
        for line in file:
            # The configuration can force a group starting at a given line number.
            if str(line_number) in groups:
                current_group = groups[str(line_number)]
                verbose(verb, "found group from config ", groups[str(line_number)])
            if serching_group:
                # Skip lines until one starts with a quoted string followed by a comma.
                m = string_content.match(line)
                if not m:
                    line_number += 1
                    continue
                current_group = m.group(1)
                serching_group = False
                verbose(verb, "group found on new line", current_group)
            m = metric.match(line)
            if m and not current_group:
                print("new name found with no group", file_name, line_number, line)
                exit(-1)
            if current_metric or m:
                # Either a new metric definition starts here or one is being continued.
                if gr.match(line):
                    print("add group found unexpectedly", file_name, line_number, line)
                    exit(-1)
                if current_metric and m:
                    print("new metrics was found while parsing the previous one", file_name, line_number, line)
                    exit(-1)
                # Escaped quotes are replaced by '#' so they do not confuse the
                # string-literal handling; they are restored when the description is stored.
                ln = line.replace('\\"','#').rstrip()
                current_metric = merge_strings(current_metric, ln)
                # Count parentheses outside string literals only.
                no_string = re.sub(string_match, '', ln)
                parenthes_count += no_string.count('(')
                parenthes_count -= no_string.count(')')
                if parenthes_count <= 0:
                    # The definition is complete: extract type, name(s) and description(s).
                    verbose(verb, current_metric, file_name, line_number)
                    m = metric.match(current_metric)
                    typ = m.group(1) # type is taken from the make_metrics part
                    prt = m.group(2)
                    m = string_content.match(m.group(2))
                    if not m:
                        # The name is not a plain literal: take everything up to the
                        # first top-level comma and split it on '+'.
                        multi_part_name = get_end_part(prt)
                        #m = alternative_name.match(prt)
                        verbose(verb, "multi part name ", multi_part_name)
                        if multi_part_name:
                            names = [clear_string(s) for s in multi_part_name.split('+')]
                        else:
                            print("names not found", file_name, line_number, line, current_metric)
                            exit(-1)
                    else:
                        names = ['"' + m.group(1) + '"']
                    desc_str = get_decription(current_metric)
                    if desc_str:
                        m = string_match.match(desc_str)
                        if m:
                            # Description starts with a string literal: use it as a single fragment.
                            descrs = [desc_str]
                        else:
                            descrs = [clear_string(s) for s in split_paterns(desc_str)]
                    else:
                        print("description not found", file_name, line_number, line, current_metric)
                        exit(-1)
                    name_list = make_name_list(names, file_name+" "+str(line_number), param_mapping, verb)
                    if not name_list:
                        print("no name list", current_metric)
                        exit(-1)
                    description_list = make_name_list(descrs, file_name+" "+str(line_number), param_mapping, verb)
                    # A group may be a single name or a list of names.
                    current_groups = current_group if isinstance(current_group, list) else [current_group]
                    for cg in current_groups:
                        for idx, base_name in enumerate(name_list):
                            name = prefix + cg + "_" + base_name
                            # A single description is shared by every name; otherwise
                            # descriptions are paired with names by index. '#' is turned
                            # back into a quote (plain in the first case, backslash-escaped
                            # in the second).
                            description = description_list[0].replace('#','"') if len(description_list) == 1 else description_list[idx].replace('#','\\"')
                            if not allowmismatch and name in metrics and description != metrics[name][1]:
                                print('description problem, different descriptions found', file_name, line_number, names, typ, line, name, metrics[name][1], description)
                                print(metrics[name][1])
                                print(description)
                                exit(-1)
                            metrics[name] = [typ, description, cg, base_name, file_name + ":" + str(line_number)]
                    # Ready for the next metric definition.
                    current_metric = ""
                    parenthes_count = 0
            else:
                m = gr.match(line)
                if m:
                    current_group = m.group(2)
                    if not current_group:
                        # Nothing after add_group( on this line: look for the name on following lines.
                        verbose(verb, "empty group found")
                        serching_group = True
                    m = string_content.match(current_group)
                    if m:
                        current_group = m.group(1)
                    else:
                        # Not a literal: take the text before the first comma and
                        # translate it through param_mapping when an entry exists.
                        m = alternative_name.match(current_group)
                        if m:
                            current_group = param_mapping[m.group(1)] if m.group(1) in param_mapping else m.group(1)
                            verbose(verb, "Alternative group", file_name, line_number, current_group)
                # NOTE(review): nesting reconstructed from a flattened listing; this
                # directive check is assumed to run only outside a metric definition - confirm.
                m = metrics_directive.match(line)
                if m:
                    param_mapping[m.group(1).strip()] = json.loads(m.group(2))
            line_number += 1
    return metrics
def write_metrics_to_file(out_file, metrics, fmt="pipe"):
    """Append ``metrics`` to ``out_file`` in the requested format.

    ``fmt`` "yml" dumps the whole mapping as YAML, keeping insertion order.
    ``fmt`` "pipe" writes one line per metric: the name (with '-' replaced by
    '_') followed by its fields, all separated by '|'. Any other value leaves
    the file unchanged apart from being opened for append.
    """
    with open(out_file, "a") as out:
        if fmt == "yml":
            yaml.dump(metrics, out, sort_keys=False)
        elif fmt == "pipe":
            for name, fields in metrics.items():
                safe_name = name.replace('-', '_')
                out.write(safe_name + '|' + '|'.join(fields) + '\n')
if __name__ == '__main__':
    # Stand-alone use: parse one source file and append its metrics to the output file.
    cli = argparse.ArgumentParser(description='get metrics descriptions from file', conflict_handler="resolve")
    cli.add_argument('-p', '--prefix', default="scylla_", help='the prefix added to the metrics names')
    cli.add_argument('-o', '--out-file', default="metrics.out", help='The output file')
    cli.add_argument('-c', '--config-file', default="metrics-config.yml", help='The configuration file used to add extra data missing in the code')
    cli.add_argument('-v', '--verbose', action='store_true', default=False, help='When set prints verbose information')
    cli.add_argument('-F', '--format', default="pipe", help='Set the output format, can be pipe, or yml')
    cli.add_argument('file', help='the file to parse')
    options = cli.parse_args()

    # The per-file configuration supplies parameters and groups missing from the code.
    config = get_metrics_information(options.config_file)
    metrics = get_metrics_from_file(options.file, options.prefix, config, options.verbose)
    write_metrics_to_file(options.out_file, metrics, options.format)