Files
scylladb/scripts/get_description.py
Amnon Heiman 803d414896 get_description.py: Make the Script a library
This patch makes the get_description.py script easier to use by the
documentation automation:
1. The script is now a library.
2. You can choose the output format of the script; the currently supported
   formats are pipe and yml.

You can still call the script from the command line, like before, but you can
also call it from another Python script.

For example, the following Python script would generate the documentation
for the metrics description of the ./alternator/ttl.cc file.
```

import get_description

metrics = get_description.get_metrics_from_file("./alternator/ttl.cc", "scylla", get_description.get_metrics_information("metrics-config.yml"))
get_description.write_metrics_to_file("out.yaml", metrics, "yml")
```

Signed-off-by: Amnon Heiman <amnon@scylladb.com>

Closes scylladb/scylladb#18136
2024-04-02 10:07:11 +03:00

289 lines
12 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import re
import yaml
import json
import inspect
from encodings import undefined
gr = re.compile(r'.*(\.|->)add_group\(\s*(.*)')
desc = re.compile(r'.*..::descrs\( *("[^"]+")(.*)')
alternative_name = re.compile(r'([^,]*),')
metric = re.compile(r'.*..::make_(absolute|counter|current|derive|gauge|histogram|queue|summary|total|total_operations|queue_length|total_bytes|current_bytes)\((.*)')
string_content = re.compile(r'\s*"([^"]+)"\s*,.*')
string_match = re.compile(r'"([^"]+)"')
sstring_match = re.compile(r'\s*sstring\(\s*("[^"]+")\s*\)\s*')
metrics_directive = re.compile(r'.*@metrics\s*([^=]+)\s*=\s*(\[[^\]]*\]).*')
format_match = re.compile(r'\s*(?:seastar::)?format\(\s*"([^"]+)"\s*,\s*(.*)\s*')
def verbose(verb, *arg):
    """Print ``arg`` with ``print`` when ``verb`` is truthy; otherwise do nothing."""
    if not verb:
        return
    print(*arg)
def get_end_part(str):
    """Return the text before the first comma that is outside parentheses.

    Parentheses are tracked with a simple depth counter; a comma only ends
    the part when the counter is exactly zero.

    Returns:
        The prefix of ``str`` up to (not including) that comma, or None when
        no such comma exists.
    """
    depth = 0
    for end, ch in enumerate(str):
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        elif ch == ',' and depth == 0:
            return str[:end]
    return None
def find_end_parenthes(str, pc):
    """Return the prefix of ``str`` that ends where the parenthesis count hits zero.

    Args:
        str: text to scan.
        pc: number of parentheses already open before ``str`` starts.

    Returns:
        ``str`` up to (not including) the first character after which the
        running count equals zero. The count is checked after every
        character, so a starting count of zero returns an empty prefix unless
        the first character is an opening parenthesis. When the count never
        reaches zero the whole of ``str`` is returned.
    """
    depth = pc
    step = {'(': 1, ')': -1}
    for pos, ch in enumerate(str):
        depth += step.get(ch, 0)
        if depth == 0:
            return str[:pos]
    return str
def split_paterns(str):
    """Split ``str`` on ``+`` signs that are outside parentheses.

    A parenthesised section is copied verbatim (its content is taken from
    ``find_end_parenthes``) and a closing parenthesis is appended after it,
    so any ``+`` inside it does not split. Pieces are not stripped. An empty
    trailing piece is dropped; empty pieces elsewhere are kept.

    Returns:
        The list of pieces in order of appearance.
    """
    parts = []
    current = ""
    i = 0
    total = len(str)
    while i < total:
        ch = str[i]
        if ch == '(':
            inner = find_end_parenthes(str[i + 1:], 1)
            current = current + "(" + inner + ")"
            # Skip the opening parenthesis, the inner text and its terminator.
            i += len(inner) + 2
            continue
        if ch == '+':
            parts.append(current)
            current = ""
        else:
            current += ch
        i += 1
    if current:
        parts.append(current)
    return parts
def validate_parameter(txt, err=""):
    """Check that every parameter name has a non-empty entry in ``param_mapping``.

    ``param_mapping`` is not a parameter: it is read from module scope and
    must have been set before this function is called.

    Args:
        txt: a single parameter name or an iterable of names.
        err: context text included in the diagnostic that is printed on
            failure.

    Returns:
        The names (a single string is wrapped in a list), or None after
        printing a diagnostic when a name is missing from ``param_mapping``
        or maps to an empty value.
    """
    candidates = [txt] if isinstance(txt, str) else txt
    for name in candidates:
        if name not in param_mapping:
            print("Could not resolve param", err, name)
            return None
        if not param_mapping[name]:
            print("Could not resolve param is empty", err, candidates)
            return None
    return candidates
def sort_by_index(arr,ind):
    """Return the elements of ``arr`` picked in the order given by the indices ``ind``."""
    picked = []
    for position in ind:
        picked.append(arr[position])
    return picked
def make_name_list(names, err, verb=None):
    """Expand the parts of a metric name (or description) into concrete strings.

    Each entry of ``names`` is one of:

    * a dict ``{'str': <format string>, 'param': <name or list of names>}``,
      whose format string is appended as is and whose parameters are recorded;
    * a string starting with a double quote, appended without its first and
      last character;
    * any other string, treated as a parameter name: it is validated with
      ``validate_parameter`` and contributes a ``{}`` placeholder.

    The recorded parameter names are sorted and joined with ``;`` to form the
    key looked up in the module-level ``param_mapping``; the format string is
    then applied to every entry stored under that key.

    Args:
        names: the parts described above.
        err: context text added to diagnostics.
        verb: when truthy, progress information is printed.

    Returns:
        A one-element list when no parameter is involved, otherwise one
        formatted string per ``param_mapping`` entry. None when a parameter
        name fails validation.

    The process exits with status -1 when the joined key is missing from
    ``param_mapping`` or one of its entries is empty.
    """
    param = []
    format_string = ""
    for txt in names:
        if isinstance(txt, dict):
            format_string += txt['str']
            extra = txt['param']
            param = param + ([extra] if isinstance(extra, str) else extra)
        elif txt.startswith('"'):
            format_string += txt[1:-1]
        else:
            where = "(make_name_list:" + str(inspect.getframeinfo(inspect.currentframe()).lineno) + ")" + err
            validated = validate_parameter(txt, where)
            # Check the validation result before concatenating: adding None
            # to a list raises TypeError and would hide the diagnostic below.
            if not validated:
                print("make_name_list:" + str(inspect.getframeinfo(inspect.currentframe()).lineno), names)
                return None
            param = param + validated
            format_string += "{}"
    if not param:
        return [format_string]
    # Positions of the parameters as they appear in the format string, listed
    # in sorted-name order (an argsort of ``param``).
    # NOTE(review): sort_by_index applies this argsort to each value tuple; an
    # argsort is its own inverse for two parameters, for three or more confirm
    # it matches the order of the values stored under the ';'-joined key.
    indexed_array = [index for index, _ in sorted(enumerate(param), key=lambda x: x[1])]
    verbose(verb, "make_name_list", param)
    param.sort()
    param_keys = ';'.join(param)
    if param_keys not in param_mapping:
        print("Parameter not found", param_keys, err)
        exit(-1)
    for p in param_mapping[param_keys]:
        if not p:
            print("empty (make_name_list:" + str(inspect.getframeinfo(inspect.currentframe()).lineno) + ")" + err, param)
            exit(-1)
    if len(param) == 1:
        result = [format_string.format(p) for p in param_mapping[param[0]]]
    else:
        result = [format_string.format(*sort_by_index(p, indexed_array)) for p in param_mapping[param_keys]]
    verbose(verb, "make_name_list", result)
    return result
def get_decription(str):
    """Extract the argument text of the first ``::description(...)`` call.

    Double-quoted strings are skipped as a whole, so parentheses inside them
    do not affect the nesting count.

    Returns:
        The text between ``::description(`` and its matching closing
        parenthesis, or None when the marker is absent, a string literal is
        unterminated, or the closing parenthesis is never found.
    """
    marker = '::description('
    start = str.find(marker)
    if start < 0:
        # Without this check the scan would begin at -1 + len(marker) and
        # could return an unrelated piece of the line.
        return None
    b = start + len(marker)
    p = b
    depth = 1
    while p < len(str):
        if str[p] == '"':
            # Jump to the closing quote of the string literal.
            p = str.find('"', p + 1)
            if p < 0:
                break
        elif str[p] == '(':
            depth += 1
        elif str[p] == ')':
            depth -= 1
            if depth == 0:
                return str[b:p]
        p += 1
    return None
def merge_strings(str, str2):
    """Join two source fragments, fusing adjacent string literals.

    Trailing whitespace of ``str`` and leading whitespace of ``str2`` are
    dropped. When the first fragment ends with a double quote and the second
    starts with one, the two quotes are removed so the literals become one;
    otherwise the fragments are joined with a single space.

    Whitespace-only or empty fragments are handled without raising.
    """
    left = str.rstrip()
    right = str2.lstrip()
    # endswith/startswith are safe on empty strings, unlike indexing [-1]/[0].
    if left.endswith('"') and right.startswith('"'):
        return left[:-1] + right[1:]
    return left + ' ' + right
def clear_string(str):
    """Normalise one part of a metric name or description expression.

    Returns:
        * the quoted literal when ``str`` matches ``sstring_match``
          (``sstring("...")``);
        * a dict ``{'str': <format string>, 'param': <arg or list of args>}``
          when ``str`` matches ``format_match``; the arguments are the
          comma-separated, stripped items up to the closing parenthesis of the
          call, and a single argument is stored as a plain string;
        * otherwise ``str`` with surrounding whitespace removed.
    """
    sstring = sstring_match.match(str)
    if sstring:
        return sstring.group(1)
    fmt = format_match.match(str)
    if not fmt:
        return str.strip()
    arguments = find_end_parenthes(fmt.group(2), 1)
    params = [argument.strip() for argument in arguments.split(',')]
    param = params[0] if len(params) == 1 else params
    return {'str': fmt.group(1), 'param': param}
def get_metrics_information(config_file):
    """Load the YAML file at ``config_file`` with ``yaml.safe_load`` and return the result."""
    with open(config_file, 'r') as config:
        parsed = yaml.safe_load(config)
    return parsed
def get_metrics_from_file(file_name, prefix, metrics_information, verb=None):
    """Collect the metrics declared in a source file.

    The file is read line by line. ``add_group(`` lines set the current
    group, ``make_<type>(`` calls (possibly spanning several lines) are
    accumulated until their parentheses balance, and the metric name(s) and
    description(s) are then extracted and expanded with ``make_name_list``.

    Args:
        file_name: path of the file to parse; a leading ``./`` is removed
            before looking the file up in ``metrics_information``.
        prefix: text prepended to every metric name.
        metrics_information: mapping from file name to per-file settings.
            The keys read here are ``skip``, ``allowmismatch``, ``params``
            and ``groups`` (``groups`` is keyed by the line number as a
            string; line numbers start at 0).
        verb: when truthy, progress information is printed.

    Returns:
        A dict mapping the full metric name to
        ``[type, description, group, base_name, "<file_name>:<line>"]``.

    The process exits with status 0 when the file is marked ``skip`` and
    with status -1 on any parsing problem (a diagnostic is printed first).
    ``@metrics key = [...]`` directives found in the file are added to the
    parameter mapping, which is the ``params`` object from
    ``metrics_information`` when one is configured.
    """
    # validate_parameter() and make_name_list() read ``param_mapping`` from
    # module scope, so it must be published there; as a plain local it would
    # leave them with a NameError on the first parameterised name.
    global param_mapping
    current_group = ""
    clean_name = file_name[2:] if file_name.startswith('./') else file_name
    param_mapping = {}
    groups = {}
    allowmismatch = False
    if clean_name in metrics_information:
        file_info = metrics_information[clean_name]
        if (isinstance(file_info, str) and file_info == "skip") or "skip" in file_info:
            exit(0)
        if "allowmismatch" in file_info:
            allowmismatch = file_info["allowmismatch"]
        if "params" in file_info:
            param_mapping = file_info["params"]
        if "groups" in file_info:
            groups = file_info["groups"]
    metrics = {}
    line_number = 0
    current_metric = ""
    parenthes_count = 0
    serching_group = False
    with open(file_name) as file:
        for line in file:
            if str(line_number) in groups:
                # A group configured for this line overrides what the code says.
                current_group = groups[str(line_number)]
                verbose(verb, "found group from config ", groups[str(line_number)])
            if serching_group:
                # add_group( was seen with nothing after it; the group name is
                # expected as a string literal on a following line.
                m = string_content.match(line)
                if not m:
                    line_number += 1
                    continue
                current_group = m.group(1)
                serching_group = False
                verbose(verb, "group found on new line", current_group)
            m = metric.match(line)
            if m and not current_group:
                print("new name found with no group", file_name, line_number, line)
                exit(-1)
            if current_metric or m:
                if gr.match(line):
                    print("add group found unexpectedly", file_name, line_number, line)
                    exit(-1)
                if current_metric and m:
                    print("new metrics was found while parsing the previous one", file_name, line_number, line)
                    exit(-1)
                # Escaped quotes are temporarily replaced by '#' so they do
                # not confuse the string-literal regexes.
                ln = line.replace('\\"','#').rstrip()
                current_metric = merge_strings(current_metric, ln)
                # Count parentheses outside string literals only.
                no_string = re.sub(string_match, '', ln)
                parenthes_count += no_string.count('(')
                parenthes_count -= no_string.count(')')
                if parenthes_count <= 0:
                    verbose(verb, current_metric, file_name, line_number)
                    m = metric.match(current_metric)
                    typ = m.group(1) # type is taken from the make_metrics part
                    prt = m.group(2)
                    m = string_content.match(m.group(2))
                    if not m:
                        multi_part_name = get_end_part(prt)
                        verbose(verb, "multi part name ", multi_part_name)
                        if multi_part_name:
                            names = [clear_string(s) for s in multi_part_name.split('+')]
                        else:
                            print("names not found", file_name, line_number, line, current_metric)
                            exit(-1)
                    else:
                        names = ['"' + m.group(1) + '"']
                    desc_str = get_decription(current_metric)
                    if desc_str:
                        m = string_match.match(desc_str)
                        if m:
                            descrs = [desc_str]
                        else:
                            descrs = [clear_string(s) for s in split_paterns(desc_str)]
                    else:
                        print("description not found", file_name, line_number, line, current_metric)
                        exit(-1)
                    name_list = make_name_list(names, file_name+" "+str(line_number), verb)
                    if not name_list:
                        print("no name list", current_metric)
                        exit(-1)
                    description_list = make_name_list(descrs, file_name+" "+str(line_number), verb)
                    if not description_list:
                        # make_name_list returns None when a parameter cannot
                        # be resolved; fail like the name list does instead of
                        # indexing None below.
                        print("no description list", current_metric)
                        exit(-1)
                    current_groups = current_group if isinstance(current_group, list) else [current_group]
                    for cg in current_groups:
                        for idx, base_name in enumerate(name_list):
                            name = prefix + cg + "_" + base_name
                            # NOTE(review): the single-description branch
                            # restores '#' as '"' while the per-name branch
                            # restores it as '\\"' - confirm this is intended.
                            description = description_list[0].replace('#','"') if len(description_list) == 1 else description_list[idx].replace('#','\\"')
                            if not allowmismatch and name in metrics and description != metrics[name][1]:
                                print('description problem, different descriptions found', file_name, line_number, names, typ, line, name, metrics[name][1], description)
                                print(metrics[name][1])
                                print(description)
                                exit(-1)
                            metrics[name] = [typ, description, cg, base_name, file_name + ":" + str(line_number)]
                    current_metric = ""
                    parenthes_count = 0
            else:
                m = gr.match(line)
                if m:
                    current_group = m.group(2)
                    if not current_group:
                        verbose(verb, "empty group found")
                        serching_group = True
                    m = string_content.match(current_group)
                    if m:
                        current_group = m.group(1)
                    else:
                        m = alternative_name.match(current_group)
                        if m:
                            current_group = param_mapping[m.group(1)] if m.group(1) in param_mapping else m.group(1)
                            verbose(verb, "Alternative group", file_name, line_number, current_group)
                m = metrics_directive.match(line)
                if m:
                    param_mapping[m.group(1).strip()] = json.loads(m.group(2))
            line_number += 1
    return metrics
def write_metrics_to_file(out_file, metrics, fmt="pipe"):
    """Append ``metrics`` to ``out_file`` in the requested format.

    Args:
        out_file: path of the output file; it is opened in append mode.
        metrics: mapping from metric name to a list of string fields.
        fmt: ``"yml"`` dumps the mapping with ``yaml.dump`` (insertion order
            kept); ``"pipe"`` writes one line per metric made of the name,
            with ``-`` replaced by ``_``, followed by the fields, all
            separated by ``|``. Any other value writes nothing.
    """
    with open(out_file, "a") as sink:
        if fmt == "yml":
            yaml.dump(metrics, sink, sort_keys=False)
        elif fmt == "pipe":
            for metric_name, fields in metrics.items():
                row = '|'.join(fields)
                sink.write(metric_name.replace('-', '_') + '|' + row + '\n')
if __name__ == '__main__':
    # Command-line entry point: parse one source file and append its metrics
    # to the output file.
    parser = argparse.ArgumentParser(description='get metrics descriptions from file', conflict_handler="resolve")
    parser.add_argument('-p', '--prefix', default="scylla_", help='the prefix added to the metrics names')
    parser.add_argument('-o', '--out-file', default="metrics.out", help='The output file')
    parser.add_argument('-c', '--config-file', default="metrics-config.yml", help='The configuration file used to add extra data missing in the code')
    parser.add_argument('-v', '--verbose', action='store_true', default=False, help='When set prints verbose information')
    # Restrict the format to the values write_metrics_to_file() handles; any
    # other value would silently produce an empty output file.
    parser.add_argument('-F', '--format', default="pipe", choices=["pipe", "yml"], help='Set the output format, can be pipe, or yml')
    parser.add_argument('file', help='the file to parse')
    args = parser.parse_args()
    metrics = get_metrics_from_file(args.file, args.prefix, get_metrics_information(args.config_file), args.verbose)
    write_metrics_to_file(args.out_file, metrics, args.format)