There are some environments which have corrupted NUMA topology information, such as some instance types on AWS EC2 with specific Linux kernel images. On such environments, we cannot get HW information correctly from hwloc, so we cannot proceed with the perftune optimization. To avoid causing a script error, check the NUMA topology information and skip running perftune if the information is corrupted. Related scylladb/seastar#2925 Closes scylladb/scylladb#26344
132 lines
5.2 KiB
Python
Executable File
132 lines
5.2 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright 2018-present ScyllaDB
|
|
#
|
|
|
|
#
|
|
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
import subprocess
|
|
import re
|
|
from scylla_util import *
|
|
from subprocess import run
|
|
|
|
def bool2str(val):
    """Map a truthy value to 'yes' and a falsy value to 'no'."""
    if val:
        return 'yes'
    return 'no'
|
|
|
|
def str2bool(val):
    """Return True only when val equals the exact string 'yes'; otherwise False."""
    if val == 'yes':
        return True
    return False
|
|
|
|
if __name__ == '__main__':
    # Update the 'scylla-server' sysconfig file from command-line options.
    # Exit codes used below: 1 for missing root permission or an unknown NIC,
    # 3 when the NUMA topology check fails.
    if os.getuid() > 0:
        print('Requires root permission.')
        sys.exit(1)

    cfg = sysconfig_parser(sysconfdir_p() / 'scylla-server')

    # The values currently stored in the config seed the argparse defaults.
    # Missing options fall back to a real boolean False: the string 'no' is
    # truthy and would be rendered as 'yes' by bool2str().
    set_nic_and_disks = str2bool(get_set_nic_and_disks_config_value(cfg))
    set_clocksource = (
        str2bool(cfg.get('SET_CLOCKSOURCE'))
        if cfg.has_option('SET_CLOCKSOURCE') else False
    )
    disable_writeback_cache = (
        str2bool(cfg.get('DISABLE_WRITEBACK_CACHE'))
        if cfg.has_option('DISABLE_WRITEBACK_CACHE') else False
    )

    # NOTE(review): the three 'store_true' flags below take their default from
    # the config, so a value that is already 'yes' cannot be switched back to
    # 'no' from this command line.
    parser = argparse.ArgumentParser(description='Setting parameters on Scylla sysconfig file.')
    parser.add_argument('--nic',
                        help='specify NIC')
    parser.add_argument('--mode',
                        help='network mode (posix, dpdk)')
    parser.add_argument('--nr-hugepages', type=int,
                        help='number of hugepages')
    parser.add_argument('--user',
                        help='user (dpdk requires root)')
    parser.add_argument('--group',
                        help='group (dpdk requires root)')
    parser.add_argument('--homedir',
                        help='scylla home directory')
    parser.add_argument('--confdir',
                        help='scylla config directory')
    parser.add_argument('--setup-nic-and-disks', action='store_true', default=set_nic_and_disks,
                        help='setup NIC\'s and disks\' interrupts, RPS, XPS, nomerges and I/O scheduler')
    parser.add_argument('--set-clocksource', action='store_true', default=set_clocksource,
                        help='Set enforcing fastest available Linux clocksource')
    parser.add_argument('--disable-writeback-cache', action='store_true', default=disable_writeback_cache,
                        help='Disable disk writeback cache')
    args = parser.parse_args()

    if args.nic and not is_valid_nic(args.nic):
        print('NIC {} not found.'.format(args.nic))
        sys.exit(1)

    # Command-line values win; otherwise keep what the config already has.
    ifname = args.nic if args.nic else cfg.get('IFNAME')
    network_mode = args.mode if args.mode else cfg.get('NETWORK_MODE')

    if args.setup_nic_and_disks:
        if not check_sysfs_numa_topology_is_valid():
            print('NUMA topology information is corrupted, not able to enable perftune feature.')
            sys.exit(3)
        res = out('{} --tune net --nic {} --get-cpu-mask'.format(perftune_base_command(), ifname))
        # we need to extract CPU mask from output, since perftune.py may also print warning messages (#10082)
        match = re.match('(.*\n)?(0x[0-9a-f]+(?:,0x[0-9a-f]+)*)', res, re.DOTALL)
        # A missing match is the only way the extraction can fail, so test
        # for it explicitly instead of hiding it behind a bare 'except'.
        if match is None:
            raise Exception(f'Failed to retrieve CPU mask: {res}')
        warning = match.group(1)
        rps_cpus = match.group(2)
        # print warning message if available
        if warning:
            print(warning.strip())
        if len(rps_cpus) > 0:
            cpuset = hex2list(rps_cpus)
            run('/opt/scylladb/scripts/scylla_cpuset_setup --cpuset {}'.format(cpuset), shell=True, check=True)

    # ETHDRV / ETHPCIID are only populated in dpdk mode; otherwise they are
    # reset to empty strings further down.
    ethdrv = ''
    ethpciid = ''
    if network_mode == 'dpdk':
        dpdk_status = out('/opt/scylladb/scripts/dpdk-devbind.py --status')
        # Escape the interface name so characters such as '.' are matched
        # literally rather than as regex metacharacters.
        ifname_re = re.escape(ifname)
        match = re.search(r'if={} drv=(\S+)'.format(ifname_re), dpdk_status, flags=re.MULTILINE)
        if match is None:
            raise Exception(f'Failed to find driver of NIC {ifname} in dpdk-devbind.py output')
        ethdrv = match.group(1)
        match = re.search(r'^(\S+:\S+:\S+\.\S+) [^\n]+ if={} '.format(ifname_re), dpdk_status, flags=re.MULTILINE)
        if match is None:
            raise Exception(f'Failed to find PCI ID of NIC {ifname} in dpdk-devbind.py output')
        ethpciid = match.group(1)

    # Write back only the values that were supplied or that differ.
    if args.mode:
        cfg.set('NETWORK_MODE', args.mode)
    if args.nic:
        cfg.set('IFNAME', args.nic)
    if cfg.get('ETHDRV') != ethdrv:
        cfg.set('ETHDRV', ethdrv)
    if cfg.get('ETHPCIID') != ethpciid:
        cfg.set('ETHPCIID', ethpciid)
    if args.nr_hugepages:
        cfg.set('NR_HUGEPAGES', args.nr_hugepages)
    if args.user:
        cfg.set('USER', args.user)
    if args.group:
        cfg.set('GROUP', args.group)
    if args.homedir:
        cfg.set('SCYLLA_HOME', args.homedir)
    if args.confdir:
        cfg.set('SCYLLA_CONF', args.confdir)

    if str2bool(get_set_nic_and_disks_config_value(cfg)) != args.setup_nic_and_disks:
        # Update whichever of the two option names the file already uses.
        if cfg.has_option('SET_NIC'):
            cfg.set('SET_NIC', bool2str(args.setup_nic_and_disks))
        else:
            cfg.set('SET_NIC_AND_DISKS', bool2str(args.setup_nic_and_disks))

    if cfg.has_option('SET_CLOCKSOURCE') and str2bool(cfg.get('SET_CLOCKSOURCE')) != args.set_clocksource:
        if not check_sysfs_numa_topology_is_valid():
            print('NUMA topology information is corrupted, not able to enable perftune feature.')
            sys.exit(3)
        cfg.set('SET_CLOCKSOURCE', bool2str(args.set_clocksource))

    if cfg.has_option('DISABLE_WRITEBACK_CACHE') and str2bool(cfg.get('DISABLE_WRITEBACK_CACHE')) != args.disable_writeback_cache:
        cfg.set('DISABLE_WRITEBACK_CACHE', bool2str(args.disable_writeback_cache))

    cfg.commit()
|