Files
scylladb/dist/common/scripts/scylla_prepare
Takuya ASADA eb30594a60 dist: detect corrupted NUMA topology information
Some environments have corrupted NUMA topology information, such as
certain instance types on AWS EC2 with specific Linux kernel images.
In such environments, we cannot get HW information correctly from hwloc,
so we cannot proceed with the optimization done by perftune.
To avoid a script error, check the NUMA topology information and skip
running perftune if the information is corrupted.

Related scylladb/seastar#2925

Closes scylladb/scylladb#26344
2025-10-22 01:11:14 +03:00

189 lines
7.4 KiB
Python
Executable File

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright 2018-present ScyllaDB
#
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
import os
import sys
import glob
import platform
import distro
from scylla_util import *
from subprocess import run
def get_cur_cpuset():
    """
    Read the CPUSET value from /etc/scylla.d/cpuset.conf and return the bare CPU list.
    A value of the form '--cpuset <list>' is reduced to '<list>'; any other value is
    returned as is. Surrounding whitespace is stripped in both cases.
    :return: the cpuset string, e.g. '0,2-7' for 'CPUSET="--cpuset 0,2-7"'
    """
    cpuset_conf = sysconfig_parser('/etc/scylla.d/cpuset.conf')
    raw_value = cpuset_conf.get('CPUSET')
    # Drop the command line option name and keep only its argument.
    without_option = re.sub(r'^--cpuset (.+)$', r'\1', raw_value)
    return without_option.strip()
def cpu_mask_is_zero(cpu_mask):
    """
    Tell whether a hwloc-calc style CPU mask has no bits set.
    The mask is a comma-separated list of hex values in which zero components
    may be left empty, e.g. 0xffff,,0xffff
    :param cpu_mask: hwloc-calc generated CPU mask
    :return: True if every component is empty or zero, False otherwise
    """
    components = cpu_mask.split(',')
    # An empty component stands for an omitted all-zeros word, so it is skipped
    # before any hex parsing is attempted.
    return not any(chunk and int(chunk, 16) != 0 for chunk in components)
def get_irq_cpu_mask():
    """
    Compute the irq_cpu_mask that matches the cpuset written in cpuset.conf.

    The calculation relies on the "CPU masks invariant":
        irq_cpu_mask | compute_cpu_mask == cpu_mask.
    This function is used while generating perftune.yaml, i.e. when no restriction
    is put on cpu_mask, so cpu_mask is "all CPUs" ('all' in hwloc-calc terms).
    (For any "special" cpu_mask value a user has to write his/her own perftune.yaml.)

    With cpu_mask == 'all', the irq_cpu_mask is obtained by clearing the bits of the
    compute_cpu_mask (the --cpuset value from cpuset.conf) in the 'all' mask:
        hwloc-calc --pi all ~PU:X ~PU:Y ~PU:Z ...
    where X,Y,Z,... are single CPU indexes or CPU ranges.

    For example, for the cpuset
        0,2-7,17-24,35
    the command is
        hwloc-calc --pi all ~PU:0 ~PU:2-7 ~PU:17-24 ~PU:35

    :return: the hwloc-calc generated IRQ CPU mask
    :raises Exception: if /etc/scylla.d/cpuset.conf does not exist
    """
    if not os.path.exists('/etc/scylla.d/cpuset.conf'):
        raise Exception('/etc/scylla.d/cpuset.conf not found')

    # One '~PU:<entry>' exclusion per comma-separated cpuset entry.
    exclusions = " ".join(f'~PU:{entry}' for entry in get_cur_cpuset().split(","))
    mask = out(f"/opt/scylladb/bin/hwloc-calc --pi all {exclusions}").strip()
    if not cpu_mask_is_zero(mask):
        return mask

    # An all-zeros result means cpuset.conf uses every present CPU.
    # In that case the IRQ mask has to be all-CPUs too, a.k.a. MQ mode.
    return out("/opt/scylladb/bin/hwloc-calc all").strip()
def create_perftune_conf(cfg):
    """
    Decide whether perftune.py has to be run and make sure /etc/scylla.d/perftune.yaml
    exists if so.

    The perftune command line is assembled from the options enabled in the
    scylla-server configuration (NIC/disks tuning, SET_CLOCKSOURCE,
    DISABLE_WRITEBACK_CACHE). If none of them is enabled, or if the NUMA topology
    information is reported as invalid, nothing is created. An already existing
    perftune.yaml is left untouched.
    :param cfg: parsed scylla-server configuration
    :return boolean indicating if perftune.py should be executed
    """
    perftune_args = ''

    if get_set_nic_and_disks_config_value(cfg) == 'yes':
        # The IRQ mask below is calculated with hwloc, so the topology has to be
        # validated before that call.
        if not check_sysfs_numa_topology_is_valid():
            print('WARNING: NUMA topology information is corrupted, skip running perftune')
            return False
        nic = cfg.get('IFNAME') or 'eth0'
        irq_cpu_mask = get_irq_cpu_mask()
        # 'irq_cpu_mask' is a comma separated list of 32-bits wide masks,
        # therefore it is passed in quotes.
        perftune_args += f'--tune net --nic "{nic}" --irq-cpu-mask "{irq_cpu_mask}"'

    if cfg.has_option('SET_CLOCKSOURCE') and cfg.get('SET_CLOCKSOURCE') == 'yes':
        perftune_args += ' --tune system --tune-clock'

    if cfg.has_option('DISABLE_WRITEBACK_CACHE') and cfg.get('DISABLE_WRITEBACK_CACHE') == 'yes':
        perftune_args += ' --write-back-cache=false'

    # No tuning option is enabled: there is nothing for perftune.py to do.
    if not perftune_args:
        return False

    if not check_sysfs_numa_topology_is_valid():
        print('WARNING: NUMA topology information is corrupted, skip running perftune')
        return False

    if os.path.exists('/etc/scylla.d/perftune.yaml'):
        return True

    # Let perftune.py dump the resulting options and store them as perftune.yaml.
    dumped_options = out('/opt/scylladb/scripts/perftune.py ' + perftune_args + ' --dump-options-file')
    with open('/etc/scylla.d/perftune.yaml', 'w') as yaml_file:
        yaml_file.write(dumped_options)
    os.chmod('/etc/scylla.d/perftune.yaml', 0o644)
    return True
def verify_cpu():
    """
    Abort the script if an x86_64 CPU lacks a feature Scylla requires.

    On x86_64 every 'flags' line of /proc/cpuinfo is checked for the sse4_2 and
    pclmulqdq flags. If any of them is missing, an error is printed and the
    process exits with status 1. On other architectures nothing is checked.
    :return: None
    """
    if platform.machine() != 'x86_64':
        return

    needed_flags = {'sse4_2', 'pclmulqdq'}
    # The context manager closes the file on every path, including sys.exit().
    with open('/proc/cpuinfo') as cpuinfo:
        for line in cpuinfo:
            if not line.startswith('flags'):
                continue
            # A line looks like "flags : fpu vme ...": skip the key and the colon.
            actual_flags = set(line.split()[2:])
            missing_flags = needed_flags - actual_flags
            if missing_flags:
                # Sorted so that the message does not depend on set iteration order.
                print(f"ERROR: You will not be able to run Scylla on this machine because its CPU lacks the following features: {' '.join(sorted(missing_flags))}")
                print('\nIf this is a virtual machine, please update its CPU feature configuration or upgrade to a newer hypervisor.')
                sys.exit(1)
if __name__ == '__main__':
    # Script entry point: validate the CPU, require root, then prepare the system
    # according to NETWORK_MODE from the scylla-server configuration.
    verify_cpu()
    if os.getuid() > 0:
        print('Requires root permission.')
        sys.exit(1)
    cfg = sysconfig_parser(sysconfdir_p() / 'scylla-server')
    mode = cfg.get('NETWORK_MODE')

    if mode == 'virtio':
        # Re-create the TAP device, attach it to the bridge and hand
        # /dev/vhost-net over to the configured user and group.
        tap = cfg.get('TAP')
        user = cfg.get('USER')
        group = cfg.get('GROUP')
        bridge = cfg.get('BRIDGE')
        run(f'ip tuntap del mode tap dev {tap}', shell=True, check=True)
        run(f'ip tuntap add mode tap dev {tap} user {user} one_queue vnet_hdr', shell=True, check=True)
        run(f'ip link set dev {tap} up', shell=True, check=True)
        run(f'ip link set dev {tap} master {bridge}', shell=True, check=True)
        # ':' is the standard owner/group separator; the '.' form is obsolete and
        # is ambiguous for user names that contain a dot.
        run(f'chown {user}:{group} /dev/vhost-net', shell=True, check=True)
    elif mode == 'dpdk':
        # Bind the NIC to uio_pci_generic and write NR_HUGEPAGES to each NUMA node.
        ethpciid = cfg.get('ETHPCIID')
        nr_hugepages = cfg.get('NR_HUGEPAGES')
        run('modprobe uio', shell=True, check=True)
        run('modprobe uio_pci_generic', shell=True, check=True)
        run(f'/opt/scylladb/scripts/dpdk-devbind.py --force --bind=uio_pci_generic {ethpciid}', shell=True, check=True)
        # 'node[0-9]*' also matches multi-digit node ids (node10, node11, ...),
        # which a single-character 'node?' wildcard would skip.
        for node_dir in glob.glob('/sys/devices/system/node/node[0-9]*'):
            with open(f'{node_dir}/hugepages/hugepages-2048kB/nr_hugepages', 'w') as f:
                f.write(nr_hugepages)
        if distro.name() == 'Ubuntu':
            run('hugeadm --create-mounts', shell=True, check=True)
    else:
        # Any other mode: generate perftune.yaml if needed and apply it.
        try:
            res = create_perftune_conf(cfg)
        except Exception:
            print('Exception occurred while creating perftune.yaml:\n')
            scylla_excepthook(*sys.exc_info())
            print('\nTo fix the error, please re-run scylla_setup.')
            sys.exit(1)
        try:
            if res:
                run("{} --options-file /etc/scylla.d/perftune.yaml".format(perftune_base_command()), shell=True, check=True)
        except Exception:
            # Imported here so this handler does not depend on the wildcard
            # import happening to re-export the traceback module.
            import traceback
            print('Exception occurred while tuning system using perftune.yaml:\n')
            traceback.print_exc()
            print('\nTo fix the error, please re-run scylla_setup.')
            sys.exit(1)