mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-22 15:52:13 +00:00
The container image inherits kernel.core_pattern from the host. When the host pipes core dumps to a handler (e.g. Ubuntu's apport), that handler does not exist or work correctly inside the container, so core dumps are silently lost. Override any pipe-based core_pattern with a file-based pattern that writes directly to /var/lib/scylla/coredump/. The override is attempted both from the entrypoint (scyllasetup.coredumpSetup) and from scylla-server.sh when running as root; it succeeds only when the container has write access to /proc/sys/kernel/core_pattern and is silently skipped otherwise. Fixes: SCYLLADB-1366 Closes scylladb/scylladb#29337
279 lines
11 KiB
Python
279 lines
11 KiB
Python
import subprocess
|
|
import logging
|
|
import yaml
|
|
import os
|
|
import socket
|
|
import errno
|
|
|
|
def _unescape_mountinfo_field(field):
    """Decode the octal escapes the kernel uses in mountinfo path fields."""
    # Space, tab, newline and backslash are written as \040, \011, \012 and
    # \134. The backslash escape is decoded last so that a decoded literal
    # backslash can never be mistaken for the start of another escape.
    return (field.replace("\\040", " ")
                 .replace("\\011", "\t")
                 .replace("\\012", "\n")
                 .replace("\\134", "\\"))


def is_bind_mount(path, mountinfo_path="/proc/self/mountinfo"):
    """Return True if *path* is, or lives under, a non-root mount point.

    The check scans the mount table and compares the absolute form of
    *path* against the mount point (5th field) of every entry. The root
    filesystem ("/") is ignored, since every path lives under it.

    Args:
        path: File or directory to check; it does not need to exist.
        mountinfo_path: Mount table to read. Defaults to the table of the
            current process; overridable mainly for testing.

    Returns:
        True when a mount point other than "/" equals *path* or is one of
        its parent directories, False otherwise.

    Raises:
        OSError: if the mount table cannot be opened.
    """
    path = os.path.abspath(path)
    with open(mountinfo_path) as f:
        for line in f:
            fields = line.split()
            # Skip blank or truncated entries instead of raising IndexError.
            if len(fields) < 5:
                continue
            mount_point = _unescape_mountinfo_field(fields[4])
            # The root filesystem does not count as a (bind) mount here.
            if mount_point == "/":
                continue
            # Appending "/" keeps /etc/scylla from matching /etc/scyllax.
            if path == mount_point or path.startswith(mount_point + "/"):
                return True
    return False
|
|
|
|
class ScyllaSetup:
    """Apply container start-up options before the Scylla server is launched.

    The constructor copies the parsed command-line options onto the instance.
    Each public method then performs one setup step: running a helper script,
    writing a configuration file, or assembling the server command line.
    """

    # Kernel setting that decides where core dumps go; a leading '|' means
    # the dump is piped to a handler program.
    CORE_PATTERN_PATH = '/proc/sys/kernel/core_pattern'

    def __init__(self, arguments, extra_arguments):
        """Store the parsed options.

        Args:
            arguments: Object exposing the parsed options as attributes
                (developerMode, seeds, cpuset, ...).
            extra_arguments: Sequence of additional argument strings that
                arguments() appends verbatim to the generated command line.
        """
        self._developerMode = arguments.developerMode
        self._seeds = arguments.seeds
        self._cpuset = arguments.cpuset
        self._listenAddress = arguments.listenAddress
        self._rpcAddress = arguments.rpcAddress
        self._alternatorAddress = arguments.alternatorAddress
        self._broadcastAddress = arguments.broadcastAddress
        self._broadcastRpcAddress = arguments.broadcastRpcAddress
        self._apiAddress = arguments.apiAddress
        self._alternatorPort = arguments.alternatorPort
        self._alternatorHttpsPort = arguments.alternatorHttpsPort
        self._alternatorWriteIsolation = arguments.alternatorWriteIsolation
        self._smp = arguments.smp
        self._memory = arguments.memory
        self._reserveMemory = arguments.reserveMemory
        self._overprovisioned = arguments.overprovisioned
        self._housekeeping = not arguments.disable_housekeeping
        self._experimental_features = arguments.experimental_features
        self._authenticator = arguments.authenticator
        self._authorizer = arguments.authorizer
        self._clusterName = arguments.clusterName
        self._endpointSnitch = arguments.endpointSnitch
        self._replaceAddressFirstBoot = arguments.replaceAddressFirstBoot
        self._replaceNodeFirstBoot = arguments.replaceNodeFirstBoot
        self._io_setup = arguments.io_setup
        self._extra_args = extra_arguments
        self._dc = arguments.dc
        self._rack = arguments.rack
        self._blocked_reactor_notify_ms = arguments.blocked_reactor_notify_ms
        # Resolved lazily by _get_coredump_dir().
        self._coredump_dir = None

    def _run(self, *args, **kwargs):
        """Log a command and run it via subprocess.check_call.

        Raises:
            subprocess.CalledProcessError: if the command exits non-zero.
        """
        logging.info('running: %s', args)
        subprocess.check_call(*args, **kwargs)

    def developerMode(self):
        """Run scylla_dev_mode_setup with the configured developer-mode value."""
        self._run(['/opt/scylladb/scripts/scylla_dev_mode_setup', '--developer-mode', self._developerMode])

    def cpuSet(self):
        """Run scylla_cpuset_setup when a cpuset was given; otherwise do nothing."""
        if self._cpuset is None:
            return
        self._run(['/opt/scylladb/scripts/scylla_cpuset_setup', '--cpuset', self._cpuset])

    def io(self):
        """Ensure the first data directory exists and optionally run scylla_io_setup.

        The data directory is taken from data_file_directories in
        /etc/scylla/scylla.yaml, falling back to <workdir>/data (workdir
        itself defaulting to /var/lib/scylla). scylla_io_setup is run only
        when the io_setup option equals "1".
        """
        conf_dir = "/etc/scylla"
        with open(os.path.join(conf_dir, "scylla.yaml")) as f:
            # An empty YAML document loads as None; treat it as "no settings".
            cfg = yaml.safe_load(f) or {}
        if not cfg.get('workdir'):
            cfg['workdir'] = '/var/lib/scylla'

        data_dirs = cfg.get('data_file_directories')
        # Missing, empty, or consisting only of blank entries -> use default.
        if not data_dirs or not " ".join(data_dirs).strip():
            data_dirs = [os.path.join(cfg['workdir'], 'data')]

        if len(data_dirs) > 1:
            logging.warning("%d data directories found. scylla_io_setup currently lacks support for it, and only %s will be evaluated",
                            len(data_dirs), data_dirs[0])
        data_dir = data_dirs[0]
        if not os.path.exists(data_dir):
            os.makedirs(data_dir)

        if self._io_setup == "1":
            self._run(['/opt/scylladb/scripts/scylla_io_setup'])

    def cqlshrc(self):
        """Write $HOME/.cassandra/cqlshrc pointing cqlsh at this node.

        The hostname is the RPC address if set, else the listen address,
        else the address the local hostname resolves to.

        Raises:
            KeyError: if HOME is not set in the environment.
        """
        home = os.environ['HOME']
        if self._rpcAddress:
            hostname = self._rpcAddress
        elif self._listenAddress:
            hostname = self._listenAddress
        else:
            hostname = socket.gethostbyname(socket.gethostname())
        self._run(["mkdir", "-p", "%s/.cassandra" % home])
        with open("%s/.cassandra/cqlshrc" % home, "w") as cqlshrc:
            cqlshrc.write("[connection]\nhostname = %s\n" % hostname)

    def set_housekeeping(self):
        """Write /etc/scylla.d/housekeeping.cfg with the check-version flag."""
        flag = "True\n" if self._housekeeping else "False\n"
        with open("/etc/scylla.d/housekeeping.cfg", "w") as f:
            f.write("[housekeeping]\ncheck-version: " + flag)

    def write_rackdc_properties(self):
        """Write dc/rack to /etc/scylla/cassandra-rackdc.properties.

        Does nothing when neither dc nor rack was given. Otherwise the
        endpoint snitch defaults to GossipingPropertyFileSnitch (the stored
        option is updated), and a missing dc or rack is filled in with
        "datacenter1" / "rack1".

        Raises:
            RuntimeError: if another endpoint snitch was requested, or if the
                properties file is (under) a mount point.
        """
        if self._dc is None and self._rack is None:
            return

        if self._endpointSnitch is None:
            self._endpointSnitch = "GossipingPropertyFileSnitch"

        if self._endpointSnitch != "GossipingPropertyFileSnitch":
            raise RuntimeError(
                f"Cannot use dc and rack parameters together with endpoint snitch '{self._endpointSnitch}'. "
                "The dc and rack parameters are only supported with the endpoint snitch 'GossipingPropertyFileSnitch'."
            )

        conf_dir = "/etc/scylla"
        rackdc_path = os.path.join(conf_dir, "cassandra-rackdc.properties")

        if is_bind_mount(rackdc_path):
            raise RuntimeError(
                f"Cannot write {rackdc_path}: file is a bind mount. "
                "The dc and rack parameters cannot be used when this file is mounted from the host."
            )

        # We must specify both dc and rack in the file.
        # If only one of them is set, write a default value for the other.
        dc = self._dc if self._dc is not None else "datacenter1"
        rack = self._rack if self._rack is not None else "rack1"
        with open(rackdc_path, "w") as f:
            f.write(f"dc={dc}\n")
            f.write(f"rack={rack}\n")

    def _get_coredump_dir(self):
        """Return the coredump directory, deriving it from scylla.yaml workdir if needed.

        The result is <workdir>/coredump and is cached on the instance.
        Any failure to read or parse scylla.yaml falls back to the default
        workdir /var/lib/scylla.
        """
        if self._coredump_dir is not None:
            return self._coredump_dir
        conf_dir = "/etc/scylla"
        try:
            with open(os.path.join(conf_dir, "scylla.yaml")) as f:
                cfg = yaml.safe_load(f) or {}
        except Exception:
            # Deliberately best-effort: a broken config must not block setup.
            cfg = {}
        workdir = cfg.get('workdir') or '/var/lib/scylla'
        self._coredump_dir = os.path.join(workdir, 'coredump')
        return self._coredump_dir

    def coredumpSetup(self):
        """Configure coredump handling for containers.

        The host's kernel.core_pattern may pipe core dumps to a handler
        (e.g. Ubuntu's apport) that does not exist or work correctly
        inside the container. This method tries to switch to a file-based
        core_pattern so that coredumps are written directly to disk.

        Writing to /proc/sys/kernel/core_pattern requires privileges
        (root with CAP_SYS_ADMIN). When the container lacks permission
        a warning is logged with guidance for the operator.

        The method never raises for these failures: it returns early when
        the coredump directory cannot be created, when core_pattern cannot
        be read, or when the current pattern is already file-based.
        """
        coredump_dir = self._get_coredump_dir()

        try:
            os.makedirs(coredump_dir, exist_ok=True)
        except OSError as e:
            logging.warning('Could not create coredump directory %s: %s',
                            coredump_dir, e)
            return

        try:
            with open(self.CORE_PATTERN_PATH) as f:
                current = f.read().strip()
        except Exception as e:
            logging.debug('Could not read %s: %s', self.CORE_PATTERN_PATH, e)
            return

        # Only pipe-based patterns are overridden; file patterns are kept.
        if not current.startswith('|'):
            return

        desired = f'{coredump_dir}/core.%e.%p.%t'
        try:
            with open(self.CORE_PATTERN_PATH, 'w') as f:
                f.write(desired + '\n')
            logging.info('kernel.core_pattern set to %s', desired)
        except OSError as e:
            if e.errno in (errno.EACCES, errno.EPERM, errno.EROFS):
                logging.warning(
                    'kernel.core_pattern pipes to a program that may not work '
                    'inside the container, and we lack permission to override it. '
                    'To fix this, either run with --privileged or set on the host: '
                    'sysctl -w kernel.core_pattern="%s"', desired)
            else:
                logging.debug('Unexpected OSError setting core_pattern: %s', e)
        except Exception as e:
            logging.debug('Unexpected error in coredumpSetup: %s', e)

    def arguments(self):
        """Build the server arguments and write them to /etc/scylla.d/docker.conf.

        Unset addresses are filled in on the instance as a side effect: the
        listen address defaults to the address the local hostname resolves
        to, the RPC and alternator addresses default to the listen address,
        and the seeds default to the broadcast address (or the listen
        address when no broadcast address is set).

        The file receives a single SCYLLA_DOCKER_ARGS="..." line containing
        the generated arguments followed by the extra arguments.
        """
        args = []
        if self._memory is not None:
            args += ["--memory %s" % self._memory]

        if self._reserveMemory is not None:
            args += ["--reserve-memory %s" % self._reserveMemory]

        if self._smp is not None:
            args += ["--smp %s" % self._smp]

        # Overprovisioned is implied when neither it nor a cpuset was given.
        if self._overprovisioned == "1" or (self._overprovisioned is None and self._cpuset is None):
            args += ["--overprovisioned"]

        if self._listenAddress is None:
            self._listenAddress = socket.gethostbyname(socket.gethostname())

        if self._rpcAddress is None:
            self._rpcAddress = self._listenAddress

        if self._alternatorAddress is None:
            self._alternatorAddress = self._listenAddress

        if self._seeds is None:
            if self._broadcastAddress is not None:
                self._seeds = self._broadcastAddress
            else:
                self._seeds = self._listenAddress

        args += ["--listen-address %s" % self._listenAddress,
                 "--rpc-address %s" % self._rpcAddress,
                 "--seed-provider-parameters seeds=%s" % self._seeds]

        if self._broadcastAddress is not None:
            args += ["--broadcast-address %s" % self._broadcastAddress]
        if self._broadcastRpcAddress is not None:
            args += ["--broadcast-rpc-address %s" % self._broadcastRpcAddress]

        if self._apiAddress is not None:
            args += ["--api-address %s" % self._apiAddress]

        if self._alternatorAddress is not None:
            args += ["--alternator-address %s" % self._alternatorAddress]

        if self._alternatorPort is not None:
            args += ["--alternator-port %s" % self._alternatorPort]

        if self._alternatorHttpsPort is not None:
            args += ["--alternator-https-port %s" % self._alternatorHttpsPort]

        if self._alternatorWriteIsolation is not None:
            args += ["--alternator-write-isolation %s" % self._alternatorWriteIsolation]

        if self._authenticator is not None:
            args += ["--authenticator %s" % self._authenticator]

        if self._authorizer is not None:
            args += ["--authorizer %s" % self._authorizer]

        if self._experimental_features is not None:
            for feature in self._experimental_features:
                args += [f"--experimental-features {feature}"]

        if self._clusterName is not None:
            args += ["--cluster-name %s" % self._clusterName]

        if self._endpointSnitch is not None:
            args += ["--endpoint-snitch %s" % self._endpointSnitch]

        # replace-node-first-boot takes precedence over replace-address-first-boot.
        if self._replaceNodeFirstBoot is not None:
            args += ["--replace-node-first-boot %s" % self._replaceNodeFirstBoot]
        elif self._replaceAddressFirstBoot is not None:
            args += ["--replace-address-first-boot %s" % self._replaceAddressFirstBoot]

        args += ["--blocked-reactor-notify-ms %s" % self._blocked_reactor_notify_ms]

        with open("/etc/scylla.d/docker.conf", "w") as docker_conf:
            docker_conf.write("SCYLLA_DOCKER_ARGS=\"%s\"\n" % (" ".join(args) + " " + " ".join(self._extra_args)))
|