scylla_raid_setup may fail on Ubuntu minimal images because it calls
update-initramfs without first making sure that it is installed.
(cherry picked from commit b6dedf1ee1)
Closes scylladb/scylladb#19871
336 lines
12 KiB
Python
Executable File
336 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright 2018-present ScyllaDB
|
|
#
|
|
|
|
#
|
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
import os
|
|
import argparse
|
|
import distutils.util
|
|
import pwd
|
|
import grp
|
|
import sys
|
|
import stat
|
|
import logging
|
|
import pyudev
|
|
from pathlib import Path
|
|
from scylla_util import *
|
|
from subprocess import run, SubprocessError
|
|
|
|
LOGGER = logging.getLogger(__name__)
|
|
|
|
class UdevInfo:
    """Convenience accessor for the udev record of one device node.

    Exposes the filesystem-related udev properties (ID_FS_*) and the
    /dev/disk/by-* symlinks of the device, plus helpers that log diagnostics
    when the record does not look like a freshly formatted XFS filesystem.
    """

    def __init__(self, device_file):
        """Load the udev device that backs the node at *device_file*."""
        ctx = pyudev.Context()
        self.context = ctx
        self.device = pyudev.Devices.from_device_file(ctx, device_file)

    def _property(self, key):
        """Return the udev property *key*, or None when it is not set."""
        return self.device.properties.get(key)

    def _first_link(self, prefix):
        """Return the first device link starting with *prefix*, else None."""
        candidates = (link for link in self.device.device_links
                      if link.startswith(prefix))
        return next(candidates, None)

    def verify(self):
        """Log an error for every ID_FS_* property with an unexpected value."""
        checks = [
            (bool(self.id_fs_uuid), 'ID_FS_UUID does not found'),
            (self.id_fs_type == 'xfs', 'ID_FS_TYPE is not "xfs"'),
            (self.id_fs_usage == 'filesystem', 'ID_FS_USAGE is not "filesystem"'),
        ]
        for passed, message in checks:
            if not passed:
                LOGGER.error(message)

    def dump_variables(self):
        """Log the device attributes, then every udev property, at ERROR level."""
        dev = self.device
        for name in ('sys_path', 'sys_name', 'sys_number', 'device_path'):
            LOGGER.error(f' {name}: {getattr(dev, name)}')
        # tags and device_links are materialised into lists before formatting.
        LOGGER.error(f' tags: {list(dev.tags)}')
        for name in ('subsystem', 'driver', 'device_type', 'device_node',
                     'device_number'):
            LOGGER.error(f' {name}: {getattr(dev, name)}')
        LOGGER.error(f' device_links: {list(dev.device_links)}')
        for name in ('is_initialized', 'time_since_initialized'):
            LOGGER.error(f' {name}: {getattr(dev, name)}')
        for key, value in dev.properties.items():
            LOGGER.error(f' {key}: {value}')

    @property
    def id_fs_uuid(self):
        """Value of the ID_FS_UUID udev property, or None."""
        return self._property('ID_FS_UUID')

    @property
    def id_fs_type(self):
        """Value of the ID_FS_TYPE udev property, or None."""
        return self._property('ID_FS_TYPE')

    @property
    def id_fs_usage(self):
        """Value of the ID_FS_USAGE udev property, or None."""
        return self._property('ID_FS_USAGE')

    @property
    def uuid_link(self):
        """First /dev/disk/by-uuid/ link of the device, or None."""
        return self._first_link('/dev/disk/by-uuid/')

    @property
    def label_link(self):
        """First /dev/disk/by-label/ link of the device, or None."""
        return self._first_link('/dev/disk/by-label/')

    @property
    def partuuid_link(self):
        """First /dev/disk/by-partuuid/ link of the device, or None."""
        return self._first_link('/dev/disk/by-partuuid/')

    @property
    def path_link(self):
        """First /dev/disk/by-path/ link of the device, or None."""
        return self._first_link('/dev/disk/by-path/')

    @property
    def id_links(self):
        """All device links that start with /dev/disk/by-id, as a list."""
        matches = []
        for link in self.device.device_links:
            if link.startswith('/dev/disk/by-id'):
                matches.append(link)
        return matches
|
|
|
|
if __name__ == '__main__':
    # Script entry point: validates the requested disks, optionally assembles
    # them into an MD RAID array, formats the resulting device as XFS, writes
    # and starts a systemd mount unit for it, and creates the Scylla
    # directories on the mounted volume.
    if os.getuid() > 0:
        print('Requires root permission.')
        sys.exit(1)
    parser = argparse.ArgumentParser(description='Configure RAID volume for Scylla.')
    parser.add_argument('--disks', required=True,
                        help='specify disks for RAID')
    parser.add_argument('--raiddev',
                        help='MD device name for RAID')
    parser.add_argument('--enable-on-nextboot', '--update-fstab', action='store_true', default=False,
                        help='mount RAID on next boot')
    parser.add_argument('--root', default='/var/lib/scylla',
                        help='specify the root of the tree')
    parser.add_argument('--volume-role', default='all',
                        help='specify how will this device be used (data, commitlog, or all)')
    parser.add_argument('--force-raid', action='store_true', default=False,
                        help='force constructing RAID when only one disk is specified')
    parser.add_argument('--raid-level', default='0',
                        help='specify RAID level')
    parser.add_argument('--online-discard', default="True",
                        help='Enable XFS online discard (trim SSD cells after file deletion)')

    args = parser.parse_args()

    # Allow args.online_discard to be used as a boolean value
    # NOTE(review): distutils is removed from the standard library in
    # Python 3.12 (PEP 632); this call and the module-level import need a
    # replacement before running on such interpreters.
    args.online_discard = distutils.util.strtobool(args.online_discard)

    # Work out the mount point from the tree root and the volume role.
    root = args.root.rstrip('/')
    if args.volume_role == 'all':
        mount_at = root
    elif args.volume_role == 'data':
        mount_at = '{}/data'.format(root)
    elif args.volume_role == 'commitlog':
        mount_at = '{}/commitlog'.format(root)
    else:
        print('Invalid role specified ({})'.format(args.volume_role))
        parser.print_help()
        sys.exit(1)

    # Every requested disk must exist, be a block device and be unused.
    disks = args.disks.split(',')
    for disk in disks:
        if not os.path.exists(disk):
            print('{} is not found'.format(disk))
            sys.exit(1)
        if not stat.S_ISBLK(os.stat(disk).st_mode):
            print('{} is not block device'.format(disk))
            sys.exit(1)
        if not is_unused_disk(disk):
            print('{} is busy'.format(disk))
            sys.exit(1)

    # A single disk is formatted directly unless --force-raid is given;
    # otherwise pick the MD device that will carry the filesystem.
    if len(disks) == 1 and not args.force_raid:
        raid = False
        fsdev = disks[0]
    else:
        raid = True
        if args.raiddev is None:
            raiddevs_to_try = [f'/dev/md{i}' for i in range(10)]
        else:
            raiddevs_to_try = [args.raiddev, ]
        for fsdev in raiddevs_to_try:
            raiddevname = os.path.basename(fsdev)
            array_state = Path(f'/sys/block/{raiddevname}/md/array_state')
            # mdX is not allocated
            if not array_state.exists():
                break
            with array_state.open() as f:
                # allocated, but no devices, not running
                if f.read().strip() == 'clear':
                    break
            print(f'{fsdev} is already using')
        else:
            # The loop never hit `break`: every candidate is in use.
            if args.raiddev is None:
                print("Can't find unused /dev/mdX")
            sys.exit(1)
        print(f'{fsdev} will be used to setup a RAID')

    if os.path.ismount(mount_at):
        print('{} is already mounted'.format(mount_at))
        sys.exit(1)

    # Refuse to overwrite an existing mount unit for the same path.
    mntunit_bn = out('systemd-escape -p --suffix=mount {}'.format(mount_at))
    mntunit = Path('/etc/systemd/system/{}'.format(mntunit_bn))
    if mntunit.exists():
        print('mount unit {} already exists'.format(mntunit))
        sys.exit(1)

    # Install the required tools when they are missing from PATH.
    if not shutil.which('mkfs.xfs'):
        pkg_install('xfsprogs')
    if not shutil.which('mdadm'):
        pkg_install('mdadm')
    if args.raid_level != '0':
        # Look up the md monitor unit under either of its two names.
        try:
            md_service = systemd_unit('mdmonitor.service')
        except SystemdException:
            md_service = systemd_unit('mdadm.service')

    print('Creating {type} for scylla using {nr_disk} disk(s): {disks}'.format(type=f'RAID{args.raid_level}' if raid else 'XFS volume', nr_disk=len(disks), disks=args.disks))

    # Start blkdiscard on every disk that reports a non-zero discard
    # granularity, in parallel, then wait for all of them to finish.
    procs = []
    for disk in disks:
        d = disk.replace('/dev/', '')
        discard_path = '/sys/block/{}/queue/discard_granularity'.format(d)
        if os.path.exists(discard_path):
            with open(discard_path) as f:
                discard = f.read().strip()
            if discard != '0':
                proc = subprocess.Popen(['blkdiscard', disk])
                procs.append(proc)
    for proc in procs:
        proc.wait()
    for disk in disks:
        run(f'wipefs -a {disk}', shell=True, check=True)
    if raid:
        run('udevadm settle', shell=True, check=True)
        run('mdadm --create --verbose --force --run {raid} --level={level} -c1024 --raid-devices={nr_disk} {disks}'.format(raid=fsdev, level=args.raid_level, nr_disk=len(disks), disks=args.disks.replace(',', ' ')), shell=True, check=True)
        run(f'wipefs -a {fsdev}', shell=True, check=True)
        run('udevadm settle', shell=True, check=True)

    # Decode the device number with os.major()/os.minor(): a plain
    # divide-by-256 split yields the wrong sysfs path once the minor number
    # is 256 or larger.
    rdev = os.stat(fsdev).st_rdev
    major, minor = os.major(rdev), os.minor(rdev)
    with open(f'/sys/dev/block/{major}:{minor}/queue/logical_block_size') as f:
        sector_size = int(f.read())
    # We want smaller block sizes to allow smaller commitlog writes without
    # stalling. The minimum block size for crc enabled filesystems is 1024,
    # see https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/mkfs/xfs_mkfs.c .
    # and it also cannot be smaller than the sector size.
    block_size = max(1024, sector_size)
    run('udevadm settle', shell=True, check=True)
    run(f'mkfs.xfs -b size={block_size} {fsdev} -K', shell=True, check=True)
    run('udevadm settle', shell=True, check=True)

    if is_debian_variant():
        confpath = '/etc/mdadm/mdadm.conf'
    else:
        confpath = '/etc/mdadm.conf'

    # Record the array in mdadm.conf (the file is overwritten).
    if raid:
        res = out('mdadm --detail --scan')
        with open(confpath, 'w') as f:
            f.write(res)
            f.write('\nMAILADDR root')

    os.makedirs(mount_at, exist_ok=True)

    # Choose the device path written into the mount unit: the by-uuid link
    # when udev provides one, otherwise the first available fallback.
    udev_info = UdevInfo(fsdev)
    mount_dev = None
    if udev_info.uuid_link:
        mount_dev = udev_info.uuid_link
    else:
        if udev_info.label_link:
            mount_dev = udev_info.label_link
            dev_type = 'label'
        elif udev_info.partuuid_link:
            mount_dev = udev_info.partuuid_link
            dev_type = 'partuuid'
        elif udev_info.path_link:
            mount_dev = udev_info.path_link
            dev_type = 'path'
        elif udev_info.id_links:
            mount_dev = udev_info.id_links[0]
            dev_type = 'id'
        else:
            mount_dev = fsdev
            dev_type = 'realpath'
        LOGGER.error(f'Failed to detect uuid, using {dev_type}: {mount_dev}')

    after = ''
    wants = ''
    if raid and args.raid_level != '0':
        # NOTE(review): this writes the literal text "md_service" into the
        # unit file, not the name of the md monitor unit looked up earlier.
        # That looks unintended, but substituting a real unit name changes the
        # ordering constraints of this DefaultDependencies=no mount unit, so
        # confirm the intended boot ordering before changing it.
        after = wants = 'md_service'
    opt_discard = ''
    if args.online_discard:
        opt_discard = ',discard'
    # The [1:-1] slice drops the newline right after the opening quotes and
    # the one right before the closing quotes.
    unit_data = f'''
[Unit]
Description=Scylla data directory
Before=local-fs.target scylla-server.service
After={after}
Wants={wants}
DefaultDependencies=no

[Mount]
What={mount_dev}
Where={mount_at}
Type=xfs
Options=noatime{opt_discard}

[Install]
WantedBy=local-fs.target
'''[1:-1]
    with open(f'/etc/systemd/system/{mntunit_bn}', 'w') as f:
        f.write(unit_data)
    # Make scylla-server.service require the mount; append to an existing
    # drop-in so that several roles can each add their own mount point.
    mounts_conf = '/etc/systemd/system/scylla-server.service.d/mounts.conf'
    if not os.path.exists(mounts_conf):
        os.makedirs('/etc/systemd/system/scylla-server.service.d/', exist_ok=True)
        with open(mounts_conf, 'w') as f:
            f.write(f'[Unit]\nRequiresMountsFor={mount_at}\n')
    else:
        with open(mounts_conf, 'a') as f:
            f.write(f'RequiresMountsFor={mount_at}\n')

    systemd_unit.reload()
    if args.raid_level != '0':
        md_service.start()
    try:
        mount = systemd_unit(mntunit_bn)
        mount.start()
    except SubprocessError:
        # Starting the mount failed: log what can be learned about the device
        # paths and the udev record, then re-raise the original error.
        if mount_dev != fsdev:
            if not os.path.islink(mount_dev):
                LOGGER.error(f'{mount_dev} is not found')
            elif not os.path.exists(mount_dev):
                LOGGER.error(f'{mount_dev} is broken link')
        if not os.path.exists(fsdev):
            LOGGER.error(f'{fsdev} is not found')
        # Only stat the device when it exists; otherwise os.stat() would raise
        # here and hide the mount failure being reported.
        elif not stat.S_ISBLK(os.stat(fsdev).st_mode):
            LOGGER.error(f'{fsdev} is not block device')
        LOGGER.error(f'Error detected, dumping udev env parameters on {fsdev}')
        udev_info.verify()
        udev_info.dump_variables()
        raise

    if args.enable_on_nextboot:
        mount.enable()
    uid = pwd.getpwnam('scylla').pw_uid
    gid = grp.getgrnam('scylla').gr_gid
    os.chown(root, uid, gid)

    # Create the Scylla sub-directories and hand them to the scylla user.
    for d in ['coredump', 'data', 'commitlog', 'hints', 'view_hints', 'saved_caches']:
        dpath = '{}/{}'.format(root, d)
        os.makedirs(dpath, exist_ok=True)
        os.chown(dpath, uid, gid)

    if is_debian_variant():
        # update-initramfs may be absent (e.g. on minimal images); install the
        # package that provides it before calling it.
        if not shutil.which('update-initramfs'):
            pkg_install('initramfs-tools')
        run('update-initramfs -u', shell=True, check=True)

    # The mount worked without a by-uuid link; still dump the udev record so
    # that the missing UUID can be investigated.
    if not udev_info.uuid_link:
        LOGGER.error(f'Error detected, dumping udev env parameters on {fsdev}')
        udev_info.verify()
        udev_info.dump_variables()
|