Files
scylladb/dist/common/scripts/scylla_raid_setup
Takuya ASADA 48b6aec16a scripts: use "out()" function for all capture_output subprocesses
In acaf0bb we applied out() only to perftune.py, because we hit issue #10390
with that script.
But the issue can happen with other commands too, so let's apply it to all
commands that use capture_output.

related #10390

Closes #10414
2022-04-26 13:56:52 +03:00

209 lines
7.3 KiB
Python
Executable File

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright 2018-present ScyllaDB
#
#
# SPDX-License-Identifier: AGPL-3.0-or-later
import os
import argparse
import pwd
import grp
import sys
import stat
import shutil
import subprocess
from pathlib import Path

import distro

from scylla_util import *
# NOTE(review): kept after the star import on purpose, so that `run` below is
# subprocess.run even if scylla_util happens to export a name `run` -- confirm.
from subprocess import run
def parse_bool_arg(value):
    """Convert a command-line string into a bool (for use as argparse ``type=``).

    argparse passes option values as strings, and every non-empty string
    (including '0' and 'false') is truthy, so the value has to be parsed
    explicitly before it is used in a condition.

    Accepted spellings (case-insensitive): 1/true/yes/on and 0/false/no/off.
    A value that is already a bool is returned unchanged.

    Raises:
        argparse.ArgumentTypeError: if the value is not one of the above.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('1', 'true', 'yes', 'on'):
        return True
    if text in ('0', 'false', 'no', 'off'):
        return False
    raise argparse.ArgumentTypeError('invalid boolean value: {!r}'.format(value))


def mount_point_for_role(root, volume_role):
    """Return the directory to mount the volume at for the given role.

    'all' maps to ``root`` itself, 'data' to ``root/data`` and 'commitlog'
    to ``root/commitlog``.  Returns None for any other role so the caller
    can report the error.
    """
    suffixes = {'all': '', 'data': '/data', 'commitlog': '/commitlog'}
    if volume_role not in suffixes:
        return None
    return root + suffixes[volume_role]


def device_numbers(st_rdev):
    """Split a raw ``st_rdev`` value into a ``(major, minor)`` tuple.

    Uses os.major()/os.minor() rather than dividing by 256: the platform
    encoding of device numbers is not a plain 8-bit/8-bit split, so the
    arithmetic form gives wrong results for large minor or major numbers.
    """
    return os.major(st_rdev), os.minor(st_rdev)


def find_free_raid_device(candidates):
    """Return the first MD device path in ``candidates`` that is free, else None.

    A device counts as free when ``/sys/block/<name>/md/array_state`` does
    not exist (mdX is not allocated) or contains 'clear' (allocated, but no
    devices and not running).  A message is printed for every busy candidate.
    """
    for dev in candidates:
        array_state = Path(f'/sys/block/{os.path.basename(dev)}/md/array_state')
        # mdX is not allocated
        if not array_state.exists():
            return dev
        with array_state.open() as f:
            # allocated, but no devices, not running
            if f.read().strip() == 'clear':
                return dev
        print(f'{dev} is already using')
    return None


def render_mount_unit(after, wants, uuid, mount_at, opt_discard):
    """Return the text of the systemd .mount unit for the Scylla volume.

    ``wants`` is either empty or a complete '\\nWants=...' fragment that is
    appended directly after the After= line.  ``opt_discard`` is either
    empty or ',discard'.  The result has no leading or trailing newline.
    """
    return f'''
[Unit]
Description=Scylla data directory
Before=scylla-server.service
After={after}{wants}
DefaultDependencies=no
[Mount]
What=/dev/disk/by-uuid/{uuid}
Where={mount_at}
Type=xfs
Options=noatime{opt_discard}
[Install]
WantedBy=multi-user.target
'''[1:-1]


def main():
    """Create an XFS volume for Scylla, on an MD RAID array when requested.

    Steps, in order: validate arguments and disks, pick an MD device (when
    more than one disk is given or --force-raid is set), discard the disks,
    create the array, run mkfs.xfs, write mdadm.conf, write and start a
    systemd mount unit, and create the Scylla directories under --root.

    Exits with status 1 on any validation failure.  Helpers such as out(),
    is_unused_disk(), pkg_install(), systemd_unit, SystemdException and
    is_debian_variant() are not defined in this file; presumably they come
    from scylla_util.
    """
    if os.getuid() > 0:
        print('Requires root permission.')
        sys.exit(1)

    parser = argparse.ArgumentParser(description='Configure RAID volume for Scylla.')
    parser.add_argument('--disks', required=True,
                        help='specify disks for RAID')
    parser.add_argument('--raiddev',
                        help='MD device name for RAID')
    parser.add_argument('--enable-on-nextboot', '--update-fstab', action='store_true', default=False,
                        help='mount RAID on next boot')
    parser.add_argument('--root', default='/var/lib/scylla',
                        help='specify the root of the tree')
    parser.add_argument('--volume-role', default='all',
                        help='specify how will this device be used (data, commitlog, or all)')
    parser.add_argument('--force-raid', action='store_true', default=False,
                        help='force constructing RAID when only one disk is specified')
    parser.add_argument('--raid-level', default='0',
                        help='specify RAID level')
    # type= is required here: without it any value given on the command line
    # is a non-empty string and therefore always truthy.
    parser.add_argument('--online-discard', default=True, type=parse_bool_arg,
                        help='Enable XFS online discard (trim SSD cells after file deletion)')
    args = parser.parse_args()

    root = args.root.rstrip('/')
    mount_at = mount_point_for_role(root, args.volume_role)
    if mount_at is None:
        print('Invalid role specified ({})'.format(args.volume_role))
        parser.print_help()
        sys.exit(1)

    # Every disk must exist, be a block device and not be in use.
    disks = args.disks.split(',')
    for disk in disks:
        if not os.path.exists(disk):
            print('{} is not found'.format(disk))
            sys.exit(1)
        if not stat.S_ISBLK(os.stat(disk).st_mode):
            print('{} is not block device'.format(disk))
            sys.exit(1)
        if not is_unused_disk(disk):
            print('{} is busy'.format(disk))
            sys.exit(1)

    if len(disks) == 1 and not args.force_raid:
        # A single disk is formatted directly, without an MD array.
        raid = False
        fsdev = disks[0]
    else:
        raid = True
        if args.raiddev is None:
            raiddevs_to_try = [f'/dev/md{i}' for i in range(10)]
        else:
            raiddevs_to_try = [args.raiddev]
        fsdev = find_free_raid_device(raiddevs_to_try)
        if fsdev is None:
            if args.raiddev is None:
                print("Can't find unused /dev/mdX")
            sys.exit(1)
        print(f'{fsdev} will be used to setup a RAID')

    if os.path.ismount(mount_at):
        print('{} is already mounted'.format(mount_at))
        sys.exit(1)

    # Name of the systemd mount unit that corresponds to mount_at.
    mntunit_bn = out('systemd-escape -p --suffix=mount {}'.format(mount_at))
    mntunit = Path('/etc/systemd/system/{}'.format(mntunit_bn))
    if mntunit.exists():
        print('mount unit {} already exists'.format(mntunit))
        sys.exit(1)

    if not shutil.which('mkfs.xfs'):
        pkg_install('xfsprogs')
    if not shutil.which('mdadm'):
        pkg_install('mdadm')
    if args.raid_level != '0':
        # The MD monitoring service has a different unit name depending on
        # the system; fall back to the second name when the first is missing.
        try:
            md_service = systemd_unit('mdmonitor.service')
        except SystemdException:
            md_service = systemd_unit('mdadm.service')

    print('Creating {type} for scylla using {nr_disk} disk(s): {disks}'.format(
        type=f'RAID{args.raid_level}' if raid else 'XFS volume',
        nr_disk=len(disks), disks=args.disks))

    # Discard all disks in parallel, then wait for every blkdiscard to end.
    # Disks reporting a discard granularity of 0 are skipped.
    procs = []
    for disk in disks:
        d = disk.replace('/dev/', '')
        discard_path = '/sys/block/{}/queue/discard_granularity'.format(d)
        if os.path.exists(discard_path):
            with open(discard_path) as f:
                discard = f.read().strip()
            if discard != '0':
                procs.append(subprocess.Popen(['blkdiscard', disk]))
    for proc in procs:
        proc.wait()

    if raid:
        # Commands are passed as argv lists so device paths are never
        # re-interpreted by a shell.
        run(['udevadm', 'settle'], check=True)
        run(['mdadm', '--create', '--verbose', '--force', '--run', fsdev,
             f'--level={args.raid_level}', '-c1024',
             f'--raid-devices={len(disks)}', *disks], check=True)
        run(['udevadm', 'settle'], check=True)

    major, minor = device_numbers(os.stat(fsdev).st_rdev)
    with open(f'/sys/dev/block/{major}:{minor}/queue/logical_block_size') as f:
        sector_size = int(f.read())
    # We want smaller block sizes to allow smaller commitlog writes without
    # stalling. The minimum block size for crc enabled filesystems is 1024,
    # and it also cannot be smaller than the sector size.
    block_size = max(1024, sector_size)
    run(['mkfs.xfs', '-b', f'size={block_size}', fsdev, '-f', '-K'], check=True)

    if is_debian_variant():
        confpath = '/etc/mdadm/mdadm.conf'
    else:
        confpath = '/etc/mdadm.conf'

    if raid:
        # Persist the array definition (overwrites any existing mdadm.conf).
        res = out('mdadm --detail --scan')
        with open(confpath, 'w') as f:
            f.write(res)
            f.write('\nMAILADDR root')

    os.makedirs(mount_at, exist_ok=True)

    uuid = out(f'blkid -s UUID -o value {fsdev}')
    after = 'local-fs.target'
    wants = ''
    if raid and args.raid_level != '0':
        # Order the mount after the MD monitoring service and pull it in.
        after += f' {md_service}'
        wants = f'\nWants={md_service}'
    opt_discard = ',discard' if args.online_discard else ''
    unit_data = render_mount_unit(after, wants, uuid, mount_at, opt_discard)
    with open(f'/etc/systemd/system/{mntunit_bn}', 'w') as f:
        f.write(unit_data)

    # Make scylla-server.service require the new mount point, creating the
    # drop-in on first use and appending to it otherwise.
    mounts_conf = '/etc/systemd/system/scylla-server.service.d/mounts.conf'
    if not os.path.exists(mounts_conf):
        os.makedirs('/etc/systemd/system/scylla-server.service.d/', exist_ok=True)
        with open(mounts_conf, 'w') as f:
            f.write(f'[Unit]\nRequiresMountsFor={mount_at}\n')
    else:
        with open(mounts_conf, 'a') as f:
            f.write(f'RequiresMountsFor={mount_at}\n')

    systemd_unit.reload()
    if args.raid_level != '0':
        md_service.start()
    mount = systemd_unit(mntunit_bn)
    mount.start()
    if args.enable_on_nextboot:
        mount.enable()

    # Hand the tree over to the scylla user and create the standard
    # sub-directories under root.
    uid = pwd.getpwnam('scylla').pw_uid
    gid = grp.getgrnam('scylla').gr_gid
    os.chown(root, uid, gid)
    for d in ['coredump', 'data', 'commitlog', 'hints', 'view_hints', 'saved_caches']:
        dpath = '{}/{}'.format(root, d)
        os.makedirs(dpath, exist_ok=True)
        os.chown(dpath, uid, gid)

    if is_debian_variant():
        run(['update-initramfs', '-u'], check=True)


if __name__ == '__main__':
    main()