#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright 2018 ScyllaDB
#

#
# This file is part of Scylla.
#
# Scylla is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Scylla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
"""Configure a RAID0 (or single-disk) XFS volume for Scylla.

The script validates the given disks, optionally assembles them into an MD
RAID0 array, formats the resulting device as XFS, installs a systemd mount
unit for it and creates the Scylla directory tree on top.

The names is_unused_disk, pkg_install, systemd_unit, SystemdException and
is_debian_variant are not defined in this file; they are expected to be
provided by the ``from scylla_util import *`` star import.
"""

import argparse
import grp
import os
import pwd
import shutil
import stat
import subprocess
import sys
from pathlib import Path

import distro

from scylla_util import *
# Keep this import AFTER the star import above: it guarantees that `run` is
# subprocess.run even if scylla_util happens to export its own `run`.
from subprocess import run

# Sub-directories of the Scylla root that are created and chown'ed to scylla.
SCYLLA_SUBDIRS = ['coredump', 'data', 'commitlog', 'hints', 'view_hints', 'saved_caches']

SYSTEMD_UNIT_DIR = '/etc/systemd/system'


def _die(message):
    """Print *message* to stdout and terminate with exit status 1."""
    print(message)
    sys.exit(1)


def _build_parser():
    """Return the argument parser describing the script's command line."""
    parser = argparse.ArgumentParser(description='Configure RAID volume for Scylla.')
    parser.add_argument('--disks', required=True,
                        help='specify disks for RAID')
    parser.add_argument('--raiddev',
                        help='MD device name for RAID')
    parser.add_argument('--enable-on-nextboot', '--update-fstab', action='store_true', default=False,
                        help='mount RAID on next boot')
    parser.add_argument('--root', default='/var/lib/scylla',
                        help='specify the root of the tree')
    parser.add_argument('--volume-role', default='all',
                        help='specify how will this device be used (data, commitlog, or all)')
    parser.add_argument('--force-raid', action='store_true', default=False,
                        help='force constructing RAID when only one disk is specified')
    return parser


def _mount_point_for_role(root, role):
    """Return the mount point for *role* below *root*, or None for an unknown role."""
    if role == 'all':
        return root
    if role in ('data', 'commitlog'):
        return '{}/{}'.format(root, role)
    return None


def _check_disks(disks):
    """Exit with an error unless every entry of *disks* is an existing, unused block device."""
    for disk in disks:
        if not os.path.exists(disk):
            _die('{} is not found'.format(disk))
        if not stat.S_ISBLK(os.stat(disk).st_mode):
            _die('{} is not block device'.format(disk))
        if not is_unused_disk(disk):
            _die('{} is busy'.format(disk))


def _select_raid_device(requested):
    """Return an MD device path that is free to use, or exit with status 1.

    When *requested* is None, /dev/md0 .. /dev/md9 are probed in order;
    otherwise only *requested* is probed. A device counts as free when it has
    no ``md/array_state`` entry in sysfs, or when that entry reads ``clear``.
    """
    if requested is None:
        candidates = [f'/dev/md{i}' for i in range(10)]
    else:
        candidates = [requested]

    for dev in candidates:
        array_state = Path(f'/sys/block/{os.path.basename(dev)}/md/array_state')
        # mdX is not allocated
        if not array_state.exists():
            return dev
        # allocated, but no devices, not running
        if array_state.read_text().strip() == 'clear':
            return dev
        print(f'{dev} is already using')

    # Every candidate is busy.
    if requested is None:
        print("Can't find unused /dev/mdX")
    sys.exit(1)


def _discard_disks(disks):
    """Run ``blkdiscard`` in parallel on every disk that advertises discard support.

    A disk is discarded only if its sysfs ``discard_granularity`` file exists
    and is not ``0``. Exit codes of blkdiscard are deliberately not checked
    (best effort, same as before).
    """
    procs = []
    for disk in disks:
        name = disk.replace('/dev/', '')
        granularity_path = '/sys/block/{}/queue/discard_granularity'.format(name)
        if not os.path.exists(granularity_path):
            continue
        with open(granularity_path) as f:
            granularity = f.read().strip()
        if granularity != '0':
            procs.append(subprocess.Popen(['blkdiscard', disk]))
    for proc in procs:
        proc.wait()


def _logical_block_size(dev):
    """Return the logical block size, in bytes, reported by sysfs for block device *dev*."""
    rdev = os.stat(dev).st_rdev
    # os.major()/os.minor() decode the device number correctly; a plain
    # divide/modulo by 256 breaks as soon as the minor number exceeds 255.
    major, minor = os.major(rdev), os.minor(rdev)
    with open(f'/sys/dev/block/{major}:{minor}/queue/logical_block_size') as f:
        return int(f.read())


def _render_mount_unit(after, wants, uuid, mount_at):
    """Return the text of the systemd mount unit (without a trailing newline)."""
    return '\n'.join([
        '[Unit]',
        'Description=Scylla data directory',
        'Before=scylla-server.service',
        f'After={after}',
        f'Wants={wants}',
        'DefaultDependencies=no',
        '',
        '[Mount]',
        f'What=/dev/disk/by-uuid/{uuid}',
        f'Where={mount_at}',
        'Type=xfs',
        'Options=noatime',
        '',
        '[Install]',
        'WantedBy=multi-user.target',
    ])


def _register_required_mount(mount_at):
    """Make scylla-server.service require *mount_at* via a systemd drop-in file."""
    dropin_dir = f'{SYSTEMD_UNIT_DIR}/scylla-server.service.d/'
    mounts_conf = f'{SYSTEMD_UNIT_DIR}/scylla-server.service.d/mounts.conf'
    if not os.path.exists(mounts_conf):
        os.makedirs(dropin_dir, exist_ok=True)
        with open(mounts_conf, 'w') as f:
            f.write(f'[Unit]\nRequiresMountsFor={mount_at}\n')
    else:
        # The drop-in already exists: only append another requirement line.
        with open(mounts_conf, 'a') as f:
            f.write(f'RequiresMountsFor={mount_at}\n')


def main():
    """Entry point: parse the command line and set up the volume."""
    # The privilege check intentionally happens before argument parsing.
    if os.getuid() > 0:
        _die('Requires root permission.')

    parser = _build_parser()
    args = parser.parse_args()

    root = args.root.rstrip('/')
    mount_at = _mount_point_for_role(root, args.volume_role)
    if mount_at is None:
        print('Invalid role specified ({})'.format(args.volume_role))
        parser.print_help()
        sys.exit(1)

    disks = args.disks.split(',')
    _check_disks(disks)

    if len(disks) == 1 and not args.force_raid:
        # A single disk is formatted directly, no MD array is created.
        raid = False
        fsdev = disks[0]
    else:
        raid = True
        fsdev = _select_raid_device(args.raiddev)
        print(f'{fsdev} will be used to setup a RAID')

    if os.path.ismount(mount_at):
        _die('{} is already mounted'.format(mount_at))

    # Commands are passed as argument lists (no shell), so paths containing
    # spaces or shell metacharacters are handed over verbatim.
    mntunit_bn = run(['systemd-escape', '-p', '--suffix=mount', mount_at],
                     check=True, capture_output=True, encoding='utf-8').stdout.strip()
    mntunit = Path('{}/{}'.format(SYSTEMD_UNIT_DIR, mntunit_bn))
    if mntunit.exists():
        _die('mount unit {} already exists'.format(mntunit))

    if not shutil.which('mkfs.xfs'):
        pkg_install('xfsprogs')
    if not shutil.which('mdadm'):
        pkg_install('mdadm')
    # The MD monitoring service goes by different names; fall back to
    # mdadm.service when mdmonitor.service cannot be looked up.
    try:
        md_service = systemd_unit('mdmonitor.service')
    except SystemdException:
        md_service = systemd_unit('mdadm.service')

    print('Creating {type} for scylla using {nr_disk} disk(s): {disks}'.format(
        type='RAID0' if raid else 'XFS volume', nr_disk=len(disks), disks=args.disks))
    _discard_disks(disks)

    if raid:
        run(['udevadm', 'settle'], check=True)
        run(['mdadm', '--create', '--verbose', '--force', '--run', fsdev,
             '--level=0', '-c1024', f'--raid-devices={len(disks)}', *disks],
            check=True)
        run(['udevadm', 'settle'], check=True)

    sector_size = _logical_block_size(fsdev)
    # We want smaller block sizes to allow smaller commitlog writes without
    # stalling. The minimum block size for crc enabled filesystems is 1024,
    # and it also cannot be smaller than the sector size.
    block_size = max(1024, sector_size)
    run(['mkfs.xfs', '-b', f'size={block_size}', fsdev, '-f', '-K'], check=True)

    if is_debian_variant():
        confpath = '/etc/mdadm/mdadm.conf'
    else:
        confpath = '/etc/mdadm.conf'

    if raid:
        # Persist the array definition; note that this overwrites the file.
        res = run(['mdadm', '--detail', '--scan'],
                  check=True, capture_output=True, encoding='utf-8').stdout.strip()
        with open(confpath, 'w') as f:
            f.write(res)
            f.write('\nMAILADDR root')

    os.makedirs(mount_at, exist_ok=True)

    uuid = run(['blkid', '-s', 'UUID', '-o', 'value', fsdev],
               check=True, capture_output=True, encoding='utf-8').stdout.strip()
    after = 'local-fs.target'
    if raid:
        after += f' {md_service}'
    unit_data = _render_mount_unit(after, md_service, uuid, mount_at)
    with open(mntunit, 'w') as f:
        f.write(unit_data)

    _register_required_mount(mount_at)

    systemd_unit.reload()
    md_service.start()
    mount = systemd_unit(mntunit_bn)
    mount.start()
    if args.enable_on_nextboot:
        mount.enable()

    # Hand the directory tree over to the scylla user.
    uid = pwd.getpwnam('scylla').pw_uid
    gid = grp.getgrnam('scylla').gr_gid
    os.chown(root, uid, gid)
    for d in SCYLLA_SUBDIRS:
        dpath = '{}/{}'.format(root, d)
        os.makedirs(dpath, exist_ok=True)
        os.chown(dpath, uid, gid)

    if is_debian_variant():
        run(['update-initramfs', '-u'], check=True)


if __name__ == '__main__':
    main()