mirror of
https://github.com/scylladb/scylladb.git
synced 2026-06-07 23:43:31 +00:00
We currently refuse to run scylla_setup when umask != 0022. To remove this limitation, call os.chmod() with mode 0o644 after every file creation so that the files stay readable by the scylla user. Note that perftune.yaml does not strictly need mode 0644, since perftune.py runs as root, but we set it anyway to keep its permissions aligned with the other files. Fixes #8049 Closes #8119
286 lines
13 KiB
Python
Executable File
286 lines
13 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright (C) 2017 ScyllaDB
|
|
#
|
|
|
|
#
|
|
# This file is part of Scylla.
|
|
#
|
|
# Scylla is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# Scylla is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
import os
|
|
import re
|
|
from scylla_util import *
|
|
import subprocess
|
|
import argparse
|
|
import yaml
|
|
import logging
|
|
import sys
|
|
import scylla_blocktune as blocktune
|
|
|
|
|
|
# Building blocks for the regular expressions that parse Scylla's
# environment-style configuration files.

# Anchors at the start of a line, skips leading whitespace and then asserts,
# without consuming anything, that the next character is not '#'.
_nocomment = r"^\s*(?!#)"
# Separator between an option name and its value: optional whitespace or a
# single '='. The group is non-capturing.
_scyllaeq = r"(?:\s*|=)"
# "--cpuset <list>": integers joined by ',' or '-', captured as "cpuset".
_cpuset = r"(?:\s*--cpuset{}(?P<cpuset>\d+(?:[-,]\d+)*))".format(_scyllaeq)
# "--smp <n>": a single integer, captured as "smp".
_smp = r"(?:\s*--smp{}(?P<smp>\d+))".format(_scyllaeq)
|
|
|
|
|
|
def _reopt(s):
    """Return the regex fragment ``s`` with the ``?`` quantifier appended."""
    return "".join((s, r"?"))
|
|
|
|
|
|
def is_developer_mode():
    """Return True if developer mode is switched on in dev-mode.conf.

    Scans ``<etcdir>/scylla.d/dev-mode.conf`` for at least one non-comment
    line in which ``developer-mode`` is followed by ``1`` or ``true``.

    Raises:
        OSError: if the configuration file cannot be opened.
    """
    pattern = re.compile(_nocomment + r".*developer-mode" + _scyllaeq + "(1|true)")
    # The context manager closes the file deterministically; any() stops at
    # the first matching line instead of building a list of all matches.
    with open(etcdir() + "/scylla.d/dev-mode.conf", "r") as f:
        return any(pattern.match(line) for line in f)
|
|
|
|
class scylla_cpuinfo:
    """Class containing information about how Scylla sees CPUs in this machine.

    Information that can be probed include in which hyperthreads Scylla is configured
    to run, how many total threads exist in the system, etc"""

    @staticmethod
    def _expand_cpuset(spec):
        """Expand a cpuset string such as ``"0-3,8"`` into a set of ints.

        Each comma-separated group is either a single CPU number or a
        dash-separated range whose first and last numbers are both included.
        """
        cpus = set()
        for group in spec.split(","):
            ends = [int(x) for x in group.split("-")]
            cpus.update(range(ends[0], ends[-1] + 1))
        return cpus

    @staticmethod
    def _parse_cpuinfo(lines):
        """Parse ``/proc/cpuinfo``-style lines into ``{cpu number: {key: value}}``.

        A ``processor`` line starts a new entry; every following
        ``key : value`` line is stored in that entry with both sides
        stripped of whitespace.
        """
        results = {}
        current = None
        for line in lines:
            # Split on the first ':' only, so values that themselves contain
            # a colon do not break the parse.
            key, sep, value = line.partition(":")
            if not sep:
                # Blank separator line, or a line that is not "key : value".
                continue
            key = key.strip()
            value = value.strip()
            if key == "processor":
                current = {}
                results[int(value)] = current
            if current is None:
                # Fields that appear before the first "processor" line do not
                # belong to any CPU entry.
                continue
            current[key] = value
        return results

    def __parse_cpuset(self):
        """Read ``<etcdir>/scylla.d/cpuset.conf`` into ``self._cpu_data``.

        Stores a dict with the keys ``"cpuset"`` (a set of ints, or None) and
        ``"smp"`` (an int, or None). When several ``CPUSET=`` lines match,
        the last one wins.
        """
        pattern = re.compile(_nocomment + r"CPUSET=\s*\"" + _reopt(_cpuset) + _reopt(_smp) + r"\s*\"")
        with open(etcdir() + "/scylla.d/cpuset.conf", "r") as f:
            matches = [m for m in map(pattern.match, f) if m]
        if not matches:
            d = {"cpuset": None, "smp": None}
        else:
            # if more than one, use last
            d = matches[-1].groupdict()
            if d["cpuset"]:
                d["cpuset"] = self._expand_cpuset(d["cpuset"])
            if d["smp"]:
                d["smp"] = int(d["smp"])
        self._cpu_data = d

    def __system_cpus(self):
        """Return the parsed contents of ``/proc/cpuinfo``, keyed by CPU number."""
        with open("/proc/cpuinfo", "r") as f:
            return self._parse_cpuinfo(f)

    def __init__(self):
        """Load the Scylla cpuset configuration and the system CPU table."""
        self.__parse_cpuset()
        self._cpu_data["system"] = self.__system_cpus()

    def system_cpuinfo(self):
        """Returns parsed information about CPUs in the system"""
        return self._cpu_data["system"]

    def system_nr_threads(self):
        """Returns the number of threads available in the system"""
        return len(self._cpu_data["system"])

    def system_nr_cores(self):
        """Returns the number of cores available in the system

        Raises KeyError if a CPU entry carries no 'core id' field.
        """
        # Cores are counted as distinct (physical id, core id) pairs, because
        # core ids repeat across packages on multi-socket machines.
        return len({(cpu.get("physical id"), cpu["core id"])
                    for cpu in self._cpu_data["system"].values()})

    def cpuset(self):
        """Returns the current cpuset Scylla is configured to use. Returns None if no constraints exist"""
        return self._cpu_data["cpuset"]

    def smp(self):
        """Returns the explicit smp configuration for Scylla, returns None if no constraints exist"""
        return self._cpu_data["smp"]

    def nr_shards(self):
        """How many shards will Scylla use in this machine"""
        # Precedence: explicit --smp, then the size of the cpuset, then every
        # hardware thread in the system.
        if self._cpu_data["smp"]:
            return self._cpu_data["smp"]
        if self._cpu_data["cpuset"]:
            return len(self._cpu_data["cpuset"])
        return len(self._cpu_data["system"])
|
|
|
|
def run_iotune():
    """Validate Scylla's data directories and run ``iotune`` against them.

    The directories are taken from ``scylla.yaml``, which is looked up in
    ``$SCYLLA_CONF`` or, if that variable is unset, in ``<etcdir>/scylla``.
    They are ``data_file_directories`` plus the commitlog, hints, view_hints
    and saved_caches directories. Each directory must exist, be a directory
    and have at least 10 GB available; each is then passed to
    ``blocktune.tune_fs`` and added as an iotune evaluation directory.

    CPU constraints are read from the module-level ``cpudata`` object, which
    the script entry point creates before calling this function.

    Exits the process with status 1 if a directory fails validation or if
    iotune fails.
    """
    if "SCYLLA_CONF" in os.environ:
        conf_dir = os.environ["SCYLLA_CONF"]
    else:
        conf_dir = etcdir() + "/scylla"

    with open(os.path.join(conf_dir, "scylla.yaml")) as conf_file:
        # safe_load() returns None for an empty document; treat that as an
        # empty configuration rather than crashing on cfg.get() below.
        cfg = yaml.safe_load(conf_file) or {}

    default_path = cfg.get('workdir') or datadir()
    data_dirs = cfg.get("data_file_directories")
    if data_dirs is None:
        # Key missing or explicitly null: fall back to the default location.
        data_dirs = [os.path.join(default_path, 'data')]

    for t in ["commitlog", "hints", "view_hints", "saved_caches"]:
        key = "%s_directory" % t
        if key in cfg:
            data_dirs.append(cfg[key])
        elif os.path.isdir(os.path.join(default_path, t)):
            data_dirs.append(os.path.join(default_path, t))

    recommended_bytes = 10000000000
    iotune_args = []
    for data_dir in data_dirs:
        if not os.path.exists(data_dir):
            logging.error("%s was not found. Please check the configuration and run scylla_io_setup again.\n", data_dir)
            sys.exit(1)
        if not os.path.isdir(data_dir):
            logging.error("%s is not a directory. Please check the configuration and run scylla_io_setup again.\n", data_dir)
            sys.exit(1)
        st = os.statvfs(data_dir)
        avail = st.f_bavail * st.f_frsize
        if avail < recommended_bytes:
            logging.error("Filesystem at %s has only %d bytes available; that is less than the recommended 10 GB. Please free up space and run scylla_io_setup again.\n", data_dir, avail)
            sys.exit(1)
        blocktune.tune_fs(data_dir, '2')
        iotune_args += ["--evaluation-directory", data_dir]

    # An explicit cpuset takes precedence over --smp. The set is sorted so
    # the generated command line is deterministic.
    if cpudata.cpuset():
        iotune_args += ["--cpuset", ",".join(map(str, sorted(cpudata.cpuset())))]
    elif cpudata.smp():
        iotune_args += ["--smp", str(cpudata.smp())]

    try:
        subprocess.check_call([bindir() + "/iotune",
                               "--format", "envfile",
                               "--options-file", etcdir() + "/scylla.d/io.conf",
                               "--properties-file", etcdir() + "/scylla.d/io_properties.yaml"] + iotune_args)
    except Exception as e:
        # Top-level boundary: any failure to run iotune is reported to the
        # operator with remediation advice, then the script exits.
        logging.error(e)
        logging.error("%s did not pass validation tests, it may not be on XFS and/or has limited disk space.\n"
                      "This is a non-supported setup, and performance is expected to be very bad.\n"
                      "For better performance, placing your data on XFS-formatted directories is required.\n"
                      "To override this error, enable developer mode as follow:\n"
                      "sudo %s/scylla_dev_mode_setup --developer-mode 1", data_dirs, scriptsdir())
        sys.exit(1)
|
|
|
|
def _write_io_properties(disk_properties):
    """Write pre-measured disk properties and point Seastar at them.

    Dumps ``disk_properties`` as the single entry of the ``disks`` list in
    ``<etcdir>/scylla.d/io_properties.yaml`` and writes a ``SEASTAR_IO`` line
    referencing that file to ``<etcdir>/scylla.d/io.conf``. Both files are
    closed before returning.
    """
    properties_path = etcdir() + "/scylla.d/io_properties.yaml"
    with open(properties_path, "w") as properties_file:
        yaml.dump({"disks": [disk_properties]}, properties_file, default_flow_style=False)
    with open(etcdir() + "/scylla.d/io.conf", "w") as ioconf:
        ioconf.write("SEASTAR_IO=\"--io-properties-file={}\"\n".format(properties_path))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='IO Setup script for Scylla.')
    parser.add_argument('--ami', dest='ami', action='store_true',
                        help='configure AWS AMI')
    args = parser.parse_args()

    # run_iotune() reads this module-level object to learn the cpuset/smp
    # constraints, so it must be created before that function is called.
    cpudata = scylla_cpuinfo()
    if not is_developer_mode():
        if args.ami:
            idata = aws_instance()

            if not idata.is_supported_instance_class():
                logging.error('{} is not supported instance type, run "scylla_io_setup" again without --ami option.'.format(idata.instance()))
                sys.exit(1)
            disk_properties = {}
            disk_properties["mountpoint"] = datadir()
            nr_disks = len(idata.ephemeral_disks())
            ## both i3 and i2 can run with 1 I/O Queue per shard
            if idata.instance() == "i3.large":
                disk_properties["read_iops"] = 111000
                disk_properties["read_bandwidth"] = 653925080
                disk_properties["write_iops"] = 36800
                disk_properties["write_bandwidth"] = 215066473
            elif idata.instance() == "i3.xlarge":
                disk_properties["read_iops"] = 200800
                disk_properties["read_bandwidth"] = 1185106376
                disk_properties["write_iops"] = 53180
                disk_properties["write_bandwidth"] = 423621267
            elif idata.instance_class() == "i3":
                disk_properties["read_iops"] = 411200 * nr_disks
                disk_properties["read_bandwidth"] = 2015342735 * nr_disks
                disk_properties["write_iops"] = 181500 * nr_disks
                disk_properties["write_bandwidth"] = 808775652 * nr_disks
            elif idata.instance_class() == "i3en":
                if idata.instance() == "i3en.large":
                    disk_properties["read_iops"] = 43315
                    disk_properties["read_bandwidth"] = 330301440
                    disk_properties["write_iops"] = 33177
                    disk_properties["write_bandwidth"] = 165675008
                # Within the i3en class the instance name can only start with
                # "i3en.", so the smaller sizes are matched by their i3en names.
                elif idata.instance() in ("i3en.xlarge", "i3en.2xlarge"):
                    disk_properties["read_iops"] = 84480 * nr_disks
                    disk_properties["read_bandwidth"] = 666894336 * nr_disks
                    disk_properties["write_iops"] = 66969 * nr_disks
                    disk_properties["write_bandwidth"] = 333447168 * nr_disks
                else:
                    disk_properties["read_iops"] = 257024 * nr_disks
                    disk_properties["read_bandwidth"] = 2043674624 * nr_disks
                    disk_properties["write_iops"] = 174080 * nr_disks
                    disk_properties["write_bandwidth"] = 1024458752 * nr_disks
            elif idata.instance_class() == "i2":
                disk_properties["read_iops"] = 64000 * nr_disks
                disk_properties["read_bandwidth"] = 507338935 * nr_disks
                disk_properties["write_iops"] = 57100 * nr_disks
                disk_properties["write_bandwidth"] = 483141731 * nr_disks
            _write_io_properties(disk_properties)
        elif gcp_instance().is_gce_instance():
            idata = gcp_instance()

            if idata.is_recommended_instance():
                disk_properties = {}
                disk_properties["mountpoint"] = datadir()
                nr_disks = idata.nvmeDiskCount
                # below is based on https://cloud.google.com/compute/docs/disks/local-ssd#performance
                # and https://cloud.google.com/compute/docs/disks/local-ssd#nvme
                # note that scylla iotune might measure more, this is GCP recommended
                mbs = 1024 * 1024
                if 1 <= nr_disks < 4:
                    disk_properties["read_iops"] = 170000 * nr_disks
                    disk_properties["read_bandwidth"] = 660 * mbs * nr_disks
                    disk_properties["write_iops"] = 90000 * nr_disks
                    disk_properties["write_bandwidth"] = 350 * mbs * nr_disks
                elif 4 <= nr_disks <= 8:
                    disk_properties["read_iops"] = 680000
                    disk_properties["read_bandwidth"] = 2650 * mbs
                    disk_properties["write_iops"] = 360000
                    disk_properties["write_bandwidth"] = 1400 * mbs
                # nr_disks is compared numerically above, so the 16- and
                # 24-disk cases must be matched as integers too.
                elif nr_disks == 16:
                    disk_properties["read_iops"] = 1600000
                    # Our measured value; Google's figure is 6240 * mbs.
                    disk_properties["read_bandwidth"] = 4521251328
                    disk_properties["write_iops"] = 800000
                    # Our measured value; Google's figure is 3120 * mbs.
                    disk_properties["write_bandwidth"] = 2759452672
                elif nr_disks == 24:
                    disk_properties["read_iops"] = 2400000
                    # Our measured value; Google's figure is 9360 * mbs.
                    disk_properties["read_bandwidth"] = 5921532416
                    disk_properties["write_iops"] = 1200000
                    # Our measured value; Google's figure is 4680 * mbs.
                    disk_properties["write_bandwidth"] = 4663037952
                _write_io_properties(disk_properties)
            else:
                logging.error('This is not a recommended Google Cloud instance setup for auto tuning, running manual iotune.')
                run_iotune()
        else:
            run_iotune()
        # Every non-developer path above produces these two files. Force them
        # to 0644 regardless of the caller's umask.
        os.chmod(etcdir() + '/scylla.d/io_properties.yaml', 0o644)
        os.chmod(etcdir() + '/scylla.d/io.conf', 0o644)
|