#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2017-present ScyllaDB
#

#
# This file is part of Scylla.
#
# Scylla is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Scylla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Scylla.  If not, see <http://www.gnu.org/licenses/>.

import os
import re
from scylla_util import *
import subprocess
import argparse
import yaml
import logging
import sys
import scylla_blocktune as blocktune


# Regular-expression building blocks used when parsing the scylla.d/*.conf files.
#
# Line-start anchor plus a zero-width negative lookahead for '#': intended to
# keep the patterns from matching commented-out lines without consuming input.
_nocomment = r"^\s*(?!#)"
# Separator between an option name and its value: optional whitespace or a
# single '=' (non-capturing group).
_scyllaeq = r"(?:\s*|=)"
# "--cpuset <list>" where <list> is digits joined by '-' or ',' (e.g. "0-3,8");
# the list is exposed as the named group "cpuset".
_cpuset = r"(?:\s*--cpuset{sep}(?P<cpuset>\d+(?:[-,]\d+)*))".format(sep=_scyllaeq)
# "--smp <count>"; the count is exposed as the named group "smp".
_smp = r"(?:\s*--smp{sep}(?P<smp>\d+))".format(sep=_scyllaeq)


def _reopt(s):
    """Return the regex fragment *s* made optional by appending ``?``.

    *s* is expected to be a single atom or group so that the quantifier
    applies to all of it.
    """
    optional = s + "?"
    return optional


def is_developer_mode():
    """Return True if scylla.d/dev-mode.conf enables developer mode.

    A line enables developer mode when it is not a comment and contains
    ``developer-mode`` followed by optional whitespace or ``=`` and then
    ``1`` or ``true``.

    Raises:
        OSError: if the configuration file cannot be opened.
    """
    pattern = re.compile(_nocomment + r".*developer-mode" + _scyllaeq + "(1|true)")
    # The context manager closes the file deterministically instead of
    # relying on garbage collection.
    with open(etcdir() + "/scylla.d/dev-mode.conf", "r") as f:
        return any(
            pattern.match(line)
            for line in f
            # The lookahead in _nocomment can be bypassed by backtracking over
            # leading whitespace, so indented comments are filtered explicitly.
            if not line.lstrip().startswith("#")
        )

class scylla_cpuinfo:
    """Class containing information about how Scylla sees CPUs in this machine.
    Information that can be probed include in which hyperthreads Scylla is configured
    to run, how many total threads exist in the system, etc"""

    @staticmethod
    def _expand_cpuset(spec):
        """Expand a cpuset string such as ``"0-3,8"`` into a set of ints.

        Each comma-separated group is either a single CPU number or an
        inclusive ``first-last`` range.
        """
        cpus = set()
        for group in spec.split(","):
            ends = [int(x) for x in group.split("-")]
            cpus.update(range(ends[0], ends[-1] + 1))
        return cpus

    @staticmethod
    def _parse_cpuinfo(lines):
        """Parse ``/proc/cpuinfo``-style lines into ``{processor: {key: value}}``.

        A ``processor`` line starts a new entry; every following ``key: value``
        line is stored (as stripped strings) under that processor.
        """
        results = {}
        cur_proc = None
        for line in lines:
            # Split on the first ':' only, so values that themselves contain a
            # colon are kept intact. Lines without a separator (blank lines
            # between processors, stray text) are skipped.
            key, sep, value = line.partition(":")
            if not sep:
                continue
            key = key.strip()
            value = value.strip()
            if key == "processor":
                cur_proc = int(value)
                results[cur_proc] = {}
            elif cur_proc is None:
                # Field seen before any "processor" line: there is no entry
                # to attach it to, so ignore it.
                continue
            results[cur_proc][key] = value
        return results

    def __parse_cpuset(self):
        """Read scylla.d/cpuset.conf and store the parsed cpuset/smp settings.

        Sets ``self._cpu_data`` to a dict with keys ``"cpuset"`` (set of ints
        or None) and ``"smp"`` (int or None). If several CPUSET lines match,
        the last one wins.
        """
        pattern = re.compile(_nocomment + r"CPUSET=\s*\"" + _reopt(_cpuset) + _reopt(_smp) + r"\s*\"")
        with open(etcdir() + "/scylla.d/cpuset.conf", "r") as f:
            matches = [m for m in map(pattern.match, f) if m]
        if matches:
            # if more than one, use last
            d = matches[-1].groupdict()
        else:
            d = {"cpuset": None, "smp": None}
        if d["cpuset"]:
            d["cpuset"] = self._expand_cpuset(d["cpuset"])
        if d["smp"]:
            d["smp"] = int(d["smp"])
        self._cpu_data = d

    def __system_cpus(self):
        """Return the parsed contents of /proc/cpuinfo, keyed by processor number."""
        with open("/proc/cpuinfo", "r") as f:
            return self._parse_cpuinfo(f)

    def __init__(self):
        self.__parse_cpuset()
        self._cpu_data["system"] = self.__system_cpus()

    def system_cpuinfo(self):
        """Returns parsed information about CPUs in the system"""
        return self._cpu_data["system"]

    def system_nr_threads(self):
        """Returns the number of threads available in the system"""
        return len(self._cpu_data["system"])

    def system_nr_cores(self):
        """Returns the number of cores available in the system"""
        # NOTE(review): this counts distinct 'core id' values only; on
        # multi-socket hosts core ids may repeat per package, which would
        # under-count. Confirm against callers before changing.
        return len({x['core id'] for x in self._cpu_data["system"].values()})

    def cpuset(self):
        """Returns the current cpuset Scylla is configured to use. Returns None if no constraints exist"""
        return self._cpu_data["cpuset"]

    def smp(self):
        """Returns the explicit smp configuration for Scylla, returns None if no constraints exist"""
        return self._cpu_data["smp"]

    def nr_shards(self):
        """How many shards will Scylla use in this machine"""
        # Precedence: explicit --smp, then the size of --cpuset, then every
        # hardware thread in the system.
        if self._cpu_data["smp"]:
            return self._cpu_data["smp"]
        elif self._cpu_data["cpuset"]:
            return len(self._cpu_data["cpuset"])
        else:
            return len(self._cpu_data["system"])

def run_iotune():
    """Validate Scylla's storage directories and run ``iotune`` against them.

    Reads ``scylla.yaml`` from ``$SCYLLA_CONF`` (or ``<etcdir>/scylla``),
    collects the data, commitlog, hints, view_hints and saved_caches
    directories, checks that each exists, is a directory and has at least
    10 GB available, then invokes ``iotune`` so that it writes
    ``scylla.d/io.conf`` and ``scylla.d/io_properties.yaml``.

    Uses the module-level ``cpudata`` object (created by the script entry
    point) to pass ``--cpuset`` / ``--smp`` through to iotune.

    Exits the process with status 1 on any validation or iotune failure.
    """
    conf_dir = os.environ.get("SCYLLA_CONF", etcdir() + "/scylla")
    with open(os.path.join(conf_dir, "scylla.yaml")) as yaml_file:
        # safe_load returns None for an empty document; treat that as "no
        # settings" rather than crashing on the lookups below.
        cfg = yaml.safe_load(yaml_file) or {}
    default_path = cfg.get('workdir') or datadir()

    data_dirs = cfg.get("data_file_directories")
    if data_dirs is None:
        # Key missing or explicitly null: fall back to <workdir>/data.
        data_dirs = [os.path.join(default_path, 'data')]

    for t in ("commitlog", "hints", "view_hints", "saved_caches"):
        key = "%s_directory" % t
        if key in cfg:
            data_dirs.append(cfg[key])
        elif os.path.isdir(os.path.join(default_path, t)):
            data_dirs.append(os.path.join(default_path, t))

    iotune_args = []
    for data_dir in data_dirs:
        if not os.path.exists(data_dir):
            logging.error("%s was not found. Please check the configuration and run scylla_io_setup again.\n", data_dir)
            sys.exit(1)
        if not os.path.isdir(data_dir):
            logging.error("%s is not a directory. Please check the configuration and run scylla_io_setup again.\n", data_dir)
            sys.exit(1)
        st = os.statvfs(data_dir)
        avail = st.f_bavail * st.f_frsize
        rec = 10000000000
        if avail < rec:
            logging.error("Filesystem at %s has only %d bytes available; that is less than the recommended 10 GB. Please free up space and run scylla_io_setup again.\n", data_dir, avail)
            sys.exit(1)
        # NOTE(review): the meaning of '2' is defined by scylla_blocktune
        # (presumably the block-device nomerges setting) -- confirm there.
        blocktune.tune_fs(data_dir, '2')
        iotune_args += ["--evaluation-directory", data_dir]

    if cpudata.cpuset():
        iotune_args += ["--cpuset", ",".join(map(str, cpudata.cpuset()))]
    elif cpudata.smp():
        iotune_args += ["--smp", str(cpudata.smp())]

    try:
        subprocess.check_call([bindir() + "/iotune",
                               "--format", "envfile",
                               "--options-file", etcdir() + "/scylla.d/io.conf",
                               "--properties-file", etcdir() + "/scylla.d/io_properties.yaml"] + iotune_args)
    except Exception as e:
        # Deliberately broad: any failure to launch or complete iotune is
        # reported the same way and terminates the script.
        logging.error(e)
        logging.error("%s did not pass validation tests, it may not be on XFS and/or has limited disk space.\n"
                      "This is a non-supported setup, and performance is expected to be very bad.\n"
                      "For better performance, placing your data on XFS-formatted directories is required.\n"
                      "To override this error, enable developer mode as follow:\n"
                      "sudo %s/scylla_dev_mode_setup --developer-mode 1", data_dirs, scriptsdir())
        sys.exit(1)

# ---------------------------------------------------------------------------
# Preconfigured disk figures for cloud instances.
#
# Every figure tuple is ordered as _IO_FIELDS. In the AWS tables each entry is
# (figures, per_disk): when per_disk is True the figures describe ONE disk and
# are multiplied by the number of ephemeral disks; otherwise they are totals.
# ---------------------------------------------------------------------------
_IO_FIELDS = ("read_iops", "read_bandwidth", "write_iops", "write_bandwidth")
_MBS = 1024 * 1024

# Keyed by the full instance type; consulted first.
_AWS_BY_INSTANCE = {
    "i3.large": ((111000, 653925080, 36800, 215066473), False),
    "i3.xlarge": ((200800, 1185106376, 53180, 423621267), False),
    "i3en.large": ((43315, 330301440, 33177, 165675008), False),
    "i3en.xlarge": ((84480, 666894336, 66969, 333447168), True),
    "i3en.2xlarge": ((84480, 666894336, 66969, 333447168), True),
    "im4gn.large": ((33943, 288433525, 27877, 126864680), False),
    "im4gn.xlarge": ((68122, 576603520, 55246, 254534954), False),
    "im4gn.2xlarge": ((136422, 1152663765, 92184, 508926453), False),
    "im4gn.4xlarge": ((273050, 1638427264, 92173, 1027966826), False),
    "im4gn.8xlarge": ((250241, 1163130709, 86374, 977617664), True),
    "im4gn.16xlarge": ((273030, 1638211413, 92607, 1028340266), True),
    "is4gen.medium": ((33965, 288462506, 27876, 126954200), False),
    "is4gen.large": ((68131, 576654869, 55257, 254551002), False),
    "is4gen.xlarge": ((136413, 1152747904, 92180, 508889546), False),
    "is4gen.2xlarge": ((273038, 1628982613, 92182, 1027983530), False),
    "is4gen.4xlarge": ((260493, 1217396928, 83169, 1000390784), True),
    "is4gen.8xlarge": ((273021, 1656354602, 92233, 1028010325), True),
}

# Instance classes whose figures depend only on the instance size.
_AWS_GRAVITON_CLASSES = ("c6gd", "m6gd", "r6gd", "x2gd")
_AWS_GRAVITON_BY_SIZE = {
    "medium": ((14808, 77869147, 5972, 32820302), False),
    "large": ((29690, 157712240, 12148, 65978069), False),
    "xlarge": ((59688, 318762880, 24449, 133311808), False),
    "2xlarge": ((119353, 634795733, 49069, 266841680), False),
    "4xlarge": ((237196, 1262309504, 98884, 533938080), False),
    "8xlarge": ((442945, 2522688939, 166021, 1063041152), False),
    "12xlarge": ((353691, 1908192256, 146732, 806399360), True),
    "16xlarge": ((426893, 2525781589, 161740, 1063389952), True),
    "metal": ((416257, 2527296683, 156326, 1063657088), True),
}

# Class-wide defaults, used when no exact instance entry exists.
## both i3 and i2 can run with 1 I/O Queue per shard
_AWS_BY_CLASS = {
    "i3": ((411200, 2015342735, 181500, 808775652), True),
    "i3en": ((257024, 2043674624, 174080, 1024458752), True),
    "i2": ((64000, 507338935, 57100, 483141731), True),
}

# below is based on https://docs.microsoft.com/en-us/azure/virtual-machines/lsv2-series
# note that scylla iotune might measure more, this is Azure recommended
# since write properties are not defined, they come from our iotune tests
_AZURE_BY_DISK_COUNT = {
    1: (400000, 2000 * _MBS, 271696, 1314 * _MBS),
    2: (800000, 4000 * _MBS, 552434, 2478 * _MBS),
    4: (1500000, 8000 * _MBS, 1105063, 4948 * _MBS),
    6: (2200000, 14000 * _MBS, 1616847, 7892 * _MBS),
    8: (2900000, 16000 * _MBS, 2208081, 9694 * _MBS),
    10: (3800000, 20000 * _MBS, 2546511, 11998 * _MBS),
}


def _io_figures(values, nr_disks=1):
    """Build the four io-properties entries from *values*, scaled by *nr_disks*."""
    return {field: value * nr_disks for field, value in zip(_IO_FIELDS, values)}


def _aws_io_figures(instance, instance_class, instance_size, nr_disks):
    """Return preset io figures for an AWS instance, or None if there is no preset."""
    if instance in _AWS_BY_INSTANCE:
        entry = _AWS_BY_INSTANCE[instance]
    elif instance_class in _AWS_GRAVITON_CLASSES:
        entry = _AWS_GRAVITON_BY_SIZE.get(instance_size)
    else:
        entry = _AWS_BY_CLASS.get(instance_class)
    if entry is None:
        return None
    values, per_disk = entry
    return _io_figures(values, nr_disks if per_disk else 1)


def _gcp_io_figures(nr_disks):
    """Return preset io figures for a GCP instance with *nr_disks* local NVMe disks, or None."""
    # below is based on https://cloud.google.com/compute/docs/disks/local-ssd#performance
    # and https://cloud.google.com/compute/docs/disks/local-ssd#nvme
    # note that scylla iotune might measure more, this is GCP recommended
    if 1 <= nr_disks < 4:
        return _io_figures((170000, 660 * _MBS, 90000, 350 * _MBS), nr_disks)
    if 4 <= nr_disks <= 8:
        return _io_figures((680000, 2650 * _MBS, 360000, 1400 * _MBS))
    if nr_disks == 16:
        # Bandwidths are our measured values; Google documents
        # 6240 * _MBS (read) and 3120 * _MBS (write).
        return _io_figures((1600000, 4521251328, 800000, 2759452672))
    if nr_disks == 24:
        # Bandwidths are our measured values; Google documents
        # 9360 * _MBS (read) and 4680 * _MBS (write).
        return _io_figures((2400000, 5921532416, 1200000, 4663037952))
    return None


def _azure_io_figures(nr_disks):
    """Return preset io figures for an Azure instance with *nr_disks* NVMe disks, or None."""
    values = _AZURE_BY_DISK_COUNT.get(nr_disks)
    return None if values is None else _io_figures(values)


def _write_io_properties(disk_properties):
    """Write scylla.d/io_properties.yaml and point scylla.d/io.conf at it."""
    properties_path = etcdir() + "/scylla.d/io_properties.yaml"
    # Context managers make sure both files are flushed and closed before the
    # caller changes their permissions.
    with open(properties_path, "w") as properties_file:
        yaml.dump({"disks": [disk_properties]}, properties_file, default_flow_style=False)
    with open(etcdir() + "/scylla.d/io.conf", "w") as ioconf:
        ioconf.write("SEASTAR_IO=\"--io-properties-file={}\"\n".format(properties_path))


def _apply_preset_or_iotune(figures, fallback_message):
    """Write the preset *figures*; if there are none, log and measure with iotune."""
    if figures is None:
        logging.error(fallback_message)
        run_iotune()
        return
    disk_properties = {"mountpoint": datadir()}
    disk_properties.update(figures)
    _write_io_properties(disk_properties)


if __name__ == "__main__":
    if not is_nonroot() and os.getuid() > 0:
        print('Requires root permission.')
        sys.exit(1)
    parser = argparse.ArgumentParser(description='IO Setup script for Scylla.')
    parser.add_argument('--ami', dest='ami', action='store_true',
                        help='configure AWS AMI')
    args = parser.parse_args()

    # run_iotune() reads this module-level object.
    cpudata = scylla_cpuinfo()
    if not is_developer_mode():
        if args.ami:
            idata = aws_instance()

            if not idata.is_supported_instance_class():
                logging.error('{} is not supported instance type, run "scylla_io_setup" again without --ami option.'.format(idata.instance()))
                sys.exit(1)
            # A supported class without a preset (e.g. an unlisted size) now
            # falls back to iotune, like the GCP and Azure branches, instead
            # of writing a properties file that only contains the mountpoint.
            _apply_preset_or_iotune(
                _aws_io_figures(idata.instance(), idata.instance_class(),
                                idata.instance_size(), len(idata.ephemeral_disks())),
                'This is a supported AWS instance type but there are no preconfigured IO scheduler parameters for it. Running manual iotune.')
        elif gcp_instance().is_gce_instance():
            idata = gcp_instance()

            if idata.is_recommended_instance():
                _apply_preset_or_iotune(
                    _gcp_io_figures(idata.nvmeDiskCount),
                    'Did not detect number of disks in Google Cloud instance setup for auto local disk tuning, running manual iotune.')
            else:
                logging.error(
                    'This is not a recommended Google Cloud instance setup for auto local disk tuning, running manual iotune.')
                run_iotune()
        elif azure_instance().is_azure_instance():
            idata = azure_instance()
            if idata.is_recommended_instance():
                _apply_preset_or_iotune(
                    _azure_io_figures(idata.nvmeDiskCount),
                    'Did not detect number of disks in Azure Cloud instance setup for auto local disk tuning, running manual iotune.')
            else:
                logging.error(
                    'This is not a recommended Azure Cloud instance setup for auto local disk tuning, running manual iotune.')
                run_iotune()
        else:
            run_iotune()
        os.chmod(etcdir() + '/scylla.d/io_properties.yaml', 0o644)
        os.chmod(etcdir() + '/scylla.d/io.conf', 0o644)
