From 0152c000bbb459d377d93caee39e2ea27daaa392 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Fri, 24 Mar 2023 14:40:52 +0400 Subject: [PATCH] commitlog: use separate directory for schema commitlog The commitlog API originally implied that the commitlog_directory would contain files from a single commitlog instance. This is checked in segment_manager::list_descriptors: if it encounters a file with an unknown prefix, an exception is thrown in commitlog::descriptor::descriptor and logged at the WARN level. A new schema commitlog was added recently, which shares the filesystem directory with the main commitlog. This causes warnings to be emitted on each boot. This patch solves the warnings problem by moving the schema commitlog to a separate directory. In addition, the user can employ the new schema_commitlog_directory parameter to move the schema commitlog to another disk drive. By default, the schema commitlog directory is nested in the commitlog_directory. This can help avoid problems during an upgrade if the commitlog_directory in the custom scylla.yaml is located on a separate disk partition. This is expected to be released in 5.3. As #13134 (raft tables->schema commitlog) is also scheduled for 5.3, and it already requires a clean rolling restart (no cl segments to replay), we don't need to specifically handle upgrade here. 
Fixes: #11867 --- HACKING.md | 2 +- conf/scylla.yaml | 6 ++++++ db/config.cc | 6 ++++++ db/config.hh | 1 + dist/common/scripts/scylla_blocktune.py | 1 + dist/common/scripts/scylla_fstrim | 1 + dist/common/scripts/scylla_util.py | 3 ++- docs/operating-scylla/scylla-yaml.inc | 6 ++++++ main.cc | 1 + replica/database.cc | 2 +- test/lib/cql_test_env.cc | 2 ++ 11 files changed, 28 insertions(+), 3 deletions(-) diff --git a/HACKING.md b/HACKING.md index 01217be328..0d1b435581 100644 --- a/HACKING.md +++ b/HACKING.md @@ -195,7 +195,7 @@ $ # Edit configuration options as appropriate $ SCYLLA_HOME=$HOME/scylla build/release/scylla ``` -The `scylla.yaml` file in the repository by default writes all database data to `/var/lib/scylla`, which likely requires root access. Change the `data_file_directories` and `commitlog_directory` fields as appropriate. +The `scylla.yaml` file in the repository by default writes all database data to `/var/lib/scylla`, which likely requires root access. Change the `data_file_directories`, `commitlog_directory` and `schema_commitlog_directory` fields as appropriate. Scylla has a number of requirements for the file-system and operating system to operate ideally and at peak performance. However, during development, these requirements can be relaxed with the `--developer-mode` flag. diff --git a/conf/scylla.yaml b/conf/scylla.yaml index 72900a8a7f..eb25fba0ac 100644 --- a/conf/scylla.yaml +++ b/conf/scylla.yaml @@ -36,6 +36,12 @@ num_tokens: 256 # separate spindle than the data directories. # commitlog_directory: /var/lib/scylla/commitlog +# schema commit log. A special commitlog instance +# used for schema and system tables. +# When running on magnetic HDD, this should be a +# separate spindle than the data directories. +# schema_commitlog_directory: /var/lib/scylla/commitlog/schema + # commitlog_sync may be either "periodic" or "batch." 
# # When in batch mode, Scylla won't ack writes until the commit log diff --git a/db/config.cc b/db/config.cc index b28baecc36..74be53db26 100644 --- a/db/config.cc +++ b/db/config.cc @@ -303,6 +303,8 @@ db::config::config(std::shared_ptr exts) "The directory in which Scylla will put all its subdirectories. The location of individual subdirs can be overriden by the respective *_directory options.") , commitlog_directory(this, "commitlog_directory", value_status::Used, "", "The directory where the commit log is stored. For optimal write performance, it is recommended the commit log be on a separate disk partition (ideally, a separate physical device) from the data file directories.") + , schema_commitlog_directory(this, "schema_commitlog_directory", value_status::Used, "", + "The directory where the schema commit log is stored. This is a special commitlog instance used for schema and system tables. For optimal write performance, it is recommended the commit log be on a separate disk partition (ideally, a separate physical device) from the data file directories.") , data_file_directories(this, "data_file_directories", "datadir", value_status::Used, { }, "The directory location where table data (SSTables) is stored") , hints_directory(this, "hints_directory", value_status::Used, "", @@ -950,6 +952,10 @@ void db::config::add_per_partition_rate_limit_extension() { void db::config::setup_directories() { maybe_in_workdir(commitlog_directory, "commitlog"); + if (!schema_commitlog_directory.is_set()) { + schema_commitlog_directory(commitlog_directory() + "/schema"); + } + maybe_in_workdir(schema_commitlog_directory, "schema_commitlog"); maybe_in_workdir(data_file_directories, "data"); maybe_in_workdir(hints_directory, "hints"); maybe_in_workdir(view_hints_directory, "view_hints"); diff --git a/db/config.hh b/db/config.hh index 3fda6ef7d2..4e2e86d251 100644 --- a/db/config.hh +++ b/db/config.hh @@ -148,6 +148,7 @@ public: named_value listen_interface_prefer_ipv6; named_value 
work_directory; named_value commitlog_directory; + named_value schema_commitlog_directory; named_value data_file_directories; named_value hints_directory; named_value view_hints_directory; diff --git a/dist/common/scripts/scylla_blocktune.py b/dist/common/scripts/scylla_blocktune.py index 104236095b..3223567413 100644 --- a/dist/common/scripts/scylla_blocktune.py +++ b/dist/common/scripts/scylla_blocktune.py @@ -86,3 +86,4 @@ def tune_yaml(path, nomerges): for fs in y['data_file_directories']: tune_fs(fs, nomerges) tune_fs(y['commitlog_directory'], nomerges) + tune_fs(y['schema_commitlog_directory'], nomerges) diff --git a/dist/common/scripts/scylla_fstrim b/dist/common/scripts/scylla_fstrim index 477b7127f9..3e9cb25080 100755 --- a/dist/common/scripts/scylla_fstrim +++ b/dist/common/scripts/scylla_fstrim @@ -34,6 +34,7 @@ def main(): mountpoints.add(find_mount_point(d)) mountpoints.add(find_mount_point(cfg['commitlog_directory'])) + mountpoints.add(find_mount_point(cfg['schema_commitlog_directory'])) # workaround of #2649 subprocess.call(["/opt/scylladb/scripts/scylla-blocktune", "--set-nomerges", "1"]) for d in mountpoints: diff --git a/dist/common/scripts/scylla_util.py b/dist/common/scripts/scylla_util.py index c6b9abb067..d810e8cddf 100644 --- a/dist/common/scripts/scylla_util.py +++ b/dist/common/scripts/scylla_util.py @@ -229,7 +229,7 @@ def parse_scylla_dirs_with_default(conf='/etc/scylla/scylla.yaml'): not len(y['data_file_directories']) or \ not " ".join(y['data_file_directories']).strip(): y['data_file_directories'] = [os.path.join(y['workdir'], 'data')] - for t in [ "commitlog", "hints", "view_hints", "saved_caches" ]: + for t in [ "commitlog", "schema_commitlog", "hints", "view_hints", "saved_caches" ]: key = "%s_directory" % t if key not in y or not y[key]: y[key] = os.path.join(y['workdir'], t) @@ -246,6 +246,7 @@ def get_scylla_dirs(): dirs = [] dirs.extend(y['data_file_directories']) dirs.append(y['commitlog_directory']) + 
dirs.append(y['schema_commitlog_directory']) if 'hints_directory' in y and y['hints_directory']: dirs.append(y['hints_directory']) diff --git a/docs/operating-scylla/scylla-yaml.inc b/docs/operating-scylla/scylla-yaml.inc index 3c4c818ff3..741c9e0cce 100644 --- a/docs/operating-scylla/scylla-yaml.inc +++ b/docs/operating-scylla/scylla-yaml.inc @@ -25,6 +25,12 @@ scylla.yaml is equivalent to the Apache Cassandra cassandra.yaml configuration f # separate spindle than the data directories. commitlog_directory: /var/lib/scylla/commitlog + # schema commit log. A special commitlog instance + # used for schema and system tables. + # When running on magnetic HDD, this should be a + # separate spindle than the data directories. + # schema_commitlog_directory: /var/lib/scylla/commitlog/schema + # seed_provider class_name is saved for future use. # A seed address is mandatory. seed_provider: diff --git a/main.cc b/main.cc index a4444e04b2..e4862dfe02 100644 --- a/main.cc +++ b/main.cc @@ -995,6 +995,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl utils::directories::set dir_set; dir_set.add(cfg->data_file_directories()); dir_set.add(cfg->commitlog_directory()); + dir_set.add(cfg->schema_commitlog_directory()); dirs.emplace(cfg->developer_mode()); dirs->create_and_verify(std::move(dir_set)).get(); diff --git a/replica/database.cc b/replica/database.cc index 9c13977909..951b5fe5a3 100644 --- a/replica/database.cc +++ b/replica/database.cc @@ -916,7 +916,7 @@ void database::maybe_init_schema_commitlog() { _uses_schema_commitlog = true; db::commitlog::config c; - c.commit_log_location = _cfg.commitlog_directory(); + c.commit_log_location = _cfg.schema_commitlog_directory(); c.fname_prefix = db::schema_tables::COMMITLOG_FILENAME_PREFIX; c.metrics_category_name = "schema-commitlog"; c.commitlog_total_space_in_mb = 10 << 20; diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index f4aef7eef3..fde28689d9 100644 --- 
a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -511,6 +511,7 @@ public: data_dir_path = cfg->data_file_directories()[0]; } cfg->commitlog_directory.set(data_dir_path + "/commitlog.dir"); + cfg->schema_commitlog_directory.set(cfg->commitlog_directory() + "/schema"); cfg->hints_directory.set(data_dir_path + "/hints.dir"); cfg->view_hints_directory.set(data_dir_path + "/view_hints.dir"); cfg->num_tokens.set(256); @@ -522,6 +523,7 @@ public: } create_directories((data_dir_path + "/system").c_str()); create_directories(cfg->commitlog_directory().c_str()); + create_directories(cfg->schema_commitlog_directory().c_str()); create_directories(cfg->hints_directory().c_str()); create_directories(cfg->view_hints_directory().c_str()); for (unsigned i = 0; i < smp::count; ++i) {