From 4efaf309458ad34fe9586e248f1273ce3c998be8 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 16 Mar 2026 13:06:13 +0200 Subject: [PATCH 1/8] db/config: make auto_snapshot live-updateable We're already getting the latest value of `auto_snapshot` in database::drop_table and database::truncate_table_on_all_shards. Marking auto_snapshot as LiveUpdate to allow updating it via CQL or by reloading the configuration from scylla.yaml while the node is up. Signed-off-by: Benny Halevy --- db/config.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/config.cc b/db/config.cc index 17ad1c5b87..24a3f7feb2 100644 --- a/db/config.cc +++ b/db/config.cc @@ -1053,7 +1053,7 @@ db::config::config(std::shared_ptr exts) /** * @Group Advanced automatic backup setting */ - , auto_snapshot(this, "auto_snapshot", value_status::Used, true, + , auto_snapshot(this, "auto_snapshot", liveness::LiveUpdate, value_status::Used, true, "Enable or disable whether a snapshot is taken of the data before keyspace truncation or dropping of tables. To prevent data loss, using the default setting is strongly advised. If you set to false, you will lose data on truncation or drop.") /** * @Group Key caches and global row properties From 69f464fd3fa073ae97f156d35b16fd82fa11466a Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Thu, 19 Feb 2026 12:00:08 +0200 Subject: [PATCH 2/8] db/config: add auto_snapshot_ttl Add a live-updateable configuration option to set auto_snapshot_ttl. It will be used in the following patches to set an expiration time on the auto-snapshot, and then to automatically delete it when it expires. Fixes SCYLLADB-191 Signed-off-by: Benny Halevy --- conf/scylla.yaml | 11 ++++++++++- db/config.cc | 2 ++ db/config.hh | 1 + docs/kb/snapshots.rst | 6 ++++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/conf/scylla.yaml b/conf/scylla.yaml index 1a6b071887..80904ba113 100644 --- a/conf/scylla.yaml +++ b/conf/scylla.yaml @@ -372,7 +372,16 @@ commitlog_total_space_in_mb: -1 # or dropping of column families. The STRONGLY advised default of true # should be used to provide data safety. If you set this flag to false, you will # lose data on truncation or drop. -# auto_snapshot: true +# +# `auto_snapshot_ttl` specifies the time-to-live (TTL) for automatic snapshots in seconds. +# A value of 0 means snapshots are kept indefinitely. +# The auto-snapshot will be deleted automatically when it expires. +# It is a good practice to set `auto_snapshot_ttl` to a reasonable time allowing backup +# of the auto-snapshot, but to have a safety net that will automatically clean up stale snapshots +# to reclaim their disk space. +# +auto_snapshot: true +auto_snapshot_ttl: 864000 # When executing a scan, within or across a partition, we need to keep the # tombstones seen in memory so we can return them to the coordinator, which diff --git a/db/config.cc b/db/config.cc index 24a3f7feb2..ceb8a40617 100644 --- a/db/config.cc +++ b/db/config.cc @@ -1055,6 +1055,8 @@ db::config::config(std::shared_ptr exts) */ , auto_snapshot(this, "auto_snapshot", liveness::LiveUpdate, value_status::Used, true, "Enable or disable whether a snapshot is taken of the data before keyspace truncation or dropping of tables. To prevent data loss, using the default setting is strongly advised. If you set to false, you will lose data on truncation or drop.") + , auto_snapshot_ttl(this, "auto_snapshot_ttl", liveness::LiveUpdate, value_status::Used, 0, + "The time-to-live (TTL) for automatic snapshots in seconds. A value of 0 means snapshots are kept indefinitely.") /** * @Group Key caches and global row properties * @GroupDescription When creating or modifying tables, you enable or disable the key cache (partition key cache) or row cache for that table by setting the caching parameter. Other row and key cache tuning and configuration options are set at the global (node) level. Cassandra uses these settings to automatically distribute memory for each table on the node based on the overall workload and specific table usage. You can also configure the save periods for these caches globally. diff --git a/db/config.hh b/db/config.hh index 306417fbcd..70ed1f4824 100644 --- a/db/config.hh +++ b/db/config.hh @@ -301,6 +301,7 @@ public: named_value partitioner; named_value storage_port; named_value auto_snapshot; + named_value auto_snapshot_ttl; named_value key_cache_keys_to_save; named_value key_cache_save_period; named_value key_cache_size_in_mb; diff --git a/docs/kb/snapshots.rst b/docs/kb/snapshots.rst index f97f856e58..2eaed8f13e 100644 --- a/docs/kb/snapshots.rst +++ b/docs/kb/snapshots.rst @@ -35,6 +35,12 @@ Apart from *planned backup* procedure described above, and as a safeguard from * The default setting for the ``auto_snapshot`` flag in ``/etc/scylla/scylla.yaml`` file is ``true``. It is **not** recommended to set it to ``false``, unless there is a good backup and recovery strategy in place. +The automatically created snapshots remain on local storage until they are backed-up using the ``move_files`` option or they are explicitly deleted. +Otherwise, stale snapshots might cause a node to run out of storage space by holding up to the SSTables in the snapshot after they are deleted from the table (by compaction, tablet-migration, TRUNCATE, DROP TABLE, and so on). +The ``auto_snapshot_ttl`` configuration option can be used to automatically delete stale snapshots. + +The default setting for the ``auto_snapshot_ttl`` option in ``/etc/scylla/scylla.yaml`` file is ``864000`` seconds (10 days). It is recommended to set it to a reasonable time allowing backup of the auto-snapshot, yet have a safety net that will automatically clean up stale snapshots to reclaim their disk space. + Snapshot Creation ----------------- From 9e72079402093863c8a6dd19dc54869315a94c2d Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Thu, 19 Feb 2026 12:01:29 +0200 Subject: [PATCH 3/8] database: apply auto_snapshot_ttl When automatically taking a snapshot before a table is truncated or dropped, set snapshot_options::expires_at if auto_snapshot_ttl is set. This is then stored in the snapshot manifest.json file. Add a unit test to verify that the auto_snapshot_ttl is applied in the snapshot manifest.json. Signed-off-by: Benny Halevy --- replica/database.cc | 10 +++++- test/boost/database_test.cc | 64 +++++++++++++++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/replica/database.cc b/replica/database.cc index ec359e9685..f772f4c58c 100644 --- a/replica/database.cc +++ b/replica/database.cc @@ -3152,7 +3152,15 @@ future<> database::truncate_table_on_all_shards(sharded& sharded_db, s auto truncated_at = truncated_at_opt.value_or(db_clock::now()); auto name = snapshot_name_opt.value_or( format("{:d}-{}", truncated_at.time_since_epoch().count(), cf.schema()->cf_name())); - co_await snapshot_table_on_all_shards(sharded_db, table_shards, name, db::snapshot_options{}); + auto ttl = sharded_db.local().get_config().auto_snapshot_ttl(); + db::snapshot_options opts; + if (ttl) { + // Add one second to compensate for created_at being truncated + // to second resolution, ensuring the snapshot lives for at least + // auto_snapshot_ttl seconds. + opts.expires_at = opts.created_at + (ttl + 1) * 1s; + } + co_await snapshot_table_on_all_shards(sharded_db, table_shards, name, opts); } co_await sharded_db.invoke_on_all([&] (database& db) { diff --git a/test/boost/database_test.cc b/test/boost/database_test.cc index 0e63888381..80724a408b 100644 --- a/test/boost/database_test.cc +++ b/test/boost/database_test.cc @@ -549,7 +549,7 @@ static std::set collect_sstables(const std::set& all_files, co } // Validate that the manifest.json lists exactly the SSTables present in the snapshot directory -static future<> validate_manifest(const locator::topology& topology, const fs::path& snapshot_dir, const std::set& in_snapshot_dir, gc_clock::time_point min_time, bool tablets_enabled) { +static future<> validate_manifest(const locator::topology& topology, const fs::path& snapshot_dir, const std::set& in_snapshot_dir, gc_clock::time_point min_time, bool tablets_enabled, std::optional ttl = std::nullopt) { sstring suffix = "-TOC.txt"; auto sstables_in_snapshot = collect_sstables(in_snapshot_dir, suffix); std::set sstables_in_manifest; @@ -606,7 +606,15 @@ static future<> validate_manifest(const locator::topology& topology, const fs::p BOOST_REQUIRE(created_at_seconds > 0); auto& expires_at = manifest_snapshot["expires_at"]; BOOST_REQUIRE(expires_at.IsNumber()); - BOOST_REQUIRE_GE(expires_at.GetInt64(), created_at_seconds); + auto expires_at_seconds = expires_at.GetInt64(); + if (ttl) { + BOOST_REQUIRE_GE(expires_at_seconds, created_at_seconds + *ttl); + BOOST_REQUIRE_LE(expires_at_seconds, created_at_seconds + *ttl + 1); + } else { + BOOST_REQUIRE_GE(expires_at.GetInt64(), created_at_seconds); + } + } else if (ttl) { + BOOST_FAIL("manifest should have expires_at when ttl is set"); } BOOST_REQUIRE(manifest_json.HasMember("table")); @@ -757,6 +765,58 @@ SEASTAR_TEST_CASE(snapshot_skip_flush_works) { }); } +SEASTAR_THREAD_TEST_CASE(test_auto_snapshot_ttl) { + bool tablets_enabled = true; + bool create_mvs = false; + int ttl = 1; +#ifdef SCYLLA_BUILD_MODE_DEBUG + ttl = 3; +#endif + + auto db_cfg_ptr = make_shared(); + db_cfg_ptr->tablets_mode_for_new_keyspaces(tablets_enabled ? db::tablets_mode_t::mode::enabled : db::tablets_mode_t::mode::disabled); + db_cfg_ptr->auto_snapshot(true); + db_cfg_ptr->auto_snapshot_ttl(ttl); + std::string ks_name = "ks"; + std::string table_name = "test"; + size_t num_keys = 100; + do_with_some_data_in_thread({table_name}, [&] (cql_test_env& e) { + auto min_time = gc_clock::now(); + take_snapshot(e, ks_name, table_name).get(); + + auto& cf = e.local_db().find_column_family(ks_name, table_name); + auto table_directory = table_dir(cf); + + auto in_table_dir = collect_files(table_directory).get(); + // snapshot triggered a flush and wrote the data down. + BOOST_REQUIRE_GE(in_table_dir.size(), 9); + + testlog.debug("Dropping table {}.{}", ks_name, table_name); + replica::database::legacy_drop_table_on_all_shards(e.db(), e.get_system_keyspace(), ks_name, table_name, true).get(); + + fs::path snapshot_dir; + auto snapshot_base_dir = table_directory / sstables::snapshots_dir; + directory_lister lister(snapshot_base_dir, lister::dir_entry_types::of()); + while (auto de = lister.get().get()) { + if (de->name.starts_with("pre-drop")) { + BOOST_REQUIRE(snapshot_dir.empty()); // only one pre-drop snapshot should be present + testlog.debug("Found auto-snapshot directory: {}", snapshot_base_dir / de->name); + snapshot_dir = snapshot_base_dir / de->name; + } + } + + auto in_snapshot_dir = collect_files(snapshot_dir).get(); + + in_table_dir.insert("manifest.json"); + in_table_dir.insert("schema.cql"); + // all files were copied and manifest was generated + BOOST_REQUIRE_EQUAL(in_table_dir, in_snapshot_dir); + + const auto& topology = e.local_db().get_token_metadata().get_topology(); + validate_manifest(topology, snapshot_dir, in_snapshot_dir, min_time, tablets_enabled, ttl).get(); + }, create_mvs, db_cfg_ptr, num_keys).get(); +} + SEASTAR_TEST_CASE(snapshot_list_okay) { return do_with_some_data_in_thread({"cf"}, [] (cql_test_env& e) { auto& cf = e.local_db().find_column_family("ks", "cf"); From 4234cd8e16301561639dd4eeadb317f0772f597c Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Thu, 19 Feb 2026 17:11:53 +0200 Subject: [PATCH 4/8] db: snapshot_ctl: add deletion of expired snapshots Add a task running on shard 0 that deletes expired snapshots. Deletion is scheduled whenever an automatic snapshot is taken with a ttl. Signed-off-by: Benny Halevy --- db/snapshot-ctl.cc | 67 +++++++++++++++++++++++++++++++++++++ db/snapshot-ctl.hh | 16 +++++++++ main.cc | 4 ++- replica/database.cc | 8 +++++ replica/database.hh | 9 +++++ replica/table.cc | 11 +++++- test/boost/database_test.cc | 19 +++++++++++ 7 files changed, 132 insertions(+), 2 deletions(-) diff --git a/db/snapshot-ctl.cc b/db/snapshot-ctl.cc index 3fc85bf908..f5907f730a 100644 --- a/db/snapshot-ctl.cc +++ b/db/snapshot-ctl.cc @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,8 @@ #include "sstables/sstables_manager.hh" #include "service/storage_proxy.hh" +using namespace std::chrono_literals; + logging::logger snap_log("snapshots"); namespace db { @@ -39,6 +42,10 @@ snapshot_ctl::snapshot_ctl(sharded& db, sharded snapshot_ctl::stop() { @@ -53,6 +60,9 @@ future<> snapshot_ctl::disable_all_operations() { } co_await _ops.close(); } + // Wake up the expiration task and await for it to finish. + _expiration_cond.signal(); + co_await std::exchange(_delete_expired_snapshots, make_ready_future<>()); } future<> snapshot_ctl::check_snapshot_not_exist(sstring ks_name, sstring name, std::optional> filter) { @@ -183,10 +193,14 @@ future<> snapshot_ctl::do_take_column_family_snapshot(sstring ks_name, std::vect t = resolve_table_name(ks_name, t); } co_await check_snapshot_not_exist(ks_name, tag, tables); + snap_log.debug("take_snapshot: tag={} keyspace={} tables={}: skip_flush={} created_at={} expires_at={}", + tag, ks_name, fmt::join(tables, ","), + opts.skip_flush, opts.created_at, opts.expires_at.value_or(gc_clock::time_point::min())); co_await replica::database::snapshot_tables_on_all_shards(_db, ks_name, std::move(tables), std::move(tag), opts); } future<> snapshot_ctl::clear_snapshot(sstring tag, std::vector keyspace_names, sstring cf_name) { + snap_log.debug("clear_snapshot: tag={} keyspaces={} table={}", tag, fmt::join(keyspace_names, ","), cf_name); co_return co_await run_snapshot_modify_operation([this, tag = std::move(tag), keyspace_names = std::move(keyspace_names), cf_name = std::move(cf_name)] (this auto) -> future<> { // clear_snapshot enumerates keyspace_names and uses cf_name as a // filter in each. When cf_name needs resolution (e.g. logical index @@ -299,4 +313,57 @@ future snapshot_ctl::true_snapshots_size(sstring ks, sstring cf) { })); } +future<> snapshot_ctl::delete_expired_snapshots() { + auto delete_expired_snapshot = [this](expiration_info info) { + snap_log.info("Deleting expired snapshot {} of table {}.{}", info.tag, info.ks_name, info.table_name); + // Awaited indirectly using the `_ops` gate + (void)run_snapshot_modify_operation([this, info = std::move(info)]() mutable { + return _db.local().clear_snapshot(info.tag, {info.ks_name}, info.table_name).handle_exception([info = std::move(info)](std::exception_ptr ep) { + snap_log.warn("Failed to delete expired snapshot {} of table {}.{}: {}: Ignored", info.tag, info.ks_name, info.table_name, ep); + }); + }); + }; + auto expiration_timer = timer([this] { + snap_log.debug("Expiration timer fired: queued={}", _expiration_queue.size()); + _expiration_cond.signal(); + }); + while (!_ops.is_closed()) { + if (!_expiration_queue.empty()) { + auto now = gc_clock::now(); + if (_expiration_queue.front().expires_at <= now) { + // FIXME: do not delete expired snapshots during backup + auto info = _expiration_queue.front(); + std::ranges::pop_heap(_expiration_queue, std::greater{}, &expiration_info::expires_at); + _expiration_queue.resize(_expiration_queue.size() - 1); + delete_expired_snapshot(std::move(info)); + continue; + } else { + auto wait_duration = _expiration_queue.front().expires_at - now; + snap_log.debug("Expiration waiting for {}: queued={}", wait_duration, _expiration_queue.size()); + expiration_timer.rearm(timer<>::clock::now() + wait_duration); + } + } else { + snap_log.debug("Expiration waiting indefinitely: queue is empty"); + } + co_await _expiration_cond.wait(); + } } + +void snapshot_ctl::schedule_expiration(gc_clock::time_point when, sstring ks_name, sstring table_name, sstring tag) { + if (this_shard_id() != 0) { + on_internal_error(snap_log, "schedule_expiration must be called on shard 0"); + } + if (!_ops.is_closed()) { + snap_log.info("Scheduling expiration of snapshot {} of table {}.{} at {}", tag, ks_name, table_name, when); + _expiration_queue.emplace_back(expiration_info{ + .expires_at = when, + .ks_name = std::move(ks_name), + .table_name = std::move(table_name), + .tag = std::move(tag) + }); + std::ranges::push_heap(_expiration_queue, std::greater{}, &expiration_info::expires_at); + _expiration_cond.signal(); + } +} + +} // namespace db diff --git a/db/snapshot-ctl.hh b/db/snapshot-ctl.hh index 1f22a20b8a..b0030bfc06 100644 --- a/db/snapshot-ctl.hh +++ b/db/snapshot-ctl.hh @@ -20,6 +20,7 @@ #include "tasks/task_manager.hh" #include #include +#include using namespace seastar; @@ -122,6 +123,9 @@ public: future true_snapshots_size(sstring ks, sstring cf); future<> disable_all_operations(); + + // Must be called on shard 0 + void schedule_expiration(gc_clock::time_point when, sstring ks_name, sstring table_name, sstring tag); private: config _config; sharded& _db; @@ -130,6 +134,16 @@ private: seastar::named_gate _ops; shared_ptr _task_manager_module; sstables::storage_manager& _storage_manager; + condition_variable _expiration_cond; + + struct expiration_info { + gc_clock::time_point expires_at; + sstring ks_name; + sstring table_name; + sstring tag; + }; + std::vector _expiration_queue; + future<> _delete_expired_snapshots = make_ready_future<>(); future<> check_snapshot_not_exist(sstring ks_name, sstring name, std::optional> filter = {}); @@ -155,6 +169,8 @@ private: future<> do_take_snapshot(sstring tag, std::vector keyspace_names, snapshot_options opts = {} ); future<> do_take_column_family_snapshot(sstring ks_name, std::vector tables, sstring tag, snapshot_options opts = {}); future<> do_take_cluster_column_family_snapshot(std::vector ks_names, std::vector tables, sstring tag, snapshot_options opts = {}); + + future<> delete_expired_snapshots(); }; } diff --git a/main.cc b/main.cc index d56c154f5c..1a1b9c10c3 100644 --- a/main.cc +++ b/main.cc @@ -2264,7 +2264,9 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl .backup_sched_group = dbcfg.backup_scheduling_group, }; snapshot_ctl.start(std::ref(db), std::ref(proxy), std::ref(task_manager), std::ref(sstm), snap_cfg).get(); - auto stop_snapshot_ctl = defer_verbose_shutdown("snapshots", [&snapshot_ctl] { + db.local().plug_snapshot_ctl(snapshot_ctl.local()); + auto stop_snapshot_ctl = defer_verbose_shutdown("snapshots", [&snapshot_ctl, &db] { + db.local().unplug_snapshot_ctl(); snapshot_ctl.stop().get(); }); auto backup_throughput_update = io_throughput_updater("backup", dbcfg.backup_scheduling_group, cfg->backup_io_throughput_mb_per_sec); diff --git a/replica/database.cc b/replica/database.cc index f772f4c58c..0ab367d0c9 100644 --- a/replica/database.cc +++ b/replica/database.cc @@ -3610,6 +3610,14 @@ void database::unplug_view_update_generator() noexcept { _view_update_generator = nullptr; } +void database::plug_snapshot_ctl(db::snapshot_ctl& snapshot_ctl) noexcept { + _snapshot_ctl = &snapshot_ctl; +} + +void database::unplug_snapshot_ctl() noexcept { + _snapshot_ctl = nullptr; +} + } // namespace replica mutation_reader make_multishard_streaming_reader(sharded& db, diff --git a/replica/database.hh b/replica/database.hh index c76e88ab19..8093c39312 100644 --- a/replica/database.hh +++ b/replica/database.hh @@ -1774,6 +1774,8 @@ private: utils::disk_space_monitor::subscription _out_of_space_subscription; + db::snapshot_ctl* _snapshot_ctl = nullptr; + public: data_dictionary::database as_data_dictionary() const; db::commitlog* commitlog_for(const schema_ptr& schema); @@ -1800,6 +1802,9 @@ public: void plug_view_update_generator(db::view::view_update_generator& generator) noexcept; void unplug_view_update_generator() noexcept; + void plug_snapshot_ctl(db::snapshot_ctl& snapshot_ctl) noexcept; + void unplug_snapshot_ctl() noexcept; + private: future<> flush_non_system_column_families(); future<> flush_system_column_families(); @@ -2100,6 +2105,10 @@ public: static future<> snapshot_tables_on_all_shards(sharded& sharded_db, std::string_view ks_name, std::vector table_names, sstring tag, db::snapshot_options opts); static future<> snapshot_keyspace_on_all_shards(sharded& sharded_db, std::string_view ks_name, sstring tag, db::snapshot_options opts); + db::snapshot_ctl* get_snapshot_ctl_ptr() { + return _snapshot_ctl; + } + public: bool update_column_family(schema_ptr s); private: diff --git a/replica/table.cc b/replica/table.cc index 3b14f34650..3a8165b79d 100644 --- a/replica/table.cc +++ b/replica/table.cc @@ -4355,7 +4355,7 @@ future<> database::snapshot_table_on_all_shards(sharded& sharded_db, c } } } - co_await write_manifest(topology, *writer, std::move(sstable_sets), std::move(tablets), name, std::move(opts), s, + co_await write_manifest(topology, *writer, std::move(sstable_sets), std::move(tablets), name, opts, s, tablet_count, tablet_layout).handle_exception([&] (std::exception_ptr ptr) { tlogger.error("Failed to seal snapshot in {}: {}.", name, ptr); ex = std::move(ptr); @@ -4365,6 +4365,15 @@ future<> database::snapshot_table_on_all_shards(sharded& sharded_db, c } co_await writer->sync(); + + if (opts.expires_at) { + tlogger.info("snapshot {}: scheduled to expire at {}", name, opts.expires_at.value()); + co_await sharded_db.invoke_on(0, [when = *opts.expires_at, ks_name = s->ks_name(), table_name = s->cf_name(), name] (database& db) { + if (auto snap_ctl_ptr = db.get_snapshot_ctl_ptr()) { + snap_ctl_ptr->schedule_expiration(when, ks_name, table_name, name); + } + }); + } }); } diff --git a/test/boost/database_test.cc b/test/boost/database_test.cc index 80724a408b..718747a54d 100644 --- a/test/boost/database_test.cc +++ b/test/boost/database_test.cc @@ -14,6 +14,7 @@ #include #include #include +#include #include #undef SEASTAR_TESTING_MAIN @@ -781,6 +782,14 @@ SEASTAR_THREAD_TEST_CASE(test_auto_snapshot_ttl) { std::string table_name = "test"; size_t num_keys = 100; do_with_some_data_in_thread({table_name}, [&] (cql_test_env& e) { + sharded sc; + sc.start(std::ref(e.db()), std::ref(e.get_storage_proxy()), std::ref(e.get_task_manager()), std::ref(e.get_sstorage_manager()), db::snapshot_ctl::config{}).get(); + auto stop_sc = deferred_stop(sc); + e.local_db().plug_snapshot_ctl(sc.local()); + auto unplug_sc = deferred_action([&] () noexcept { + e.local_db().unplug_snapshot_ctl(); + }); + auto min_time = gc_clock::now(); take_snapshot(e, ks_name, table_name).get(); @@ -814,6 +823,16 @@ SEASTAR_THREAD_TEST_CASE(test_auto_snapshot_ttl) { const auto& topology = e.local_db().get_token_metadata().get_topology(); validate_manifest(topology, snapshot_dir, in_snapshot_dir, min_time, tablets_enabled, ttl).get(); + + // Wait for snapshot garbage collection after expiry, polling for directory removal + auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds((ttl + 10) * 1s); + sleep(ttl * 1s).get(); + while (file_exists(snapshot_dir.native()).get()) { + if (std::chrono::steady_clock::now() > deadline) { + BOOST_FAIL(fmt::format("Snapshot directory {} still exists after waiting for TTL expiry", snapshot_dir)); + } + sleep(100ms).get(); + } }, create_mvs, db_cfg_ptr, num_keys).get(); } From 9499f7b53f3bc3ca20ef944a23e0fc4776b40f92 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 17 Mar 2026 11:42:36 +0200 Subject: [PATCH 5/8] test/cqlpy/test_virtual_tables: consistenly pass a set of expected tables to verify_snapshots As noted by Copilot: > `verify_snapshots()` annotates `expected_snapshots` values as `set[str]`, > but callers in this file pass lists (e.g. `{test_tag: [table]}`) and the > function uses `.remove(...)` in a way that works for both. > Either update the callers to pass sets consistently, or relax the type annotation > to match actual usage. This patch makes sure that all callers pass a set to `verify_snapshots`. Signed-off-by: Benny Halevy --- test/cqlpy/test_virtual_tables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cqlpy/test_virtual_tables.py b/test/cqlpy/test_virtual_tables.py index 2586ad5225..c89a697b5e 100644 --- a/test/cqlpy/test_virtual_tables.py +++ b/test/cqlpy/test_virtual_tables.py @@ -27,7 +27,7 @@ def test_snapshots_table(scylla_only, cql, test_keyspace): with util.new_test_table(cql, test_keyspace, 'pk int PRIMARY KEY, v int') as table: cql.execute(f"INSERT INTO {table} (pk, v) VALUES (0, 0)") nodetool.take_snapshot(cql, table, test_tag, False) - verify_snapshots(cql, {test_tag: [table]}) + verify_snapshots(cql, {test_tag: {table}}) nodetool.del_snapshot(cql, test_tag) def test_snapshots_dropped_table(scylla_only, cql, test_keyspace): @@ -35,7 +35,7 @@ def test_snapshots_dropped_table(scylla_only, cql, test_keyspace): with util.new_test_table(cql, test_keyspace, 'pk int PRIMARY KEY, v int') as table: cql.execute(f"INSERT INTO {table} (pk, v) VALUES (0, 0)") nodetool.take_snapshot(cql, table, test_tag, False) - verify_snapshots(cql, {test_tag: [table]}) + verify_snapshots(cql, {test_tag: {table}}) nodetool.del_snapshot(cql, test_tag) def test_snapshots_multiple_keyspaces(scylla_only, cql): From cd11ed27753a673da05790fdc52a21db12d22cb1 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 16 Mar 2026 14:49:43 +0200 Subject: [PATCH 6/8] test/cqlpy/test_virtual_tables: add verfication of snapshot directory To be used also in the next patch for verifying auto_snapshot_ttl Signed-off-by: Benny Halevy --- test/cqlpy/test_virtual_tables.py | 40 ++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/test/cqlpy/test_virtual_tables.py b/test/cqlpy/test_virtual_tables.py index c89a697b5e..90b7b4cfc7 100644 --- a/test/cqlpy/test_virtual_tables.py +++ b/test/cqlpy/test_virtual_tables.py @@ -7,38 +7,60 @@ import pytest from . import nodetool from . import util import json +import glob +import os from collections import defaultdict from test.pylib.skip_types import skip_env -def verify_snapshots(cql, expected_snapshots: dict[str, set[str]]): +def verify_snapshots(cql, expected_snapshots: dict[str, set[str]], scylla_data_dir): results = list(cql.execute(f"SELECT keyspace_name, table_name, snapshot_name, live, total FROM system.snapshots")) for res in results: if res.snapshot_name in expected_snapshots: t = f"{res.keyspace_name}.{res.table_name}" assert t in expected_snapshots[res.snapshot_name], f"Unexpected snapshot {t}: snapshot_name={res.snapshot_name}: expected_snapshots={expected_snapshots}" expected_snapshots[res.snapshot_name].remove(t) + verify_snapshot_dir(scylla_data_dir, res.keyspace_name, res.table_name, res.snapshot_name) + for _, expected_tables in expected_snapshots.items(): assert not expected_tables, f"Not all expected snapshots were listed: expected_snapshots={expected_snapshots}" -def test_snapshots_table(scylla_only, cql, test_keyspace): +def snapshot_dir_exists(scylla_data_dir, keyspace_name, table_name, snapshot_name) -> Tuple[str, bool]: + path = os.path.join(scylla_data_dir, keyspace_name, f"{table_name}-*") + table_dir = glob.glob(path) + assert len(table_dir) == 1, f"Expected single table directory for '{path}', got {table_dir}" + snapshot_dir = os.path.join(table_dir[0], "snapshots", snapshot_name) + return snapshot_dir, os.path.exists(snapshot_dir) + +def verify_snapshot_dir(scylla_data_dir, keyspace_name, table_name, snapshot_name, expected: bool = True): + snapshot_dir, exists = snapshot_dir_exists(scylla_data_dir, keyspace_name, table_name, snapshot_name) + if expected: + assert exists, f"Snapshots directory '{snapshot_dir}' does not exist" + else: + assert not exists, f"Snapshots directory '{snapshot_dir}' still exists" + +def test_snapshots_table(scylla_only, cql, test_keyspace, scylla_data_dir): test_tag = util.unique_name() with util.new_test_table(cql, test_keyspace, 'pk int PRIMARY KEY, v int') as table: cql.execute(f"INSERT INTO {table} (pk, v) VALUES (0, 0)") nodetool.take_snapshot(cql, table, test_tag, False) - verify_snapshots(cql, {test_tag: {table}}) - nodetool.del_snapshot(cql, test_tag) + verify_snapshots(cql, {test_tag: {table}}, scylla_data_dir) + nodetool.del_snapshot(cql, test_tag) + keyspace_name, table_name = table.split('.') + verify_snapshot_dir(scylla_data_dir, keyspace_name, table_name, test_tag, False) -def test_snapshots_dropped_table(scylla_only, cql, test_keyspace): +def test_snapshots_dropped_table(scylla_only, cql, test_keyspace, scylla_data_dir): test_tag = util.unique_name() with util.new_test_table(cql, test_keyspace, 'pk int PRIMARY KEY, v int') as table: cql.execute(f"INSERT INTO {table} (pk, v) VALUES (0, 0)") nodetool.take_snapshot(cql, table, test_tag, False) - verify_snapshots(cql, {test_tag: {table}}) - nodetool.del_snapshot(cql, test_tag) + verify_snapshots(cql, {test_tag: {table}}, scylla_data_dir) + nodetool.del_snapshot(cql, test_tag) + keyspace_name, table_name = table.split('.') + verify_snapshot_dir(scylla_data_dir, keyspace_name, table_name, test_tag, False) -def test_snapshots_multiple_keyspaces(scylla_only, cql): +def test_snapshots_multiple_keyspaces(scylla_only, cql, scylla_data_dir): expected_snapshots = defaultdict(set) ks_opts = "WITH REPLICATION = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}" test_tags = [util.unique_name(), util.unique_name(), util.unique_name()] @@ -59,7 +81,7 @@ def test_snapshots_multiple_keyspaces(scylla_only, cql): nodetool.take_snapshot(cql, table2, test_tags[2], False) expected_snapshots[test_tags[2]].add(table2) - verify_snapshots(cql, expected_snapshots) + verify_snapshots(cql, expected_snapshots, scylla_data_dir) for t in test_tags: nodetool.del_snapshot(cql, t) From 61978c7e4609d276a1db4b180a6e742deb314387 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 Feb 2026 10:14:22 +0200 Subject: [PATCH 7/8] api, nodetool: add snapshot ttl option Add test/cqlpy/test_snapshot.py to test the snapshot api ttl option on both Cassandra and Scylla. Fixes SCYLLADB-190 Signed-off-by: Benny Halevy --- api/storage_service.cc | 37 ++++++ api/storage_service.hh | 7 ++ .../nodetool-commands/snapshot.rst | 6 +- test/cqlpy/nodetool.py | 72 ++++++++++- test/cqlpy/test_snapshot.py | 45 +++++++ test/cqlpy/test_virtual_tables.py | 29 +++-- test/nodetool/test_snapshot.py | 116 +++++++++--------- test/pylib/rest_client.py | 4 +- tools/scylla-nodetool.cc | 12 +- 9 files changed, 257 insertions(+), 71 deletions(-) create mode 100644 test/cqlpy/test_snapshot.py diff --git a/api/storage_service.cc b/api/storage_service.cc index 7b38c8134c..91e3584f59 100644 --- a/api/storage_service.cc +++ b/api/storage_service.cc @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include #include #include @@ -145,6 +147,37 @@ std::pair> parse_table_infos(const http_context return std::make_pair(std::move(keyspace), std::move(tis)); } +std::optional validate_ttl(const std::string& value) { + if (value.empty()) { + return std::nullopt; + } + + // Match an integer, optional whitespace, and an optional single-character suffix + static const boost::regex re(R"((\d+)\s*([smhd])?)", boost::regex_constants::icase); + boost::smatch match; + if (!boost::regex_match(value, match, re)) { + throw bad_param_exception(fmt::format("TTL value '{}' is not valid, expected a non-negative integer with an optional suffix [smhd]", value)); + } + + int res; + try { + res = std::stoi(match[1].str()); + } catch (...) { + throw bad_param_exception(fmt::format("Parsing TTL value '{}' failed: {}", value, std::current_exception())); + } + + auto suffix = match[2].str(); + auto c = suffix.empty() ? 's' : std::tolower(suffix[0]); + switch (c) { + case 's': return std::chrono::seconds(res); + case 'm': return std::chrono::minutes(res); + case 'h': return std::chrono::hours(res); + case 'd': return std::chrono::days(res); + } + + std::unreachable(); +} + static ss::token_range token_range_endpoints_to_json(const dht::token_range_endpoints& d) { ss::token_range r; r.start_token = d._start_token; @@ -2135,6 +2168,10 @@ void set_snapshot(http_context& ctx, routes& r, sharded& snap_ db::snapshot_options opts = { .skip_flush = strcasecmp(sfopt.c_str(), "true") == 0, }; + auto ttl = validate_ttl(req->get_query_param("ttl")); + if (ttl && *ttl > 0s) { + opts.expires_at = opts.created_at + std::chrono::seconds(*ttl); + } std::vector keynames = split(req->get_query_param("kn"), ","); try { diff --git a/api/storage_service.hh b/api/storage_service.hh index 73f996e471..23007816c5 100644 --- a/api/storage_service.hh +++ b/api/storage_service.hh @@ -66,6 +66,13 @@ struct scrub_info { scrub_info parse_scrub_options(const http_context& ctx, std::unique_ptr req); +// TTL is given as a number with an optional a suffix of: +// s - seconds (default) +// m - minutes +// h - hours +// d - days +std::optional validate_ttl(const std::string& value); + void set_storage_service(http_context& ctx, httpd::routes& r, sharded& ss, sharded&, service::raft_group0_client&); void unset_storage_service(http_context& ctx, httpd::routes& r); void set_sstables_loader(http_context& ctx, httpd::routes& r, sharded& sst_loader); diff --git a/docs/operating-scylla/nodetool-commands/snapshot.rst b/docs/operating-scylla/nodetool-commands/snapshot.rst index ee52ca9c8f..61310d475e 100644 --- a/docs/operating-scylla/nodetool-commands/snapshot.rst +++ b/docs/operating-scylla/nodetool-commands/snapshot.rst @@ -17,7 +17,7 @@ SYNOPSIS [(-u | --username )] snapshot [(-cf | --column-family
| --table
)] [(-kc | --kc.list )] - [(-sf | --skip-flush)] [(-t | --tag )] [--] [] + [(-sf | --skip-flush)] [(-t | --tag )] [--ttl ] [--] [] OPTIONS ....... @@ -38,6 +38,10 @@ Parameter Descriptio -sf / --skip-flush Do not flush memtables before snapshotting (snapshot will not contain unflushed data) -------------------------------------------------------------------- ------------------------------------------------------------------------------------- -t / --tag The name of the snapshot +-------------------------------------------------------------------- ------------------------------------------------------------------------------------- +--ttl The time-to-live for the snapshot, optionally followed by: + 's' for seconds (the default), 'm' for minutes, 'h' for hours, or 'd' for days. + Missing TTL, or 0, means no TTL ==================================================================== ===================================================================================== ``--`` This option can be used to separate command-line options from the list of argument, (useful when arguments might be mistaken for command-line options. diff --git a/test/cqlpy/nodetool.py b/test/cqlpy/nodetool.py index 39d800627e..10dd448d36 100644 --- a/test/cqlpy/nodetool.py +++ b/test/cqlpy/nodetool.py @@ -21,6 +21,9 @@ import subprocess import shutil import pytest import re +from collections import defaultdict +from datetime import datetime + # For a "cql" object connected to one node, find the REST API URL # with the same node and port 10000. @@ -54,14 +57,17 @@ def nodetool_cmd(): if nodetool_cmd.cmd: return nodetool_cmd.cmd if not nodetool_cmd.failed: - nodetool_cmd.conf = os.getenv('NODETOOL') or 'nodetool' - nodetool_cmd.cmd = shutil.which(nodetool_cmd.conf) + nodetool_cmd.conf = os.getenv('NODETOOL').split() or ['nodetool'] + nodetool_cmd.cmd = shutil.which(nodetool_cmd.conf[0]) if nodetool_cmd.cmd is None: nodetool_cmd.failed = True + elif len(nodetool_cmd.conf) > 1: + nodetool_cmd.args = nodetool_cmd.conf[1:] if nodetool_cmd.failed: pytest.fail(f"Error: Can't find {nodetool_cmd.conf}. Please set the NODETOOL environment variable to the path of the nodetool utility.", pytrace=False) return nodetool_cmd.cmd nodetool_cmd.cmd = None +nodetool_cmd.args = [] nodetool_cmd.failed = False nodetool_cmd.conf = False @@ -71,7 +77,10 @@ def run_nodetool(cql, *args, **subprocess_kwargs): # TODO: We may need to change this function or its callers to add proper # support for testing on multi-node clusters. host = cql.cluster.contact_points[0] - return subprocess.run([nodetool_cmd(), '-h', host, *args], **subprocess_kwargs) + cmd = [nodetool_cmd()] + cmd.extend(nodetool_cmd.args) + cmd.extend(['-h', host, *args]) + return subprocess.run(cmd, **subprocess_kwargs) def flush(cql, table): ks, cf = table.split('.') @@ -115,14 +124,19 @@ def compact_keyspace(cql, ks, flush_memtables=True): args.extend([ks]) run_nodetool(cql, "compact", *args) -def take_snapshot(cql, table, tag, skip_flush): +def take_snapshot(cql, table, tag, skip_flush = None, ttl = None): ks, cf = table.split('.') if has_rest_api(cql): - requests.post(f'{rest_api_url(cql)}/storage_service/snapshots/', params={'kn': ks, 'cf' : cf, 'tag': tag, 'sf': skip_flush}) + params = {'kn': ks, 'cf' : cf, 'tag': tag, 'sf': skip_flush} + if ttl is not None: + params['ttl'] = str(ttl) + requests.post(f'{rest_api_url(cql)}/storage_service/snapshots/', params=params) else: args = ['--tag', tag, '--table', cf] if skip_flush: args.append('--skip-flush') + if ttl is not None: + args.extend(['--ttl', str(ttl)]) args.append(ks) run_nodetool(cql, "snapshot", *args) @@ -138,6 +152,54 @@ def del_snapshot(cql, tag:str, keyspaces:list[str] = []): args.extend(keyspaces) run_nodetool(cql, "clearsnapshot", *args) +def list_snapshots(cql): + if has_rest_api(cql): + api_res = requests.get(f'{rest_api_url(cql)}/storage_service/snapshots/') + api_res.raise_for_status() + return api_res.json() + else: + nodetool_res = run_nodetool(cql, "listsnapshots", capture_output=True, text=True) + if nodetool_res.returncode != 0: + raise RuntimeError(f"nodetool listsnapshots failed: {nodetool_res.stderr}") + + def with_units(value, units) -> int: + mult = 1 + if units: + if units.upper()[0] == 'B': + mult = 1 + elif units.upper()[0] == 'K': + mult = 1024 + elif units.upper()[0] == 'M': + mult = 1024*1024 + elif units.upper()[0] == 'G': + mult = 1024*1024*1024 + elif units.upper()[0] == 'T': + mult = 1024*1024*1024*1024 + else: + raise RuntimeError(f"nodetool listsnapshots has unrecognized {units=} for {value=}") + return int(float(value) * mult) + + def ts_fromisoformat(value): + return datetime.fromisoformat(value).timestamp() + + res = defaultdict(list) + for line in nodetool_res.stdout.splitlines(): + stripped = line.strip() + if not stripped or stripped == "There are no snapshots": + continue + parts = stripped.split() + if parts[0] == "Snapshot" or parts[0] == "Total": + continue + res[parts[0]].append({ + 'ks': parts[1], + 'cf': parts[2], + 'live': with_units(parts[3], parts[4]) if len(parts) > 4 else 0, + 'total': with_units(parts[5], parts[6]) if len(parts) > 6 else 0, + 'created_at': ts_fromisoformat(parts[7]) if len(parts) > 7 else None, + 'expires_at': ts_fromisoformat(parts[8]) if len(parts) > 8 else None, + }) + return [{'key': k, 'value': v} for k, v in res.items()] + def refreshsizeestimates(cql): if has_rest_api(cql): # The "nodetool refreshsizeestimates" is not available, or needed, in Scylla diff --git a/test/cqlpy/test_snapshot.py b/test/cqlpy/test_snapshot.py new file mode 100644 index 0000000000..12409435d5 --- /dev/null +++ b/test/cqlpy/test_snapshot.py @@ -0,0 +1,45 @@ +# Copyright 2021-present ScyllaDB +# +# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1 + +import time + +from . import nodetool +from .util import new_test_table + + + +def has_snapshot(snapshots, tag): + for s in snapshots: + if s['key'] == tag: + return s['value'] + return None + +def test_snapshot_ttl(cql, test_keyspace): + with new_test_table(cql, test_keyspace, 'k int PRIMARY KEY') as table: + write = cql.prepare(f"INSERT INTO {table} (k) VALUES (?)") + for i in range(10): + cql.execute(write, [i]) + nodetool.flush(cql, table) + + tag = "test_snapshot" + ttl = 60 + + # Take a snapshot of the table + created_at = time.time() + nodetool.take_snapshot(cql, table, tag, ttl=f"{ttl}s") + expires_at = time.time() + ttl + 1 + + # Verify that the snapshot exists + snapshots = nodetool.list_snapshots(cql) + print(f"Snapshots for {table}: {snapshots}") + infos = has_snapshot(snapshots, tag) + assert len(infos) == 1, f"Expected to find {tag} in {snapshots=}, found {len(infos)} infos" + # Currently, Cassandra returns the snapshot creation and expiration times in nodetool listsnapshots. + # Scylla does not support that yet + info = infos[0] + if info.get("created_at"): + created_at = float(info["created_at"]) + assert info.get("expires_at") + expires_at = float(info["expires_at"]) + assert expires_at - created_at == ttl diff --git a/test/cqlpy/test_virtual_tables.py b/test/cqlpy/test_virtual_tables.py index 90b7b4cfc7..c42d20a571 100644 --- a/test/cqlpy/test_virtual_tables.py +++ b/test/cqlpy/test_virtual_tables.py @@ -3,18 +3,21 @@ # # SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1 +from typing import Tuple + import pytest from . import nodetool from . import util import json import glob import os +import time from collections import defaultdict from test.pylib.skip_types import skip_env -def verify_snapshots(cql, expected_snapshots: dict[str, set[str]], scylla_data_dir): +def verify_snapshots(cql, expected_snapshots: dict[str, set[str]], scylla_data_dir, expiry: int = None): results = list(cql.execute(f"SELECT keyspace_name, table_name, snapshot_name, live, total FROM system.snapshots")) for res in results: if res.snapshot_name in expected_snapshots: @@ -23,8 +26,10 @@ def verify_snapshots(cql, expected_snapshots: dict[str, set[str]], scylla_data_d expected_snapshots[res.snapshot_name].remove(t) verify_snapshot_dir(scylla_data_dir, res.keyspace_name, res.table_name, res.snapshot_name) - for _, expected_tables in expected_snapshots.items(): - assert not expected_tables, f"Not all expected snapshots were listed: expected_snapshots={expected_snapshots}" + now = time.time() + if not expiry or int(now) < int(expiry): + for _, expected_tables in expected_snapshots.items(): + assert not expected_tables, f"Not all expected snapshots were listed: expected_snapshots={expected_snapshots} {now=} {expiry=}" def snapshot_dir_exists(scylla_data_dir, keyspace_name, table_name, snapshot_name) -> Tuple[str, bool]: path = os.path.join(scylla_data_dir, keyspace_name, f"{table_name}-*") @@ -50,14 +55,24 @@ def test_snapshots_table(scylla_only, cql, test_keyspace, scylla_data_dir): keyspace_name, table_name = table.split('.') verify_snapshot_dir(scylla_data_dir, keyspace_name, table_name, test_tag, False) -def test_snapshots_dropped_table(scylla_only, cql, test_keyspace, scylla_data_dir): +@pytest.mark.parametrize("ttl", [0, 5]) +def test_snapshots_dropped_table(scylla_only, cql, test_keyspace, scylla_data_dir, ttl): + cql.execute(f"UPDATE system.config SET value = '{ttl}' WHERE name = 'auto_snapshot_ttl'") test_tag = util.unique_name() with util.new_test_table(cql, test_keyspace, 'pk int PRIMARY KEY, v int') as table: cql.execute(f"INSERT INTO {table} (pk, v) VALUES (0, 0)") - nodetool.take_snapshot(cql, table, test_tag, False) - verify_snapshots(cql, {test_tag: {table}}, scylla_data_dir) - nodetool.del_snapshot(cql, test_tag) + expiry = int(time.time() + ttl) if ttl else None + nodetool.take_snapshot(cql, table, test_tag, False, ttl) + verify_snapshots(cql, {test_tag: {table}}, scylla_data_dir, expiry) keyspace_name, table_name = table.split('.') + if ttl: + deadline = expiry + 10 + time.sleep(ttl) + while snapshot_dir_exists(scylla_data_dir, keyspace_name, table_name, test_tag)[1] and time.time() < deadline: + time.sleep(1) + else: + # For ttl == 0, explicitly delete the snapshot and verify removal. + nodetool.del_snapshot(cql, test_tag) verify_snapshot_dir(scylla_data_dir, keyspace_name, table_name, test_tag, False) def test_snapshots_multiple_keyspaces(scylla_only, cql, scylla_data_dir): diff --git a/test/nodetool/test_snapshot.py b/test/nodetool/test_snapshot.py index 4f30b68949..5634928968 100644 --- a/test/nodetool/test_snapshot.py +++ b/test/nodetool/test_snapshot.py @@ -99,7 +99,7 @@ def test_listsnapshots_no_snapshots(nodetool, request): assert res.stdout == "Snapshot Details: \nThere are no snapshots\n" -def check_snapshot_out(res, tag, ktlist, skip_flush): +def check_snapshot_out(res, tag, ktlist, skip_flush, ttl): """Check that the output of nodetool snapshot contains the expected messages""" if len(ktlist) == 0: @@ -110,7 +110,7 @@ def check_snapshot_out(res, tag, ktlist, skip_flush): pattern = re.compile("Requested creating snapshot\\(s\\)" f" for \\[{keyspaces}\\]" f" with snapshot name \\[(.+)\\]" - f" and options \\{{skipFlush={str(skip_flush).lower()}\\}}") + f" and options \\{{skipFlush={str(skip_flush).lower()}, ttl={ttl}\\}}") print(res) print(pattern) @@ -138,15 +138,15 @@ def test_snapshot_keyspace(nodetool): res = nodetool("snapshot", "--tag", tag, "ks1", expected_requests=[ expected_request("POST", "/storage_service/snapshots", - params={"tag": tag, "sf": "false", "kn": "ks1"}) + params={"tag": tag, "sf": "false", "ttl": "0", "kn": "ks1"}) ]) - check_snapshot_out(res.stdout, tag, ["ks1"], False) + check_snapshot_out(res.stdout, tag, ["ks1"], False, "0") res = nodetool("snapshot", "--tag", tag, "ks1", "ks2", expected_requests=[ expected_request("POST", "/storage_service/snapshots", - params={"tag": tag, "sf": "false", "kn": "ks1,ks2"}) + params={"tag": tag, "sf": "false", "ttl": "0", "kn": "ks1,ks2"}) ]) - check_snapshot_out(res.stdout, tag, ["ks1", "ks2"], False) + check_snapshot_out(res.stdout, tag, ["ks1", "ks2"], False, "0") @pytest.mark.parametrize("option_name", ("-cf", "--column-family", "--table")) @@ -155,15 +155,15 @@ def test_snapshot_keyspace_with_table(nodetool, option_name): res = nodetool("snapshot", "--tag", tag, "ks1", option_name, "tbl", expected_requests=[ expected_request("POST", "/storage_service/snapshots", - params={"tag": tag, "sf": "false", "kn": "ks1", "cf": "tbl"}) + params={"tag": tag, "sf": "false", "ttl": "0", "kn": "ks1", "cf": "tbl"}) ]) - check_snapshot_out(res.stdout, tag, ["ks1"], False) + check_snapshot_out(res.stdout, tag, ["ks1"], False, "0") res = nodetool("snapshot", "--tag", tag, "ks1", option_name, "tbl1,tbl2", expected_requests=[ expected_request("POST", "/storage_service/snapshots", - params={"tag": tag, "sf": "false", "kn": "ks1", "cf": "tbl1,tbl2"}) + params={"tag": tag, "sf": "false", "ttl": "0", "kn": "ks1", "cf": "tbl1,tbl2"}) ]) - check_snapshot_out(res.stdout, tag, ["ks1"], False) + check_snapshot_out(res.stdout, tag, ["ks1"], False, "0") class kn_param(NamedTuple): @@ -186,14 +186,14 @@ class kn_param(NamedTuple): def test_snapshot_keyspace_table_single_arg(nodetool, param, scylla_only): tag = "my_snapshot" - req_params = {"tag": tag, "sf": "false", "kn": param.kn} + req_params = {"tag": tag, "sf": "false", "ttl": "0", "kn": param.kn} if param.cf: req_params["cf"] = param.cf res = nodetool("snapshot", "--tag", tag, *param.args, expected_requests=[ expected_request("POST", "/storage_service/snapshots", params=req_params) ]) - check_snapshot_out(res.stdout, tag, param.snapshot_keyspaces, False) + check_snapshot_out(res.stdout, tag, param.snapshot_keyspaces, False, "0") @pytest.mark.parametrize("option_name", ("-kt", "--kt-list", "-kc", "--kc.list")) @@ -202,21 +202,21 @@ def test_snapshot_ktlist(nodetool, option_name): res = nodetool("snapshot", "--tag", tag, option_name, "ks1.tbl1", expected_requests=[ expected_request("POST", "/storage_service/snapshots", - params={"tag": tag, "sf": "false", "kn": "ks1", "cf": "tbl1"}) + params={"tag": tag, "sf": "false", "ttl": "0", "kn": "ks1", "cf": "tbl1"}) ]) - check_snapshot_out(res.stdout, tag, ["ks1.tbl1"], False) + check_snapshot_out(res.stdout, tag, ["ks1.tbl1"], False, "0") res = nodetool("snapshot", "--tag", tag, option_name, "ks1.tbl1,ks2.tbl2", expected_requests=[ expected_request("POST", "/storage_service/snapshots", - params={"tag": tag, "sf": "false", "kn": "ks1.tbl1,ks2.tbl2"}) + params={"tag": tag, "sf": "false", "ttl": "0", "kn": "ks1.tbl1,ks2.tbl2"}) ]) - check_snapshot_out(res.stdout, tag, ["ks1.tbl1", "ks2.tbl2"], False) + check_snapshot_out(res.stdout, tag, ["ks1.tbl1", "ks2.tbl2"], False, "0") res = nodetool("snapshot", "--tag", tag, option_name, "ks1,ks2", expected_requests=[ expected_request("POST", "/storage_service/snapshots", - params={"tag": tag, "sf": "false", "kn": "ks1,ks2"}) + params={"tag": tag, "sf": "false", "ttl": "0", "kn": "ks1,ks2"}) ]) - check_snapshot_out(res.stdout, tag, ["ks1" ,"ks2"], False) + check_snapshot_out(res.stdout, tag, ["ks1" ,"ks2"], False, "0") @pytest.mark.parametrize("tag", [None, "my_snapshot_tag"]) @@ -228,52 +228,58 @@ def test_snapshot_ktlist(nodetool, option_name): {"ks": ["ks1"], "tbl": ["tbl1", "tbl2"]}, {"ks": ["ks1", "ks2"], "tbl": []}, ]) -@pytest.mark.parametrize("skip_flush", [False, True]) -def test_snapshot_options_matrix(nodetool, tag, ktlist, skip_flush): - cmd = ["snapshot"] - params = {} +def test_snapshot_options_matrix(nodetool, tag, ktlist): + for skip_flush in [False, True]: + for ttl in ["1", "1s", "1S", "1m", "1M", "1h", "1H", "1d", "1D"]: + cmd = ["snapshot"] + params = {} - if tag is None: - tag = int(time.time() * 1000) - params["tag"] = approximate_value(value=tag, delta=99999).to_json() - else: - cmd += ["--tag", tag] - params["tag"] = tag + if tag is None: + cur_tag = int(time.time() * 1000) + params["tag"] = approximate_value(value=cur_tag, delta=99999).to_json() + else: + cur_tag = tag + cmd += ["--tag", cur_tag] + params["tag"] = cur_tag - if skip_flush: - cmd.append("--skip-flush") + if skip_flush: + cmd.append("--skip-flush") - params["sf"] = str(skip_flush).lower() + if ttl: + cmd += ["--ttl", ttl] - if ktlist: - if "tbl" in ktlist: - if len(ktlist["tbl"]) > 0: - keyspaces = ktlist["ks"] - cmd += ["--table", ",".join(ktlist["tbl"])] - cmd += keyspaces - params["kn"] = keyspaces[0] - params["cf"] = ",".join(ktlist["tbl"]) + params["sf"] = str(skip_flush).lower() + params["ttl"] = ttl + + if ktlist: + if "tbl" in ktlist: + if len(ktlist["tbl"]) > 0: + keyspaces = ktlist["ks"] + cmd += ["--table", ",".join(ktlist["tbl"])] + cmd += keyspaces + params["kn"] = keyspaces[0] + params["cf"] = ",".join(ktlist["tbl"]) + else: + keyspaces = ktlist["ks"] + cmd += keyspaces + params["kn"] = ",".join(keyspaces) else: keyspaces = ktlist["ks"] - cmd += keyspaces - params["kn"] = ",".join(keyspaces) + cmd += ["-kt", ",".join(keyspaces)] + if len(keyspaces) == 1: + ks, tbl = keyspaces[0].split(".") + params["kn"] = ks + params["cf"] = tbl + elif len(keyspaces) > 1: + params["kn"] = ",".join(keyspaces) else: - keyspaces = ktlist["ks"] - cmd += ["-kt", ",".join(keyspaces)] - if len(keyspaces) == 1: - ks, tbl = keyspaces[0].split(".") - params["kn"] = ks - params["cf"] = tbl - elif len(keyspaces) > 1: - params["kn"] = ",".join(keyspaces) - else: - keyspaces = [] + keyspaces = [] - res = nodetool(*cmd, expected_requests=[ - expected_request("POST", "/storage_service/snapshots", params=params) - ]) + res = nodetool(*cmd, expected_requests=[ + expected_request("POST", "/storage_service/snapshots", params=params) + ]) - check_snapshot_out(res.stdout, tag, keyspaces, skip_flush) + check_snapshot_out(res.stdout, cur_tag, keyspaces, skip_flush, ttl) def test_snapshot_multiple_keyspace_with_table(nodetool): diff --git a/test/pylib/rest_client.py b/test/pylib/rest_client.py index bc2c873e4f..7f58bbe2d6 100644 --- a/test/pylib/rest_client.py +++ b/test/pylib/rest_client.py @@ -433,11 +433,13 @@ class ScyllaRESTAPIClient: ] return await self.client.post_json(f"/storage_service/tablets/restore", host=node_ip, params=params, json=backup_location) - async def take_snapshot(self, node_ip: str, ks: str, tag: str, tables: list[str] = None) -> None: + async def take_snapshot(self, node_ip: str, ks: str, tag: str, tables: list[str] = None, ttl: Optional[str] = None) -> None: """Take keyspace snapshot""" params = { 'kn': ks, 'tag': tag } if tables: params['cf'] = ','.join(tables) + if ttl is not None: + params['ttl'] = ttl await self.client.post(f"/storage_service/snapshots", host=node_ip, params=params) async def take_cluster_snapshot(self, node_ip: str, ks: str, tag: str, tables: list[str] = None) -> None: diff --git a/tools/scylla-nodetool.cc b/tools/scylla-nodetool.cc index af58ab0257..6cac174f27 100644 --- a/tools/scylla-nodetool.cc +++ b/tools/scylla-nodetool.cc @@ -2390,16 +2390,23 @@ void snapshot_operation(scylla_rest_client& client, const bpo::variables_map& vm params["sf"] = "false"; } + if (vm.contains("ttl")) { + params["ttl"] = vm["ttl"].as(); + } else { + params["ttl"] = "0"; + } + client.post("/storage_service/snapshots", params); if (kn_msg.empty()) { kn_msg = params["kn"]; } - fmt::print(std::cout, "Requested creating snapshot(s) for [{}] with snapshot name [{}] and options {{skipFlush={}}}\n", + fmt::print(std::cout, "Requested creating snapshot(s) for [{}] with snapshot name [{}] and options {{skipFlush={}, ttl={}}}\n", kn_msg, params["tag"], - params["sf"]); + params["sf"], + params["ttl"]); fmt::print(std::cout, "Snapshot directory: {}\n", params["tag"]); } @@ -4830,6 +4837,7 @@ For more information, see: {} typed_option("keyspace-table-list", "The keyspace.table pair(s) to snapshot, multiple ones can be joined with ','"), typed_option("tag,t", "The name of the snapshot"), typed_option<>("skip-flush", "Do not flush memtables before snapshotting (snapshot will not contain unflushed data)"), + typed_option("ttl", "The TTL for the snapshot, optionally followed by 's' for seconds (the default), 'm' for minutes, 'h' for hours, or 'd' for days. Missing TTL, or 0, means no TTL"), }, { typed_option>("keyspaces", "The keyspaces to snapshot", -1), From 881f67de89ec2bf2bdf47c0c2dd641847f14c40b Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 23 Feb 2026 19:49:05 +0200 Subject: [PATCH 8/8] db: snapshot-ctl: add cancel_expiration Cancel outstanding snapshot expiration when a snapshot is backed up and when clear_snapshot is called from the api. Fixes SCYLLADB-787 Fixes SCYLLADB-789 Signed-off-by: Benny Halevy --- db/snapshot-ctl.cc | 28 ++++++++++++++++++++++++++++ db/snapshot-ctl.hh | 6 ++++++ 2 files changed, 34 insertions(+) diff --git a/db/snapshot-ctl.cc b/db/snapshot-ctl.cc index f5907f730a..c0bbbdde85 100644 --- a/db/snapshot-ctl.cc +++ b/db/snapshot-ctl.cc @@ -12,6 +12,8 @@ #include #include +#include + #include #include #include @@ -201,6 +203,9 @@ future<> snapshot_ctl::do_take_column_family_snapshot(sstring ks_name, std::vect future<> snapshot_ctl::clear_snapshot(sstring tag, std::vector keyspace_names, sstring cf_name) { snap_log.debug("clear_snapshot: tag={} keyspaces={} table={}", tag, fmt::join(keyspace_names, ","), cf_name); + co_await container().invoke_on(0, [&] (auto& sc) { + return sc.cancel_expiration(tag, keyspace_names, cf_name); + }); co_return co_await run_snapshot_modify_operation([this, tag = std::move(tag), keyspace_names = std::move(keyspace_names), cf_name = std::move(cf_name)] (this auto) -> future<> { // clear_snapshot enumerates keyspace_names and uses cf_name as a // filter in each. When cf_name needs resolution (e.g. logical index @@ -276,6 +281,7 @@ future snapshot_ctl::start_backup(sstring endpoint, sstring buck if (!storage_options.is_local_type()) { throw std::invalid_argument("not able to backup a non-local table"); } + cancel_expiration(snapshot_name, {keyspace}, table); auto& local_storage_options = std::get(storage_options.value); // // The keyspace data directories and their snapshots are arranged as follows: @@ -366,4 +372,26 @@ void snapshot_ctl::schedule_expiration(gc_clock::time_point when, sstring ks_nam } } +void snapshot_ctl::cancel_expiration(sstring tag, std::vector ks_names, sstring table_name) { + if (this_shard_id() != 0) { + on_internal_error(snap_log, "cancel_expiration must be called on shard 0"); + } + snap_log.debug("Cancel expiration of snapshots with tag='{}' in keyspaces={} table={}", tag, fmt::join(ks_names, ","), table_name); + std::unordered_set keyspaces; + std::ranges::move(ks_names, std::inserter(keyspaces, keyspaces.end())); + _expiration_queue.erase(std::remove_if(_expiration_queue.begin(), _expiration_queue.end(), [&] (const expiration_info& info) { + if (!tag.empty() && info.tag != tag) { + return false; + } + if (!keyspaces.empty() && !keyspaces.contains(info.ks_name)) { + return false; + } + if (!table_name.empty() && info.table_name != table_name) { + return false; + } + return true; + }), _expiration_queue.end()); + std::ranges::make_heap(_expiration_queue, std::greater{}, &expiration_info::expires_at); +} + } // namespace db diff --git a/db/snapshot-ctl.hh b/db/snapshot-ctl.hh index b0030bfc06..95e9c5b8d3 100644 --- a/db/snapshot-ctl.hh +++ b/db/snapshot-ctl.hh @@ -126,6 +126,12 @@ public: // Must be called on shard 0 void schedule_expiration(gc_clock::time_point when, sstring ks_name, sstring table_name, sstring tag); + + // For canceling expiration, ks_name or table_name can be empty + // And then all snapshots with the given tag (or all, if `tag` is empty) are erased from the expiration queue + // within the given scope. + // Must be called on shard 0 + void cancel_expiration(sstring tag, std::vector ks_names = {}, sstring table_name = ""); private: config _config; sharded& _db;