From 0cd0775a273d8aea2db4f6185ab48f8912cfdadd Mon Sep 17 00:00:00 2001 From: Piotr Jastrzebski Date: Mon, 11 May 2020 10:29:51 +0200 Subject: [PATCH 1/2] cdc: Set CDC Log gc_grace_seconds to 0 Data in CDC Log is TTLed and we want to remove it as soon as it expires. Signed-off-by: Piotr Jastrzebski --- cdc/log.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cdc/log.cc b/cdc/log.cc index 2f21216c7d..20a0cc8ca6 100644 --- a/cdc/log.cc +++ b/cdc/log.cc @@ -393,6 +393,9 @@ static schema_ptr create_log_schema(const schema& s, std::optional schema_builder b(s.ks_name(), log_name(s.cf_name())); b.with_partitioner("com.scylladb.dht.CDCPartitioner"); b.set_comment(sprint("CDC log for %s.%s", s.ks_name(), s.cf_name())); + if (s.cdc_options().ttl() > 0) { + b.set_gc_grace_seconds(0); + } b.with_column(log_meta_column_name_bytes("stream_id"), bytes_type, column_kind::partition_key); b.with_column(log_meta_column_name_bytes("time"), timeuuid_type, column_kind::clustering_key); b.with_column(log_meta_column_name_bytes("batch_seq_no"), int32_type, column_kind::clustering_key); From 49b6010cb459cae50452eb03ed10ca70c8c6098a Mon Sep 17 00:00:00 2001 From: Piotr Jastrzebski Date: Fri, 8 May 2020 18:16:07 +0200 Subject: [PATCH 2/2] cdc: Use time window compaction strategy for CDC Log table CDC Log is a time series whose data is TTLed to 24 hours by default, so it makes sense to use time window compaction for it. The window size is adjusted to the TTL configured for CDC Log so that no more than 24 sstables will be created. 
Signed-off-by: Piotr Jastrzebski --- cdc/log.cc | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/cdc/log.cc b/cdc/log.cc index 20a0cc8ca6..84c82a7540 100644 --- a/cdc/log.cc +++ b/cdc/log.cc @@ -51,6 +51,7 @@ #include "types/listlike_partial_deserializing_iterator.hh" #include "tracing/trace_state.hh" #include "stats.hh" +#include "compaction_strategy.hh" namespace std { @@ -392,9 +393,25 @@ bytes log_data_column_deleted_elements_name_bytes(const bytes& column_name) { static schema_ptr create_log_schema(const schema& s, std::optional uuid) { schema_builder b(s.ks_name(), log_name(s.cf_name())); b.with_partitioner("com.scylladb.dht.CDCPartitioner"); + b.set_compaction_strategy(sstables::compaction_strategy_type::time_window); b.set_comment(sprint("CDC log for %s.%s", s.ks_name(), s.cf_name())); - if (s.cdc_options().ttl() > 0) { + auto ttl_seconds = s.cdc_options().ttl(); + if (ttl_seconds > 0) { b.set_gc_grace_seconds(0); + auto ceil = [] (int dividend, int divisor) { + return dividend / divisor + (dividend % divisor == 0 ? 0 : 1); + }; + auto seconds_to_minutes = [] (int seconds_value) { + using namespace std::chrono; + return std::chrono::ceil(seconds(seconds_value)).count(); + }; + // What's the minimum window that won't create more than 24 sstables. + auto window_seconds = ceil(ttl_seconds, 24); + auto window_minutes = seconds_to_minutes(window_seconds); + b.set_compaction_strategy_options({ + {"compaction_window_unit", "MINUTES"}, + {"compaction_window_size", std::to_string(window_minutes)} + }); } b.with_column(log_meta_column_name_bytes("stream_id"), bytes_type, column_kind::partition_key); b.with_column(log_meta_column_name_bytes("time"), timeuuid_type, column_kind::clustering_key); @@ -446,7 +463,7 @@ static schema_ptr create_log_schema(const schema& s, std::optional if (uuid) { b.set_uuid(*uuid); } - + return b.build(); }