Files
scylladb/compaction/compaction_strategy.hh
Benny Halevy 88ae067ddb everywhere: add skeletal support for the in_memory_tables feature
Forward-ported from scylla-enterprise.
Note that the feature has been deprecated and the implementation
is provided only for backward compatibility with pre-existing
features and schema.

Tested manually after adding the following to feature_service:
```
    gms::feature workload_prioritization { *this, "WORKLOAD_PRIORITIZATION"sv };
```

Launched a single-node cluster running 2023.1.10
```
cqlsh> create KEYSPACE ks WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};
cqlsh> create TABLE ks.test ( pk int PRIMARY KEY, val int ) WITH compaction = {'class': 'InMemoryCompactionStrategy'};
```

log:
```
Scylla version 2023.1.10-0.20241227.21cffccc1ccd with build-id bd65b8399cb13b713a87e57fe333cfcabfd50be7 starting ...
...
INFO  2024-12-27 19:45:16,563 [shard 0] migration_manager - Create new ColumnFamily: org.apache.cassandra.config.CFMetaData@0x600000f1b400[cfId=5529c630-c47a-11ef-bd1d-4295734ce5a8,ksName=ks,cfName=test,cfType=Standard,comparator=org.apache.cassandra.db.marshal.CompositeType(org.apache.cassandra.db.marshal.UTF8Type),comment=,readRepairChance=0,dcLocalReadRepairChance=0,tombstoneGcOptions={"mode":"timeout","propagation_delay_in_seconds":"3600"},gcGraceSeconds=864000,keyValidator=org.apache.cassandra.db.marshal.Int32Type,minCompactionThreshold=4,maxCompactionThreshold=32,columnMetadata=[ColumnDefinition{name=pk, type=org.apache.cassandra.db.marshal.Int32Type, kind=PARTITION_KEY, componentIndex=0, droppedAt=-9223372036854775808}, ColumnDefinition{name=val, type=org.apache.cassandra.db.marshal.Int32Type, kind=REGULAR, componentIndex=null, droppedAt=-9223372036854775808}],compactionStrategyClass=class org.apache.cassandra.db.compaction.InMemoryCompactionStrategy,compactionStrategyOptions={enabled=true},compressionParameters={sstable_compression=org.apache.cassandra.io.compress.LZ4Compressor},bloomFilterFpChance=0.01,memtableFlushPeriod=0,caching={"keys":"ALL","rows_per_partition":"ALL"},cdc={},defaultTimeToLive=0,minIndexInterval=128,maxIndexInterval=2048,speculativeRetry=99.0PERCENTILE,triggers=[],isDense=false,in_memory=false,version=5529c631-c47a-11ef-bd1d-4295734ce5a8,droppedColumns={},collections={},indices={}]
INFO  2024-12-27 19:45:16,564 [shard 0] schema_tables - Creating ks.test id=5529c630-c47a-11ef-bd1d-4295734ce5a8 version=ec88d510-6aff-344a-914d-541d37081440
```

Upgraded to this branch and started scylla.
Verified that ks.test was successfuly loaded:

log:
```
INFO  2024-12-27 19:48:58,115 [shard 0:main] init - Scylla version 6.3.0~dev-0.20241227.a64c6dfc153e with build-id f9496134a09cf2e55d3865b9e9ff499f672aa7da starting ...
...
WARN  2024-12-27 19:53:02,948 [shard 1:main] CompactionStrategy - InMemoryCompactionStrategy is no longer supported. Defaulting to NullCompactionStrategy.
...
INFO  2024-12-27 19:53:02,948 [shard 0:main] database - Keyspace ks: Reading CF test id=5529c630-c47a-11ef-bd1d-4295734ce5a8 version=ec88d510-6aff-344a-914d-541d37081440 storage=/home/bhalevy/scylladb/data/ks/test-5529c630c47a11efbd1d4295734ce5a8
```

Then, tested:
```
cqlsh> describe KEYSPACE ks;

CREATE KEYSPACE ks WITH replication = {'class': 'org.apache.cassandra.locator.SimpleStrategy', 'replication_factor': '1'} AND durable_writes = true AND tablets = {'enabled': false};

CREATE TABLE ks.test (
    pk int,
    val int,
    PRIMARY KEY (pk)
) WITH bloom_filter_fp_chance = 0.01
    AND caching = {'keys': 'ALL', 'rows_per_partition': 'ALL'}
    AND comment = ''
    AND compaction = {'class': 'InMemoryCompactionStrategy'}
    AND compression = {'sstable_compression': 'org.apache.cassandra.io.compress.LZ4Compressor'}
    AND crc_check_chance = 1
    AND default_time_to_live = 0
    AND gc_grace_seconds = 864000
    AND max_index_interval = 2048
    AND memtable_flush_period_in_ms = 0
    AND min_index_interval = 128
    AND speculative_retry = '99.0PERCENTILE';

cqlsh> alter TABLE ks.test with compaction = {'class': 'SizeTieredCompactionStrategy'};
cqlsh> describe KEYSPACE ks;

CREATE KEYSPACE ks WITH replication = {'class': 'org.apache.cassandra.locator.SimpleStrategy', 'replication_factor': '1'} AND durable_writes = true AND tablets = {'enabled': false};

CREATE TABLE ks.test (
    pk int,
    val int,
    PRIMARY KEY (pk)
) WITH bloom_filter_fp_chance = 0.01
    AND caching = {'keys': 'ALL', 'rows_per_partition': 'ALL'}
    AND comment = ''
    AND compaction = {'class': 'SizeTieredCompactionStrategy'}
    AND compression = {'sstable_compression': 'org.apache.cassandra.io.compress.LZ4Compressor'}
    AND crc_check_chance = 1
    AND default_time_to_live = 0
    AND gc_grace_seconds = 864000
    AND max_index_interval = 2048
    AND memtable_flush_period_in_ms = 0
    AND min_index_interval = 128
    AND speculative_retry = '99.0PERCENTILE'
    AND tombstone_gc = {'mode': 'timeout', 'propagation_delay_in_seconds': '3600'};
```

log:
```
INFO  2024-12-27 19:56:40,465 [shard 0:stmt] migration_manager - Update table 'ks.test' From org.apache.cassandra.config.CFMetaData@0x60000362d800[cfId=5529c630-c47a-11ef-bd1d-4295734ce5a8,ksName==ks,cfName=test,cfType=Standard,comparator=org.apache.cassandra.db.marshal.CompositeType(org.apache.cassandra.db.marshal.UTF8Type),comment=,tombstoneGcOptions={"mode":"timeout","propagation_delay_in_seconds":"3600"},gcGraceSeconds=864000,minCompactionThreshold=4,maxCompactionThreshold=32,columnMetadata=[ColumnDefinition{name=pk, type=org.apache.cassandra.db.marshal.Int32Type, kind=PARTITION_KEY, componentIndex=0, droppedAt=-9223372036854775808}, ColumnDefinition{name=val, type=org.apache.cassandra.db.marshal.Int32Type, kind=REGULAR, componentIndex=null, droppedAt=-9223372036854775808}],compactionStrategyClass=class org.apache.cassandra.db.compaction.InMemoryCompactionStrategy,compactionStrategyOptions={enabled=true},compressionParameters={sstable_compression=org.apache.cassandra.io.compress.LZ4Compressor},bloomFilterFpChance=0.01,memtableFlushPeriod=0,caching={"keys":"ALL","rows_per_partition":"ALL"},cdc={},defaultTimeToLive=0,minIndexInterval=128,maxIndexInterval=2048,speculativeRetry=99.0PERCENTILE,triggers=[],isDense=false,version=ec88d510-6aff-344a-914d-541d37081440,droppedColumns={},collections={},indices={}] To org.apache.cassandra.config.CFMetaData@0x60000336e000[cfId=5529c630-c47a-11ef-bd1d-4295734ce5a8,ksName==ks,cfName=test,cfType=Standard,comparator=org.apache.cassandra.db.marshal.CompositeType(org.apache.cassandra.db.marshal.UTF8Type),comment=,tombstoneGcOptions={"mode":"timeout","propagation_delay_in_seconds":"3600"},gcGraceSeconds=864000,minCompactionThreshold=4,maxCompactionThreshold=32,columnMetadata=[ColumnDefinition{name=pk, type=org.apache.cassandra.db.marshal.Int32Type, kind=PARTITION_KEY, componentIndex=0, droppedAt=-9223372036854775808}, ColumnDefinition{name=val, type=org.apache.cassandra.db.marshal.Int32Type, kind=REGULAR, componentIndex=null, droppedAt=-9223372036854775808}],compactionStrategyClass=class org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy,compactionStrategyOptions={enabled=true},compressionParameters={sstable_compression=org.apache.cassandra.io.compress.LZ4Compressor},bloomFilterFpChance=0.01,memtableFlushPeriod=0,caching={"keys":"ALL","rows_per_partition":"ALL"},cdc={},defaultTimeToLive=0,minIndexInterval=128,maxIndexInterval=2048,speculativeRetry=99.0PERCENTILE,triggers=[],isDense=false,version=ecccf010-c47b-11ef-b52c-622f2f0e87c4,droppedColumns={},collections={},indices={}]
INFO  2024-12-27 19:56:40,466 [shard 0: gms] schema_tables - Altering ks.test id=5529c630-c47a-11ef-bd1d-4295734ce5a8 version=ecccf010-c47b-11ef-b52c-622f2f0e87c4
```

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>

Closes scylladb/scylladb#22068
2025-01-20 16:55:17 +02:00

140 lines
5.7 KiB
C++

/*
* Copyright (C) 2015-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include "schema/schema_fwd.hh"
#include "sstables/shared_sstable.hh"
#include "exceptions/exceptions.hh"
#include "compaction_strategy_type.hh"
#include "table_state.hh"
#include "strategy_control.hh"
struct mutation_source_metadata;
class compaction_backlog_tracker;
extern logging::logger compaction_strategy_logger;
using namespace compaction;
namespace sstables {
class compaction_strategy_impl;
class sstable;
class sstable_set;
struct compaction_descriptor;
class storage;
class compaction_strategy {
::shared_ptr<compaction_strategy_impl> _compaction_strategy_impl;
public:
compaction_strategy(::shared_ptr<compaction_strategy_impl> impl);
compaction_strategy();
~compaction_strategy();
compaction_strategy(const compaction_strategy&);
compaction_strategy(compaction_strategy&&);
compaction_strategy& operator=(compaction_strategy&&);
// Return a list of sstables to be compacted after applying the strategy.
compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control);
compaction_descriptor get_major_compaction_job(table_state& table_s, std::vector<shared_sstable> candidates);
std::vector<compaction_descriptor> get_cleanup_compaction_jobs(table_state& table_s, std::vector<shared_sstable> candidates) const;
// Some strategies may look at the compacted and resulting sstables to
// get some useful information for subsequent compactions.
void notify_completion(table_state& table_s, const std::vector<shared_sstable>& removed, const std::vector<shared_sstable>& added);
// Return if parallel compaction is allowed by strategy.
bool parallel_compaction() const;
// Return if optimization to rule out sstables based on clustering key filter should be applied.
bool use_clustering_key_filter() const;
// An estimation of number of compaction for strategy to be satisfied.
int64_t estimated_pending_compactions(table_state& table_s) const;
static sstring name(compaction_strategy_type type) {
switch (type) {
case compaction_strategy_type::null:
return "NullCompactionStrategy";
case compaction_strategy_type::size_tiered:
return "SizeTieredCompactionStrategy";
case compaction_strategy_type::leveled:
return "LeveledCompactionStrategy";
case compaction_strategy_type::time_window:
return "TimeWindowCompactionStrategy";
case compaction_strategy_type::in_memory:
return "InMemoryCompactionStrategy";
case compaction_strategy_type::incremental:
return "IncrementalCompactionStrategy";
default:
throw std::runtime_error("Invalid Compaction Strategy");
}
}
static compaction_strategy_type type(const sstring& name) {
auto pos = name.find("org.apache.cassandra.db.compaction.");
sstring short_name = (pos == sstring::npos) ? name : name.substr(pos + 35);
if (short_name == "NullCompactionStrategy") {
return compaction_strategy_type::null;
} else if (short_name == "SizeTieredCompactionStrategy") {
return compaction_strategy_type::size_tiered;
} else if (short_name == "LeveledCompactionStrategy") {
return compaction_strategy_type::leveled;
} else if (short_name == "TimeWindowCompactionStrategy") {
return compaction_strategy_type::time_window;
} else if (short_name == "InMemoryCompactionStrategy") {
return compaction_strategy_type::in_memory;
} else if (short_name == "IncrementalCompactionStrategy") {
return compaction_strategy_type::incremental;
} else {
throw exceptions::configuration_exception(format("Unable to find compaction strategy class '{}'", name));
}
}
compaction_strategy_type type() const;
sstring name() const {
return name(type());
}
sstable_set make_sstable_set(schema_ptr schema) const;
compaction_backlog_tracker make_backlog_tracker() const;
uint64_t adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr) const;
reader_consumer_v2 make_interposer_consumer(const mutation_source_metadata& ms_meta, reader_consumer_v2 end_consumer) const;
// Returns whether or not interposer consumer is used by a given strategy.
bool use_interposer_consumer() const;
// Informs the caller (usually the compaction manager) about what would it take for this set of
// SSTables closer to becoming in-strategy. If this returns an empty compaction descriptor, this
// means that the sstable set is already in-strategy.
//
// The caller can specify one of two modes: strict or relaxed. In relaxed mode the tolerance for
// what is considered offstrategy is higher. It can be used, for instance, for when the system
// is restarting and previous compactions were likely in-flight. In strict mode, we are less
// tolerant to invariant breakages.
//
// The caller should also pass a maximum number of SSTables which is the maximum amount of
// SSTables that can be added into a single job.
compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const;
};
// Creates a compaction_strategy object from one of the strategies available.
compaction_strategy make_compaction_strategy(compaction_strategy_type strategy, const std::map<sstring, sstring>& options);
future<reshape_config> make_reshape_config(const sstables::storage& storage, reshape_mode mode);
}