partitioners: Make it impossible to use RandomPartitioner

RandomPartitioner has been deprecated for 2.5 years.
Now we drop the support for it. There are two reasons for this.
First, this partitioner can lead to uneven distribution of partitions
among the nodes in the cluster which leads to hot nodes.
Second, we're planning to unify the representation of tokens and
fix it as int64_t. RandomPartitioner does not comply with this.

Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com>
This commit is contained in:
Piotr Jastrzebski
2020-01-20 14:55:40 +01:00
parent 7a86e2ff46
commit d80ac4c2d0
4 changed files with 4 additions and 154 deletions

View File

@@ -288,8 +288,7 @@ batch_size_fail_threshold_in_kb: 50
# reloading all data, so when upgrading you should set this to the
# same partitioner you were already using.
#
# Besides Murmur3Partitioner, partitioners included for backwards
# compatibility include RandomPartitioner.
# Murmur3Partitioner is currently the only supported partitioner.
#
partitioner: org.apache.cassandra.dht.Murmur3Partitioner

View File

@@ -425,13 +425,10 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"If not using vnodes, comment #num_tokens : 256 or set num_tokens : 1 and use initial_token. If you already have an existing cluster with one token per node and wish to migrate to vnodes, see Enabling virtual nodes on an existing production cluster.\n"
"Note: If using DataStax Enterprise, the default setting of this property depends on the type of node and type of install.")
, partitioner(this, "partitioner", value_status::Used, "org.apache.cassandra.dht.Murmur3Partitioner",
"Distributes rows (by partition key) across all nodes in the cluster. Any IPartitioner may be used, including your own as long as it is in the class path. For new clusters use the default partitioner.\n"
"Scylla provides the following partitioners for backwards compatibility:\n"
"\n"
"\tRandomPartitioner\n"
"Distributes rows (by partition key) across all nodes in the cluster. At the moment, only Murmur3Partitioner is supported. For new clusters use the default partitioner.\n"
"\n"
"Related information: Partitioners"
, {"org.apache.cassandra.dht.Murmur3Partitioner", "org.apache.cassandra.dht.RandomPartitioner"})
, {"org.apache.cassandra.dht.Murmur3Partitioner"})
, storage_port(this, "storage_port", value_status::Used, 7000,
"The port for inter-node communication.")
/* Advanced automatic backup setting */

View File

@@ -153,8 +153,7 @@ permissions_validity_in_ms: 2000
# reloading all data, so when upgrading you should set this to the
# same partitioner you were already using.
#
# Besides Murmur3Partitioner, partitioners included for backwards
# compatibility include RandomPartitioner.
# Murmur3Partitioner is currently the only supported partitioner.
#
partitioner: org.apache.cassandra.dht.Murmur3Partitioner

View File

@@ -232,151 +232,6 @@ SEASTAR_THREAD_TEST_CASE(test_token_no_wraparound_1) {
BOOST_REQUIRE_EQUAL(midpoint, token_from_long(0x6000'0000'0000'0000));
}
// Round-trip check: a decimal token string parsed with
// random_partitioner::from_sstring() must be rendered back to the very same
// string by random_partitioner::to_sstring().
SEASTAR_THREAD_TEST_CASE(test_rp_token1) {
    dht::set_global_partitioner(to_sstring("org.apache.cassandra.dht.RandomPartitioner"));
    dht::random_partitioner rp;

    auto input = sstring("123456");
    auto parsed = rp.from_sstring(input);
    auto rendered = rp.to_sstring(parsed);
    BOOST_REQUIRE(input == rendered);

    // Switch the global partitioner back to Murmur3Partitioner before leaving.
    dht::set_global_partitioner(to_sstring("org.apache.cassandra.dht.Murmur3Partitioner"));
}
// Both the empty string and the string "0" are expected to parse to the
// token returned by dht::minimum_token().
SEASTAR_THREAD_TEST_CASE(test_rp_token2) {
    dht::set_global_partitioner(to_sstring("org.apache.cassandra.dht.RandomPartitioner"));
    dht::random_partitioner rp;

    auto lowest = dht::minimum_token();
    auto from_empty = rp.from_sstring(sstring());
    auto from_zero = rp.from_sstring(to_sstring("0"));

    BOOST_REQUIRE(lowest == from_empty);
    BOOST_REQUIRE(lowest == from_zero);

    // Switch the global partitioner back to Murmur3Partitioner before leaving.
    dht::set_global_partitioner(to_sstring("org.apache.cassandra.dht.Murmur3Partitioner"));
}
// Checks the byte encoding produced by random_partitioner::token_to_bytes()
// for a handful of small values. The expectations below encode the rule that
// a zero byte is prepended exactly when the top bit of the most significant
// byte of the value is set.
SEASTAR_THREAD_TEST_CASE(test_rp_token3) {
    dht::set_global_partitioner(to_sstring("org.apache.cassandra.dht.RandomPartitioner"));
    dht::random_partitioner rp;

    // Parse a decimal token string and return its byte encoding.
    auto encode = [&rp] (const char* decimal) {
        auto tok = rp.from_sstring(to_sstring(decimal));
        return rp.token_to_bytes(tok);
    };

    // 255 fits in one byte but has bit 7 set, so a zero byte is prepended.
    auto enc_255 = encode("255");
    BOOST_REQUIRE(enc_255.size() == 2);
    BOOST_REQUIRE(enc_255[0] == int8_t(0));
    BOOST_REQUIRE(enc_255[1] == int8_t(255));

    // 250 fits in one byte but has bit 7 set, so a zero byte is prepended.
    auto enc_250 = encode("250");
    BOOST_REQUIRE(enc_250.size() == 2);
    BOOST_REQUIRE(enc_250[0] == int8_t(0));
    BOOST_REQUIRE(enc_250[1] == int8_t(250));

    // 256 needs two bytes and bit 15 is clear, so no zero byte is prepended.
    auto enc_256 = encode("256");
    BOOST_REQUIRE(enc_256.size() == 2);
    BOOST_REQUIRE(enc_256[0] == int8_t(1));
    BOOST_REQUIRE(enc_256[1] == int8_t(0));

    // 127 fits in one byte and bit 7 is clear, so no zero byte is prepended.
    auto enc_127 = encode("127");
    BOOST_REQUIRE(enc_127.size() == 1);
    BOOST_REQUIRE(enc_127[0] == int8_t(127));

    // 128 fits in one byte but has bit 7 set, so a zero byte is prepended.
    auto enc_128 = encode("128");
    BOOST_REQUIRE(enc_128.size() == 2);
    BOOST_REQUIRE(enc_128[0] == int8_t(0));
    BOOST_REQUIRE(enc_128[1] == int8_t(128));

    // Switch the global partitioner back to Murmur3Partitioner before leaving.
    dht::set_global_partitioner(to_sstring("org.apache.cassandra.dht.Murmur3Partitioner"));
}
// Verifies random_partitioner::get_token() against a table of known
// (partition key, decimal token) pairs for a schema with a single
// bytes-typed partition key column.
SEASTAR_THREAD_TEST_CASE(test_rp_token4) {
    dht::set_global_partitioner(to_sstring("org.apache.cassandra.dht.RandomPartitioner"));
    dht::random_partitioner rp;

    auto schema = schema_builder("ks", "cf")
            .with_column("a", bytes_type, column_kind::partition_key)
            .with_column("b", int32_type)
            .build();

    // Expected token (as a decimal string) for each partition key value.
    struct known_token {
        const char* token;
        const char* key;
    };
    const known_token cases[] = {
        {"1498727546111218218000240550937185703", "g1"},
        {"5743128803285680324364720388504740393", "z"},
        {"24285907100581385209761791172262166336", "b1"},
        {"74278675443652264562362882013958732244", "2"},
        {"78703492656118554854272571946195123045", "1"},
        {"114355602889666587562799073732149921607", "c1"},
        {"114688863869225338471480367428049914939", "1000"},
        {"156123446300388841848425604775226615902", "a1"},
    };

    // Phase 1: parse every expected token.
    std::vector<dht::token> expected;
    for (const auto& c : cases) {
        expected.push_back(rp.from_sstring(c.token));
    }

    // Phase 2: compute the token of every key through the partitioner.
    std::vector<dht::token> computed;
    for (const auto& c : cases) {
        computed.push_back(rp.get_token(*schema,
                partition_key::from_single_value(*schema, to_bytes(sstring(c.key)))));
    }

    // Phase 3: compare pairwise, in table order.
    for (size_t i = 0; i < expected.size(); ++i) {
        BOOST_REQUIRE(expected[i] == computed[i]);
    }

    // Switch the global partitioner back to Murmur3Partitioner before leaving.
    dht::set_global_partitioner(to_sstring("org.apache.cassandra.dht.Murmur3Partitioner"));
}
// Midpoint of two tokens given in ascending order: the midpoint of 1000 and
// 5000 is expected to be 3000.
SEASTAR_THREAD_TEST_CASE(test_rp_token_midpoint1) {
    dht::set_global_partitioner(to_sstring("org.apache.cassandra.dht.RandomPartitioner"));
    dht::random_partitioner rp;

    auto low = rp.from_sstring("1000");
    auto high = rp.from_sstring("5000");
    auto actual_mid = rp.midpoint(low, high);
    auto wanted_mid = rp.from_sstring("3000");

    BOOST_REQUIRE(low < high);
    BOOST_REQUIRE(actual_mid == wanted_mid);

    // Switch the global partitioner back to Murmur3Partitioner before leaving.
    dht::set_global_partitioner(to_sstring("org.apache.cassandra.dht.Murmur3Partitioner"));
}
// Midpoint of two tokens given in descending order (5000, then 1000).
// NOTE(review): the large expected value suggests the midpoint is taken
// across the wrap-around of the token ring -- confirm against midpoint().
SEASTAR_THREAD_TEST_CASE(test_rp_token_midpoint2) {
    dht::set_global_partitioner(to_sstring("org.apache.cassandra.dht.RandomPartitioner"));
    dht::random_partitioner rp;

    auto first = rp.from_sstring("5000");
    auto second = rp.from_sstring("1000");
    auto actual_mid = rp.midpoint(first, second);
    auto wanted_mid = rp.from_sstring("85070591730234615865843651857942055864");

    BOOST_REQUIRE(first > second);
    BOOST_REQUIRE(actual_mid == wanted_mid);

    // Switch the global partitioner back to Murmur3Partitioner before leaving.
    dht::set_global_partitioner(to_sstring("org.apache.cassandra.dht.Murmur3Partitioner"));
}
// Feeds four sorted tokens to random_partitioner::describe_ownership() and
// checks that the reported ownership values are 0.3, 0.1, 0.2 and 0.4
// (within FLT_EPSILON) for the respective tokens.
SEASTAR_THREAD_TEST_CASE(test_rp_describe_ownership) {
    dht::set_global_partitioner(to_sstring("org.apache.cassandra.dht.RandomPartitioner"));
    dht::random_partitioner rp;

    auto tok_a = rp.from_sstring("34028236692093846346337460743176821144");
    auto tok_b = rp.from_sstring("51042355038140769519506191114765231716");
    auto tok_c = rp.from_sstring("85070591730234615865843651857942052860");
    auto tok_d = rp.from_sstring("153127065114422308558518573344295695148");

    auto ring = std::vector<dht::token>{tok_a, tok_b, tok_c, tok_d};
    auto ownership = rp.describe_ownership(ring);

    BOOST_REQUIRE(std::fabs(ownership[tok_a] - 0.3) <= FLT_EPSILON);
    BOOST_REQUIRE(std::fabs(ownership[tok_b] - 0.1) <= FLT_EPSILON);
    BOOST_REQUIRE(std::fabs(ownership[tok_c] - 0.2) <= FLT_EPSILON);
    BOOST_REQUIRE(std::fabs(ownership[tok_d] - 0.4) <= FLT_EPSILON);

    // Switch the global partitioner back to Murmur3Partitioner before leaving.
    dht::set_global_partitioner(to_sstring("org.apache.cassandra.dht.Murmur3Partitioner"));
}
void test_partitioner_sharding(const dht::i_partitioner& part, unsigned shards, std::vector<dht::token> shard_limits,
std::function<dht::token (const dht::i_partitioner&, dht::token)> prev_token, unsigned ignorebits = 0) {
auto s = schema_builder("ks", "cf")