From ddb7c8ea12507920d1bfe632956895fda31f53df Mon Sep 17 00:00:00 2001 From: Taras Veretilnyk Date: Thu, 3 Jul 2025 20:55:56 +0200 Subject: [PATCH] keys: from_nodetool_style_string don't split single partition keys Users with single-column partition keys that contain colon characters were unable to use certain REST APIs and 'nodetool' commands, because the API split key by colon regardless of the partition key schema. Affected commands: - 'nodetool getendpoints' - 'nodetool getsstables' Affected endpoints: - '/column_family/sstables/by_key' - '/storage_service/natural_endpoints' Refs: #16596 - This does not fully fix the issue, as users with compound keys will face the issue if any column of the partition key contains a colon character. Closes scylladb/scylladb#24829 Closes scylladb/scylladb#25565 --- keys.cc | 14 ++++++++++---- test/boost/keys_test.cc | 15 +++++++++++---- test/rest_api/test_storage_service.py | 19 +++++++++++++++++++ 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/keys.cc b/keys.cc index f9d1d2308a..6077137592 100644 --- a/keys.cc +++ b/keys.cc @@ -38,12 +38,18 @@ partition_key_view::ring_order_tri_compare(const schema& s, partition_key_view k partition_key partition_key::from_nodetool_style_string(const schema_ptr s, const sstring& key) { std::vector vec; - boost::split(vec, key, boost::is_any_of(":")); + if (s->partition_key_type()->types().size() == 1) { + // For a single column partition key. Don't try to split the key + // See #16596 + vec.push_back(key); + } else { + boost::split(vec, key, boost::is_any_of(":")); + if (vec.size() != s->partition_key_type()->types().size()) { + throw std::invalid_argument(fmt::format("partition key '{}' has mismatch number of components: expected {}, got {}", key, s->partition_key_type()->types().size(), vec.size())); + } + } auto it = std::begin(vec); - if (vec.size() != s->partition_key_type()->types().size()) { - throw std::invalid_argument("partition key '" + key + "' has mismatch number of components"); - } std::vector r; r.reserve(vec.size()); for (auto t : s->partition_key_type()->types()) { diff --git a/test/boost/keys_test.cc b/test/boost/keys_test.cc index a2ebfe0659..252349e52b 100644 --- a/test/boost/keys_test.cc +++ b/test/boost/keys_test.cc @@ -159,7 +159,7 @@ BOOST_AUTO_TEST_CASE(test_serialization) { } -BOOST_AUTO_TEST_CASE(test_from_nodetool_style_string) { +BOOST_AUTO_TEST_CASE(test_from_nodetool_style_string_single_partition_key) { auto s1 = schema_builder("", "") .with_column("c1", utf8_type, column_kind::partition_key) .with_column("c2", bytes_type, column_kind::clustering_key) @@ -169,10 +169,15 @@ BOOST_AUTO_TEST_CASE(test_from_nodetool_style_string) { auto pk_value = bytes("value"); partition_key key1(std::vector({pk_value})); - auto key2 = partition_key::from_nodetool_style_string(s1, "value"); BOOST_REQUIRE(key1.equal(*s1, key2)); + auto pk_with_col_value = bytes("val:ue"); + partition_key key_with_col(std::vector({pk_with_col_value})); + BOOST_REQUIRE(key_with_col.equal(*s1, partition_key::from_nodetool_style_string(s1, "val:ue"))); +} + +BOOST_AUTO_TEST_CASE(test_from_nodetool_style_string_composite_partition_key) { auto s2 = schema_builder("", "") .with_column("c1", utf8_type, column_kind::partition_key) .with_column("c2", utf8_type, column_kind::partition_key) @@ -183,7 +188,9 @@ BOOST_AUTO_TEST_CASE(test_from_nodetool_style_string) { auto pk_value1 = bytes("value1"); auto pk_value2 = bytes("value2"); partition_key key3(std::vector({pk_value1, pk_value2})); - auto key4 = partition_key::from_nodetool_style_string(s2, "value1:value2"); - BOOST_REQUIRE(key3.equal(*s1, key4)); + BOOST_REQUIRE(key3.equal(*s2, key4)); + + BOOST_REQUIRE_THROW(partition_key::from_nodetool_style_string(s2, "value1:value2:extra"), std::invalid_argument); + BOOST_REQUIRE_THROW(partition_key::from_nodetool_style_string(s2, "value1"), std::invalid_argument); } diff --git a/test/rest_api/test_storage_service.py b/test/rest_api/test_storage_service.py index f73dc30bfc..7f29bf2348 100644 --- a/test/rest_api/test_storage_service.py +++ b/test/rest_api/test_storage_service.py @@ -629,6 +629,25 @@ def test_storage_service_get_natural_endpoints(cql, rest_api, tablets_enabled, s assert resp.json() == [rest_api.host] +@pytest.mark.parametrize("tablets_enabled", ["true", "false"]) +def test_storage_service_get_natural_endpoints_compound_key(cql, rest_api, tablets_enabled, skip_without_tablets): + with new_test_keyspace(cql, f"WITH REPLICATION = {{ 'class' : 'NetworkTopologyStrategy', 'replication_factor' : 1 }} AND TABLETS = {{ 'enabled': {tablets_enabled} }}") as keyspace: + with new_test_table(cql, keyspace, 'p1 int, p2 text, c int, PRIMARY KEY ((p1, p2), c)') as t0: + table = t0.split(".")[1] + resp = rest_api.send("GET", f"storage_service/natural_endpoints/{keyspace}", params={"cf": table, "key": "1:value"}) + resp.raise_for_status() + assert resp.json() == [rest_api.host] + +# Verify that a single-column partition key containing ':' is not split. Reproduces #16596 +@pytest.mark.parametrize("tablets_enabled", ["true", "false"]) +def test_storage_service_get_natural_endpoints_text_key_with_colon(cql, rest_api, tablets_enabled, skip_without_tablets): + with new_test_keyspace(cql, f"WITH REPLICATION = {{ 'class' : 'NetworkTopologyStrategy', 'replication_factor' : 1 }} AND TABLETS = {{ 'enabled': {tablets_enabled} }}") as keyspace: + with new_test_table(cql, keyspace, 'p text PRIMARY KEY') as t0: + table = t0.split(".")[1] + resp = rest_api.send("GET", f"storage_service/natural_endpoints/{keyspace}", params={"cf": table, "key": "value:123"}) + resp.raise_for_status() + assert resp.json() == [rest_api.host] + def test_range_to_endpoint_map_tablets_enabled_keyspace_param_only(cql, rest_api, skip_without_tablets): with new_test_keyspace(cql, "WITH REPLICATION = { 'class' : 'NetworkTopologyStrategy', 'replication_factor' : 1 } AND TABLETS = { 'enabled': true }") as keyspace: with new_test_table(cql, keyspace, 'p int PRIMARY KEY') as table: