repair: Reject repair requests where start and end tokens are equal

When a user calls the repair API with identical startToken and endToken
values, the code creates a wrapping interval (T, T]. This causes
unwrap() to split it into (-inf, T] and (T, +inf), covering the entire
token ring and triggering a full repair.

Reject such requests early with an error message matching
Cassandra's behavior: "Start and end tokens must be different."

Fixes: CUSTOMER-368

Closes scylladb/scylladb#29821

(cherry picked from commit 0204372156)

Closes scylladb/scylladb#29836

Closes scylladb/scylladb#29863
This commit is contained in:
Asias He
2026-05-11 11:05:44 +08:00
committed by Botond Dénes
parent 356ca32994
commit 8bd0e562be
2 changed files with 29 additions and 0 deletions

View File

@@ -1281,6 +1281,9 @@ future<int> repair_service::do_repair_start(gms::gossip_address_map& addr_map, s
}
if (!options.start_token.empty() || !options.end_token.empty()) {
if (!options.start_token.empty() && !options.end_token.empty() && options.start_token == options.end_token) {
throw std::invalid_argument("Start and end tokens must be different.");
}
// Intersect the list of local ranges with the given token range,
// dropping ranges with no intersection.
std::optional<::wrapping_interval<dht::token>::bound> tok_start;

View File

@@ -16,6 +16,7 @@ from cassandra.cluster import ConsistencyLevel
from cassandra.query import SimpleStatement
from test.pylib.manager_client import ManagerClient
from test.pylib.rest_client import HTTPError
from test.pylib.util import wait_for_cql_and_get_hosts
from test.cluster.conftest import skip_mode
from test.cluster.util import new_test_keyspace
@@ -353,3 +354,28 @@ async def test_small_table_optimization_repair(manager):
rows = await cql.run_async(f"SELECT * from system.repair_history")
assert len(rows) == 1
@pytest.mark.asyncio
async def test_repair_rejects_equal_start_and_end_token(manager):
    """Verify that repair rejects a request where startToken == endToken.

    When start == end, the wrapping range (T, T] covers the full token ring,
    causing an unintended full repair instead of a no-op. The repair API is
    therefore expected to fail the request with an HTTP error whose message
    contains "Start and end tokens must be different".

    Reproduces https://scylladb.atlassian.net/browse/CUSTOMER-358
    """
    # NOTE(review): the commit message for this change cites CUSTOMER-368,
    # while the URL above says CUSTOMER-358 -- confirm which ticket id is right.
    servers = await manager.servers_add(2, auto_rack_dc="dc1")
    cql = manager.get_cql()

    # Use the awaitable driver wrapper rather than the blocking execute() so
    # the event loop is not stalled while the schema changes are applied.
    await cql.run_async("CREATE KEYSPACE ks WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 2} AND TABLETS = {'enabled': false}")
    await cql.run_async("CREATE TABLE ks.tbl (pk int PRIMARY KEY)")

    # The same token is deliberately used for both bounds of the range.
    token = "1558831538804957103"
    params = {
        "columnFamilies": "tbl",
        "startToken": token,
        "endToken": token,
    }

    with pytest.raises(HTTPError, match="Start and end tokens must be different"):
        await manager.api.client.post_json("/storage_service/repair_async/ks",
                                           host=servers[0].ip_addr, params=params)