repair: Wire tablet repair with the user repair request
Currently, only the table and primary replica selection options are
supported.
Reject repair request if the repair options are not supported yet.
With this patch, users can repair tablet tables by running
nodetool repair -pr myks mytable
on each node in the cluster, so that each tablet will be repaired only
once, without duplicated work.
Below is an example showing how tablet repair works. The `nodetool
repair -pr` command was performed on all the nodes. There are three
nodes in the cluster. RF = 2. 16 initial tablets.
Tablets:
cqlsh> SELECT * FROM system.tablets;
keyspace_name | table_id | last_token | table_name | tablet_count | new_replicas | replicas | session | stage
---------------+--------------------------------------+----------------------+------------+--------------+--------------+----------------------------------------------------------------------------------------+---------+-------
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | -8070450532247928833 | standard1 | 16 | null | [(951cb5bc-5749-481a-9645-4dd0f624f24a, 6), (2dd3808d-6601-4483-b081-adf41ef094e5, 5)] | null | null
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | -6917529027641081857 | standard1 | 16 | null | [(2dd3808d-6601-4483-b081-adf41ef094e5, 0), (19caaeb3-d754-4704-a998-840df53eb54c, 5)] | null | null
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | -5764607523034234881 | standard1 | 16 | null | [(19caaeb3-d754-4704-a998-840df53eb54c, 2), (2dd3808d-6601-4483-b081-adf41ef094e5, 3)] | null | null
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | -4611686018427387905 | standard1 | 16 | null | [(951cb5bc-5749-481a-9645-4dd0f624f24a, 5), (2dd3808d-6601-4483-b081-adf41ef094e5, 4)] | null | null
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | -3458764513820540929 | standard1 | 16 | null | [(19caaeb3-d754-4704-a998-840df53eb54c, 1), (951cb5bc-5749-481a-9645-4dd0f624f24a, 0)] | null | null
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | -2305843009213693953 | standard1 | 16 | null | [(951cb5bc-5749-481a-9645-4dd0f624f24a, 7), (2dd3808d-6601-4483-b081-adf41ef094e5, 1)] | null | null
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | -1152921504606846977 | standard1 | 16 | null | [(19caaeb3-d754-4704-a998-840df53eb54c, 7), (951cb5bc-5749-481a-9645-4dd0f624f24a, 1)] | null | null
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | -1 | standard1 | 16 | null | [(951cb5bc-5749-481a-9645-4dd0f624f24a, 2), (2dd3808d-6601-4483-b081-adf41ef094e5, 7)] | null | null
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | 1152921504606846975 | standard1 | 16 | null | [(951cb5bc-5749-481a-9645-4dd0f624f24a, 6), (19caaeb3-d754-4704-a998-840df53eb54c, 2)] | null | null
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | 2305843009213693951 | standard1 | 16 | null | [(2dd3808d-6601-4483-b081-adf41ef094e5, 5), (951cb5bc-5749-481a-9645-4dd0f624f24a, 7)] | null | null
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | 3458764513820540927 | standard1 | 16 | null | [(2dd3808d-6601-4483-b081-adf41ef094e5, 1), (19caaeb3-d754-4704-a998-840df53eb54c, 3)] | null | null
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | 4611686018427387903 | standard1 | 16 | null | [(2dd3808d-6601-4483-b081-adf41ef094e5, 7), (951cb5bc-5749-481a-9645-4dd0f624f24a, 1)] | null | null
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | 5764607523034234879 | standard1 | 16 | null | [(19caaeb3-d754-4704-a998-840df53eb54c, 6), (2dd3808d-6601-4483-b081-adf41ef094e5, 2)] | null | null
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | 6917529027641081855 | standard1 | 16 | null | [(19caaeb3-d754-4704-a998-840df53eb54c, 5), (951cb5bc-5749-481a-9645-4dd0f624f24a, 3)] | null | null
ks1 | 3ffadad0-a552-11ee-bc15-66412bbb6978 | 8070450532247928831 | standard1 | 16 | null | [(2dd3808d-6601-4483-b081-adf41ef094e5, 0), (19caaeb3-d754-4704-a998-840df53eb54c, 7)] | null | null
node1:
$nodetool repair -p 7199 -pr ks1 standard1
[shard 0:strm] repair - repair[f7ac8fb6-8e49-4b31-8c7d-0d493064977c]: starting user-requested repair for keyspace ks1, repair id 6, options {{trace -> false}, {primaryRange -> true}, {columnFamilies -> standard1}, {jobThreads -> 1}, {incremental -> false}, {parallelism -> parallel}}
[shard 0:strm] repair - repair[f7ac8fb6-8e49-4b31-8c7d-0d493064977c] Repair 1 out of 5 tablets: table=ks1.standard1 tablet_id=2 range=(-6917529027641081857,-5764607523034234881] replicas={19caaeb3-d754-4704-a998-840df53eb54c:2, 2dd3808d-6601-4483-b081-adf41ef094e5:3} primary_replica_only=true
[shard 2:strm] repair - repair[f7ac8fb6-8e49-4b31-8c7d-0d493064977c]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.07399633 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.1, 7174440}, {127.0.0.2, 7174440}}, row_from_disk_nr={{127.0.0.1, 15330}, {127.0.0.2, 15330}}, row_from_disk_bytes_per_sec={{127.0.0.1, 92.4651}, {127.0.0.2, 92.4651}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.1, 207172}, {127.0.0.2, 207172}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[f7ac8fb6-8e49-4b31-8c7d-0d493064977c] Repair 2 out of 5 tablets: table=ks1.standard1 tablet_id=4 range=(-4611686018427387905,-3458764513820540929] replicas={19caaeb3-d754-4704-a998-840df53eb54c:1, 951cb5bc-5749-481a-9645-4dd0f624f24a:0} primary_replica_only=true
[shard 1:strm] repair - repair[f7ac8fb6-8e49-4b31-8c7d-0d493064977c]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.07302664 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.1, 7195032}, {127.0.0.3, 7195032}}, row_from_disk_nr={{127.0.0.1, 15374}, {127.0.0.3, 15374}}, row_from_disk_bytes_per_sec={{127.0.0.1, 93.9618}, {127.0.0.3, 93.9618}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.1, 210526}, {127.0.0.3, 210526}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[f7ac8fb6-8e49-4b31-8c7d-0d493064977c] Repair 3 out of 5 tablets: table=ks1.standard1 tablet_id=6 range=(-2305843009213693953,-1152921504606846977] replicas={19caaeb3-d754-4704-a998-840df53eb54c:7, 951cb5bc-5749-481a-9645-4dd0f624f24a:1} primary_replica_only=true
[shard 7:strm] repair - repair[f7ac8fb6-8e49-4b31-8c7d-0d493064977c]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.06781354 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.1, 7095816}, {127.0.0.3, 7095816}}, row_from_disk_nr={{127.0.0.1, 15162}, {127.0.0.3, 15162}}, row_from_disk_bytes_per_sec={{127.0.0.1, 99.7898}, {127.0.0.3, 99.7898}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.1, 223584}, {127.0.0.3, 223584}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[f7ac8fb6-8e49-4b31-8c7d-0d493064977c] Repair 4 out of 5 tablets: table=ks1.standard1 tablet_id=12 range=(4611686018427387903,5764607523034234879] replicas={19caaeb3-d754-4704-a998-840df53eb54c:6, 2dd3808d-6601-4483-b081-adf41ef094e5:2} primary_replica_only=true
[shard 6:strm] repair - repair[f7ac8fb6-8e49-4b31-8c7d-0d493064977c]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.06793772 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.1, 7150572}, {127.0.0.2, 7150572}}, row_from_disk_nr={{127.0.0.1, 15279}, {127.0.0.2, 15279}}, row_from_disk_bytes_per_sec={{127.0.0.1, 100.376}, {127.0.0.2, 100.376}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.1, 224897}, {127.0.0.2, 224897}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[f7ac8fb6-8e49-4b31-8c7d-0d493064977c] Repair 5 out of 5 tablets: table=ks1.standard1 tablet_id=13 range=(5764607523034234879,6917529027641081855] replicas={19caaeb3-d754-4704-a998-840df53eb54c:5, 951cb5bc-5749-481a-9645-4dd0f624f24a:3} primary_replica_only=true
[shard 5:strm] repair - repair[f7ac8fb6-8e49-4b31-8c7d-0d493064977c]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.068579935 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.1, 7129512}, {127.0.0.3, 7129512}}, row_from_disk_nr={{127.0.0.1, 15234}, {127.0.0.3, 15234}}, row_from_disk_bytes_per_sec={{127.0.0.1, 99.1432}, {127.0.0.3, 99.1432}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.1, 222135}, {127.0.0.3, 222135}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[f7ac8fb6-8e49-4b31-8c7d-0d493064977c]: Finished user-requested repair for tablet keyspace=ks1 tables={standard1} repair_id=6 duration=0.352379s
node2:
$nodetool repair -p 7200 -pr ks1 standard1
[shard 0:strm] repair - repair[5c805f0c-4ff2-4c5c-88df-bb318d559e0e]: starting user-requested repair for keyspace ks1, repair id 1, options {{trace -> false}, {primaryRange -> true}, {columnFamilies -> standard1}, {jobThreads -> 1}, {incremental -> false}, {parallelism -> parallel}}
[shard 0:strm] repair - repair[5c805f0c-4ff2-4c5c-88df-bb318d559e0e] Repair 1 out of 6 tablets: table=ks1.standard1 tablet_id=1 range=(-8070450532247928833,-6917529027641081857] replicas={2dd3808d-6601-4483-b081-adf41ef094e5:0, 19caaeb3-d754-4704-a998-840df53eb54c:5} primary_replica_only=true
[shard 0:strm] repair - repair[5c805f0c-4ff2-4c5c-88df-bb318d559e0e]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.07016466 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.1, 7212816}, {127.0.0.2, 7212816}}, row_from_disk_nr={{127.0.0.1, 15412}, {127.0.0.2, 15412}}, row_from_disk_bytes_per_sec={{127.0.0.1, 98.0362}, {127.0.0.2, 98.0362}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.1, 219655}, {127.0.0.2, 219655}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[5c805f0c-4ff2-4c5c-88df-bb318d559e0e] Repair 2 out of 6 tablets: table=ks1.standard1 tablet_id=9 range=(1152921504606846975,2305843009213693951] replicas={2dd3808d-6601-4483-b081-adf41ef094e5:5, 951cb5bc-5749-481a-9645-4dd0f624f24a:7} primary_replica_only=true
[shard 5:strm] repair - repair[5c805f0c-4ff2-4c5c-88df-bb318d559e0e]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.07180758 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.2, 7236216}, {127.0.0.3, 7236216}}, row_from_disk_nr={{127.0.0.2, 15462}, {127.0.0.3, 15462}}, row_from_disk_bytes_per_sec={{127.0.0.2, 96.104}, {127.0.0.3, 96.104}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.2, 215325}, {127.0.0.3, 215325}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[5c805f0c-4ff2-4c5c-88df-bb318d559e0e] Repair 3 out of 6 tablets: table=ks1.standard1 tablet_id=10 range=(2305843009213693951,3458764513820540927] replicas={2dd3808d-6601-4483-b081-adf41ef094e5:1, 19caaeb3-d754-4704-a998-840df53eb54c:3} primary_replica_only=true
[shard 1:strm] repair - repair[5c805f0c-4ff2-4c5c-88df-bb318d559e0e]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.06772773 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.1, 7039188}, {127.0.0.2, 7039188}}, row_from_disk_nr={{127.0.0.1, 15041}, {127.0.0.2, 15041}}, row_from_disk_bytes_per_sec={{127.0.0.1, 99.1188}, {127.0.0.2, 99.1188}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.1, 222080}, {127.0.0.2, 222080}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[5c805f0c-4ff2-4c5c-88df-bb318d559e0e] Repair 4 out of 6 tablets: table=ks1.standard1 tablet_id=11 range=(3458764513820540927,4611686018427387903] replicas={2dd3808d-6601-4483-b081-adf41ef094e5:7, 951cb5bc-5749-481a-9645-4dd0f624f24a:1} primary_replica_only=true
[shard 7:strm] repair - repair[5c805f0c-4ff2-4c5c-88df-bb318d559e0e]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.07025768 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.2, 7229664}, {127.0.0.3, 7229664}}, row_from_disk_nr={{127.0.0.2, 15448}, {127.0.0.3, 15448}}, row_from_disk_bytes_per_sec={{127.0.0.2, 98.1351}, {127.0.0.3, 98.1351}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.2, 219876}, {127.0.0.3, 219876}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[5c805f0c-4ff2-4c5c-88df-bb318d559e0e] Repair 5 out of 6 tablets: table=ks1.standard1 tablet_id=14 range=(6917529027641081855,8070450532247928831] replicas={2dd3808d-6601-4483-b081-adf41ef094e5:0, 19caaeb3-d754-4704-a998-840df53eb54c:7} primary_replica_only=true
[shard 0:strm] repair - repair[5c805f0c-4ff2-4c5c-88df-bb318d559e0e]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.0719635 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.1, 7225452}, {127.0.0.2, 7225452}}, row_from_disk_nr={{127.0.0.1, 15439}, {127.0.0.2, 15439}}, row_from_disk_bytes_per_sec={{127.0.0.1, 95.7531}, {127.0.0.2, 95.7531}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.1, 214539}, {127.0.0.2, 214539}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[5c805f0c-4ff2-4c5c-88df-bb318d559e0e] Repair 6 out of 6 tablets: table=ks1.standard1 tablet_id=15 range=(8070450532247928831,9223372036854775807] replicas={2dd3808d-6601-4483-b081-adf41ef094e5:4, 19caaeb3-d754-4704-a998-840df53eb54c:3} primary_replica_only=true
[shard 4:strm] repair - repair[5c805f0c-4ff2-4c5c-88df-bb318d559e0e]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.0691715 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.1, 7122960}, {127.0.0.2, 7122960}}, row_from_disk_nr={{127.0.0.1, 15220}, {127.0.0.2, 15220}}, row_from_disk_bytes_per_sec={{127.0.0.1, 98.2049}, {127.0.0.2, 98.2049}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.1, 220033}, {127.0.0.2, 220033}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[5c805f0c-4ff2-4c5c-88df-bb318d559e0e]: Finished user-requested repair for tablet keyspace=ks1 tables={standard1} repair_id=1 duration=0.42178s
node3:
$nodetool repair -p 7300 -pr ks1 standard1
[shard 0:strm] repair - repair[350b97f3-f06e-470f-9164-43997a4f82a6]: starting user-requested repair for keyspace ks1, repair id 1, options {{trace -> false}, {primaryRange -> true}, {columnFamilies -> standard1}, {jobThreads -> 1}, {incremental -> false}, {parallelism -> parallel}}
[shard 0:strm] repair - repair[350b97f3-f06e-470f-9164-43997a4f82a6] Repair 1 out of 5 tablets: table=ks1.standard1 tablet_id=0 range=(minimum token,-8070450532247928833] replicas={951cb5bc-5749-481a-9645-4dd0f624f24a:6, 2dd3808d-6601-4483-b081-adf41ef094e5:5} primary_replica_only=true
[shard 6:strm] repair - repair[350b97f3-f06e-470f-9164-43997a4f82a6]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.07126866 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.2, 7133256}, {127.0.0.3, 7133256}}, row_from_disk_nr={{127.0.0.2, 15242}, {127.0.0.3, 15242}}, row_from_disk_bytes_per_sec={{127.0.0.2, 95.4529}, {127.0.0.3, 95.4529}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.2, 213867}, {127.0.0.3, 213867}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[350b97f3-f06e-470f-9164-43997a4f82a6] Repair 2 out of 5 tablets: table=ks1.standard1 tablet_id=3 range=(-5764607523034234881,-4611686018427387905] replicas={951cb5bc-5749-481a-9645-4dd0f624f24a:5, 2dd3808d-6601-4483-b081-adf41ef094e5:4} primary_replica_only=true
[shard 5:strm] repair - repair[350b97f3-f06e-470f-9164-43997a4f82a6]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.0701025 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.2, 7138404}, {127.0.0.3, 7138404}}, row_from_disk_nr={{127.0.0.2, 15253}, {127.0.0.3, 15253}}, row_from_disk_bytes_per_sec={{127.0.0.2, 97.1108}, {127.0.0.3, 97.1108}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.2, 217581}, {127.0.0.3, 217581}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[350b97f3-f06e-470f-9164-43997a4f82a6] Repair 3 out of 5 tablets: table=ks1.standard1 tablet_id=5 range=(-3458764513820540929,-2305843009213693953] replicas={951cb5bc-5749-481a-9645-4dd0f624f24a:7, 2dd3808d-6601-4483-b081-adf41ef094e5:1} primary_replica_only=true
[shard 7:strm] repair - repair[350b97f3-f06e-470f-9164-43997a4f82a6]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.06859512 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.2, 7171632}, {127.0.0.3, 7171632}}, row_from_disk_nr={{127.0.0.2, 15324}, {127.0.0.3, 15324}}, row_from_disk_bytes_per_sec={{127.0.0.2, 99.7068}, {127.0.0.3, 99.7068}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.2, 223398}, {127.0.0.3, 223398}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[350b97f3-f06e-470f-9164-43997a4f82a6] Repair 4 out of 5 tablets: table=ks1.standard1 tablet_id=7 range=(-1152921504606846977,-1] replicas={951cb5bc-5749-481a-9645-4dd0f624f24a:2, 2dd3808d-6601-4483-b081-adf41ef094e5:7} primary_replica_only=true
[shard 2:strm] repair - repair[350b97f3-f06e-470f-9164-43997a4f82a6]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.06975318 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.2, 7105176}, {127.0.0.3, 7105176}}, row_from_disk_nr={{127.0.0.2, 15182}, {127.0.0.3, 15182}}, row_from_disk_bytes_per_sec={{127.0.0.2, 97.1429}, {127.0.0.3, 97.1429}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.2, 217653}, {127.0.0.3, 217653}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[350b97f3-f06e-470f-9164-43997a4f82a6] Repair 5 out of 5 tablets: table=ks1.standard1 tablet_id=8 range=(-1,1152921504606846975] replicas={951cb5bc-5749-481a-9645-4dd0f624f24a:6, 19caaeb3-d754-4704-a998-840df53eb54c:2} primary_replica_only=true
[shard 6:strm] repair - repair[350b97f3-f06e-470f-9164-43997a4f82a6]: stats: repair_reason=repair, keyspace=ks1, tables={standard1}, ranges_nr=1, round_nr=2, round_nr_fast_path_already_synced=2, round_nr_fast_path_same_combined_hashes=0, round_nr_slow_path=0, rpc_call_nr=6, tx_hashes_nr=0, rx_hashes_nr=0, duration=0.070810474 seconds, tx_row_nr=0, rx_row_nr=0, tx_row_bytes=0, rx_row_bytes=0, row_from_disk_bytes={{127.0.0.1, 7023276}, {127.0.0.3, 7023276}}, row_from_disk_nr={{127.0.0.1, 15007}, {127.0.0.3, 15007}}, row_from_disk_bytes_per_sec={{127.0.0.1, 94.5894}, {127.0.0.3, 94.5894}} MiB/s, row_from_disk_rows_per_sec={{127.0.0.1, 211932}, {127.0.0.3, 211932}} Rows/s, tx_row_nr_peer={}, rx_row_nr_peer={}
[shard 0:strm] repair - repair[350b97f3-f06e-470f-9164-43997a4f82a6]: Finished user-requested repair for tablet keyspace=ks1 tables={standard1} repair_id=1 duration=0.351395s
Fixes #16599
This commit is contained in:
242
repair/repair.cc
242
repair/repair.cc
@@ -22,6 +22,7 @@
|
||||
#include "utils/hashers.hh"
|
||||
#include "locator/network_topology_strategy.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "partition_range_compat.hh"
|
||||
#include "gms/feature_service.hh"
|
||||
|
||||
@@ -42,6 +43,7 @@
|
||||
|
||||
#include <cfloat>
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
|
||||
#include "idl/position_in_partition.dist.hh"
|
||||
#include "idl/partition_checksum.dist.hh"
|
||||
@@ -1066,12 +1068,6 @@ future<int> repair_service::do_repair_start(sstring keyspace, std::unordered_map
|
||||
get_repair_module().check_in_shutdown();
|
||||
auto& sharded_db = get_db();
|
||||
auto& db = sharded_db.local();
|
||||
auto germs = make_lw_shared(co_await locator::make_global_effective_replication_map(sharded_db, keyspace));
|
||||
auto& erm = germs->get();
|
||||
auto& topology = erm.get_token_metadata().get_topology();
|
||||
auto my_address = topology.my_address();
|
||||
|
||||
repair_options options(options_map);
|
||||
|
||||
// Note: Cassandra can, in some cases, decide immediately that there is
|
||||
// nothing to repair, and return 0. "nodetool repair" prints in this case
|
||||
@@ -1081,6 +1077,80 @@ future<int> repair_service::do_repair_start(sstring keyspace, std::unordered_map
|
||||
auto id = _repair_module->new_repair_uniq_id();
|
||||
rlogger.info("repair[{}]: starting user-requested repair for keyspace {}, repair id {}, options {}", id.uuid(), keyspace, id.id, options_map);
|
||||
|
||||
repair_options options(options_map);
|
||||
|
||||
std::vector<sstring> cfs =
|
||||
options.column_families.size() ? options.column_families : list_column_families(db, keyspace);
|
||||
if (cfs.empty()) {
|
||||
rlogger.info("repair[{}]: completed successfully: no tables to repair", id.uuid());
|
||||
co_return id.id;
|
||||
}
|
||||
|
||||
{
|
||||
// Repair tables in table name order. Later we could repair in other
|
||||
// orders, e.g., smaller tables first.
|
||||
std::sort(cfs.begin(), cfs.end());
|
||||
|
||||
size_t nr_tablet_table = 0;
|
||||
size_t nr_vnode_table = 0;
|
||||
bool is_tablet = false;
|
||||
for (auto& table_name : cfs) {
|
||||
auto& t = db.find_column_family(keyspace, table_name);
|
||||
if (t.uses_tablets()) {
|
||||
nr_tablet_table++;
|
||||
} else {
|
||||
nr_vnode_table++;
|
||||
}
|
||||
}
|
||||
|
||||
if (nr_tablet_table != 0) {
|
||||
if (nr_vnode_table != 0) {
|
||||
throw std::runtime_error("Mixed vnode table and tablet table");
|
||||
}
|
||||
is_tablet = true;
|
||||
}
|
||||
if (is_tablet) {
|
||||
// Reject unsupported options for tablet repair
|
||||
if (!options.ranges.empty()) {
|
||||
throw std::runtime_error("The ranges option is not supported for tablet repair");
|
||||
}
|
||||
if (!options.hosts.empty()) {
|
||||
throw std::runtime_error("The hosts option is not supported for tablet repair");
|
||||
}
|
||||
if (!options.ignore_nodes.empty()) {
|
||||
throw std::runtime_error("The ignore_nodes option is not supported for tablet repair");
|
||||
}
|
||||
if (!options.data_centers.empty()) {
|
||||
throw std::runtime_error("The dataCenters option is not supported for tablet repair");
|
||||
}
|
||||
if (!options.start_token.empty()) {
|
||||
throw std::runtime_error("The startToken option is not supported for tablet repair");
|
||||
}
|
||||
if (!options.end_token.empty()) {
|
||||
throw std::runtime_error("The endToken option is not supported for tablet repair");
|
||||
}
|
||||
if (options.small_table_optimization) {
|
||||
throw std::runtime_error("The small_table_optimization option is not supported for tablet repair");
|
||||
}
|
||||
|
||||
auto host2ip = [&addr_map = _addr_map] (locator::host_id host) -> future<gms::inet_address> {
|
||||
auto ip = addr_map.local().find(raft::server_id(host.uuid()));
|
||||
if (!ip) {
|
||||
throw std::runtime_error(format("Could not get ip address for host {} from raft_address_map", host));
|
||||
}
|
||||
co_return *ip;
|
||||
};
|
||||
bool primary_replica_only = options.primary_range;
|
||||
co_await repair_tablets(id, keyspace, cfs, host2ip, primary_replica_only);
|
||||
co_return id.id;
|
||||
}
|
||||
}
|
||||
|
||||
auto germs = make_lw_shared(co_await locator::make_global_effective_replication_map(sharded_db, keyspace));
|
||||
auto& erm = germs->get();
|
||||
auto& topology = erm.get_token_metadata().get_topology();
|
||||
auto my_address = erm.get_topology().my_address();
|
||||
|
||||
if (erm.get_replication_strategy().get_type() == locator::replication_strategy_type::local) {
|
||||
rlogger.info("repair[{}]: completed successfully: nothing to repair for keyspace {} with local replication strategy", id.uuid(), keyspace);
|
||||
co_return id.id;
|
||||
@@ -1168,13 +1238,6 @@ future<int> repair_service::do_repair_start(sstring keyspace, std::unordered_map
|
||||
ranges = std::move(intersections);
|
||||
}
|
||||
|
||||
std::vector<sstring> cfs =
|
||||
options.column_families.size() ? options.column_families : list_column_families(db, keyspace);
|
||||
if (cfs.empty()) {
|
||||
rlogger.info("repair[{}]: completed successfully: no tables to repair", id.uuid());
|
||||
co_return id.id;
|
||||
}
|
||||
|
||||
auto small_table_optimization = options.small_table_optimization;
|
||||
if (small_table_optimization) {
|
||||
auto range = dht::token_range(dht::token_range::bound(dht::minimum_token(), false), dht::token_range::bound(dht::maximum_token(), false));
|
||||
@@ -1980,6 +2043,159 @@ future<> repair_service::replace_with_repair(locator::token_metadata_ptr tmptr,
|
||||
co_return co_await do_rebuild_replace_with_repair(std::move(cloned_tmptr), std::move(op), myloc.dc, reason, std::move(ignore_nodes));
|
||||
}
|
||||
|
||||
// Repair all tablets belong to this node for the given table
|
||||
future<> repair_service::repair_tablets(repair_uniq_id rid, sstring keyspace_name, std::vector<sstring> table_names, host2ip_t host2ip, bool primary_replica_only) {
|
||||
std::vector<tablet_repair_task_meta> task_metas;
|
||||
for (auto& table_name : table_names) {
|
||||
lw_shared_ptr<replica::table> t;
|
||||
try {
|
||||
t = _db.local().find_column_family(keyspace_name, table_name).shared_from_this();
|
||||
} catch (replica::no_such_column_family& e) {
|
||||
rlogger.debug("repair[{}] Table {}.{} does not exist anymore", rid.uuid(), keyspace_name, table_name);
|
||||
continue;
|
||||
}
|
||||
if (!t->uses_tablets()) {
|
||||
throw std::runtime_error(format("repair[{}] Table {}.{} is not a tablet table", rid.uuid(), keyspace_name, table_name));
|
||||
}
|
||||
table_id tid = t->schema()->id();
|
||||
// FIXME: we need to wait for current tablet movement and disable future tablet movment
|
||||
auto erm = t->get_effective_replication_map();
|
||||
auto& tmap = erm->get_token_metadata_ptr()->tablets().get_tablet_map(tid);
|
||||
struct repair_tablet_meta {
|
||||
locator::tablet_id id;
|
||||
dht::token_range range;
|
||||
locator::host_id master_host_id;
|
||||
shard_id master_shard_id;
|
||||
locator::tablet_replica_set replicas;
|
||||
};
|
||||
std::vector<repair_tablet_meta> metas;
|
||||
auto myhostid = erm->get_token_metadata_ptr()->get_my_id();
|
||||
auto myip = erm->get_topology().my_address();
|
||||
co_await tmap.for_each_tablet([&] (locator::tablet_id id, const locator::tablet_info& info) {
|
||||
auto range = tmap.get_token_range(id);
|
||||
auto& replicas = info.replicas;
|
||||
bool found = false;
|
||||
shard_id master_shard_id;
|
||||
// Repair all tablets belong to this node
|
||||
for (auto& r : replicas) {
|
||||
if (r.host == myhostid) {
|
||||
master_shard_id = r.shard;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
if (primary_replica_only) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found) {
|
||||
metas.push_back(repair_tablet_meta{id, range, myhostid, master_shard_id, replicas});
|
||||
}
|
||||
});
|
||||
|
||||
size_t nr = 0;
|
||||
for (auto& m : metas) {
|
||||
nr++;
|
||||
rlogger.debug("repair[{}] Collect {} out of {} tablets: table={}.{} tablet_id={} range={} replicas={} primary_replica_only={}",
|
||||
rid.uuid(), nr, metas.size(), keyspace_name, table_name, m.id, m.range, m.replicas, primary_replica_only);
|
||||
std::vector<gms::inet_address> nodes;
|
||||
auto master_shard_id = m.master_shard_id;
|
||||
auto range = m.range;
|
||||
std::vector<shard_id> shards;
|
||||
for (auto& r : m.replicas) {
|
||||
auto shard = r.shard;
|
||||
auto ip = co_await host2ip(r.host);
|
||||
if (r.host != myhostid) {
|
||||
rlogger.debug("repair[{}] Repair get neighbors table={}.{} hostid={} shard={} ip={} myip={} myhostid={}",
|
||||
rid.uuid(), keyspace_name, table_name, r.host, shard, ip, myip, myhostid);
|
||||
shards.push_back(shard);
|
||||
nodes.push_back(ip);
|
||||
}
|
||||
}
|
||||
task_metas.push_back(tablet_repair_task_meta{keyspace_name, table_name, tid, master_shard_id, range, repair_neighbors(nodes, shards), m.replicas});
|
||||
}
|
||||
}
|
||||
auto task = co_await _repair_module->make_and_start_task<repair::tablet_repair_task_impl>({}, rid, keyspace_name, table_names, streaming::stream_reason::repair, std::move(task_metas));
|
||||
}
|
||||
|
||||
// Entry point of a user-requested tablet repair task.
//
// Fans the per-tablet work units (_metas) out to every shard: each shard
// walks the full meta list, picks the metas whose master_shard_id matches
// it, and repairs them one tablet at a time by spawning a child
// shard_repair_task_impl per tablet. Completion, success and failure are
// logged; a keyspace that disappeared mid-repair is treated as success.
future<> repair::tablet_repair_task_impl::run() {
    auto m = dynamic_pointer_cast<repair::task_manager_module>(_module);
    auto& rs = m->get_repair_service();
    auto& sharded_db = rs.get_db();
    auto& db = sharded_db.local();
    auto id = get_repair_uniq_id();
    // Copied so the .then()/.handle_exception() continuations below do not
    // have to capture `this`.
    auto keyspace = _keyspace;
    rlogger.debug("repair[{}]: Repair tablet for keyspace={} tables={} status=started", id.uuid(), _keyspace, _tables);
    co_await m->run(id, [this, &rs, id] () mutable {
        // This runs inside a seastar thread
        auto start_time = std::chrono::steady_clock::now();
        auto parent_data = get_repair_uniq_id().task_info;
        // Cross-shard progress counter, used only for the "N out of M" log
        // line below. Lives on this seastar-thread stack; safe because the
        // .get() below keeps the frame alive until all shards finish.
        std::atomic<int> idx{1};
        rs.container().invoke_on_all([&idx, id, metas = _metas, parent_data, reason = _reason] (repair_service& rs) -> future<> {
            for (auto& m : metas) {
                // Each meta is owned by exactly one shard; everyone else
                // skips it, so every tablet is repaired exactly once.
                if (m.master_shard_id != this_shard_id()) {
                    continue;
                }
                auto nr = idx.fetch_add(1);
                rlogger.info("repair[{}] Repair {} out of {} tablets: table={}.{} range={} replicas={}",
                    id.uuid(), nr, metas.size(), m.keyspace_name, m.table_name, m.range, m.replicas);
                lw_shared_ptr<replica::table> t;
                try {
                    t = rs._db.local().find_column_family(m.tid).shared_from_this();
                } catch (replica::no_such_column_family& e) {
                    // Table was dropped after the metas were built; not an error.
                    rlogger.debug("repair[{}] Table {}.{} does not exist anymore", id.uuid(), m.keyspace_name, m.table_name);
                    continue;
                }
                // Held for the duration of this tablet's repair so the
                // replica set stays stable while it runs.
                auto erm = t->get_effective_replication_map();
                if (rs.get_repair_module().is_aborted(id.uuid())) {
                    throw abort_requested_exception();
                }

                // The child task repairs exactly one range (this tablet)
                // against the neighbors recorded in the meta.
                std::unordered_map<dht::token_range, repair_neighbors> neighbors;
                neighbors[m.range] = m.neighbors;
                dht::token_range_vector ranges = {m.range};
                std::vector<table_id> table_ids = {m.tid};

                // No DC/host filtering for tablet repair: the replica set in
                // the meta already pins the participants.
                auto data_centers = std::vector<sstring>();
                auto hosts = std::vector<sstring>();
                auto ignore_nodes = std::unordered_set<gms::inet_address>();
                bool hints_batchlog_flushed = false;
                bool small_table_optimization = false;
                // NOTE: deduces std::nullopt_t; relies on implicit conversion
                // to the optional parameter of shard_repair_task_impl.
                auto ranges_parallelism = std::nullopt;

                auto task_impl_ptr = seastar::make_shared<repair::shard_repair_task_impl>(rs._repair_module, tasks::task_id::create_random_id(),
                    m.keyspace_name, rs, erm, std::move(ranges), std::move(table_ids), id, std::move(data_centers), std::move(hosts),
                    std::move(ignore_nodes), reason, hints_batchlog_flushed, small_table_optimization, ranges_parallelism);
                task_impl_ptr->neighbors = std::move(neighbors);
                auto task = co_await rs._repair_module->make_task(std::move(task_impl_ptr), parent_data);
                task->start();
                // Tablets are repaired sequentially on each shard.
                co_await task->done();
            }
        }).get();
        auto duration = std::chrono::duration<float>(std::chrono::steady_clock::now() - start_time);
        rlogger.info("repair[{}]: Finished user-requested repair for tablet keyspace={} tables={} repair_id={} tablets_repaired={} duration={}",
            id.uuid(), _keyspace, _tables, id.id, _metas.size(), duration);
    }).then([id, keyspace] {
        rlogger.debug("repair[{}]: Repair tablet for keyspace={} status=succeeded", id.uuid(), keyspace);
    }).handle_exception([&db, id, keyspace, &rs] (std::exception_ptr ep) {
        // A keyspace dropped while being repaired is not a failure: log and
        // swallow the error.
        if (!db.has_keyspace(keyspace)) {
            rlogger.warn("repair[{}]: Repair tablet for keyspace={}, status=failed: keyspace does not exist any more, ignoring it, {}", id.uuid(), keyspace, ep);
            return make_ready_future<>();
        }
        rlogger.warn("repair[{}]: Repair tablet for keyspace={} status=failed: {}", id.uuid(), keyspace, ep);
        // May rethrow a shutdown error in place of the original one.
        rs.get_repair_module().check_in_shutdown();
        return make_exception_future<>(ep);
    });
}
|
||||
|
||||
// Total expected workload: one unit per tablet to repair. Disengaged when
// there are no tablets, signalling that no estimate is available.
future<std::optional<double>> repair::tablet_repair_task_impl::expected_total_workload() const {
    if (_metas.empty()) {
        co_return std::nullopt;
    }
    co_return std::make_optional<double>(_metas.size());
}
|
||||
|
||||
// Expected number of child tasks: one shard-level repair task is spawned
// per tablet meta.
std::optional<double> repair::tablet_repair_task_impl::expected_children_number() const {
    const auto tablet_count = _metas.size();
    return std::make_optional<double>(tablet_count);
}
|
||||
|
||||
node_ops_cmd_category categorize_node_ops_cmd(node_ops_cmd cmd) noexcept {
|
||||
switch (cmd) {
|
||||
case node_ops_cmd::removenode_prepare:
|
||||
|
||||
@@ -260,6 +260,16 @@ struct repair_flush_hints_batchlog_request {
|
||||
// Empty reply message paired with repair_flush_hints_batchlog_request.
// Carries no payload; its arrival alone acknowledges the request.
struct repair_flush_hints_batchlog_response {
};
|
||||
|
||||
// Per-tablet unit of work for a tablet repair. One meta is built for each
// tablet the local node must repair; the repair task later fans the metas
// out so that only the shard given by master_shard_id executes each one.
struct tablet_repair_task_meta {
    sstring keyspace_name;
    sstring table_name;
    table_id tid;
    // Local shard that drives the repair of this tablet; all other shards
    // skip the meta.
    shard_id master_shard_id;
    // Token range owned by the tablet; repaired as a single range.
    dht::token_range range;
    // Nodes and shards that participate in repairing this range.
    repair_neighbors neighbors;
    // Tablet replica set, currently used for logging the participants.
    locator::tablet_replica_set replicas;
};
|
||||
|
||||
namespace std {
|
||||
|
||||
template<>
|
||||
|
||||
@@ -160,6 +160,11 @@ private:
|
||||
streaming::stream_reason reason,
|
||||
shared_ptr<node_ops_info> ops_info);
|
||||
|
||||
public:
|
||||
future<> repair_tablets(repair_uniq_id id, sstring keyspace_name, std::vector<sstring> table_names, host2ip_t host2ip, bool primary_replica_only = true);
|
||||
|
||||
private:
|
||||
|
||||
future<repair_update_system_table_response> repair_update_system_table_handler(
|
||||
gms::inet_address from,
|
||||
repair_update_system_table_request req);
|
||||
@@ -237,6 +242,7 @@ public:
|
||||
|
||||
friend class repair::user_requested_repair_task_impl;
|
||||
friend class repair::data_sync_repair_task_impl;
|
||||
friend class repair::tablet_repair_task_impl;
|
||||
};
|
||||
|
||||
class repair_info;
|
||||
|
||||
@@ -101,6 +101,31 @@ protected:
|
||||
virtual std::optional<double> expected_children_number() const override;
|
||||
};
|
||||
|
||||
class tablet_repair_task_impl : public repair_task_impl {
|
||||
private:
|
||||
sstring _keyspace;
|
||||
std::vector<sstring> _tables;
|
||||
std::vector<tablet_repair_task_meta> _metas;
|
||||
optimized_optional<abort_source::subscription> _abort_subscription;
|
||||
public:
|
||||
tablet_repair_task_impl(tasks::task_manager::module_ptr module, repair_uniq_id id, sstring keyspace, std::vector<sstring> tables, streaming::stream_reason reason, std::vector<tablet_repair_task_meta> metas)
|
||||
: repair_task_impl(module, id.uuid(), id.id, "keyspace", keyspace, "", "", tasks::task_id::create_null_id(), reason)
|
||||
, _keyspace(std::move(keyspace))
|
||||
, _tables(std::move(tables))
|
||||
, _metas(std::move(metas))
|
||||
{
|
||||
}
|
||||
|
||||
virtual tasks::is_abortable is_abortable() const noexcept override {
|
||||
return tasks::is_abortable(!_abort_subscription);
|
||||
}
|
||||
protected:
|
||||
future<> run() override;
|
||||
|
||||
virtual future<std::optional<double>> expected_total_workload() const override;
|
||||
virtual std::optional<double> expected_children_number() const override;
|
||||
};
|
||||
|
||||
class shard_repair_task_impl : public repair_task_impl {
|
||||
public:
|
||||
repair_service& rs;
|
||||
|
||||
Reference in New Issue
Block a user