scylla-gdb.py: improve scylla repairs commadn

Make output more readable by:
* group follower/master repair instances separately
* split repair details into one line for repair summary, then one line
  for each host info
* add indentation to make the output easier to follow

Also add -m|--memory option to calculate memory usage of repair buffers.

Example output:

    (gdb) scylla repairs -m
    Repairs for which this node is leader:
      (repair_meta*) 0x60503ab7f7b0: {id: 19197, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: 0}, working_row_buf: {len: 30, memory: 48208512}, same_shard: True, tablet: False}
        host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_combined_row_hash_finished
        host: ce4413ab-33d9-40f8-b13e-d14af8511dda, shard: 4294967295, state: repair_state::put_row_diff_with_rpc_stream_started
      (repair_meta*) 0x60503717f7b0: {id: 19211, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: 0}, working_row_buf: {len: 28, memory: 63863265}, same_shard: True, tablet: False}
        host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_combined_row_hash_finished
        host: c4936a19-41da-4260-971e-651445d740fd, shard: 4294967295, state: repair_state::get_row_diff_with_rpc_stream_finished
      (repair_meta*) 0x60502ddff7b0: {id: 19231, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: 0}, working_row_buf: {len: 0, memory: 0}, same_shard: True, tablet: False}
        host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::row_level_stop_started
        host: 039494b6-9d35-4f34-82c4-3c79c1d97175, shard: 4294967295, state: repair_state::row_level_stop_finished
      (repair_meta*) 0x60501db3f7b0: {id: 19234, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: 0}, working_row_buf: {len: 0, memory: 0}, same_shard: True, tablet: False}
        host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_sync_boundary_started
        host: 039494b6-9d35-4f34-82c4-3c79c1d97175, shard: 4294967295, state: repair_state::get_sync_boundary_finished
      (repair_meta*) 0x60501c81f7b0: {id: 19236, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: 0}, working_row_buf: {len: 28, memory: 42696821}, same_shard: True, tablet: False}
        host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_combined_row_hash_finished
        host: ce4413ab-33d9-40f8-b13e-d14af8511dda, shard: 4294967295, state: repair_state::put_row_diff_with_rpc_stream_started
      (repair_meta*) 0x60503f65f7b0: {id: 19238, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: 0}, working_row_buf: {len: 28, memory: 47785163}, same_shard: True, tablet: False}
        host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_combined_row_hash_finished
        host: ce4413ab-33d9-40f8-b13e-d14af8511dda, shard: 4294967295, state: repair_state::get_row_diff_with_rpc_stream_finished
    Repairs for which this node is follower:
This commit is contained in:
Botond Dénes
2025-02-26 10:55:51 -05:00
parent 47c62a4cf2
commit 94e8971308

View File

@@ -6113,32 +6113,113 @@ class scylla_features(gdb.Command):
gdb.write('%s: %s\n' % (f['_name'], f['_enabled']))
class scylla_repairs(gdb.Command):
""" List all active repair instances for both repair masters and followers.
"""List all active repair instances that involve this node and shard, for both repair masters and followers.
See `scylla repairs --help` for a list of available options.
Example:
(repair_meta*) for masters: addr = 0x600005abf830, table = myks2.standard1, ip = 127.0.0.1, states = ['127.0.0.1->repair_state::get_sync_boundary_started', '127.0.0.3->repair_state::get_sync_boundary_finished'], repair_meta = (repair_meta*) 0x60400af3f8e0
(repair_meta*) for masters: addr = 0x60000521f830, table = myks2.standard1, ip = 127.0.0.1, states = ['127.0.0.1->repair_state::get_sync_boundary_started', '127.0.0.2->repair_state::get_sync_boundary_started'], repair_meta = (repair_meta*) 0x6040103df8e0
(repair_meta*) for follower: addr = 0x60000432a808, table = myks2.standard1, ip = 127.0.0.1, states = ['127.0.0.1->repair_state::get_sync_boundary_started', '127.0.0.2->repair_state::unknown'], repair_meta = (repair_meta*) 0x60400d73f8e0
(gdb) scylla repairs
Repairs for which this node is leader:
(repair_meta*) 0x60503ab7f7b0: {id: 19197, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: n/a}, working_row_buf: {len: 30, memory: n/a}, same_shard: True, tablet: False}
host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_combined_row_hash_finished
host: ce4413ab-33d9-40f8-b13e-d14af8511dda, shard: 4294967295, state: repair_state::put_row_diff_with_rpc_stream_started
(repair_meta*) 0x60503717f7b0: {id: 19211, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: n/a}, working_row_buf: {len: 28, memory: n/a}, same_shard: True, tablet: False}
host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_combined_row_hash_finished
host: c4936a19-41da-4260-971e-651445d740fd, shard: 4294967295, state: repair_state::get_row_diff_with_rpc_stream_finished
(repair_meta*) 0x60502ddff7b0: {id: 19231, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: n/a}, working_row_buf: {len: 0, memory: n/a}, same_shard: True, tablet: False}
host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::row_level_stop_started
host: 039494b6-9d35-4f34-82c4-3c79c1d97175, shard: 4294967295, state: repair_state::row_level_stop_finished
(repair_meta*) 0x60501db3f7b0: {id: 19234, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: n/a}, working_row_buf: {len: 0, memory: n/a}, same_shard: True, tablet: False}
host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_sync_boundary_started
host: 039494b6-9d35-4f34-82c4-3c79c1d97175, shard: 4294967295, state: repair_state::get_sync_boundary_finished
(repair_meta*) 0x60501c81f7b0: {id: 19236, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: n/a}, working_row_buf: {len: 28, memory: n/a}, same_shard: True, tablet: False}
host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_combined_row_hash_finished
host: ce4413ab-33d9-40f8-b13e-d14af8511dda, shard: 4294967295, state: repair_state::put_row_diff_with_rpc_stream_started
(repair_meta*) 0x60503f65f7b0: {id: 19238, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: n/a}, working_row_buf: {len: 28, memory: n/a}, same_shard: True, tablet: False}
host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_combined_row_hash_finished
host: ce4413ab-33d9-40f8-b13e-d14af8511dda, shard: 4294967295, state: repair_state::get_row_diff_with_rpc_stream_finished
Repairs for which this node is follower:
"""
def __init__(self):
gdb.Command.__init__(self, 'scylla repairs', gdb.COMMAND_USER, gdb.COMPLETE_NONE, True)
def process(self, master, rm):
def process(self, rm, calculate_memory_consumption):
schema = rm['_schema']
table = schema_ptr(schema).table_name().replace('"', '')
all_nodes_state = []
ip = str(rm['_myip']).replace('"', '')
row_buf = std_list(rm['_row_buf'])
working_row_buf = std_list(rm['_working_row_buf'])
def repair_row_list_memory(row_list):
mem = 0
repair_row_size = gdb.lookup_type('repair_row').sizeof
decorated_key_with_hash_size = gdb.lookup_type('decorated_key_with_hash').sizeof
known_dk_with_hash = set()
for row in row_list:
mem += repair_row_size
fm_opt = std_optional(row['_fm'])
if fm_opt:
mem += int(fm_opt.get()['_bytes']['_size'])
dk_with_hash_ptr = seastar_lw_shared_ptr(row['_dk_with_hash'])
if dk_with_hash_ptr:
if int(dk_with_hash_ptr.get()) in known_dk_with_hash:
continue
mem += decorated_key_with_hash_size
mem += len(managed_bytes(dk_with_hash_ptr.get()['dk']['_key']['_bytes']))
mf_ptr = seastar_lw_shared_ptr(row['_mf'])
if mf_ptr:
data = std_unique_ptr(mf_ptr.get()['_data'])
if data:
mem += int(data.get()['_memory']['_resources']['memory'])
return mem
if calculate_memory_consumption:
row_buf_mem = repair_row_list_memory(row_buf)
working_row_buf_mem = repair_row_list_memory(working_row_buf)
else:
row_buf_mem = 'n/a'
working_row_buf_mem = 'n/a'
gdb.write(' (repair_meta*) {}: {{id: {}, table: {}, reason: {}, row_buf: {{len: {}, memory: {}}}, working_row_buf: {{len: {}, memory: {}}}, same_shard: {}, tablet: {}}}\n'.format(
rm.address,
int(rm['_repair_meta_id']),
table,
str(rm['_reason']).split("::")[-1],
len(row_buf),
row_buf_mem,
len(working_row_buf),
working_row_buf_mem,
bool(rm['_same_sharding_config']),
bool(rm['_is_tablet'])))
for n in std_vector(rm['_all_node_states']):
all_nodes_state.append(str(n['node']).replace('"', '') + "->" + str(n['state']))
gdb.write('(%s*) for %s: addr = %s, table = %s, ip = %s, states = %s, repair_meta = (repair_meta*) %s\n' % (rm.type, master, str(rm.address), table, ip, all_nodes_state, rm.address))
gdb.write(' host: {}, shard: {}, state: {}\n'.format(n['node']['id'], n['shard'], n['state']))
def invoke(self, arg, for_tty):
parser = argparse.ArgumentParser(description="List runnign repairs which involve this node and shard. See `help scylla repairs` for more information.")
parser.add_argument("-m", "--memory", action="store_true", default=False,
help="Calculate memory consumption of repairs (can take a long time)")
try:
args = parser.parse_args(arg.split())
except SystemExit:
return
gdb.write('Repairs for which this node is leader:\n')
for rm in intrusive_list(gdb.parse_and_eval('debug::repair_meta_for_masters._repair_metas'), link='_tracker_link'):
self.process("masters", rm)
self.process(rm, args.memory)
gdb.write('Repairs for which this node is follower:\n')
for rm in intrusive_list(gdb.parse_and_eval('debug::repair_meta_for_followers._repair_metas'), link='_tracker_link'):
self.process("follower", rm)
self.process(rm, args.memory)
class scylla_tablet_metadata(gdb.Command):