class scylla_repairs(gdb.Command):
    """List all active repair instances that involve this node and shard, for both repair masters and followers.

    See `scylla repairs --help` for a list of available options.

    Example:

      (gdb) scylla repairs
      Repairs for which this node is leader:
        (repair_meta*) 0x60503ab7f7b0: {id: 19197, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: n/a}, working_row_buf: {len: 30, memory: n/a}, same_shard: True, tablet: False}
          host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_combined_row_hash_finished
          host: ce4413ab-33d9-40f8-b13e-d14af8511dda, shard: 4294967295, state: repair_state::put_row_diff_with_rpc_stream_started
        (repair_meta*) 0x60503717f7b0: {id: 19211, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: n/a}, working_row_buf: {len: 28, memory: n/a}, same_shard: True, tablet: False}
          host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_combined_row_hash_finished
          host: c4936a19-41da-4260-971e-651445d740fd, shard: 4294967295, state: repair_state::get_row_diff_with_rpc_stream_finished
        (repair_meta*) 0x60502ddff7b0: {id: 19231, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: n/a}, working_row_buf: {len: 0, memory: n/a}, same_shard: True, tablet: False}
          host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::row_level_stop_started
          host: 039494b6-9d35-4f34-82c4-3c79c1d97175, shard: 4294967295, state: repair_state::row_level_stop_finished
        (repair_meta*) 0x60501db3f7b0: {id: 19234, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: n/a}, working_row_buf: {len: 0, memory: n/a}, same_shard: True, tablet: False}
          host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_sync_boundary_started
          host: 039494b6-9d35-4f34-82c4-3c79c1d97175, shard: 4294967295, state: repair_state::get_sync_boundary_finished
        (repair_meta*) 0x60501c81f7b0: {id: 19236, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: n/a}, working_row_buf: {len: 28, memory: n/a}, same_shard: True, tablet: False}
          host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_combined_row_hash_finished
          host: ce4413ab-33d9-40f8-b13e-d14af8511dda, shard: 4294967295, state: repair_state::put_row_diff_with_rpc_stream_started
        (repair_meta*) 0x60503f65f7b0: {id: 19238, table: large_collection_test.table_with_large_collection, reason: decommission, row_buf: {len: 0, memory: n/a}, working_row_buf: {len: 28, memory: n/a}, same_shard: True, tablet: False}
          host: 496e8b0c-50bf-4ada-b8f9-3d167138e908, shard: 5, state: repair_state::get_combined_row_hash_finished
          host: ce4413ab-33d9-40f8-b13e-d14af8511dda, shard: 4294967295, state: repair_state::get_row_diff_with_rpc_stream_finished
      Repairs for which this node is follower:
    """

    # (heading word, expression evaluated in the inferior) for each tracker
    # list walked by invoke(), in the order they are printed.
    _TRACKERS = (
        ('leader', 'debug::repair_meta_for_masters._repair_metas'),
        ('follower', 'debug::repair_meta_for_followers._repair_metas'),
    )

    def __init__(self):
        gdb.Command.__init__(self, 'scylla repairs', gdb.COMMAND_USER, gdb.COMPLETE_NONE, True)

    @staticmethod
    def _repair_row_list_memory(row_list):
        """Return the summed byte count attributed to the rows in row_list.

        For every row this adds sizeof(repair_row), the `_fm` byte size when
        the optional is engaged, and the `_mf` memory resource figure when
        both the pointer and its `_data` are set. A decorated_key_with_hash
        object is added (struct size plus key bytes) only the first time its
        address is seen.
        """
        mem = 0
        repair_row_size = gdb.lookup_type('repair_row').sizeof
        decorated_key_with_hash_size = gdb.lookup_type('decorated_key_with_hash').sizeof
        # Addresses of decorated_key_with_hash objects already accounted for.
        # NOTE(review): presumably several rows can share one key object via
        # the lw_shared_ptr -- confirm against repair_row.
        known_dk_with_hash = set()

        for row in row_list:
            mem += repair_row_size

            fm_opt = std_optional(row['_fm'])
            if fm_opt:
                mem += int(fm_opt.get()['_bytes']['_size'])

            dk_with_hash_ptr = seastar_lw_shared_ptr(row['_dk_with_hash'])
            if dk_with_hash_ptr:
                dk_with_hash = dk_with_hash_ptr.get()
                dk_address = int(dk_with_hash)
                # Record the address so the same key object is not counted
                # again; do NOT skip the rest of the row, its `_mf` still has
                # to be accounted for.
                if dk_address not in known_dk_with_hash:
                    known_dk_with_hash.add(dk_address)
                    mem += decorated_key_with_hash_size
                    mem += len(managed_bytes(dk_with_hash['dk']['_key']['_bytes']))

            mf_ptr = seastar_lw_shared_ptr(row['_mf'])
            if mf_ptr:
                data = std_unique_ptr(mf_ptr.get()['_data'])
                if data:
                    mem += int(data.get()['_memory']['_resources']['memory'])

        return mem

    def process(self, rm, calculate_memory_consumption):
        """Print one repair_meta instance followed by its per-node states.

        rm -- gdb.Value of the repair_meta object.
        calculate_memory_consumption -- when true, walk both row buffers to
            sum their memory; otherwise 'n/a' is printed in place of the
            figures.
        """
        schema = rm['_schema']
        table = schema_ptr(schema).table_name().replace('"', '')
        row_buf = std_list(rm['_row_buf'])
        working_row_buf = std_list(rm['_working_row_buf'])

        if calculate_memory_consumption:
            row_buf_mem = self._repair_row_list_memory(row_buf)
            working_row_buf_mem = self._repair_row_list_memory(working_row_buf)
        else:
            row_buf_mem = 'n/a'
            working_row_buf_mem = 'n/a'

        gdb.write('  (repair_meta*) {}: {{id: {}, table: {}, reason: {}, row_buf: {{len: {}, memory: {}}}, working_row_buf: {{len: {}, memory: {}}}, same_shard: {}, tablet: {}}}\n'.format(
            rm.address,
            int(rm['_repair_meta_id']),
            table,
            # Keep only the enumerator name, dropping any `scope::` prefix.
            str(rm['_reason']).split("::")[-1],
            len(row_buf),
            row_buf_mem,
            len(working_row_buf),
            working_row_buf_mem,
            bool(rm['_same_sharding_config']),
            bool(rm['_is_tablet'])))

        for n in std_vector(rm['_all_node_states']):
            gdb.write('    host: {}, shard: {}, state: {}\n'.format(n['node']['id'], n['shard'], n['state']))

    def invoke(self, arg, for_tty):
        """Parse the command arguments and list leader, then follower, repairs."""
        parser = argparse.ArgumentParser(description="List running repairs which involve this node and shard. See `help scylla repairs` for more information.")
        parser.add_argument("-m", "--memory", action="store_true", default=False,
                help="Calculate memory consumption of repairs (can take a long time)")

        try:
            args = parser.parse_args(arg.split())
        except SystemExit:
            # argparse raises SystemExit after printing help or a usage
            # error; swallow it so the command simply returns.
            return

        for role, repair_metas in self._TRACKERS:
            gdb.write('Repairs for which this node is {}:\n'.format(role))
            for rm in intrusive_list(gdb.parse_and_eval(repair_metas), link='_tracker_link'):
                self.process(rm, args.memory)