After the recent fixes, 4 tests started failing with the Java nodetool implementation. We are about to ditch the Java implementation, but until we actually do, it is valuable to keep the tests passing with both the native and the Java implementations. So in this patch, these tests are fixed to pass with the Java implementation too. There is one test, test_help.py, which fails only if run together with all the other tests. I couldn't confirm this 100%, but it seems like this is due to JMX sending a rogue request on some timer, which happens to hit this test. I don't think this is worth trying to fix.
513 lines
18 KiB
Python
513 lines
18 KiB
Python
# Copyright 2024-present ScyllaDB
|
|
#
|
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
#
|
|
|
|
|
|
from collections import defaultdict
|
|
from enum import Enum
|
|
from rest_api_mock import expected_request
|
|
from socket import getnameinfo
|
|
from typing import NamedTuple
|
|
|
|
import pytest
|
|
|
|
|
|
class NodeStatus(Enum):
    """Liveness of a node as shown by `nodetool status`.

    Each value is the character expected as the first half of the two-character
    status/state column of the node's row in the output.
    """
    Up = 'U'
    Down = 'D'
    # Neither up nor down: such a node is listed in neither the mocked "live" nor "down" response.
    Unknown = '?'
|
|
|
|
|
|
class NodeState(Enum):
    """Membership state of a node as shown by `nodetool status`.

    Each value is the character expected as the second half of the two-character
    status/state column of the node's row in the output.
    """
    Joining = 'J'
    Leaving = 'L'
    Moving = 'M'
    # Not reported by any of the mocked joining/leaving/moving endpoints.
    Normal = 'N'
|
|
|
|
|
|
class Node(NamedTuple):
    """Description of a single cluster node.

    Used both to build the mocked REST responses and to compute the row that
    `nodetool status` is expected to print for the node.
    """
    endpoint: str  # address of the node; the key of the node in all mocked responses
    host_id: str  # may be None, in which case the node is left out of the host_id map
    load: int  # load in bytes; may be None, in which case the node is left out of the load map
    tokens: list[str]  # tokens owned by the node, as decimal strings
    datacenter: str  # returned by the mocked /snitch/datacenter endpoint
    rack: str  # returned by the mocked /snitch/rack endpoint
    status: NodeStatus
    state: NodeState
|
|
|
|
class StatusQueryTarget(NamedTuple):
    """The keyspace (and optionally table) passed to `nodetool status` on the command line."""
    keyspace: str  # keyspace argument of the status command
    table: str  # table argument of the status command; None when only a keyspace is given
    uses_tablets: bool  # whether the mocked server should report the keyspace as tablets-replicated
|
|
|
|
# Message carried by the mocked error response of /storage_service/ownership/null.
# The same text, prefixed with "Note: ", is expected as the closing line of the
# status output when no keyspace was given.
null_ownership_error = ("Non-system keyspaces don't have the same replication settings, "
                        "effective ownership information is meaningless")
|
|
|
|
|
|
def validate_status_output(res, keyspace, nodes, ownership, resolve, effective_ownership_unknown, token_count_unknown,
                           cassandra_nodetool):
    """Assert that the output of `nodetool status` matches the expected cluster description.

    Args:
        res: the complete standard output of the status command.
        keyspace: the keyspace given on the command line or None; selects the expected
            column header and the expected closing line.
        nodes: mapping of endpoint address to a node description exposing `datacenter`,
            `rack`, `load`, `tokens`, `host_id`, `status` and `state`.
        ownership: mapping of endpoint address to its ownership ratio (anything `float()` accepts).
        resolve: whether the address column contains resolved host names instead of addresses.
        effective_ownership_unknown: whether the "Owns" column must be "?".
        token_count_unknown: whether the "Tokens" column must be "?".
        cassandra_nodetool: whether the output was produced by the Cassandra (Java) nodetool,
            which prints "null" rather than "?" for a missing host ID.

    Raises:
        AssertionError: if any line of the output deviates from the expectation.
    """
    datacenters = sorted({node.datacenter for node in nodes.values()})
    load_multiplier = {"bytes": 1, "KB": 1024, "MB": 1024**2, "GB": 1024**3, "TB": 1024**4}

    # The reverse-lookup map does not depend on the datacenter, so build it only once.
    name_to_ep = {getnameinfo((ep, 0), 0)[0]: ep for ep in nodes} if resolve else {}

    lines = res.split('\n')
    i = 0

    for dc in datacenters:
        dc_line = lines[i]
        assert dc_line.startswith("Datacenter:")
        res_dc = dc_line.split()[1]
        assert dc == res_dc
        dc_line_len = len(dc_line)

        i += 1
        assert lines[i] == "=" * dc_line_len

        i += 1
        assert lines[i] == "Status=Up/Down"

        i += 1
        assert lines[i] == "|/ State=Normal/Leaving/Joining/Moving"

        i += 1
        if keyspace is None:
            assert lines[i].split() == ["--", "Address", "Load", "Tokens", "Owns", "Host", "ID", "Rack"]
        else:
            assert lines[i].split() == ["--", "Address", "Load", "Tokens", "Owns", "(effective)", "Host", "ID", "Rack"]

        dc_eps = {ep for ep, node in nodes.items() if node.datacenter == dc}

        # The legacy nodetool prints endpoints in random order (probably in hash-map order).
        # So just make sure here, that we see all endpoints from each DC and that their properties are correct.
        while dc_eps:
            i += 1

            assert lines[i] != ""

            pieces = lines[i].split()
            if len(pieces) == 8:
                status_state, ep, load, load_unit, tokens, owns, host_id, rack = pieces
            else:
                # A row without a unit column; reset the unit so that a value left over
                # from a previous row can never be used for this one.
                status_state, ep, load, tokens, owns, host_id, rack = pieces
                load_unit = None

            if resolve:
                assert ep in name_to_ep
                ep = name_to_ep[ep]

            assert ep in dc_eps
            dc_eps.remove(ep)

            node = nodes[ep]

            assert status_state == "{}{}".format(node.status.value, node.state.value)
            if node.load is None:
                assert load == "?"
            else:
                assert load_unit is not None
                assert load == "{:.2f}".format(int(node.load) / load_multiplier[load_unit])
            if token_count_unknown:
                assert tokens == "?"
            else:
                assert int(tokens) == len(node.tokens)
            if effective_ownership_unknown:
                assert owns == "?"
            else:
                assert owns == "{:.1f}%".format(float(ownership[ep]) * 100)
            if node.host_id is not None:
                assert host_id == node.host_id
            elif cassandra_nodetool:
                assert host_id == "null"
            else:
                assert host_id == "?"
            assert rack == node.rack

        assert len(dc_eps) == 0
        i += 1

    i += 1
    if keyspace is None:
        assert lines[i] == f"Note: {null_ownership_error}"
    else:
        assert lines[i] == ""
|
|
|
|
|
|
def ratio_helper(a: int, b: int):
    """Return the share of the signed 64-bit token ring lying between token `b` and token `a`.

    Args:
        a: the token at the end of the range.
        b: the token at the start of the range (the predecessor of `a` on the ring).

    Returns:
        The size of the range divided by 2**64, as a float. When `a` is not greater
        than `b` the range wraps around the end of the ring.
    """
    maxint_64 = (2**63) - 1
    minint_64 = -2**63
    int64_range = 2**64
    if a > b:
        val = a - b
    else:
        # Wrap-around: the stretch from b up to the largest token plus the stretch
        # from the smallest token up to a. Both stretches are non-negative.
        val = (a - minint_64) + (maxint_64 - b)
    return val / int64_range
|
|
|
|
|
|
# Mirrors dht::token::describe_ownership()
|
|
def _describe_token_ownership(sorted_tokens: list[int]):
|
|
if len(sorted_tokens) == 0:
|
|
return {str(sorted_tokens[0]): 1.0}
|
|
|
|
ownerships = {}
|
|
|
|
start = sorted_tokens[0]
|
|
ti = start # The first token and its value
|
|
tim1 = ti # The last token and its value (after loop)
|
|
|
|
for i in range(1, len(sorted_tokens)):
|
|
ti = sorted_tokens[i] # The next token and its value
|
|
ownerships[str(ti)] = ratio_helper(ti, tim1) # save (T(i) -> %age)
|
|
tim1 = ti
|
|
|
|
# The start token's range extends backward to the last token, which is why both were saved above.
|
|
ownerships[str(start)] = ratio_helper(start, ti)
|
|
|
|
return ownerships
|
|
|
|
|
|
# Mirrors service::storage_service::get_ownership()
|
|
def _get_ownership(nodes):
    """Sum up, per endpoint, the ring share of all the tokens the endpoint owns.

    Args:
        nodes: iterable of node descriptions exposing `endpoint` and `tokens`.

    Returns:
        A defaultdict mapping endpoint address to its total ownership ratio.
    """
    token_values = [int(token) for node in nodes for token in node.tokens]
    token_values.sort()
    shares = _describe_token_ownership(token_values)

    # Tokens are looked up by their original string form, as stored on the nodes.
    owner_of = {}
    for node in nodes:
        for token in node.tokens:
            owner_of[token] = node.endpoint

    totals = defaultdict(int)
    for token, share in shares.items():
        totals[owner_of[token]] += share

    return totals
|
|
|
|
|
|
def _do_test_status(request, nodetool, status_query_target, node_list, resolve=None):
    """Run `nodetool status` against mocked REST endpoints and validate its output.

    Args:
        request: the pytest request object; only its "nodetool" option is read.
        nodetool: callable that runs nodetool with the given arguments and expected
            requests; its result must expose the captured `stdout`.
        status_query_target: a StatusQueryTarget, or None to run status without a keyspace.
        node_list: the Node descriptions making up the mocked cluster.
        resolve: optional command-line flag (e.g. '-r') asking for resolved host names.
    """
    uses_cassandra_nodetool = request.config.getoption("nodetool") == "cassandra"

    if status_query_target:
        keyspace = status_query_target.keyspace
        table = status_query_target.table
        keyspace_uses_tablets = status_query_target.uses_tablets
    else:
        keyspace = None
        table = None
        keyspace_uses_tablets = False

    nodes = {node.endpoint: node for node in node_list}

    # Responses of the per-state and per-status endpoint listings.
    joining = [n.endpoint for n in node_list if n.state == NodeState.Joining]
    leaving = [n.endpoint for n in node_list if n.state == NodeState.Leaving]
    moving = [n.endpoint for n in node_list if n.state == NodeState.Moving]
    live = [n.endpoint for n in node_list if n.status == NodeStatus.Up]
    down = [n.endpoint for n in node_list if n.status == NodeStatus.Down]

    # Nodes without a known load or host ID are simply absent from the respective map.
    load_map = [{"key": ep, "value": node.load} for ep, node in nodes.items() if node.load is not None]

    host_id_map = [{"key": ep, "value": node.host_id} for ep, node in nodes.items() if node.host_id is not None]

    # The token listing is only parameterized for a table of a tablets keyspace.
    tokens_endpoint_params = {}
    if keyspace_uses_tablets and table:
        tokens_endpoint_params["keyspace"] = keyspace
        tokens_endpoint_params["cf"] = table

    tokens_endpoint = []
    for ep, node in nodes.items():
        for token in node.tokens:
            tokens_endpoint.append({"key": token, "value": ep})
    tokens_endpoint.sort(key=lambda x: int(x['key']))

    ownership = _get_ownership(node_list)
    ownership_response = [{"key": ep, "value": str(own)} for ep, own in ownership.items()]

    expected_requests = [
        expected_request("GET", "/storage_service/nodes/joining", response=joining),
        expected_request("GET", "/storage_service/nodes/leaving", response=leaving),
        expected_request("GET", "/storage_service/nodes/moving", response=moving),
        expected_request("GET", "/storage_service/load_map", response=load_map),
        expected_request("GET", "/storage_service/tokens_endpoint", params=tokens_endpoint_params,
                         response=tokens_endpoint),
        expected_request("GET", "/gossiper/endpoint/live", response=live),
        expected_request("GET", "/gossiper/endpoint/down", response=down),
        expected_request("GET", "/storage_service/host_id", response=host_id_map),
    ]

    if keyspace is None:
        # Without a keyspace the effective-ownership query is answered with an error;
        # the plain ownership endpoint is registered next to it. Both are optional (ANY).
        expected_requests += [
            expected_request("GET",
                             "/storage_service/ownership/null",
                             response_status=500,
                             multiple=expected_request.ANY,
                             response={"message": f"std::runtime_error({null_ownership_error})", "code": 500}),
            expected_request("GET", "/storage_service/ownership", multiple=expected_request.ANY,
                             response=ownership_response)]
    else:
        if not uses_cassandra_nodetool:
            # This request is only expected from the non-Cassandra nodetool.
            keyspaces_using_tablets = [keyspace] if keyspace_uses_tablets else []
            expected_requests.append(
                expected_request("GET", "/storage_service/keyspaces", params={"replication": "tablets"},
                                 multiple=expected_request.ONE, response=keyspaces_using_tablets))
        if table is None:
            # No ownership request is expected for a tablets keyspace without a table.
            if not keyspace_uses_tablets:
                expected_requests.append(
                    expected_request("GET", f"/storage_service/ownership/{keyspace}",
                                     multiple=expected_request.ONE, response=ownership_response))
        else:
            expected_requests.append(
                expected_request("GET", f"/storage_service/ownership/{keyspace}", params={"cf": table},
                                 response=ownership_response))

    # Datacenter and rack lookups may happen any number of times per endpoint.
    for ep, node in nodes.items():
        expected_requests += [
            expected_request("GET", "/snitch/datacenter", params={"host": ep}, multiple=expected_request.ANY,
                             response=node.datacenter),
            expected_request("GET", "/snitch/rack", params={"host": ep}, multiple=expected_request.ANY,
                             response=node.rack),
        ]

    args = ["status"]

    if keyspace is not None:
        args.append(keyspace)

    if table is not None:
        args.append(table)

    if resolve is not None:
        args.append(resolve)

    res = nodetool(*args, expected_requests=expected_requests)

    # These mirror the cases above in which no usable ownership/token data is served.
    effective_ownership_unknown = keyspace is None or (table is None and keyspace_uses_tablets)
    token_count_unknown = keyspace_uses_tablets and not table
    validate_status_output(res.stdout, keyspace, nodes, ownership, bool(resolve), effective_ownership_unknown,
                           token_count_unknown, uses_cassandra_nodetool)
|
|
|
|
|
|
def test_status_no_keyspace_single_dc(request, nodetool):
    """Status without a keyspace for three nodes of a single datacenter."""
    up_normal = Node(endpoint="127.0.0.1",
                     host_id="78a9c1d0-b341-467e-a076-9eff4cf7ffc6",
                     load=206015,
                     tokens=["-9175818098208185248", "-3983536194780899528"],
                     datacenter="datacenter1",
                     rack="rack1",
                     status=NodeStatus.Up,
                     state=NodeState.Normal)
    down_normal = Node(endpoint="127.0.0.2",
                       host_id="ed341f60-b12a-4fd4-9917-e80977ded0f9",
                       load=277624,
                       tokens=["-1810801828328238220", "2983536194780899528"],
                       datacenter="datacenter1",
                       rack="rack2",
                       status=NodeStatus.Down,
                       state=NodeState.Normal)
    up_leaving = Node(endpoint="127.0.0.3",
                      host_id="1e77eb26-a372-4eb4-aeaa-72f224cf6b4c",
                      load=353236,
                      tokens=["3810801828328238220", "6810801828328238220"],
                      datacenter="datacenter1",
                      rack="rack3",
                      status=NodeStatus.Up,
                      state=NodeState.Leaving)

    _do_test_status(request, nodetool, None, [up_normal, down_normal, up_leaving])
|
|
|
|
|
|
@pytest.mark.parametrize("uses_tablets", (False, True))
@pytest.mark.parametrize("table", (None, "cf"))
def test_status_keyspace_single_dc(request, nodetool, uses_tablets, table):
    """Status for keyspace "ks" (optionally with a table) in a single datacenter."""
    is_cassandra = request.config.getoption("nodetool") == "cassandra"
    if is_cassandra and (uses_tablets or table):
        pytest.skip("skipping tablets-related test with Cassandra nodetool")

    unknown_joining = Node(endpoint="127.0.0.1",
                           host_id="78a9c1d0-b341-467e-a076-9eff4cf7ffc6",
                           load=206015,
                           tokens=["-9175818098208185248", "-3983536194780899528"],
                           datacenter="datacenter1",
                           rack="rack1",
                           status=NodeStatus.Unknown,
                           state=NodeState.Joining)
    down_normal = Node(endpoint="127.0.0.2",
                       host_id="ed341f60-b12a-4fd4-9917-e80977ded0f9",
                       load=277624,
                       tokens=["-1810801828328238220", "2983536194780899528"],
                       datacenter="datacenter1",
                       rack="rack2",
                       status=NodeStatus.Down,
                       state=NodeState.Normal)
    up_normal = Node(endpoint="127.0.0.3",
                     host_id="1e77eb26-a372-4eb4-aeaa-72f224cf6b4c",
                     load=353236,
                     tokens=["3810801828328238220", "6810801828328238220"],
                     datacenter="datacenter1",
                     rack="rack3",
                     status=NodeStatus.Up,
                     state=NodeState.Normal)

    target = StatusQueryTarget(keyspace="ks", table=table, uses_tablets=uses_tablets)
    _do_test_status(request, nodetool, target, [unknown_joining, down_normal, up_normal])
|
|
|
|
|
|
def test_status_no_keyspace_multi_dc(request, nodetool):
    """Status without a keyspace for four nodes spread over two datacenters."""
    # NOTE(review): the two datacenter2 nodes share one host_id, and the token
    # "9810801828328238220" is larger than 2**63 - 1 -- confirm both are intentional.
    dc1_up_normal = Node(endpoint="127.1.0.1",
                         host_id="78a9c1d0-b341-467e-a076-9eff4cf7ffc6",
                         load=206015,
                         tokens=["-9175818098208185248", "-3983536194780899528"],
                         datacenter="datacenter1",
                         rack="rack1",
                         status=NodeStatus.Up,
                         state=NodeState.Normal)
    dc1_down_moving = Node(endpoint="127.1.0.2",
                           host_id="ed341f60-b12a-4fd4-9917-e80977ded0f9",
                           load=277624,
                           tokens=["1810801828328238220", "2810801828328238220"],
                           datacenter="datacenter1",
                           rack="rack2",
                           status=NodeStatus.Down,
                           state=NodeState.Moving)
    dc2_first = Node(endpoint="127.2.0.1",
                     host_id="1e77eb26-a372-4eb4-aeaa-72f224cf6b4c",
                     load=353236,
                     tokens=["3810801828328238220", "6810801828328238220"],
                     datacenter="datacenter2",
                     rack="rack1",
                     status=NodeStatus.Up,
                     state=NodeState.Normal)
    dc2_second = Node(endpoint="127.2.0.2",
                      host_id="1e77eb26-a372-4eb4-aeaa-72f224cf6b4c",
                      load=353236,
                      tokens=["8810801828328238220", "9810801828328238220"],
                      datacenter="datacenter2",
                      rack="rack2",
                      status=NodeStatus.Up,
                      state=NodeState.Normal)

    _do_test_status(request, nodetool, None, [dc1_up_normal, dc1_down_moving, dc2_first, dc2_second])
|
|
|
|
@pytest.mark.parametrize("uses_tablets", (False, True))
@pytest.mark.parametrize("table", (None, "cf"))
def test_status_keyspace_multi_dc(request, nodetool, uses_tablets, table):
    """Status for keyspace "ks" (optionally with a table) across two datacenters."""
    is_cassandra = request.config.getoption("nodetool") == "cassandra"
    if is_cassandra and (uses_tablets or table):
        pytest.skip("skipping tablets-related test with Cassandra nodetool")

    # NOTE(review): the two datacenter2 nodes share one host_id, and the token
    # "9810801828328238220" is larger than 2**63 - 1 -- confirm both are intentional.
    dc1_down_joining = Node(endpoint="127.1.0.1",
                            host_id="78a9c1d0-b341-467e-a076-9eff4cf7ffc6",
                            load=206015,
                            tokens=["-9175818098208185248", "-3983536194780899528"],
                            datacenter="datacenter1",
                            rack="rack1",
                            status=NodeStatus.Down,
                            state=NodeState.Joining)
    dc1_up_normal = Node(endpoint="127.1.0.2",
                         host_id="ed341f60-b12a-4fd4-9917-e80977ded0f9",
                         load=277624,
                         tokens=["1810801828328238220", "2810801828328238220"],
                         datacenter="datacenter1",
                         rack="rack2",
                         status=NodeStatus.Up,
                         state=NodeState.Normal)
    dc2_first = Node(endpoint="127.2.0.1",
                     host_id="1e77eb26-a372-4eb4-aeaa-72f224cf6b4c",
                     load=353236,
                     tokens=["3810801828328238220", "6810801828328238220"],
                     datacenter="datacenter2",
                     rack="rack1",
                     status=NodeStatus.Up,
                     state=NodeState.Normal)
    dc2_second = Node(endpoint="127.2.0.2",
                      host_id="1e77eb26-a372-4eb4-aeaa-72f224cf6b4c",
                      load=353236,
                      tokens=["8810801828328238220", "9810801828328238220"],
                      datacenter="datacenter2",
                      rack="rack2",
                      status=NodeStatus.Up,
                      state=NodeState.Normal)

    target = StatusQueryTarget(keyspace="ks", table=table, uses_tablets=uses_tablets)
    _do_test_status(request, nodetool, target, [dc1_down_joining, dc1_up_normal, dc2_first, dc2_second])
|
|
|
|
|
|
def test_status_keyspace_joining_node(request, nodetool):
    """Status for a keyspace while one node is still joining.

    The joining node has neither of the following available yet:
    * load
    * host_id
    """
    established = Node(endpoint="127.0.0.1",
                       host_id="78a9c1d0-b341-467e-a076-9eff4cf7ffc6",
                       load=206015,
                       tokens=["-9175818098208185248", "-3983536194780899528"],
                       datacenter="datacenter1",
                       rack="rack1",
                       status=NodeStatus.Up,
                       state=NodeState.Normal)
    newcomer = Node(endpoint="127.0.0.2",
                    host_id=None,
                    load=None,
                    tokens=["-1810801828328238220", "2983536194780899528"],
                    datacenter="datacenter1",
                    rack="rack2",
                    status=NodeStatus.Up,
                    state=NodeState.Joining)

    target = StatusQueryTarget(keyspace="ks", table=None, uses_tablets=False)
    _do_test_status(request, nodetool, target, [established, newcomer])
|
|
|
|
|
|
@pytest.mark.parametrize("resolve", (None, '-r', '--resolve-ip'))
def test_status_resolve(request, nodetool, resolve):
    """Status of a one-node cluster, without and with each spelling of the resolve flag."""
    only_node = Node(endpoint="127.0.0.1",
                     host_id="78a9c1d0-b341-467e-a076-9eff4cf7ffc6",
                     load=206015,
                     tokens=["-9175818098208185248", "-3983536194780899528"],
                     datacenter="datacenter1",
                     rack="rack1",
                     status=NodeStatus.Up,
                     state=NodeState.Normal)

    _do_test_status(request, nodetool, None, [only_node], resolve)
|