Compare commits

...

1 Commits

Author SHA1 Message Date
Amnon Heiman
607ca719d7 Enable prometheus_allow_protobuf by default
Change the prometheus_allow_protobuf configuration to true by default.
This allows the ScyllaDB server to serve the Prometheus protobuf format (which
enables native histogram support) when the monitoring server requests it.

Update config help text/docs to reflect protobuf support (drop
“experimental” wording).

Add cluster tests to validate the default is enabled, can be overridden,
and /metrics returns protobuf when requested via Accept header (and
falls back to text when disabled).

Fixes #27817
Co-authored-by: mykaul <mykaul@scylladb.com>

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
2026-01-19 09:40:49 +02:00
2 changed files with 85 additions and 1 deletions

View File

@@ -1318,7 +1318,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, prometheus_port(this, "prometheus_port", value_status::Used, 9180, "Prometheus port, set to zero to disable.")
, prometheus_address(this, "prometheus_address", value_status::Used, {/* listen_address */}, "Prometheus listening address, defaulting to listen_address if not explicitly set.")
, prometheus_prefix(this, "prometheus_prefix", value_status::Used, "scylla", "Set the prefix of the exported Prometheus metrics. Changing this will break Scylla's dashboard compatibility, do not change unless you know what you are doing.")
, prometheus_allow_protobuf(this, "prometheus_allow_protobuf", value_status::Used, false, "If set allows the experimental Prometheus protobuf with native histogram")
, prometheus_allow_protobuf(this, "prometheus_allow_protobuf", value_status::Used, true, "Enable Prometheus protobuf with native histogram. Set to false to force text exposition format.")
, abort_on_lsa_bad_alloc(this, "abort_on_lsa_bad_alloc", value_status::Used, false, "Abort when allocation in LSA region fails.")
, murmur3_partitioner_ignore_msb_bits(this, "murmur3_partitioner_ignore_msb_bits", value_status::Used, default_murmur3_partitioner_ignore_msb_bits, "Number of most significant token bits to ignore in murmur3 partitioner; increase for very large clusters.")
, unspooled_dirty_soft_limit(this, "unspooled_dirty_soft_limit", value_status::Used, 0.6, "Soft limit of unspooled dirty memory expressed as a portion of the hard limit.")

View File

@@ -2,6 +2,7 @@
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
import aiohttp
import logging
import pytest
import requests
@@ -39,3 +40,86 @@ async def test_non_liveupdatable_config(manager):
await manager.server_update_config(server.server_id, liveupdatable_param, True)
await wait_for_config(manager, server, liveupdatable_param, True)
await wait_for_config(manager, server, not_liveupdatable_param, True)
# Port the tests use to reach a node's Prometheus /metrics endpoint
# (the default metrics port).
PROMETHEUS_PORT = 9180

# Value of the HTTP Accept header that asks for the Prometheus protobuf
# exposition format, which is the format carrying native histograms.
PROMETHEUS_PROTOBUF_ACCEPT_HEADER = (
    'application/vnd.google.protobuf; '
    'proto=io.prometheus.client.MetricFamily; '
    'encoding=delimited'
)
@pytest.mark.asyncio
async def test_prometheus_allow_protobuf_default(manager):
    """
    Check the default value of prometheus_allow_protobuf and that it can be overridden.

    A server added without any explicit configuration is expected to report
    the option as True. Two further servers are then added with the option
    explicitly set to False and to True, and each is expected to report the
    value it was started with. The reported value is checked through
    wait_for_config.
    """
    option = "prometheus_allow_protobuf"

    logging.info("Starting server with default configuration")
    default_server = await manager.server_add()

    logging.info("Verify prometheus_allow_protobuf defaults to true")
    await wait_for_config(manager, default_server, option, True)

    # Explicit overrides, exercised in the same order: disabled first, then enabled.
    override_cases = (
        (False, "Test that the configuration can be explicitly disabled"),
        (True, "Test that the configuration can be explicitly enabled"),
    )
    for expected, announcement in override_cases:
        logging.info(announcement)
        overridden_server = await manager.server_add(config={option: expected})
        await wait_for_config(manager, overridden_server, option, expected)
@pytest.mark.asyncio
async def test_prometheus_protobuf_native_histogram(manager):
    """
    Check that /metrics honours the prometheus_allow_protobuf setting.

    A server started with prometheus_allow_protobuf=True must answer a request
    carrying the protobuf Accept header with status 200, a protobuf
    Content-Type and a non-empty body. A server started with the option set to
    False must still answer 200 to the same request, but its Content-Type must
    not be protobuf.

    Only the status, the Content-Type header and the body length are checked;
    the protobuf payload itself is not decoded.
    """
    logging.info("Starting server with prometheus_allow_protobuf enabled")
    server = await manager.server_add(config={'prometheus_allow_protobuf': True})

    metrics_url = f"http://{server.ip_addr}:{PROMETHEUS_PORT}/metrics"
    logging.info(f"Requesting metrics in protobuf format from {metrics_url}")

    # Request metrics with Accept header for protobuf format
    headers = {
        'Accept': PROMETHEUS_PROTOBUF_ACCEPT_HEADER
    }

    async with aiohttp.ClientSession() as session:
        async with session.get(metrics_url, headers=headers) as resp:
            assert resp.status == 200, f"Expected status 200, got {resp.status}"

            # Check that we got protobuf content type in response
            content_type = resp.headers.get('Content-Type', '')
            logging.info(f"Response Content-Type: {content_type}")

            # When protobuf is supported and requested, we should get protobuf back
            assert 'application/vnd.google.protobuf' in content_type, \
                f"Expected protobuf content type, got: {content_type}"

            # Read the response body
            body = await resp.read()

            # Verify we got non-empty protobuf data
            assert len(body) > 0, "Expected non-empty protobuf response"
            logging.info(f"Successfully received protobuf response with {len(body)} bytes")

    logging.info("Test that disabling prometheus_allow_protobuf prevents protobuf responses")
    server2 = await manager.server_add(config={'prometheus_allow_protobuf': False})
    metrics_url2 = f"http://{server2.ip_addr}:{PROMETHEUS_PORT}/metrics"

    # The same protobuf Accept header is sent on purpose: the point is that
    # the server ignores it when the option is disabled.
    async with aiohttp.ClientSession() as session:
        async with session.get(metrics_url2, headers=headers) as resp:
            # The message must be an f-string so the URL is actually shown on failure.
            assert resp.status == 200, f"Fail reading metrics from {metrics_url2}"

            content_type = resp.headers.get('Content-Type', '')
            logging.info(f"Response Content-Type (protobuf disabled): {content_type}")

            # When protobuf is disabled, we should get text format even if requested
            # The server should return text/plain or not include protobuf in content-type
            assert 'application/vnd.google.protobuf' not in content_type, \
                f"Expected text format when protobuf disabled, got: {content_type}"

    logging.info("Confirmed that protobuf is not returned when disabled")