Compare commits

...

1 Commit

Author SHA1 Message Date
Yaniv Michael Kaul
65b774309e tracing: set_skip_when_empty() for error-path metrics
Add .set_skip_when_empty() to all error-path metrics in the tracing
module. Tracing itself is not a commonly used feature, making all of
these metrics almost always zero:

Tier 1 (very rare - corruption/schema issues):
- tracing_keyspace_helper::bad_column_family_errors: tracing schema
  missing or incompatible, should never happen post-bootstrap
- tracing::trace_errors: trace records dropped due to an internal error
  (e.g. OOM) while building trace parameters

Tier 2 (overload - tracing backend saturated):
- tracing::dropped_sessions: too many pending sessions/records
- tracing::dropped_records: too many pending records

Tier 3 (general tracing write errors):
- tracing_keyspace_helper::tracing_errors: errors during writes to
  the system_traces keyspace

Since tracing is an opt-in feature that most deployments rarely use,
all five metrics are almost always zero and create unnecessary
reporting overhead.

AI-Assisted: yes
Signed-off-by: Yaniv Kaul <yaniv.kaul@scylladb.com>
2026-04-06 14:42:00 +03:00
2 changed files with 5 additions and 5 deletions

View File

@@ -200,13 +200,13 @@ trace_keyspace_helper::trace_keyspace_helper(tracing& tr)
_metrics.add_group("tracing_keyspace_helper", {
sm::make_counter("tracing_errors", [this] { return _stats.tracing_errors; },
sm::description("Counts a number of errors during writing to a system_traces keyspace. "
"One error may cause one or more tracing records to be lost.")),
"One error may cause one or more tracing records to be lost.")).set_skip_when_empty(),
sm::make_counter("bad_column_family_errors", [this] { return _stats.bad_column_family_errors; },
sm::description("Counts a number of times write failed due to one of the tables in the system_traces keyspace has an incompatible schema. "
"One error may result one or more tracing records to be lost. "
"Non-zero value indicates that the administrator has to take immediate steps to fix the corresponding schema. "
"The appropriate error message will be printed in the syslog.")),
"The appropriate error message will be printed in the syslog.")).set_skip_when_empty(),
});
}

View File

@@ -39,17 +39,17 @@ tracing::tracing(sstring tracing_backend_helper_class_name)
_metrics.add_group("tracing", {
sm::make_counter("dropped_sessions", stats.dropped_sessions,
sm::description("Counts a number of dropped sessions due to too many pending sessions/records. "
"High value indicates that backend is saturated with the rate with which new tracing records are created.")),
"High value indicates that backend is saturated with the rate with which new tracing records are created.")).set_skip_when_empty(),
sm::make_counter("dropped_records", stats.dropped_records,
sm::description("Counts a number of dropped records due to too many pending records. "
"High value indicates that backend is saturated with the rate with which new tracing records are created.")),
"High value indicates that backend is saturated with the rate with which new tracing records are created.")).set_skip_when_empty(),
sm::make_counter("trace_records_count", stats.trace_records_count,
sm::description("This metric is a rate of tracing records generation.")),
sm::make_counter("trace_errors", stats.trace_errors,
sm::description("Counts a number of trace records dropped due to an error (e.g. OOM).")),
sm::description("Counts a number of trace records dropped due to an error (e.g. OOM).")).set_skip_when_empty(),
sm::make_gauge("active_sessions", _active_sessions,
sm::description("Holds a number of a currently active tracing sessions.")),