mirror of
https://github.com/google/nomulus
synced 2026-04-15 22:17:25 +00:00
Add the ability to configure a ratio of proxy metrics to be recorded (#2772)
This ratio defaults to 1.0 (i.e. all metrics will be recorded), but we will set it much lower in sandbox and production, probably something closer to 0.01. This will reduce recorded metrics volume and thus StackDriver cost, while still retaining enough data for overall performance monitoring. This is handled stochastically, so as to not require any coordination between Java threads or GKE pods/clusters, as alternative approaches would (i.e. using a counter and recording every Nth, or throttling to a max metrics qps).
This commit is contained in:
@@ -113,6 +113,8 @@ public class ProxyConfig {
|
||||
public int stackdriverMaxQps;
|
||||
public int stackdriverMaxPointsPerRequest;
|
||||
public int writeIntervalSeconds;
|
||||
public double frontendMetricsRatio;
|
||||
public double backendMetricsRatio;
|
||||
}
|
||||
|
||||
/** Configuration options that apply to quota management. */
|
||||
|
||||
@@ -61,6 +61,7 @@ import java.time.Duration;
|
||||
import java.util.Arrays;
|
||||
import java.util.Base64;
|
||||
import java.util.Optional;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
@@ -395,6 +396,26 @@ public class ProxyModule {
|
||||
return Duration.ofSeconds(config.serverCertificateCacheSeconds);
|
||||
}
|
||||
|
||||
@Singleton
|
||||
@Provides
|
||||
@Named("frontendMetricsRatio")
|
||||
static double provideFrontendMetricsRatio(ProxyConfig config) {
|
||||
return config.metrics.frontendMetricsRatio;
|
||||
}
|
||||
|
||||
@Singleton
|
||||
@Provides
|
||||
@Named("backendMetricsRatio")
|
||||
static double provideBackendMetricsRatio(ProxyConfig config) {
|
||||
return config.metrics.backendMetricsRatio;
|
||||
}
|
||||
|
||||
@Singleton
|
||||
@Provides
|
||||
static Random provideRandom() {
|
||||
return new Random();
|
||||
}
|
||||
|
||||
/** Root level component that exposes the port-to-protocol map. */
|
||||
@Singleton
|
||||
@Component(
|
||||
|
||||
@@ -200,3 +200,15 @@ metrics:
|
||||
|
||||
# How often metrics are written.
|
||||
writeIntervalSeconds: 60
|
||||
|
||||
# What ratio of frontend request metrics should be stochastically recorded
|
||||
# (0.0 means none, 1.0 means all). This is useful for reducing metrics volume,
|
||||
# and thus cost, while still recording some information for performance
|
||||
# monitoring purposes.
|
||||
frontendMetricsRatio: 1.0
|
||||
|
||||
# What ratio of backend request metrics should be stochastically recorded
|
||||
# (0.0 means none, 1.0 means all). This is useful for reducing metrics volume,
|
||||
# and thus cost, while still recording some information for performance
|
||||
# monitoring purposes.
|
||||
backendMetricsRatio: 1.0
|
||||
|
||||
@@ -22,7 +22,9 @@ import com.google.monitoring.metrics.MetricRegistryImpl;
|
||||
import google.registry.util.NonFinalForTesting;
|
||||
import io.netty.handler.codec.http.FullHttpResponse;
|
||||
import jakarta.inject.Inject;
|
||||
import jakarta.inject.Named;
|
||||
import jakarta.inject.Singleton;
|
||||
import java.util.Random;
|
||||
import org.joda.time.Duration;
|
||||
|
||||
/** Backend metrics instrumentation. */
|
||||
@@ -75,8 +77,14 @@ public class BackendMetrics extends BaseMetrics {
|
||||
LABELS,
|
||||
DEFAULT_LATENCY_FITTER);
|
||||
|
||||
private final Random random;
|
||||
private final double backendMetricsRatio;
|
||||
|
||||
@Inject
|
||||
BackendMetrics() {}
|
||||
BackendMetrics(@Named("backendMetricsRatio") double backendMetricsRatio, Random random) {
|
||||
this.backendMetricsRatio = backendMetricsRatio;
|
||||
this.random = random;
|
||||
}
|
||||
|
||||
@Override
|
||||
void resetMetrics() {
|
||||
@@ -89,6 +97,10 @@ public class BackendMetrics extends BaseMetrics {
|
||||
|
||||
@NonFinalForTesting
|
||||
public void requestSent(String protocol, String certHash, int bytes) {
|
||||
// Short-circuit metrics recording randomly according to the configured ratio.
|
||||
if (random.nextDouble() > backendMetricsRatio) {
|
||||
return;
|
||||
}
|
||||
requestsCounter.increment(protocol, certHash);
|
||||
requestBytes.record(bytes, protocol, certHash);
|
||||
}
|
||||
@@ -96,6 +108,10 @@ public class BackendMetrics extends BaseMetrics {
|
||||
@NonFinalForTesting
|
||||
public void responseReceived(
|
||||
String protocol, String certHash, FullHttpResponse response, Duration latency) {
|
||||
// Short-circuit metrics recording randomly according to the configured ratio.
|
||||
if (random.nextDouble() > backendMetricsRatio) {
|
||||
return;
|
||||
}
|
||||
latencyMs.record(latency.getMillis(), protocol, certHash);
|
||||
responseBytes.record(response.content().readableBytes(), protocol, certHash);
|
||||
responsesCounter.increment(protocol, certHash, response.status().toString());
|
||||
|
||||
@@ -26,8 +26,10 @@ import io.netty.channel.group.ChannelGroup;
|
||||
import io.netty.channel.group.DefaultChannelGroup;
|
||||
import io.netty.util.concurrent.GlobalEventExecutor;
|
||||
import jakarta.inject.Inject;
|
||||
import jakarta.inject.Named;
|
||||
import jakarta.inject.Singleton;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
import org.joda.time.Duration;
|
||||
@@ -78,8 +80,14 @@ public class FrontendMetrics extends BaseMetrics {
|
||||
LABELS,
|
||||
DEFAULT_LATENCY_FITTER);
|
||||
|
||||
private final Random random;
|
||||
private final double frontendMetricsRatio;
|
||||
|
||||
@Inject
|
||||
public FrontendMetrics() {}
|
||||
FrontendMetrics(@Named("frontendMetricsRatio") double frontendMetricsRatio, Random random) {
|
||||
this.frontendMetricsRatio = frontendMetricsRatio;
|
||||
this.random = random;
|
||||
}
|
||||
|
||||
@Override
|
||||
void resetMetrics() {
|
||||
@@ -109,6 +117,10 @@ public class FrontendMetrics extends BaseMetrics {
|
||||
|
||||
@NonFinalForTesting
|
||||
public void responseSent(String protocol, String certHash, Duration latency) {
|
||||
// Short-circuit metrics recording randomly according to the configured ratio.
|
||||
if (random.nextDouble() > frontendMetricsRatio) {
|
||||
return;
|
||||
}
|
||||
latencyMs.record(latency.getMillis(), protocol, certHash);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,6 +56,7 @@ import jakarta.inject.Named;
|
||||
import jakarta.inject.Provider;
|
||||
import jakarta.inject.Singleton;
|
||||
import java.time.Duration;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
@@ -312,6 +313,26 @@ public abstract class ProtocolModuleTest {
|
||||
return Duration.ofHours(1);
|
||||
}
|
||||
|
||||
@Singleton
|
||||
@Provides
|
||||
@Named("frontendMetricsRatio")
|
||||
static double provideFrontendMetricsRatio() {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
@Singleton
|
||||
@Provides
|
||||
@Named("backendMetricsRatio")
|
||||
static double providebackendMetricsRatio() {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
@Singleton
|
||||
@Provides
|
||||
static Random provideRandom() {
|
||||
return new Random();
|
||||
}
|
||||
|
||||
// This method is only here to satisfy Dagger binding, but is never used. In test environment,
|
||||
// it is the self-signed certificate and its key that ends up being used.
|
||||
@Singleton
|
||||
|
||||
@@ -18,11 +18,14 @@ import static com.google.monitoring.metrics.contrib.DistributionMetricSubject.as
|
||||
import static com.google.monitoring.metrics.contrib.LongMetricSubject.assertThat;
|
||||
import static google.registry.proxy.TestUtils.makeHttpPostRequest;
|
||||
import static google.registry.proxy.TestUtils.makeHttpResponse;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import io.netty.handler.codec.http.FullHttpRequest;
|
||||
import io.netty.handler.codec.http.FullHttpResponse;
|
||||
import io.netty.handler.codec.http.HttpResponseStatus;
|
||||
import java.util.Random;
|
||||
import org.joda.time.Duration;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
@@ -34,10 +37,11 @@ class BackendMetricsTest {
|
||||
private final String certHash = "blah12345";
|
||||
private final String protocol = "frontend protocol";
|
||||
|
||||
private final BackendMetrics metrics = new BackendMetrics();
|
||||
private BackendMetrics metrics;
|
||||
|
||||
@BeforeEach
|
||||
void beforeEach() {
|
||||
metrics = new BackendMetrics(1.0, new Random());
|
||||
metrics.resetMetrics();
|
||||
}
|
||||
|
||||
@@ -107,15 +111,21 @@ class BackendMetricsTest {
|
||||
|
||||
@Test
|
||||
void testSuccess_multipleResponses() {
|
||||
Random mockRandom = mock(Random.class);
|
||||
metrics = new BackendMetrics(0.2, mockRandom);
|
||||
// The third response won't be logged.
|
||||
when(mockRandom.nextDouble()).thenReturn(.1, .04, .5, .15);
|
||||
String content1 = "some response";
|
||||
String content2 = "other response";
|
||||
String content3 = "a very bad response";
|
||||
FullHttpResponse response1 = makeHttpResponse(content1, HttpResponseStatus.OK);
|
||||
FullHttpResponse response2 = makeHttpResponse(content2, HttpResponseStatus.OK);
|
||||
FullHttpResponse response3 = makeHttpResponse(content3, HttpResponseStatus.BAD_REQUEST);
|
||||
FullHttpResponse response3 = makeHttpResponse(content2, HttpResponseStatus.OK);
|
||||
FullHttpResponse response4 = makeHttpResponse(content3, HttpResponseStatus.BAD_REQUEST);
|
||||
metrics.responseReceived(protocol, certHash, response1, Duration.millis(5));
|
||||
metrics.responseReceived(protocol, certHash, response2, Duration.millis(8));
|
||||
metrics.responseReceived(protocol, certHash, response3, Duration.millis(2));
|
||||
metrics.responseReceived(protocol, certHash, response3, Duration.millis(15));
|
||||
metrics.responseReceived(protocol, certHash, response4, Duration.millis(2));
|
||||
|
||||
assertThat(BackendMetrics.requestsCounter).hasNoOtherValues();
|
||||
assertThat(BackendMetrics.requestBytes).hasNoOtherValues();
|
||||
|
||||
@@ -15,11 +15,17 @@
|
||||
package google.registry.proxy.metric;
|
||||
|
||||
import static com.google.common.truth.Truth.assertThat;
|
||||
import static com.google.monitoring.metrics.contrib.DistributionMetricSubject.assertThat;
|
||||
import static com.google.monitoring.metrics.contrib.LongMetricSubject.assertThat;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import io.netty.channel.ChannelFuture;
|
||||
import io.netty.channel.DefaultChannelId;
|
||||
import io.netty.channel.embedded.EmbeddedChannel;
|
||||
import java.util.Random;
|
||||
import org.joda.time.Duration;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
@@ -28,10 +34,11 @@ class FrontendMetricsTest {
|
||||
|
||||
private static final String PROTOCOL = "some protocol";
|
||||
private static final String CERT_HASH = "abc_blah_1134zdf";
|
||||
private final FrontendMetrics metrics = new FrontendMetrics();
|
||||
private FrontendMetrics metrics;
|
||||
|
||||
@BeforeEach
|
||||
void beforeEach() {
|
||||
metrics = new FrontendMetrics(1.0, new Random());
|
||||
metrics.resetMetrics();
|
||||
}
|
||||
|
||||
@@ -60,8 +67,13 @@ class FrontendMetricsTest {
|
||||
|
||||
@Test
|
||||
void testSuccess_twoConnections_sameClient() {
|
||||
Random mockRandom = mock(Random.class);
|
||||
metrics = new FrontendMetrics(0.2, mockRandom);
|
||||
// The third response won't be logged.
|
||||
when(mockRandom.nextDouble()).thenReturn(.1, .04, .5);
|
||||
EmbeddedChannel channel1 = new EmbeddedChannel();
|
||||
EmbeddedChannel channel2 = new EmbeddedChannel(DefaultChannelId.newInstance());
|
||||
EmbeddedChannel channel3 = new EmbeddedChannel();
|
||||
|
||||
metrics.registerActiveConnection(PROTOCOL, CERT_HASH, channel1);
|
||||
assertThat(channel1.isActive()).isTrue();
|
||||
@@ -85,6 +97,27 @@ class FrontendMetricsTest {
|
||||
.and()
|
||||
.hasNoOtherValues();
|
||||
|
||||
metrics.responseSent(PROTOCOL, CERT_HASH, Duration.millis(10));
|
||||
metrics.responseSent(PROTOCOL, CERT_HASH, Duration.millis(8));
|
||||
metrics.responseSent(PROTOCOL, CERT_HASH, Duration.millis(13));
|
||||
|
||||
metrics.registerActiveConnection(PROTOCOL, CERT_HASH, channel3);
|
||||
assertThat(channel3.isActive()).isTrue();
|
||||
assertThat(FrontendMetrics.activeConnectionsGauge)
|
||||
.hasValueForLabels(2, PROTOCOL, CERT_HASH)
|
||||
.and()
|
||||
.hasNoOtherValues();
|
||||
// All connection counts are recorded as metrics, but ...
|
||||
assertThat(FrontendMetrics.totalConnectionsCounter)
|
||||
.hasValueForLabels(3, PROTOCOL, CERT_HASH)
|
||||
.and()
|
||||
.hasNoOtherValues();
|
||||
// Latency stats are subject to the metrics ratio.
|
||||
assertThat(FrontendMetrics.latencyMs)
|
||||
.hasDataSetForLabels(ImmutableSet.of(10, 8), PROTOCOL, CERT_HASH)
|
||||
.and()
|
||||
.hasNoOtherValues();
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
ChannelFuture unusedFuture1 = channel1.close();
|
||||
assertThat(channel1.isActive()).isFalse();
|
||||
@@ -93,7 +126,7 @@ class FrontendMetricsTest {
|
||||
.and()
|
||||
.hasNoOtherValues();
|
||||
assertThat(FrontendMetrics.totalConnectionsCounter)
|
||||
.hasValueForLabels(2, PROTOCOL, CERT_HASH)
|
||||
.hasValueForLabels(3, PROTOCOL, CERT_HASH)
|
||||
.and()
|
||||
.hasNoOtherValues();
|
||||
|
||||
@@ -102,7 +135,16 @@ class FrontendMetricsTest {
|
||||
assertThat(channel2.isActive()).isFalse();
|
||||
assertThat(FrontendMetrics.activeConnectionsGauge).hasNoOtherValues();
|
||||
assertThat(FrontendMetrics.totalConnectionsCounter)
|
||||
.hasValueForLabels(2, PROTOCOL, CERT_HASH)
|
||||
.hasValueForLabels(3, PROTOCOL, CERT_HASH)
|
||||
.and()
|
||||
.hasNoOtherValues();
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
ChannelFuture unusedFuture3 = channel3.close();
|
||||
assertThat(channel3.isActive()).isFalse();
|
||||
assertThat(FrontendMetrics.activeConnectionsGauge).hasNoOtherValues();
|
||||
assertThat(FrontendMetrics.totalConnectionsCounter)
|
||||
.hasValueForLabels(3, PROTOCOL, CERT_HASH)
|
||||
.and()
|
||||
.hasNoOtherValues();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user