diff --git a/proxy/src/main/java/google/registry/proxy/ProxyConfig.java b/proxy/src/main/java/google/registry/proxy/ProxyConfig.java index 94f5a62bf..7b080ffcc 100644 --- a/proxy/src/main/java/google/registry/proxy/ProxyConfig.java +++ b/proxy/src/main/java/google/registry/proxy/ProxyConfig.java @@ -113,6 +113,8 @@ public class ProxyConfig { public int stackdriverMaxQps; public int stackdriverMaxPointsPerRequest; public int writeIntervalSeconds; + public double frontendMetricsRatio; + public double backendMetricsRatio; } /** Configuration options that apply to quota management. */ diff --git a/proxy/src/main/java/google/registry/proxy/ProxyModule.java b/proxy/src/main/java/google/registry/proxy/ProxyModule.java index 3556a4128..b61bf1206 100644 --- a/proxy/src/main/java/google/registry/proxy/ProxyModule.java +++ b/proxy/src/main/java/google/registry/proxy/ProxyModule.java @@ -61,6 +61,7 @@ import java.time.Duration; import java.util.Arrays; import java.util.Base64; import java.util.Optional; +import java.util.Random; import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -395,6 +396,26 @@ public class ProxyModule { return Duration.ofSeconds(config.serverCertificateCacheSeconds); } + @Singleton + @Provides + @Named("frontendMetricsRatio") + static double provideFrontendMetricsRatio(ProxyConfig config) { + return config.metrics.frontendMetricsRatio; + } + + @Singleton + @Provides + @Named("backendMetricsRatio") + static double provideBackendMetricsRatio(ProxyConfig config) { + return config.metrics.backendMetricsRatio; + } + + @Singleton + @Provides + static Random provideRandom() { + return new Random(); + } + /** Root level component that exposes the port-to-protocol map. */ @Singleton @Component( diff --git a/proxy/src/main/java/google/registry/proxy/config/default-config.yaml b/proxy/src/main/java/google/registry/proxy/config/default-config.yaml index ed6fd0eab..10a224395 100644 --- a/proxy/src/main/java/google/registry/proxy/config/default-config.yaml +++ b/proxy/src/main/java/google/registry/proxy/config/default-config.yaml @@ -200,3 +200,15 @@ metrics: # How often metrics are written. writeIntervalSeconds: 60 + + # What ratio of frontend request metrics should be stochastically recorded + # (0.0 means none, 1.0 means all). This is useful for reducing metrics volume, + # and thus cost, while still recording some information for performance + # monitoring purposes. + frontendMetricsRatio: 1.0 + + # What ratio of backend request metrics should be stochastically recorded + # (0.0 means none, 1.0 means all). This is useful for reducing metrics volume, + # and thus cost, while still recording some information for performance + # monitoring purposes. + backendMetricsRatio: 1.0 diff --git a/proxy/src/main/java/google/registry/proxy/metric/BackendMetrics.java b/proxy/src/main/java/google/registry/proxy/metric/BackendMetrics.java index ee7603d6e..f14372c7f 100644 --- a/proxy/src/main/java/google/registry/proxy/metric/BackendMetrics.java +++ b/proxy/src/main/java/google/registry/proxy/metric/BackendMetrics.java @@ -22,7 +22,9 @@ import com.google.monitoring.metrics.MetricRegistryImpl; import google.registry.util.NonFinalForTesting; import io.netty.handler.codec.http.FullHttpResponse; import jakarta.inject.Inject; +import jakarta.inject.Named; import jakarta.inject.Singleton; +import java.util.Random; import org.joda.time.Duration; /** Backend metrics instrumentation. */ @@ -75,8 +77,14 @@ public class BackendMetrics extends BaseMetrics { LABELS, DEFAULT_LATENCY_FITTER); + private final Random random; + private final double backendMetricsRatio; + @Inject - BackendMetrics() {} + BackendMetrics(@Named("backendMetricsRatio") double backendMetricsRatio, Random random) { + this.backendMetricsRatio = backendMetricsRatio; + this.random = random; + } @Override void resetMetrics() { @@ -89,6 +97,10 @@ public class BackendMetrics extends BaseMetrics { @NonFinalForTesting public void requestSent(String protocol, String certHash, int bytes) { + // Short-circuit metrics recording randomly according to the configured ratio. + if (random.nextDouble() > backendMetricsRatio) { + return; + } requestsCounter.increment(protocol, certHash); requestBytes.record(bytes, protocol, certHash); } @@ -96,6 +108,10 @@ public class BackendMetrics extends BaseMetrics { @NonFinalForTesting public void responseReceived( String protocol, String certHash, FullHttpResponse response, Duration latency) { + // Short-circuit metrics recording randomly according to the configured ratio. + if (random.nextDouble() > backendMetricsRatio) { + return; + } latencyMs.record(latency.getMillis(), protocol, certHash); responseBytes.record(response.content().readableBytes(), protocol, certHash); responsesCounter.increment(protocol, certHash, response.status().toString()); diff --git a/proxy/src/main/java/google/registry/proxy/metric/FrontendMetrics.java b/proxy/src/main/java/google/registry/proxy/metric/FrontendMetrics.java index 8a4decb58..d6eb1a45e 100644 --- a/proxy/src/main/java/google/registry/proxy/metric/FrontendMetrics.java +++ b/proxy/src/main/java/google/registry/proxy/metric/FrontendMetrics.java @@ -26,8 +26,10 @@ import io.netty.channel.group.ChannelGroup; import io.netty.channel.group.DefaultChannelGroup; import io.netty.util.concurrent.GlobalEventExecutor; import jakarta.inject.Inject; +import jakarta.inject.Named; import jakarta.inject.Singleton; import java.util.Map; +import java.util.Random; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import org.joda.time.Duration; @@ -78,8 +80,14 @@ public class FrontendMetrics extends BaseMetrics { LABELS, DEFAULT_LATENCY_FITTER); + private final Random random; + private final double frontendMetricsRatio; + @Inject - public FrontendMetrics() {} + FrontendMetrics(@Named("frontendMetricsRatio") double frontendMetricsRatio, Random random) { + this.frontendMetricsRatio = frontendMetricsRatio; + this.random = random; + } @Override void resetMetrics() { @@ -109,6 +117,10 @@ public class FrontendMetrics extends BaseMetrics { @NonFinalForTesting public void responseSent(String protocol, String certHash, Duration latency) { + // Short-circuit metrics recording randomly according to the configured ratio. + if (random.nextDouble() > frontendMetricsRatio) { + return; + } latencyMs.record(latency.getMillis(), protocol, certHash); } } diff --git a/proxy/src/test/java/google/registry/proxy/ProtocolModuleTest.java b/proxy/src/test/java/google/registry/proxy/ProtocolModuleTest.java index 9227c5df2..40a92f4d6 100644 --- a/proxy/src/test/java/google/registry/proxy/ProtocolModuleTest.java +++ b/proxy/src/test/java/google/registry/proxy/ProtocolModuleTest.java @@ -56,6 +56,7 @@ import jakarta.inject.Named; import jakarta.inject.Provider; import jakarta.inject.Singleton; import java.time.Duration; +import java.util.Random; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; @@ -312,6 +313,26 @@ public abstract class ProtocolModuleTest { return Duration.ofHours(1); } + @Singleton + @Provides + @Named("frontendMetricsRatio") + static double provideFrontendMetricsRatio() { + return 1.0; + } + + @Singleton + @Provides + @Named("backendMetricsRatio") + static double providebackendMetricsRatio() { + return 1.0; + } + + @Singleton + @Provides + static Random provideRandom() { + return new Random(); + } + // This method is only here to satisfy Dagger binding, but is never used. In test environment, // it is the self-signed certificate and its key that ends up being used. @Singleton diff --git a/proxy/src/test/java/google/registry/proxy/metric/BackendMetricsTest.java b/proxy/src/test/java/google/registry/proxy/metric/BackendMetricsTest.java index 009b9b586..7e158fd71 100644 --- a/proxy/src/test/java/google/registry/proxy/metric/BackendMetricsTest.java +++ b/proxy/src/test/java/google/registry/proxy/metric/BackendMetricsTest.java @@ -18,11 +18,14 @@ import static com.google.monitoring.metrics.contrib.DistributionMetricSubject.as import static com.google.monitoring.metrics.contrib.LongMetricSubject.assertThat; import static google.registry.proxy.TestUtils.makeHttpPostRequest; import static google.registry.proxy.TestUtils.makeHttpResponse; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import com.google.common.collect.ImmutableSet; import io.netty.handler.codec.http.FullHttpRequest; import io.netty.handler.codec.http.FullHttpResponse; import io.netty.handler.codec.http.HttpResponseStatus; +import java.util.Random; import org.joda.time.Duration; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -34,10 +37,11 @@ class BackendMetricsTest { private final String certHash = "blah12345"; private final String protocol = "frontend protocol"; - private final BackendMetrics metrics = new BackendMetrics(); + private BackendMetrics metrics; @BeforeEach void beforeEach() { + metrics = new BackendMetrics(1.0, new Random()); metrics.resetMetrics(); } @@ -107,15 +111,21 @@ class BackendMetricsTest { @Test void testSuccess_multipleResponses() { + Random mockRandom = mock(Random.class); + metrics = new BackendMetrics(0.2, mockRandom); + // The third response won't be logged. + when(mockRandom.nextDouble()).thenReturn(.1, .04, .5, .15); String content1 = "some response"; String content2 = "other response"; String content3 = "a very bad response"; FullHttpResponse response1 = makeHttpResponse(content1, HttpResponseStatus.OK); FullHttpResponse response2 = makeHttpResponse(content2, HttpResponseStatus.OK); - FullHttpResponse response3 = makeHttpResponse(content3, HttpResponseStatus.BAD_REQUEST); + FullHttpResponse response3 = makeHttpResponse(content2, HttpResponseStatus.OK); + FullHttpResponse response4 = makeHttpResponse(content3, HttpResponseStatus.BAD_REQUEST); metrics.responseReceived(protocol, certHash, response1, Duration.millis(5)); metrics.responseReceived(protocol, certHash, response2, Duration.millis(8)); - metrics.responseReceived(protocol, certHash, response3, Duration.millis(2)); + metrics.responseReceived(protocol, certHash, response3, Duration.millis(15)); + metrics.responseReceived(protocol, certHash, response4, Duration.millis(2)); assertThat(BackendMetrics.requestsCounter).hasNoOtherValues(); assertThat(BackendMetrics.requestBytes).hasNoOtherValues(); diff --git a/proxy/src/test/java/google/registry/proxy/metric/FrontendMetricsTest.java b/proxy/src/test/java/google/registry/proxy/metric/FrontendMetricsTest.java index ec839a61d..3914b982a 100644 --- a/proxy/src/test/java/google/registry/proxy/metric/FrontendMetricsTest.java +++ b/proxy/src/test/java/google/registry/proxy/metric/FrontendMetricsTest.java @@ -15,11 +15,17 @@ package google.registry.proxy.metric; import static com.google.common.truth.Truth.assertThat; +import static com.google.monitoring.metrics.contrib.DistributionMetricSubject.assertThat; import static com.google.monitoring.metrics.contrib.LongMetricSubject.assertThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import com.google.common.collect.ImmutableSet; import io.netty.channel.ChannelFuture; import io.netty.channel.DefaultChannelId; import io.netty.channel.embedded.EmbeddedChannel; +import java.util.Random; +import org.joda.time.Duration; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -28,10 +34,11 @@ class FrontendMetricsTest { private static final String PROTOCOL = "some protocol"; private static final String CERT_HASH = "abc_blah_1134zdf"; - private final FrontendMetrics metrics = new FrontendMetrics(); + private FrontendMetrics metrics; @BeforeEach void beforeEach() { + metrics = new FrontendMetrics(1.0, new Random()); metrics.resetMetrics(); } @@ -60,8 +67,13 @@ class FrontendMetricsTest { @Test void testSuccess_twoConnections_sameClient() { + Random mockRandom = mock(Random.class); + metrics = new FrontendMetrics(0.2, mockRandom); + // The third response won't be logged. + when(mockRandom.nextDouble()).thenReturn(.1, .04, .5); EmbeddedChannel channel1 = new EmbeddedChannel(); EmbeddedChannel channel2 = new EmbeddedChannel(DefaultChannelId.newInstance()); + EmbeddedChannel channel3 = new EmbeddedChannel(); metrics.registerActiveConnection(PROTOCOL, CERT_HASH, channel1); assertThat(channel1.isActive()).isTrue(); @@ -85,6 +97,27 @@ class FrontendMetricsTest { .and() .hasNoOtherValues(); + metrics.responseSent(PROTOCOL, CERT_HASH, Duration.millis(10)); + metrics.responseSent(PROTOCOL, CERT_HASH, Duration.millis(8)); + metrics.responseSent(PROTOCOL, CERT_HASH, Duration.millis(13)); + + metrics.registerActiveConnection(PROTOCOL, CERT_HASH, channel3); + assertThat(channel3.isActive()).isTrue(); + assertThat(FrontendMetrics.activeConnectionsGauge) + .hasValueForLabels(2, PROTOCOL, CERT_HASH) + .and() + .hasNoOtherValues(); + // All connection counts are recorded as metrics, but ... + assertThat(FrontendMetrics.totalConnectionsCounter) + .hasValueForLabels(3, PROTOCOL, CERT_HASH) + .and() + .hasNoOtherValues(); + // Latency stats are subject to the metrics ratio. + assertThat(FrontendMetrics.latencyMs) + .hasDataSetForLabels(ImmutableSet.of(10, 8), PROTOCOL, CERT_HASH) + .and() + .hasNoOtherValues(); + @SuppressWarnings("unused") ChannelFuture unusedFuture1 = channel1.close(); assertThat(channel1.isActive()).isFalse(); @@ -93,7 +126,7 @@ class FrontendMetricsTest { .and() .hasNoOtherValues(); assertThat(FrontendMetrics.totalConnectionsCounter) - .hasValueForLabels(2, PROTOCOL, CERT_HASH) + .hasValueForLabels(3, PROTOCOL, CERT_HASH) .and() .hasNoOtherValues(); @@ -102,7 +135,16 @@ class FrontendMetricsTest { assertThat(channel2.isActive()).isFalse(); assertThat(FrontendMetrics.activeConnectionsGauge).hasNoOtherValues(); assertThat(FrontendMetrics.totalConnectionsCounter) - .hasValueForLabels(2, PROTOCOL, CERT_HASH) + .hasValueForLabels(3, PROTOCOL, CERT_HASH) + .and() + .hasNoOtherValues(); + + @SuppressWarnings("unused") + ChannelFuture unusedFuture3 = channel3.close(); + assertThat(channel3.isActive()).isFalse(); + assertThat(FrontendMetrics.activeConnectionsGauge).hasNoOtherValues(); + assertThat(FrontendMetrics.totalConnectionsCounter) + .hasValueForLabels(3, PROTOCOL, CERT_HASH) .and() .hasNoOtherValues(); }