diff --git a/java/google/registry/model/server/Lock.java b/java/google/registry/model/server/Lock.java index f7e9b6e21..92190e892 100644 --- a/java/google/registry/model/server/Lock.java +++ b/java/google/registry/model/server/Lock.java @@ -18,6 +18,8 @@ import static com.google.common.base.Preconditions.checkArgument; import static google.registry.model.ofy.ObjectifyService.ofy; import static google.registry.util.DateTimeUtils.isAtOrAfter; +import com.google.auto.value.AutoValue; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Strings; import com.googlecode.objectify.VoidWork; import com.googlecode.objectify.Work; @@ -49,6 +51,12 @@ public class Lock extends ImmutableObject { private static final FormattingLogger logger = FormattingLogger.getLoggerForCallerClass(); + /** Disposition of locking, for monitoring. */ + enum LockState { IN_USE, FREE, TIMED_OUT, OWNER_DIED } + + @VisibleForTesting + static LockMetrics lockMetrics = new LockMetrics(); + /** The name of the locked resource. */ @Id String lockId; @@ -90,19 +98,82 @@ public class Lock extends ImmutableObject { return String.format("%s-%s", tld, resourceName); } + @AutoValue + abstract static class AcquireResult { + public abstract DateTime transactionTime(); + public abstract @Nullable Lock existingLock(); + public abstract @Nullable Lock newLock(); + public abstract LockState lockState(); + + public static AcquireResult create( + DateTime transactionTime, + @Nullable Lock existingLock, + @Nullable Lock newLock, + LockState lockState) { + return new AutoValue_Lock_AcquireResult(transactionTime, existingLock, newLock, lockState); + } + } + + private static void logAcquireResult(AcquireResult acquireResult) { + try { + Lock lock = acquireResult.existingLock(); + DateTime now = acquireResult.transactionTime(); + switch (acquireResult.lockState()) { + case IN_USE: + logger.infofmt( + "Existing lock by request %s is still valid now %s (until %s) lock: %s", + lock.requestLogId, + now, + lock.expirationTime, + lock.lockId); + break; + case TIMED_OUT: + logger.infofmt( + "Existing lock by request %s is timed out now %s (was valid until %s) lock: %s", + lock.requestLogId, + now, + lock.expirationTime, + lock.lockId); + break; + case OWNER_DIED: + logger.infofmt( + "Existing lock is valid now %s (until %s), but owner (%s) isn't running lock: %s", + now, + lock.expirationTime, + lock.requestLogId, + lock.lockId); + break; + case FREE: + // There was no existing lock + break; + } + Lock newLock = acquireResult.newLock(); + if (acquireResult.newLock() != null) { + logger.infofmt( + "acquire succeeded %s lock: %s", + newLock, + newLock.lockId); + } + } catch (Throwable e) { + // We might get here if there is a NullPointerException for example, if AcquireResult wasn't + // constructed correctly. Simply log it for debugging but continue as if nothing happened + logger.warningfmt(e, "Error while logging AcquireResult %s. Continuing.", acquireResult); + } + } + /** Try to acquire a lock. Returns absent if it can't be acquired. */ public static Optional acquire( final String resourceName, @Nullable final String tld, final Duration leaseLength, final RequestStatusChecker requestStatusChecker) { + String lockId = makeLockId(resourceName, tld); // It's important to use transactNew rather than transact, because a Lock can be used to control // access to resources like GCS that can't be transactionally rolled back. Therefore, the lock // must be definitively acquired before it is used, even when called inside another transaction. - return Optional.ofNullable(ofy().transactNew(new Work() { + AcquireResult acquireResult = ofy().transactNew(new Work() { @Override - public Lock run() { - String lockId = makeLockId(resourceName, tld); + public AcquireResult run() { DateTime now = ofy().getTransactionTime(); // Checking if an unexpired lock still exists - if so, the lock can't be acquired. @@ -111,29 +182,18 @@ public class Lock extends ImmutableObject { logger.infofmt( "Loaded existing lock: %s for request: %s", lock.lockId, lock.requestLogId); } - // TODO(b/63982642): remove check on requestLogId being null once migration is done - // Until then we assume missing requestLogId means the app is still running (since we have - // no information to the contrary) - if (lock != null - && !isAtOrAfter(now, lock.expirationTime) - && (lock.requestLogId == null || requestStatusChecker.isRunning(lock.requestLogId))) { - logger.infofmt( - "Existing lock by request %s is still valid now %s (until %s) lock: %s", - lock.requestLogId, - now, - lock.expirationTime, - lockId); - return null; + LockState lockState; + if (lock == null) { + lockState = LockState.FREE; + } else if (isAtOrAfter(now, lock.expirationTime)) { + lockState = LockState.TIMED_OUT; + } else if (!requestStatusChecker.isRunning(lock.requestLogId)) { + lockState = LockState.OWNER_DIED; + } else { + lockState = LockState.IN_USE; + return AcquireResult.create(now, lock, null, lockState); } - if (lock != null) { - logger.infofmt( - "Existing lock by request %s is timed out now %s (was valid until %s) lock: %s", - lock.requestLogId, - now, - lock.expirationTime, - lockId); - } Lock newLock = create( resourceName, tld, @@ -142,12 +202,12 @@ public class Lock extends ImmutableObject { // Locks are not parented under an EntityGroupRoot (so as to avoid write contention) and // don't need to be backed up. ofy().saveWithoutBackup().entity(newLock); - logger.infofmt( - "acquire succeeded %s lock: %s", - newLock, - lockId); - return newLock; - }})); + return AcquireResult.create(now, lock, newLock, lockState); + }}); + + logAcquireResult(acquireResult); + lockMetrics.record(resourceName, tld, acquireResult.lockState()); + return Optional.ofNullable(acquireResult.newLock()); } /** Release the lock. */ diff --git a/java/google/registry/model/server/LockMetrics.java b/java/google/registry/model/server/LockMetrics.java new file mode 100644 index 000000000..436efd70b --- /dev/null +++ b/java/google/registry/model/server/LockMetrics.java @@ -0,0 +1,45 @@ +// Copyright 2017 The Nomulus Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package google.registry.model.server; + +import com.google.common.collect.ImmutableSet; +import google.registry.model.server.Lock.LockState; +import google.registry.monitoring.metrics.IncrementableMetric; +import google.registry.monitoring.metrics.LabelDescriptor; +import google.registry.monitoring.metrics.MetricRegistryImpl; +import javax.annotation.Nullable; + +/** Metrics for lock contention. */ +class LockMetrics { + + private static final ImmutableSet LABEL_DESCRIPTORS = + ImmutableSet.of( + LabelDescriptor.create("tld", "TLD"), + LabelDescriptor.create("resource", "resource name"), + LabelDescriptor.create( + "state", "The existing lock state (before attempting to acquire).")); + + private static final IncrementableMetric lockRequestsMetric = + MetricRegistryImpl.getDefault() + .newIncrementableMetric( + "/lock/acquire_lock_requests", + "Count of lock acquisition attempts", + "count", + LABEL_DESCRIPTORS); + + void record(String resourceName, @Nullable String tld, LockState state) { + lockRequestsMetric.increment(String.valueOf(tld), resourceName, state.name()); + } +} diff --git a/javatests/google/registry/model/server/LockTest.java b/javatests/google/registry/model/server/LockTest.java index 6733b9969..c3e9d3b06 100644 --- a/javatests/google/registry/model/server/LockTest.java +++ b/javatests/google/registry/model/server/LockTest.java @@ -14,12 +14,18 @@ package google.registry.model.server; -import static com.google.common.truth.Truth.assertThat; import static com.google.common.truth.Truth8.assertThat; +import static google.registry.model.server.Lock.LockState.FREE; +import static google.registry.model.server.Lock.LockState.IN_USE; +import static google.registry.model.server.Lock.LockState.OWNER_DIED; +import static google.registry.model.server.Lock.LockState.TIMED_OUT; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.when; import google.registry.model.ofy.Ofy; +import google.registry.model.server.Lock.LockState; import google.registry.testing.AppEngineRule; import google.registry.testing.ExceptionRule; import google.registry.testing.FakeClock; @@ -53,63 +59,70 @@ public class LockTest { @Rule public final ExceptionRule thrown = new ExceptionRule(); - private Optional acquire(String tld, Duration leaseLength) { - return Lock.acquire(RESOURCE_NAME, tld, leaseLength, requestStatusChecker); + private Optional acquire(String tld, Duration leaseLength, LockState expectedLockState) { + Lock.lockMetrics = mock(LockMetrics.class); + Optional lock = Lock.acquire(RESOURCE_NAME, tld, leaseLength, requestStatusChecker); + verify(Lock.lockMetrics).record(RESOURCE_NAME, tld, expectedLockState); + verifyNoMoreInteractions(Lock.lockMetrics); + Lock.lockMetrics = null; + return lock; } + @Before public void setUp() { + Lock.lockMetrics = null; when(requestStatusChecker.getLogId()).thenReturn("current-request-id"); when(requestStatusChecker.isRunning("current-request-id")).thenReturn(true); } @Test public void testReleasedExplicitly() throws Exception { - Optional lock = acquire("", ONE_DAY); + Optional lock = acquire("", ONE_DAY, FREE); assertThat(lock).isPresent(); // We can't get it again at the same time. - assertThat(acquire("", ONE_DAY)).isEmpty(); + assertThat(acquire("", ONE_DAY, IN_USE)).isEmpty(); // But if we release it, it's available. lock.get().release(); - assertThat(acquire("", ONE_DAY)).isPresent(); + assertThat(acquire("", ONE_DAY, FREE)).isPresent(); } @Test public void testReleasedAfterTimeout() throws Exception { FakeClock clock = new FakeClock(); inject.setStaticField(Ofy.class, "clock", clock); - assertThat(acquire("", TWO_MILLIS)).isPresent(); + assertThat(acquire("", TWO_MILLIS, FREE)).isPresent(); // We can't get it again at the same time. - assertThat(acquire("", TWO_MILLIS)).isEmpty(); + assertThat(acquire("", TWO_MILLIS, IN_USE)).isEmpty(); // A second later we still can't get the lock. clock.advanceOneMilli(); - assertThat(acquire("", TWO_MILLIS)).isEmpty(); + assertThat(acquire("", TWO_MILLIS, IN_USE)).isEmpty(); // But two seconds later we can get it. clock.advanceOneMilli(); - assertThat(acquire("", TWO_MILLIS)).isPresent(); + assertThat(acquire("", TWO_MILLIS, TIMED_OUT)).isPresent(); } @Test public void testReleasedAfterRequestFinish() throws Exception { - assertThat(acquire("", ONE_DAY)).isPresent(); + assertThat(acquire("", ONE_DAY, FREE)).isPresent(); // We can't get it again while request is active - assertThat(acquire("", ONE_DAY)).isEmpty(); + assertThat(acquire("", ONE_DAY, IN_USE)).isEmpty(); // But if request is finished, we can get it. when(requestStatusChecker.isRunning("current-request-id")).thenReturn(false); - assertThat(acquire("", ONE_DAY)).isPresent(); + assertThat(acquire("", ONE_DAY, OWNER_DIED)).isPresent(); } @Test public void testTldsAreIndependent() throws Exception { - Optional lockA = acquire("a", ONE_DAY); + Optional lockA = acquire("a", ONE_DAY, FREE); assertThat(lockA).isPresent(); // For a different tld we can still get a lock with the same name. - Optional lockB = acquire("b", ONE_DAY); + Optional lockB = acquire("b", ONE_DAY, FREE); assertThat(lockB).isPresent(); // We can't get lockB again at the same time. - assertThat(acquire("b", ONE_DAY)).isEmpty(); + assertThat(acquire("b", ONE_DAY, IN_USE)).isEmpty(); // Releasing lockA has no effect on lockB (even though we are still using the "b" tld). lockA.get().release(); - assertThat(acquire("b", ONE_DAY)).isEmpty(); + assertThat(acquire("b", ONE_DAY, IN_USE)).isEmpty(); } @Test