1
0
mirror of https://github.com/google/nomulus synced 2025-12-23 06:15:42 +00:00

Bump the number of retries in transaction failures and add skew (#2699)

This can potentially help even more with serializable transaction
failures (optimistic locking exceptions, which are expected to occur
somewhat frequently).

With six attempts, we will sleep at most five times, for 100, 200, 400,
800, and 1600 ms respectively, for a total of at most 3.1 seconds before
skew (much less than the EPP maximum, which I believe (?) to be 30 seconds).

In addition, we apply a random skew of up to ±20% to each backoff sleep in
an attempt to spread out possibly-conflicting transaction retries.
This commit is contained in:
gbrodman
2025-03-21 15:47:55 -04:00
committed by GitHub
parent ed33c7424d
commit daa56e6d85
3 changed files with 15 additions and 10 deletions

View File

@@ -86,7 +86,7 @@ import org.joda.time.DateTime;
public class JpaTransactionManagerImpl implements JpaTransactionManager {
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
private static final Retrier retrier = new Retrier(new SystemSleeper(), 3);
private static final Retrier retrier = new Retrier(new SystemSleeper(), 6);
private static final String NESTED_TRANSACTION_MESSAGE =
"Nested transaction detected. Try refactoring to avoid nested transactions. If unachievable,"
+ " use reTransact() in nested transactions";

View File

@@ -293,11 +293,11 @@ class JpaTransactionManagerImplTest {
assertThrows(
OptimisticLockException.class,
() -> spyJpaTm.transact(() -> spyJpaTm.delete(theEntityKey)));
verify(spyJpaTm, times(3)).delete(theEntityKey);
verify(spyJpaTm, times(6)).delete(theEntityKey);
assertThrows(
OptimisticLockException.class,
() -> spyJpaTm.transact(() -> spyJpaTm.delete(theEntityKey)));
verify(spyJpaTm, times(6)).delete(theEntityKey);
verify(spyJpaTm, times(12)).delete(theEntityKey);
}
@Test
@@ -355,10 +355,10 @@ class JpaTransactionManagerImplTest {
spyJpaTm.transact(() -> spyJpaTm.insert(theEntity));
assertThrows(
RuntimeException.class, () -> spyJpaTm.transact(() -> spyJpaTm.delete(theEntityKey)));
verify(spyJpaTm, times(3)).delete(theEntityKey);
verify(spyJpaTm, times(6)).delete(theEntityKey);
assertThrows(
RuntimeException.class, () -> spyJpaTm.transact(() -> spyJpaTm.delete(theEntityKey)));
verify(spyJpaTm, times(6)).delete(theEntityKey);
verify(spyJpaTm, times(12)).delete(theEntityKey);
}
@Test
@@ -759,11 +759,11 @@ class JpaTransactionManagerImplTest {
spyJpaTm.transact(
() -> {
spyJpaTm.exists(theEntity);
spyJpaTm.transact(() -> spyJpaTm.delete(theEntityKey));
spyJpaTm.delete(theEntityKey);
}));
verify(spyJpaTm, times(3)).exists(theEntity);
verify(spyJpaTm, times(3)).delete(theEntityKey);
verify(spyJpaTm, times(6)).exists(theEntity);
verify(spyJpaTm, times(6)).delete(theEntityKey);
}
private static void insertPerson(int age) {

View File

@@ -24,6 +24,7 @@ import com.google.common.flogger.FluentLogger;
import jakarta.inject.Inject;
import jakarta.inject.Named;
import java.io.Serializable;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.function.Predicate;
@@ -34,6 +35,8 @@ public class Retrier implements Serializable {
private static final long serialVersionUID = 1167386907195735483L;
private static final Random randomForSkew = new Random();
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
private final Sleeper sleeper;
@@ -157,9 +160,11 @@ public class Retrier implements Serializable {
throw new RuntimeException(e);
}
failureReporter.beforeRetry(e, failures, attempts);
// Wait (skewed) 100ms on the first attempt, doubling on each subsequent attempt.
long backoffMillis = pow(2, failures) * 100L;
long sleepDurationMillis = Math.round(randomForSkew.nextDouble(0.8, 1.2) * backoffMillis);
try {
// Wait 100ms on the first attempt, doubling on each subsequent attempt.
sleeper.sleep(Duration.millis(pow(2, failures) * 100L));
sleeper.sleep(Duration.millis(sleepDurationMillis));
} catch (InterruptedException e2) {
// Since we're not rethrowing InterruptedException, set the interrupt state on the thread
// so the next blocking operation will know to abort the thread.