From dff2d90325ca54daccde968822c325002b4d2a4e Mon Sep 17 00:00:00 2001 From: sarahcaseybot Date: Fri, 29 Mar 2024 16:51:19 -0400 Subject: [PATCH] Add batching to DeleteProberDataAction (#2322) * Add batching to DeleteProberDataAction * Only get time once * Add separate query for dry run * Update querries to actually properly delete all the data * Fix merge conflicts * Add test for foreign key constraints * Make transaction repeatable read * Make queries to subtables native * Add native query for GracePeriodHistory * Kill job after 20 hours * remove extra time check from read query --- .../batch/DeleteProberDataAction.java | 153 ++++++++++++------ .../model/domain/secdns/DomainDsData.java | 2 +- .../registry/request/RequestModule.java | 8 + .../registry/request/RequestParameters.java | 5 + .../server/RefreshDnsForAllDomainsAction.java | 3 +- .../tools/server/ToolsServerModule.java | 6 - .../batch/DeleteProberDataActionTest.java | 33 +++- 7 files changed, 145 insertions(+), 65 deletions(-) diff --git a/core/src/main/java/google/registry/batch/DeleteProberDataAction.java b/core/src/main/java/google/registry/batch/DeleteProberDataAction.java index a2c682f0e..0d2ecd470 100644 --- a/core/src/main/java/google/registry/batch/DeleteProberDataAction.java +++ b/core/src/main/java/google/registry/batch/DeleteProberDataAction.java @@ -16,15 +16,19 @@ package google.registry.batch; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; +import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableSet.toImmutableSet; import static google.registry.dns.DnsUtils.requestDomainDnsRefresh; import static google.registry.model.reporting.HistoryEntry.Type.DOMAIN_DELETE; import static google.registry.model.tld.Tlds.getTldsOfType; +import static google.registry.persistence.PersistenceModule.TransactionIsolationLevel.TRANSACTION_REPEATABLE_READ; import static google.registry.persistence.transaction.TransactionManagerFactory.tm; import static google.registry.request.Action.Method.POST; +import static google.registry.request.RequestParameters.PARAM_BATCH_SIZE; import static google.registry.request.RequestParameters.PARAM_DRY_RUN; import static google.registry.request.RequestParameters.PARAM_TLDS; import static google.registry.util.RegistryEnvironment.PRODUCTION; +import static org.joda.time.DateTimeZone.UTC; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; @@ -41,12 +45,11 @@ import google.registry.request.Action; import google.registry.request.Parameter; import google.registry.request.auth.Auth; import google.registry.util.RegistryEnvironment; +import java.util.Optional; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; import javax.inject.Inject; -import org.hibernate.CacheMode; -import org.hibernate.ScrollMode; -import org.hibernate.ScrollableResults; -import org.hibernate.query.Query; +import javax.persistence.TypedQuery; import org.joda.time.DateTime; import org.joda.time.Duration; @@ -61,6 +64,7 @@ import org.joda.time.Duration; auth = Auth.AUTH_API_ADMIN) public class DeleteProberDataAction implements Runnable { + // TODO(b/323026070): Add email alert on failure of this action private static final FluentLogger logger = FluentLogger.forEnclosingClass(); /** @@ -90,31 +94,38 @@ public class DeleteProberDataAction implements Runnable { // Note: creationTime must be compared to a Java object (CreateAutoTimestamp) but deletionTime can // be compared directly to the SQL timestamp (it's a DateTime) private static final String DOMAIN_QUERY_STRING = - "FROM Domain d WHERE d.tld IN :tlds AND d.domainName NOT LIKE 'nic.%' AND" + "FROM Domain d WHERE d.tld IN :tlds AND d.domainName NOT LIKE 'nic.%%' AND" + " (d.subordinateHosts IS EMPTY OR d.subordinateHosts IS NULL) AND d.creationTime <" - + " :creationTimeCutoff AND ((d.creationTime <= :nowAutoTimestamp AND d.deletionTime >" - + " current_timestamp()) OR d.deletionTime < :nowMinusSoftDeleteDelay) ORDER BY d.repoId"; + + " :creationTimeCutoff AND (d.deletionTime > :now OR d.deletionTime <" + + " :nowMinusSoftDeleteDelay)"; /** Number of domains to retrieve and delete per SQL transaction. */ - private static final int BATCH_SIZE = 1000; + private static final int DEFAULT_BATCH_SIZE = 1000; - @Inject - @Parameter(PARAM_DRY_RUN) boolean isDryRun; + /** List of TLDs to work on. If empty - will work on all TLDs that end with .test. */ - @Inject - @Parameter(PARAM_TLDS) ImmutableSet tlds; - @Inject - @Config("registryAdminClientId") + int batchSize; + String registryAdminRegistrarId; @Inject - DeleteProberDataAction() {} + DeleteProberDataAction( + @Parameter(PARAM_DRY_RUN) boolean isDryRun, + @Parameter(PARAM_TLDS) ImmutableSet tlds, + @Parameter(PARAM_BATCH_SIZE) Optional batchSize, + @Config("registryAdminClientId") String registryAdminRegistrarId) { + this.isDryRun = isDryRun; + this.tlds = tlds; + this.batchSize = batchSize.orElse(DEFAULT_BATCH_SIZE); + this.registryAdminRegistrarId = registryAdminRegistrarId; + } @Override public void run() { + checkArgument(batchSize > 0, "The batch size must be greater than 0"); checkState( !Strings.isNullOrEmpty(registryAdminRegistrarId), "Registry admin client ID must be configured for prober data deletion to work"); @@ -131,13 +142,30 @@ public class DeleteProberDataAction implements Runnable { "If tlds are given, they must all exist and be TEST tlds. Given: %s, not found: %s", tlds, Sets.difference(tlds, deletableTlds)); - runSqlJob(deletableTlds); - } - - private void runSqlJob(ImmutableSet deletableTlds) { AtomicInteger softDeletedDomains = new AtomicInteger(); AtomicInteger hardDeletedDomains = new AtomicInteger(); - tm().transact(() -> processDomains(deletableTlds, softDeletedDomains, hardDeletedDomains)); + AtomicReference> domainsBatch = new AtomicReference<>(); + DateTime startTime = DateTime.now(UTC); + do { + tm().transact( + TRANSACTION_REPEATABLE_READ, + () -> + domainsBatch.set( + processDomains( + deletableTlds, softDeletedDomains, hardDeletedDomains, startTime))); + + // Only process one batch in dryrun mode + if (isDryRun) { + break; + } + + logger.atInfo().log( + "deleteProberData hard deleted %d names with %d batchSize", + hardDeletedDomains.get(), batchSize); + + // Automatically kill the job if it is running for over 20 hours + } while (DateTime.now(UTC).isBefore(startTime.plusHours(20)) + && domainsBatch.get().size() == batchSize); logger.atInfo().log( "%s %d domains.", isDryRun ? "Would have soft-deleted" : "Soft-deleted", softDeletedDomains.get()); @@ -146,46 +174,32 @@ public class DeleteProberDataAction implements Runnable { isDryRun ? "Would have hard-deleted" : "Hard-deleted", hardDeletedDomains.get()); } - private void processDomains( + private ImmutableList processDomains( ImmutableSet deletableTlds, AtomicInteger softDeletedDomains, - AtomicInteger hardDeletedDomains) { - DateTime now = tm().getTransactionTime(); - // Scroll through domains, soft-deleting as necessary (very few will be soft-deleted) and - // keeping track of which domains to hard-delete (there can be many, so we batch them up) - try (ScrollableResults scrollableResult = + AtomicInteger hardDeletedDomains, + DateTime now) { + TypedQuery query = tm().query(DOMAIN_QUERY_STRING, Domain.class) .setParameter("tlds", deletableTlds) .setParameter( "creationTimeCutoff", CreateAutoTimestamp.create(now.minus(DOMAIN_USED_DURATION))) .setParameter("nowMinusSoftDeleteDelay", now.minus(SOFT_DELETE_DELAY)) - .setParameter("nowAutoTimestamp", CreateAutoTimestamp.create(now)) - .unwrap(Query.class) - .setCacheMode(CacheMode.IGNORE) - .scroll(ScrollMode.FORWARD_ONLY)) { + .setParameter("now", now); + ImmutableList domainList = + query.setMaxResults(batchSize).getResultStream().collect(toImmutableList()); ImmutableList.Builder domainRepoIdsToHardDelete = new ImmutableList.Builder<>(); ImmutableList.Builder hostNamesToHardDelete = new ImmutableList.Builder<>(); - for (int i = 1; scrollableResult.next(); i = (i + 1) % BATCH_SIZE) { - Domain domain = (Domain) scrollableResult.get(0); - processDomain( - domain, - domainRepoIdsToHardDelete, - hostNamesToHardDelete, - softDeletedDomains, - hardDeletedDomains); - // Batch the deletion and DB flush + session clearing, so we don't OOM - if (i == 0) { - hardDeleteDomainsAndHosts( - domainRepoIdsToHardDelete.build(), hostNamesToHardDelete.build()); - domainRepoIdsToHardDelete = new ImmutableList.Builder<>(); - hostNamesToHardDelete = new ImmutableList.Builder<>(); - tm().getEntityManager().flush(); - tm().getEntityManager().clear(); - } - } - // process the remainder - hardDeleteDomainsAndHosts(domainRepoIdsToHardDelete.build(), hostNamesToHardDelete.build()); + for (Domain domain : domainList) { + processDomain( + domain, + domainRepoIdsToHardDelete, + hostNamesToHardDelete, + softDeletedDomains, + hardDeletedDomains); } + hardDeleteDomainsAndHosts(domainRepoIdsToHardDelete.build(), hostNamesToHardDelete.build()); + return domainList; } private void processDomain( @@ -225,6 +239,10 @@ public class DeleteProberDataAction implements Runnable { tm().query("DELETE FROM Host WHERE hostName IN :hostNames") .setParameter("hostNames", hostNames) .executeUpdate(); + tm().getEntityManager() + .createNativeQuery("DELETE FROM \"GracePeriod\" WHERE domain_repo_id IN :repoIds") + .setParameter("repoIds", domainRepoIds) + .executeUpdate(); tm().query("DELETE FROM BillingEvent WHERE domainRepoId IN :repoIds") .setParameter("repoIds", domainRepoIds) .executeUpdate(); @@ -234,12 +252,45 @@ public class DeleteProberDataAction implements Runnable { tm().query("DELETE FROM BillingCancellation WHERE domainRepoId IN :repoIds") .setParameter("repoIds", domainRepoIds) .executeUpdate(); - tm().query("DELETE FROM DomainHistory WHERE repoId IN :repoIds") + tm().getEntityManager() + .createNativeQuery("DELETE FROM \"GracePeriodHistory\" WHERE domain_repo_id IN :repoIds") .setParameter("repoIds", domainRepoIds) .executeUpdate(); tm().query("DELETE FROM PollMessage WHERE domainRepoId IN :repoIds") .setParameter("repoIds", domainRepoIds) .executeUpdate(); + tm().getEntityManager() + .createNativeQuery("DELETE FROM \"DomainHost\" WHERE domain_repo_id IN :repoIds") + .setParameter("repoIds", domainRepoIds) + .executeUpdate(); + tm().getEntityManager() + .createNativeQuery( + "DELETE FROM \"DomainHistoryHost\" WHERE domain_history_domain_repo_id IN :repoIds") + .setParameter("repoIds", domainRepoIds) + .executeUpdate(); + tm().getEntityManager() + .createNativeQuery("DELETE FROM \"HostHistory\" WHERE host_name IN :hostNames") + .setParameter("hostNames", hostNames) + .executeUpdate(); + tm().getEntityManager() + .createNativeQuery("DELETE FROM \"DelegationSignerData\" WHERE domain_repo_id IN :repoIds") + .setParameter("repoIds", domainRepoIds) + .executeUpdate(); + tm().getEntityManager() + .createNativeQuery( + "DELETE FROM \"DomainTransactionRecord\" WHERE domain_repo_id IN :repoIds") + .setParameter("repoIds", domainRepoIds) + .executeUpdate(); + tm().getEntityManager() + .createNativeQuery("DELETE FROM \"DomainDsDataHistory\" WHERE domain_repo_id IN :repoIds") + .setParameter("repoIds", domainRepoIds) + .executeUpdate(); + tm().getEntityManager() + .createNativeQuery("DELETE FROM \"DomainHistory\" WHERE domain_repo_id IN :repoIds") + .setParameter("repoIds", domainRepoIds) + .executeUpdate(); + // Delete from domain table is done last so that a trainsient failure in the middle of an action + // run will not prevent the domain from being deleted by this action in a future run tm().query("DELETE FROM Domain WHERE repoId IN :repoIds") .setParameter("repoIds", domainRepoIds) .executeUpdate(); diff --git a/core/src/main/java/google/registry/model/domain/secdns/DomainDsData.java b/core/src/main/java/google/registry/model/domain/secdns/DomainDsData.java index df1ac2610..1124e4a5e 100644 --- a/core/src/main/java/google/registry/model/domain/secdns/DomainDsData.java +++ b/core/src/main/java/google/registry/model/domain/secdns/DomainDsData.java @@ -36,7 +36,7 @@ import javax.xml.bind.annotation.XmlType; * @see RFC 4034 */ @XmlType(name = "dsData") -@Entity +@Entity(name = "DelegationSignerData") @IdClass(DomainDsDataId.class) @Table(name = "DelegationSignerData", indexes = @Index(columnList = "domainRepoId")) public class DomainDsData extends DomainDsDataBase { diff --git a/core/src/main/java/google/registry/request/RequestModule.java b/core/src/main/java/google/registry/request/RequestModule.java index d43a7ba56..9611e4634 100644 --- a/core/src/main/java/google/registry/request/RequestModule.java +++ b/core/src/main/java/google/registry/request/RequestModule.java @@ -18,8 +18,10 @@ import static com.google.common.net.MediaType.JSON_UTF_8; import static google.registry.dns.PublishDnsUpdatesAction.CLOUD_TASKS_RETRY_HEADER; import static google.registry.model.tld.Tlds.assertTldExists; import static google.registry.model.tld.Tlds.assertTldsExist; +import static google.registry.request.RequestParameters.PARAM_BATCH_SIZE; import static google.registry.request.RequestParameters.PARAM_DRY_RUN; import static google.registry.request.RequestParameters.extractBooleanParameter; +import static google.registry.request.RequestParameters.extractOptionalIntParameter; import static google.registry.request.RequestParameters.extractRequiredHeader; import static google.registry.request.RequestParameters.extractRequiredParameter; import static google.registry.request.RequestParameters.extractSetOfParameters; @@ -99,6 +101,12 @@ public final class RequestModule { return extractBooleanParameter(req, PARAM_DRY_RUN); } + @Provides + @Parameter(PARAM_BATCH_SIZE) + static Optional provideBatchSize(HttpServletRequest req) { + return extractOptionalIntParameter(req, PARAM_BATCH_SIZE); + } + @Provides static Response provideResponse(ResponseImpl response) { return response; diff --git a/core/src/main/java/google/registry/request/RequestParameters.java b/core/src/main/java/google/registry/request/RequestParameters.java index e2b61c078..08deba8e0 100644 --- a/core/src/main/java/google/registry/request/RequestParameters.java +++ b/core/src/main/java/google/registry/request/RequestParameters.java @@ -41,6 +41,11 @@ public final class RequestParameters { /** The standardized request parameter name used by any action in dry run mode. */ public static final String PARAM_DRY_RUN = "dryRun"; + /** + * The standardized request parameter name used by any action taking a configurable batch size. + */ + public static final String PARAM_BATCH_SIZE = "batchSize"; + /** * Returns first GET or POST parameter associated with {@code name}. * diff --git a/core/src/main/java/google/registry/tools/server/RefreshDnsForAllDomainsAction.java b/core/src/main/java/google/registry/tools/server/RefreshDnsForAllDomainsAction.java index b43e296fa..2fcfc00f7 100644 --- a/core/src/main/java/google/registry/tools/server/RefreshDnsForAllDomainsAction.java +++ b/core/src/main/java/google/registry/tools/server/RefreshDnsForAllDomainsAction.java @@ -21,6 +21,7 @@ import static google.registry.dns.DnsUtils.requestDomainDnsRefresh; import static google.registry.model.tld.Tlds.assertTldsExist; import static google.registry.persistence.PersistenceModule.TransactionIsolationLevel.TRANSACTION_REPEATABLE_READ; import static google.registry.persistence.transaction.TransactionManagerFactory.tm; +import static google.registry.request.RequestParameters.PARAM_BATCH_SIZE; import static google.registry.request.RequestParameters.PARAM_TLDS; import static google.registry.util.DateTimeUtils.END_OF_TIME; @@ -83,7 +84,7 @@ public class RefreshDnsForAllDomainsAction implements Runnable { RefreshDnsForAllDomainsAction( Response response, @Parameter(PARAM_TLDS) ImmutableSet tlds, - @Parameter("batchSize") Optional batchSize, + @Parameter(PARAM_BATCH_SIZE) Optional batchSize, @Parameter("refreshQps") Optional refreshQps, Random random) { this.response = response; diff --git a/core/src/main/java/google/registry/tools/server/ToolsServerModule.java b/core/src/main/java/google/registry/tools/server/ToolsServerModule.java index 379f8cfc9..95e031761 100644 --- a/core/src/main/java/google/registry/tools/server/ToolsServerModule.java +++ b/core/src/main/java/google/registry/tools/server/ToolsServerModule.java @@ -70,12 +70,6 @@ public class ToolsServerModule { return extractRequiredParameter(req, "rawKeys"); } - @Provides - @Parameter("batchSize") - static Optional provideBatchSize(HttpServletRequest req) { - return extractOptionalIntParameter(req, "batchSize"); - } - @Provides @Parameter("refreshQps") static Optional provideRefreshQps(HttpServletRequest req) { diff --git a/core/src/test/java/google/registry/batch/DeleteProberDataActionTest.java b/core/src/test/java/google/registry/batch/DeleteProberDataActionTest.java index 0825a55cb..8b594509b 100644 --- a/core/src/test/java/google/registry/batch/DeleteProberDataActionTest.java +++ b/core/src/test/java/google/registry/batch/DeleteProberDataActionTest.java @@ -17,6 +17,7 @@ package google.registry.batch; import static com.google.common.truth.Truth.assertThat; import static com.google.common.truth.Truth.assertWithMessage; import static google.registry.model.EppResourceUtils.loadByForeignKey; +import static google.registry.model.domain.rgp.GracePeriodStatus.ADD; import static google.registry.testing.DatabaseHelper.assertDomainDnsRequests; import static google.registry.testing.DatabaseHelper.createTld; import static google.registry.testing.DatabaseHelper.loadByEntitiesIfPresent; @@ -37,6 +38,7 @@ import google.registry.model.billing.BillingBase.Reason; import google.registry.model.billing.BillingEvent; import google.registry.model.domain.Domain; import google.registry.model.domain.DomainHistory; +import google.registry.model.domain.GracePeriod; import google.registry.model.poll.PollMessage; import google.registry.model.reporting.HistoryEntry; import google.registry.model.tld.Tld; @@ -92,11 +94,7 @@ class DeleteProberDataActionTest { } private void resetAction() { - action = new DeleteProberDataAction(); - action.isDryRun = false; - action.tlds = ImmutableSet.of(); - action.registryAdminRegistrarId = "TheRegistrar"; - // RegistryEnvironment.SANDBOX.setup(systemPropertyExtension); + action = new DeleteProberDataAction(false, ImmutableSet.of(), Optional.empty(), "TheRegistrar"); } @AfterEach @@ -119,6 +117,19 @@ class DeleteProberDataActionTest { assertAllAbsent(oaEntities); } + @Test + void test_deletesAllInBatches() throws Exception { + // Persist 40 domains + Set ibEntities = persistLotsOfDomains("ib-any.test"); + Set oaEntities = persistLotsOfDomains("oa-canary.test"); + // Create action with batch size of 3 + DeleteProberDataAction batchedAction = + new DeleteProberDataAction(false, ImmutableSet.of(), Optional.of(3), "TheRegistrar"); + batchedAction.run(); + assertAllAbsent(ibEntities); + assertAllAbsent(oaEntities); + } + @Test void testSuccess_deletesAllAndOnlyGivenTlds() throws Exception { Set tldEntities = persistLotsOfDomains("tld"); @@ -303,12 +314,22 @@ class DeleteProberDataActionTest { .setRegistrarId("TheRegistrar") .setMsg("Domain registered") .build()); + GracePeriod gracePeriod = + persistSimpleResource( + GracePeriod.create( + ADD, + domain.getRepoId(), + DELETION_TIME.plusDays(5), + "TheRegistrar", + billingEvent.createVKey())); + domain = persistResource(domain.asBuilder().addGracePeriod(gracePeriod).build()); ImmutableSet.Builder builder = new ImmutableSet.Builder() .add(domain) .add(historyEntry) .add(billingEvent) - .add(pollMessage); + .add(pollMessage) + .add(gracePeriod); return builder.build(); }