1
0
mirror of https://github.com/google/nomulus synced 2026-02-02 19:12:27 +00:00

Remove pipeline/action to wipe out contact data (#2948)

We've wiped it all out now, so it's moot
This commit is contained in:
gbrodman
2026-01-29 14:38:29 -05:00
committed by GitHub
parent 41d26d8385
commit 1fdacf25dc
13 changed files with 0 additions and 745 deletions

View File

@@ -569,11 +569,6 @@ if (environment == 'alpha') {
mainClass: 'google.registry.beam.resave.ResaveAllEppResourcesPipeline',
metaData: 'google/registry/beam/resave_all_epp_resources_pipeline_metadata.json'
],
wipeOutContactHistoryPii:
[
mainClass: 'google.registry.beam.wipeout.WipeOutContactHistoryPiiPipeline',
metaData: 'google/registry/beam/wipe_out_contact_history_pii_pipeline_metadata.json'
],
]
project.tasks.create("stageBeamPipelines") {
doLast {

View File

@@ -131,12 +131,6 @@ public class BatchModule {
return extractOptionalDatetimeParameter(req, ExpandBillingRecurrencesAction.PARAM_END_TIME);
}
/** Supplies the optional {@code wipeoutTime} request parameter for the wipeout action. */
@Provides
@Parameter(WipeOutContactHistoryPiiAction.PARAM_CUTOFF_TIME)
static Optional<DateTime> provideCutoffTime(HttpServletRequest req) {
  String parameterName = WipeOutContactHistoryPiiAction.PARAM_CUTOFF_TIME;
  return extractOptionalDatetimeParameter(req, parameterName);
}
@Provides
@Parameter(ExpandBillingRecurrencesAction.PARAM_ADVANCE_CURSOR)
static boolean provideAdvanceCursor(HttpServletRequest req) {

View File

@@ -1,142 +0,0 @@
// Copyright 2021 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.batch;
import static google.registry.beam.BeamUtils.createJobName;
import static google.registry.request.RequestParameters.PARAM_DRY_RUN;
import static jakarta.servlet.http.HttpServletResponse.SC_INTERNAL_SERVER_ERROR;
import static jakarta.servlet.http.HttpServletResponse.SC_OK;
import com.google.api.services.dataflow.Dataflow;
import com.google.api.services.dataflow.model.LaunchFlexTemplateParameter;
import com.google.api.services.dataflow.model.LaunchFlexTemplateRequest;
import com.google.api.services.dataflow.model.LaunchFlexTemplateResponse;
import com.google.common.collect.ImmutableMap;
import com.google.common.flogger.FluentLogger;
import com.google.common.net.MediaType;
import google.registry.beam.wipeout.WipeOutContactHistoryPiiPipeline;
import google.registry.config.RegistryConfig.Config;
import google.registry.model.contact.ContactHistory;
import google.registry.request.Action;
import google.registry.request.Parameter;
import google.registry.request.Response;
import google.registry.request.auth.Auth;
import google.registry.util.Clock;
import google.registry.util.RegistryEnvironment;
import jakarta.inject.Inject;
import java.io.IOException;
import java.util.Optional;
import org.joda.time.DateTime;
/**
 * An action that launches {@link WipeOutContactHistoryPiiPipeline} to wipe out Personal
 * Identifiable Information (PII) fields of {@link ContactHistory} entities.
 *
 * <p>{@link ContactHistory} entities should be retained in the database for only a certain amount
 * of time.
 *
 * <p>The cutoff time is taken from the {@code wipeoutTime} request parameter when it is present;
 * otherwise it is the current time minus the configured {@code minMonthsBeforeWipeOut} months.
 */
@Action(
    service = Action.Service.BACKEND,
    path = WipeOutContactHistoryPiiAction.PATH,
    auth = Auth.AUTH_ADMIN)
public class WipeOutContactHistoryPiiAction implements Runnable {

  public static final String PATH = "/_dr/task/wipeOutContactHistoryPii";
  public static final String PARAM_CUTOFF_TIME = "wipeoutTime";

  private static final FluentLogger logger = FluentLogger.forEnclosingClass();
  private static final String PIPELINE_NAME = "wipe_out_contact_history_pii_pipeline";

  private final Clock clock;
  private final boolean isDryRun;
  private final Optional<DateTime> maybeCutoffTime;
  private final int minMonthsBeforeWipeOut;
  private final String stagingBucketUrl;
  private final String projectId;
  private final String jobRegion;
  private final Dataflow dataflow;
  private final Response response;

  @Inject
  public WipeOutContactHistoryPiiAction(
      Clock clock,
      @Parameter(PARAM_DRY_RUN) boolean isDryRun,
      @Parameter(PARAM_CUTOFF_TIME) Optional<DateTime> maybeCutoffTime,
      @Config("minMonthsBeforeWipeOut") int minMonthsBeforeWipeOut,
      @Config("beamStagingBucketUrl") String stagingBucketUrl,
      @Config("projectId") String projectId,
      @Config("defaultJobRegion") String jobRegion,
      Dataflow dataflow,
      Response response) {
    this.clock = clock;
    this.isDryRun = isDryRun;
    this.maybeCutoffTime = maybeCutoffTime;
    this.minMonthsBeforeWipeOut = minMonthsBeforeWipeOut;
    this.stagingBucketUrl = stagingBucketUrl;
    this.projectId = projectId;
    this.jobRegion = jobRegion;
    this.dataflow = dataflow;
    this.response = response;
  }

  /**
   * Launches the flex template and reports the outcome on the response.
   *
   * <p>On success the response status is 200 and the payload contains the launched job's ID. If
   * the launch throws an {@link IOException}, the response status is 500 and the payload contains
   * the exception message.
   */
  @Override
  public void run() {
    response.setContentType(MediaType.PLAIN_TEXT_UTF_8);
    DateTime cutoffTime =
        maybeCutoffTime.orElse(clock.nowUtc().minusMonths(minMonthsBeforeWipeOut));
    LaunchFlexTemplateParameter launchParameter =
        new LaunchFlexTemplateParameter()
            .setJobName(
                createJobName(
                    String.format(
                        "contact-history-pii-wipeout-%s",
                        // NOTE(review): the literal 't' and 'z' are lowercase and the separators
                        // are dashes, presumably to satisfy job-name character restrictions.
                        cutoffTime.toString("yyyy-MM-dd't'HH-mm-ss'z'")),
                    clock))
            .setContainerSpecGcsPath(
                String.format("%s/%s_metadata.json", stagingBucketUrl, PIPELINE_NAME))
            .setParameters(
                ImmutableMap.of(
                    "registryEnvironment",
                    RegistryEnvironment.get().name(),
                    "cutoffTime",
                    cutoffTime.toString("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"),
                    "isDryRun",
                    Boolean.toString(isDryRun)));
    // Only mention dry-run mode when the request actually asked for it; otherwise the log would
    // misreport a real wipeout as a dry run.
    logger.atInfo().log(
        "Launching Beam pipeline to wipe out all PII of contact history entities prior to %s%s.",
        cutoffTime, isDryRun ? " in dry run mode" : "");
    try {
      LaunchFlexTemplateResponse launchResponse =
          dataflow
              .projects()
              .locations()
              .flexTemplates()
              .launch(
                  projectId,
                  jobRegion,
                  new LaunchFlexTemplateRequest().setLaunchParameter(launchParameter))
              .execute();
      logger.atInfo().log("Got response: %s", launchResponse.getJob().toPrettyString());
      response.setStatus(SC_OK);
      response.setPayload(
          String.format(
              "Launched contact history PII wipeout pipeline: %s",
              launchResponse.getJob().getId()));
    } catch (IOException e) {
      logger.atWarning().withCause(e).log("Pipeline Launch failed");
      response.setStatus(SC_INTERNAL_SERVER_ERROR);
      response.setPayload(String.format("Pipeline launch failed: %s", e.getMessage()));
    }
  }
}

View File

@@ -1,166 +0,0 @@
// Copyright 2023 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.beam.wipeout;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static google.registry.persistence.transaction.TransactionManagerFactory.tm;
import static org.apache.beam.sdk.values.TypeDescriptors.voids;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Streams;
import google.registry.beam.common.RegistryJpaIO;
import google.registry.model.contact.ContactHistory;
import google.registry.model.reporting.HistoryEntry.HistoryEntryId;
import google.registry.persistence.PersistenceModule.TransactionIsolationLevel;
import google.registry.persistence.VKey;
import java.io.Serializable;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarLongCoder;
import org.apache.beam.sdk.metrics.Counter;
import org.apache.beam.sdk.metrics.Metrics;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.join.CoGroupByKey;
import org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TupleTag;
import org.joda.time.DateTime;
/**
 * Definition of a Dataflow Flex pipeline template, which finds {@link ContactHistory} entries
 * that are older than a given cutoff time (excluding the most recent one for each contact, even
 * if it falls within the range) and wipes out the PII in them.
 *
 * <p>To stage this template locally, run {@code ./nom_build :core:sBP --environment=alpha \
 * --pipeline=wipeOutContactHistoryPii}.
 *
 * <p>Then, you can run the staged template via the API client library, gCloud or a raw REST call.
 */
public class WipeOutContactHistoryPiiPipeline implements Serializable {
private static final long serialVersionUID = -4111052675715913820L;
// Tags that distinguish the two inputs of the co-group-by-key join in setup().
private static final TupleTag<Long> REVISIONS_TO_WIPE = new TupleTag<>();
private static final TupleTag<Long> MOST_RECENT_REVISION = new TupleTag<>();
private final DateTime cutoffTime;
private final boolean dryRun;
// Incremented once per contact that has at least one revision to wipe.
private final Counter contactsInScope =
Metrics.counter("WipeOutContactHistoryPii", "contacts in scope");
// Incremented once per candidate revision, including in dry-run mode.
private final Counter historiesToWipe =
Metrics.counter("WipeOutContactHistoryPii", "contact histories to wipe PII from");
// Incremented only when a revision is actually updated (never in dry-run mode).
private final Counter historiesWiped =
Metrics.counter("WipeOutContactHistoryPii", "contact histories actually updated");
/** Reads the dry-run flag and parses the cutoff time from the given pipeline options. */
WipeOutContactHistoryPiiPipeline(WipeOutContactHistoryPiiPipelineOptions options) {
dryRun = options.getIsDryRun();
cutoffTime = DateTime.parse(options.getCutoffTime());
}
/**
 * Wires up the pipeline: joins the candidate revisions with the most recent revision of each
 * contact (keyed by repo ID), drops the most recent revision from the candidates, and wipes the
 * PII of the remaining revisions in one transaction per contact.
 */
void setup(Pipeline pipeline) {
KeyedPCollectionTuple.of(REVISIONS_TO_WIPE, getHistoryEntriesToWipe(pipeline))
.and(MOST_RECENT_REVISION, getMostRecentHistoryEntries(pipeline))
.apply("Group by contact", CoGroupByKey.create())
.apply(
"Wipe out PII",
MapElements.into(voids())
.via(
kv -> {
String repoId = kv.getKey();
long mostRecentRevision = kv.getValue().getOnly(MOST_RECENT_REVISION);
// The most recent revision is never wiped, even if it is older than the cutoff.
ImmutableList<Long> revisionsToWipe =
Streams.stream(kv.getValue().getAll(REVISIONS_TO_WIPE))
.filter(e -> e != mostRecentRevision)
.collect(toImmutableList());
if (revisionsToWipe.isEmpty()) {
return null;
}
contactsInScope.inc();
tm().transact(
() -> {
for (long revisionId : revisionsToWipe) {
historiesToWipe.inc();
ContactHistory history =
tm().loadByKey(
VKey.create(
ContactHistory.class,
new HistoryEntryId(repoId, revisionId)));
// In the unlikely case where multiple pipelines run at the
// same time, or where the runner decides to rerun a particular
// transform, we might have a history entry that has already been
// wiped at this point. There's no need to wipe it again.
if (!dryRun
&& history.getContactBase().isPresent()
&& history.getContactBase().get().getEmailAddress() != null) {
historiesWiped.inc();
tm().update(history.asBuilder().wipeOutPii().build());
}
}
});
return null;
}));
}
/**
 * Reads (repo ID, revision ID) pairs for all contact history entries that still have an email
 * address and whose modification time is before the cutoff time.
 */
PCollection<KV<String, Long>> getHistoryEntriesToWipe(Pipeline pipeline) {
return pipeline.apply(
"Find contact histories to wipee",
// Email is one of the required fields in EPP, meaning it's initially not null when it
// is set by EPP flows (even though it is nullable in the SQL schema). Therefore,
// checking if it's null is one way to avoid processing contact history entities that
// have been processed previously. Refer to RFC 5733 for more information.
RegistryJpaIO.read(
"SELECT repoId, revisionId FROM ContactHistory WHERE resource.email IS NOT NULL"
+ " AND modificationTime < :cutoffTime",
ImmutableMap.of("cutoffTime", cutoffTime),
Object[].class,
row -> KV.of((String) row[0], (long) row[1]))
.withCoder(KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of())));
}
/**
 * Reads (repo ID, revision ID) pairs for the history entry with the latest modification time of
 * each contact, regardless of the cutoff time.
 */
PCollection<KV<String, Long>> getMostRecentHistoryEntries(Pipeline pipeline) {
return pipeline.apply(
"Find the most recent historiy entry for each contact",
RegistryJpaIO.read(
"SELECT repoId, revisionId FROM ContactHistory"
+ " WHERE (repoId, modificationTime) IN"
+ " (SELECT repoId, MAX(modificationTime) FROM ContactHistory GROUP BY repoId)",
ImmutableMap.of(),
Object[].class,
row -> KV.of((String) row[0], (long) row[1]))
.withCoder(KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of())));
}
/** Applies the transforms to the given pipeline and runs it. */
PipelineResult run(Pipeline pipeline) {
setup(pipeline);
return pipeline.run();
}
/** Entry point: builds the options from the command-line arguments and runs the pipeline. */
public static void main(String[] args) {
PipelineOptionsFactory.register(WipeOutContactHistoryPiiPipelineOptions.class);
WipeOutContactHistoryPiiPipelineOptions options =
PipelineOptionsFactory.fromArgs(args)
.withValidation()
.as(WipeOutContactHistoryPiiPipelineOptions.class);
// Repeatable read should be more than enough since we are dealing with old history entries that
// are otherwise immutable.
options.setIsolationOverride(TransactionIsolationLevel.TRANSACTION_REPEATABLE_READ);
Pipeline pipeline = Pipeline.create(options);
new WipeOutContactHistoryPiiPipeline(options).run(pipeline);
}
}

View File

@@ -1,37 +0,0 @@
// Copyright 2023 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.beam.wipeout;
import google.registry.beam.common.RegistryPipelineOptions;
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
/** Pipeline options for the contact history PII wipeout pipeline. */
public interface WipeOutContactHistoryPiiPipelineOptions extends RegistryPipelineOptions {

  /** Returns the cutoff time as a string. */
  @Description(
      "A contact history entry with a history modification time before this time will have its PII"
          + " wiped, unless it is the most recent entry for the contact.")
  String getCutoffTime();

  void setCutoffTime(String value);

  /** Returns whether the pipeline should run without saving changes; defaults to false. */
  @Description(
      "If true, the wiped out contact history entries will not be saved but the pipeline metrics"
          + " counter will still be updated.")
  @Default.Boolean(false)
  boolean getIsDryRun();

  void setIsDryRun(boolean value);
}

View File

@@ -1264,12 +1264,6 @@ public final class RegistryConfig {
return ImmutableSet.copyOf(config.sslCertificateValidation.allowedEcdsaCurves);
}
/** Supplies the {@code contactHistory.minMonthsBeforeWipeOut} value from the settings. */
@Provides
@Config("minMonthsBeforeWipeOut")
public static int provideMinMonthsBeforeWipeOut(RegistryConfigSettings config) {
  int minMonths = config.contactHistory.minMonthsBeforeWipeOut;
  return minMonths;
}
@Provides
@Config("jdbcBatchSize")
public static int provideHibernateJdbcBatchSize(RegistryConfigSettings config) {

View File

@@ -39,7 +39,6 @@ public class RegistryConfigSettings {
public Beam beam;
public RegistryTool registryTool;
public SslCertificateValidation sslCertificateValidation;
public ContactHistory contactHistory;
public DnsUpdate dnsUpdate;
public BulkPricingPackageMonitoring bulkPricingPackageMonitoring;
public Bsa bsa;
@@ -223,11 +222,6 @@ public class RegistryConfigSettings {
public String expirationWarningEmailSubjectText;
}
/** Configuration for contact history. */
public static class ContactHistory {
/** Number of months, populated from the {@code contactHistory.minMonthsBeforeWipeOut} setting. */
public int minMonthsBeforeWipeOut;
}
/** Configuration for dns update. */
public static class DnsUpdate {
public String dnsUpdateFailEmailSubjectText;

View File

@@ -450,11 +450,6 @@ registryTool:
# OAuth client secret used by the tool.
clientSecret: YOUR_CLIENT_SECRET
# Configuration options for handling contact history.
contactHistory:
# The minimum age, in months, that a ContactHistory entity must reach before its PII is wiped out.
minMonthsBeforeWipeOut: 18
# Configuration options relevant to the DNS update functionality.
dnsUpdate:
dnsUpdateFailRegistryName: Example name

View File

@@ -27,7 +27,6 @@ import google.registry.batch.RelockDomainAction;
import google.registry.batch.ResaveAllEppResourcesPipelineAction;
import google.registry.batch.ResaveEntityAction;
import google.registry.batch.SendExpiringCertificateNotificationEmailAction;
import google.registry.batch.WipeOutContactHistoryPiiAction;
import google.registry.bsa.BsaDownloadAction;
import google.registry.bsa.BsaRefreshAction;
import google.registry.bsa.BsaValidateAction;
@@ -347,8 +346,6 @@ interface RequestComponent {
VerifyOteAction verifyOteAction();
WipeOutContactHistoryPiiAction wipeOutContactHistoryPiiAction();
@Subcomponent.Builder
abstract class Builder implements RequestComponentBuilder<RequestComponent> {
@Override

View File

@@ -1,39 +0,0 @@
{
  "name": "Wipe Out PII From Old Contact History Entries",
  "description": "An Apache Beam batch pipeline that finds old contact history entries and removes PII from them.",
  "parameters": [
    {
      "name": "registryEnvironment",
      "label": "The Registry environment.",
      "helpText": "The Registry environment.",
      "is_optional": false,
      "regexes": [
        "^(PRODUCTION|SANDBOX|CRASH|QA|ALPHA)$"
      ]
    },
    {
      "name": "isolationOverride",
      "label": "The desired SQL transaction isolation level.",
      "helpText": "The desired SQL transaction isolation level.",
      "is_optional": true,
      "regexes": [
        "^[0-9A-Z_]+$"
      ]
    },
    {
      "name": "cutoffTime",
      "label": "The maximum history modification time of a contact history entry eligible for wipe out.",
      "helpText": "If the history modification time of a contact history entry is older than this, and it is not the most recent entry of a contact, it will have its PII wiped out.",
      "is_optional": true
    },
    {
      "name": "isDryRun",
      "label": "Whether this job is a dry run.",
      "helpText": "If true, no changes will be saved to the database.",
      "is_optional": true,
      "regexes": [
        "^(true|false)$"
      ]
    }
  ]
}

View File

@@ -1,133 +0,0 @@
// Copyright 2021 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.batch;
import static com.google.common.truth.Truth.assertThat;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import com.google.api.services.dataflow.model.LaunchFlexTemplateRequest;
import google.registry.beam.BeamActionTestBase;
import google.registry.testing.FakeClock;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import org.joda.time.DateTime;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.ArgumentCaptor;
/** Tests that {@link WipeOutContactHistoryPiiAction} launches the pipeline with the right input. */
class WipeOutContactHistoryPiiActionTest extends BeamActionTestBase {

  private final DateTime now = DateTime.parse("2019-01-19T01:02:03Z");
  private final FakeClock clock = new FakeClock(now);
  private final Map<String, String> expectedParameters = new HashMap<>();
  private final ArgumentCaptor<LaunchFlexTemplateRequest> launchRequest =
      ArgumentCaptor.forClass(LaunchFlexTemplateRequest.class);

  // Default action: not a dry run, no explicit cutoff time.
  private WipeOutContactHistoryPiiAction action = newAction(false, Optional.empty());

  @BeforeEach
  void before() {
    // With the clock at 2019-01-19 and a minimum age of 8 months, the default cutoff is 2018-05-19.
    expectedParameters.put("cutoffTime", "2018-05-19T01:02:03.000Z");
    expectedParameters.put("isDryRun", "false");
    expectedParameters.put("registryEnvironment", "UNITTEST");
  }

  @Test
  void testSuccess() throws Exception {
    action.run();
    assertLaunchSucceeded();
    verifyLaunchedOnceWithExpectedParameters();
  }

  @Test
  void testSuccess_providedCutoffTime() throws Exception {
    action = newAction(false, Optional.of(now.minusYears(1)));
    action.run();
    expectedParameters.put("cutoffTime", "2018-01-19T01:02:03.000Z");
    assertLaunchSucceeded();
    verifyLaunchedOnceWithExpectedParameters();
  }

  @Test
  void testSuccess_dryRun() throws Exception {
    action = newAction(true, Optional.empty());
    action.run();
    expectedParameters.put("isDryRun", "true");
    assertLaunchSucceeded();
    verifyLaunchedOnceWithExpectedParameters();
  }

  @Test
  void testFailure_launchError() throws Exception {
    when(launch.execute()).thenThrow(new IOException("cannot launch"));
    action.run();
    assertThat(response.getStatus()).isEqualTo(500);
    assertThat(response.getPayload()).isEqualTo("Pipeline launch failed: cannot launch");
    verifyLaunchedOnceWithExpectedParameters();
  }

  /** Builds an action that differs between tests only in the dry-run flag and cutoff time. */
  private WipeOutContactHistoryPiiAction newAction(
      boolean dryRun, Optional<DateTime> cutoffTime) {
    return new WipeOutContactHistoryPiiAction(
        clock,
        dryRun,
        cutoffTime,
        8,
        "tucketBucket",
        "testProject",
        "testRegion",
        dataflow,
        response);
  }

  /** Asserts that the response reports a successful launch of the fake job. */
  private void assertLaunchSucceeded() {
    assertThat(response.getStatus()).isEqualTo(200);
    assertThat(response.getPayload())
        .isEqualTo("Launched contact history PII wipeout pipeline: jobid");
  }

  /** Verifies a single launch call whose parameters match {@code expectedParameters}. */
  private void verifyLaunchedOnceWithExpectedParameters() throws Exception {
    verify(templates, times(1))
        .launch(eq("testProject"), eq("testRegion"), launchRequest.capture());
    assertThat(launchRequest.getValue().getLaunchParameter().getParameters())
        .containsExactlyEntriesIn(expectedParameters);
  }
}

View File

@@ -1,196 +0,0 @@
// Copyright 2023 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package google.registry.beam.wipeout;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.truth.Truth.assertThat;
import static google.registry.model.reporting.HistoryEntry.Type.CONTACT_CREATE;
import static google.registry.persistence.PersistenceModule.TransactionIsolationLevel.TRANSACTION_REPEATABLE_READ;
import static google.registry.testing.DatabaseHelper.loadAllOf;
import static google.registry.testing.DatabaseHelper.newContact;
import static google.registry.testing.DatabaseHelper.persistResource;
import static org.hibernate.cfg.AvailableSettings.ISOLATION;
import com.google.common.collect.ImmutableList;
import google.registry.beam.TestPipelineExtension;
import google.registry.model.contact.Contact;
import google.registry.model.contact.ContactHistory;
import google.registry.model.contact.ContactPhoneNumber;
import google.registry.model.reporting.HistoryEntryDao;
import google.registry.persistence.transaction.JpaTestExtensions;
import google.registry.persistence.transaction.JpaTestExtensions.JpaIntegrationTestExtension;
import google.registry.testing.FakeClock;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;
/** Unit tests for {@link WipeOutContactHistoryPiiPipeline}. */
public class WipeOutContactHistoryPiiPipelineTest {
private static final int MIN_AGE_IN_MONTHS = 18;
private static final DateTimeFormatter DATE_TIME_FORMATTER =
DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSZ");
private final FakeClock clock = new FakeClock(DateTime.parse("2020-02-02T12:34:56Z"));
private final WipeOutContactHistoryPiiPipelineOptions options =
PipelineOptionsFactory.create().as(WipeOutContactHistoryPiiPipelineOptions.class);
private Contact contact1;
private Contact contact2;
@RegisterExtension
final JpaIntegrationTestExtension jpa =
new JpaTestExtensions.Builder()
.withClock(clock)
.withProperty(ISOLATION, TRANSACTION_REPEATABLE_READ.name())
.buildIntegrationTestExtension();
@RegisterExtension
final TestPipelineExtension pipeline =
TestPipelineExtension.create().enableAbandonedNodeEnforcement(true);
// Builds the fixture timeline (T is months since the clock's start time):
//   contact1 has history entries at T = 0 and T = 10;
//   contact2 has history entries at T = 5, T = 10 and T = 20.
// The clock ends at T = 30, so the cutoff time (18 months earlier) is T = 12.
@BeforeEach
void beforeEach() {
contact1 =
persistResource(
newContact("my-contact1")
.asBuilder()
.setEmailAddress("test@example.com")
.setFaxNumber(
new ContactPhoneNumber.Builder().setPhoneNumber("+12122122122").build())
.build());
contact2 =
persistResource(
newContact("my-contact2")
.asBuilder()
.setEmailAddress("test@example.tld")
.setVoiceNumber(
new ContactPhoneNumber.Builder().setPhoneNumber("+19177199177").build())
.build());
// T = 0 month;
persistResource(createHistory(contact1));
// T = 5 months;
advanceMonths(5);
persistResource(createHistory(contact2));
// T = 10 months;
advanceMonths(5);
persistResource(createHistory(contact1));
persistResource(createHistory(contact2));
// T = 20 months;
advanceMonths(10);
persistResource(createHistory(contact2));
// T = 30 months;
advanceMonths(10);
options.setCutoffTime(DATE_TIME_FORMATTER.print(clock.nowUtc().minusMonths(MIN_AGE_IN_MONTHS)));
}
@Test
void testSuccess() {
// Before the pipeline runs, every history entry should have an email address.
assertThat(
loadAllOf(ContactHistory.class).stream()
.filter(e -> e.getContactBase().get().getEmailAddress() != null)
.count())
.isEqualTo(5);
// Before the pipeline runs, contact history for contact1 should have fax numbers.
ImmutableList<ContactHistory> histories =
HistoryEntryDao.loadHistoryObjectsForResource(contact1.createVKey(), ContactHistory.class);
assertThat(
histories.stream().filter(e -> e.getContactBase().get().getFaxNumber() != null).count())
.isEqualTo(2);
// Before the pipeline runs, contact history for contact2 should have voice numbers.
histories =
HistoryEntryDao.loadHistoryObjectsForResource(contact2.createVKey(), ContactHistory.class);
assertThat(
histories.stream()
.filter(e -> e.getContactBase().get().getVoiceNumber() != null)
.count())
.isEqualTo(3);
WipeOutContactHistoryPiiPipeline wipeOutContactHistoryPiiPipeline =
new WipeOutContactHistoryPiiPipeline(options);
wipeOutContactHistoryPiiPipeline.run(pipeline).waitUntilFinish();
histories =
HistoryEntryDao.loadHistoryObjectsForResource(contact1.createVKey(), ContactHistory.class);
assertThat(histories.size()).isEqualTo(2);
ImmutableList<ContactHistory> wipedEntries =
histories.stream()
.filter(e -> e.getContactBase().get().getEmailAddress() == null)
.collect(toImmutableList());
// Only the history entry at T = 0 is wiped. The one at T = 10 is over 18 months old, but it
// is the most recent entry, so it is kept.
assertThat(wipedEntries.size()).isEqualTo(1);
assertThat(wipedEntries.get(0).getContactBase().get().getFaxNumber()).isNull();
// With a new history entry at T = 30, the one at T = 10 is eligible for wipe out. Note the
// current time itself (therefore the cutoff time) has not changed.
persistResource(createHistory(contact1));
wipeOutContactHistoryPiiPipeline.run(pipeline).waitUntilFinish();
histories =
HistoryEntryDao.loadHistoryObjectsForResource(contact1.createVKey(), ContactHistory.class);
assertThat(histories.size()).isEqualTo(3);
wipedEntries =
histories.stream()
.filter(e -> e.getContactBase().get().getEmailAddress() == null)
.collect(toImmutableList());
assertThat(wipedEntries.size()).isEqualTo(2);
// Check that the pipeline deals with multiple contacts correctly.
histories =
HistoryEntryDao.loadHistoryObjectsForResource(contact2.createVKey(), ContactHistory.class);
assertThat(histories.size()).isEqualTo(3);
wipedEntries =
histories.stream()
.filter(e -> e.getContactBase().get().getEmailAddress() == null)
.collect(toImmutableList());
// The history entries at T = 5 and T = 10 are wiped. The one at T = 20 is newer than the
// cutoff time (and is also the most recent entry), so it is kept.
assertThat(wipedEntries.size()).isEqualTo(2);
assertThat(wipedEntries.get(0).getContactBase().get().getVoiceNumber()).isNull();
assertThat(wipedEntries.get(1).getContactBase().get().getVoiceNumber()).isNull();
}
@Test
void testSuccess_dryRun() {
options.setIsDryRun(true);
WipeOutContactHistoryPiiPipeline wipeOutContactHistoryPiiPipeline =
new WipeOutContactHistoryPiiPipeline(options);
wipeOutContactHistoryPiiPipeline.run(pipeline).waitUntilFinish();
ImmutableList<ContactHistory> histories =
HistoryEntryDao.loadHistoryObjectsForResource(contact1.createVKey(), ContactHistory.class);
assertThat(histories.size()).isEqualTo(2);
// In dry-run mode no history entry should have lost its email address.
assertThat(
histories.stream()
.filter(e -> e.getContactBase().get().getEmailAddress() == null)
.collect(toImmutableList()))
.isEmpty();
}
/** Builds (without persisting) a history entry for the contact, stamped with the current time. */
private ContactHistory createHistory(Contact contact) {
return new ContactHistory.Builder()
.setContact(contact)
.setType(CONTACT_CREATE)
.setRegistrarId("TheRegistrar")
.setModificationTime(clock.nowUtc())
.build();
}
/** Advances the fake clock by the given number of calendar months. */
private void advanceMonths(int months) {
DateTime now = clock.nowUtc();
DateTime next = now.plusMonths(months);
clock.advanceBy(new Duration(now, next));
}
}

View File

@@ -57,7 +57,6 @@ BACKEND /_dr/task/tmchSmdrl TmchSmdrlAction
BACKEND /_dr/task/triggerMosApiServiceState TriggerServiceStateAction GET n APP ADMIN
BACKEND /_dr/task/updateRegistrarRdapBaseUrls UpdateRegistrarRdapBaseUrlsAction GET y APP ADMIN
BACKEND /_dr/task/uploadBsaUnavailableNames UploadBsaUnavailableDomainsAction GET,POST n APP ADMIN
BACKEND /_dr/task/wipeOutContactHistoryPii WipeOutContactHistoryPiiAction GET n APP ADMIN
PUBAPI /check CheckApiAction GET n NONE PUBLIC
PUBAPI /rdap/ RdapEmptyAction GET,HEAD n NONE PUBLIC
PUBAPI /rdap/autnum/(*) RdapAutnumAction GET,HEAD n NONE PUBLIC