From 1ebe2fcd1a95e4dfa80de3105c2d37b5c0a41618 Mon Sep 17 00:00:00 2001
From: Ryan Richard
Date: Tue, 19 Nov 2024 13:42:55 -0800
Subject: [PATCH] add integration test for personal info showing in login audit logs
---
 test/integration/audit_test.go                | 440 +++++++++++++++---
 .../integration/limited_ciphers_utils_test.go |  40 +-
 2 files changed, 415 insertions(+), 65 deletions(-)

diff --git a/test/integration/audit_test.go b/test/integration/audit_test.go
index da429d2ec..598b0db15 100644
--- a/test/integration/audit_test.go
+++ b/test/integration/audit_test.go
@@ -6,9 +6,14 @@ package integration
 import (
 	"bytes"
 	"context"
+	"encoding/base64"
 	"encoding/json"
 	"io"
 	"net/http"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"slices"
 	"strings"
 	"testing"
 	"time"
@@ -19,10 +24,15 @@ import (
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/utils/ptr"
+	"sigs.k8s.io/yaml"
 
+	authenticationv1alpha1 "go.pinniped.dev/generated/latest/apis/concierge/authentication/v1alpha1"
 	supervisorconfigv1alpha1 "go.pinniped.dev/generated/latest/apis/supervisor/config/v1alpha1"
 	idpv1alpha1 "go.pinniped.dev/generated/latest/apis/supervisor/idp/v1alpha1"
+	"go.pinniped.dev/internal/auditevent"
 	"go.pinniped.dev/internal/certauthority"
+	"go.pinniped.dev/internal/config/concierge"
+	"go.pinniped.dev/internal/config/supervisor"
 	"go.pinniped.dev/internal/kubeclient"
 	"go.pinniped.dev/test/testlib"
 )
@@ -41,6 +51,321 @@ func kubeClientWithoutPinnipedAPISuffix(t *testing.T) kubernetes.Interface {
 	return client.Kubernetes
 }
 
+// TestAuditLogsDuringLogin_Disruptive is an end-to-end login test which cares more about making
+// audit log assertions than assertions about the login itself. Much of how this test performs a
+// login was inspired by a test case from TestE2EFullIntegration_Browser. This test is Disruptive
+// because it restarts the Supervisor and Concierge to reconfigure audit logging, and then restarts
+// them again to put back the original configuration.
+func TestAuditLogsDuringLogin_Disruptive(t *testing.T) {
+	env := testEnvForPodShutdownTests(t)
+
+	testStartTime := metav1.Now()
+
+	ctx, cancelFunc := context.WithTimeout(context.Background(), 10*time.Minute)
+	defer cancelFunc()
+
+	kubeClient := testlib.NewKubernetesClientset(t)
+	kubeClientForK8sResourcesOnly := kubeClientWithoutPinnipedAPISuffix(t)
+
+	// Build pinniped CLI.
+	pinnipedExe := testlib.PinnipedCLIPath(t)
+
+	supervisorIssuer := env.InferSupervisorIssuerURL(t)
+
+	// Generate a CA bundle with which to serve the Supervisor.
+	t.Logf("generating test CA")
+	tlsServingCertForSupervisorSecretName := "federation-domain-serving-cert-" + testlib.RandHex(t, 8)
+
+	federationDomainSelfSignedCA := createTLSServingCertSecretForSupervisor(
+		ctx,
+		t,
+		env,
+		supervisorIssuer,
+		tlsServingCertForSupervisorSecretName,
+		kubeClient,
+	)
+
+	// Save that CA bundle to a file, for test purposes.
+	federationDomainCABundlePath := filepath.Join(t.TempDir(), "test-ca.pem")
+	federationDomainCABundlePEM := federationDomainSelfSignedCA.Bundle()
+	require.NoError(t, os.WriteFile(federationDomainCABundlePath, federationDomainCABundlePEM, 0600))
+
+	// Create the downstream FederationDomain.
+	// This helper function will nil out spec.TLS if spec.Issuer is an IP address.
+ federationDomain := testlib.CreateTestFederationDomain(ctx, t, + supervisorconfigv1alpha1.FederationDomainSpec{ + Issuer: supervisorIssuer.Issuer(), + TLS: &supervisorconfigv1alpha1.FederationDomainTLSSpec{SecretName: tlsServingCertForSupervisorSecretName}, + }, + supervisorconfigv1alpha1.FederationDomainPhaseError, // in phase error until there is an IDP created + ) + + expectedUsername := env.SupervisorUpstreamLDAP.TestUserMailAttributeValue + expectedGroups := make([]any, len(env.SupervisorUpstreamLDAP.TestUserDirectGroupsDNs)) + for i, g := range env.SupervisorUpstreamLDAP.TestUserDirectGroupsDNs { + expectedGroups[i] = g + } + + // Create a JWTAuthenticator that will validate the tokens from the downstream issuer. + // If the FederationDomain is not Ready, the JWTAuthenticator cannot be ready, either. + clusterAudience := "test-cluster-" + testlib.RandHex(t, 8) + defaultJWTAuthenticatorSpec := authenticationv1alpha1.JWTAuthenticatorSpec{ + Issuer: federationDomain.Spec.Issuer, + Audience: clusterAudience, + TLS: &authenticationv1alpha1.TLSSpec{CertificateAuthorityData: base64.StdEncoding.EncodeToString(federationDomainCABundlePEM)}, + } + authenticator := testlib.CreateTestJWTAuthenticator(ctx, t, defaultJWTAuthenticatorSpec, authenticationv1alpha1.JWTAuthenticatorPhaseError) + setupClusterForEndToEndLDAPTest(t, expectedUsername, env) + testlib.WaitForFederationDomainStatusPhase(ctx, t, federationDomain.Name, supervisorconfigv1alpha1.FederationDomainPhaseReady) + testlib.WaitForJWTAuthenticatorStatusPhase(ctx, t, authenticator.Name, authenticationv1alpha1.JWTAuthenticatorPhaseReady) + + tempDir := t.TempDir() // per-test tmp dir to avoid sharing files between tests + // Use a specific session cache for this test. + sessionCachePath := tempDir + "/test-sessions.yaml" + credentialCachePath := tempDir + "/test-credentials.yaml" + + kubeconfigPath := runPinnipedGetKubeconfig(t, env, pinnipedExe, tempDir, []string{ + "get", "kubeconfig", + "--concierge-api-group-suffix", env.APIGroupSuffix, + "--concierge-authenticator-type", "jwt", + "--concierge-authenticator-name", authenticator.Name, + "--oidc-session-cache", sessionCachePath, + "--credential-cache", credentialCachePath, + // use default for --oidc-scopes, which is to request all relevant scopes + }) + + t.Setenv("PINNIPED_USERNAME", expectedUsername) + t.Setenv("PINNIPED_PASSWORD", env.SupervisorUpstreamLDAP.TestUserPassword) + + timeBeforeLogin := metav1.Now() + + // Run kubectl command which should run an LDAP-style login without interactive prompts for username and password. + kubectlCmd := exec.CommandContext(ctx, "kubectl", "auth", "whoami", "--kubeconfig", kubeconfigPath) + kubectlCmd.Env = slices.Concat(os.Environ(), env.ProxyEnv()) + kubectlOutput, err := kubectlCmd.CombinedOutput() + require.NoErrorf(t, err, + "expected no error but got error, combined stdout/stderr was:\n----start of output\n%s\n----end of output", kubectlOutput) + + allSupervisorSessionStartedLogs := getFilteredAuditLogs(t, ctx, + func(log map[string]any) bool { + return log["message"] == string(auditevent.SessionStarted) + }, + kubeClientForK8sResourcesOnly, + env.SupervisorNamespace, + env.SupervisorAppName, + timeBeforeLogin, + ) + removeSomeKeysFromEachAuditLogEvent(allSupervisorSessionStartedLogs) + // Also remove sessionID, which is a UUID that we can't predict for the assertions below. 
+	for _, log := range allSupervisorSessionStartedLogs {
+		require.NotEmpty(t, log["sessionID"])
+		delete(log, "sessionID")
+	}
+
+	// All values in the personalInfo map should be redacted by default.
+	require.Equal(t, []map[string]any{
+		{
+			"message": "Session Started",
+			"personalInfo": map[string]any{
+				"username":         "redacted",
+				"groups":           []any{"redacted 2 values"},
+				"subject":          "redacted",
+				"additionalClaims": map[string]any{"redacted": "redacted 0 keys"},
+			},
+			"warnings": []any{},
+		},
+	}, allSupervisorSessionStartedLogs)
+
+	allConciergeTCRLogs := getFilteredAuditLogs(t, ctx,
+		func(log map[string]any) bool {
+			return log["message"] == string(auditevent.TokenCredentialRequestAuthenticatedUser)
+		},
+		kubeClientForK8sResourcesOnly,
+		env.ConciergeNamespace,
+		env.ConciergeAppName,
+		timeBeforeLogin,
+	)
+	removeSomeKeysFromEachAuditLogEvent(allConciergeTCRLogs)
+	// Also remove issuedClientCertExpires, which is a timestamp that we can't easily predict for the assertions below.
+	for _, log := range allConciergeTCRLogs {
+		require.NotEmpty(t, log["issuedClientCertExpires"])
+		delete(log, "issuedClientCertExpires")
+	}
+
+	// All values in the personalInfo map should be redacted by default.
+	require.Equal(t, []map[string]any{
+		{
+			"message": "TokenCredentialRequest Authenticated User",
+			"authenticator": map[string]any{
+				// This is always pinniped.dev, even when the API group suffix is customized, because of how the production code works.
+				"apiGroup": "authentication.concierge.pinniped.dev",
+				"kind":     "JWTAuthenticator",
+				"name":     authenticator.Name,
+			},
+			"personalInfo": map[string]any{
+				"username": "redacted",
+				"groups":   []any{"redacted 2 values"},
+			},
+		},
+	}, allConciergeTCRLogs)
+
+	allSupervisorHealthzLogs := getFilteredAuditLogs(t, ctx,
+		func(log map[string]any) bool {
+			return log["path"] == "/healthz"
+		},
+		kubeClientForK8sResourcesOnly,
+		env.SupervisorNamespace,
+		env.SupervisorAppName,
+		testStartTime,
+	)
+	// There should be none, because /healthz audit logs are disabled by default.
+	require.Empty(t, allSupervisorHealthzLogs)
+
+	t.Log("updating Supervisor's static ConfigMap and restarting the pods")
+	updateStaticConfigMapAndRestartApp(t,
+		ctx,
+		env.SupervisorNamespace,
+		env.SupervisorAppName+"-static-config",
+		env.SupervisorAppName,
+		false,
+		func(t *testing.T, configMapData string) string {
+			t.Helper()
+
+			var config supervisor.Config
+			err := yaml.Unmarshal([]byte(configMapData), &config)
+			require.NoError(t, err)
+
+			// The Supervisor has two audit configuration options. Enable both.
+			config.Audit.LogUsernamesAndGroups = "enabled"
+			config.Audit.LogInternalPaths = "enabled"
+
+			updatedConfig, err := yaml.Marshal(config)
+			require.NoError(t, err)
+			return string(updatedConfig)
+		},
+	)
+
+	t.Log("updating Concierge's static ConfigMap and restarting the pods")
+	updateStaticConfigMapAndRestartApp(t,
+		ctx,
+		env.ConciergeNamespace,
+		env.ConciergeAppName+"-config",
+		env.ConciergeAppName,
+		true,
+		func(t *testing.T, configMapData string) string {
+			t.Helper()
+
+			var config concierge.Config
+			err := yaml.Unmarshal([]byte(configMapData), &config)
+			require.NoError(t, err)
+
+			// The Concierge has only one audit configuration option. Enable it.
+			config.Audit.LogUsernamesAndGroups = "enabled"
+
+			updatedConfig, err := yaml.Marshal(config)
+			require.NoError(t, err)
+			return string(updatedConfig)
+		},
+	)
+
+	// Force a fresh login for the next kubectl command by removing the local caches.
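+	// Note: the session cache file holds the tokens issued by the Supervisor during login, and the
+	// credential cache file holds the cluster credential issued by the Concierge's TokenCredentialRequest
+	// API, so deleting both files ensures that the next kubectl invocation performs a complete fresh login.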
+	require.NoError(t, os.Remove(sessionCachePath))
+	require.NoError(t, os.Remove(credentialCachePath))
+
+	// Reset the start time before we do a second login.
+	timeBeforeLogin = metav1.Now()
+
+	// Do a second login, which should cause audit logs with non-redacted personal info.
+	// Run kubectl command which should run an LDAP-style login without interactive prompts for username and password.
+	kubectlCmd = exec.CommandContext(ctx, "kubectl", "auth", "whoami", "--kubeconfig", kubeconfigPath)
+	kubectlCmd.Env = slices.Concat(os.Environ(), env.ProxyEnv())
+	kubectlOutput, err = kubectlCmd.CombinedOutput()
+	require.NoErrorf(t, err,
+		"expected no error but got error, combined stdout/stderr was:\n----start of output\n%s\n----end of output", kubectlOutput)
+
+	allSupervisorSessionStartedLogs = getFilteredAuditLogs(t, ctx,
+		func(log map[string]any) bool {
+			return log["message"] == string(auditevent.SessionStarted)
+		},
+		kubeClientForK8sResourcesOnly,
+		env.SupervisorNamespace,
+		env.SupervisorAppName,
+		timeBeforeLogin,
+	)
+	removeSomeKeysFromEachAuditLogEvent(allSupervisorSessionStartedLogs)
+	// Also remove sessionID, which is a UUID that we can't predict for the assertions below.
+	for _, log := range allSupervisorSessionStartedLogs {
+		require.NotEmpty(t, log["sessionID"])
+		delete(log, "sessionID")
+	}
+	// Now that "subject" is no longer redacted, remove it too, because it contains values that are hard to predict.
+	for _, log := range allSupervisorSessionStartedLogs {
+		p := log["personalInfo"].(map[string]any)
+		require.NotEmpty(t, p)
+		require.Contains(t, p["subject"], "ldaps://"+env.SupervisorUpstreamLDAP.Host+"?")
+		delete(p, "subject")
+	}
+
+	// All values in the personalInfo map should no longer be redacted.
+	require.Equal(t, []map[string]any{
+		{
+			"message": "Session Started",
+			"personalInfo": map[string]any{
+				"username": expectedUsername,
+				"groups":   expectedGroups,
+				// note that we removed "subject" above
+				"additionalClaims": map[string]any{},
+			},
+			"warnings": []any{},
+		},
+	}, allSupervisorSessionStartedLogs)
+
+	allConciergeTCRLogs = getFilteredAuditLogs(t, ctx,
+		func(log map[string]any) bool {
+			return log["message"] == string(auditevent.TokenCredentialRequestAuthenticatedUser)
+		},
+		kubeClientForK8sResourcesOnly,
+		env.ConciergeNamespace,
+		env.ConciergeAppName,
+		timeBeforeLogin,
+	)
+	removeSomeKeysFromEachAuditLogEvent(allConciergeTCRLogs)
+	// Also remove issuedClientCertExpires, which is a timestamp that we can't easily predict for the assertions below.
+	for _, log := range allConciergeTCRLogs {
+		require.NotEmpty(t, log["issuedClientCertExpires"])
+		delete(log, "issuedClientCertExpires")
+	}
+
+	// All values in the personalInfo map should no longer be redacted.
+	require.Equal(t, []map[string]any{
+		{
+			"message": "TokenCredentialRequest Authenticated User",
+			"authenticator": map[string]any{
+				"apiGroup": "authentication.concierge." + env.APIGroupSuffix,
+				"kind":     "JWTAuthenticator",
+				"name":     authenticator.Name,
+			},
+			"personalInfo": map[string]any{
+				"username": expectedUsername,
+				"groups":   expectedGroups,
+			},
+		},
+	}, allConciergeTCRLogs)
+
+	allSupervisorHealthzLogs = getFilteredAuditLogs(t, ctx,
+		func(log map[string]any) bool {
+			return log["path"] == "/healthz"
+		},
+		kubeClientForK8sResourcesOnly,
+		env.SupervisorNamespace,
+		env.SupervisorAppName,
+		testStartTime,
+	)
+	// There should be some, because we reconfigured the setting to enable them.
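+	// The exact number of /healthz requests (most likely from kubelet probes) is timing-dependent,
+	// so just assert that at least one was audit logged.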
+ t.Logf("saw %d audit logs where path=/healthz in Supervisor pod logs", len(allSupervisorHealthzLogs)) + require.NotEmpty(t, allSupervisorHealthzLogs) +} + func TestAuditLogsEmittedForDiscoveryEndpoints_Parallel(t *testing.T) { ctx, cancelFunc := context.WithTimeout(context.Background(), 2*time.Minute) defer cancelFunc() @@ -49,23 +374,23 @@ func TestAuditLogsEmittedForDiscoveryEndpoints_Parallel(t *testing.T) { startTime := metav1.Now() //nolint:bodyclose // this is closed in the helper function - _, _, auditID := requireSuccessEndpointResponse( - t, + _, _, auditID := requireSuccessEndpointResponse(t, fakeIssuerForDisplayPurposes.Issuer()+"/.well-known/openid-configuration", fakeIssuerForDisplayPurposes.Issuer(), ca.Bundle(), dnsOverrides, ) - allSupervisorPodLogsWithAuditID := getAuditLogsForAuditID( - t, - ctx, - auditID, + allSupervisorPodLogsWithAuditID := getFilteredAuditLogs(t, ctx, + func(log map[string]any) bool { + return log["auditID"] == auditID + }, kubeClientForK8sResourcesOnly, env.SupervisorNamespace, env.SupervisorAppName, startTime, ) + removeSomeKeysFromEachAuditLogEvent(allSupervisorPodLogsWithAuditID) require.Equal(t, 2, len(allSupervisorPodLogsWithAuditID), "expected exactly two log lines with auditID=%s", auditID) @@ -100,8 +425,7 @@ func TestAuditLogsEmittedForEndpointsEvenWhenTheCallsAreInvalid_Parallel(t *test // Call the /oauth2/authorize endpoint startTime := metav1.Now() //nolint:bodyclose // this is closed in the helper function - _, _, auditID := requireEndpointResponse( - t, + _, _, auditID := requireEndpointResponse(t, fakeIssuerForDisplayPurposes.Issuer()+"/oauth2/authorize?foo=bar&foo=bar&scope=safe-to-log", fakeIssuerForDisplayPurposes.Issuer(), ca.Bundle(), @@ -109,15 +433,16 @@ func TestAuditLogsEmittedForEndpointsEvenWhenTheCallsAreInvalid_Parallel(t *test http.StatusBadRequest, ) - allSupervisorPodLogsWithAuditID := getAuditLogsForAuditID( - t, - ctx, - auditID, + allSupervisorPodLogsWithAuditID := getFilteredAuditLogs(t, ctx, + func(log map[string]any) bool { + return log["auditID"] == auditID + }, kubeClientForK8sResourcesOnly, env.SupervisorNamespace, env.SupervisorAppName, startTime, ) + removeSomeKeysFromEachAuditLogEvent(allSupervisorPodLogsWithAuditID) require.Equal(t, []map[string]any{ { @@ -154,8 +479,7 @@ func TestAuditLogsEmittedForEndpointsEvenWhenTheCallsAreInvalid_Parallel(t *test // Call the /callback endpoint startTime = metav1.Now() //nolint:bodyclose // this is closed in the helper function - _, _, auditID = requireEndpointResponse( - t, + _, _, auditID = requireEndpointResponse(t, fakeIssuerForDisplayPurposes.Issuer()+"/callback?foo=bar&foo=bar&error=safe-to-log", fakeIssuerForDisplayPurposes.Issuer(), ca.Bundle(), @@ -163,15 +487,16 @@ func TestAuditLogsEmittedForEndpointsEvenWhenTheCallsAreInvalid_Parallel(t *test http.StatusForbidden, ) - allSupervisorPodLogsWithAuditID = getAuditLogsForAuditID( - t, - ctx, - auditID, + allSupervisorPodLogsWithAuditID = getFilteredAuditLogs(t, ctx, + func(log map[string]any) bool { + return log["auditID"] == auditID + }, kubeClientForK8sResourcesOnly, env.SupervisorNamespace, env.SupervisorAppName, startTime, ) + removeSomeKeysFromEachAuditLogEvent(allSupervisorPodLogsWithAuditID) require.Equal(t, []map[string]any{ { @@ -203,8 +528,7 @@ func TestAuditLogsEmittedForEndpointsEvenWhenTheCallsAreInvalid_Parallel(t *test // Call the /login endpoint startTime = metav1.Now() //nolint:bodyclose // this is closed in the helper function - _, _, auditID = requireEndpointResponse( - t, + _, _, auditID = 
requireEndpointResponse(t, fakeIssuerForDisplayPurposes.Issuer()+"/login?foo=bar&foo=bar&err=safe-to-log", fakeIssuerForDisplayPurposes.Issuer(), ca.Bundle(), @@ -212,15 +536,16 @@ func TestAuditLogsEmittedForEndpointsEvenWhenTheCallsAreInvalid_Parallel(t *test http.StatusForbidden, ) - allSupervisorPodLogsWithAuditID = getAuditLogsForAuditID( - t, - ctx, - auditID, + allSupervisorPodLogsWithAuditID = getFilteredAuditLogs(t, ctx, + func(log map[string]any) bool { + return log["auditID"] == auditID + }, kubeClientForK8sResourcesOnly, env.SupervisorNamespace, env.SupervisorAppName, startTime, ) + removeSomeKeysFromEachAuditLogEvent(allSupervisorPodLogsWithAuditID) require.Equal(t, []map[string]any{ { @@ -252,8 +577,7 @@ func TestAuditLogsEmittedForEndpointsEvenWhenTheCallsAreInvalid_Parallel(t *test // Call the /oauth2/token endpoint startTime = metav1.Now() //nolint:bodyclose // this is closed in the helper function - _, _, auditID = requireEndpointResponse( - t, + _, _, auditID = requireEndpointResponse(t, fakeIssuerForDisplayPurposes.Issuer()+"/oauth2/token?foo=bar&foo=bar&grant_type=safe-to-log", fakeIssuerForDisplayPurposes.Issuer(), ca.Bundle(), @@ -261,15 +585,16 @@ func TestAuditLogsEmittedForEndpointsEvenWhenTheCallsAreInvalid_Parallel(t *test http.StatusBadRequest, ) - allSupervisorPodLogsWithAuditID = getAuditLogsForAuditID( - t, - ctx, - auditID, + allSupervisorPodLogsWithAuditID = getFilteredAuditLogs(t, ctx, + func(log map[string]any) bool { + return log["auditID"] == auditID + }, kubeClientForK8sResourcesOnly, env.SupervisorNamespace, env.SupervisorAppName, startTime, ) + removeSomeKeysFromEachAuditLogEvent(allSupervisorPodLogsWithAuditID) require.Equal(t, []map[string]any{ { @@ -363,24 +688,23 @@ func auditSetup(t *testing.T, ctx context.Context) ( return env, kubeClientForK8sResourcesOnly, fakeIssuerForDisplayPurposes, ca, dnsOverrides } -func cleanupAuditLog(t *testing.T, m *map[string]any, auditID string) { - delete(*m, "caller") - delete(*m, "remoteAddr") - delete(*m, "userAgent") - delete(*m, "timestamp") - delete(*m, "latency") - require.Equal(t, (*m)["level"], "info") - delete(*m, "level") - require.Equal(t, (*m)["auditEvent"], true) - delete(*m, "auditEvent") - require.Equal(t, (*m)["auditID"], auditID) - delete(*m, "auditID") +func removeSomeKeysFromEachAuditLogEvent(logs []map[string]any) { + for _, log := range logs { + delete(log, "level") + delete(log, "auditEvent") + delete(log, "caller") + delete(log, "remoteAddr") + delete(log, "userAgent") + delete(log, "timestamp") + delete(log, "latency") + delete(log, "auditID") + } } -func getAuditLogsForAuditID( +func getFilteredAuditLogs( t *testing.T, ctx context.Context, - auditID string, + filterAuditLogEvent func(log map[string]any) bool, kubeClient kubernetes.Interface, namespace string, appName string, @@ -392,9 +716,7 @@ func getAuditLogsForAuditID( defer cancel() pods, err := kubeClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ - LabelSelector: labels.Set{ - "app": appName, - }.String(), + LabelSelector: labels.Set{"app": appName}.String(), }) require.NoError(t, err) @@ -405,19 +727,25 @@ func getAuditLogsForAuditID( } allPodLogs := strings.Split(allPodLogsBuffer.String(), "\n") - var allPodLogsWithAuditID []map[string]any + var filteredAuditLogs []map[string]any for _, podLog := range allPodLogs { - if strings.Contains(podLog, auditID) { - var deserialized map[string]any - err = json.Unmarshal([]byte(podLog), &deserialized) - require.NoError(t, err) - cleanupAuditLog(t, &deserialized, auditID) - - 
allPodLogsWithAuditID = append(allPodLogsWithAuditID, deserialized)
+		if len(podLog) == 0 {
+			continue
+		}
+		var deserializedPodLog map[string]any
+		err = json.Unmarshal([]byte(podLog), &deserializedPodLog)
+		require.NoErrorf(t, err, "error parsing line of pod log: %s", podLog)
+		isAuditEventBool, hasAuditEvent := deserializedPodLog["auditEvent"]
+		if hasAuditEvent {
+			require.Equal(t, true, isAuditEventBool)
+			require.Equal(t, "info", deserializedPodLog["level"])
+		}
+		if hasAuditEvent && filterAuditLogEvent(deserializedPodLog) {
+			filteredAuditLogs = append(filteredAuditLogs, deserializedPodLog)
 		}
 	}
 
-	return allPodLogsWithAuditID
+	return filteredAuditLogs
 }
 
 func getLogsForPodSince(
diff --git a/test/integration/limited_ciphers_utils_test.go b/test/integration/limited_ciphers_utils_test.go
index f2eef58cf..10b79ce8d 100644
--- a/test/integration/limited_ciphers_utils_test.go
+++ b/test/integration/limited_ciphers_utils_test.go
@@ -12,6 +12,7 @@ import (
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"sigs.k8s.io/yaml"
 
@@ -173,7 +174,6 @@ func updateStaticConfigMapAndRestartApp(
 }
 
 // restartAllPodsOfApp will immediately scale to 0 and then scale back.
-// There are no uses of t.Cleanup since these actions need to happen immediately.
 func restartAllPodsOfApp(
 	t *testing.T,
 	namespace string,
@@ -195,17 +195,39 @@ func restartAllPodsOfApp(
 	originalScale := updateDeploymentScale(t, namespace, appName, 0)
 	require.Greater(t, int(originalScale), 0)
 
+	scaleDeploymentBackToOriginalScale := func() {
+		ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
+		defer cancel()
+		client := testlib.NewKubernetesClientset(t)
+
+		currentScale, err := client.AppsV1().Deployments(namespace).GetScale(ctx, appName, metav1.GetOptions{})
+		require.NoError(t, err)
+		if currentScale.Spec.Replicas == originalScale {
+			// Already scaled appropriately. No need to change the scale.
+			return
+		}
+
+		updateDeploymentScale(t, namespace, appName, originalScale)
+
+		// Wait for all the new pods to be running and ready.
+		var newPods []corev1.Pod
+		testlib.RequireEventually(t, func(requireEventually *require.Assertions) {
+			newPods = getRunningPodsByNamePrefix(t, namespace, appName+"-", ignorePodsWithNameSubstring)
+			requireEventually.Equal(len(newPods), int(originalScale), "wanted pods to return to original scale")
+			requireEventually.True(allPodsReady(newPods), "wanted all new pods to be ready")
+		}, 2*time.Minute, 200*time.Millisecond)
+	}
+
+	// Even if the test fails due to the assertions below, still try to scale back to the original scale,
+	// to avoid polluting other tests.
+	t.Cleanup(scaleDeploymentBackToOriginalScale)
+
+	// Now that we have adjusted the scale to 0, the pods should go away.
 	testlib.RequireEventually(t, func(requireEventually *require.Assertions) {
 		newPods := getRunningPodsByNamePrefix(t, namespace, appName+"-", ignorePodsWithNameSubstring)
 		requireEventually.Len(newPods, 0, "wanted zero pods")
 	}, 2*time.Minute, 200*time.Millisecond)
 
-	// Reset the application to its original scale.
-	updateDeploymentScale(t, namespace, appName, originalScale)
-
-	testlib.RequireEventually(t, func(requireEventually *require.Assertions) {
-		newPods := getRunningPodsByNamePrefix(t, namespace, appName+"-", ignorePodsWithNameSubstring)
-		requireEventually.Equal(len(newPods), int(originalScale), "wanted %d pods", originalScale)
-		requireEventually.True(allPodsReady(newPods), "wanted all new pods to be ready")
-	}, 2*time.Minute, 200*time.Millisecond)
+	// Scale back to the original scale immediately.
+	scaleDeploymentBackToOriginalScale()
 }