#!/usr/bin/env bash
# Copyright 2020-2025 the Pinniped contributors. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
# Run the integration tests against a remote target cluster.
#
# This script is designed to be run both in CI as a task (see task.yaml)
# and on a development workstation (see hack/prepare-remote-cluster-for-integration-tests.sh).
# When editing this file, please ensure that both methods of running will still work.
# All required file/directory paths can be controlled by env vars so they can default
# to the CI task values but be overridden by an invocation on your workstation.
#
# This script assumes that the app is already deployed into the remote target cluster
# and that the necessary env vars are provided.
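#
# For example, a workstation invocation might look like this (paths are illustrative):
#   KUBECONFIG="$HOME/.kube/my-cluster.yaml" \
#     TEST_ENV_PATH="/tmp/integration-test-env" \
#     SOURCE_PATH="$HOME/workspace/pinniped" \
#     ./task.sh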
set -euo pipefail
export GOCACHE="$PWD/cache/gocache"
export GOMODCACHE="$PWD/cache/gomodcache"
# When run as a CI task, the initial working directory is the directory above all of the
# input directories.
initial_working_directory=$(pwd)
# Set some default paths that would apply when run as a CI task.
# There is no equivalent needed for these when running on your development laptop.
pinniped_test_cli="$initial_working_directory/ci-test-image/rootfs/usr/local/bin/pinniped"
integration_test_binary="$initial_working_directory/ci-test-image/rootfs/usr/local/bin/pinniped-integration-test"
# Set up the KUBECONFIG for the integration tests to use.
# To make it possible to run this script on your workstation, first check to see if $KUBECONFIG is already set by the caller.
kubeconfig=${KUBECONFIG:-"$initial_working_directory/kubeconfig/kubeconfig"}
echo "Using kubeconfig file $kubeconfig"
export KUBECONFIG="$kubeconfig"
# Load the env vars that were output by the previous script, which are needed during go test.
# To make it possible to run this script on your workstation, first check to see if an alternate path is set.
test_env_path=${TEST_ENV_PATH:-"integration-test-env-vars/integration-test-env"}
echo "Using test env file $test_env_path"
source "$test_env_path"
# cd to the source code repo.
# To make it possible to run this script on your workstation, first check to see if an alternate path is set.
source_path=${SOURCE_PATH:-"pinniped"}
cd "$source_path"
echo "Using source code directory $(pwd)"
# Some supervisor deployment settings, with default values that are appropriate for both the CI task
# and for running on development workstations. These can be overridden to allow testing of secondary
# deploys, i.e. when there are two Pinniped installations running on the same cluster.
supervisor_namespace=${PINNIPED_SUPERVISOR_NAMESPACE:-"supervisor"}
supervisor_nodeport_service=${PINNIPED_SUPERVISOR_NODEPORT_SERVICE:-"supervisor-nodeport"}
supervisor_https_host_port=${PINNIPED_SUPERVISOR_HTTPS_HOST_PORT:-12344} # see gce-init.sh for the meaning of this port
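# For example, a secondary deploy might be tested with overrides like these (values are illustrative):
#   PINNIPED_SUPERVISOR_NAMESPACE="supervisor2" \
#     PINNIPED_SUPERVISOR_NODEPORT_SERVICE="supervisor2-nodeport" \
#     PINNIPED_SUPERVISOR_HTTPS_HOST_PORT=12345 ./task.sh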
# Prepare to clean up any background jobs that we might start below.
background_pids=()
function cleanup() {
  if [[ "${#background_pids[@]}" -gt "0" ]]; then
    echo "Cleaning up background processes..."
    # Kill all background jobs. Can't use $background_pids here since some of the commands that we
    # put into the background are pipelines of multiple commands, and $background_pids only holds the pids
    # of the last command in each pipeline. `jobs -p` is the pids of the first command in each pipeline.
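    # For example, for "kubectl port-forward ... | grep ... &" below, $! records the pid of grep,
    # while `jobs -p` reports the pid of kubectl, which is the process that we actually need to kill.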
    jobs -p | xargs kill
  fi
}
trap cleanup EXIT
# See kind port mappings in gce-init.sh for what these port number values hook into on a remote kind cluster.
# See single-node.yaml for the same port numbers when running a kind cluster on your development laptop.
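# Each mapping below uses the same format as ssh's -L flag: local_address:local_port:remote_address:remote_port.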
ssh_mappings=(
  # The Pinniped Supervisor's https port.
  "127.0.0.1:12344:127.0.0.1:${supervisor_https_host_port}"
  # The squid proxy port. We run squid inside the cluster to allow the tests
  # to use it as an http_proxy to access all Services inside the cluster.
  "127.0.0.1:12346:127.0.0.1:12346"
)
kubectl_mapping_command1=(
  # The Pinniped Supervisor's https port.
  kubectl port-forward -n "$supervisor_namespace" "svc/$supervisor_nodeport_service" 12344:443 -v 9
)
kubectl_mapping_command2=(
  # The squid proxy port. We run squid inside the cluster to allow the tests
  # to use it as an http_proxy to access all Services inside the cluster.
  kubectl port-forward -n tools svc/proxy 12346:3128 -v 9
)
# The above variables are not unused, despite what shellcheck warns; they are passed by name into this array.
kubectl_mapping_commands=(kubectl_mapping_command1 kubectl_mapping_command2)
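# (Bash indirect expansion is used later to turn each name back into its array, e.g. given
# name="kubectl_mapping_command1[@]", the expansion "${!name}" yields that array's elements.)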
# The health checks that we should run before running the tests to ensure that our port mappings are ready.
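# Each entry is a full shell command which will be eval'd below. The squid check uses an in-cluster
# Service DNS name, which only resolves because the request is tunneled through the proxy.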
port_health_checks=(
  # The Pinniped Supervisor's https port.
  "curl -fsk https://127.0.0.1:12344/healthz"
  # The squid proxy port.
  "https_proxy=127.0.0.1:12346 curl -fsk https://dex.tools.svc.cluster.local/dex/.well-known/openid-configuration"
)
# Use "gcloud ssh" to forward ports of remote kind clusters because "kubectl port-forward"
# proved to be unreliable in that use case.
if [[ "${START_GCLOUD_PROXY:-no}" == "yes" ]]; then
if [[ -z "${GCP_ZONE:-}" || -z "${GCP_PROJECT:-}" ]]; then
echo "\$GCP_ZONE and \$GCP_PROJECT are required when START_GCLOUD_PROXY==yes"
exit 1
fi
# If the GCP_USERNAME env var was set, then use it along with $GCP_JSON_KEY to log in as a service account.
# When running on your laptop we will assume that you are already logged in to gcloud as yourself.
if [[ -n "${GCP_USERNAME:-}" ]]; then
echo "Signing in to gcloud as service account $GCP_USERNAME ..."
gcloud auth activate-service-account \
"$GCP_USERNAME" \
--key-file <(echo "$GCP_JSON_KEY") \
--project "$GCP_PROJECT"
fi
# For using "gcloud ssh" with a remote kind cluster below, we'll need to know the name of the cluster.
if [[ -f "$initial_working_directory/kubeconfig/cluster-name" ]]; then
# In CI we set the cluster name as another file in the kubeconfig input directory.
cluster_name="$(cat "$initial_working_directory/kubeconfig/cluster-name")"
else
# When running on your development workstation, the name of the file is the name of the cluster.
cluster_name="$(basename "$KUBECONFIG")"
fi
  # Make a private key that can be used for all ssh commands below, if one does not already exist.
  # Check if it exists because there is no need to regenerate it when running on your development workstation.
  ssh_key_file="$HOME/.ssh/pinniped-integration-test-key"
  if [[ ! -f "$ssh_key_file" ]]; then
    # Generate a private key which has no password, output to $ssh_key_file.
    ssh-keygen -t rsa -b 4096 -q -N "" -f "$ssh_key_file"
  fi

  # Use a unique username for each test invocation so that each test invocation will upload a new public key to our GCP project.
  # This allows any number of port forwards across parallel test runs to be independent.
  # Note that this username must be 32 characters or fewer.
  unique_username="int-test-$(openssl rand -hex 8)"

  # When run in CI, the service account should not have permission to create project-wide keys, so explicitly add the
  # key only to the specific VM instance (as VM metadata). We don't want to pollute the project-wide keys with these.
  # See https://cloud.google.com/compute/docs/connect/add-ssh-keys#after-vm-creation for an explanation of these commands.
  # Note that this overwrites all ssh keys in the metadata. At the moment, these VMs have no ssh keys in the metadata
  # upon creation, so it should always be okay to overwrite the empty value. However, if someday they need to have some
  # initial ssh keys in the metadata, and if those keys need to be preserved, then these commands could be
  # enhanced to instead read the keys, add to them, and write back the new list.
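  # The resulting line in /tmp/ssh-key-values uses GCP's expiring-ssh-key metadata format, roughly
  # like this (values are illustrative):
  #   int-test-ab12cd34:ssh-rsa AAAA... google-ssh {"userName":"int-test-ab12cd34","expireOn":"2025-01-01T00:00:00+0000"}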
  future_time="$(date --utc --date '+3 hours' '+%FT%T%z')"
  echo \
    "${unique_username}:$(cat "${ssh_key_file}.pub") google-ssh {\"userName\":\"${unique_username}\",\"expireOn\":\"${future_time}\"}" \
    > /tmp/ssh-key-values
  gcloud compute instances add-metadata "$cluster_name" \
    --metadata-from-file ssh-keys=/tmp/ssh-key-values \
    --zone "$GCP_ZONE" --project "$GCP_PROJECT"

  # Get the IP so we can stop using gcloud ssh and start using regular ssh, now that it has been set up.
  # gcloud ssh sometimes complains that the "remote host identification has changed" and there
  # seems to be no way to avoid it. :( So we'll use regular ssh.
  gcloud_instance_ip=$(gcloud compute instances describe \
    --zone "$GCP_ZONE" --project "$GCP_PROJECT" "${cluster_name}" \
    --format='get(networkInterfaces[0].networkIP)')

  # Now start some simultaneous background jobs.
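  # Each ssh below uses -N (run no remote command) and -T (no tty), so the process exists only to
  # hold open the -L local port forward. ExitOnForwardFailure=yes makes ssh exit if the forward
  # cannot be established, which lets the liveness check further below notice the failure.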
  for mapping in "${ssh_mappings[@]}"; do
    echo "Starting ssh for temporary user ${unique_username} to map port ${mapping} ..."
    ssh "${unique_username}@${gcloud_instance_ip}" -i "${ssh_key_file}" \
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ExitOnForwardFailure=yes -NT -L "${mapping}" &
    background_pids+=($!)
  done

# For other kinds of remote clusters, "kubectl port-forward" works fine.
elif [[ "${START_KUBECTL_PROXY:-no}" == "yes" ]]; then
  for cmd_and_args_array_name in "${kubectl_mapping_commands[@]}"; do
    # All these array gymnastics are to avoid using eval to run the command, because
    # eval makes it very hard to kill the background kubectl process during trap cleanup.
    cmd_and_args_array_name="$cmd_and_args_array_name""[@]"
    cmd_and_args_array=("${!cmd_and_args_array_name}")
    echo "Starting " "${cmd_and_args_array[@]}"
    "${cmd_and_args_array[@]}" | grep --line-buffered -v "Handling connection" &
    background_pids+=($!)
  done
fi

# Give the background commands a few moments to run, only to avoid having their stdout
# interleave too much with the stdout of the commands that we're about to run below. This
# is not needed for correctness (the while loop below is what avoids the race).
sleep 10

# If we started either style of port forwarding above, then wait for all of the ports to
# start working before we start the integration tests to avoid a race between the port
# forwarding starting and the first integration test which tries to use one of these ports.
if [[ "${START_GCLOUD_PROXY:-no}" == "yes" || "${START_KUBECTL_PROXY:-no}" == "yes" ]]; then
while true; do
sleep 1
for pid in "${background_pids[@]}"; do
if ! ps -p "$pid" >/dev/null; then
echo "Background port-forward process $pid seems to have died. Exiting. :("
exit 1
fi
done
succeeded=true
# Try to curl an endpoint which should succeed through each port-forwarded port.
for health_check in "${port_health_checks[@]}"; do
echo "$health_check"
if ! eval "$health_check" >/dev/null; then
succeeded=false
break
fi
done
if [[ $succeeded == "true" ]]; then
echo "All port-forwarded ports are ready."
break
fi
echo "Waiting for port-forwarded ports to be ready..."
done
fi

# Print tool versions for the logs.
go version
if [[ "$OSTYPE" != "darwin"* ]]; then
  google-chrome --version
fi

# If the CLI has been pre-compiled, then use it.
if [[ -f "$pinniped_test_cli" ]]; then
  export PINNIPED_TEST_CLI="$pinniped_test_cli"
fi

# Unset these before running the integration tests, to try to hide the GCP_JSON_KEY credential from the tests.
if [[ -n "${GCP_JSON_KEY:-}" ]]; then
  unset GCP_JSON_KEY GCP_ZONE GCP_PROJECT GCP_USERNAME
fi
if [ -d "../test-output" ]; then
# this is probably running in CI, and test-output is the name of the concourse output directory
# that we put the file in so that the next task can upload it to GCS.
# we need to chmod it so our non-root user can write to it.
chmod 777 ../test-output
jsonfile_arg="../test-output/testoutput.log"
else
# otherwise, we're probably running locally and don't actually want to output the logs to a file
# to aggregate and analyze later.
jsonfile_arg="/dev/null"
fi
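# TEST_RUN_REGEX may be used to run only a subset of the tests, e.g. (test name is illustrative):
#   TEST_RUN_REGEX='TestSupervisorLogin' ./task.sh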
test_run_regex=${TEST_RUN_REGEX:-'.*'}
if [[ -f "$integration_test_binary" ]]; then
  # If the integration test suite has been pre-compiled, then use it to run the tests.
  test_command="gotestsum --raw-command --format standard-verbose --jsonfile $jsonfile_arg -- go tool test2json -t -p pkgname \"$integration_test_binary\" -test.v -test.count=1 -test.timeout=70m -test.run='${test_run_regex}'"
else
  # Otherwise just run the tests with "go test".
  test_command="gotestsum --format standard-verbose --jsonfile $jsonfile_arg ./test/integration/ -- -race -v -count 1 -timeout 70m -run '${test_run_regex}'"
fi

# Run the integration tests. They can assume that the app is already deployed
# and that kubectl is configured to talk to the cluster. They also have the
# k14s tools available (ytt, kapp, etc.) in case they want to do more deploys.
if [[ "$(id -u)" == "0" ]]; then
# Downgrade to a non-root user to run the tests. We don't want them reading the
# environment of any parent process, e.g. by reading from /proc. This user account
# was created in the Dockerfile of the container image used to run this script in CI.
# It is okay if $GCP_JSON_KEY is empty or unset, either way we've avoided sharing the
# credential with the subprocess.
if [[ -n $(su testrunner -c "echo $GCP_JSON_KEY") ]]; then
echo "Tried to obscure the GCP_JSON_KEY secret from the testrunner user but it didn't work!"
exit 1
fi
# This should not be necessary, but something strange started happening after upgrading to Concourse v7.7.0
# where sometimes the owner and group IDs of these directories are wrong inside the container on Concourse.
# Attempting the following chown as a workaround, which should change the owner/group of the files back to
# what they were in the container image.
chown -R testrunner:testrunner /home/testrunner
echo "Downgrading to user testrunner and running: ${test_command}"
# su without "-" keeps the parent environment variables, but we've already deleted the credential variables.
su testrunner -c "$test_command"
else
# Already non-root, so just run as yourself.
echo "Running: ${test_command}"
eval "$test_command"
fi