preparing to create and use GKE acceptance cluster (but need DNS first)

This commit is contained in:
Ryan Richard
2025-07-02 10:57:58 -07:00
parent 526ac86f44
commit 9398b9622e
5 changed files with 67 additions and 125 deletions

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env bash
# Copyright 2020-2024 the Pinniped contributors. All Rights Reserved.
# Copyright 2020-2025 the Pinniped contributors. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
set -euo pipefail
@@ -15,6 +15,22 @@ if [[ -z "${PINNIPED_GCP_PROJECT:-}" ]]; then
exit 1
fi
if [[ -z "${SHARED_VPC_PROJECT:-}" ]]; then
echo "SHARED_VPC_PROJECT env var must be set"
exit 1
fi
if [[ -z "${SHARED_VPC_NAME:-}" ]]; then
echo "SHARED_VPC_NAME env var must be set"
exit 1
fi
if [[ -z "${SUBNET_NAME:-}" ]]; then
echo "SUBNET_NAME env var must be set"
exit 1
fi
CLUSTER_ZONE="us-west1-c"
SUBNET_REGION="us-west1"
# Create (or recreate) a GKE acceptance cluster.
# Pro tip: The GCP Console UI can help you build this command.
# The following fields were customized, and all of the others are left as the GCP Console's defaults:
@@ -23,22 +39,33 @@ fi
# - Num nodes - sized smaller to be cheaper
# - Maintenance window start and recurrence - to avoid downtime during business hours
# - Issue client certificate - to make it possible to use an admin kubeconfig without the GKE auth plugin
# - tags, authorized networks, private nodes, private endpoint, network, subnet, and secondary ranges
gcloud container --project "$PINNIPED_GCP_PROJECT" clusters create "gke-acceptance-cluster" \
--zone "us-central1-c" --no-enable-basic-auth --cluster-version "1.30.4-gke.1348000" --release-channel "regular" \
--zone "$CLUSTER_ZONE" \
--no-enable-basic-auth \
--cluster-version "1.33.1-gke.1584000" \
--release-channel "regular" \
--machine-type "e2-medium" \
--image-type "COS_CONTAINERD" --disk-type "pd-balanced" --disk-size "100" --metadata disable-legacy-endpoints=true \
--scopes "https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" \
--num-nodes "1" \
--logging=SYSTEM,WORKLOAD --monitoring=SYSTEM,STORAGE,POD,DEPLOYMENT,STATEFULSET,DAEMONSET,HPA,CADVISOR,KUBELET \
--enable-ip-alias \
--network "projects/$PINNIPED_GCP_PROJECT/global/networks/default" \
--subnetwork "projects/$PINNIPED_GCP_PROJECT/regions/us-central1/subnetworks/default" \
--no-enable-intra-node-visibility \
--default-max-pods-per-node "110" \
--security-posture=standard --workload-vulnerability-scanning=disabled --no-enable-master-authorized-networks \
--security-posture=standard --workload-vulnerability-scanning=disabled \
--addons HorizontalPodAutoscaling,HttpLoadBalancing,GcePersistentDiskCsiDriver \
--enable-autoupgrade --enable-autorepair --max-surge-upgrade 1 --max-unavailable-upgrade 0 \
--binauthz-evaluation-mode=DISABLED --enable-managed-prometheus --enable-shielded-nodes --node-locations "us-central1-c" \
--binauthz-evaluation-mode=DISABLED --enable-managed-prometheus --enable-shielded-nodes --node-locations "$CLUSTER_ZONE" \
--maintenance-window-start "2020-07-01T03:00:00Z" --maintenance-window-end "2020-07-01T11:00:00Z" \
--maintenance-window-recurrence "FREQ=WEEKLY;BYDAY=MO,TU,WE,TH,FR,SA,SU" \
--issue-client-certificate
--issue-client-certificate \
--tags "gke-broadcom" \
--enable-master-authorized-networks \
--master-authorized-networks "10.0.0.0/8" \
--enable-private-nodes \
--enable-private-endpoint \
--enable-ip-alias \
--network "projects/${SHARED_VPC_PROJECT}/global/networks/${SHARED_VPC_NAME}" \
--subnetwork "projects/${SHARED_VPC_PROJECT}/regions/${SUBNET_REGION}/subnetworks/${SUBNET_NAME}" \
--cluster-secondary-range-name "services" \
--services-secondary-range-name "pods"

View File

@@ -530,7 +530,7 @@ resources:
check_every: 5m
source:
driver: gcs
bucket: tanzu-user-authentication-private-ci
bucket: pinniped-ci-version-state
key: semver/0.0.x-version.txt
json_key: ((gcr-image-pusher-json-key))
initial_version: 0.0.0
@@ -1828,11 +1828,11 @@ jobs:
# We don't need to run these on every version of Kubernetes for Kind in this pipeline, so we choose to run
# them on one version to get some coverage.
# TODO: replace this with some other LDAP and open firewall for outgoing LDAP and LDAPs
# <<: *jumpcloud_integration_env_vars
# <<: *jumpcloud_integration_env_vars
# The following AD params enable the ActiveDirectory integration tests. We don't need to run these on every
# version of Kubernetes for Kind in this pipeline, so we choose to run them on one version to get some coverage.
# TODO: bring this back with a new AD server
# <<: *active_directory_integration_env_vars
# <<: *active_directory_integration_env_vars
# The following params enable the GitHub integration tests. We don't need to run these on every
# version of Kubernetes for Kind in this pipeline, so we choose to run them on one version to get some coverage.
<<: *github_integration_env_vars
@@ -1962,7 +1962,8 @@ jobs:
# The following AD params enable the ActiveDirectory integration tests. We don't need to run these on every
# version of Kubernetes for Kind in this pipeline, but it is useful to know if we can communicate with our
# AD server when using FIPS cipher suites.
<<: *active_directory_integration_env_vars
# TODO: bring this back with a new AD server
# <<: *active_directory_integration_env_vars
# The following params enable the GitHub integration tests. We don't need to run these on every
# version of Kubernetes for Kind in this pipeline, but it is useful to know if we can communicate with
# GitHub when using FIPS cipher suites.
@@ -2494,22 +2495,30 @@ jobs:
DEPLOY_LOCAL_USER_AUTHENTICATOR: "yes"
GKE_CLUSTER_NAME: gke-acceptance-cluster
SUPERVISOR_AND_CONCIERGE_NO_CPU_REQUEST: true
RESERVED_LOAD_BALANCER_STATIC_IP: 35.224.24.196
LOAD_BALANCER_DNS_NAME: gke-acceptance-supervisor-lb.test.pinniped.dev
# Note that this static IP reservation needs to be manually created before the first time that this job is run,
# and the reserved IP needs to be updated here. Create it in the same subnet as the acceptance cluster itself.
# This is the IP address of a static IP reservation in GCP, not the name of the reservation. The name of this
# reservation in GCP is currently gke-acceptance-test-supervisor-loadbalancer-ip, and it can be viewed in the
# GCP Console's "IP Addresses" UI.
RESERVED_LOAD_BALANCER_STATIC_IP: 10.31.141.181
# This DNS name needs to be manually created as an "A" record pointing to the above reserved IP address for
# the LB before this job is run.
LOAD_BALANCER_DNS_NAME: gke-acceptance-supervisor-lb.test.pinniped.broadcom.net
# Note that this static IP needs to be manually reserved in GCP before we run this job for the first time
# for this GKE cluster. Create it in the same subnet as the acceptance cluster itself. This needs to be the
# name of the IP reservation in GCP, not the IP address itself, because that's how a GKE Ingress accepts
# static IPs. It can be viewed in the GCP Console's "IP Addresses" UI.
INGRESS_STATIC_IP_GCLOUD_NAME: gke-acceptance-test-supervisor-ingress-ip
INGRESS_DNS_ENTRY_GCLOUD_NAME: gke-acceptance-supervisor.test.pinniped.dev
# This DNS name needs to be manually created as an "A" record pointing to the above reserved IP address for
# the ingress before this job is run.
INGRESS_DNS_NAME: gke-acceptance-supervisor-ingress.test.pinniped.broadcom.net
<<: *okta_integration_env_vars
OKTA_SUPERVISOR_CALLBACK: ((okta-supervisor-callback))
<<: *jumpcloud_integration_env_vars
<<: *active_directory_integration_env_vars
# TODO: replace this with some other LDAP and open firewall for outgoing LDAP and LDAPs
# <<: *jumpcloud_integration_env_vars
# TODO: bring this back with a new AD server
# <<: *active_directory_integration_env_vars
<<: *github_integration_env_vars
# - task: install-and-configure-cert-manager
# file: pinniped-ci/pipelines/shared-tasks/install-and-configure-cert-manager/task.yml
# image: k8s-app-deployer-image
# timeout: 15m
# params:
# PINNIPED_GCP_PROJECT: ((gcp-project-name))
# CERT_MANAGER_DNS_ADMIN_JSON_KEY: ((cert-manager-dns-admin-json-key))
- task: run-integration-tests
timeout: 75m
file: pinniped-ci/pipelines/shared-tasks/run-integration-tests/task.yml

View File

@@ -92,16 +92,9 @@ set -euo pipefail
# NodePort Service defined and create an Ingress connected to that Service.
# When set to "yes" the following additional variables are expected:
# - $SUPERVISOR_INGRESS_STATIC_IP_NAME: The name of the static IP resource from the
# underlying cloud infrastructure platform. Optional.
# underlying cloud infrastructure platform. Required when $SUPERVISOR_INGRESS is "yes".
# - $SUPERVISOR_INGRESS_DNS_NAME: The DNS hostname associated with the
# ingress' IP address. Required when $SUPERVISOR_INGRESS is "yes".
# - $SUPERVISOR_INGRESS_PATH_PATTERN: The path that will be set in the Ingress object
# (e.g., "/", "/*"; this depends on what is supported by the underlying platform).
# Required when $SUPERVISOR_INGRESS is "yes".
# - If the $SUPERVISOR_INGRESS_DNS_NAME is given without the
# $SUPERVISOR_INGRESS_STATIC_IP_NAME, then allow the ingress service
# to choose its own IP address, and dynamically register that address as the name
# specified in $SUPERVISOR_INGRESS_DNS_NAME using the Cloud DNS service.
# - When neither $SUPERVISOR_LOAD_BALANCER nor $SUPERVISOR_INGRESS is set to "yes", we will use
# nodeport services to make the supervisor available. In this case you may specify
# $PINNIPED_SUPERVISOR_HTTP_NODEPORT and $PINNIPED_SUPERVISOR_HTTPS_NODEPORT if you
@@ -176,64 +169,6 @@ function print_redacted_manifest() {
print_or_redact_doc "$doc"
}
# Points an existing Cloud DNS "A" record at a new IP address and waits until
# the new address shows up in a local DNS query.
#
# Globals (read):
#   PINNIPED_GCP_PROJECT - GCP project used for the DNS zone; must be non-empty.
#   GKE_USERNAME         - service account given to `gcloud auth activate-service-account`.
#   GKE_JSON_KEY         - key file content for that service account.
# Arguments:
#   $1 - DNS name of the record, without the trailing dot.
#   $2 - IP address that the record should contain.
# Outputs:
#   Progress messages on stdout.
# Exits:
#   1 when PINNIPED_GCP_PROJECT is not set, or when the record has no current
#   value (it is expected to have been created manually beforehand).
function update_gcloud_dns_record() {
  if [[ -z "${PINNIPED_GCP_PROJECT:-}" ]]; then
    echo "PINNIPED_GCP_PROJECT env var must be set when using update_gcloud_dns_record"
    exit 1
  fi

  local dns_name=$1
  local new_ip=$2
  local dns_record_name="${dns_name}."
  local dns_zone="pinniped-dev"
  local dns_project="$PINNIPED_GCP_PROJECT"
  # Declared separately from their assignments so that a failing command
  # substitution is not masked by the exit status of `local`, and so that these
  # working values do not leak into the caller's global scope.
  local current_dns_record_ip change_id change_status dig_result

  # Login to gcloud CLI
  gcloud auth activate-service-account "$GKE_USERNAME" --key-file <(echo "$GKE_JSON_KEY") --project "$dns_project"

  # Get the current value of the DNS A record.
  # We assume that this record already exists because it was manually created.
  # We also assume in the transaction commands below that it was created with a TTL of 30 seconds.
  current_dns_record_ip=$(gcloud dns record-sets list --zone "$dns_zone" \
    --project "$dns_project" --name "$dns_record_name" --format json |
    jq -r ".[] | select(.name ==\"${dns_record_name}\") | .rrdatas[0]")

  # Fail early with a clear message instead of starting a transaction whose
  # "remove" step cannot succeed because there is nothing to remove.
  if [[ -z "$current_dns_record_ip" || "$current_dns_record_ip" == "null" ]]; then
    echo "DNS record $dns_record_name was not found in zone $dns_zone. It must be created manually before it can be updated."
    exit 1
  fi

  if [[ "$current_dns_record_ip" == "$new_ip" ]]; then
    echo "No update needed: DNS record $dns_record_name was already set to $new_ip"
  else
    echo "Changing DNS record $dns_record_name from $current_dns_record_ip to $new_ip ..."

    # Updating a DNS record with gcloud must be done with a remove and an add wrapped in a transaction.
    gcloud dns record-sets transaction start --zone "$dns_zone" --project "$dns_project"
    gcloud dns record-sets transaction remove "$current_dns_record_ip" --name "$dns_name" \
      --ttl "30" --type "A" --zone "$dns_zone" --project "$dns_project"
    gcloud dns record-sets transaction add "$new_ip" --name "$dns_name" \
      --ttl "30" --type "A" --zone "$dns_zone" --project "$dns_project"
    change_id=$(gcloud dns record-sets transaction execute --zone "$dns_zone" --project "$dns_project" --format json | jq -r '.id')

    # Wait for that transaction to commit. This is usually quick.
    change_status="not-done"
    while [[ "$change_status" != "done" ]]; do
      sleep 3
      change_status=$(gcloud dns record-sets changes describe "$change_id" \
        --zone "$dns_zone" --project "$dns_project" --format json | jq -r '.status')
      echo "Waiting for change $change_id to have status 'done'. Current status: $change_status"
    done

    # Wait for DNS propagation. The TTL is 30 seconds, so this shouldn't take too long.
    # There is deliberately no timeout here: the loop only ends once the local
    # resolver returns the new address.
    echo "Waiting for new IP address $new_ip to appear in the result of a local DNS query. This may take a few minutes..."
    while true; do
      dig_result=$(dig +short "$dns_name")
      echo "dig result for $dns_name: $dig_result"
      if [[ "$dig_result" == "$new_ip" ]]; then
        echo "New IP address has finished DNS propagation. Done with DNS update!"
        break
      fi
      sleep 5
    done
  fi
}
if [[ "${TMC_API_TOKEN:-}" == "" && "${DEPLOY_LOCAL_USER_AUTHENTICATOR:-no}" != "yes" ]]; then
echo "Must use either \$TMC_API_TOKEN or \$DEPLOY_LOCAL_USER_AUTHENTICATOR"
exit 1
@@ -1048,12 +983,6 @@ if [[ "${SUPERVISOR_LOAD_BALANCER:-no}" == "yes" ]]; then
echo "Load balancer reported ingress: $ingress_json"
ingress_ip=$(echo "$ingress_json" | jq -r '.ingress[0].ip')
if [[ "${SUPERVISOR_LOAD_BALANCER_STATIC_IP:-}" == "" ]]; then
# No static IP was provided, so the load balancer was allowed to choose its own IP.
# Update the DNS record associated with $SUPERVISOR_LOAD_BALANCER_DNS_NAME to make it match the new IP.
update_gcloud_dns_record "$SUPERVISOR_LOAD_BALANCER_DNS_NAME" "$ingress_ip"
fi
# Use the published ingress address for the integration test env vars below.
supervisor_https_address="https://${SUPERVISOR_LOAD_BALANCER_DNS_NAME}:443"
elif [[ "${USE_LOAD_BALANCERS_FOR_DEX_AND_SUPERVISOR:-no}" == "yes" ]]; then
@@ -1160,12 +1089,6 @@ EOF
kubectl get -n "$supervisor_namespace" secret "$ingress_tls_secret" -o jsonpath=\{.data.'tls\.crt'\} | base64 -d >"$ingress_tls_cert_file"
fi
# If a static IP name was provided then use it. Otherwise, don't include the annotation at all.
static_ip_annotation=""
if [[ "${SUPERVISOR_INGRESS_STATIC_IP_NAME:-}" != "" ]]; then
static_ip_annotation="kubernetes.io/ingress.global-static-ip-name: ${SUPERVISOR_INGRESS_STATIC_IP_NAME}"
fi
if [[ "$cluster_has_gke_backend_config" == "yes" ]]; then
# Get the nodePort port number that was dynamically assigned to the nodeport service.
nodeport_service_port=$(kubectl get service -n "${supervisor_namespace}" "${supervisor_app_name}-nodeport" -o jsonpath='{.spec.ports[0].nodePort}')
@@ -1200,6 +1123,7 @@ metadata:
namespace: ${supervisor_namespace}
annotations:
kubernetes.io/ingress.class: "gce-internal"
kubernetes.io/ingress.regional-static-ip-name: "${SUPERVISOR_INGRESS_STATIC_IP_NAME}"
kubernetes.io/ingress.allow-http: "false"
nginx.ingress.kubernetes.io/backend-protocol: HTTPS
# TODO Re-enable backend TLS cert verification once the Supervisor's default TLS cert is generated by automation in this script.
@@ -1207,7 +1131,6 @@ metadata:
#nginx.ingress.kubernetes.io/proxy-ssl-verify: "on"
#nginx.ingress.kubernetes.io/proxy-ssl-secret: ${supervisor_namespace}/${supervisor_app_name}-default-tls-certificate
nginx.ingress.kubernetes.io/proxy-ssl-verify: "off"
${static_ip_annotation}
spec:
defaultBackend:
service:
@@ -1220,25 +1143,6 @@ spec:
- ${SUPERVISOR_INGRESS_DNS_NAME}
EOF
# If no static IP was provided for the ingress, then register the dynamic IP of the ingress with the DNS provider.
if [[ "${SUPERVISOR_INGRESS_STATIC_IP_NAME:-}" == "" ]]; then
# Wait for the ingress to get an IP
ingress_json='{}'
while [[ "$ingress_json" == '{}' ]]; do
echo "Checking for ingress address..."
sleep 1
ingress_json=$(kubectl get ingress "${supervisor_app_name}" -n "$supervisor_namespace" -o json |
jq -r '.status.loadBalancer')
done
echo "Ingress reported address: $ingress_json"
ingress_ip=$(echo "$ingress_json" | jq -r '.ingress[0].ip')
# No static IP was provided, so the load balancer was allowed to choose its own IP.
# Update the DNS record associated with $SUPERVISOR_INGRESS_DNS_NAME to make it match the new IP.
update_gcloud_dns_record "$SUPERVISOR_INGRESS_DNS_NAME" "$ingress_ip"
fi
# Wait for the Ingress frontend to be up and running. Wait forever... until this Concourse task times out.
healthz_via_ingress_url="https://${SUPERVISOR_INGRESS_DNS_NAME}/healthz"
echo "The Ingress TLS CA bundle is:"

View File

@@ -65,7 +65,6 @@ gcloud container clusters create "$CLUSTER_NAME" \
--subnetwork "projects/${SHARED_VPC_PROJECT}/regions/${SUBNET_REGION}/subnetworks/${SUBNET_NAME}" \
--cluster-secondary-range-name "services" \
--services-secondary-range-name "pods"
# TODO is this also needed? --default-max-pods-per-node "64"
# Get the cluster details back, including the admin certificate:
gcloud container clusters describe "$CLUSTER_NAME" --zone "$CLUSTER_ZONE" --format json \

View File

@@ -5,6 +5,10 @@
set -euo pipefail
# TODO: REMOVE THIS AFTER WE CREATE THE TWO DNS RECORDS NEEDED FOR THIS JOB
echo "TEMPORARILY CAUSING THIS JOB TO FAIL IMMEDIATELY, UNTIL WE CAN CREATE THE TWO DNS RECORDS NEEDED FOR THIS JOB TO RUN"
exit 1
if [[ -z "${PINNIPED_GCP_PROJECT:-}" ]]; then
echo "PINNIPED_GCP_PROJECT env var must be set"
exit 1
@@ -48,9 +52,8 @@ CONCIERGE_NAMESPACE=concierge-acceptance \
SUPERVISOR_LOAD_BALANCER_DNS_NAME="$LOAD_BALANCER_DNS_NAME" \
SUPERVISOR_LOAD_BALANCER_STATIC_IP="$RESERVED_LOAD_BALANCER_STATIC_IP" \
SUPERVISOR_INGRESS=yes \
SUPERVISOR_INGRESS_DNS_NAME="$INGRESS_DNS_ENTRY_GCLOUD_NAME" \
SUPERVISOR_INGRESS_DNS_NAME="$INGRESS_DNS_NAME" \
SUPERVISOR_INGRESS_STATIC_IP_NAME="$INGRESS_STATIC_IP_GCLOUD_NAME" \
SUPERVISOR_INGRESS_PATH_PATTERN='/*' \
IMAGE_PULL_SECRET="$image_pull_secret" \
IMAGE_REPO="$CI_BUILD_IMAGE_NAME" \
IMAGE_DIGEST="$digest" \