From 9398b9622eb5312ce898aba3e8056ee50eeae316 Mon Sep 17 00:00:00 2001 From: Ryan Richard Date: Wed, 2 Jul 2025 10:57:58 -0700 Subject: [PATCH] preparing to create and use GKE acceptance cluster (but need DNS first) --- hack/create-gke-acceptance-env.sh | 43 ++++++-- pipelines/main/pipeline.yml | 41 ++++--- .../prepare-cluster-for-integration-tests.sh | 100 +----------------- .../shared-tasks/deploy-gke-cluster/task.sh | 1 - .../deploy-to-acceptance-gke/task.sh | 7 +- 5 files changed, 67 insertions(+), 125 deletions(-) diff --git a/hack/create-gke-acceptance-env.sh b/hack/create-gke-acceptance-env.sh index 5e21d17db..e50c255dd 100755 --- a/hack/create-gke-acceptance-env.sh +++ b/hack/create-gke-acceptance-env.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright 2020-2024 the Pinniped contributors. All Rights Reserved. +# Copyright 2020-2025 the Pinniped contributors. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -15,6 +15,22 @@ if [[ -z "${PINNIPED_GCP_PROJECT:-}" ]]; then exit 1 fi +if [[ -z "${SHARED_VPC_PROJECT:-}" ]]; then + echo "SHARED_VPC_PROJECT env var must be set" + exit 1 +fi +if [[ -z "${SHARED_VPC_NAME:-}" ]]; then + echo "SHARED_VPC_NAME env var must be set" + exit 1 +fi +if [[ -z "${SUBNET_NAME:-}" ]]; then + echo "SUBNET_NAME env var must be set" + exit 1 +fi + +CLUSTER_ZONE="us-west1-c" +SUBNET_REGION="us-west1" + # Create (or recreate) a GKE acceptance cluster. # Pro tip: The GCP Console UI can help you build this command. # The following fields were customized, and all of the others are left as the GCP Console's defaults: @@ -23,22 +39,33 @@ fi # - Num nodes - sized smaller to be cheaper # - Maintenance window start and recurrence - to avoid downtime during business hours # - Issue client certificate - to make it possible to use an admin kubeconfig without the GKE auth plugin +# - tags, authorized networks, private nodes, private endpoint, network, subnet, and secondary ranges gcloud container --project "$PINNIPED_GCP_PROJECT" clusters create "gke-acceptance-cluster" \ - --zone "us-central1-c" --no-enable-basic-auth --cluster-version "1.30.4-gke.1348000" --release-channel "regular" \ + --zone "$CLUSTER_ZONE" \ + --no-enable-basic-auth \ + --cluster-version "1.33.1-gke.1584000" \ + --release-channel "regular" \ --machine-type "e2-medium" \ --image-type "COS_CONTAINERD" --disk-type "pd-balanced" --disk-size "100" --metadata disable-legacy-endpoints=true \ --scopes "https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" \ --num-nodes "1" \ --logging=SYSTEM,WORKLOAD --monitoring=SYSTEM,STORAGE,POD,DEPLOYMENT,STATEFULSET,DAEMONSET,HPA,CADVISOR,KUBELET \ - --enable-ip-alias \ - --network "projects/$PINNIPED_GCP_PROJECT/global/networks/default" \ - --subnetwork "projects/$PINNIPED_GCP_PROJECT/regions/us-central1/subnetworks/default" \ --no-enable-intra-node-visibility \ --default-max-pods-per-node "110" \ - --security-posture=standard --workload-vulnerability-scanning=disabled --no-enable-master-authorized-networks \ + --security-posture=standard --workload-vulnerability-scanning=disabled \ --addons HorizontalPodAutoscaling,HttpLoadBalancing,GcePersistentDiskCsiDriver \ --enable-autoupgrade --enable-autorepair --max-surge-upgrade 1 --max-unavailable-upgrade 0 \ - --binauthz-evaluation-mode=DISABLED --enable-managed-prometheus --enable-shielded-nodes --node-locations "us-central1-c" \ + --binauthz-evaluation-mode=DISABLED --enable-managed-prometheus --enable-shielded-nodes --node-locations "$CLUSTER_ZONE" \ --maintenance-window-start "2020-07-01T03:00:00Z" --maintenance-window-end "2020-07-01T11:00:00Z" \ --maintenance-window-recurrence "FREQ=WEEKLY;BYDAY=MO,TU,WE,TH,FR,SA,SU" \ - --issue-client-certificate + --issue-client-certificate \ + --tags "gke-broadcom" \ + --enable-master-authorized-networks \ + --master-authorized-networks "10.0.0.0/8" \ + --enable-private-nodes \ + --enable-private-endpoint \ + --enable-ip-alias \ + --network "projects/${SHARED_VPC_PROJECT}/global/networks/${SHARED_VPC_NAME}" \ + --subnetwork "projects/${SHARED_VPC_PROJECT}/regions/${SUBNET_REGION}/subnetworks/${SUBNET_NAME}" \ + --cluster-secondary-range-name "services" \ + --services-secondary-range-name "pods" diff --git a/pipelines/main/pipeline.yml b/pipelines/main/pipeline.yml index 8f0afc714..13ca9c3d8 100644 --- a/pipelines/main/pipeline.yml +++ b/pipelines/main/pipeline.yml @@ -530,7 +530,7 @@ resources: check_every: 5m source: driver: gcs - bucket: tanzu-user-authentication-private-ci + bucket: pinniped-ci-version-state key: semver/0.0.x-version.txt json_key: ((gcr-image-pusher-json-key)) initial_version: 0.0.0 @@ -1828,11 +1828,11 @@ jobs: # We don't need to run these on every version of Kubernetes for Kind in this pipeline, so we choose to run # them on one version to get some coverage. # TODO: replace this with some other LDAP and open firewall for outgoing LDAP and LDAPs -# <<: *jumpcloud_integration_env_vars + # <<: *jumpcloud_integration_env_vars # The following AD params enable the ActiveDirectory integration tests. We don't need to run these on every # version of Kubernetes for Kind in this pipeline, so we choose to run them on one version to get some coverage. # TODO: bring this back with a new AD server -# <<: *active_directory_integration_env_vars + # <<: *active_directory_integration_env_vars # The following params enable the GitHub integration tests. We don't need to run these on every # version of Kubernetes for Kind in this pipeline, so we choose to run them on one version to get some coverage. <<: *github_integration_env_vars @@ -1962,7 +1962,8 @@ jobs: # The following AD params enable the ActiveDirectory integration tests. We don't need to run these on every # version of Kubernetes for Kind in this pipeline, but it is useful to know if we can communicate with our # AD server when using FIPS cipher suites. - <<: *active_directory_integration_env_vars + # TODO: bring this back with a new AD server + # <<: *active_directory_integration_env_vars # The following params enable the GitHub integration tests. We don't need to run these on every # version of Kubernetes for Kind in this pipeline, but it is useful to know if we can communicate with # GitHub when using FIPS cipher suites. @@ -2494,22 +2495,30 @@ jobs: DEPLOY_LOCAL_USER_AUTHENTICATOR: "yes" GKE_CLUSTER_NAME: gke-acceptance-cluster SUPERVISOR_AND_CONCIERGE_NO_CPU_REQUEST: true - RESERVED_LOAD_BALANCER_STATIC_IP: 35.224.24.196 - LOAD_BALANCER_DNS_NAME: gke-acceptance-supervisor-lb.test.pinniped.dev + # Note that this static IP reservation needs to be manually created before the first time that this job is run, + # and the reserved IP needs to be updated here. Create it in the same subnet as the acceptance cluster itself. + # This is the IP address of a static IP reservation in GCP, not the name of the reservation. The name of this + # reservation in GCP is currently gke-acceptance-test-supervisor-loadbalancer-ip, and it can be viewed in the + # GCP Console's "IP Addresses" UI. + RESERVED_LOAD_BALANCER_STATIC_IP: 10.31.141.181 + # This DNS name needs to be manually created as an "A" record pointing to the above reserved IP address for + # the LB before this job is run. + LOAD_BALANCER_DNS_NAME: gke-acceptance-supervisor-lb.test.pinniped.broadcom.net + # Note that this static IP needs to be manually reserved in GCP before we run this job for the first time + # for this GKE cluster. Create it in the same subnet as the acceptance cluster itself. This needs to be the + # name of the IP reservation in GCP, not the IP address itself, because that's how a GKE Ingress accepts + # static IPs. It can be viewed in the GCP Console's "IP Addresses" UI. INGRESS_STATIC_IP_GCLOUD_NAME: gke-acceptance-test-supervisor-ingress-ip - INGRESS_DNS_ENTRY_GCLOUD_NAME: gke-acceptance-supervisor.test.pinniped.dev + # This DNS name needs to be manually created as an "A" record pointing to the above reserved IP address for + # the ingress before this job is run. + INGRESS_DNS_NAME: gke-acceptance-supervisor-ingress.test.pinniped.broadcom.net <<: *okta_integration_env_vars OKTA_SUPERVISOR_CALLBACK: ((okta-supervisor-callback)) - <<: *jumpcloud_integration_env_vars - <<: *active_directory_integration_env_vars + # TODO: replace this with some other LDAP and open firewall for outgoing LDAP and LDAPs + # <<: *jumpcloud_integration_env_vars + # TODO: bring this back with a new AD server + # <<: *active_directory_integration_env_vars <<: *github_integration_env_vars -# - task: install-and-configure-cert-manager -# file: pinniped-ci/pipelines/shared-tasks/install-and-configure-cert-manager/task.yml -# image: k8s-app-deployer-image -# timeout: 15m -# params: -# PINNIPED_GCP_PROJECT: ((gcp-project-name)) -# CERT_MANAGER_DNS_ADMIN_JSON_KEY: ((cert-manager-dns-admin-json-key)) - task: run-integration-tests timeout: 75m file: pinniped-ci/pipelines/shared-tasks/run-integration-tests/task.yml diff --git a/pipelines/shared-helpers/prepare-cluster-for-integration-tests.sh b/pipelines/shared-helpers/prepare-cluster-for-integration-tests.sh index c5179afef..7e3f6bfba 100755 --- a/pipelines/shared-helpers/prepare-cluster-for-integration-tests.sh +++ b/pipelines/shared-helpers/prepare-cluster-for-integration-tests.sh @@ -92,16 +92,9 @@ set -euo pipefail # NodePort Service defined and create an Ingress connected to that Service. # When set to "yes" the following additional variables are expected: # - $SUPERVISOR_INGRESS_STATIC_IP_NAME: The name of the static IP resource from the -# underlying cloud infrastructure platform. Optional. +# underlying cloud infrastructure platform. Required when $SUPERVISOR_INGRESS is "yes". # - $SUPERVISOR_INGRESS_DNS_NAME: The DNS hostname name associated with the # ingress' IP address. Required when $SUPERVISOR_INGRESS is "yes". -# - $SUPERVISOR_INGRESS_PATH_PATTERN: The path that will be set in the Ingress object -# (e.g., "/", "/*"; this depends on what is supported by the underlying platform). -# Required when $SUPERVISOR_INGRESS is "yes". -# - If the $SUPERVISOR_INGRESS_DNS_NAME is given without the -# $SUPERVISOR_INGRESS_STATIC_IP_NAME, then allow the ingress service -# to choose its own IP address, and dynamically register that address as the name -# specified in $SUPERVISOR_INGRESS_DNS_NAME using the Cloud DNS service. # - When neither $SUPERVISOR_LOAD_BALANCER nor $SUPERVISOR_INGRESS then we will use # nodeport services to make the supervisor available. In this case you may specify # $PINNIPED_SUPERVISOR_HTTP_NODEPORT and $PINNIPED_SUPERVISOR_HTTPS_NODEPORT if you @@ -176,64 +169,6 @@ function print_redacted_manifest() { print_or_redact_doc "$doc" } -function update_gcloud_dns_record() { - if [[ -z "${PINNIPED_GCP_PROJECT:-}" ]]; then - echo "PINNIPED_GCP_PROJECT env var must be set when using update_gcloud_dns_record" - exit 1 - fi - - local dns_name=$1 - local new_ip=$2 - local dns_record_name="${dns_name}." - local dns_zone="pinniped-dev" - local dns_project="$PINNIPED_GCP_PROJECT" - - # Login to gcloud CLI - gcloud auth activate-service-account "$GKE_USERNAME" --key-file <(echo "$GKE_JSON_KEY") --project "$dns_project" - - # Get the current value of the DNS A record. - # We assume that this record already exists because it was manually created. - # We also assume in the transaction commands below that it was created with a TTL of 30 seconds. - current_dns_record_ip=$(gcloud dns record-sets list --zone "$dns_zone" \ - --project "$dns_project" --name "$dns_record_name" --format json | - jq -r ".[] | select(.name ==\"${dns_record_name}\") | .rrdatas[0]") - - if [[ "$current_dns_record_ip" == "$new_ip" ]]; then - echo "No update needed: DNS record $dns_record_name was already set to $new_ip" - else - echo "Changing DNS record $dns_record_name from $current_dns_record_ip to $new_ip ..." - - # Updating a DNS record with gcloud must be done with a remove and an add wrapped in a transaction. - gcloud dns record-sets transaction start --zone "$dns_zone" --project "$dns_project" - gcloud dns record-sets transaction remove "$current_dns_record_ip" --name "$dns_name" \ - --ttl "30" --type "A" --zone "$dns_zone" --project "$dns_project" - gcloud dns record-sets transaction add "$new_ip" --name "$dns_name" \ - --ttl "30" --type "A" --zone "$dns_zone" --project "$dns_project" - change_id=$(gcloud dns record-sets transaction execute --zone "$dns_zone" --project "$dns_project" --format json | jq -r '.id') - - # Wait for that transaction to commit. This is usually quick. - change_status="not-done" - while [[ "$change_status" != "done" ]]; do - sleep 3 - change_status=$(gcloud dns record-sets changes describe "$change_id" \ - --zone "$dns_zone" --project "$dns_project" --format json | jq -r '.status') - echo "Waiting for change $change_id to have status 'done'. Current status: $change_status" - done - - # Wait for DNS propagation. The TTL is 30 seconds, so this shouldn't take too long. - echo "Waiting for new IP address $new_ip to appear in the result of a local DNS query. This may take a few minutes..." - while true; do - dig_result=$(dig +short "$dns_name") - echo "dig result for $dns_name: $dig_result" - if [[ "$dig_result" == "$new_ip" ]]; then - echo "New IP address has finished DNS propagation. Done with DNS update!" - break - fi - sleep 5 - done - fi -} - if [[ "${TMC_API_TOKEN:-}" == "" && "${DEPLOY_LOCAL_USER_AUTHENTICATOR:-no}" != "yes" ]]; then echo "Must use either \$TMC_API_TOKEN or \$DEPLOY_LOCAL_USER_AUTHENTICATOR" exit 1 @@ -1048,12 +983,6 @@ if [[ "${SUPERVISOR_LOAD_BALANCER:-no}" == "yes" ]]; then echo "Load balancer reported ingress: $ingress_json" ingress_ip=$(echo "$ingress_json" | jq -r '.ingress[0].ip') - if [[ "${SUPERVISOR_LOAD_BALANCER_STATIC_IP:-}" == "" ]]; then - # No static IP was provided, so the load balancer was allowed to choose its own IP. - # Update the DNS record associated with $SUPERVISOR_LOAD_BALANCER_DNS_NAME to make it match the new IP. - update_gcloud_dns_record "$SUPERVISOR_LOAD_BALANCER_DNS_NAME" "$ingress_ip" - fi - # Use the published ingress address for the integration test env vars below. supervisor_https_address="https://${SUPERVISOR_LOAD_BALANCER_DNS_NAME}:443" elif [[ "${USE_LOAD_BALANCERS_FOR_DEX_AND_SUPERVISOR:-no}" == "yes" ]]; then @@ -1160,12 +1089,6 @@ EOF kubectl get -n "$supervisor_namespace" secret "$ingress_tls_secret" -o jsonpath=\{.data.'tls\.crt'\} | base64 -d >"$ingress_tls_cert_file" fi - # If a static IP name was provided then use it. Otherwise, don't include the annotation at all. - static_ip_annotation="" - if [[ "${SUPERVISOR_INGRESS_STATIC_IP_NAME:-}" != "" ]]; then - static_ip_annotation="kubernetes.io/ingress.global-static-ip-name: ${SUPERVISOR_INGRESS_STATIC_IP_NAME}" - fi - if [[ "$cluster_has_gke_backend_config" == "yes" ]]; then # Get the nodePort port number that was dynamically assigned to the nodeport service. nodeport_service_port=$(kubectl get service -n "${supervisor_namespace}" "${supervisor_app_name}-nodeport" -o jsonpath='{.spec.ports[0].nodePort}') @@ -1200,6 +1123,7 @@ metadata: namespace: ${supervisor_namespace} annotations: kubernetes.io/ingress.class: "gce-internal" + kubernetes.io/ingress.regional-static-ip-name: "${SUPERVISOR_INGRESS_STATIC_IP_NAME}" kubernetes.io/ingress.allow-http: "false" nginx.ingress.kubernetes.io/backend-protocol: HTTPS # TODO Re-enable backend TLS cert verification once the Supervisor's default TLS cert is generated by automation in this script. @@ -1207,7 +1131,6 @@ metadata: #nginx.ingress.kubernetes.io/proxy-ssl-verify: "on" #nginx.ingress.kubernetes.io/proxy-ssl-secret: ${supervisor_namespace}/${supervisor_app_name}-default-tls-certificate nginx.ingress.kubernetes.io/proxy-ssl-verify: "off" - ${static_ip_annotation} spec: defaultBackend: service: @@ -1220,25 +1143,6 @@ spec: - ${SUPERVISOR_INGRESS_DNS_NAME} EOF - # If no static IP was provided for the ingress, then register the dynamic IP of the ingress with the DNS provider. - if [[ "${SUPERVISOR_INGRESS_STATIC_IP_NAME:-}" == "" ]]; then - # Wait for the ingress to get an IP - ingress_json='{}' - while [[ "$ingress_json" == '{}' ]]; do - echo "Checking for ingress address..." - sleep 1 - ingress_json=$(kubectl get ingress "${supervisor_app_name}" -n "$supervisor_namespace" -o json | - jq -r '.status.loadBalancer') - done - - echo "Ingress reported address: $ingress_json" - ingress_ip=$(echo "$ingress_json" | jq -r '.ingress[0].ip') - - # No static IP was provided, so the load balancer was allowed to choose its own IP. - # Update the DNS record associated with $SUPERVISOR_INGRESS_DNS_NAME to make it match the new IP. - update_gcloud_dns_record "$SUPERVISOR_INGRESS_DNS_NAME" "$ingress_ip" - fi - # Wait for the Ingress frontend to be up and running. Wait forever... until this Concourse task times out. healthz_via_ingress_url="https://${SUPERVISOR_INGRESS_DNS_NAME}/healthz" echo "The Ingress TLS CA bundle is:" diff --git a/pipelines/shared-tasks/deploy-gke-cluster/task.sh b/pipelines/shared-tasks/deploy-gke-cluster/task.sh index afcfe5489..2aa90b703 100755 --- a/pipelines/shared-tasks/deploy-gke-cluster/task.sh +++ b/pipelines/shared-tasks/deploy-gke-cluster/task.sh @@ -65,7 +65,6 @@ gcloud container clusters create "$CLUSTER_NAME" \ --subnetwork "projects/${SHARED_VPC_PROJECT}/regions/${SUBNET_REGION}/subnetworks/${SUBNET_NAME}" \ --cluster-secondary-range-name "services" \ --services-secondary-range-name "pods" - # TODO is this also needed? --default-max-pods-per-node "64" # Get the cluster details back, including the admin certificate: gcloud container clusters describe "$CLUSTER_NAME" --zone "$CLUSTER_ZONE" --format json \ diff --git a/pipelines/shared-tasks/deploy-to-acceptance-gke/task.sh b/pipelines/shared-tasks/deploy-to-acceptance-gke/task.sh index b4e2bee80..c2b85143c 100755 --- a/pipelines/shared-tasks/deploy-to-acceptance-gke/task.sh +++ b/pipelines/shared-tasks/deploy-to-acceptance-gke/task.sh @@ -5,6 +5,10 @@ set -euo pipefail +# TODO: REMOVE THIS AFTER WE CREATE THE TWO DNS RECORDS NEEDED FOR THIS JOB +echo "TEMPORARILY CAUSING THIS JOB TO FAIL IMMEDIATELY, UNTIL WE CAN CREATE THE TWO DNS RECORDS NEEDED FOR THIS JOB TO RUN" +exit 1 + if [[ -z "${PINNIPED_GCP_PROJECT:-}" ]]; then echo "PINNIPED_GCP_PROJECT env var must be set" exit 1 @@ -48,9 +52,8 @@ CONCIERGE_NAMESPACE=concierge-acceptance \ SUPERVISOR_LOAD_BALANCER_DNS_NAME="$LOAD_BALANCER_DNS_NAME" \ SUPERVISOR_LOAD_BALANCER_STATIC_IP="$RESERVED_LOAD_BALANCER_STATIC_IP" \ SUPERVISOR_INGRESS=yes \ - SUPERVISOR_INGRESS_DNS_NAME="$INGRESS_DNS_ENTRY_GCLOUD_NAME" \ + SUPERVISOR_INGRESS_DNS_NAME="$INGRESS_DNS_NAME" \ SUPERVISOR_INGRESS_STATIC_IP_NAME="$INGRESS_STATIC_IP_GCLOUD_NAME" \ - SUPERVISOR_INGRESS_PATH_PATTERN='/*' \ IMAGE_PULL_SECRET="$image_pull_secret" \ IMAGE_REPO="$CI_BUILD_IMAGE_NAME" \ IMAGE_DIGEST="$digest" \