From ecde8fa8af5a0e1dde962079ac846beafa808a17 Mon Sep 17 00:00:00 2001 From: Ryan Richard Date: Mon, 17 Aug 2020 16:44:42 -0700 Subject: [PATCH 1/2] Implement basic liveness and readiness probes - Call the auto-generated /healthz endpoint of our aggregated API server - Use http for liveness even though tcp seems like it might be more appropriate, because tcp probes cause TLS handshake errors to appear in our logs every few seconds - Use conservative timeouts and retries on the liveness probe to avoid having our container get restarted when it is temporarily slow due to running in an environment under resource pressure - Use less conservative timeouts and retries for the readiness probe, so that an unhealthy pod is removed from the service sooner than its container would be restarted - Tuning the settings for retries and timeouts seems to be a mysterious art, so these are just a first draft --- deploy/deployment.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/deploy/deployment.yaml b/deploy/deployment.yaml index f5ef824b5..d0a3cac0a 100644 --- a/deploy/deployment.yaml +++ b/deploy/deployment.yaml @@ -88,6 +88,24 @@ spec: mountPath: /etc/podinfo - name: k8s-certs mountPath: /etc/kubernetes/pki + livenessProbe: + httpGet: + path: /healthz + port: 443 + scheme: HTTPS + initialDelaySeconds: 20 + timeoutSeconds: 15 + periodSeconds: 10 + failureThreshold: 5 + readinessProbe: + httpGet: + path: /healthz + port: 443 + scheme: HTTPS + initialDelaySeconds: 20 + timeoutSeconds: 3 + periodSeconds: 10 + failureThreshold: 3 volumes: - name: config-volume configMap: From 003aef75d274a985fe5f195d1345473d86d79383 Mon Sep 17 00:00:00 2001 From: Ryan Richard Date: Tue, 18 Aug 2020 09:18:51 -0700 Subject: [PATCH 2/2] For liveness and readiness, succeed quickly and fail slowly - No reason to wait a long time before the first check, since our app should start quickly --- deploy/deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git
a/deploy/deployment.yaml b/deploy/deployment.yaml index d0a3cac0a..48c9b8d86 100644 --- a/deploy/deployment.yaml +++ b/deploy/deployment.yaml @@ -93,7 +93,7 @@ spec: path: /healthz port: 443 scheme: HTTPS - initialDelaySeconds: 20 + initialDelaySeconds: 2 timeoutSeconds: 15 periodSeconds: 10 failureThreshold: 5 @@ -102,7 +102,7 @@ spec: path: /healthz port: 443 scheme: HTTPS - initialDelaySeconds: 20 + initialDelaySeconds: 2 timeoutSeconds: 3 periodSeconds: 10 failureThreshold: 3