From 96b72acb2d40c6ab6719e158e6fc3724deee1883 Mon Sep 17 00:00:00 2001 From: Nolan Brubaker Date: Wed, 20 Jun 2018 14:08:07 -0400 Subject: [PATCH] Record backup start and completion times Signed-off-by: Nolan Brubaker --- pkg/apis/ark/v1/backup.go | 11 ++++++ pkg/apis/ark/v1/zz_generated.deepcopy.go | 2 + pkg/cmd/util/output/backup_describer.go | 15 +++++++- pkg/controller/backup_controller.go | 6 +++ pkg/controller/backup_controller_test.go | 36 ++++++++++++++---- pkg/metrics/metrics.go | 47 +++++++++++++++++++++--- pkg/util/test/comparisons.go | 31 +++++++++++++++- 7 files changed, 134 insertions(+), 14 deletions(-) diff --git a/pkg/apis/ark/v1/backup.go b/pkg/apis/ark/v1/backup.go index b871d6d77..8c071d993 100644 --- a/pkg/apis/ark/v1/backup.go +++ b/pkg/apis/ark/v1/backup.go @@ -170,6 +170,17 @@ type BackupStatus struct { // ValidationErrors is a slice of all validation errors (if // applicable). ValidationErrors []string `json:"validationErrors"` + + // StartTimestamp records the time a backup was started. + // Separate from CreationTimestamp, since that value changes + // on restores. + // The server's time is used for StartTimestamps + StartTimestamp metav1.Time `json:"startTimestamp"` + + // CompletionTimestamp records the time a backup was completed. + // Completion time is recorded even on failed backups. + // The server's time is used for CompletionTimestamps + CompletionTimestamp metav1.Time `json:"completionTimestamp"` } // VolumeBackupInfo captures the required information about diff --git a/pkg/apis/ark/v1/zz_generated.deepcopy.go b/pkg/apis/ark/v1/zz_generated.deepcopy.go index 062b2bcc0..08e553872 100644 --- a/pkg/apis/ark/v1/zz_generated.deepcopy.go +++ b/pkg/apis/ark/v1/zz_generated.deepcopy.go @@ -286,6 +286,8 @@ func (in *BackupStatus) DeepCopyInto(out *BackupStatus) { *out = make([]string, len(*in)) copy(*out, *in) } + in.StartTimestamp.DeepCopyInto(&out.StartTimestamp) + in.CompletionTimestamp.DeepCopyInto(&out.CompletionTimestamp) return } diff --git a/pkg/cmd/util/output/backup_describer.go b/pkg/cmd/util/output/backup_describer.go index 850a6659e..de3fc56c9 100644 --- a/pkg/cmd/util/output/backup_describer.go +++ b/pkg/cmd/util/output/backup_describer.go @@ -162,9 +162,22 @@ func DescribeBackupStatus(d *Describer, status v1.BackupStatus) { d.Printf("Backup Format Version:\t%d\n", status.Version) d.Println() - d.Printf("Expiration:\t%s\n", status.Expiration.Time) + // "" output should only be applicable for backups that failed validation + if status.StartTimestamp.Time.IsZero() { + d.Printf("Started:\t%s\n", "") + } else { + d.Printf("Started:\t%s\n", status.StartTimestamp.Time) + } + if status.CompletionTimestamp.Time.IsZero() { + d.Printf("Completed:\t%s\n", "") + } else { + d.Printf("Completed:\t%s\n", status.CompletionTimestamp.Time) + } d.Println() + d.Printf("Expiration:\t%s\n", status.Expiration.Time) + d.Println() + d.Printf("Validation errors:") if len(status.ValidationErrors) == 0 { d.Printf("\t\n") diff --git a/pkg/controller/backup_controller.go b/pkg/controller/backup_controller.go index 7749d990a..8d563d4c5 100644 --- a/pkg/controller/backup_controller.go +++ b/pkg/controller/backup_controller.go @@ -337,6 +337,7 @@ func (controller *backupController) getValidationErrors(itm *api.Backup) []strin func (controller *backupController) runBackup(backup *api.Backup, bucket string) error { log := controller.logger.WithField("backup", kubeutil.NamespaceAndName(backup)) log.Info("Starting backup") + backup.Status.StartTimestamp.Time = controller.clock.Now() logFile, err := ioutil.TempFile("", "") if err != nil { @@ -392,6 +393,11 @@ func (controller *backupController) runBackup(backup *api.Backup, bucket string) backupScheduleName := backup.GetLabels()["ark-schedule"] controller.metrics.SetBackupTarballSizeBytesGauge(backupScheduleName, backupSizeBytes) + backup.Status.CompletionTimestamp.Time = controller.clock.Now() + backupDuration := backup.Status.CompletionTimestamp.Time.Sub(backup.Status.StartTimestamp.Time) + backupDurationSeconds := float64(backupDuration / time.Second) + controller.metrics.RegisterBackupDuration(backupScheduleName, backupDurationSeconds) + log.Info("Backup completed") return kerrors.NewAggregate(errs) diff --git a/pkg/controller/backup_controller_test.go b/pkg/controller/backup_controller_test.go index f71cc85e8..03df45b8a 100644 --- a/pkg/controller/backup_controller_test.go +++ b/pkg/controller/backup_controller_test.go @@ -22,6 +22,7 @@ import ( "testing" "time" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/clock" core "k8s.io/client-go/testing" @@ -175,13 +176,15 @@ func TestProcessBackup(t *testing.T) { c.clock = clock.NewFakeClock(clockTime) - var expiration time.Time + var expiration, startTime time.Time if test.backup != nil { // add directly to the informer's store so the lister can function and so we don't have to // start the shared informers. sharedInformers.Ark().V1().Backups().Informer().GetStore().Add(test.backup.Backup) + startTime = c.clock.Now() + if test.backup.Spec.TTL.Duration > 0 { expiration = c.clock.Now().Add(test.backup.Spec.TTL.Duration) } @@ -194,6 +197,7 @@ func TestProcessBackup(t *testing.T) { backup.Spec.SnapshotVolumes = test.backup.Spec.SnapshotVolumes backup.Status.Phase = v1.BackupPhaseInProgress backup.Status.Expiration.Time = expiration + backup.Status.StartTimestamp.Time = startTime backup.Status.Version = 1 backupper.On("Backup", backup, mock.Anything, mock.Anything, mock.Anything).Return(nil) @@ -231,6 +235,21 @@ func TestProcessBackup(t *testing.T) { res.Status.Expiration.Time = expiration res.Status.Phase = v1.BackupPhase(phase) + // If there's an error, it's mostly likely that the key wasn't found + // which is fine since not all patches will have them. + completionString, err := collections.GetString(patchMap, "status.completionTimestamp") + if err == nil { + completionTime, err := time.Parse(time.RFC3339Nano, completionString) + require.NoError(t, err, "unexpected completionTimestamp parsing error %v", err) + res.Status.CompletionTimestamp.Time = completionTime + } + startString, err := collections.GetString(patchMap, "status.startTimestamp") + if err == nil { + startTime, err := time.Parse(time.RFC3339Nano, startString) + require.NoError(t, err, "unexpected startTimestamp parsing error %v", err) + res.Status.StartTimestamp.Time = startTime + } + return true, res, nil }) @@ -254,9 +273,11 @@ func TestProcessBackup(t *testing.T) { // structs and func for decoding patch content type StatusPatch struct { - Expiration time.Time `json:"expiration"` - Version int `json:"version"` - Phase v1.BackupPhase `json:"phase"` + Expiration time.Time `json:"expiration"` + Version int `json:"version"` + Phase v1.BackupPhase `json:"phase"` + StartTimestamp metav1.Time `json:"startTimestamp"` + CompletionTimestamp metav1.Time `json:"completionTimestamp"` } type Patch struct { @@ -281,13 +302,14 @@ func TestProcessBackup(t *testing.T) { arktest.ValidatePatch(t, actions[0], expected, decode) - // validate Patch call 2 (setting phase) + // validate Patch call 2 (setting phase, startTimestamp, completionTimestamp) expected = Patch{ Status: StatusPatch{ - Phase: v1.BackupPhaseCompleted, + Phase: v1.BackupPhaseCompleted, + StartTimestamp: metav1.Time{Time: c.clock.Now()}, + CompletionTimestamp: metav1.Time{Time: c.clock.Now()}, }, } - arktest.ValidatePatch(t, actions[1], expected, decode) }) } diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 69d2fc5e9..c63725c18 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -17,6 +17,8 @@ limitations under the License. package metrics import ( + "time" + "github.com/prometheus/client_golang/prometheus" ) @@ -31,8 +33,11 @@ const ( backupAttemptCount = "backup_attempt_total" backupSuccessCount = "backup_success_total" backupFailureCount = "backup_failure_total" + backupDurationSeconds = "backup_duration_seconds" scheduleLabel = "schedule" + + secondsInMinute = 60.0 ) // NewServerMetrics returns new ServerMetrics @@ -71,41 +76,73 @@ func NewServerMetrics() *ServerMetrics { }, []string{scheduleLabel}, ), + backupDurationSeconds: prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: metricNamespace, + Name: backupDurationSeconds, + Help: "Time taken to complete backup, in seconds", + Buckets: []float64{ + toSeconds(1 * time.Minute), + toSeconds(5 * time.Minute), + toSeconds(10 * time.Minute), + toSeconds(15 * time.Minute), + toSeconds(30 * time.Minute), + toSeconds(1 * time.Hour), + toSeconds(2 * time.Hour), + toSeconds(3 * time.Hour), + toSeconds(4 * time.Hour), + }, + }, + []string{scheduleLabel}, + ), }, } } -// RegisterAllMetrics registers all prometheus metrics +// RegisterAllMetrics registers all prometheus metrics. func (m *ServerMetrics) RegisterAllMetrics() { for _, pm := range m.metrics { prometheus.MustRegister(pm) } } -// SetBackupTarballSizeBytesGauge records the size, in bytes, of a backup tarball +// SetBackupTarballSizeBytesGauge records the size, in bytes, of a backup tarball. func (m *ServerMetrics) SetBackupTarballSizeBytesGauge(backupSchedule string, size int64) { if g, ok := m.metrics[backupTarballSizeBytesGauge].(*prometheus.GaugeVec); ok { g.WithLabelValues(backupSchedule).Set(float64(size)) } } -// RegisterBackupAttempt records an backup attempt +// RegisterBackupAttempt records an backup attempt. func (m *ServerMetrics) RegisterBackupAttempt(backupSchedule string) { if c, ok := m.metrics[backupAttemptCount].(*prometheus.CounterVec); ok { c.WithLabelValues(backupSchedule).Inc() } } -// RegisterBackupSuccess records a successful completion of a backup +// RegisterBackupSuccess records a successful completion of a backup. func (m *ServerMetrics) RegisterBackupSuccess(backupSchedule string) { if c, ok := m.metrics[backupSuccessCount].(*prometheus.CounterVec); ok { c.WithLabelValues(backupSchedule).Inc() } } -// RegisterBackupFailed records a failed backup +// RegisterBackupFailed records a failed backup. func (m *ServerMetrics) RegisterBackupFailed(backupSchedule string) { if c, ok := m.metrics[backupFailureCount].(*prometheus.CounterVec); ok { c.WithLabelValues(backupSchedule).Inc() } } + +// RegisterBackupDuration records the number of seconds a backup took. +func (m *ServerMetrics) RegisterBackupDuration(backupSchedule string, seconds float64) { + if c, ok := m.metrics[backupDurationSeconds].(*prometheus.HistogramVec); ok { + c.WithLabelValues(backupSchedule).Observe(seconds) + } +} + +// toSeconds translates a time.Duration value into a float64 +// representing the number of seconds in that duration. +func toSeconds(d time.Duration) float64 { + return float64(d / time.Second) +} diff --git a/pkg/util/test/comparisons.go b/pkg/util/test/comparisons.go index 3c485119b..68b7a2edc 100644 --- a/pkg/util/test/comparisons.go +++ b/pkg/util/test/comparisons.go @@ -19,8 +19,12 @@ package test import ( "bytes" "encoding/json" + "fmt" "reflect" "testing" + "time" + + "k8s.io/apimachinery/pkg/api/equality" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -77,5 +81,30 @@ func ValidatePatch(t *testing.T, action core.Action, expected interface{}, decod actual, err := decodeFunc(decoder) require.NoError(t, err) - assert.Equal(t, expected, actual) + AssertDeepEqual(t, expected, actual) +} + +// TimesAreEqual compares two times for equality. +// This function is used by equality.Semantic.DeepEqual to compare two time objects +// without having to call a method. +func TimesAreEqual(t1, t2 time.Time) bool { + return t1.Equal(t2) +} + +// AssertDeepEqual asserts the semantic equality of objects. +// This function exists in order to make sure time.Time and metav1.Time objects +// can be compared correctly. See https://github.com/stretchr/testify/issues/502. +func AssertDeepEqual(t *testing.T, expected, actual interface{}) bool { + // By default, the equality.Semantic object doesn't have a function for comparing time.Times + err := equality.Semantic.AddFunc(TimesAreEqual) + if err != nil { + // Programmer error, the test should die. + t.Fatalf("Could not register equality function: %s", err) + } + + if !equality.Semantic.DeepEqual(expected, actual) { + return assert.Fail(t, fmt.Sprintf("Objects not equal")) + } + + return true }