Record backup start and completion times

Signed-off-by: Nolan Brubaker <nolan@heptio.com>
This commit is contained in:
Nolan Brubaker
2018-06-20 14:08:07 -04:00
parent f1e82a2fe3
commit 96b72acb2d
7 changed files with 134 additions and 14 deletions

View File

@@ -170,6 +170,17 @@ type BackupStatus struct {
// ValidationErrors is a slice of all validation errors (if
// applicable).
ValidationErrors []string `json:"validationErrors"`
// StartTimestamp records the time a backup was started.
// Separate from CreationTimestamp, since that value changes
// on restores.
// The server's time is used for StartTimestamps
StartTimestamp metav1.Time `json:"startTimestamp"`
// CompletionTimestamp records the time a backup was completed.
// Completion time is recorded even on failed backups.
// The server's time is used for CompletionTimestamps
CompletionTimestamp metav1.Time `json:"completionTimestamp"`
}
// VolumeBackupInfo captures the required information about

View File

@@ -286,6 +286,8 @@ func (in *BackupStatus) DeepCopyInto(out *BackupStatus) {
*out = make([]string, len(*in))
copy(*out, *in)
}
in.StartTimestamp.DeepCopyInto(&out.StartTimestamp)
in.CompletionTimestamp.DeepCopyInto(&out.CompletionTimestamp)
return
}

View File

@@ -162,9 +162,22 @@ func DescribeBackupStatus(d *Describer, status v1.BackupStatus) {
d.Printf("Backup Format Version:\t%d\n", status.Version)
d.Println()
d.Printf("Expiration:\t%s\n", status.Expiration.Time)
// "<n/a>" output should only be applicable for backups that failed validation
if status.StartTimestamp.Time.IsZero() {
d.Printf("Started:\t%s\n", "<n/a>")
} else {
d.Printf("Started:\t%s\n", status.StartTimestamp.Time)
}
if status.CompletionTimestamp.Time.IsZero() {
d.Printf("Completed:\t%s\n", "<n/a>")
} else {
d.Printf("Completed:\t%s\n", status.CompletionTimestamp.Time)
}
d.Println()
d.Printf("Expiration:\t%s\n", status.Expiration.Time)
d.Println()
d.Printf("Validation errors:")
if len(status.ValidationErrors) == 0 {
d.Printf("\t<none>\n")

View File

@@ -337,6 +337,7 @@ func (controller *backupController) getValidationErrors(itm *api.Backup) []strin
func (controller *backupController) runBackup(backup *api.Backup, bucket string) error {
log := controller.logger.WithField("backup", kubeutil.NamespaceAndName(backup))
log.Info("Starting backup")
backup.Status.StartTimestamp.Time = controller.clock.Now()
logFile, err := ioutil.TempFile("", "")
if err != nil {
@@ -392,6 +393,11 @@ func (controller *backupController) runBackup(backup *api.Backup, bucket string)
backupScheduleName := backup.GetLabels()["ark-schedule"]
controller.metrics.SetBackupTarballSizeBytesGauge(backupScheduleName, backupSizeBytes)
backup.Status.CompletionTimestamp.Time = controller.clock.Now()
backupDuration := backup.Status.CompletionTimestamp.Time.Sub(backup.Status.StartTimestamp.Time)
backupDurationSeconds := float64(backupDuration / time.Second)
controller.metrics.RegisterBackupDuration(backupScheduleName, backupDurationSeconds)
log.Info("Backup completed")
return kerrors.NewAggregate(errs)

View File

@@ -22,6 +22,7 @@ import (
"testing"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/clock"
core "k8s.io/client-go/testing"
@@ -175,13 +176,15 @@ func TestProcessBackup(t *testing.T) {
c.clock = clock.NewFakeClock(clockTime)
var expiration time.Time
var expiration, startTime time.Time
if test.backup != nil {
// add directly to the informer's store so the lister can function and so we don't have to
// start the shared informers.
sharedInformers.Ark().V1().Backups().Informer().GetStore().Add(test.backup.Backup)
startTime = c.clock.Now()
if test.backup.Spec.TTL.Duration > 0 {
expiration = c.clock.Now().Add(test.backup.Spec.TTL.Duration)
}
@@ -194,6 +197,7 @@ func TestProcessBackup(t *testing.T) {
backup.Spec.SnapshotVolumes = test.backup.Spec.SnapshotVolumes
backup.Status.Phase = v1.BackupPhaseInProgress
backup.Status.Expiration.Time = expiration
backup.Status.StartTimestamp.Time = startTime
backup.Status.Version = 1
backupper.On("Backup", backup, mock.Anything, mock.Anything, mock.Anything).Return(nil)
@@ -231,6 +235,21 @@ func TestProcessBackup(t *testing.T) {
res.Status.Expiration.Time = expiration
res.Status.Phase = v1.BackupPhase(phase)
// If there's an error, it's mostly likely that the key wasn't found
// which is fine since not all patches will have them.
completionString, err := collections.GetString(patchMap, "status.completionTimestamp")
if err == nil {
completionTime, err := time.Parse(time.RFC3339Nano, completionString)
require.NoError(t, err, "unexpected completionTimestamp parsing error %v", err)
res.Status.CompletionTimestamp.Time = completionTime
}
startString, err := collections.GetString(patchMap, "status.startTimestamp")
if err == nil {
startTime, err := time.Parse(time.RFC3339Nano, startString)
require.NoError(t, err, "unexpected startTimestamp parsing error %v", err)
res.Status.StartTimestamp.Time = startTime
}
return true, res, nil
})
@@ -254,9 +273,11 @@ func TestProcessBackup(t *testing.T) {
// structs and func for decoding patch content
type StatusPatch struct {
Expiration time.Time `json:"expiration"`
Version int `json:"version"`
Phase v1.BackupPhase `json:"phase"`
Expiration time.Time `json:"expiration"`
Version int `json:"version"`
Phase v1.BackupPhase `json:"phase"`
StartTimestamp metav1.Time `json:"startTimestamp"`
CompletionTimestamp metav1.Time `json:"completionTimestamp"`
}
type Patch struct {
@@ -281,13 +302,14 @@ func TestProcessBackup(t *testing.T) {
arktest.ValidatePatch(t, actions[0], expected, decode)
// validate Patch call 2 (setting phase)
// validate Patch call 2 (setting phase, startTimestamp, completionTimestamp)
expected = Patch{
Status: StatusPatch{
Phase: v1.BackupPhaseCompleted,
Phase: v1.BackupPhaseCompleted,
StartTimestamp: metav1.Time{Time: c.clock.Now()},
CompletionTimestamp: metav1.Time{Time: c.clock.Now()},
},
}
arktest.ValidatePatch(t, actions[1], expected, decode)
})
}

View File

@@ -17,6 +17,8 @@ limitations under the License.
package metrics
import (
"time"
"github.com/prometheus/client_golang/prometheus"
)
@@ -31,8 +33,11 @@ const (
backupAttemptCount = "backup_attempt_total"
backupSuccessCount = "backup_success_total"
backupFailureCount = "backup_failure_total"
backupDurationSeconds = "backup_duration_seconds"
scheduleLabel = "schedule"
secondsInMinute = 60.0
)
// NewServerMetrics returns new ServerMetrics
@@ -71,41 +76,73 @@ func NewServerMetrics() *ServerMetrics {
},
[]string{scheduleLabel},
),
backupDurationSeconds: prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: metricNamespace,
Name: backupDurationSeconds,
Help: "Time taken to complete backup, in seconds",
Buckets: []float64{
toSeconds(1 * time.Minute),
toSeconds(5 * time.Minute),
toSeconds(10 * time.Minute),
toSeconds(15 * time.Minute),
toSeconds(30 * time.Minute),
toSeconds(1 * time.Hour),
toSeconds(2 * time.Hour),
toSeconds(3 * time.Hour),
toSeconds(4 * time.Hour),
},
},
[]string{scheduleLabel},
),
},
}
}
// RegisterAllMetrics registers all prometheus metrics
// RegisterAllMetrics registers all prometheus metrics.
func (m *ServerMetrics) RegisterAllMetrics() {
for _, pm := range m.metrics {
prometheus.MustRegister(pm)
}
}
// SetBackupTarballSizeBytesGauge records the size, in bytes, of a backup tarball
// SetBackupTarballSizeBytesGauge records the size, in bytes, of a backup tarball.
func (m *ServerMetrics) SetBackupTarballSizeBytesGauge(backupSchedule string, size int64) {
if g, ok := m.metrics[backupTarballSizeBytesGauge].(*prometheus.GaugeVec); ok {
g.WithLabelValues(backupSchedule).Set(float64(size))
}
}
// RegisterBackupAttempt records an backup attempt
// RegisterBackupAttempt records an backup attempt.
func (m *ServerMetrics) RegisterBackupAttempt(backupSchedule string) {
if c, ok := m.metrics[backupAttemptCount].(*prometheus.CounterVec); ok {
c.WithLabelValues(backupSchedule).Inc()
}
}
// RegisterBackupSuccess records a successful completion of a backup
// RegisterBackupSuccess records a successful completion of a backup.
func (m *ServerMetrics) RegisterBackupSuccess(backupSchedule string) {
if c, ok := m.metrics[backupSuccessCount].(*prometheus.CounterVec); ok {
c.WithLabelValues(backupSchedule).Inc()
}
}
// RegisterBackupFailed records a failed backup
// RegisterBackupFailed records a failed backup.
func (m *ServerMetrics) RegisterBackupFailed(backupSchedule string) {
if c, ok := m.metrics[backupFailureCount].(*prometheus.CounterVec); ok {
c.WithLabelValues(backupSchedule).Inc()
}
}
// RegisterBackupDuration records the number of seconds a backup took.
func (m *ServerMetrics) RegisterBackupDuration(backupSchedule string, seconds float64) {
if c, ok := m.metrics[backupDurationSeconds].(*prometheus.HistogramVec); ok {
c.WithLabelValues(backupSchedule).Observe(seconds)
}
}
// toSeconds translates a time.Duration value into a float64
// representing the number of seconds in that duration.
func toSeconds(d time.Duration) float64 {
return float64(d / time.Second)
}

View File

@@ -19,8 +19,12 @@ package test
import (
"bytes"
"encoding/json"
"fmt"
"reflect"
"testing"
"time"
"k8s.io/apimachinery/pkg/api/equality"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -77,5 +81,30 @@ func ValidatePatch(t *testing.T, action core.Action, expected interface{}, decod
actual, err := decodeFunc(decoder)
require.NoError(t, err)
assert.Equal(t, expected, actual)
AssertDeepEqual(t, expected, actual)
}
// TimesAreEqual compares two times for equality.
// This function is used by equality.Semantic.DeepEqual to compare two time objects
// without having to call a method.
func TimesAreEqual(t1, t2 time.Time) bool {
return t1.Equal(t2)
}
// AssertDeepEqual asserts the semantic equality of objects.
// This function exists in order to make sure time.Time and metav1.Time objects
// can be compared correctly. See https://github.com/stretchr/testify/issues/502.
func AssertDeepEqual(t *testing.T, expected, actual interface{}) bool {
// By default, the equality.Semantic object doesn't have a function for comparing time.Times
err := equality.Semantic.AddFunc(TimesAreEqual)
if err != nil {
// Programmer error, the test should die.
t.Fatalf("Could not register equality function: %s", err)
}
if !equality.Semantic.DeepEqual(expected, actual) {
return assert.Fail(t, fmt.Sprintf("Objects not equal"))
}
return true
}