mirror of
https://github.com/vmware-tanzu/velero.git
synced 2026-01-06 13:26:26 +00:00
move restores to PartiallyFailed if >=1 error in restore results
Signed-off-by: Steve Kriss <krisss@vmware.com>
This commit is contained in:
@@ -82,10 +82,14 @@ const (
|
|||||||
// RestorePhaseInProgress means the restore is currently executing.
|
// RestorePhaseInProgress means the restore is currently executing.
|
||||||
RestorePhaseInProgress RestorePhase = "InProgress"
|
RestorePhaseInProgress RestorePhase = "InProgress"
|
||||||
|
|
||||||
// RestorePhaseCompleted means the restore has finished executing.
|
// RestorePhaseCompleted means the restore has run successfully
|
||||||
// Any relevant warnings or errors will be captured in the Status.
|
// without errors.
|
||||||
RestorePhaseCompleted RestorePhase = "Completed"
|
RestorePhaseCompleted RestorePhase = "Completed"
|
||||||
|
|
||||||
|
// RestorePhasePartiallyFailed means the restore has run to completion
|
||||||
|
// but encountered one or more errors while restoring individual items.
|
||||||
|
RestorePhasePartiallyFailed RestorePhase = "PartiallyFailed"
|
||||||
|
|
||||||
// RestorePhaseFailed means the restore was unable to execute.
|
// RestorePhaseFailed means the restore was unable to execute.
|
||||||
// The failing error is recorded in status.FailureReason.
|
// The failing error is recorded in status.FailureReason.
|
||||||
RestorePhaseFailed RestorePhase = "Failed"
|
RestorePhaseFailed RestorePhase = "Failed"
|
||||||
|
|||||||
@@ -259,6 +259,10 @@ func (c *restoreController) processRestore(restore *api.Restore) error {
|
|||||||
restore.Status.Phase = api.RestorePhaseFailed
|
restore.Status.Phase = api.RestorePhaseFailed
|
||||||
restore.Status.FailureReason = err.Error()
|
restore.Status.FailureReason = err.Error()
|
||||||
c.metrics.RegisterRestoreFailed(backupScheduleName)
|
c.metrics.RegisterRestoreFailed(backupScheduleName)
|
||||||
|
} else if restore.Status.Errors > 0 {
|
||||||
|
c.logger.Debug("Restore partially failed")
|
||||||
|
restore.Status.Phase = api.RestorePhasePartiallyFailed
|
||||||
|
c.metrics.RegisterRestorePartialFailure(backupScheduleName)
|
||||||
} else {
|
} else {
|
||||||
c.logger.Debug("Restore completed")
|
c.logger.Debug("Restore completed")
|
||||||
restore.Status.Phase = api.RestorePhaseCompleted
|
restore.Status.Phase = api.RestorePhaseCompleted
|
||||||
|
|||||||
@@ -300,6 +300,7 @@ func TestProcessQueueItem(t *testing.T) {
|
|||||||
restorerError: errors.New("blarg"),
|
restorerError: errors.New("blarg"),
|
||||||
expectedErr: false,
|
expectedErr: false,
|
||||||
expectedPhase: string(api.RestorePhaseInProgress),
|
expectedPhase: string(api.RestorePhaseInProgress),
|
||||||
|
expectedFinalPhase: string(api.RestorePhasePartiallyFailed),
|
||||||
expectedRestoreErrors: 1,
|
expectedRestoreErrors: 1,
|
||||||
expectedRestorerCall: NewRestore("foo", "bar", "backup-1", "ns-1", "", api.RestorePhaseInProgress).Restore,
|
expectedRestorerCall: NewRestore("foo", "bar", "backup-1", "ns-1", "", api.RestorePhaseInProgress).Restore,
|
||||||
},
|
},
|
||||||
@@ -595,7 +596,7 @@ func TestProcessQueueItem(t *testing.T) {
|
|||||||
if test.expectedFinalPhase != "" {
|
if test.expectedFinalPhase != "" {
|
||||||
expected = Patch{
|
expected = Patch{
|
||||||
Status: StatusPatch{
|
Status: StatusPatch{
|
||||||
Phase: api.RestorePhaseCompleted,
|
Phase: api.RestorePhase(test.expectedFinalPhase),
|
||||||
Errors: test.expectedRestoreErrors,
|
Errors: test.expectedRestoreErrors,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ const (
|
|||||||
restoreAttemptTotal = "restore_attempt_total"
|
restoreAttemptTotal = "restore_attempt_total"
|
||||||
restoreValidationFailedTotal = "restore_validation_failed_total"
|
restoreValidationFailedTotal = "restore_validation_failed_total"
|
||||||
restoreSuccessTotal = "restore_success_total"
|
restoreSuccessTotal = "restore_success_total"
|
||||||
|
restorePartialFailureTotal = "restore_partial_failure_total"
|
||||||
restoreFailedTotal = "restore_failed_total"
|
restoreFailedTotal = "restore_failed_total"
|
||||||
volumeSnapshotAttemptTotal = "volume_snapshot_attempt_total"
|
volumeSnapshotAttemptTotal = "volume_snapshot_attempt_total"
|
||||||
volumeSnapshotSuccessTotal = "volume_snapshot_success_total"
|
volumeSnapshotSuccessTotal = "volume_snapshot_success_total"
|
||||||
@@ -162,6 +163,14 @@ func NewServerMetrics() *ServerMetrics {
|
|||||||
},
|
},
|
||||||
[]string{scheduleLabel},
|
[]string{scheduleLabel},
|
||||||
),
|
),
|
||||||
|
restorePartialFailureTotal: prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Namespace: metricNamespace,
|
||||||
|
Name: restorePartialFailureTotal,
|
||||||
|
Help: "Total number of partially failed restores",
|
||||||
|
},
|
||||||
|
[]string{scheduleLabel},
|
||||||
|
),
|
||||||
restoreFailedTotal: prometheus.NewCounterVec(
|
restoreFailedTotal: prometheus.NewCounterVec(
|
||||||
prometheus.CounterOpts{
|
prometheus.CounterOpts{
|
||||||
Namespace: metricNamespace,
|
Namespace: metricNamespace,
|
||||||
@@ -236,6 +245,9 @@ func (m *ServerMetrics) InitSchedule(scheduleName string) {
|
|||||||
if c, ok := m.metrics[restoreAttemptTotal].(*prometheus.CounterVec); ok {
|
if c, ok := m.metrics[restoreAttemptTotal].(*prometheus.CounterVec); ok {
|
||||||
c.WithLabelValues(scheduleName).Set(0)
|
c.WithLabelValues(scheduleName).Set(0)
|
||||||
}
|
}
|
||||||
|
if c, ok := m.metrics[restorePartialFailureTotal].(*prometheus.CounterVec); ok {
|
||||||
|
c.WithLabelValues(scheduleName).Set(0)
|
||||||
|
}
|
||||||
if c, ok := m.metrics[restoreFailedTotal].(*prometheus.CounterVec); ok {
|
if c, ok := m.metrics[restoreFailedTotal].(*prometheus.CounterVec); ok {
|
||||||
c.WithLabelValues(scheduleName).Set(0)
|
c.WithLabelValues(scheduleName).Set(0)
|
||||||
}
|
}
|
||||||
@@ -346,6 +358,13 @@ func (m *ServerMetrics) RegisterRestoreSuccess(backupSchedule string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RegisterRestorePartialFailure records a restore that partially failed.
// It increments the counter stored in m.metrics under
// restorePartialFailureTotal, using backupSchedule as the label value.
// If that entry is missing or is not a *prometheus.CounterVec, the call
// does nothing.
|
||||||
|
func (m *ServerMetrics) RegisterRestorePartialFailure(backupSchedule string) {
|
||||||
|
if c, ok := m.metrics[restorePartialFailureTotal].(*prometheus.CounterVec); ok {
|
||||||
|
c.WithLabelValues(backupSchedule).Inc()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// RegisterRestoreFailed records a restore that failed.
|
// RegisterRestoreFailed records a restore that failed.
|
||||||
func (m *ServerMetrics) RegisterRestoreFailed(backupSchedule string) {
|
func (m *ServerMetrics) RegisterRestoreFailed(backupSchedule string) {
|
||||||
if c, ok := m.metrics[restoreFailedTotal].(*prometheus.CounterVec); ok {
|
if c, ok := m.metrics[restoreFailedTotal].(*prometheus.CounterVec); ok {
|
||||||
|
|||||||
Reference in New Issue
Block a user