mirror of
https://github.com/vmware-tanzu/velero.git
synced 2025-12-23 06:15:21 +00:00
This commit addresses three review comments on PR #9321:

1. Keep sanitization in controller (response to @ywk253100)
   - Maintaining centralized error handling for easier extension
   - Azure-specific patterns detected and others passed through unchanged

2. Sanitize unavailableErrors array (@priyansh17)
   - Now using sanitizeStorageError() for both unavailableErrors array and location.Status.Message for consistency

3. Add SAS token scrubbing (@anshulahuja98)
   - Scrubs Azure SAS token parameters to prevent credential leakage
   - Redacts: sig, se, st, sp, spr, sv, sr, sip, srt, ss
   - Example: ?sig=secret becomes ?sig=***REDACTED***

Added comprehensive test coverage for SAS token scrubbing with 4 new test cases covering various scenarios.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
370 lines
14 KiB
Go
370 lines
14 KiB
Go
/*
|
|
Copyright the Velero contributors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package controller
|
|
|
|
import (
|
|
"context"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/vmware-tanzu/velero/pkg/metrics"
|
|
|
|
"github.com/pkg/errors"
|
|
"github.com/sirupsen/logrus"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
ctrl "sigs.k8s.io/controller-runtime"
|
|
"sigs.k8s.io/controller-runtime/pkg/builder"
|
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
|
"sigs.k8s.io/controller-runtime/pkg/predicate"
|
|
|
|
"github.com/vmware-tanzu/velero/internal/storage"
|
|
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
|
|
"github.com/vmware-tanzu/velero/pkg/constant"
|
|
"github.com/vmware-tanzu/velero/pkg/persistence"
|
|
"github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt"
|
|
"github.com/vmware-tanzu/velero/pkg/util/kube"
|
|
)
|
|
|
|
// bslValidationEnqueuePeriod is the interval at which BackupStorageLocations
// are periodically re-enqueued for validation.
//
// It is intentionally much shorter than the default BSL validation frequency
// of 1 minute: if the enqueue period were also 1 minute, the effective
// validation interval for each BSL would grow to 2 minutes.
const bslValidationEnqueuePeriod = 10 * time.Second
|
|
|
|
// Patterns used by sanitizeStorageError. They are compiled once at package
// initialization rather than on every call.
var (
	// sasTokenParamPattern matches Azure SAS token query parameters
	// (sig, se, st, sp, spr, sv, sr, sip, srt, ss) together with their values,
	// as they appear in URLs like "?sig=value&se=value".
	sasTokenParamPattern = regexp.MustCompile(`([?&])(sig|se|st|sp|spr|sv|sr|sip|srt|ss)=([^&\s<>\n]+)`)

	// azureErrorCodePattern captures the code that follows "ERROR CODE:",
	// e.g. "ContainerNotFound" or "BlobNotFound".
	azureErrorCodePattern = regexp.MustCompile(`ERROR CODE:\s*(\w+)`)

	// azureXMLMessagePattern captures the content of the XML <Message>
	// element. The (?s) flag lets "." match newlines: without it a message
	// that spans several lines (text followed by "\nRequestId:...") would
	// never match.
	azureXMLMessagePattern = regexp.MustCompile(`(?s)<Message>(.*?)</Message>`)

	// azureResponseLinePattern captures the plain-text description in a line
	// such as "RESPONSE 404: 404 The specified container does not exist.".
	azureResponseLinePattern = regexp.MustCompile(`RESPONSE\s+\d+:\s+\d+\s+([^\n]+)`)

	// grpcDescPattern captures the remainder of the line after "desc =" in a
	// gRPC-style error string.
	grpcDescPattern = regexp.MustCompile(`desc\s*=\s*(.+)`)
)

// sanitizeStorageError converts a storage error into a short message.
//
// It always replaces the values of Azure SAS token query parameters with
// "***REDACTED***". If the message does not look like an Azure HTTP response
// error (it must contain both "RESPONSE" and "ERROR CODE:"), the scrubbed
// message is returned otherwise unchanged.
//
// For Azure-style errors the verbose HTTP/XML dump is replaced by
// "<error code>: <error message>", or by whichever of the two is available.
// If the message contains "desc = ", everything up to and including the first
// occurrence is kept as a prefix (e.g. "rpc error: code = Unknown desc = ").
//
// A nil error yields an empty string.
func sanitizeStorageError(err error) string {
	if err == nil {
		return ""
	}

	// Scrub credentials first so that every return path below is safe.
	errMsg := sasTokenParamPattern.ReplaceAllString(err.Error(), `${1}${2}=***REDACTED***`)

	// Anything that is not an Azure-style error is passed through as-is.
	if !strings.Contains(errMsg, "RESPONSE") || !strings.Contains(errMsg, "ERROR CODE:") {
		return errMsg
	}

	var errorCode string
	if match := azureErrorCodePattern.FindStringSubmatch(errMsg); len(match) > 1 {
		errorCode = match[1]
	}
	errorMessage := azureStorageErrorMessage(errMsg)

	var cleanMsg string
	switch {
	case errorCode != "" && errorMessage != "":
		cleanMsg = errorCode + ": " + errorMessage
	case errorCode != "":
		cleanMsg = errorCode
	case errorMessage != "":
		cleanMsg = errorMessage
	default:
		cleanMsg = fallbackStorageErrorSummary(errMsg)
	}

	// Keep the leading context (e.g. "rpc error: code = Unknown desc = ") and
	// substitute only the verbose description with the clean message.
	const descMarker = "desc = "
	if idx := strings.Index(errMsg, descMarker); idx != -1 {
		return errMsg[:idx+len(descMarker)] + cleanMsg
	}
	return cleanMsg
}

// azureStorageErrorMessage extracts the human-readable message of an
// Azure-style error: the XML <Message> content (without the trailing
// RequestId/Time lines) when present and non-empty, otherwise the text of the
// "RESPONSE <status>: <status> <text>" line. It returns "" if neither exists.
func azureStorageErrorMessage(errMsg string) string {
	if match := azureXMLMessagePattern.FindStringSubmatch(errMsg); len(match) > 1 {
		message := match[1]
		// Drop the "RequestId:" line and everything after it (the Time line).
		if idx := strings.Index(message, "\nRequestId:"); idx != -1 {
			message = message[:idx]
		}
		if message = strings.TrimSpace(message); message != "" {
			return message
		}
	}

	if match := azureResponseLinePattern.FindStringSubmatch(errMsg); len(match) > 1 {
		return strings.TrimSpace(match[1])
	}
	return ""
}

// fallbackStorageErrorSummary is used when neither an error code nor an error
// message could be extracted. It returns the gRPC "desc" text cut off at the
// first "RESPONSE" marker, or the first line of errMsg when there is no desc.
func fallbackStorageErrorSummary(errMsg string) string {
	if match := grpcDescPattern.FindStringSubmatch(errMsg); len(match) > 1 {
		// The capture never spans lines, so only the marker needs trimming.
		desc := match[1]
		if idx := strings.Index(desc, "RESPONSE"); idx != -1 {
			desc = strings.TrimSpace(desc[:idx])
		}
		return desc
	}

	if idx := strings.Index(errMsg, "\n"); idx != -1 {
		return errMsg[:idx]
	}
	return errMsg
}
|
|
|
|
// BackupStorageLocationReconciler reconciles a BackupStorageLocation object
|
|
type backupStorageLocationReconciler struct {
|
|
ctx context.Context
|
|
client client.Client
|
|
defaultBackupLocationInfo storage.DefaultBackupLocationInfo
|
|
// use variables to refer to these functions so they can be
|
|
// replaced with fakes for testing.
|
|
newPluginManager func(logrus.FieldLogger) clientmgmt.Manager
|
|
backupStoreGetter persistence.ObjectBackupStoreGetter
|
|
metrics *metrics.ServerMetrics
|
|
log logrus.FieldLogger
|
|
}
|
|
|
|
// NewBackupStorageLocationReconciler initialize and return a backupStorageLocationReconciler struct
|
|
func NewBackupStorageLocationReconciler(
|
|
ctx context.Context,
|
|
client client.Client,
|
|
defaultBackupLocationInfo storage.DefaultBackupLocationInfo,
|
|
newPluginManager func(logrus.FieldLogger) clientmgmt.Manager,
|
|
backupStoreGetter persistence.ObjectBackupStoreGetter,
|
|
metrics *metrics.ServerMetrics,
|
|
log logrus.FieldLogger) *backupStorageLocationReconciler {
|
|
return &backupStorageLocationReconciler{
|
|
ctx: ctx,
|
|
client: client,
|
|
defaultBackupLocationInfo: defaultBackupLocationInfo,
|
|
newPluginManager: newPluginManager,
|
|
backupStoreGetter: backupStoreGetter,
|
|
metrics: metrics,
|
|
log: log,
|
|
}
|
|
}
|
|
|
|
// +kubebuilder:rbac:groups=velero.io,resources=backupstoragelocations,verbs=get;list;watch;create;update;patch;delete
|
|
// +kubebuilder:rbac:groups=velero.io,resources=backupstoragelocations/status,verbs=get;update;patch
|
|
|
|
func (r *backupStorageLocationReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
|
|
var unavailableErrors []string
|
|
var location velerov1api.BackupStorageLocation
|
|
|
|
log := r.log.WithField("controller", constant.ControllerBackupStorageLocation).WithField(constant.ControllerBackupStorageLocation, req.NamespacedName.String())
|
|
log.Debug("Validating availability of BackupStorageLocation")
|
|
|
|
locationList, err := storage.ListBackupStorageLocations(r.ctx, r.client, req.Namespace)
|
|
if err != nil {
|
|
log.WithError(err).Error("No BackupStorageLocations found, at least one is required")
|
|
return ctrl.Result{}, nil
|
|
}
|
|
|
|
pluginManager := r.newPluginManager(log)
|
|
defer pluginManager.CleanupClients()
|
|
|
|
// find the BSL that matches the request
|
|
for _, bsl := range locationList.Items {
|
|
if bsl.Name == req.Name && bsl.Namespace == req.Namespace {
|
|
location = bsl
|
|
}
|
|
}
|
|
|
|
if location.Name == "" || location.Namespace == "" {
|
|
log.WithError(err).Error("BackupStorageLocation is not found")
|
|
return ctrl.Result{}, nil
|
|
}
|
|
|
|
// decide the default BSL
|
|
defaultFound, err := r.ensureSingleDefaultBSL(locationList)
|
|
if err != nil {
|
|
log.WithError(err).Error("failed to ensure single default bsl")
|
|
return ctrl.Result{}, nil
|
|
}
|
|
|
|
func() {
|
|
var err error
|
|
original := location.DeepCopy()
|
|
defer func() {
|
|
location.Status.LastValidationTime = &metav1.Time{Time: time.Now().UTC()}
|
|
if err != nil {
|
|
log.Info("BackupStorageLocation is invalid, marking as unavailable")
|
|
err = errors.Wrapf(err, "BackupStorageLocation %q is unavailable", location.Name)
|
|
unavailableErrors = append(unavailableErrors, sanitizeStorageError(err))
|
|
location.Status.Phase = velerov1api.BackupStorageLocationPhaseUnavailable
|
|
location.Status.Message = sanitizeStorageError(err)
|
|
} else {
|
|
log.Info("BackupStorageLocations is valid, marking as available")
|
|
location.Status.Phase = velerov1api.BackupStorageLocationPhaseAvailable
|
|
location.Status.Message = ""
|
|
}
|
|
if err := r.client.Patch(r.ctx, &location, client.MergeFrom(original)); err != nil {
|
|
log.WithError(err).Error("Error updating BackupStorageLocation phase")
|
|
}
|
|
}()
|
|
|
|
backupStore, err := r.backupStoreGetter.Get(&location, pluginManager, log)
|
|
if err != nil {
|
|
log.WithError(err).Error("Error getting a backup store")
|
|
return
|
|
}
|
|
|
|
log.Info("Validating BackupStorageLocation")
|
|
err = backupStore.IsValid()
|
|
if err != nil {
|
|
log.WithError(err).Error("fail to validate backup store")
|
|
return
|
|
}
|
|
}()
|
|
|
|
r.logReconciledPhase(defaultFound, locationList, unavailableErrors)
|
|
|
|
return ctrl.Result{}, nil
|
|
}
|
|
|
|
func (r *backupStorageLocationReconciler) logReconciledPhase(defaultFound bool, locationList velerov1api.BackupStorageLocationList, errs []string) {
|
|
var availableBSLs []*velerov1api.BackupStorageLocation
|
|
var unAvailableBSLs []*velerov1api.BackupStorageLocation
|
|
var unknownBSLs []*velerov1api.BackupStorageLocation
|
|
log := r.log.WithField("controller", constant.ControllerBackupStorageLocation)
|
|
|
|
for i, location := range locationList.Items {
|
|
phase := location.Status.Phase
|
|
switch phase {
|
|
case velerov1api.BackupStorageLocationPhaseAvailable:
|
|
availableBSLs = append(availableBSLs, &locationList.Items[i])
|
|
r.metrics.RegisterBackupLocationAvailable(locationList.Items[i].Name)
|
|
case velerov1api.BackupStorageLocationPhaseUnavailable:
|
|
unAvailableBSLs = append(unAvailableBSLs, &locationList.Items[i])
|
|
r.metrics.RegisterBackupLocationUnavailable(locationList.Items[i].Name)
|
|
default:
|
|
unknownBSLs = append(unknownBSLs, &locationList.Items[i])
|
|
}
|
|
}
|
|
|
|
numAvailable := len(availableBSLs)
|
|
numUnavailable := len(unAvailableBSLs)
|
|
numUnknown := len(unknownBSLs)
|
|
|
|
if numUnavailable+numUnknown == len(locationList.Items) { // no available BSL
|
|
if len(errs) > 0 {
|
|
log.Errorf("Current BackupStorageLocations available/unavailable/unknown: %v/%v/%v, %s)", numAvailable, numUnavailable, numUnknown, strings.Join(errs, "; "))
|
|
} else {
|
|
log.Errorf("Current BackupStorageLocations available/unavailable/unknown: %v/%v/%v)", numAvailable, numUnavailable, numUnknown)
|
|
}
|
|
} else if numUnavailable > 0 { // some but not all BSL unavailable
|
|
log.Warnf("Unavailable BackupStorageLocations detected: available/unavailable/unknown: %v/%v/%v, %s)", numAvailable, numUnavailable, numUnknown, strings.Join(errs, "; "))
|
|
}
|
|
|
|
if !defaultFound {
|
|
log.Warn("There is no existing BackupStorageLocation set as default. Please see `velero backup-location -h` for options.")
|
|
}
|
|
}
|
|
|
|
func (r *backupStorageLocationReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
|
gp := kube.NewGenericEventPredicate(func(object client.Object) bool {
|
|
location := object.(*velerov1api.BackupStorageLocation)
|
|
return storage.IsReadyToValidate(location.Spec.ValidationFrequency, location.Status.LastValidationTime, r.defaultBackupLocationInfo.ServerValidationFrequency, r.log.WithField("controller", constant.ControllerBackupStorageLocation))
|
|
})
|
|
g := kube.NewPeriodicalEnqueueSource(
|
|
r.log.WithField("controller", constant.ControllerBackupStorageLocation),
|
|
mgr.GetClient(),
|
|
&velerov1api.BackupStorageLocationList{},
|
|
bslValidationEnqueuePeriod,
|
|
kube.PeriodicalEnqueueSourceOption{
|
|
Predicates: []predicate.Predicate{gp},
|
|
},
|
|
)
|
|
|
|
return ctrl.NewControllerManagedBy(mgr).
|
|
// As the "status.LastValidationTime" field is always updated, this triggers new reconciling process, skip the update event that include no spec change to avoid the reconcile loop
|
|
For(&velerov1api.BackupStorageLocation{}, builder.WithPredicates(kube.SpecChangePredicate{})).
|
|
WatchesRawSource(g).
|
|
Named(constant.ControllerBackupStorageLocation).
|
|
Complete(r)
|
|
}
|
|
|
|
// ensureSingleDefaultBSL ensures that there is only one default BSL in the namespace.
|
|
// the default BSL priority is as follows:
|
|
// 1. follow the user's setting (the most recent validation BSL is the default BSL)
|
|
// 2. follow the server's setting ("velero server --default-backup-storage-location")
|
|
func (r *backupStorageLocationReconciler) ensureSingleDefaultBSL(locationList velerov1api.BackupStorageLocationList) (bool, error) {
|
|
// get all default BSLs
|
|
var defaultBSLs []*velerov1api.BackupStorageLocation
|
|
var defaultFound bool
|
|
for i, location := range locationList.Items {
|
|
if location.Spec.Default {
|
|
defaultBSLs = append(defaultBSLs, &locationList.Items[i])
|
|
}
|
|
}
|
|
|
|
if len(defaultBSLs) > 1 { // more than 1 default BSL
|
|
// find the most recent updated default BSL
|
|
var mostRecentCreatedBSL *velerov1api.BackupStorageLocation
|
|
defaultFound = true
|
|
for _, bsl := range defaultBSLs {
|
|
if mostRecentCreatedBSL == nil {
|
|
mostRecentCreatedBSL = bsl
|
|
continue
|
|
}
|
|
// For lack of a better way to compare timestamps, we use the CreationTimestamp
|
|
// it cloud not really find the most recent updated BSL, but it is good enough for now
|
|
bslTimestamp := bsl.CreationTimestamp
|
|
mostRecentTimestamp := mostRecentCreatedBSL.CreationTimestamp
|
|
if mostRecentTimestamp.Before(&bslTimestamp) {
|
|
mostRecentCreatedBSL = bsl
|
|
}
|
|
}
|
|
|
|
// unset all other default BSLs
|
|
for _, bsl := range defaultBSLs {
|
|
if bsl.Name != mostRecentCreatedBSL.Name {
|
|
bsl.Spec.Default = false
|
|
if err := r.client.Update(r.ctx, bsl); err != nil {
|
|
return defaultFound, errors.Wrapf(err, "failed to unset default backup storage location %q", bsl.Name)
|
|
}
|
|
r.log.Debugf("update default backup storage location %q to false", bsl.Name)
|
|
}
|
|
}
|
|
} else if len(defaultBSLs) == 0 { // no default BSL
|
|
defaultFound = false
|
|
} else { // only 1 default BSL
|
|
defaultFound = true
|
|
}
|
|
return defaultFound, nil
|
|
}
|