Files
velero/pkg/controller/backup_storage_location_controller.go
Shubham Pampattiwar 20af2c20c5 Address PR review comments: sanitize errors and add SAS token scrubbing
This commit addresses three review comments on PR #9321:

1. Keep sanitization in controller (response to @ywk253100)
   - Maintaining centralized error handling for easier extension
   - Azure-specific patterns detected and others passed through unchanged

2. Sanitize unavailableErrors array (@priyansh17)
   - Now using sanitizeStorageError() for both unavailableErrors array
     and location.Status.Message for consistency

3. Add SAS token scrubbing (@anshulahuja98)
   - Scrubs Azure SAS token parameters to prevent credential leakage
   - Redacts: sig, se, st, sp, spr, sv, sr, sip, srt, ss
   - Example: ?sig=secret becomes ?sig=***REDACTED***

Added comprehensive test coverage for SAS token scrubbing with 4 new
test cases covering various scenarios.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Signed-off-by: Shubham Pampattiwar <spampatt@redhat.com>
2025-12-02 11:37:50 -08:00

370 lines
14 KiB
Go

/*
Copyright the Velero contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package controller
import (
"context"
"regexp"
"strings"
"time"
"github.com/vmware-tanzu/velero/pkg/metrics"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"github.com/vmware-tanzu/velero/internal/storage"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/constant"
"github.com/vmware-tanzu/velero/pkg/persistence"
"github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt"
"github.com/vmware-tanzu/velero/pkg/util/kube"
)
const (
// keep the enqueue period a smaller value to make sure the BSL can be validated as expected.
// The BSL validation frequency is 1 minute by default, if we set the enqueue period as 1 minute,
// this will cause the actual validation interval for each BSL to be 2 minutes
bslValidationEnqueuePeriod = 10 * time.Second
)
// sanitizeStorageError cleans up verbose HTTP responses from cloud provider errors,
// particularly Azure which includes full HTTP response details and XML in error messages.
// It extracts the error code and message while removing HTTP headers and response bodies.
// It also scrubs sensitive information like SAS tokens from URLs.
func sanitizeStorageError(err error) string {
if err == nil {
return ""
}
errMsg := err.Error()
// Scrub sensitive information from URLs (SAS tokens, credentials, etc.)
// Azure SAS token parameters: sig, se, st, sp, spr, sv, sr, sip, srt, ss
// These appear as query parameters in URLs like: ?sig=value&se=value
sasParamsRegex := regexp.MustCompile(`([?&])(sig|se|st|sp|spr|sv|sr|sip|srt|ss)=([^&\s<>\n]+)`)
errMsg = sasParamsRegex.ReplaceAllString(errMsg, `${1}${2}=***REDACTED***`)
// Check if this looks like an Azure HTTP response error
// Azure errors contain patterns like "RESPONSE 404:" and "ERROR CODE:"
if !strings.Contains(errMsg, "RESPONSE") || !strings.Contains(errMsg, "ERROR CODE:") {
// Not an Azure-style error, return as-is
return errMsg
}
// Extract the error code (e.g., "ContainerNotFound", "BlobNotFound")
errorCodeRegex := regexp.MustCompile(`ERROR CODE:\s*(\w+)`)
errorCodeMatch := errorCodeRegex.FindStringSubmatch(errMsg)
var errorCode string
if len(errorCodeMatch) > 1 {
errorCode = errorCodeMatch[1]
}
// Extract the error message from the XML or plain text
// Look for message between <Message> tags or after "RESPONSE XXX:"
var errorMessage string
// Try to extract from XML first
messageRegex := regexp.MustCompile(`<Message>(.*?)</Message>`)
messageMatch := messageRegex.FindStringSubmatch(errMsg)
if len(messageMatch) > 1 {
errorMessage = messageMatch[1]
// Remove RequestId and Time from the message
if idx := strings.Index(errorMessage, "\nRequestId:"); idx != -1 {
errorMessage = errorMessage[:idx]
}
} else {
// Try to extract from plain text response (e.g., "RESPONSE 404: 404 The specified container does not exist.")
responseRegex := regexp.MustCompile(`RESPONSE\s+\d+:\s+\d+\s+([^\n]+)`)
responseMatch := responseRegex.FindStringSubmatch(errMsg)
if len(responseMatch) > 1 {
errorMessage = strings.TrimSpace(responseMatch[1])
}
}
// Build a clean error message
var cleanMsg string
if errorCode != "" && errorMessage != "" {
cleanMsg = errorCode + ": " + errorMessage
} else if errorCode != "" {
cleanMsg = errorCode
} else if errorMessage != "" {
cleanMsg = errorMessage
} else {
// Fallback: try to extract the desc part from gRPC error
descRegex := regexp.MustCompile(`desc\s*=\s*(.+)`)
descMatch := descRegex.FindStringSubmatch(errMsg)
if len(descMatch) > 1 {
// Take everything up to the first newline or "RESPONSE" marker
desc := descMatch[1]
if idx := strings.Index(desc, "\n"); idx != -1 {
desc = desc[:idx]
}
if idx := strings.Index(desc, "RESPONSE"); idx != -1 {
desc = strings.TrimSpace(desc[:idx])
}
cleanMsg = desc
} else {
// Last resort: return first line
if idx := strings.Index(errMsg, "\n"); idx != -1 {
cleanMsg = errMsg[:idx]
} else {
cleanMsg = errMsg
}
}
}
// Preserve the prefix part of the error (e.g., "rpc error: code = Unknown desc = ")
// but replace the verbose description with our clean message
if strings.Contains(errMsg, "desc = ") {
parts := strings.SplitN(errMsg, "desc = ", 2)
if len(parts) == 2 {
return parts[0] + "desc = " + cleanMsg
}
}
return cleanMsg
}
// BackupStorageLocationReconciler reconciles a BackupStorageLocation object
type backupStorageLocationReconciler struct {
ctx context.Context
client client.Client
defaultBackupLocationInfo storage.DefaultBackupLocationInfo
// use variables to refer to these functions so they can be
// replaced with fakes for testing.
newPluginManager func(logrus.FieldLogger) clientmgmt.Manager
backupStoreGetter persistence.ObjectBackupStoreGetter
metrics *metrics.ServerMetrics
log logrus.FieldLogger
}
// NewBackupStorageLocationReconciler initialize and return a backupStorageLocationReconciler struct
func NewBackupStorageLocationReconciler(
ctx context.Context,
client client.Client,
defaultBackupLocationInfo storage.DefaultBackupLocationInfo,
newPluginManager func(logrus.FieldLogger) clientmgmt.Manager,
backupStoreGetter persistence.ObjectBackupStoreGetter,
metrics *metrics.ServerMetrics,
log logrus.FieldLogger) *backupStorageLocationReconciler {
return &backupStorageLocationReconciler{
ctx: ctx,
client: client,
defaultBackupLocationInfo: defaultBackupLocationInfo,
newPluginManager: newPluginManager,
backupStoreGetter: backupStoreGetter,
metrics: metrics,
log: log,
}
}
// +kubebuilder:rbac:groups=velero.io,resources=backupstoragelocations,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=velero.io,resources=backupstoragelocations/status,verbs=get;update;patch
func (r *backupStorageLocationReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
var unavailableErrors []string
var location velerov1api.BackupStorageLocation
log := r.log.WithField("controller", constant.ControllerBackupStorageLocation).WithField(constant.ControllerBackupStorageLocation, req.NamespacedName.String())
log.Debug("Validating availability of BackupStorageLocation")
locationList, err := storage.ListBackupStorageLocations(r.ctx, r.client, req.Namespace)
if err != nil {
log.WithError(err).Error("No BackupStorageLocations found, at least one is required")
return ctrl.Result{}, nil
}
pluginManager := r.newPluginManager(log)
defer pluginManager.CleanupClients()
// find the BSL that matches the request
for _, bsl := range locationList.Items {
if bsl.Name == req.Name && bsl.Namespace == req.Namespace {
location = bsl
}
}
if location.Name == "" || location.Namespace == "" {
log.WithError(err).Error("BackupStorageLocation is not found")
return ctrl.Result{}, nil
}
// decide the default BSL
defaultFound, err := r.ensureSingleDefaultBSL(locationList)
if err != nil {
log.WithError(err).Error("failed to ensure single default bsl")
return ctrl.Result{}, nil
}
func() {
var err error
original := location.DeepCopy()
defer func() {
location.Status.LastValidationTime = &metav1.Time{Time: time.Now().UTC()}
if err != nil {
log.Info("BackupStorageLocation is invalid, marking as unavailable")
err = errors.Wrapf(err, "BackupStorageLocation %q is unavailable", location.Name)
unavailableErrors = append(unavailableErrors, sanitizeStorageError(err))
location.Status.Phase = velerov1api.BackupStorageLocationPhaseUnavailable
location.Status.Message = sanitizeStorageError(err)
} else {
log.Info("BackupStorageLocations is valid, marking as available")
location.Status.Phase = velerov1api.BackupStorageLocationPhaseAvailable
location.Status.Message = ""
}
if err := r.client.Patch(r.ctx, &location, client.MergeFrom(original)); err != nil {
log.WithError(err).Error("Error updating BackupStorageLocation phase")
}
}()
backupStore, err := r.backupStoreGetter.Get(&location, pluginManager, log)
if err != nil {
log.WithError(err).Error("Error getting a backup store")
return
}
log.Info("Validating BackupStorageLocation")
err = backupStore.IsValid()
if err != nil {
log.WithError(err).Error("fail to validate backup store")
return
}
}()
r.logReconciledPhase(defaultFound, locationList, unavailableErrors)
return ctrl.Result{}, nil
}
func (r *backupStorageLocationReconciler) logReconciledPhase(defaultFound bool, locationList velerov1api.BackupStorageLocationList, errs []string) {
var availableBSLs []*velerov1api.BackupStorageLocation
var unAvailableBSLs []*velerov1api.BackupStorageLocation
var unknownBSLs []*velerov1api.BackupStorageLocation
log := r.log.WithField("controller", constant.ControllerBackupStorageLocation)
for i, location := range locationList.Items {
phase := location.Status.Phase
switch phase {
case velerov1api.BackupStorageLocationPhaseAvailable:
availableBSLs = append(availableBSLs, &locationList.Items[i])
r.metrics.RegisterBackupLocationAvailable(locationList.Items[i].Name)
case velerov1api.BackupStorageLocationPhaseUnavailable:
unAvailableBSLs = append(unAvailableBSLs, &locationList.Items[i])
r.metrics.RegisterBackupLocationUnavailable(locationList.Items[i].Name)
default:
unknownBSLs = append(unknownBSLs, &locationList.Items[i])
}
}
numAvailable := len(availableBSLs)
numUnavailable := len(unAvailableBSLs)
numUnknown := len(unknownBSLs)
if numUnavailable+numUnknown == len(locationList.Items) { // no available BSL
if len(errs) > 0 {
log.Errorf("Current BackupStorageLocations available/unavailable/unknown: %v/%v/%v, %s)", numAvailable, numUnavailable, numUnknown, strings.Join(errs, "; "))
} else {
log.Errorf("Current BackupStorageLocations available/unavailable/unknown: %v/%v/%v)", numAvailable, numUnavailable, numUnknown)
}
} else if numUnavailable > 0 { // some but not all BSL unavailable
log.Warnf("Unavailable BackupStorageLocations detected: available/unavailable/unknown: %v/%v/%v, %s)", numAvailable, numUnavailable, numUnknown, strings.Join(errs, "; "))
}
if !defaultFound {
log.Warn("There is no existing BackupStorageLocation set as default. Please see `velero backup-location -h` for options.")
}
}
func (r *backupStorageLocationReconciler) SetupWithManager(mgr ctrl.Manager) error {
gp := kube.NewGenericEventPredicate(func(object client.Object) bool {
location := object.(*velerov1api.BackupStorageLocation)
return storage.IsReadyToValidate(location.Spec.ValidationFrequency, location.Status.LastValidationTime, r.defaultBackupLocationInfo.ServerValidationFrequency, r.log.WithField("controller", constant.ControllerBackupStorageLocation))
})
g := kube.NewPeriodicalEnqueueSource(
r.log.WithField("controller", constant.ControllerBackupStorageLocation),
mgr.GetClient(),
&velerov1api.BackupStorageLocationList{},
bslValidationEnqueuePeriod,
kube.PeriodicalEnqueueSourceOption{
Predicates: []predicate.Predicate{gp},
},
)
return ctrl.NewControllerManagedBy(mgr).
// As the "status.LastValidationTime" field is always updated, this triggers new reconciling process, skip the update event that include no spec change to avoid the reconcile loop
For(&velerov1api.BackupStorageLocation{}, builder.WithPredicates(kube.SpecChangePredicate{})).
WatchesRawSource(g).
Named(constant.ControllerBackupStorageLocation).
Complete(r)
}
// ensureSingleDefaultBSL ensures that there is only one default BSL in the namespace.
// the default BSL priority is as follows:
// 1. follow the user's setting (the most recent validation BSL is the default BSL)
// 2. follow the server's setting ("velero server --default-backup-storage-location")
func (r *backupStorageLocationReconciler) ensureSingleDefaultBSL(locationList velerov1api.BackupStorageLocationList) (bool, error) {
// get all default BSLs
var defaultBSLs []*velerov1api.BackupStorageLocation
var defaultFound bool
for i, location := range locationList.Items {
if location.Spec.Default {
defaultBSLs = append(defaultBSLs, &locationList.Items[i])
}
}
if len(defaultBSLs) > 1 { // more than 1 default BSL
// find the most recent updated default BSL
var mostRecentCreatedBSL *velerov1api.BackupStorageLocation
defaultFound = true
for _, bsl := range defaultBSLs {
if mostRecentCreatedBSL == nil {
mostRecentCreatedBSL = bsl
continue
}
// For lack of a better way to compare timestamps, we use the CreationTimestamp
// it cloud not really find the most recent updated BSL, but it is good enough for now
bslTimestamp := bsl.CreationTimestamp
mostRecentTimestamp := mostRecentCreatedBSL.CreationTimestamp
if mostRecentTimestamp.Before(&bslTimestamp) {
mostRecentCreatedBSL = bsl
}
}
// unset all other default BSLs
for _, bsl := range defaultBSLs {
if bsl.Name != mostRecentCreatedBSL.Name {
bsl.Spec.Default = false
if err := r.client.Update(r.ctx, bsl); err != nil {
return defaultFound, errors.Wrapf(err, "failed to unset default backup storage location %q", bsl.Name)
}
r.log.Debugf("update default backup storage location %q to false", bsl.Name)
}
}
} else if len(defaultBSLs) == 0 { // no default BSL
defaultFound = false
} else { // only 1 default BSL
defaultFound = true
}
return defaultFound, nil
}