Compare commits

...

10 Commits

Author SHA1 Message Date
Xun Jiang/Bruce Jiang
540dfebf07 Merge branch 'main' into copilot/fix-9122
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m10s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Signed-off-by: Xun Jiang/Bruce Jiang <59276555+blackpiglet@users.noreply.github.com>
2025-12-02 16:42:35 +08:00
Xun Jiang
7b7b6bc2db Add the node-agent ConfigMap document.
Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-12-02 16:41:23 +08:00
Xun Jiang/Bruce Jiang
d96434c8c9 Merge pull request #9424 from vmware-tanzu/bump_golang_to_1.25
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m22s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
build-image / Build (push) Failing after 14s
Main CI / get-go-version (push) Successful in 8s
Main CI / Build (push) Failing after 27s
Close stale issues and PRs / stale (push) Successful in 15s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m29s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m6s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m10s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m15s
Bump Golang version from 1.24-bookworm to 1.25-bookworm
2025-11-27 17:53:16 +08:00
Xun Jiang
64e3643006 Fix linter error reported.
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m17s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-11-26 14:16:42 +08:00
Xun Jiang
758f6a4847 Bump Golang version from 1.24-bookworm to 1.25-bookworm
Bump golangci-lint to v1.25.0, because golangci-lint started to support
Golang v1.25 since v1.24.0, and v1.26.x was not stable yet.
Align action pr-linter-check's golangci-lint version to v1.25.0

Signed-off-by: Xun Jiang <xun.jiang@broadcom.com>
2025-11-26 14:16:42 +08:00
Xun Jiang/Bruce Jiang
f6b3852d2f Merge pull request #9427 from vmware-tanzu/dependabot/github_actions/actions/checkout-6
Some checks failed
Run the E2E test on kind / get-go-version (push) Failing after 1m4s
Run the E2E test on kind / build (push) Has been skipped
Run the E2E test on kind / setup-test-matrix (push) Successful in 3s
Run the E2E test on kind / run-e2e-test (push) Has been skipped
Main CI / get-go-version (push) Successful in 10s
Main CI / Build (push) Failing after 25s
Close stale issues and PRs / stale (push) Successful in 13s
Trivy Nightly Scan / Trivy nightly scan (velero, main) (push) Failing after 1m33s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-aws, main) (push) Failing after 1m11s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-gcp, main) (push) Failing after 1m12s
Trivy Nightly Scan / Trivy nightly scan (velero-plugin-for-microsoft-azure, main) (push) Failing after 1m20s
Bump actions/checkout from 5 to 6
2025-11-26 14:15:06 +08:00
dependabot[bot]
981b29b4cb Bump actions/checkout from 5 to 6
Bumps [actions/checkout](https://github.com/actions/checkout) from 5 to 6.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-24 19:25:51 +00:00
copilot-swe-agent[bot]
49b2851f08 Add cross-references between node-agent config documents
Co-authored-by: kaovilai <11228024+kaovilai@users.noreply.github.com>
2025-08-19 17:49:20 +00:00
copilot-swe-agent[bot]
cbadd9047f Add comprehensive node-agent-config documentation
Co-authored-by: kaovilai <11228024+kaovilai@users.noreply.github.com>
2025-08-19 17:43:31 +00:00
copilot-swe-agent[bot]
764975ba29 Initial plan 2025-08-19 17:35:12 +00:00
48 changed files with 1060 additions and 49 deletions

View File

@@ -21,7 +21,7 @@ jobs:
minio-dockerfile-sha: ${{ steps.minio-version.outputs.dockerfile_sha }}
steps:
- name: Check out the code
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: Set up Go version
uses: actions/setup-go@v6
@@ -112,7 +112,7 @@ jobs:
fail-fast: false
steps:
- name: Check out the code
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: Set up Go version
uses: actions/setup-go@v6

View File

@@ -17,7 +17,7 @@ jobs:
version: ${{ steps.pick-version.outputs.version }}
steps:
- name: Check out the code
uses: actions/checkout@v5
uses: actions/checkout@v6
- id: pick-version
run: |

View File

@@ -19,7 +19,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master

View File

@@ -12,7 +12,7 @@ jobs:
steps:
- name: Check out the code
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: Changelog check
if: ${{ !(contains(github.event.pull_request.labels.*.name, 'kind/changelog-not-required') || contains(github.event.pull_request.labels.*.name, 'Design') || contains(github.event.pull_request.labels.*.name, 'Website') || contains(github.event.pull_request.labels.*.name, 'Documentation'))}}

View File

@@ -14,7 +14,7 @@ jobs:
fail-fast: false
steps:
- name: Check out the code
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: Set up Go version
uses: actions/setup-go@v6

View File

@@ -8,7 +8,7 @@ jobs:
steps:
- name: Check out the code
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: Codespell
uses: codespell-project/actions-codespell@master

View File

@@ -13,7 +13,7 @@ jobs:
name: Build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
name: Checkout
- name: Set up QEMU

View File

@@ -14,7 +14,7 @@ jobs:
name: Build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
name: Checkout
- name: Verify .goreleaser.yml and try a dryrun release.

View File

@@ -18,7 +18,7 @@ jobs:
needs: get-go-version
steps:
- name: Check out the code
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: Set up Go version
uses: actions/setup-go@v6
@@ -28,5 +28,5 @@ jobs:
- name: Linter check
uses: golangci/golangci-lint-action@v9
with:
version: v2.1.1
version: v2.5.0
args: --verbose

View File

@@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
# The default value is "1" which fetches only a single commit. If we merge PR without squash or rebase,
# there are at least two commits: the first one is the merge commit and the second one is the real commit

View File

@@ -20,7 +20,7 @@ jobs:
needs: get-go-version
steps:
- name: Check out the code
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: Set up Go version
uses: actions/setup-go@v6

View File

@@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout the latest code
uses: actions/checkout@v5
uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Automatic Rebase

View File

@@ -13,7 +13,7 @@
# limitations under the License.
# Velero binary build section
FROM --platform=$BUILDPLATFORM golang:1.24-bookworm AS velero-builder
FROM --platform=$BUILDPLATFORM golang:1.25-bookworm AS velero-builder
ARG GOPROXY
ARG BIN
@@ -49,7 +49,7 @@ RUN mkdir -p /output/usr/bin && \
go clean -modcache -cache
# Restic binary build section
FROM --platform=$BUILDPLATFORM golang:1.24-bookworm AS restic-builder
FROM --platform=$BUILDPLATFORM golang:1.25-bookworm AS restic-builder
ARG GOPROXY
ARG BIN

View File

@@ -15,7 +15,7 @@
ARG OS_VERSION=1809
# Velero binary build section
FROM --platform=$BUILDPLATFORM golang:1.24-bookworm AS velero-builder
FROM --platform=$BUILDPLATFORM golang:1.25-bookworm AS velero-builder
ARG GOPROXY
ARG BIN

View File

@@ -52,7 +52,7 @@ git_sha = str(local("git rev-parse HEAD", quiet = True, echo_off = True)).strip(
tilt_helper_dockerfile_header = """
# Tilt image
FROM golang:1.24 as tilt-helper
FROM golang:1.25 as tilt-helper
# Support live reloading with Tilt
RUN wget --output-document /restart.sh --quiet https://raw.githubusercontent.com/windmilleng/rerun-process-wrapper/master/restart.sh && \

2
go.mod
View File

@@ -1,6 +1,6 @@
module github.com/vmware-tanzu/velero
go 1.24.0
go 1.25.0
require (
cloud.google.com/go/storage v1.55.0

View File

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
FROM --platform=$TARGETPLATFORM golang:1.24-bookworm
FROM --platform=$TARGETPLATFORM golang:1.25-bookworm
ARG GOPROXY
@@ -94,7 +94,7 @@ RUN ARCH=$(go env GOARCH) && \
chmod +x /usr/bin/goreleaser
# get golangci-lint
RUN curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v2.1.1
RUN curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v2.5.0
# install kubectl
RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/$(go env GOARCH)/kubectl

View File

@@ -75,7 +75,7 @@ func TestDeleteCommand(t *testing.T) {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestDeleteCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestDeleteCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err := veleroexec.RunCommand(cmd)
if err != nil {

View File

@@ -63,7 +63,7 @@ func TestNewDescribeCommand(t *testing.T) {
if os.Getenv(cmdtest.CaptureFlag) == "1" {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestNewDescribeCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewDescribeCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err := veleroexec.RunCommand(cmd)

View File

@@ -91,7 +91,7 @@ func TestNewDownloadCommand(t *testing.T) {
assert.NoError(t, e)
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestNewDownloadCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewDownloadCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
_, stderr, err := veleroexec.RunCommand(cmd)

View File

@@ -63,7 +63,7 @@ func TestNewGetCommand(t *testing.T) {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err := veleroexec.RunCommand(cmd)
require.NoError(t, err)
@@ -84,7 +84,7 @@ func TestNewGetCommand(t *testing.T) {
e = d.Execute()
require.NoError(t, e)
cmd = exec.Command(os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd = exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err = veleroexec.RunCommand(cmd)
require.NoError(t, err)

View File

@@ -66,7 +66,7 @@ func TestNewDeleteCommand(t *testing.T) {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestNewDeleteCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewDeleteCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err := veleroexec.RunCommand(cmd)

View File

@@ -50,7 +50,7 @@ func TestNewGetCommand(t *testing.T) {
c.Execute()
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
_, stderr, err := veleroexec.RunCommand(cmd)

View File

@@ -99,7 +99,7 @@ func TestSetCommand_Execute(t *testing.T) {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestSetCommand_Execute"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestSetCommand_Execute"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
_, stderr, err := veleroexec.RunCommand(cmd)

View File

@@ -18,6 +18,7 @@ package bug
import (
"bytes"
"context"
"errors"
"fmt"
"net/url"
@@ -147,7 +148,7 @@ func getKubectlVersion() (string, error) {
return "", errors.New("kubectl not found on PATH")
}
kubectlCmd := exec.Command("kubectl", "version")
kubectlCmd := exec.CommandContext(context.Background(), "kubectl", "version")
var outbuf bytes.Buffer
kubectlCmd.Stdout = &outbuf
if err := kubectlCmd.Start(); err != nil {
@@ -207,16 +208,17 @@ func renderToString(bugInfo *VeleroBugInfo) (string, error) {
// a platform specific binary.
func showIssueInBrowser(body string) error {
url := issueURL + "?body=" + url.QueryEscape(body)
ctx := context.Background()
switch runtime.GOOS {
case "darwin":
return exec.Command("open", url).Start()
return exec.CommandContext(ctx, "open", url).Start()
case "linux":
if cmdExistsOnPath("xdg-open") {
return exec.Command("xdg-open", url).Start()
return exec.CommandContext(ctx, "xdg-open", url).Start()
}
return fmt.Errorf("velero can't open a browser window using the command '%s'", "xdg-open")
case "windows":
return exec.Command("rundll32", "url.dll,FileProtocolHandler", url).Start()
return exec.CommandContext(ctx, "rundll32", "url.dll,FileProtocolHandler", url).Start()
default:
return fmt.Errorf("velero can't open a browser window on platform %s", runtime.GOOS)
}

View File

@@ -75,7 +75,7 @@ func TestDeleteCommand(t *testing.T) {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestDeleteCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestDeleteCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err := veleroexec.RunCommand(cmd)
if err != nil {

View File

@@ -63,7 +63,7 @@ func TestNewDescribeCommand(t *testing.T) {
if os.Getenv(cmdtest.CaptureFlag) == "1" {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestNewDescribeCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewDescribeCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err := veleroexec.RunCommand(cmd)

View File

@@ -62,7 +62,7 @@ func TestNewGetCommand(t *testing.T) {
return
}
cmd := exec.Command(os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd := exec.CommandContext(t.Context(), os.Args[0], []string{"-test.run=TestNewGetCommand"}...)
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", cmdtest.CaptureFlag))
stdout, _, err := veleroexec.RunCommand(cmd)
require.NoError(t, err)

View File

@@ -22,7 +22,8 @@ func TestPkgImportNoCloudProvider(t *testing.T) {
t.Logf("Current test file path: %s", filename)
t.Logf("Current test directory: %s", filepath.Dir(filename)) // should be this package name
// go list -f {{.Deps}} ./<path-to-this-package-dir>
cmd := exec.Command(
cmd := exec.CommandContext(
t.Context(),
"go",
"list",
"-f",

View File

@@ -18,6 +18,7 @@ limitations under the License.
package process
import (
"context"
"os"
"os/exec"
@@ -78,7 +79,7 @@ func (b *clientBuilder) clientConfig() *hcplugin.ClientConfig {
string(common.PluginKindItemBlockAction): ibav1.NewItemBlockActionPlugin(common.ClientLogger(b.clientLogger)),
},
Logger: b.pluginLogger,
Cmd: exec.Command(b.commandName, b.commandArgs...), //nolint:gosec // Internal call. No need to check the command line.
Cmd: exec.CommandContext(context.Background(), b.commandName, b.commandArgs...), //nolint:gosec // Internal call. No need to check the command line.
}
}

View File

@@ -65,9 +65,11 @@ func TestClientConfig(t *testing.T) {
string(common.PluginKindItemBlockAction): ibav1.NewItemBlockActionPlugin(common.ClientLogger(logger)),
},
Logger: cb.pluginLogger,
Cmd: exec.Command(cb.commandName, cb.commandArgs...),
Cmd: exec.CommandContext(t.Context(), cb.commandName, cb.commandArgs...),
}
cc := cb.clientConfig()
assert.Equal(t, expected, cc)
assert.Equal(t, expected.HandshakeConfig, cc.HandshakeConfig)
assert.Equal(t, expected.AllowedProtocols, cc.AllowedProtocols)
assert.Equal(t, expected.Plugins, cc.Plugins)
}

View File

@@ -22,7 +22,8 @@ func TestPkgImportNoCloudProvider(t *testing.T) {
t.Logf("Current test file path: %s", filename)
t.Logf("Current test directory: %s", filepath.Dir(filename)) // should be this package name
// go list -f {{.Deps}} ./<path-to-this-package-dir>
cmd := exec.Command(
cmd := exec.CommandContext(
t.Context(),
"go",
"list",
"-f",

View File

@@ -77,7 +77,7 @@ func (c *Command) String() string {
// Cmd returns an exec.Cmd for the command.
func (c *Command) Cmd() *exec.Cmd {
parts := c.StringSlice()
cmd := exec.Command(parts[0], parts[1:]...) //nolint:gosec // Internal call. No need to check the parameter.
cmd := exec.Command(parts[0], parts[1:]...) //nolint:gosec,noctx // Internal call. No need to check the parameter. No to add context for deprecated Restic.
cmd.Dir = c.Dir
if len(c.Env) > 0 {

View File

@@ -3,6 +3,8 @@ title: "BackupPVC Configuration for Data Movement Backup"
layout: docs
---
> **📖 For a comprehensive guide** covering all node-agent configuration options, see [Node-agent Configuration](node-agent-config.md).
`BackupPVC` is an intermediate PVC to access data from during the data movement backup operation.
In some scenarios users may need to configure some advanced options of the backupPVC so that the data movement backup
@@ -75,3 +77,13 @@ timeout (data movement prepare timeout value is 30m by default).
if the volume is not readOnly.
- If any of the above problems occur, then the DataUpload CR is `canceled` after timeout, and the backupPod and backupPVC will be deleted, and the backup
will be marked as `PartiallyFailed`.
## Related Documentation
- [Node-agent Configuration](supported-configmaps/node-agent-configmap.md) - Complete reference for all configuration options
- [Node-agent Concurrency](node-agent-concurrency.md) - Configure concurrent operations per node
- [Node Selection for Data Movement](data-movement-node-selection.md) - Configure which nodes run data movement
- [Data Movement Pod Resource Configuration](data-movement-pod-resource-configuration.md) - Configure pod resources
- [BackupPVC Configuration](data-movement-backup-pvc-configuration.md) - Configure backup storage
- [RestorePVC Configuration](data-movement-restore-pvc-configuration.md) - Configure restore storage
- [Cache PVC Configuration](data-movement-cache-volume.md) - Configure restore data mover storage

View File

@@ -42,5 +42,14 @@ Take Kopia repository and the above cache PVC configuration for example:
To enable both the node-agent configMap and backup repository configMap, specify the flags in velero installation by CLI:
`velero install --node-agent-configmap=<ConfigMap-Name> --backup-repository-configmap=<ConfigMap-Name>`
## Related Documentation
[1]: backup-repository-configuration.md
- [Node-agent Configuration](supported-configmaps/node-agent-configmap.md) - Complete reference for all configuration options
- [Node-agent Concurrency](node-agent-concurrency.md) - Configure concurrent operations per node
- [Node Selection for Data Movement](data-movement-node-selection.md) - Configure which nodes run data movement
- [Data Movement Pod Resource Configuration](data-movement-pod-resource-configuration.md) - Configure pod resources
- [BackupPVC Configuration](data-movement-backup-pvc-configuration.md) - Configure backup storage
- [RestorePVC Configuration](data-movement-restore-pvc-configuration.md) - Configure restore storage
- [Cache PVC Configuration](data-movement-cache-volume.md) - Configure restore data mover storage
[1]: backup-repository-configuration.md

View File

@@ -3,6 +3,8 @@ title: "Node Selection for Data Movement"
layout: docs
---
> **📖 For a comprehensive guide** covering all node-agent configuration options, see [Node-agent Configuration](node-agent-config.md).
Velero node-agent is a DaemonSet hosting the data movement modules to complete the concrete work of backups/restores.
Varying from the data size, data complexity, resource availability, the data movement may take a long time and remarkable resources (CPU, memory, network bandwidth, etc.) during the backup and restore.
@@ -258,3 +260,13 @@ volumeBindingMode: Immediate
Because the StorageClass volumeBindingMode is `Immediate`, although `ignoreDelayBinding` is set to `false`, restorePVC will not be created according to the target Pod.
The restorePod will be assigned to nodes, which instance type is `Standard_B4ms`.
## Related Documentation
- [Node-agent Configuration](supported-configmaps/node-agent-configmap.md) - Complete reference for all configuration options
- [Node-agent Concurrency](node-agent-concurrency.md) - Configure concurrent operations per node
- [Node Selection for Data Movement](data-movement-node-selection.md) - Configure which nodes run data movement
- [Data Movement Pod Resource Configuration](data-movement-pod-resource-configuration.md) - Configure pod resources
- [BackupPVC Configuration](data-movement-backup-pvc-configuration.md) - Configure backup storage
- [RestorePVC Configuration](data-movement-restore-pvc-configuration.md) - Configure restore storage
- [Cache PVC Configuration](data-movement-cache-volume.md) - Configure restore data mover storage

View File

@@ -3,6 +3,8 @@ title: "Data Movement Pod Resource Configuration"
layout: docs
---
> **📖 For a comprehensive guide** covering all node-agent configuration options, see [Node-agent Configuration](node-agent-config.md).
During [CSI Snapshot Data Movement][1], Velero built-in data mover launches data mover pods to run the data transfer.
During [fs-backup][2], Velero also launches data mover pods to run the data transfer.
The data transfer is a time and resource consuming activity.
@@ -123,6 +125,16 @@ kubectl create cm node-agent-config -n velero --from-file=node-agent-config.json
**Note**: If the specified priority class doesn't exist in the cluster when data mover pods are created, the pods will fail to schedule. Velero validates the priority class at startup and logs a warning if it doesn't exist, but the pods will still attempt to use it.
## Related Documentation
- [Node-agent Configuration](supported-configmaps/node-agent-configmap.md) - Complete reference for all configuration options
- [Node-agent Concurrency](node-agent-concurrency.md) - Configure concurrent operations per node
- [Node Selection for Data Movement](data-movement-node-selection.md) - Configure which nodes run data movement
- [Data Movement Pod Resource Configuration](data-movement-pod-resource-configuration.md) - Configure pod resources
- [BackupPVC Configuration](data-movement-backup-pvc-configuration.md) - Configure backup storage
- [RestorePVC Configuration](data-movement-restore-pvc-configuration.md) - Configure restore storage
- [Cache PVC Configuration](data-movement-cache-volume.md) - Configure restore data mover storage
[1]: csi-snapshot-data-movement.md
[2]: file-system-backup.md
[3]: https://kubernetes.io/docs/concepts/workloads/pods/pod-qos/

View File

@@ -3,6 +3,8 @@ title: "RestorePVC Configuration for Data Movement Restore"
layout: docs
---
> **📖 For a comprehensive guide** covering all node-agent configuration options, see [Node-agent Configuration](node-agent-config.md).
`RestorePVC` is an intermediate PVC to write data during the data movement restore operation.
In some scenarios users may need to configure some advanced options of the `restorePVC` so that the data movement restore operation could perform better. Specifically:
@@ -28,3 +30,13 @@ A sample of `restorePVC` config as part of the ConfigMap would look like:
**Note:**
- If `ignoreDelayBinding` is set, the restored volume is provisioned in the storage areas associated to an arbitrary node, if the restored pod cannot be scheduled to that node, e.g., because of topology constraints, the data mover restore still completes, but the workload is not usable since the restored pod cannot mount the restored volume
- At present, node selection is not supported for data mover restore, so the restored volume may be attached to any node in the cluster; once node selection is supported and enabled, the restored volume will be attached to one of the selected nodes only. In this way, node selection and `ignoreDelayBinding` can work together even though the environment is with topology constraints
## Related Documentation
- [Node-agent Configuration](supported-configmaps/node-agent-configmap.md) - Complete reference for all configuration options
- [Node-agent Concurrency](node-agent-concurrency.md) - Configure concurrent operations per node
- [Node Selection for Data Movement](data-movement-node-selection.md) - Configure which nodes run data movement
- [Data Movement Pod Resource Configuration](data-movement-pod-resource-configuration.md) - Configure pod resources
- [BackupPVC Configuration](data-movement-backup-pvc-configuration.md) - Configure backup storage
- [RestorePVC Configuration](data-movement-restore-pvc-configuration.md) - Configure restore storage
- [Cache PVC Configuration](data-movement-cache-volume.md) - Configure restore data mover storage

View File

@@ -3,6 +3,8 @@ title: "Node-agent Concurrency"
layout: docs
---
> **📖 For a comprehensive guide** covering all node-agent configuration options, see [Node-agent Configuration](node-agent-config.md).
Velero node-agent is a daemonset hosting modules to complete the concrete tasks of backups/restores, i.e., file system backup/restore, CSI snapshot data movement.
Varying from the data size, data complexity, resource availability, the tasks may take a long time and remarkable resources (CPU, memory, network bandwidth, etc.). These tasks make the loads of node-agent.
@@ -23,7 +25,7 @@ You can specify different concurrent number per node, for example, you can set 3
The range of Per-node concurrent number is the same with Global concurrent number. Per-node concurrent number is preferable to Global concurrent number, so it will overwrite the Global concurrent number for that node.
Per-node concurrent number is implemented through ```perNodeConfig``` field in ```loadConcurrency```.
```perNodeConfig``` is a list of ```RuledConfigs``` each item of which matches one or more nodes by label selectors and specify the concurrent number for the matched nodes.
`perNodeConfig` is a list of `RuledConfigs`, each item of which matches one or more nodes by label selectors and specifies the concurrent number for the matched nodes.
Here is an example of the ```perNodeConfig```:
```
"nodeSelector: kubernetes.io/hostname=node1; number: 3"
@@ -79,3 +81,13 @@ spec:
- args:
- --node-agent-configmap=<ConfigMap name>
```
## Related Documentation
- [Node-agent Configuration](supported-configmaps/node-agent-configmap.md) - Complete reference for all configuration options
- [Node-agent Concurrency](node-agent-concurrency.md) - Configure concurrent operations per node
- [Node Selection for Data Movement](data-movement-node-selection.md) - Configure which nodes run data movement
- [Data Movement Pod Resource Configuration](data-movement-pod-resource-configuration.md) - Configure pod resources
- [BackupPVC Configuration](data-movement-backup-pvc-configuration.md) - Configure backup storage
- [RestorePVC Configuration](data-movement-restore-pvc-configuration.md) - Configure restore storage
- [Cache PVC Configuration](data-movement-cache-volume.md) - Configure restore data mover storage

View File

@@ -0,0 +1,409 @@
---
title: "Node-agent Configuration"
layout: docs
---
The Velero node-agent is a DaemonSet that hosts modules for completing backup and restore operations, including file system backup/restore and CSI snapshot data movement. This document provides comprehensive configuration options for the node-agent through a ConfigMap.
## Overview
Node-agent configuration is provided through a ConfigMap that contains JSON configuration for various aspects of data movement operations. The ConfigMap should be created in the same namespace where Velero is installed, and its name is specified using the `--node-agent-configmap` parameter.
### Creating and Managing the ConfigMap
The ConfigMap name can be specified during Velero installation:
```bash
velero install --node-agent-configmap=<ConfigMap-Name>
```
To create the ConfigMap:
1. Save your configuration to a JSON file
2. Create the ConfigMap:
```bash
kubectl create cm <ConfigMap-Name> -n velero --from-file=<json-file-name>
```
To apply the ConfigMap to the node-agent DaemonSet:
```bash
kubectl edit ds node-agent -n velero
```
Add the ConfigMap reference to the container arguments:
```yaml
spec:
template:
spec:
containers:
- args:
- --node-agent-configmap=<ConfigMap-Name>
```
**Important**: The node-agent server checks configurations at startup time. After editing the ConfigMap, restart the node-agent DaemonSet for changes to take effect.
## Configuration Sections
### Load Concurrency (`loadConcurrency`)
Controls the concurrent number of data movement operations per node to optimize resource usage and performance.
#### Global Configuration
Sets a default concurrent number applied to all nodes:
```json
{
"loadConcurrency": {
"globalConfig": 2
}
}
```
#### Per-node Configuration
Specify different concurrent numbers for specific nodes using label selectors:
```json
{
"loadConcurrency": {
"globalConfig": 2,
"perNodeConfig": [
{
"nodeSelector": {
"matchLabels": {
"kubernetes.io/hostname": "node1"
}
},
"number": 3
},
{
"nodeSelector": {
"matchLabels": {
"beta.kubernetes.io/instance-type": "Standard_B4ms"
}
},
"number": 5
}
]
}
}
```
- **Range**: Starts from 1 (no concurrency), no upper limit
- **Priority**: Per-node configuration overrides global configuration
- **Conflicts**: If a node matches multiple rules, the smallest number is used
- **Default**: 1 if not specified
**Use Cases:**
- Increase concurrency on nodes with more resources
- Reduce concurrency on nodes with limited resources or critical workloads
- Prevent OOM kills and resource contention
For detailed information, see [Node-agent Concurrency](node-agent-concurrency.md).
### Node Selection (`loadAffinity`)
Constrains which nodes can run data movement operations using affinity and anti-affinity rules.
```json
{
"loadAffinity": [
{
"nodeSelector": {
"matchLabels": {
"beta.kubernetes.io/instance-type": "Standard_B4ms"
},
"matchExpressions": [
{
"key": "kubernetes.io/hostname",
"values": ["node-1", "node-2", "node-3"],
"operator": "In"
},
{
"key": "critical-workload",
"operator": "DoesNotExist"
}
]
}
}
]
}
```
#### Storage Class Specific Selection
Configure different node selection rules for specific storage classes:
```json
{
"loadAffinity": [
{
"nodeSelector": {
"matchLabels": {
"environment": "production"
}
},
"storageClass": "fast-ssd"
},
{
"nodeSelector": {
"matchLabels": {
"environment": "backup"
}
}
}
]
}
```
**Important Limitations:**
- Only the first element in the `loadAffinity` array is used for general node selection
- Additional elements are only considered if they have a `storageClass` field
- To combine multiple conditions, use both `matchLabels` and `matchExpressions` in a single element
**Use Cases:**
- Prevent data movement on nodes with critical workloads
- Run data movement only on nodes with sufficient resources
- Ensure data movement runs only on nodes where storage is accessible
- Comply with topology constraints
For detailed information, see [Node Selection for Data Movement](data-movement-node-selection.md).
### Pod Resources (`podResources`)
Configure CPU and memory resources for data mover pods to optimize performance and prevent resource conflicts.
```json
{
"podResources": {
"cpuRequest": "1000m",
"cpuLimit": "2000m",
"memoryRequest": "1Gi",
"memoryLimit": "4Gi"
},
"priorityClassName": "backup-priority"
}
```
#### Resource Configuration
- **Values**: Must be valid Kubernetes Quantity expressions
- **Validation**: Request values must not exceed limit values
- **Default**: BestEffort QoS if not specified
- **Failure Handling**: Invalid values cause the entire `podResources` section to be ignored
#### Priority Class Configuration
Configure pod priority to control scheduling behavior:
**High Priority** (e.g., `system-cluster-critical`):
- ✅ Faster scheduling and less likely to be preempted
- ❌ May impact production workload performance
**Low Priority** (e.g., `low-priority`):
- ✅ Protects production workloads from resource competition
- ❌ May delay backup operations or cause preemption
**Use Cases:**
- Limit resource consumption in resource-constrained clusters
- Guarantee resources for time-critical backup/restore operations
- Prevent OOM kills during large data transfers
- Control scheduling priority relative to production workloads
For detailed information, see [Data Movement Pod Resource Configuration](data-movement-pod-resource-configuration.md).
### Backup PVC Configuration (`backupPVC`)
Configure intermediate PVCs used during data movement backup operations for optimal performance.
```json
{
"backupPVC": {
"source-storage-class": {
"storageClass": "backup-optimized-class",
"readOnly": true,
"spcNoRelabeling": true
}
}
}
```
#### Configuration Options
- **`storageClass`**: Alternative storage class for backup PVCs (defaults to source PVC's storage class)
- **`readOnly`**: Use `ReadOnlyMany` access mode for faster volume creation from snapshots
- **`spcNoRelabeling`**: Required in SELinux clusters when using `readOnly` mode
#### Storage Class Mapping
Configure different backup PVC settings per source storage class:
```json
{
"backupPVC": {
"fast-storage": {
"storageClass": "backup-storage",
"readOnly": true
},
"slow-storage": {
"storageClass": "backup-storage"
}
}
}
```
**Use Cases:**
- Use read-only volumes for faster snapshot-to-volume conversion
- Use dedicated storage classes optimized for backup operations
- Reduce replica count for intermediate backup volumes
- Comply with SELinux requirements in secured environments
**Important Notes:**
- Ensure specified storage classes exist and support required access modes
- In SELinux environments, always set `spcNoRelabeling: true` when using `readOnly: true`
- Failures result in DataUpload CR staying in `Accepted` phase until timeout (30m default)
For detailed information, see [BackupPVC Configuration for Data Movement Backup](data-movement-backup-pvc-configuration.md).
### Restore PVC Configuration (`restorePVC`)
Configure intermediate PVCs used during data movement restore operations.
```json
{
"restorePVC": {
"ignoreDelayBinding": true
}
}
```
#### Configuration Options
- **`ignoreDelayBinding`**: Ignore `WaitForFirstConsumer` binding mode constraints
**Use Cases:**
- Improve restore parallelism by not waiting for pod scheduling
- Enable volume restore without requiring a pod to be mounted
- Work around topology constraints when you know the environment setup
**Important Notes:**
- Use only when you understand your cluster's topology constraints
- May result in volumes provisioned on nodes where workload pods cannot be scheduled
- Works best with node selection to ensure proper node targeting
For detailed information, see [RestorePVC Configuration for Data Movement Restore](data-movement-restore-pvc-configuration.md).
### Prepare Queue Length (`prepareQueueLength`)
Control the maximum number of backup/restore operations that can be in preparation phases simultaneously.
```json
{
"prepareQueueLength": 10
}
```
**Use Cases:**
- Limit resource consumption from intermediate objects (PVCs, VolumeSnapshots, etc.)
- Prevent resource exhaustion when backup/restore concurrency is limited
- Balance between parallelism and resource usage
**Affected CR Phases:**
- DataUpload/DataDownload CRs in `Accepted` or `Prepared` phases
- PodVolumeBackup/PodVolumeRestore CRs in preparation phases
For detailed information, see [Node-agent Prepare Queue Length](node-agent-prepare-queue-length.md).
## Complete Configuration Example
Here's a comprehensive example showing how all configuration sections work together:
```json
{
"loadConcurrency": {
"globalConfig": 2,
"perNodeConfig": [
{
"nodeSelector": {
"matchLabels": {
"node-type": "backup"
}
},
"number": 4
}
]
},
"loadAffinity": [
{
"nodeSelector": {
"matchLabels": {
"node-type": "backup"
},
"matchExpressions": [
{
"key": "critical-workload",
"operator": "DoesNotExist"
}
]
}
},
{
"nodeSelector": {
"matchLabels": {
"storage-tier": "fast"
}
},
"storageClass": "fast-ssd"
}
],
"podResources": {
"cpuRequest": "500m",
"cpuLimit": "1000m",
"memoryRequest": "1Gi",
"memoryLimit": "2Gi"
},
"priorityClassName": "backup-priority",
"backupPVC": {
"fast-ssd": {
"storageClass": "backup-optimized",
"readOnly": true
},
"standard": {
"storageClass": "backup-standard"
}
},
"restorePVC": {
"ignoreDelayBinding": true
},
"prepareQueueLength": 15
}
```
This configuration:
- Allows 2 concurrent operations globally, 4 on backup nodes
- Runs data movement only on backup nodes without critical workloads
- Uses fast storage nodes for fast-ssd storage class operations
- Limits pod resources to prevent cluster overload
- Uses high priority for backup operations
- Optimizes backup PVCs with read-only access and dedicated storage classes
- Ignores delay binding for faster restores
- Allows up to 15 operations in preparation phases
## Troubleshooting
### Common Issues
1. **ConfigMap not taking effect**: Restart node-agent DaemonSet after changes
2. **Invalid resource values**: Check logs for validation errors; entire section ignored on failure
3. **Storage class not found**: Ensure specified storage classes exist in the cluster
4. **SELinux issues**: Set `spcNoRelabeling: true` when using `readOnly: true`
5. **Node selection not working**: Verify node labels and check only first loadAffinity element is used
### Validation
To verify your configuration is loaded correctly:
```bash
kubectl logs -n velero -l app=node-agent | grep -i config
```
To check current node-agent configuration:
```bash
kubectl get cm <ConfigMap-Name> -n velero -o yaml
```
## Related Documentation
For detailed information on specific configuration sections:
- [Node-agent Concurrency](node-agent-concurrency.md)
- [Node Selection for Data Movement](data-movement-node-selection.md)
- [Data Movement Pod Resource Configuration](data-movement-pod-resource-configuration.md)
- [BackupPVC Configuration for Data Movement Backup](data-movement-backup-pvc-configuration.md)
- [RestorePVC Configuration for Data Movement Restore](data-movement-restore-pvc-configuration.md)
- [Node-agent Prepare Queue Length](node-agent-prepare-queue-length.md)

View File

@@ -3,6 +3,8 @@ title: "Node-agent Prepare Queue Length"
layout: docs
---
> **📖 For a comprehensive guide** covering all node-agent configuration options, see [Node-agent Configuration](node-agent-config.md).
During [CSI Snapshot Data Movement][1], Velero built-in data mover launches data mover pods to run the data transfer.
During [fs-backup][2], Velero also launches data mover pods to run the data transfer.
Other intermediate resources may also be created along with the data mover pods, i.e., PVCs, VolumeSnapshots, VolumeSnapshotContents, etc.
@@ -42,6 +44,16 @@ spec:
- --node-agent-configmap=<configMap name>
```
## Related Documentation
- [Node-agent Configuration](supported-configmaps/node-agent-configmap.md) - Complete reference for all configuration options
- [Node-agent Concurrency](node-agent-concurrency.md) - Configure concurrent operations per node
- [Node Selection for Data Movement](data-movement-node-selection.md) - Configure which nodes run data movement
- [Data Movement Pod Resource Configuration](data-movement-pod-resource-configuration.md) - Configure pod resources
- [BackupPVC Configuration](data-movement-backup-pvc-configuration.md) - Configure backup storage
- [RestorePVC Configuration](data-movement-restore-pvc-configuration.md) - Configure restore storage
- [Cache PVC Configuration](data-movement-cache-volume.md) - Configure restore data mover storage
[1]: csi-snapshot-data-movement.md
[2]: file-system-backup.md
[3]: node-agent-concurrency.md

View File

@@ -0,0 +1,10 @@
---
layout: docs
title: Supported ConfigMaps
---
Here's a list of ConfigMaps that Velero supports, but whose life cycle control is out of Velero's scope.
* [node-agent ConfigMap][1]
[1]: node-agent-configmap.md

View File

@@ -0,0 +1,494 @@
---
title: "Node-agent Configuration"
layout: docs
---
The Velero node-agent is a DaemonSet that hosts modules for completing backup and restore operations, including file system backup/restore and CSI snapshot data movement. This document provides comprehensive configuration options for the ConfigMap specified by the node-agent's `--node-agent-configmap` parameter.
## Overview
Node-agent puts advanced configurations of data movement and PodVolume operations into a ConfigMap that contains JSON configuration. The ConfigMap should be created in the same namespace where Velero is installed, and its name is specified using the `--node-agent-configmap` parameter.
### Creating and Managing the ConfigMap
For detailed information, see [Node-agent Concurrency](../node-agent-concurrency.md).
**Notice**: The ConfigMap's life cycle control is out of the scope of Velero.
Users need to create and maintain the ConfigMap themselves.
The ConfigMap name can be specified during Velero installation:
```bash
velero install --node-agent-configmap=<ConfigMap-Name>
```
To create the ConfigMap:
1. Save your configuration to a JSON file
2. Create the ConfigMap:
```bash
kubectl create cm <ConfigMap-Name> -n velero --from-file=<json-file-name>
```
To apply the ConfigMap to the node-agent DaemonSet:
```bash
kubectl edit ds node-agent -n velero
```
Add the ConfigMap reference to the container arguments:
```yaml
spec:
template:
spec:
containers:
- args:
- --node-agent-configmap=<ConfigMap-Name>
```
**Important**: The node-agent server checks configurations at startup time. After editing the ConfigMap, restart the node-agent DaemonSet for changes to take effect.
`kubectl rollout restart -n <velero-namespace> daemonset/node-agent`
## Configuration Sections
### Load Concurrency (`loadConcurrency`)
Controls the concurrent number of data movement operations per node to optimize resource usage and performance.
For detailed information, see [Node-agent Concurrency](../node-agent-concurrency.md).
#### Global Configuration
Sets a default concurrent number applied to all nodes:
```json
{
"loadConcurrency": {
"globalConfig": 2
}
}
```
#### Per-node Configuration
Specify different concurrent numbers for specific nodes using label selectors:
```json
{
"loadConcurrency": {
"globalConfig": 2,
"perNodeConfig": [
{
"nodeSelector": {
"matchLabels": {
"kubernetes.io/hostname": "node1"
}
},
"number": 3
},
{
"nodeSelector": {
"matchLabels": {
"beta.kubernetes.io/instance-type": "Standard_B4ms"
}
},
"number": 5
}
]
}
}
```
- **Range**: Starts from 1 (no concurrency per node), no upper limit
- **Priority**: Per-node configuration overrides global configuration
- **Conflicts**: If a node matches multiple rules, the smallest number is used
- **Default**: 1 if not specified
**Use Cases:**
- Increase concurrency on nodes with more resources
- Reduce concurrency on nodes with limited resources or critical workloads
- Prevent OOM kills and resource contention
#### PrepareQueueLength
Control the maximum number of backup/restore operations that can be in preparation phases simultaneously.
The concurrency number controls how many backup/restore operations can run at the same time.
The prepare queue length controls how many backup/restore operations can create workload pods that are pending to start.
If there are thousands of volume B/R operations and this control is absent, thousands of B/R pods would be created at the same time, putting a heavy burden on the Kubernetes API server.
```json
{
"loadConcurrency": {
"prepareQueueLength": 10
}
}
```
- **Range**: Starts from 1 (for all node-agent pods), no upper limit
- **Scope**: This parameter limits the total number of pending PVB, PVR, DataUpload, and DataDownload pods. It applies across all node-agent pods.
- **Default**: No limitation if not specified
**Use Cases:**
- Prevent creating too many workload pods that cannot start
- Limit resource consumption from intermediate objects (PVCs, VolumeSnapshots, etc.)
- Prevent resource exhaustion when backup/restore concurrency is limited
- Balance between parallelism and resource usage
**Affected CR Phases:**
- DataUpload/DataDownload CRs in `Accepted` or `Prepared` phases
- PodVolumeBackup/PodVolumeRestore CRs in preparation phases
### Node Selection (`loadAffinity`)
Constrains which nodes can run data movement operations using affinity and anti-affinity rules.
For detailed information, see [Node Selection for Data Movement](../data-movement-node-selection.md).
```json
{
"loadAffinity": [
{
"nodeSelector": {
"matchLabels": {
"beta.kubernetes.io/instance-type": "Standard_B4ms"
},
"matchExpressions": [
{
"key": "kubernetes.io/hostname",
"values": ["node-1", "node-2", "node-3"],
"operator": "In"
},
{
"key": "critical-workload",
"operator": "DoesNotExist"
}
]
}
}
]
}
```
#### Storage Class Specific Selection
Configure different node selection rules for specific storage classes:
* For StorageClass `fast-ssd`, the first match is chosen, which is nodes with label `"environment": "production"`.
* For StorageClass `hdd`, the nodes with label `"environment": "backup"` are chosen.
```json
{
"loadAffinity": [
{
"nodeSelector": {
"matchLabels": {
"environment": "production"
}
},
"storageClass": "fast-ssd"
},
{
"nodeSelector": {
"matchLabels": {
"environment": "staging"
}
},
"storageClass": "fast-ssd"
},
{
"nodeSelector": {
"matchLabels": {
"environment": "backup"
}
},
"storageClass": "hdd"
}
]
}
```
**Important Limitations:**
- Only the first element in the `loadAffinity` array is used for general node selection
- Additional elements are only considered if they have a `storageClass` field
- To combine multiple conditions, use both `matchLabels` and `matchExpressions` in a single element
**Use Cases:**
- Prevent data movement on nodes with critical workloads
- Run data movement only on nodes with sufficient resources
- Ensure data movement runs only on nodes where storage is accessible
- Comply with topology constraints
### Pod Resources (`podResources`)
Configure CPU and memory resources for data mover pods to optimize performance and prevent resource conflicts.
The configurations work for PodVolumeBackup, PodVolumeRestore, DataUpload, and DataDownload pods.
```json
{
"podResources": {
"cpuRequest": "1000m",
"cpuLimit": "2000m",
"memoryRequest": "1Gi",
"memoryLimit": "4Gi"
}
}
```
**Use Cases:**
- Limit resource consumption in resource-constrained clusters
- Guarantee resources for time-critical backup/restore operations
- Prevent OOM kills during large data transfers
- Control scheduling priority relative to production workloads
**Values**: Must be valid Kubernetes Quantity expressions
**Validation**: Request values must not exceed limit values
**Default**: BestEffort QoS if not specified
**Failure Handling**: Invalid values cause the entire `podResources` section to be ignored
For detailed information, see [Data Movement Pod Resource Configuration](../data-movement-pod-resource-configuration.md).
### Priority Class (`priorityClassName`)
Configure the node-agent created pod's PriorityClass.
The configurations work for PodVolumeBackup, PodVolumeRestore, DataUpload, and DataDownload pods.
Configure pod priority to control scheduling behavior:
**High Priority** (e.g., `system-cluster-critical`):
- ✅ Faster scheduling and less likely to be preempted
- ❌ May impact production workload performance
**Low Priority** (e.g., `low-priority`):
- ✅ Protects production workloads from resource competition
- ❌ May delay backup operations or cause preemption
Example:
``` json
{
"priorityClassName": "low-priority"
}
```
### Backup PVC Configuration (`backupPVC`)
Configure intermediate PVCs used during data movement backup operations for optimal performance.
For detailed information, see [BackupPVC Configuration for Data Movement Backup](../data-movement-backup-pvc-configuration.md).
#### Configuration Options
- **`storageClass`**: Alternative storage class for backup PVCs (defaults to source PVC's storage class)
- **`readOnly`**: This is a boolean value. If set to `true` then `ReadOnlyMany` will be the only value set to the backupPVC's access modes. Otherwise `ReadWriteOnce` value will be used.
- **`spcNoRelabeling`**: This is a boolean value. If set to true, then `pod.Spec.SecurityContext.SELinuxOptions.Type` will be set to `spc_t`. From the SELinux point of view, this will be considered a `Super Privileged Container` which means that selinux enforcement will be disabled and volume relabeling will not occur. This field is ignored if `readOnly` is `false`.
**Use Cases:**
- Use read-only volumes for faster snapshot-to-volume conversion
- Use dedicated storage classes optimized for backup operations
- Reduce replica count for intermediate backup volumes
- Comply with SELinux requirements in secured environments
**Important Notes:**
- Ensure specified storage classes exist and support required access modes
- In SELinux environments, always set `spcNoRelabeling: true` when using `readOnly: true`
- Failures result in DataUpload CR staying in `Accepted` phase until timeout (30m default)
#### Storage Class Mapping
Configure different backup PVC settings per source storage class:
```json
{
"backupPVC": {
"fast-storage": {
"storageClass": "backup-storage-1"
},
"slow-storage": {
"storageClass": "backup-storage-2"
}
}
}
```
#### ReadOnly and SPC configuration
Create the BackupPVC in ReadOnly mode, which can avoid a full data clone during the backup process with some storage providers, such as Ceph RBD.
In an `SELinux-enabled` cluster, any time users set `readOnly=true` they must also set `spcNoRelabeling=true`.
```json
{
"backupPVC": {
"source-storage-class": {
"storageClass": "backup-optimized-class",
"readOnly": true,
"spcNoRelabeling": true
}
}
}
```
### Restore PVC Configuration (`restorePVC`)
Configure intermediate PVCs used during data movement restore operations.
```json
{
"restorePVC": {
"ignoreDelayBinding": true
}
}
```
#### Configuration Options
- **`ignoreDelayBinding`**: Ignore `WaitForFirstConsumer` binding mode constraints
**Use Cases:**
- Improve restore parallelism by not waiting for pod scheduling
- Enable volume restore without requiring a pod to be mounted
- Work around topology constraints when you know the environment setup
**Important Notes:**
- Use only when you understand your cluster's topology constraints
- May result in volumes provisioned on nodes where workload pods cannot be scheduled
- Works best with node selection to ensure proper node targeting
For detailed information, see [RestorePVC Configuration for Data Movement Restore](../data-movement-restore-pvc-configuration.md).
### Privileged FS Backup and Restore (`privilegedFsBackup`)
Add the `privileged` permission to the `SecurityContext` of the pods created for PodVolumeBackup and PodVolumeRestore, because in some Kubernetes environments mounting a HostPath volume requires privileged permission to work.
In v1.17, PodVolumeBackup and PodVolumeRestore were micro-serviced into independent pods, but they still mount the target volume via HostPath. As a result, the `privileged` permission is needed.
``` json
{
"privilegedFsBackup": true
}
```
For detailed information, see [Enable file system backup document](../customize-installation.md#enable-file-system-backup)
### Cache PVC Configuration (`cachePVCConfig`)
Configure intermediate PVCs used for data movement restore operations to cache the downloaded data.
For detailed information, see [Cache PVC Configuration for Data Movement Restore](../data-movement-cache-volume.md).
#### Configuration Options
- **`thresholdInGB`**: Minimum backup data size (in GB) to trigger cache PVC creation during restore
- **`storageClass`**: Storage class used to create cache PVCs.
**Use Cases:**
- Improve restore performance by caching downloaded data locally
- Reduce repeated data downloads from object storage
- Optimize restore operations for large volumes
**Important Notes:**
- Cache PVC is only created when restored data size exceeds the threshold
- Ensure specified storage class exists and has sufficient capacity
- Cache PVCs are temporary and cleaned up after restore completion
```json
{
"cachePVCConfig": {
"thresholdInGB": 1,
"storageClass": "cache-optimized-storage"
}
}
```
## Complete Configuration Example
Here's a comprehensive example showing how all configuration sections work together:
```json
{
"loadConcurrency": {
"globalConfig": 2,
"prepareQueueLength": 15,
"perNodeConfig": [
{
"nodeSelector": {
"matchLabels": {
"kubernetes.io/hostname": "node1"
}
},
"number": 3
}
]
},
"loadAffinity": [
{
"nodeSelector": {
"matchLabels": {
"node-type": "backup"
},
"matchExpressions": [
{
"key": "critical-workload",
"operator": "DoesNotExist"
}
]
}
},
{
"nodeSelector": {
"matchLabels": {
"environment": "staging"
}
},
"storageClass": "fast-ssd"
}
],
"podResources": {
"cpuRequest": "500m",
"cpuLimit": "1000m",
"memoryRequest": "1Gi",
"memoryLimit": "2Gi"
},
"priorityClassName": "backup-priority",
"backupPVC": {
"fast-storage": {
"storageClass": "backup-optimized-class",
"readOnly": true,
"spcNoRelabeling": true
},
"slow-storage": {
"storageClass": "backup-storage-2"
}
},
"restorePVC": {
"ignoreDelayBinding": true
},
"privilegedFsBackup": true,
"cachePVCConfig": {
"thresholdInGB": 1,
"storageClass": "cache-optimized-storage"
}
}
```
This configuration:
- Allows 2 concurrent operations globally, 3 on worker `node1`
- Allows up to 15 operations in preparation phases
- Runs data movement only on backup nodes without critical workloads
- Uses fast storage nodes for fast-ssd storage class operations
- Limits pod resources to prevent cluster overload
- Uses high priority for backup operations
- Optimizes backup PVCs with read-only access and dedicated storage classes
- Ignores delay binding for faster restores
- Enables privileged permission for PodVolume pods
- Enables the cache PVC for FS restore
- The cache threshold is 1GB and a dedicated StorageClass is used
## Troubleshooting
### Common Issues
1. **ConfigMap not taking effect**: Restart node-agent DaemonSet after changes
2. **Invalid resource values**: Check logs for validation errors; entire section ignored on failure
3. **Storage class not found**: Ensure specified storage classes exist in the cluster
4. **SELinux issues**: Set `spcNoRelabeling: true` when using `readOnly: true`
5. **Node selection not working**: Verify node labels and check only first loadAffinity element is used
### Validation
To verify your configuration is loaded correctly:
```bash
kubectl logs -n velero -l app=node-agent | grep -i config
```
To check current node-agent configuration:
```bash
kubectl get cm <ConfigMap-Name> -n velero -o yaml
```
## Related Documentation
For detailed information on specific configuration sections:
- [Node-agent Concurrency](../node-agent-concurrency.md)
- [Node Selection for Data Movement](../data-movement-node-selection.md)
- [Data Movement Pod Resource Configuration](../data-movement-pod-resource-configuration.md)
- [BackupPVC Configuration for Data Movement Backup](../data-movement-backup-pvc-configuration.md)
- [RestorePVC Configuration for Data Movement Restore](../data-movement-restore-pvc-configuration.md)
- [Node-agent Prepare Queue Length](../node-agent-prepare-queue-length.md)
- [Cache PVC Configuration for Data Movement Restore](../data-movement-cache-volume.md)

View File

@@ -79,6 +79,10 @@ toc:
url: /data-movement-cache-volume.md
- page: Node-agent Concurrency
url: /node-agent-concurrency
- page: Node-agent Prepare Queue Length
url: /node-agent-prepare-queue-length
- page: Data Movement Cache Volume
url: /data-movement-cache-volume
- title: Plugins
subfolderitems:
- page: Overview
@@ -119,6 +123,8 @@ toc:
url: /output-file-format
- page: API types
url: /api-types
- page: Supported ConfigMaps
url: /supported-configmaps
- page: Support process
url: /support-process
- page: For maintainers

View File

@@ -142,6 +142,7 @@ func (m *migrationE2E) Backup() error {
"Fail to set images for the migrate-from Velero installation.")
m.veleroCLI2Version.VeleroCLI, err = veleroutil.InstallVeleroCLI(
m.Ctx,
m.veleroCLI2Version.VeleroVersion)
Expect(err).To(Succeed())
}

View File

@@ -115,7 +115,10 @@ func BackupUpgradeRestoreTest(useVolumeSnapshots bool, veleroCLI2Version VeleroC
//Download velero CLI if it's empty according to velero CLI version
By(fmt.Sprintf("Install the expected old version Velero CLI (%s) for installing Velero",
veleroCLI2Version.VeleroVersion), func() {
veleroCLI2Version.VeleroCLI, err = InstallVeleroCLI(veleroCLI2Version.VeleroVersion)
veleroCLI2Version.VeleroCLI, err = InstallVeleroCLI(
oneHourTimeout,
veleroCLI2Version.VeleroVersion,
)
Expect(err).To(Succeed())
})
}

View File

@@ -33,7 +33,7 @@ func GetListByCmdPipes(ctx context.Context, cmdLines []*OsCommandLine) ([]string
var cmds []*exec.Cmd
for _, cmdline := range cmdLines {
cmd := exec.Command(cmdline.Cmd, cmdline.Args...)
cmd := exec.CommandContext(ctx, cmdline.Cmd, cmdline.Args...)
cmds = append(cmds, cmd)
}
fmt.Println(cmds)

View File

@@ -915,12 +915,12 @@ func CheckVeleroVersion(ctx context.Context, veleroCLI string, expectedVer strin
return nil
}
func InstallVeleroCLI(version string) (string, error) {
func InstallVeleroCLI(ctx context.Context, version string) (string, error) {
var tempVeleroCliDir string
name := "velero-" + version + "-" + runtime.GOOS + "-" + runtime.GOARCH
postfix := ".tar.gz"
tarball := name + postfix
err := wait.PollImmediate(time.Second*5, time.Minute*5, func() (bool, error) {
err := wait.PollUntilContextTimeout(ctx, time.Second*5, time.Minute*5, true, func(ctx context.Context) (bool, error) {
tempFile, err := getVeleroCliTarball("https://github.com/vmware-tanzu/velero/releases/download/" + version + "/" + tarball)
if err != nil {
return false, errors.WithMessagef(err, "failed to get Velero CLI tarball")
@@ -930,7 +930,7 @@ func InstallVeleroCLI(version string) (string, error) {
return false, errors.WithMessagef(err, "failed to create temp dir for tarball extraction")
}
cmd := exec.Command("tar", "-xvf", tempFile.Name(), "-C", tempVeleroCliDir)
cmd := exec.CommandContext(ctx, "tar", "-xvf", tempFile.Name(), "-C", tempVeleroCliDir)
defer os.Remove(tempFile.Name())
if _, err := cmd.Output(); err != nil {