diff --git a/Dockerfile-ark.alpine b/Dockerfile-ark.alpine index 2fb1a3492..7fff5c39f 100644 --- a/Dockerfile-ark.alpine +++ b/Dockerfile-ark.alpine @@ -19,9 +19,9 @@ MAINTAINER Andy Goldstein RUN apk add --no-cache ca-certificates RUN apk add --update --no-cache bzip2 && \ - wget --quiet https://github.com/restic/restic/releases/download/v0.9.3/restic_0.9.3_linux_amd64.bz2 && \ - bunzip2 restic_0.9.3_linux_amd64.bz2 && \ - mv restic_0.9.3_linux_amd64 /usr/bin/restic && \ + wget --quiet https://github.com/restic/restic/releases/download/v0.9.4/restic_0.9.4_linux_amd64.bz2 && \ + bunzip2 restic_0.9.4_linux_amd64.bz2 && \ + mv restic_0.9.4_linux_amd64 /usr/bin/restic && \ chmod +x /usr/bin/restic ADD /bin/linux/amd64/ark /ark diff --git a/changelogs/CHANGELOG-0.10.md b/changelogs/CHANGELOG-0.10.md index 059e3049b..187336e51 100644 --- a/changelogs/CHANGELOG-0.10.md +++ b/changelogs/CHANGELOG-0.10.md @@ -1,6 +1,18 @@ + - [v0.10.2](#v0102) - [v0.10.1](#v0101) - [v0.10.0](#v0100) +## v0.10.2 +#### 2019-02-28 + +### Download +- https://github.com/heptio/ark/releases/tag/v0.10.2 + +### Changes + * upgrade restic to v0.9.4 & replace --hostname flag with --host (#1156, @skriss) + * use 'restic stats' instead of 'restic check' to determine if repo exists (#1171, @skriss) + * Fix concurrency bug in code ensuring restic repository exists (#1235, @skriss) + ## v0.10.1 #### 2019-01-10 diff --git a/pkg/controller/restic_repository_controller.go b/pkg/controller/restic_repository_controller.go index 9f550c8fa..f72ce76e0 100644 --- a/pkg/controller/restic_repository_controller.go +++ b/pkg/controller/restic_repository_controller.go @@ -162,10 +162,10 @@ func (c *resticRepositoryController) initializeRepo(req *v1.ResticRepository, lo }) } -// ensureRepo first checks the repo, and returns if check passes. If it fails, -// attempts to init the repo, and returns the result. +// ensureRepo first tries to connect to the repo, and returns if it succeeds. If it fails, +// it attempts to init the repo, and returns the result. func ensureRepo(repo *v1.ResticRepository, repoManager restic.RepositoryManager) error { - if repoManager.CheckRepo(repo) == nil { + if repoManager.ConnectToRepo(repo) == nil { return nil } diff --git a/pkg/restic/command_factory.go b/pkg/restic/command_factory.go index 591d6f9d3..c216bd636 100644 --- a/pkg/restic/command_factory.go +++ b/pkg/restic/command_factory.go @@ -23,7 +23,7 @@ import ( // BackupCommand returns a Command for running a restic backup. func BackupCommand(repoIdentifier, passwordFile, path string, tags map[string]string) *Command { - // --hostname flag is provided with a generic value because restic uses the hostname + // --host flag is provided with a generic value because restic uses the host // to find a parent snapshot, and by default it will be the name of the daemonset pod // where the `restic backup` command is run. If this pod is recreated, we want to continue // taking incremental backups rather than triggering a full one due to a new pod name. @@ -34,7 +34,7 @@ func BackupCommand(repoIdentifier, passwordFile, path string, tags map[string]st PasswordFile: passwordFile, Dir: path, Args: []string{"."}, - ExtraFlags: append(backupTagFlags(tags), "--hostname=ark"), + ExtraFlags: append(backupTagFlags(tags), "--host=ark"), } } @@ -84,6 +84,13 @@ func InitCommand(repoIdentifier string) *Command { } } +func StatsCommand(repoIdentifier string) *Command { + return &Command{ + Command: "stats", + RepoIdentifier: repoIdentifier, + } +} + func CheckCommand(repoIdentifier string) *Command { return &Command{ Command: "check", diff --git a/pkg/restic/command_factory_test.go b/pkg/restic/command_factory_test.go index 5e2714038..610831875 100644 --- a/pkg/restic/command_factory_test.go +++ b/pkg/restic/command_factory_test.go @@ -32,7 +32,7 @@ func TestBackupCommand(t *testing.T) { assert.Equal(t, "path", c.Dir) assert.Equal(t, []string{"."}, c.Args) - expected := []string{"--tag=foo=bar", "--tag=c=d", "--hostname=ark"} + expected := []string{"--tag=foo=bar", "--tag=c=d", "--host=ark"} sort.Strings(expected) sort.Strings(c.ExtraFlags) assert.Equal(t, expected, c.ExtraFlags) @@ -96,6 +96,13 @@ func TestInitCommand(t *testing.T) { assert.Equal(t, "repo-id", c.RepoIdentifier) } +func TestStatsCommand(t *testing.T) { + c := StatsCommand("repo-id") + + assert.Equal(t, "stats", c.Command) + assert.Equal(t, "repo-id", c.RepoIdentifier) +} + func TestCheckCommand(t *testing.T) { c := CheckCommand("repo-id") diff --git a/pkg/restic/repository_ensurer.go b/pkg/restic/repository_ensurer.go index 3810c7ec2..df5117e94 100644 --- a/pkg/restic/repository_ensurer.go +++ b/pkg/restic/repository_ensurer.go @@ -35,18 +35,31 @@ import ( // repositoryEnsurer ensures that Ark restic repositories are created and ready. type repositoryEnsurer struct { + log logrus.FieldLogger repoLister arkv1listers.ResticRepositoryLister repoClient arkv1client.ResticRepositoriesGetter readyChansLock sync.Mutex readyChans map[string]chan *arkv1api.ResticRepository + + // repoLocksMu synchronizes reads/writes to the repoLocks map itself + // since maps are not threadsafe. + repoLocksMu sync.Mutex + repoLocks map[repoKey]*sync.Mutex +} + +type repoKey struct { + volumeNamespace string + backupLocation string } func newRepositoryEnsurer(repoInformer arkv1informers.ResticRepositoryInformer, repoClient arkv1client.ResticRepositoriesGetter, log logrus.FieldLogger) *repositoryEnsurer { r := &repositoryEnsurer{ + log: log, repoLister: repoInformer.Lister(), repoClient: repoClient, readyChans: make(map[string]chan *arkv1api.ResticRepository), + repoLocks: make(map[repoKey]*sync.Mutex), } repoInformer.Informer().AddEventHandler( @@ -67,7 +80,7 @@ func newRepositoryEnsurer(repoInformer arkv1informers.ResticRepositoryInformer, } readyChan <- newObj - delete(r.readyChans, newObj.Name) + delete(r.readyChans, key) } }, }, @@ -84,6 +97,30 @@ func repoLabels(volumeNamespace, backupLocation string) labels.Set { } func (r *repositoryEnsurer) EnsureRepo(ctx context.Context, namespace, volumeNamespace, backupLocation string) (*arkv1api.ResticRepository, error) { + log := r.log.WithField("volumeNamespace", volumeNamespace).WithField("backupLocation", backupLocation) + + // It's only safe to have one instance of this method executing concurrently for a + // given volumeNamespace + backupLocation, so synchronize based on that. It's fine + // to run concurrently for *different* namespaces/locations. If you had 2 goroutines + // running this for the same inputs, both might find no ResticRepository exists, then + // both would create new ones for the same namespace/location. + // + // This issue could probably be avoided if we had a deterministic name for + // each restic repository, and we just tried to create it, checked for an + // AlreadyExists err, and then waited for it to be ready. However, there are + // already repositories in the wild with non-deterministic names (i.e. using + // GenerateName) which poses a backwards compatibility problem. + log.Debug("Acquiring lock") + + repoMu := r.repoLock(volumeNamespace, backupLocation) + repoMu.Lock() + defer func() { + repoMu.Unlock() + log.Debug("Released lock") + }() + + log.Debug("Acquired lock") + selector := labels.SelectorFromSet(repoLabels(volumeNamespace, backupLocation)) repos, err := r.repoLister.ResticRepositories(namespace).List(selector) @@ -97,11 +134,14 @@ func (r *repositoryEnsurer) EnsureRepo(ctx context.Context, namespace, volumeNam if repos[0].Status.Phase != arkv1api.ResticRepositoryPhaseReady { return nil, errors.New("restic repository is not ready") } + + log.Debug("Ready repository found") return repos[0], nil } - // no repo found: create one and wait for it to be ready + log.Debug("No repository found, creating one") + // no repo found: create one and wait for it to be ready repo := &arkv1api.ResticRepository{ ObjectMeta: metav1.ObjectMeta{ Namespace: namespace, @@ -137,3 +177,19 @@ func (r *repositoryEnsurer) getReadyChan(name string) chan *arkv1api.ResticRepos r.readyChans[name] = make(chan *arkv1api.ResticRepository) return r.readyChans[name] } + +func (r *repositoryEnsurer) repoLock(volumeNamespace, backupLocation string) *sync.Mutex { + r.repoLocksMu.Lock() + defer r.repoLocksMu.Unlock() + + key := repoKey{ + volumeNamespace: volumeNamespace, + backupLocation: backupLocation, + } + + if r.repoLocks[key] == nil { + r.repoLocks[key] = new(sync.Mutex) + } + + return r.repoLocks[key] +} diff --git a/pkg/restic/repository_manager.go b/pkg/restic/repository_manager.go index 65e307d68..d1dc6a785 100644 --- a/pkg/restic/repository_manager.go +++ b/pkg/restic/repository_manager.go @@ -42,6 +42,12 @@ type RepositoryManager interface { // InitRepo initializes a repo with the specified name and identifier. InitRepo(repo *arkv1api.ResticRepository) error + // ConnectToRepo runs the 'restic stats' command against the + // specified repo, and returns an error if it fails. This is + // intended to be used to ensure that the repo exists/can be + // authenticated to. + ConnectToRepo(repo *arkv1api.ResticRepository) error + // CheckRepo checks the specified repo for errors. CheckRepo(repo *arkv1api.ResticRepository) error @@ -170,6 +176,14 @@ func (rm *repositoryManager) InitRepo(repo *arkv1api.ResticRepository) error { return rm.exec(InitCommand(repo.Spec.ResticIdentifier), repo.Spec.BackupStorageLocation) } +func (rm *repositoryManager) ConnectToRepo(repo *arkv1api.ResticRepository) error { + // restic stats requires a non-exclusive lock + rm.repoLocker.Lock(repo.Name) + defer rm.repoLocker.Unlock(repo.Name) + + return rm.exec(StatsCommand(repo.Spec.ResticIdentifier), repo.Spec.BackupStorageLocation) +} + func (rm *repositoryManager) CheckRepo(repo *arkv1api.ResticRepository) error { // restic check requires an exclusive lock rm.repoLocker.LockExclusive(repo.Name)