From c58b602a1d0fd31a6ce84a55b36ed493dd4f5ae4 Mon Sep 17 00:00:00 2001 From: Carlisia Date: Thu, 11 Oct 2018 12:54:05 -0700 Subject: [PATCH 1/6] Remove timeout check for snapshot service Signed-off-by: Carlisia --- pkg/cloudprovider/snapshot_service.go | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/pkg/cloudprovider/snapshot_service.go b/pkg/cloudprovider/snapshot_service.go index 9ad886442..152257912 100644 --- a/pkg/cloudprovider/snapshot_service.go +++ b/pkg/cloudprovider/snapshot_service.go @@ -17,9 +17,6 @@ limitations under the License. package cloudprovider import ( - "time" - - "github.com/pkg/errors" "k8s.io/apimachinery/pkg/runtime" ) @@ -50,11 +47,6 @@ type SnapshotService interface { SetVolumeID(pv runtime.Unstructured, volumeID string) (runtime.Unstructured, error) } -const ( - volumeCreateWaitTimeout = 30 * time.Second - volumeCreatePollInterval = 1 * time.Second -) - type snapshotService struct { blockStore BlockStore } @@ -74,22 +66,7 @@ func (sr *snapshotService) CreateVolumeFromSnapshot(snapshotID string, volumeTyp return "", err } - // wait for volume to be ready (up to a maximum time limit) - ticker := time.NewTicker(volumeCreatePollInterval) - defer ticker.Stop() - - timeout := time.NewTimer(volumeCreateWaitTimeout) - - for { - select { - case <-timeout.C: - return "", errors.Errorf("timeout reached waiting for volume %v to be ready", volumeID) - case <-ticker.C: - if ready, err := sr.blockStore.IsVolumeReady(volumeID, volumeAZ); err == nil && ready { - return volumeID, nil - } - } - } + return volumeID, nil } func (sr *snapshotService) CreateSnapshot(volumeID, volumeAZ string, tags map[string]string) (string, error) { From 24d525681173ac6993beedeb401970f8f57f6ac6 Mon Sep 17 00:00:00 2001 From: Lukasz Jakimczuk Date: Thu, 30 Aug 2018 16:44:03 +0200 Subject: [PATCH 2/6] Adding support for the AWS_CLUSTER_NAME env variable allowing to claim volumes ownership Signed-off-by: Lukasz Jakimczuk Moving check for environment variable outside the loop Signed-off-by: Lukasz Jakimczuk Insert a note about AWS_CLUSTER_NAME in the aws-config doc Signed-off-by: Lukasz Jakimczuk Improving implementation and documentation Signed-off-by: Lukasz Jakimczuk Changing instructions, adding unit test for getTagsForCluster and removing duplicated Lookup Signed-off-by: Lukasz Jakimczuk Commit after update Signed-off-by: Lukasz Jakimczuk Correcting bad formatting in aws-config.md Signed-off-by: Lukasz Jakimczuk --- docs/aws-config.md | 26 +++++++ examples/aws/10-deployment.yaml | 4 +- pkg/cloudprovider/aws/block_store.go | 29 +++++++- pkg/cloudprovider/aws/block_store_test.go | 86 +++++++++++++++++++++++ 4 files changed, 143 insertions(+), 2 deletions(-) diff --git a/docs/aws-config.md b/docs/aws-config.md index 80b6d91a4..16af14699 100644 --- a/docs/aws-config.md +++ b/docs/aws-config.md @@ -145,6 +145,31 @@ Specify the following values in the example files: * Replace `` with `gp2`. This is AWS's default `StorageClass` name. +* (Optional) If you have multiple clusters and you want to support migration of resources between them, in file `examples/aws/10-deployment.yaml`: + + * Uncomment the environment variable `AWS_CLUSTER_NAME` and replace `` with the current cluster's name. When restoring backup, it will make Ark (and cluster it's running on) claim ownership of AWS volumes created from snapshots taken on different cluster. + The best way to get the current cluster's name is to either check it with used deployment tool or to read it directly from the EC2 instances tags. + + The following listing shows how to get the cluster's nodes EC2 Tags. First, get the nodes external IDs (EC2 IDs): + + ```bash + kubectl get nodes -o jsonpath='{.items[*].spec.externalID}' + ``` + + Copy one of the returned IDs `` and use it with the `aws` CLI tool to search for one of the following: + + * The `kubernetes.io/cluster/` tag of the value `owned`. The `` is then your cluster's name: + + ```bash + aws ec2 describe-tags --filters "Name=resource-id,Values=" "Name=value,Values=owned" + ``` + + * If the first output returns nothing, then check for the legacy Tag `KubernetesCluster` of the value ``: + + ```bash + aws ec2 describe-tags --filters "Name=resource-id,Values=" "Name=key,Values=KubernetesCluster" + ``` + ## Start the server In the root of your Ark directory, run: @@ -271,3 +296,4 @@ It can be set up for Ark by creating a role that will have required permissions, [5]: https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-welcome.html [6]: config-definition.md#aws [14]: http://docs.aws.amazon.com/IAM/latest/UserGuide/introduction.html +[20]: faq.md diff --git a/examples/aws/10-deployment.yaml b/examples/aws/10-deployment.yaml index 1742c4713..5549fea67 100644 --- a/examples/aws/10-deployment.yaml +++ b/examples/aws/10-deployment.yaml @@ -50,6 +50,8 @@ spec: value: /credentials/cloud - name: ARK_SCRATCH_DIR value: /scratch + #- name: AWS_CLUSTER_NAME + # value: volumes: - name: cloud-credentials secret: @@ -57,4 +59,4 @@ spec: - name: plugins emptyDir: {} - name: scratch - emptyDir: {} \ No newline at end of file + emptyDir: {} diff --git a/pkg/cloudprovider/aws/block_store.go b/pkg/cloudprovider/aws/block_store.go index 8960e7c9b..bcc499257 100644 --- a/pkg/cloudprovider/aws/block_store.go +++ b/pkg/cloudprovider/aws/block_store.go @@ -17,7 +17,9 @@ limitations under the License. package aws import ( + "os" "regexp" + "strings" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/awserr" @@ -93,6 +95,8 @@ func (b *blockStore) CreateVolumeFromSnapshot(snapshotID, volumeType, volumeAZ s return "", errors.Errorf("expected 1 snapshot from DescribeSnapshots for %s, got %v", snapshotID, count) } + // filter tags through getTagsForCluster() function in order to apply + // proper ownership tags to restored volumes req := &ec2.CreateVolumeInput{ SnapshotId: &snapshotID, AvailabilityZone: &volumeAZ, @@ -100,7 +104,7 @@ func (b *blockStore) CreateVolumeFromSnapshot(snapshotID, volumeType, volumeAZ s TagSpecifications: []*ec2.TagSpecification{ { ResourceType: aws.String(ec2.ResourceTypeVolume), - Tags: snapRes.Snapshots[0].Tags, + Tags: getTagsForCluster(snapRes.Snapshots[0].Tags), }, }, } @@ -187,6 +191,29 @@ func (b *blockStore) CreateSnapshot(volumeID, volumeAZ string, tags map[string]s return *res.SnapshotId, nil } +func getTagsForCluster(snapshotTags []*ec2.Tag) []*ec2.Tag { + var result []*ec2.Tag + + clusterName, haveAWSClusterNameEnvVar := os.LookupEnv("AWS_CLUSTER_NAME") + + if haveAWSClusterNameEnvVar { + result = append(result, ec2Tag("kubernetes.io/cluster/"+clusterName, "owned")) + result = append(result, ec2Tag("KubernetesCluster", clusterName)) + } + + for _, tag := range snapshotTags { + if haveAWSClusterNameEnvVar && (strings.HasPrefix(*tag.Key, "kubernetes.io/cluster/") || *tag.Key == "KubernetesCluster") { + // if the AWS_CLUSTER_NAME variable is found we want current cluster + // to overwrite the old ownership on volumes + continue + } + + result = append(result, ec2Tag(*tag.Key, *tag.Value)) + } + + return result +} + func getTags(arkTags map[string]string, volumeTags []*ec2.Tag) []*ec2.Tag { var result []*ec2.Tag diff --git a/pkg/cloudprovider/aws/block_store_test.go b/pkg/cloudprovider/aws/block_store_test.go index b3581c1e1..f2f7af448 100644 --- a/pkg/cloudprovider/aws/block_store_test.go +++ b/pkg/cloudprovider/aws/block_store_test.go @@ -17,6 +17,7 @@ limitations under the License. package aws import ( + "os" "sort" "testing" @@ -87,6 +88,91 @@ func TestSetVolumeID(t *testing.T) { assert.Equal(t, "vol-updated", actual) } +func TestGetTagsForCluster(t *testing.T) { + tests := []struct { + name string + isNameSet bool + snapshotTags []*ec2.Tag + expected []*ec2.Tag + }{ + { + name: "degenerate case (no tags)", + isNameSet: false, + snapshotTags: nil, + expected: nil, + }, + { + name: "cluster tags exist and remain set", + isNameSet: false, + snapshotTags: []*ec2.Tag{ + ec2Tag("KubernetesCluster", "old-cluster"), + ec2Tag("kubernetes.io/cluster/old-cluster", "owned"), + ec2Tag("aws-key", "aws-val"), + }, + expected: []*ec2.Tag{ + ec2Tag("KubernetesCluster", "old-cluster"), + ec2Tag("kubernetes.io/cluster/old-cluster", "owned"), + ec2Tag("aws-key", "aws-val"), + }, + }, + { + name: "cluster tags only get applied", + isNameSet: true, + snapshotTags: nil, + expected: []*ec2.Tag{ + ec2Tag("KubernetesCluster", "current-cluster"), + ec2Tag("kubernetes.io/cluster/current-cluster", "owned"), + }, + }, + { + name: "non-overlaping cluster and snapshot tags both get applied", + isNameSet: true, + snapshotTags: []*ec2.Tag{ec2Tag("aws-key", "aws-val")}, + expected: []*ec2.Tag{ + ec2Tag("KubernetesCluster", "current-cluster"), + ec2Tag("kubernetes.io/cluster/current-cluster", "owned"), + ec2Tag("aws-key", "aws-val"), + }, + }, + {name: "overlaping cluster tags, current cluster tags take precedence", + isNameSet: true, + snapshotTags: []*ec2.Tag{ + ec2Tag("KubernetesCluster", "old-name"), + ec2Tag("kubernetes.io/cluster/old-name", "owned"), + ec2Tag("aws-key", "aws-val"), + }, + expected: []*ec2.Tag{ + ec2Tag("KubernetesCluster", "current-cluster"), + ec2Tag("kubernetes.io/cluster/current-cluster", "owned"), + ec2Tag("aws-key", "aws-val"), + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if test.isNameSet { + os.Setenv("AWS_CLUSTER_NAME", "current-cluster") + } + res := getTagsForCluster(test.snapshotTags) + + sort.Slice(res, func(i, j int) bool { + return *res[i].Key < *res[j].Key + }) + + sort.Slice(test.expected, func(i, j int) bool { + return *test.expected[i].Key < *test.expected[j].Key + }) + + assert.Equal(t, test.expected, res) + + if test.isNameSet { + os.Unsetenv("AWS_CLUSTER_NAME") + } + }) + } +} + func TestGetTags(t *testing.T) { tests := []struct { name string From 2f6f5de80c472170ef9234377c0ddc115cece463 Mon Sep 17 00:00:00 2001 From: Steve Kriss Date: Wed, 10 Oct 2018 13:29:15 -0600 Subject: [PATCH 3/6] only try to backup PVC's linked PV if PVC phase is Bound Signed-off-by: Steve Kriss --- pkg/backup/backup_pv_action.go | 30 +++++++++++------------- pkg/backup/backup_pv_action_test.go | 36 ++++++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 20 deletions(-) diff --git a/pkg/backup/backup_pv_action.go b/pkg/backup/backup_pv_action.go index 684e2bfa6..5c0637911 100644 --- a/pkg/backup/backup_pv_action.go +++ b/pkg/backup/backup_pv_action.go @@ -17,13 +17,14 @@ limitations under the License. package backup import ( + "github.com/pkg/errors" "github.com/sirupsen/logrus" + corev1api "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "github.com/heptio/ark/pkg/apis/ark/v1" "github.com/heptio/ark/pkg/kuberesource" - "github.com/heptio/ark/pkg/util/collections" ) // backupPVAction inspects a PersistentVolumeClaim for the PersistentVolume @@ -47,23 +48,18 @@ func (a *backupPVAction) AppliesTo() (ResourceSelector, error) { func (a *backupPVAction) Execute(item runtime.Unstructured, backup *v1.Backup) (runtime.Unstructured, []ResourceIdentifier, error) { a.log.Info("Executing backupPVAction") - var additionalItems []ResourceIdentifier - - pvc := item.UnstructuredContent() - - volumeName, err := collections.GetString(pvc, "spec.volumeName") - // if there's no volume name, it's not an error, since it's possible - // for the PVC not be bound; don't return an additional PV item to - // back up. - if err != nil || volumeName == "" { - a.log.Info("No spec.volumeName found for PersistentVolumeClaim") - return nil, nil, nil + var pvc corev1api.PersistentVolumeClaim + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(item.UnstructuredContent(), &pvc); err != nil { + return nil, nil, errors.Wrap(err, "unable to convert unstructured item to persistent volume claim") } - additionalItems = append(additionalItems, ResourceIdentifier{ - GroupResource: kuberesource.PersistentVolumes, - Name: volumeName, - }) + if pvc.Status.Phase != corev1api.ClaimBound || pvc.Spec.VolumeName == "" { + return item, nil, nil + } - return item, additionalItems, nil + pv := ResourceIdentifier{ + GroupResource: kuberesource.PersistentVolumes, + Name: pvc.Spec.VolumeName, + } + return item, []ResourceIdentifier{pv}, nil } diff --git a/pkg/backup/backup_pv_action_test.go b/pkg/backup/backup_pv_action_test.go index 9c0c1ef36..bb33b0415 100644 --- a/pkg/backup/backup_pv_action_test.go +++ b/pkg/backup/backup_pv_action_test.go @@ -24,13 +24,15 @@ import ( arktest "github.com/heptio/ark/pkg/util/test" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + corev1api "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ) func TestBackupPVAction(t *testing.T) { pvc := &unstructured.Unstructured{ Object: map[string]interface{}{ - "spec": map[string]interface{}{}, + "spec": map[string]interface{}{}, + "status": map[string]interface{}{}, }, } @@ -51,11 +53,39 @@ func TestBackupPVAction(t *testing.T) { assert.NoError(t, err) assert.Len(t, additional, 0) - // non-empty spec.volumeName should result in - // no error and an additional item for the PV + // non-empty spec.volumeName when status.phase is empty + // should result in no error and no additional items pvc.Object["spec"].(map[string]interface{})["volumeName"] = "myVolume" _, additional, err = a.Execute(pvc, backup) require.NoError(t, err) + require.Len(t, additional, 0) + + // non-empty spec.volumeName when status.phase is 'Pending' + // should result in no error and no additional items + pvc.Object["status"].(map[string]interface{})["phase"] = corev1api.ClaimPending + _, additional, err = a.Execute(pvc, backup) + require.NoError(t, err) + require.Len(t, additional, 0) + + // non-empty spec.volumeName when status.phase is 'Lost' + // should result in no error and no additional items + pvc.Object["status"].(map[string]interface{})["phase"] = corev1api.ClaimLost + _, additional, err = a.Execute(pvc, backup) + require.NoError(t, err) + require.Len(t, additional, 0) + + // non-empty spec.volumeName when status.phase is 'Bound' + // should result in no error and one additional item for the PV + pvc.Object["status"].(map[string]interface{})["phase"] = corev1api.ClaimBound + _, additional, err = a.Execute(pvc, backup) + require.NoError(t, err) require.Len(t, additional, 1) assert.Equal(t, ResourceIdentifier{GroupResource: kuberesource.PersistentVolumes, Name: "myVolume"}, additional[0]) + + // empty spec.volumeName when status.phase is 'Bound' should + // result in no error and no additional items + pvc.Object["spec"].(map[string]interface{})["volumeName"] = "" + _, additional, err = a.Execute(pvc, backup) + assert.NoError(t, err) + assert.Len(t, additional, 0) } From 09a9d950fcd5fccf511c284ae1103332dbb5cff7 Mon Sep 17 00:00:00 2001 From: Shubheksha Jalan Date: Thu, 11 Oct 2018 21:23:30 +0200 Subject: [PATCH 4/6] remove logic to get a gcp project Signed-off-by: Shubheksha Jalan --- docs/gcp-config.md | 1 - pkg/cloudprovider/gcp/block_store.go | 10 ---------- 2 files changed, 11 deletions(-) diff --git a/docs/gcp-config.md b/docs/gcp-config.md index aac2273ff..1df89e9fa 100644 --- a/docs/gcp-config.md +++ b/docs/gcp-config.md @@ -54,7 +54,6 @@ To integrate Heptio Ark with GCP, create an Ark-specific [Service Account][15]: compute.snapshots.create compute.snapshots.useReadOnly compute.snapshots.delete - compute.projects.get ) gcloud iam roles create heptio_ark.server \ diff --git a/pkg/cloudprovider/gcp/block_store.go b/pkg/cloudprovider/gcp/block_store.go index 0fe257131..5002151dc 100644 --- a/pkg/cloudprovider/gcp/block_store.go +++ b/pkg/cloudprovider/gcp/block_store.go @@ -64,16 +64,6 @@ func (b *blockStore) Init(config map[string]string) error { return errors.WithStack(err) } - // validate connection - res, err := gce.Projects.Get(project).Do() - if err != nil { - return errors.WithStack(err) - } - - if res == nil { - return errors.Errorf("error getting project %q", project) - } - b.gce = gce b.project = project From 4f9aeb818d54e69aa23378af43731e950fd1041e Mon Sep 17 00:00:00 2001 From: captjt Date: Tue, 16 Oct 2018 15:37:10 -0400 Subject: [PATCH 5/6] Add --include-cluster-resources options when creating schedule Signed-off-by: captjt --- pkg/cmd/cli/schedule/create.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pkg/cmd/cli/schedule/create.go b/pkg/cmd/cli/schedule/create.go index a49c86be9..e1e39dd51 100644 --- a/pkg/cmd/cli/schedule/create.go +++ b/pkg/cmd/cli/schedule/create.go @@ -107,13 +107,14 @@ func (o *CreateOptions) Run(c *cobra.Command, f client.Factory) error { }, Spec: api.ScheduleSpec{ Template: api.BackupSpec{ - IncludedNamespaces: o.BackupOptions.IncludeNamespaces, - ExcludedNamespaces: o.BackupOptions.ExcludeNamespaces, - IncludedResources: o.BackupOptions.IncludeResources, - ExcludedResources: o.BackupOptions.ExcludeResources, - LabelSelector: o.BackupOptions.Selector.LabelSelector, - SnapshotVolumes: o.BackupOptions.SnapshotVolumes.Value, - TTL: metav1.Duration{Duration: o.BackupOptions.TTL}, + IncludedNamespaces: o.BackupOptions.IncludeNamespaces, + ExcludedNamespaces: o.BackupOptions.ExcludeNamespaces, + IncludedResources: o.BackupOptions.IncludeResources, + ExcludedResources: o.BackupOptions.ExcludeResources, + IncludeClusterResources: o.BackupOptions.IncludeClusterResources.Value, + LabelSelector: o.BackupOptions.Selector.LabelSelector, + SnapshotVolumes: o.BackupOptions.SnapshotVolumes.Value, + TTL: metav1.Duration{Duration: o.BackupOptions.TTL}, }, Schedule: o.Schedule, }, From 34da68fc8749481225ca6095e1a56a38cad4cbee Mon Sep 17 00:00:00 2001 From: Nolan Brubaker Date: Thu, 18 Oct 2018 11:00:20 -0400 Subject: [PATCH 6/6] Update v0.9.8 changelog Signed-off-by: Nolan Brubaker --- CHANGELOG.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51125e88a..66e66543c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,14 @@ # Changelog -#### [v0.9.8-beta.1](https://github.com/heptio/ark/releases/tag/v0.9.8-beta.1) - 2018-10-10 +#### [v0.9.8](https://github.com/heptio/ark/releases/tag/v0.9.8) - 2018-10-18 #### Bug Fixes * Discard service account token volume mounts from init containers on restore (#910, @james-powis) + * Support --include-cluster-resources flag when creating schedule (#942, @captjt) + * Remove logic to get a GCP project (#926, @shubheksha) + * Only try to back up PVCs linked PV if the PVC's phase is Bound (#920, @skriss) + * Claim ownership of new AWS volumes on Kubernetes cluster being restored into (#801, @ljakimczuk) + * Remove timeout check when taking snapshots (#928, @carlisia) #### [v0.9.7](https://github.com/heptio/ark/releases/tag/v0.9.7) - 2018-10-04