1
0
mirror of https://github.com/google/nomulus synced 2025-12-23 06:15:42 +00:00

Remove more references to GAE (#2894)

These are old/pointless now that we've migrated to GKE. Note that this
doesn't update anything in the docs/ folder, as that's a much larger
project that should be done on its own.
This commit is contained in:
gbrodman
2025-12-01 11:43:49 -05:00
committed by GitHub
parent 0dc7ab99d7
commit 7149fd3307
72 changed files with 125 additions and 2537 deletions

View File

@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This script builds the GAE artifacts for a given environment, moves the
# This script builds the artifacts for a given environment, moves the
# artifacts for all services to a designated location, and then creates a
# tarball from there.

View File

@@ -30,8 +30,6 @@ import (
"gopkg.in/yaml.v3"
)
var gke bool = false
var projectName string
var baseDomain string
@@ -190,18 +188,8 @@ func (manager TasksSyncManager) getArgs(task Task, operationType string) []strin
description = strings.ReplaceAll(description, "\n", " ")
var service = "backend"
// Only BSA tasks run on the BSA service in GAE. GKE tasks are always
// on the backend service.
if task.Service != "backend" && task.Service != "" && !gke {
service = task.Service
}
var uri string
if gke {
uri = fmt.Sprintf("https://%s.%s%s", service, baseDomain, strings.TrimSpace(task.URL))
} else {
uri = fmt.Sprintf("https://%s-dot-%s.appspot.com%s", service, projectName, strings.TrimSpace(task.URL))
}
uri = fmt.Sprintf("https://%s.%s%s", service, baseDomain, strings.TrimSpace(task.URL))
args := []string{
"--project", projectName,
@@ -342,8 +330,7 @@ func getExistingEntries(cmd *exec.Cmd) ExistingEntries {
func main() {
if len(os.Args) < 4 || os.Args[1] == "" || os.Args[2] == "" || os.Args[3] == "" {
panic("Error - Invalid Parameters.\n" +
"Required params: 1 - Nomulus config YAML path; 2 - config XML path; 3 - project name;\n" +
"Optional params: 5 - [--gke]")
"Required params: 1 - Nomulus config YAML path; 2 - config XML path; 3 - project name;\n")
}
// Nomulus YAML config file path, used to extract OAuth client ID.
nomulusConfigFileLocation := os.Args[1]
@@ -351,11 +338,6 @@ func main() {
configFileLocation := os.Args[2]
// Project name where to submit the tasks
projectName = os.Args[3]
// Whether to deploy cloud scheduler tasks to run on GKE
if len(os.Args) > 4 && os.Args[4] == "--gke" {
gke = true
log.Default().Println("GKE mode enabled")
}
log.Default().Println("YAML Filepath " + nomulusConfigFileLocation)
yamlFile, err := os.Open(nomulusConfigFileLocation)

View File

@@ -1,46 +0,0 @@
# This will delete canary GAE versions named "nomulus".
#
# For reasons unknown, Spinnaker occasionally gets stuck when deploying to GAE
# canary, and the fix is to manually delete the canary versions before the
# deployment.
#
# To manually trigger a build on GCB, run:
# gcloud builds submit --config=cloudbuild-delete-canary.yaml \
# --substitutions=_ENV=[ENV] ..
#
# To trigger a build automatically, follow the instructions below and add a trigger:
# https://cloud.google.com/cloud-build/docs/running-builds/automate-builds
#
steps:
# Pull the credential for nomulus tool.
- name: 'gcr.io/$PROJECT_ID/builder:latest'
entrypoint: /bin/bash
args:
- -c
- |
set -e
gcloud secrets versions access latest \
--secret nomulus-tool-cloudbuild-credential > tool-credential.json
# Delete the canary GAE versions named "nomulus" in every service.
- name: 'gcr.io/$PROJECT_ID/builder:latest'
entrypoint: /bin/bash
args:
- -c
- |
if [ ${_ENV} == production ]
then
project_id="domain-registry"
else
project_id="domain-registry-${_ENV}"
fi
gcloud auth activate-service-account --key-file=tool-credential.json
for service in default pubapi backend bsa tools console
do
gcloud app versions delete nomulus --service=$service \
--project=$project_id --quiet;
done
timeout: 3600s
options:
machineType: 'N1_HIGHCPU_8'

View File

@@ -1,60 +0,0 @@
# This will delete all stopped GAE versions (save 3) as there is a limit on how
# many versions can exist in a project.
#
# To manually trigger a build on GCB, run:
# gcloud builds submit --config=cloudbuild-delete.yaml \
# --substitutions=TAG_NAME=[TAG],_ENV=[ENV] ..
#
# To trigger a build automatically, follow the instructions below and add a trigger:
# https://cloud.google.com/cloud-build/docs/running-builds/automate-builds
#
# Note: to work around issue in Spinnaker's 'Deployment Manifest' stage,
# variable references must avoid the ${var} format. Valid formats include
# $var or ${"${var}"}. This file uses the former. Since TAG_NAME and _ENV are
# expanded in the copies sent to Spinnaker, we preserve the brackets around
# them for safe pattern matching during release.
# See https://github.com/spinnaker/spinnaker/issues/3028 for more information.
#
# GAE has a limit of ~250 versions per-project, including unused versions. We
# therefore need to periodically delete old versions. This GCB job finds all
# stopped versions and deletes all but the last 3 (in case we need to roll back).
steps:
# Pull the credential for nomulus tool.
- name: 'gcr.io/$PROJECT_ID/builder:latest'
entrypoint: /bin/bash
args:
- -c
- |
set -e
gcloud secrets versions access latest \
--secret nomulus-tool-cloudbuild-credential > tool-credential.json
# Delete unused GAE versions.
- name: 'gcr.io/$PROJECT_ID/builder:latest'
entrypoint: /bin/bash
args:
- -c
- |
if [ ${_ENV} == production ]
then
project_id="domain-registry"
else
project_id="domain-registry-${_ENV}"
fi
gcloud auth activate-service-account --key-file=tool-credential.json
for service in default pubapi backend bsa tools console
do
for version in $(gcloud app versions list \
--filter="SERVICE:$service AND SERVING_STATUS:STOPPED" \
--format="value(VERSION.ID,LAST_DEPLOYED)" \
--project=$project_id | sort -k 2 | head -n -3)
do
gcloud app versions delete $version --service=$service \
--project=$project_id --quiet;
done
done
timeout: 3600s
options:
machineType: 'N1_HIGHCPU_8'

View File

@@ -198,7 +198,6 @@ artifacts:
- 'core/src/main/java/google/registry/config/files/tasks/cloud-scheduler-tasks-*.xml'
- 'release/cloudbuild-sync-and-tag.yaml'
- 'release/cloudbuild-deploy-*.yaml'
- 'release/cloudbuild-delete-*.yaml'
- 'release/cloudbuild-renew-prober-certs-*.yaml'
- 'release/cloudbuild-schema-deploy-*.yaml'
- 'release/cloudbuild-schema-verify-*.yaml'

View File

@@ -100,8 +100,6 @@ steps:
sed -i s/builder:latest/builder@$builder_digest/g release/cloudbuild-sync-and-tag.yaml
sed -i s/builder:latest/builder@$builder_digest/g release/cloudbuild-schema-deploy.yaml
sed -i s/builder:latest/builder@$builder_digest/g release/cloudbuild-schema-verify.yaml
sed -i s/builder:latest/builder@$builder_digest/g release/cloudbuild-delete.yaml
sed -i s/builder:latest/builder@$builder_digest/g release/cloudbuild-delete-canary.yaml
sed -i s/builder:latest/builder@$builder_digest/g release/cloudbuild-restart-proxies.yaml
sed -i s/GCP_PROJECT/${PROJECT_ID}/ proxy/kubernetes/proxy-*.yaml
sed -i s/'$${TAG_NAME}'/${TAG_NAME}/g release/cloudbuild-sync-and-tag.yaml
@@ -109,10 +107,6 @@ steps:
for environment in alpha crash qa sandbox production; do
sed s/'$${_ENV}'/${environment}/g release/cloudbuild-deploy-gke.yaml \
> release/cloudbuild-deploy-gke-${environment}.yaml
sed s/'$${_ENV}'/${environment}/g release/cloudbuild-delete.yaml \
> release/cloudbuild-delete-${environment}.yaml
sed s/'$${_ENV}'/${environment}/g release/cloudbuild-delete-canary.yaml \
> release/cloudbuild-delete-canary-${environment}.yaml
sed s/'$${_ENV}'/${environment}/g release/cloudbuild-restart-proxies.yaml \
> release/cloudbuild-restart-proxies-${environment}.yaml
sed s/'$${_ENV}'/${environment}/g release/cloudbuild-restart-proxies.yaml | \

View File

@@ -1,151 +0,0 @@
## Summary
This package contains an automated rollback tool for the Nomulus server on
AppEngine. When given the Nomulus tag of a deployed release, the tool directs
all traffic in the four recognized services (backend, default, pubapi, and
tools) to that release. In the process, it handles Nomulus tag to AppEngine
version ID translation, checks the target binary's compatibility with SQL
schema, starts/stops versions and redirects traffic in proper sequence, and
updates deployment metadata appropriately.
The tool has two limitations:
1. This tool only accepts one release tag as rollback target, which is applied
to all services.
2. The tool immediately migrates all traffic to the new versions. It does not
support gradual migration. This is not an issue now since gradual migration
is only available in automatically scaled versions, while none of the versions
is using automatic scaling.
Although this tool is named a rollback tool, it can also reverse a rollback,
that is, rolling forward to a newer release.
## Prerequisites
This tool requires python version 3.7+. It also requires two GCP client
libraries: google-cloud-storage and google-api-python-client. They can be
installed using pip.
Registry team members should use either non-sudo pip3 or virtualenv/venv to
install the GCP libraries. A 'sudo pip install' may interfere with the Linux
tooling on your corp desktop. The non-sudo 'pip3 install' command installs the
libraries under $HOME/.local. The virtualenv or venv methods allow more control
over the installation location.
Below is an example of using virtualenv to install the libraries:
```shell
sudo apt-get install virtualenv python3-venv
python3 -m venv myproject
source myproject/bin/activate
pip install google-cloud-storage
pip install google-api-python-client
deactivate
```
If using virtualenv, make sure to run 'source myproject/bin/activate' before
running the rollback script.
## Usage
The tool can be invoked using the rollback_tool script in the Nomulus root
directory. The following parameters may be requested:
* dev_project: This is the GCP project that hosts the release and deployment
infrastructure, including the Spinnaker pipelines.
* project: This is the GCP project that hosts the Nomulus server to be rolled
back.
* env: This is the name of the Nomulus environment, e.g., sandbox or
production. Although the project-to-environment mapping is available in Gradle
scripts and internal configuration files, it is not easy to extract.
Therefore, we require the user to provide it for now.
A typical workflow goes as follows:
### Check Which Release is Serving
From the Nomulus root directory:
```shell
rollback_tool show_serving_release --dev_project ... --project ... --env ...
```
The output may look like:
```
backend nomulus-v049 nomulus-20201019-RC00
default nomulus-v049 nomulus-20201019-RC00
pubapi nomulus-v049 nomulus-20201019-RC00
tools nomulus-v049 nomulus-20201019-RC00
```
### Review Recent Deployments
```shell
rollback_tool show_recent_deployments --dev_project ... --project ... --env ...
```
This command displays up to 3 most recent deployments. The output (from sandbox
which only has two tracked deployments as of the writing of this document) may
look like:
```
backend nomulus-v048 nomulus-20201012-RC00
default nomulus-v048 nomulus-20201012-RC00
pubapi nomulus-v048 nomulus-20201012-RC00
tools nomulus-v048 nomulus-20201012-RC00
backend nomulus-v049 nomulus-20201019-RC00
default nomulus-v049 nomulus-20201019-RC00
pubapi nomulus-v049 nomulus-20201019-RC00
tools nomulus-v049 nomulus-20201019-RC00
```
### Roll to the Target Release
```shell
rollback_tool rollback --dev_project ... --project ... --env ... \
--target_release {YOUR_CHOSEN_TAG} --run_mode ...
```
The rollback subcommand has two new parameters:
* target_release: This is the Nomulus tag of the target release, in the form
of nomulus-YYYYMMDD-RC[0-9][0-9]
* run_mode: This is the execution mode of the rollback action. There are three
modes:
1. dryrun: The tool will only output information about every step of the
rollback, including commands that a user can copy and run elsewhere.
2. interactive: The tool will prompt the user before executing each step.
The user may choose to abort the rollback, skip the step, or continue
with the step.
3. automatic: Tool will execute all steps in one shot.
The rollback steps are organized according to the following logic:
```
for service in ['backend', 'default', 'pubapi', 'tools']:
if service is on basicScaling: (See Notes # 1)
start the target version
if service is on manualScaling:
start the target version
set num_instances to its originally configured value
for service in ['backend', 'default', 'pubapi', 'tools']:
direct traffic to target version
for service in ['backend', 'default', 'pubapi', 'tools']:
if originally serving version is not the target version:
if originally serving version is on basicScaling:
stop the version
if originally serving version is on manualScaling:
stop the version
set num_instances to 1 (See Notes # 2)
```
Notes:
1. Versions on automatic scaling cannot be started or stopped by gcloud or the
AppEngine Admin REST API.
2. The minimum value assignable to num_instances through the REST API is 1.
This instance eventually will be released too.

View File

@@ -1,199 +0,0 @@
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper for using the AppEngine Admin REST API."""
import time
from typing import FrozenSet, Optional, Set, Tuple
from googleapiclient import discovery
import common
# AppEngine services under management.
SERVICES = frozenset(['backend', 'default', 'pubapi', 'tools'])
# Number of times to check the status of an operation before timing out.
_STATUS_CHECK_TIMES = 5
# Delay between status checks of a long-running operation, in seconds
_STATUS_CHECK_INTERVAL = 5
class AppEngineAdmin:
    """Wrapper around the AppEngine Admin REST API client.

    Provides helpers for querying the services, versions and instances of
    one project, and for changing the instance count of a manually scaled
    version.
    """
    def __init__(self,
                 project: str,
                 service_lookup: Optional[discovery.Resource] = None,
                 status_check_interval: int = _STATUS_CHECK_INTERVAL) -> None:
        """Initialize this instance for an AppEngine(GCP) project.

        Args:
            project: The GCP project name of this AppEngine instance.
            service_lookup: The GCP discovery handle for service API lookup.
                When omitted, a client for the 'appengine' 'v1beta' API is
                built with discovery.build.
            status_check_interval: The delay in seconds between status queries
                when executing long running operations.
        """
        self._project = project

        if service_lookup is not None:
            apps = service_lookup.apps()
        else:
            apps = discovery.build('appengine', 'v1beta').apps()

        # Resource handles are resolved once here and reused by every method.
        self._services = apps.services()
        self._versions = self._services.versions()
        self._instances = self._versions.instances()
        self._operations = apps.operations()
        self._status_check_interval = status_check_interval

    @property
    def project(self) -> str:
        """The GCP project name this client was created for."""
        return self._project

    def get_serving_versions(self) -> FrozenSet[common.VersionKey]:
        """Returns the serving versions of every Nomulus service.

        For each service in appengine.SERVICES, gets the version(s) that
        appear in the service's traffic split. Versions without allocated
        traffic are not included. Services not included in
        appengine.SERVICES are also ignored.

        Returns:
            An immutable collection of the serving versions, each keyed by
            its service.
        """
        services = common.list_all_pages(self._services.list,
                                         'services',
                                         appsId=self._project)

        # Response format is specified at
        # http://googleapis.github.io/google-api-python-client/docs/dyn/appengine_v1beta.apps.services.html#list.
        versions = []
        for service in services:
            if service['id'] not in SERVICES:
                continue
            # A service without a 'split' or 'allocations' entry contributes
            # no versions.
            allocations = service.get('split', {}).get('allocations', {})
            for version_id in allocations:
                versions.append(common.VersionKey(service['id'], version_id))

        return frozenset(versions)

    # yapf: disable #  argument indent wrong
    def get_version_configs(
            self, versions: Set[common.VersionKey]
    ) -> FrozenSet[common.VersionConfig]:
        # yapf: enable
        """Returns the configuration of requested versions.

        For each version in the request, gets the rollback-related data from
        its static configuration (found in appengine-web.xml).

        Args:
            versions: A set of the VersionKey objects, each containing the
                versions being queried in that service.

        Returns:
            The version configurations in an immutable set.

        Raises:
            common.CannotRollbackError: A requested version does not declare
                exactly one scaling scheme.
        """
        requested_services = {version.service_id for version in versions}

        version_configs = []
        # Sort the requested services for ease of testing. For now the mocked
        # AppEngine admin in appengine_test can only respond in a fixed order.
        for service_id in sorted(requested_services):
            response = common.list_all_pages(self._versions.list,
                                             'versions',
                                             appsId=self._project,
                                             servicesId=service_id)

            # Format of version_list is defined at
            # https://googleapis.github.io/google-api-python-client/docs/dyn/appengine_v1beta.apps.services.versions.html#list.
            for version in response:
                key = common.VersionKey(service_id, version['id'])
                if key not in versions:
                    continue

                # The scaling scheme is identified by which scaling property
                # is present in the version resource.
                scalings = [
                    s for s in common.AppEngineScaling if s.value in version
                ]
                if len(scalings) != 1:
                    raise common.CannotRollbackError(
                        f'Expecting exactly one scaling, found {scalings}')

                scaling = scalings[0]
                if scaling == common.AppEngineScaling.MANUAL:
                    manual_instances = version.get(
                        scaling.value).get('instances')
                else:
                    manual_instances = None

                version_configs.append(
                    common.VersionConfig(service_id, version['id'], scaling,
                                         manual_instances))

        return frozenset(version_configs)

    def list_instances(
            self,
            version: common.VersionKey) -> Tuple[common.VmInstanceInfo, ...]:
        """Lists the VM instances of one version.

        Args:
            version: The service and version whose instances are listed.

        Returns:
            One VmInstanceInfo (instance id and parsed start time) per
            instance reported by the API.
        """
        # Use the instances handle resolved in __init__ instead of deriving a
        # new one from self._versions on every call.
        instances = common.list_all_pages(self._instances.list,
                                          'instances',
                                          appsId=self._project,
                                          servicesId=version.service_id,
                                          versionsId=version.version_id)

        # Format of version_list is defined at
        # https://googleapis.github.io/google-api-python-client/docs/dyn/appengine_v1beta.apps.services.versions.instances.html#list
        return tuple(
            common.VmInstanceInfo(inst['id'],
                                  common.parse_gcp_timestamp(
                                      inst['startTime']))
            for inst in instances)

    def set_manual_scaling_num_instance(self, service_id: str, version_id: str,
                                        manual_instances: int) -> None:
        """Sets the instance count of a manually scaled version and waits.

        Sends a patch request restricted to 'manualScaling.instances', then
        polls the resulting operation up to _STATUS_CHECK_TIMES times,
        sleeping status_check_interval seconds after each unfinished check.

        Args:
            service_id: The service owning the version.
            version_id: The version to update.
            manual_instances: The new number of instances.

        Raises:
            common.CannotRollbackError: The operation reported an error or
                did not complete within the allowed number of checks.
        """
        update_mask = 'manualScaling.instances'
        body = {'manualScaling': {'instances': manual_instances}}
        response = self._versions.patch(appsId=self._project,
                                        servicesId=service_id,
                                        versionsId=version_id,
                                        updateMask=update_mask,
                                        body=body).execute()

        # The operation id is the part of the name after 'operations/'.
        operation_id = response.get('name').split('operations/')[1]
        for _ in range(_STATUS_CHECK_TIMES):
            if self.query_operation_status(operation_id):
                return
            time.sleep(self._status_check_interval)

        raise common.CannotRollbackError(
            f'Operation {operation_id} timed out.')

    def query_operation_status(self, operation_id: str) -> bool:
        """Checks whether a long running operation has finished.

        Args:
            operation_id: The id of the operation to query.

        Returns:
            True if the operation carries a 'response', False if it is still
            in progress.

        Raises:
            common.CannotRollbackError: The operation carries an 'error'.
        """
        response = self._operations.get(appsId=self._project,
                                        operationsId=operation_id).execute()
        if response.get('response') is not None:
            return True
        if response.get('error') is not None:
            raise common.CannotRollbackError(response['error'])

        assert not response.get('done'), 'Operation done but no results.'
        return False

View File

@@ -1,132 +0,0 @@
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests for appengine."""
from typing import Any, Dict, List, Tuple, Union
import unittest
from unittest import mock
from unittest.mock import patch
from googleapiclient import http
import appengine
import common
def setup_appengine_admin(
) -> Tuple[appengine.AppEngineAdmin, http.HttpRequest]:
    """Builds an AppEngineAdmin wired entirely to mocks.

    Returns:
        The AppEngineAdmin under test, and the mocked request whose
        execute() result a test stubs to fake API responses.
    """
    # Every list/get/patch call hands back this one request object.
    fake_request = mock.MagicMock()
    fake_request.uri.return_value = 'myuri'

    # A single resource stands in for services, versions, instances, and
    # operations alike.
    shared_resource = mock.MagicMock()
    for api_method in ('list', 'get', 'patch'):
        getattr(shared_resource, api_method).return_value = fake_request
    shared_resource.versions.return_value = shared_resource
    shared_resource.instances.return_value = shared_resource

    # Root resource of AppEngine API. Exact type unknown.
    api_root = mock.MagicMock()
    api_root.services.return_value = shared_resource
    api_root.operations.return_value = shared_resource

    lookup = mock.MagicMock()
    lookup.apps.return_value = api_root

    admin = appengine.AppEngineAdmin('project', lookup, 1)
    return (admin, fake_request)
class AppEngineTestCase(unittest.TestCase):
    """Exercises AppEngineAdmin against a fully mocked API client."""
    def setUp(self) -> None:
        admin, request = setup_appengine_admin()
        self._client = admin
        self._mock_request = request
        self.addCleanup(patch.stopall)

    # yapf: disable
    def _set_mocked_response(
            self,
            responses: Union[Dict[str, Any], List[Dict[str, Any]]]) -> None:
        # yapf: enable
        """Stubs execute(): a list is replayed in order, a dict is repeated."""
        execute = self._mock_request.execute
        if not isinstance(responses, list):
            execute.return_value = responses
            return
        execute.side_effect = responses

    def test_get_serving_versions(self) -> None:
        managed_service = {
            'split': {
                'allocations': {
                    'my_version': 3.14,
                }
            },
            'id': 'pubapi'
        }
        # Not one of the managed services, so it must be ignored.
        unmanaged_service = {
            'split': {
                'allocations': {
                    'another_version': 2.71,
                }
            },
            'id': 'error_dashboard'
        }
        self._set_mocked_response(
            {'services': [managed_service, unmanaged_service]})

        expected = frozenset([common.VersionKey('pubapi', 'my_version')])
        self.assertEqual(self._client.get_serving_versions(), expected)

    def test_get_version_configs(self):
        basic_version = {'basicScaling': {'maxInstances': 10}, 'id': 'version'}
        self._set_mocked_response({'versions': [basic_version]})

        requested = frozenset([common.VersionKey('default', 'version')])
        expected = frozenset([
            common.VersionConfig('default', 'version',
                                 common.AppEngineScaling.BASIC)
        ])
        self.assertEqual(self._client.get_version_configs(requested),
                         expected)

    def test_async_update(self):
        pending = {'name': 'project/operations/op_id', 'done': False}
        finished = {
            'name': 'project/operations/op_id',
            'response': {},
            'done': True
        }
        # First reply answers the patch call; the next two answer polls.
        self._set_mocked_response([dict(pending), dict(pending), finished])

        self._client.set_manual_scaling_num_instance('service', 'version', 1)

        self.assertEqual(self._mock_request.execute.call_count, 3)
if __name__ == '__main__':
unittest.main()

View File

@@ -1,181 +0,0 @@
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data types and utilities common to the other modules in this package."""
import dataclasses
import datetime
import enum
import pathlib
import re
from typing import Any, Optional, Tuple
from google.protobuf import timestamp_pb2
class CannotRollbackError(Exception):
    """Signals that this tool is unable to carry out a rollback.

    Raised when a rollback is either not allowed or cannot be planned, for
    example when:
    - The target release is incompatible with the SQL schema.
    - The target release has never been deployed to AppEngine.
    - The target release is no longer available, e.g., has been manually
      deleted by the operators.
    - A state-changing call to AppEngine Admin API has failed.

    The user must fix such problems manually before retrying the rollback.
    """
class AppEngineScaling(enum.Enum):
    """The scaling schemes an AppEngine version may use.

    Each member's value is the name of the corresponding property in the
    REST API requests and responses.
    """

    AUTOMATIC = 'automaticScaling'
    BASIC = 'basicScaling'
    MANUAL = 'manualScaling'
@dataclasses.dataclass(frozen=True)
class VersionKey:
    """Identifier of a deployed version on AppEngine.

    AppEngine versions as deployable units are managed on per-service basis.
    Each instance of this class uniquely identifies an AppEngine version.

    This class implements the __eq__ method so that its equality property
    applies to subclasses by default unless they override it: any two
    VersionKey instances (including subclass instances) are equal when their
    service_id and version_id match.
    """

    service_id: str
    version_id: str

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) for foreign types lets
        # Python try the other operand's comparison before falling back to
        # "not equal".
        if not isinstance(other, VersionKey):
            return NotImplemented
        return (self.service_id == other.service_id
                and self.version_id == other.version_id)
@dataclasses.dataclass(frozen=True, eq=False)
class VersionConfig(VersionKey):
    """Rollback-related static configuration of an AppEngine version.

    Contains data found from the appengine-web.xml for this version.

    The dataclass is declared with eq=False, so no new __eq__ is generated
    and equality is the one inherited from VersionKey, which compares only
    service_id and version_id.

    Attributes:
        scaling: The scaling scheme of this version. This value determines what
            steps are needed for the rollback. If a version is on automatic
            scaling, we only need to direct traffic to it or away from it. The
            version cannot be started, stopped, or have its number of instances
            updated. If a version is on manual scaling, it not only needs to be
            started or stopped explicitly, its instances need to be updated too
            (to 1, the lowest allowed number) when it is shutdown, and to its
            originally configured number of VM instances when brought up.
        manual_scaling_instances: The originally configured VM instances to use
            for each version that is on manual scaling.
    """

    scaling: AppEngineScaling
    manual_scaling_instances: Optional[int] = None
@dataclasses.dataclass(frozen=True)
class VmInstanceInfo:
    """Immutable record describing one AppEngine VM instance.

    Attributes:
        instance_name: The instance's name.
        start_time: When the instance was started.
    """

    instance_name: str
    start_time: datetime.datetime
def get_nomulus_root() -> str:
    """Locates the root directory of the Nomulus source tree.

    Walks up the ancestors of this file and picks the first folder that
    contains a 'rollback_tool' entry and a 'settings.gradle' file declaring
    rootProject.name as 'nomulus'.

    Returns:
        The absolute path to the Nomulus root directory.

    Raises:
        RuntimeError: No ancestor folder qualifies.
    """
    # NOTE(review): the value returned is the object produced by
    # pathlib's absolute(), not a plain str as annotated - confirm callers.
    root_project_line = re.compile(r"^rootProject.name\s*=\s*'nomulus'\s*$")

    for candidate in pathlib.Path(__file__).parents:
        has_tool = candidate.joinpath('rollback_tool').exists()
        if not (has_tool and candidate.joinpath('settings.gradle').exists()):
            continue
        with open(candidate.joinpath('settings.gradle'), 'r') as settings:
            if any(root_project_line.match(entry) for entry in settings):
                return candidate.absolute()

    raise RuntimeError(
        'Do not move this file out of the Nomulus directory tree.')
def list_all_pages(func, data_field: str, *args, **kwargs) -> Tuple[Any, ...]:
    """Collects all data items from a paginator-based 'List' API.

    Args:
        func: The GCP API method that supports paged responses. It is called
            as func(*args, pageToken=<token>, **kwargs) and must return a
            request object exposing execute().
        data_field: The field in a response object containing the data
            items to be returned. A page that omits the field, or carries a
            null value for it, contributes no items.
        *args: Positional arguments passed to func.
        **kwargs: Keyword arguments passed to func.

    Returns:
        An immutable collection of data items assembled from the paged
        responses, in the order the pages were returned.
    """
    result_collector = []
    # The first request carries no page token.
    page_token = None
    while True:
        response = func(*args, pageToken=page_token, **kwargs).execute()
        result_collector.extend(response.get(data_field) or ())
        page_token = response.get('nextPageToken')
        # A missing or empty token marks the last page.
        if not page_token:
            return tuple(result_collector)
def parse_gcp_timestamp(timestamp: str) -> datetime.datetime:
    """Converts a timestamp string from a GCP API into a datetime.

    Parsing is delegated to protobuf's Timestamp class, which GCP APIs use
    for timestamp strings. It tolerates inputs that can break datetime as of
    Python 3.8, e.g., the trailing 'Z' as timezone, and fractional seconds
    with a number of digits other than 3 or 6.

    Args:
        timestamp: A string in RFC 3339 format.

    Returns:
        A datetime instance.
    """
    proto_timestamp = timestamp_pb2.Timestamp()
    proto_timestamp.FromJsonString(timestamp)
    return proto_timestamp.ToDatetime()
def to_gcp_timestamp(timestamp: datetime.datetime) -> str:
    """Formats a datetime as a timestamp string.

    Formatting is delegated to protobuf's Timestamp class, the same class
    GCP APIs use for timestamp strings.

    Args:
        timestamp: The datetime instance to be converted.

    Returns:
        A string in RFC 3339 format.
    """
    proto_timestamp = timestamp_pb2.Timestamp()
    proto_timestamp.FromDatetime(timestamp)
    return proto_timestamp.ToJsonString()

View File

@@ -1,70 +0,0 @@
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests for the common module."""
import datetime
import unittest
from unittest import mock
from unittest.mock import call, patch
import common
class CommonTestCase(unittest.TestCase):
    """Unit tests for the common module."""
    def setUp(self) -> None:
        # The mocked API's list() always hands back the same mocked request,
        # whose execute() result each test stubs.
        self._mock_request = mock.MagicMock()
        self._mock_api = mock.MagicMock()
        self._mock_api.list.return_value = self._mock_request
        self.addCleanup(patch.stopall)

    def test_list_all_pages_single_page(self):
        self._mock_request.execute.return_value = {'data': [1]}
        response = common.list_all_pages(self._mock_api.list,
                                         'data',
                                         appsId='project')
        self.assertSequenceEqual(response, [1])
        self._mock_api.list.assert_called_once_with(pageToken=None,
                                                    appsId='project')

    def test_list_all_pages_multi_page(self):
        self._mock_request.execute.side_effect = [{
            'data': [1],
            'nextPageToken': 'token'
        }, {
            'data': [2]
        }]
        response = common.list_all_pages(self._mock_api.list,
                                         'data',
                                         appsId='project')
        self.assertSequenceEqual(response, [1, 2])
        self.assertSequenceEqual(self._mock_api.list.call_args_list, [
            call(pageToken=None, appsId='project'),
            call(pageToken='token', appsId='project')
        ])

    def test_parse_timestamp(self):
        self.assertEqual(common.parse_gcp_timestamp('2020-01-01T00:00:00Z'),
                         datetime.datetime(2020, 1, 1))

    def test_parse_timestamp_irregular_nano_digits(self):
        # Function-scope import: this module's import block does not bring
        # in sys.
        import sys

        # Before Python 3.11, datetime only accepted 3 or 6 digits in the
        # fractional second. Newer interpreters parse this input, so the
        # stdlib limitation is only asserted where it still exists.
        if sys.version_info < (3, 11):
            self.assertRaises(
                ValueError, lambda: datetime.datetime.fromisoformat(
                    '2020-01-01T00:00:00.9'))
        self.assertEqual(common.parse_gcp_timestamp('2020-01-01T00:00:00.9Z'),
                         datetime.datetime(2020, 1, 1, microsecond=900000))
if __name__ == '__main__':
unittest.main()

View File

@@ -1,148 +0,0 @@
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper for managing Nomulus deployment records on GCS."""
from typing import Dict, FrozenSet, Set
from google.cloud import storage
import common
def _get_version_map_name(env: str):
return f'nomulus.{env}.versions'
def _get_schema_tag_file(env: str):
return f'sql.{env}.tag'
class GcsClient:
    """Manages Nomulus deployment records on GCS.

    Records are read from the '{project}-deployed-tags' bucket. Each
    environment has a version-map object and a schema-tag object, whose
    names come from _get_version_map_name and _get_schema_tag_file.
    """

    def __init__(self, project: str, gcs_client=None) -> None:
        """Initializes the instance for a GCP project.

        Args:
            project: The GCP project with Nomulus deployment records.
            gcs_client: Optional API client to use. When omitted, a
                storage.Client for the project is created.
        """
        self._project = project
        if gcs_client is not None:
            self._client = gcs_client
        else:
            self._client = storage.Client(self._project)

    @property
    def project(self):
        """The GCP project this client was created for."""
        return self._project

    def _get_deploy_bucket_name(self):
        """Returns the name of the bucket holding the deployment records."""
        return f'{self._project}-deployed-tags'

    def _download_text(self, blob_name: str) -> str:
        """Downloads one object from the deployment bucket as text."""
        bucket = self._client.get_bucket(self._get_deploy_bucket_name())
        return bucket.get_blob(blob_name).download_as_text()

    @staticmethod
    def _parse_version_lines(lines) -> Dict[common.VersionKey, str]:
        """Parses version-map lines into a version-to-release-tag mapping.

        Each line must have exactly three comma-separated fields:
        '{RELEASE_TAG},{APP_ENGINE_SERVICE_ID},{APP_ENGINE_VERSION}'.
        A line with any other shape raises ValueError.
        """
        mapping = {}
        for line in lines:
            tag, service_id, version_id = line.split(',')
            mapping[common.VersionKey(service_id, version_id)] = tag
        return mapping

    def _get_release_to_version_mapping(
            self, env: str) -> Dict[common.VersionKey, str]:
        """Returns the content of the release to version mapping file.

        Each line in the file is in this format:
        '{RELEASE_TAG},{APP_ENGINE_SERVICE_ID},{APP_ENGINE_VERSION}'.

        Args:
            env: The environment of the deployed release, e.g., sandbox.

        Returns:
            A mapping of every recorded version to its release tag.
        """
        content = self._download_text(_get_version_map_name(env))
        return self._parse_version_lines(content.splitlines(False))

    def get_versions_by_release(self, env: str,
                                nom_tag: str) -> FrozenSet[common.VersionKey]:
        """Returns AppEngine version ids of a given Nomulus release tag.

        Fetches the version mapping file maintained by the deployment
        process and parses its content into a collection of VersionKey
        instances.

        A release may map to multiple versions in a service if it has been
        deployed multiple times. This is not intended behavior and may only
        happen by mistake.

        Args:
            env: The environment of the deployed release, e.g., sandbox.
            nom_tag: The Nomulus release tag.

        Returns:
            An immutable set of VersionKey instances.
        """
        mapping = self._get_release_to_version_mapping(env)
        return frozenset(version for version, tag in mapping.items()
                         if tag == nom_tag)

    def get_releases_by_versions(
            self, env: str,
            versions: Set[common.VersionKey]) -> Dict[common.VersionKey, str]:
        """Gets the release tags of the AppEngine versions.

        Args:
            env: The environment of the deployed release, e.g., sandbox.
            versions: The AppEngine versions.

        Returns:
            A mapping of versions to release tags. Versions that are not in
            the mapping file are left out.
        """
        mapping = self._get_release_to_version_mapping(env)
        return {
            version: tag
            for version, tag in mapping.items() if version in versions
        }

    def get_recent_deployments(
            self, env: str, num_records: int) -> Dict[common.VersionKey, str]:
        """Gets the most recent deployment records.

        Deployment records are stored in a file, with one line per service.
        Caller should adjust num_records according to the number of
        services in AppEngine.

        Args:
            env: The environment of the deployed release, e.g., sandbox.
            num_records: The number of lines to go back. Zero or a negative
                number yields an empty mapping.

        Returns:
            A mapping of versions to release tags built from the last
            num_records lines of the mapping file.
        """
        # A slice of [-0:] is the whole list and a negative count would
        # drop leading lines instead, so handle "no records" explicitly.
        if num_records <= 0:
            return {}
        lines = self._download_text(
            _get_version_map_name(env)).splitlines(False)
        return self._parse_version_lines(lines[-num_records:])

    def get_schema_tag(self, env: str) -> str:
        """Gets the release tag of the SQL schema in the given environment.

        This tag is needed for the server/schema compatibility test.

        Args:
            env: The environment of the deployed release, e.g., sandbox.

        Returns:
            The single line stored in the schema tag file.
        """
        file_content = self._download_text(
            _get_schema_tag_file(env)).splitlines(False)
        assert len(
            file_content
        ) == 1, f'Unexpected content in {_get_schema_tag_file(env)}.'
        return file_content[0]

View File

@@ -1,152 +0,0 @@
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests for gcs."""
import textwrap
import unittest
from unittest import mock
import common
import gcs
def setup_gcs_client(env: str):
    """Builds a GcsClient backed entirely by mocks.

    Args:
        env: Name of the Nomulus environment.

    Returns:
        A tuple of the GcsClient instance, the mocked schema tag blob and
        the mocked version map blob. The schema tag blob is preset to serve
        'tag\\n'; the version map blob has no content configured.
    """
    version_map_blob = mock.MagicMock()
    schema_tag_blob = mock.MagicMock()
    schema_tag_blob.download_as_text.return_value = 'tag\n'

    blobs_by_name = {
        f'nomulus.{env}.versions': version_map_blob,
        f'sql.{env}.tag': schema_tag_blob
    }

    def find_blob(blob_name):
        # Unknown names raise KeyError, which fails the calling test.
        return blobs_by_name[blob_name]

    bucket = mock.MagicMock()
    bucket.get_blob.side_effect = find_blob
    google_client = mock.MagicMock()
    google_client.get_bucket.return_value = bucket

    return (gcs.GcsClient('project', google_client), schema_tag_blob,
            version_map_blob)
class GcsTestCase(unittest.TestCase):
    """Unit tests for gcs."""

    _ENV = 'crash'

    def setUp(self) -> None:
        """Creates a mocked client and keeps handles to its two blobs."""
        fixture = setup_gcs_client(self._ENV)
        self._client, self._schema_tag_blob, self._version_map_blob = fixture
        self.addCleanup(mock.patch.stopall)

    def test_get_schema_tag(self):
        """The schema tag is returned without its trailing newline."""
        tag = self._client.get_schema_tag(self._ENV)
        self.assertEqual(tag, 'tag')

    def test_get_versions_by_release(self):
        """A release recorded once maps to exactly that version."""
        self._version_map_blob.download_as_text.return_value = (
            'nomulus-20200925-RC02,backend,nomulus-backend-v008')
        found = self._client.get_versions_by_release(self._ENV,
                                                     'nomulus-20200925-RC02')
        expected = frozenset(
            [common.VersionKey('backend', 'nomulus-backend-v008')])
        self.assertEqual(found, expected)

    def test_get_versions_by_release_not_found(self):
        """An unknown release tag yields an empty set."""
        self._version_map_blob.download_as_text.return_value = (
            'nomulus-20200925-RC02,backend,nomulus-backend-v008')
        found = self._client.get_versions_by_release(self._ENV, 'no-such-tag')
        self.assertEqual(found, frozenset([]))

    def test_get_versions_by_release_multiple_service(self):
        """A release deployed to two services yields one version for each."""
        self._version_map_blob.download_as_text.return_value = textwrap.dedent(
            """\
            nomulus-20200925-RC02,backend,nomulus-backend-v008
            nomulus-20200925-RC02,default,nomulus-default-v008
            """)
        found = self._client.get_versions_by_release(self._ENV,
                                                     'nomulus-20200925-RC02')
        expected = frozenset([
            common.VersionKey('backend', 'nomulus-backend-v008'),
            common.VersionKey('default', 'nomulus-default-v008')
        ])
        self.assertEqual(found, expected)

    def test_get_versions_by_release_multiple_deployment(self):
        """A release deployed twice to one service yields both versions."""
        self._version_map_blob.download_as_text.return_value = textwrap.dedent(
            """\
            nomulus-20200925-RC02,backend,nomulus-backend-v008
            nomulus-20200925-RC02,backend,nomulus-backend-v018
            """)
        found = self._client.get_versions_by_release(self._ENV,
                                                     'nomulus-20200925-RC02')
        expected = frozenset([
            common.VersionKey('backend', 'nomulus-backend-v008'),
            common.VersionKey('backend', 'nomulus-backend-v018')
        ])
        self.assertEqual(found, expected)

    def test_get_releases_by_versions(self):
        """Each requested version is mapped back to its release tag."""
        self._version_map_blob.download_as_text.return_value = textwrap.dedent(
            """\
            nomulus-20200925-RC02,backend,nomulus-backend-v008
            nomulus-20200925-RC02,default,nomulus-default-v008
            """)
        requested = {
            common.VersionKey('backend', 'nomulus-backend-v008'),
            common.VersionKey('default', 'nomulus-default-v008')
        }
        expected = {
            common.VersionKey('backend', 'nomulus-backend-v008'):
                'nomulus-20200925-RC02',
            common.VersionKey('default', 'nomulus-default-v008'):
                'nomulus-20200925-RC02',
        }
        self.assertEqual(
            self._client.get_releases_by_versions(self._ENV, requested),
            expected)

    def test_get_recent_deployments(self):
        """Asking for as many records as exist returns all of them."""
        file_content = textwrap.dedent("""\
            nomulus-20200925-RC02,backend,nomulus-backend-v008
            nomulus-20200925-RC02,default,nomulus-default-v008
            """)
        self._version_map_blob.download_as_text.return_value = file_content
        expected = {
            common.VersionKey('default', 'nomulus-default-v008'):
                'nomulus-20200925-RC02',
            common.VersionKey('backend', 'nomulus-backend-v008'):
                'nomulus-20200925-RC02'
        }
        self.assertEqual(self._client.get_recent_deployments(self._ENV, 2),
                         expected)

    def test_get_recent_deployments_fewer_lines(self):
        """Asking for one record returns only the last line of the file."""
        self._version_map_blob.download_as_text.return_value = textwrap.dedent(
            """\
            nomulus-20200925-RC02,backend,nomulus-backend-v008
            nomulus-20200925-RC02,default,nomulus-default-v008
            """)
        expected = {
            common.VersionKey('default', 'nomulus-default-v008'):
                'nomulus-20200925-RC02'
        }
        self.assertEqual(self._client.get_recent_deployments(self._ENV, 1),
                         expected)
if __name__ == '__main__':
    # Run every test case in this module when executed as a script.
    unittest.main()

View File

@@ -1,198 +0,0 @@
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generates a sequence of operations for execution."""
from typing import FrozenSet, Tuple
import appengine
import common
import dataclasses
import gcs
import steps
@dataclasses.dataclass(frozen=True)
class ServiceRollback:
    """The rollback target and the versions to stop for a single service.

    Attributes:
        target_version: The version to roll back to.
        serving_versions: The currently serving versions to be stopped.
            This set may be empty. It may also have multiple versions (when
            traffic is split).
    """

    target_version: common.VersionConfig
    serving_versions: FrozenSet[common.VersionConfig]

    def __post_init__(self):
        """Checks that every serving version is in the target's service."""
        # An empty (or otherwise falsy) collection has nothing to check.
        for serving in self.serving_versions or ():
            assert serving.service_id == self.target_version.service_id
# yapf: disable
def _get_service_rollback_plan(
    target_configs: FrozenSet[common.VersionConfig],
    serving_configs: FrozenSet[common.VersionConfig]
) -> Tuple[ServiceRollback, ...]:
    # yapf: enable
    """Works out, per service, which version to start and which to stop.

    Every service in appengine.SERVICES must have at least one target
    version. When a service has several, one is picked arbitrarily.

    A target version that is already serving is not treated specially: it
    is still returned as the target, and is left out of the versions to
    stop.

    Args:
        target_configs: The rollback target versions in each managed service
            (as defined in appengine.SERVICES).
        serving_configs: The currently serving versions in each service.

    Raises:
        CannotRollbackError: The services that have a target version are not
            exactly appengine.SERVICES.

    Returns:
        One ServiceRollback per service that has a target version.
    """

    def group_by_service(configs):
        grouped = {}
        for config in configs:
            grouped.setdefault(config.service_id, set()).add(config)
        return grouped

    targets_by_service = group_by_service(target_configs)
    serving_by_service = group_by_service(serving_configs)

    # The target_configs parameter only has configs for managed services,
    # so the services seen here should be exactly appengine.SERVICES.
    if targets_by_service.keys() != appengine.SERVICES:
        cannot_rollback = appengine.SERVICES.difference(
            targets_by_service.keys())
        raise common.CannotRollbackError(
            f'Target version(s) not found for {cannot_rollback}')

    plan = []
    for service_id, candidates in targets_by_service.items():
        currently_serving = serving_by_service.get(service_id, set())
        to_stop = frozenset(currently_serving.difference(candidates))
        # Sets are unordered, so this is an arbitrary candidate.
        chosen_target = list(candidates)[0]
        plan.append(ServiceRollback(chosen_target, to_stop))
    return tuple(plan)
# yapf: disable
def _generate_steps(
    gcs_client: gcs.GcsClient,
    appengine_admin: appengine.AppEngineAdmin,
    env: str,
    target_release: str,
    rollback_plan: Tuple[ServiceRollback, ...]
) -> Tuple[steps.RollbackStep, ...]:
    # yapf: enable
    """Turns a per-service rollback plan into an ordered list of steps.

    The steps are emitted in this order:
    1. The schema compatibility check for the target release.
    2. For each service whose target version is not automatically scaled,
       a 'start' step; if it is manually scaled, also a step that sets its
       instances to the configured value.
    3. For each service, a step directing traffic to the target version.
    4. For each version to be stopped that is not automatically scaled, a
       'stop' step; if it is manually scaled, also a step that sets its
       instances to 1.
    5. A step updating the deployed tag for the environment, then a step
       syncing the live release.

    Args:
        gcs_client: Supplies the project and the current schema tag.
        appengine_admin: Supplies the project and is passed to the steps
            that set manual-scaling instances.
        env: The environment being rolled back.
        target_release: The release tag to roll back to.
        rollback_plan: The per-service targets and versions to stop.

    Returns:
        The sequence of operations to execute for rollback.
    """
    scaling = common.AppEngineScaling
    rollback_steps = [
        steps.check_schema_compatibility(gcs_client.project, target_release,
                                         gcs_client.get_schema_tag(env))
    ]

    # Bring up all targets first.
    for service_plan in rollback_plan:
        target = service_plan.target_version
        if target.scaling != scaling.AUTOMATIC:
            rollback_steps.append(
                steps.start_or_stop_version(appengine_admin.project, 'start',
                                            target))
        if target.scaling == scaling.MANUAL:
            rollback_steps.append(
                steps.set_manual_scaling_instances(
                    appengine_admin, target,
                    target.manual_scaling_instances))

    # Then switch traffic, one service at a time.
    for service_plan in rollback_plan:
        rollback_steps.append(
            steps.direct_service_traffic_to_version(
                appengine_admin.project, service_plan.target_version))

    # Finally wind down the versions that were serving.
    for service_plan in rollback_plan:
        for old_version in service_plan.serving_versions:
            if old_version.scaling != scaling.AUTOMATIC:
                rollback_steps.append(
                    steps.start_or_stop_version(appengine_admin.project,
                                                'stop', old_version))
            if old_version.scaling == scaling.MANUAL:
                # Release all but one instances. Cannot set num_instances
                # to 0 with this api.
                rollback_steps.append(
                    steps.set_manual_scaling_instances(
                        appengine_admin, old_version, 1))

    rollback_steps.append(
        steps.update_deploy_tags(gcs_client.project, env, target_release))
    rollback_steps.append(
        steps.sync_live_release(gcs_client.project, target_release))
    return tuple(rollback_steps)
def get_rollback_plan(gcs_client: gcs.GcsClient,
                      appengine_admin: appengine.AppEngineAdmin, env: str,
                      target_release: str) -> Tuple[steps.RollbackStep, ...]:
    """Generates the sequence of rollback operations for execution.

    Looks up the versions recorded for target_release and the versions
    currently serving, fetches the configs of both groups in one call, and
    turns them into a per-service plan and then into steps.

    Args:
        gcs_client: Source of the deployment records.
        appengine_admin: Source of serving versions and version configs.
        env: The environment being rolled back.
        target_release: The release tag to roll back to.

    Returns:
        The ordered rollback steps.
    """
    target_versions = gcs_client.get_versions_by_release(env, target_release)
    serving_versions = appengine_admin.get_serving_versions()
    all_version_configs = appengine_admin.get_version_configs(
        target_versions.union(serving_versions))

    # A config may land in both groups when the target is already serving.
    target_configs = frozenset(config for config in all_version_configs
                               if config in target_versions)
    serving_configs = frozenset(config for config in all_version_configs
                                if config in serving_versions)

    service_plans = _get_service_rollback_plan(target_configs,
                                               serving_configs)
    return _generate_steps(gcs_client, appengine_admin, env, target_release,
                           service_plans)

View File

@@ -1,130 +0,0 @@
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""End-to-end test of rollback."""
import textwrap
from typing import Any, Dict
import unittest
from unittest import mock
import appengine_test
import gcs_test
import plan
def _make_serving_version(service: str, version: str) -> Dict[str, Any]:
"""Creates description of one serving version in API response."""
return {
'split': {
'allocations': {
version: 1,
}
},
'id': service
}
def _make_version_config(version,
scaling: str,
instance_tag: str,
instances: int = 10) -> Dict[str, Any]:
"""Creates one version config as part of an API response."""
return {scaling: {instance_tag: instances}, 'id': version}
class RollbackTestCase(unittest.TestCase):
    """End-to-end test of rollback."""

    def setUp(self) -> None:
        """Wires up the mocked AppEngine admin and the mocked GCS client."""
        admin_fixture = appengine_test.setup_appengine_admin()
        self._appengine_admin, self._appengine_request = admin_fixture
        gcs_fixture = gcs_test.setup_gcs_client('crash')
        self._gcs_client, self._schema_tag, self._version_map = gcs_fixture
        self.addCleanup(mock.patch.stopall)

    def test_rollback_success(self):
        """Rolling back four services by one release yields 15 steps."""

        def config_page(scaling, instance_tag, *version_ids):
            # One get_version_configs response holding the given versions.
            return {
                'versions': [
                    _make_version_config(version_id, scaling, instance_tag)
                    for version_id in version_ids
                ]
            }

        self._schema_tag.download_as_text.return_value = (
            'nomulus-2010-1014-RC00')
        self._version_map.download_as_text.return_value = textwrap.dedent("""\
            nomulus-20201014-RC00,backend,nomulus-backend-v009
            nomulus-20201014-RC00,default,nomulus-default-v009
            nomulus-20201014-RC00,pubapi,nomulus-pubapi-v009
            nomulus-20201014-RC00,tools,nomulus-tools-v009
            nomulus-20201014-RC01,backend,nomulus-backend-v011
            nomulus-20201014-RC01,default,nomulus-default-v010
            nomulus-20201014-RC01,pubapi,nomulus-pubapi-v010
            nomulus-20201014-RC01,tools,nomulus-tools-v010
            """)

        # Response to get_serving_versions:
        serving_response = {
            'services': [
                _make_serving_version('backend', 'nomulus-backend-v011'),
                _make_serving_version('default', 'nomulus-default-v010'),
                _make_serving_version('pubapi', 'nomulus-pubapi-v010'),
                _make_serving_version('tools', 'nomulus-tools-v010')
            ]
        }
        # Responses to get_version_configs. AppEngineAdmin queries the
        # services by alphabetical order to facilitate this test.
        self._appengine_request.execute.side_effect = [
            serving_response,
            config_page('basicScaling', 'maxInstances',
                        'nomulus-backend-v009', 'nomulus-backend-v011'),
            config_page('basicScaling', 'maxInstances',
                        'nomulus-default-v009', 'nomulus-default-v010'),
            config_page('manualScaling', 'instances', 'nomulus-pubapi-v009',
                        'nomulus-pubapi-v010'),
            config_page('automaticScaling', 'maxTotalInstances',
                        'nomulus-tools-v009', 'nomulus-tools-v010'),
        ]

        rollback_steps = plan.get_rollback_plan(self._gcs_client,
                                                self._appengine_admin,
                                                'crash',
                                                'nomulus-20201014-RC00')

        self.assertEqual(len(rollback_steps), 15)
        # Spot-check the first step of each phase, plus the last two steps.
        # NOTE(review): the unescaped '|' in the pattern for step 13 makes
        # it a regex alternation, so either half alone satisfies it --
        # confirm whether a literal pipe was intended.
        expected_patterns = {
            0: '.*nom_build :integration:sqlIntegrationTest.*',
            1: '.*gcloud app versions start.*',
            5: '.*gcloud app services set-traffic.*',
            9: '.*gcloud app versions stop.*',
            13: '.*echo nomulus-20201014-RC00 | gcloud storage cat -.*',
            14: '.*gcloud storage rsync --delete-unmatched-destination-objects .*',
        }
        for index, pattern in expected_patterns.items():
            self.assertRegex(rollback_steps[index].info(), pattern)
if __name__ == '__main__':
    # Run every test case in this module when executed as a script.
    unittest.main()

View File

@@ -1,178 +0,0 @@
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Script to rollback the Nomulus server on AppEngine."""
import argparse
import dataclasses
import sys
import textwrap
from typing import Any, Optional, Tuple
import appengine
import gcs
import plan
# Description passed to the top-level argument parser.
MAIN_HELP = 'Script to roll back the Nomulus server on AppEngine.'
# Help text of the 'rollback' subcommand.
ROLLBACK_HELP = 'Rolls back Nomulus to the target release.'
# Help text of the 'show_serving_release' subcommand.
GET_SERVING_RELEASE_HELP = 'Shows the release tag(s) of the serving versions.'
# Help text of the 'show_recent_deployments' subcommand.
GET_RECENT_DEPLOYMENTS_HELP = ('Shows recently deployed versions and their '
'release tags.')
# Help text of the --run_mode flag. It is multi-line, which is why the
# rollback subparser is created with argparse.RawTextHelpFormatter.
ROLLBACK_MODE_HELP = textwrap.dedent("""\
The execution mode.
- dryrun: Prints descriptions of all steps.
- interactive: Prompts for confirmation before executing
each step.
- auto: Executes all steps in one go.
""")
@dataclasses.dataclass(frozen=True)
class Argument:
    """One command line argument, in a form ready for argparse.

    'arg_names' holds the argument name and/or flags and is passed
    positionally to ArgumentParser.add_argument(). Every other attribute is
    passed as a keyword argument, so its name must match a parameter that
    add_argument() accepts.
    """

    arg_names: Tuple[str, ...]
    help: str
    default: Optional[Any] = None
    required: bool = True
    choices: Optional[Tuple[str, ...]] = None

    def get_arg_attrs(self):
        """Returns all attributes except 'arg_names' as a dict."""
        return {
            name: value
            for name, value in vars(self).items() if name != 'arg_names'
        }
# Flags shared by every subcommand.
ARGUMENTS = (
    Argument(arg_names=('--dev_project', '-d'),
             help='The GCP project with Nomulus deployment records.'),
    Argument(arg_names=('--project', '-p'),
             help='The GCP project where the Nomulus server is deployed.'),
    Argument(arg_names=('--env', '-e'),
             help='The name of the Nomulus server environment.',
             choices=('production', 'sandbox', 'crash', 'alpha')),
)

# Flags accepted only by the 'rollback' subcommand.
ROLLBACK_ARGUMENTS = (
    Argument(arg_names=('--target_release', '-t'),
             help='The release to be deployed.'),
    Argument(arg_names=('--run_mode', '-m'),
             help=ROLLBACK_MODE_HELP,
             required=False,
             default='dryrun',
             choices=('dryrun', 'interactive', 'auto')),
)
def _prompt_for_step_action() -> str:
    """Asks the operator what to do with the step that was just printed.

    The reply is stripped and lower-cased, and both the one-letter shortcut
    and the full word are accepted. Any other reply is rejected and the
    prompt is repeated, so that a typo (or typing 'abort' in full) can
    never be mistaken for a request to execute the step.

    Returns:
        'c' to execute the step, 's' to skip it, or 'a' to abort.
    """
    full_words = {'continue': 'c', 'skip': 's', 'abort': 'a'}
    while True:
        reply = input(
            'Do you wish to (c)ontinue, (s)kip, or (a)bort? ').strip().lower()
        reply = full_words.get(reply, reply)
        if reply in ('c', 's', 'a'):
            return reply
        print(f'Unrecognized choice {reply!r}.')


def rollback(dev_project: str, project: str, env: str, target_release: str,
             run_mode: str) -> None:
    """Rolls back a Nomulus server to the target release.

    Every step's description is printed, whatever the run mode.

    Args:
        dev_project: The GCP project with deployment records.
        project: The GCP project of the Nomulus server.
        env: The environment name of the Nomulus server.
        target_release: The tag of the release to be brought up.
        run_mode: How to handle the rollback steps: print-only ('dryrun'),
            one step at a time with user confirmation ('interactive'), or
            all steps in one shot ('auto').
    """
    steps = plan.get_rollback_plan(gcs.GcsClient(dev_project),
                                   appengine.AppEngineAdmin(project), env,
                                   target_release)

    print('Rollback steps:\n\n')

    for step in steps:
        print(f'{step.info()}\n')

        if run_mode == 'dryrun':
            continue

        if run_mode == 'interactive':
            action = _prompt_for_step_action()
            if action == 'a':
                # Steps that were already executed are not undone.
                return
            if action == 's':
                continue

        step.execute()
def show_serving_release(dev_project: str, project: str, env: str) -> None:
    """Prints the release tag of each currently serving version.

    Output is the project name followed by one tab-separated line per
    version: service id, version id, release tag. Serving versions without
    a deployment record are not printed.

    Args:
        dev_project: The GCP project with deployment records.
        project: The GCP project of the Nomulus server.
        env: The environment name of the Nomulus server.
    """
    admin = appengine.AppEngineAdmin(project)
    serving_versions = admin.get_serving_versions()
    records = gcs.GcsClient(dev_project)
    versions_to_tags = records.get_releases_by_versions(env, serving_versions)

    print(f'{project}:')
    for version, tag in versions_to_tags.items():
        print(f'{version.service_id}\t{version.version_id}\t{tag}')
def show_recent_deployments(dev_project: str, project: str, env: str) -> None:
    """Prints the release and version of recent deployments.

    Looks back three records per service in appengine.SERVICES. Output is
    the project name followed by one tab-separated line per version:
    service id, version id, release tag.

    Args:
        dev_project: The GCP project with deployment records.
        project: The GCP project of the Nomulus server; only printed.
        env: The environment name of the Nomulus server.
    """
    num_records = 3 * len(appengine.SERVICES)
    print(f'{project}:')
    recent = gcs.GcsClient(dev_project).get_recent_deployments(
        env, num_records)
    for version, tag in recent.items():
        print(f'{version.service_id}\t{version.version_id}\t{tag}')
def main() -> int:
    """Parses the command line and runs the selected subcommand.

    A subcommand is mandatory: when none is given, argparse reports a usage
    error and exits with status 2, instead of the dispatch below failing
    with an opaque KeyError on None.

    Returns:
        0 after the subcommand has run.
    """
    handlers = {
        'rollback': rollback,
        'show_recent_deployments': show_recent_deployments,
        'show_serving_release': show_serving_release
    }

    parser = argparse.ArgumentParser(prog='nom_rollback',
                                     description=MAIN_HELP)
    subparsers = parser.add_subparsers(dest='command',
                                       help='Supported commands',
                                       required=True)

    # The multi-line --run_mode help needs the raw-text formatter.
    rollback_parser = subparsers.add_parser(
        'rollback',
        help=ROLLBACK_HELP,
        formatter_class=argparse.RawTextHelpFormatter)
    show_serving_release_parser = subparsers.add_parser(
        'show_serving_release', help=GET_SERVING_RELEASE_HELP)
    show_recent_deployments_parser = subparsers.add_parser(
        'show_recent_deployments', help=GET_RECENT_DEPLOYMENTS_HELP)

    flags_by_parser = (
        (rollback_parser, ARGUMENTS + ROLLBACK_ARGUMENTS),
        (show_serving_release_parser, ARGUMENTS),
        (show_recent_deployments_parser, ARGUMENTS),
    )
    for subparser, flags in flags_by_parser:
        for flag in flags:
            subparser.add_argument(*flag.arg_names, **flag.get_arg_attrs())

    args = parser.parse_args()
    command = args.command
    # Every remaining attribute maps onto a handler parameter by name.
    handler_kwargs = {k: v for k, v in vars(args).items() if k != 'command'}
    handlers[command](**handler_kwargs)
    return 0
if __name__ == '__main__':
    # Any failure is reported as its message only, with exit status 1.
    # SystemExit (e.g. from argparse) is not an Exception and passes through.
    try:
        exit_code = main()
    except Exception as ex:  # pylint: disable=broad-except
        print(ex)
        sys.exit(1)
    sys.exit(exit_code)

View File

@@ -1,186 +0,0 @@
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Script to rolling-restart the Nomulus server on AppEngine.
This script effects a rolling restart of the Nomulus server by deleting VM
instances at a controlled pace and leave it to the AppEngine scaling policy
to bring up new VM instances.
For each service, this script gets a list of VM instances and sequentially
handles each instance as follows:
1. Issue a gcloud delete command for this instance.
2. Poll the AppEngine at fixed intervals until this instance no longer exists.
Instance deletion is not instantaneous. An instance actively processing
requests takes time to shutdown, and its replacement almost always comes
up immediately after the shutdown. For this reason, we believe that our current
implementation is sufficient safe, and will not pursue more sophisticated
algorithms.
Note that for backend instances that may handle large queries, it may take tens
of seconds, even minutes, to shut down one of them.
This script also accepts an optional start_time parameter that serves as a
filter of instances to delete: only those instances that started before this
time will be deleted. This parameter makes error handling easy. When this
script fails, simply rerun with the same start_time until it succeeds.
"""
import argparse
import datetime
import sys
import time
from typing import Iterable, Optional, Tuple
import appengine
import common
import steps
# Description passed to the argument parser.
HELP_MAIN = 'Script to rolling-restart the Nomulus server on AppEngine'
# Help text of the --min_delay flag.
HELP_MIN_DELAY = 'Minimum delay in seconds between instance deletions.'
# NOTE(review): not referenced in the visible part of this file -- confirm
# whether a matching flag was planned or this constant can be dropped.
HELP_MIN_LIVE_INSTANCE_PERCENT = (
'Minimum number of instances to keep, as a percentage '
'of the total at the beginning of the restart process.')
# yapf: disable
def generate_steps(
    appengine_admin: appengine.AppEngineAdmin,
    version: common.VersionKey,
    started_before: datetime.datetime
) -> Tuple[steps.KillNomulusInstance, ...]:
    # yapf: enable
    """Builds one kill step per instance old enough to be restarted.

    Args:
        appengine_admin: The client used to list the version's instances.
        version: The version whose instances are to be restarted.
        started_before: Instances that started at or before this time are
            selected.

    Returns:
        The kill steps, in the order the instances were listed.
    """
    kill_steps = []
    for inst in appengine_admin.list_instances(version):
        if inst.start_time <= started_before:
            kill_steps.append(
                steps.kill_nomulus_instance(appengine_admin.project, version,
                                            inst.instance_name))
    return tuple(kill_steps)
def execute_steps(appengine_admin: appengine.AppEngineAdmin,
                  version: common.VersionKey,
                  cmds: Tuple[steps.KillNomulusInstance, ...], min_delay: int,
                  configured_num_instances: Optional[int]) -> None:
    """Runs the kill steps one at a time, waiting for each to take effect.

    After executing a step this polls the instance list, sleeping min_delay
    seconds before every poll, until the killed instance is gone and, when
    configured_num_instances is given, at least that many instances are
    listed again. There is no upper bound on the wait.

    Args:
        appengine_admin: The client used to list the version's instances.
        version: The version being restarted.
        cmds: The kill steps to execute.
        min_delay: Seconds to sleep before each poll.
        configured_num_instances: When present, the number of instances to
            wait for before moving on to the next step.
    """
    print(f'Restarting {len(cmds)} instances in {version.service_id}')
    for cmd in cmds:
        print(cmd.info())
        cmd.execute()

        while True:
            time.sleep(min_delay)
            live_names = [
                inst.instance_name
                for inst in appengine_admin.list_instances(version)
            ]
            if cmd.instance_name in live_names:
                print('Waiting for VM to shut down...')
            elif (configured_num_instances is not None
                  and len(live_names) < configured_num_instances):
                print('Waiting for new VM to come up...')
            else:
                break

        print('VM instance has shut down.\n')

    print(f'Done: {len(cmds)} instances in {version.service_id}\n')
# yapf: disable
def restart_one_service(appengine_admin: appengine.AppEngineAdmin,
                        version: common.VersionKey,
                        min_delay: int,
                        started_before: datetime.datetime,
                        configured_num_instances: Optional[int]) -> None:
    # yapf: enable
    """Restarts the VM instances of one service according to start time.

    Args:
        appengine_admin: The client of AppEngine Admin API.
        version: The Nomulus version to restart. This must be the currently
            serving version.
        min_delay: The minimum delay between successive deletions.
        started_before: Only VM instances started at or before this time
            are deleted.
        configured_num_instances: When present, the constant number of
            instances this version is configured with.
    """
    # yapf: disable
    execute_steps(
        appengine_admin,
        version,
        generate_steps(appengine_admin, version, started_before),
        min_delay,
        configured_num_instances)
    # yapf: enable
# yapf: disable
def rolling_restart(project: str,
                    services: Iterable[str],
                    min_delay: int,
                    started_before: datetime.datetime):
    # yapf: enable
    """Restarts the serving version of each selected service in turn.

    Args:
        project: The GCP project of the Nomulus server.
        services: The ids of the services to restart.
        min_delay: The minimum delay between successive deletions.
        started_before: Only VM instances started at or before this time
            are deleted.
    """
    print(f'Rolling restart {project} at '
          f'{common.to_gcp_timestamp(started_before)}\n')

    appengine_admin = appengine.AppEngineAdmin(project)
    serving_versions = set(appengine_admin.get_serving_versions())
    version_configs = appengine_admin.get_version_configs(serving_versions)

    # yapf: disable
    for version in version_configs:
        if version.service_id not in services:
            continue
        restart_one_service(appengine_admin,
                            version,
                            min_delay,
                            started_before,
                            version.manual_scaling_instances)
    # yapf: enable
def main() -> int:
    """Parses the command line and performs the rolling restart.

    Returns:
        0 after the restart has finished.
    """
    parser = argparse.ArgumentParser(prog='rolling_restart',
                                     description=HELP_MAIN)
    parser.add_argument('--project',
                        '-p',
                        required=True,
                        help='The GCP project of the Nomulus server.')
    parser.add_argument('--services',
                        '-s',
                        nargs='+',
                        choices=appengine.SERVICES,
                        default=appengine.SERVICES,
                        help='The services to restart.')
    parser.add_argument('--min_delay',
                        '-d',
                        type=int,
                        default=5,
                        choices=range(1, 100),
                        help=HELP_MIN_DELAY)
    # The default is the current UTC time as a naive datetime: the same
    # value the deprecated datetime.utcnow() produced.
    parser.add_argument(
        '--started_before',
        '-b',
        type=common.parse_gcp_timestamp,
        default=datetime.datetime.now(
            datetime.timezone.utc).replace(tzinfo=None),
        help='Only kill VM instances started before this time.')
    args = parser.parse_args()
    # Flag names match the parameters of rolling_restart one to one.
    rolling_restart(**vars(args))
    return 0
if __name__ == '__main__':
    # Any failure is reported as its message only, with exit status 1.
    # SystemExit (e.g. from argparse) is not an Exception and passes through.
    try:
        exit_code = main()
    except Exception as ex:  # pylint: disable=broad-except
        print(ex)
        sys.exit(1)
    sys.exit(exit_code)

View File

@@ -1,149 +0,0 @@
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests of rolling_restart."""
import datetime
import unittest
from unittest import mock
import common
import rolling_restart
import steps
import appengine_test
class RollingRestartTestCase(unittest.TestCase):
"""Tests for rolling_restart."""
def setUp(self) -> None:
    """Creates the mocked AppEngine admin and the version under test."""
    admin, request = appengine_test.setup_appengine_admin()
    self._appengine_admin = admin
    self._appengine_request = request
    self._version = common.VersionKey('my_service', 'my_version')
    self.addCleanup(mock.patch.stopall)
def _setup_execute_steps_tests(self):
    """Queues three list_instances responses for the execute_steps tests.

    The first still lists the instance to delete, the second lists only the
    surviving instance, and the third adds a replacement instance.
    """

    def instance(instance_id):
        return {'id': instance_id, 'startTime': '2019-01-01T00:00:00Z'}

    self._appengine_request.execute.side_effect = [
        # First list_instance response.
        {'instances': [instance('vm_to_delete'), instance('vm_to_stay')]},
        # Second list_instance response
        {'instances': [instance('vm_to_stay')]},
        # Third list_instance response
        {'instances': [instance('vm_to_stay'), instance('vm_new')]},
    ]
def _setup_generate_steps_tests(self):
    """Queues a two-page list_instances response.

    The first page holds an instance started in 2019 and a next-page
    token; the second and final page holds one started in 2020.
    """
    first_page = {
        'instances': [{
            'id': 'vm_2019',
            'startTime': '2019-01-01T00:00:00Z'
        }],
        'nextPageToken': 'token'
    }
    final_page = {
        'instances': [{
            'id': 'vm_2020',
            'startTime': '2020-01-01T00:00:00Z'
        }]
    }
    self._appengine_request.execute.side_effect = [first_page, final_page]
def test_kill_vm_command(self) -> None:
cmd = steps.kill_nomulus_instance(
'my_project', common.VersionKey('my_service', 'my_version'),
'my_inst')
self.assertEqual(cmd.instance_name, 'my_inst')
self.assertIn(('gcloud app instances delete my_inst --quiet '
'--user-output-enabled=false --service my_service '
'--version my_version --project my_project'),
cmd.info())
def _generate_kill_vm_command(self, version: common.VersionKey,
instance_name: str):
return steps.kill_nomulus_instance(self._appengine_admin.project,
version, instance_name)
def test_generate_commands(self):
self._setup_generate_steps_tests()
commands = rolling_restart.generate_steps(self._appengine_admin,
self._version,
datetime.datetime.utcnow())
self.assertSequenceEqual(commands, [
self._generate_kill_vm_command(self._version, 'vm_2019'),
self._generate_kill_vm_command(self._version, 'vm_2020')
])
def test_generate_commands_older_vm(self):
self._setup_generate_steps_tests()
version = common.VersionKey('my_service', 'my_version')
# yapf: disable
commands = rolling_restart.generate_steps(
self._appengine_admin,
version,
common.parse_gcp_timestamp('2019-12-01T00:00:00Z'))
# yapf: enable
self.assertSequenceEqual(
commands, [self._generate_kill_vm_command(version, 'vm_2019')])
def test_execute_steps_variable_instances(self):
self._setup_execute_steps_tests()
cmd = mock.MagicMock()
cmd.instance_name = 'vm_to_delete'
cmds = tuple([cmd]) # yapf does not format (cmd,) correctly.
rolling_restart.execute_steps(appengine_admin=self._appengine_admin,
version=self._version,
cmds=cmds,
min_delay=0,
configured_num_instances=None)
self.assertEqual(self._appengine_request.execute.call_count, 2)
def test_execute_steps_fixed_instances(self):
self._setup_execute_steps_tests()
cmd = mock.MagicMock()
cmd.instance_name = 'vm_to_delete'
cmds = tuple([cmd]) # yapf does not format (cmd,) correctly.
rolling_restart.execute_steps(appengine_admin=self._appengine_admin,
version=self._version,
cmds=cmds,
min_delay=0,
configured_num_instances=2)
self.assertEqual(self._appengine_request.execute.call_count, 3)
if __name__ == '__main__':
unittest.main()

View File

@@ -1,186 +0,0 @@
# Copyright 2020 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Definition of rollback steps and factory methods to create them."""
import dataclasses
import subprocess
import textwrap
from typing import Tuple
import appengine
import common
@dataclasses.dataclass(frozen=True)
class RollbackStep:
    """A single unit of work in a rollback plan.

    By default a step runs ``command`` as a subprocess, and ``info`` renders
    the step as a commented command line for display.

    Two subclasses in this module replace ``execute``: _UpdateDeployTag needs
    a piped shell command, and _SetManualScalingNumInstances goes through the
    AppEngine Admin API client in this package. For those, ``command`` is
    only what ``info`` displays.
    """
    # Human-readable summary of the step.
    description: str
    # The command line, one tuple element per argument.
    command: Tuple[str, ...]

    def info(self) -> str:
        """Returns the description as a '#' comment line, then the command."""
        header = f'# {self.description}'
        command_line = ' '.join(self.command)
        return f'{header}\n{command_line}'

    def execute(self) -> None:
        """Runs the command as a subprocess and waits for it to finish.

        Raises:
            CannotRollbackError if the command exits with a non-zero status.
        """
        exit_code = subprocess.call(self.command)
        if exit_code == 0:
            return
        raise common.CannotRollbackError(f'Failed: {self.description}')
def check_schema_compatibility(dev_project: str, nom_tag: str,
                               sql_tag: str) -> RollbackStep:
    """Creates the step that runs the SQL integration test via nom_build.

    Args:
        dev_project: Project whose '-deployed-tags' bucket holds the maven
            repo passed as --publish_repo.
        nom_tag: Value passed as --nomulus_version.
        sql_tag: Value passed as --schema_version.
    """
    nom_build = f'{common.get_nomulus_root()}/nom_build'
    publish_repo = f'gcs://{dev_project}-deployed-tags/maven'
    command = (
        nom_build,
        ':integration:sqlIntegrationTest',
        f'--schema_version={sql_tag}',
        f'--nomulus_version={nom_tag}',
        f'--publish_repo={publish_repo}',
    )
    return RollbackStep(description='Check compatibility with SQL schema.',
                        command=command)
@dataclasses.dataclass(frozen=True)
class _SetManualScalingNumInstances(RollbackStep):
    """Step that changes the instance count of a manual-scaling version.

    The Nomulus set_num_instances command is currently broken, so this step
    updates the version through the AppEngine REST API instead.
    """
    # Admin API client used to apply the change.
    appengine_admin: appengine.AppEngineAdmin
    # The service/version pair being resized.
    version: common.VersionKey
    # The instance count to set.
    num_instance: int

    def execute(self) -> None:
        """Applies the instance count via the admin client, not a subprocess."""
        target = self.version
        self.appengine_admin.set_manual_scaling_num_instance(
            target.service_id, target.version_id, self.num_instance)
def set_manual_scaling_instances(appengine_admin: appengine.AppEngineAdmin,
                                 version: common.VersionConfig,
                                 num_instances: int) -> RollbackStep:
    """Creates the step that sets a manual-scaling version's instance count.

    Args:
        appengine_admin: Admin API client the step will call when executed.
        version: The version to resize.
        num_instances: The instance count to set.
    """
    # Displayed by info() in place of a real command line; the step's
    # execute() calls the admin client and never runs this text.
    display_text = textwrap.dedent("""\
        Nomulus set_num_instances command is currently broken.
        This script uses the AppEngine REST API to update the version.
        To set this value without using this tool, you may use the REST API at
        https://cloud.google.com/appengine/docs/admin-api/reference/rest/v1beta/apps.services.versions/patch
        """)
    summary = (f'Set number of instance for manual-scaling version '
               f'{version.version_id} in {version.service_id} to '
               f'{num_instances}.')
    return _SetManualScalingNumInstances(description=summary,
                                         command=(display_text, ''),
                                         appengine_admin=appengine_admin,
                                         version=version,
                                         num_instance=num_instances)
def start_or_stop_version(project: str, action: str,
                          version: common.VersionKey) -> RollbackStep:
    """Creates a rollback step that starts or stops an AppEngine version.

    Args:
        project: The GCP project of the AppEngine application.
        action: Start or Stop. Passed verbatim as the
            'gcloud app versions' subcommand.
        version: The version being managed.
    """
    summary = f'{action.title()} {version.version_id} in {version.service_id}'
    gcloud_args = (
        'gcloud', 'app', 'versions', action, version.version_id,
        '--quiet',
        '--service', version.service_id,
        '--project', project,
    )
    return RollbackStep(description=summary, command=gcloud_args)
def direct_service_traffic_to_version(
        project: str, version: common.VersionKey) -> RollbackStep:
    """Creates the step that routes all of a service's traffic to one version.

    Args:
        project: The GCP project passed to gcloud.
        version: The service and the version that should get a split of 1.
    """
    summary = (
        f'Direct all traffic to {version.version_id} in {version.service_id}.')
    split = f'--splits={version.version_id}=1'
    gcloud_args = (
        'gcloud', 'app', 'services', 'set-traffic', version.service_id,
        '--quiet', split,
        '--project', project,
    )
    return RollbackStep(description=summary, command=gcloud_args)
@dataclasses.dataclass(frozen=True)
class KillNomulusInstance(RollbackStep):
    """Rollback step that deletes one Nomulus VM instance."""
    # Name of the instance this step deletes.
    instance_name: str


# yapf: disable
def kill_nomulus_instance(project: str,
                          version: common.VersionKey,
                          instance_name: str) -> KillNomulusInstance:
    # yapf: enable
    """Creates the step that deletes instance_name with gcloud.

    Args:
        project: The GCP project passed to gcloud.
        version: The service and version the instance belongs to.
        instance_name: The instance to delete.
    """
    gcloud_args = (
        'gcloud', 'app', 'instances', 'delete', instance_name,
        '--quiet',
        '--user-output-enabled=false',
        '--service', version.service_id,
        '--version', version.version_id,
        '--project', project,
    )
    return KillNomulusInstance(description='Delete one VM instance.',
                               command=gcloud_args,
                               instance_name=instance_name)
@dataclasses.dataclass(frozen=True)
class _UpdateDeployTag(RollbackStep):
    """Updates the deployment tag on GCS.

    The tag is streamed to 'gcloud storage cp' on stdin, so this step
    overrides execute() rather than running ``command`` directly.
    """
    # The tag text written to the destination object.
    nom_tag: str
    # The gs:// URL that receives the tag.
    destination: str

    def execute(self) -> None:
        """Uploads nom_tag to destination through gcloud's stdin.

        Raises:
            CannotRollbackError if gcloud exits with a non-zero status.
        """
        # subprocess.run pipes `input` to the child's stdin, waits for it to
        # exit, and kills it before re-raising if the wait is interrupted.
        # This replaces the previous hand-written Popen/communicate/kill
        # sequence and its bare `except:`.
        result = subprocess.run(
            ('gcloud', 'storage', 'cp', '-', self.destination),
            input=self.nom_tag.encode('utf-8'),
            check=False)
        if result.returncode != 0:
            raise common.CannotRollbackError(f'Failed: {self.description}')
def update_deploy_tags(dev_project: str, env: str,
                       nom_tag: str) -> RollbackStep:
    """Creates the step that records nom_tag as the tag deployed to env.

    Args:
        dev_project: Project whose '-deployed-tags' bucket stores the tag.
        env: Environment name, used in the tag object's file name.
        nom_tag: The tag text to write.
    """
    destination = f'gs://{dev_project}-deployed-tags/nomulus.{env}.tag'
    # Display-only shell equivalent of what the step does when executed.
    shell_equivalent = f'echo {nom_tag} | gcloud storage cp - {destination}'
    return _UpdateDeployTag(description=f'Update Nomulus tag in {env}',
                            command=(shell_equivalent, ''),
                            nom_tag=nom_tag,
                            destination=destination)
def sync_live_release(dev_project: str, nom_tag: str) -> RollbackStep:
    """Creates the step that mirrors a release's artifacts into 'live'.

    By convention the gs://{dev_project}-deploy/live folder should contain the
    artifacts from the currently serving release.

    For Domain Registry team members, this step updates the nomulus tool
    installed on corp desktops.

    Args:
        dev_project: Project whose '-deploy' bucket holds the artifacts.
        nom_tag: Release tag naming the source folder in that bucket.
    """
    bucket = f'gs://{dev_project}-deploy'
    artifacts_folder = f'{bucket}/{nom_tag}'
    live_folder = f'{bucket}/live'
    rsync_args = (
        'gcloud', 'storage', 'rsync',
        '--delete-unmatched-destination-objects',
        artifacts_folder,
        live_folder,
    )
    return RollbackStep(
        description=f'Syncing {artifacts_folder} to {live_folder}.',
        command=rsync_args)

View File

@@ -1,55 +0,0 @@
#!/bin/bash
# Copyright 2023 The Nomulus Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This script updates number of instances of the service running on GCP
# Required parameters are:
# 1) projectId
# 2) service name
#
# Example:
# ./update_num_instances.sh domain-registry-sandbox pubapi
set -e
project=$1
service=$2
if [[ -z "$project" || -z "$service" ]]; then
  echo "2 parameters required - projectId and service"
  exit 1
fi
echo "Project: $project";
echo "Service: $service";

# Find the version of the service that currently receives traffic.
deployed_version=$(gcloud app versions list --service "${service}" \
  --project "${project}" \
  --filter "TRAFFIC_SPLIT>0.00" \
  --format="csv[no-heading](VERSION.ID)")

# The URLs below address exactly one version; stop if the filter matched
# none, or several (gcloud prints one version per line).
if [[ -z "$deployed_version" || "$deployed_version" == *$'\n'* ]]; then
  echo "Expected exactly one version of ${service} serving traffic, got: '${deployed_version}'"
  exit 1
fi

version_url="https://appengine.googleapis.com/v1/apps/${project}/services/${service}/versions/${deployed_version}"

# Show the current configuration before asking for the new value.
service_description=$(curl -H "Authorization: Bearer $(gcloud auth print-access-token)" "${version_url}")
echo "Service configuration: $service_description"

echo "Input new number of instances: "
read -r num_instances
# Require at least one digit and nothing else. An empty answer must be
# rejected too, otherwise the PATCH body below would be invalid JSON.
if [[ ! "$num_instances" =~ ^[0-9]+$ ]]; then
  echo "Should be an integer"
  exit 1;
fi

echo "Settings new number of instances: $num_instances"
# The URL is quoted so the '?' in the query string is never treated as a glob.
curl -X PATCH "${version_url}?updateMask=manualScaling.instances" \
  -H "Authorization: Bearer $(gcloud auth print-access-token)" \
  -H 'Content-Type: application/json' \
  -d "{ \"manualScaling\": { \"instances\": $num_instances }}"

# Fetch the configuration again so the operator can confirm the change.
service_description=$(curl -H "Authorization: Bearer $(gcloud auth print-access-token)" "${version_url}")
echo "Updated service configuration: $service_description"