treewide: accept list of sstables in "restore" API

before this change, we enumerate the sstables tracked by the
system.sstables table, and restore them when serving requests
to the "storage_service/restore" API. this works fine together
with the "storage_service/backup" API, but the "restore" API
cannot be used as a drop-in replacement for the rclone-based API
currently used by scylla-manager.

in order to fill the gap, in this change:

* add the "prefix" parameter for specifying the shared prefix of
  sstables
* add the "sstables" parameter for specifying the list of TOC
  components of sstables
* remove the "snapshot" parameter, as we don't encode the prefix
  on scylla's end anymore.
* make the "table" parameter mandatory.

Fixes scylladb/scylladb#20461
Signed-off-by: Kefu Chai <kefu.chai@scylladb.com>
This commit is contained in:
Kefu Chai
2024-10-01 23:18:25 +08:00
parent 17181c2eca
commit 787ea4b1d4
14 changed files with 180 additions and 49 deletions

View File

@@ -8,26 +8,21 @@ import pytest
from test.nodetool.rest_api_mock import expected_request
@pytest.mark.parametrize("table",
["cf",
pytest.param("",
marks=pytest.mark.xfail(
reason="full keyspace restore not implemented yet"))])
@pytest.mark.parametrize("nowait,task_state,task_error", [(False, "failed", "error"),
(False, "done", ""),
(True, "", "")])
def test_restore(nodetool, scylla_only, table, nowait, task_state, task_error):
def test_restore(nodetool, scylla_only, nowait, task_state, task_error):
endpoint = "s3.us-east-2.amazonaws.com"
bucket = "bucket-foo"
keyspace = "ks"
table = "cf"
prefix = "foo/bar"
snapshot = "ss"
params = {"endpoint": endpoint,
"bucket": bucket,
"snapshot": snapshot,
"keyspace": keyspace}
if table:
params["table"] = table
"prefix": prefix,
"keyspace": keyspace,
"table": table}
task_id = "2c4a3e5f"
start_time = "2024-08-08T14:29:25Z"
@@ -48,25 +43,29 @@ def test_restore(nodetool, scylla_only, table, nowait, task_state, task_error):
"progress_completed": 1.0,
"children_ids": []
}
# just generate filenames of TOC components of random sstables
sstables = [f"me-{id}-big-TOC.txt" for id in range(12)]
expected_requests = [
expected_request(
"POST",
"/storage_service/restore",
params,
sstables,
response=task_id)
]
args = ["restore",
"--endpoint", endpoint,
"--bucket", bucket,
"--snapshot", snapshot,
"--keyspace", keyspace]
if table:
args.extend(["--table", table])
"--prefix", prefix,
"--keyspace", keyspace,
"--table", table]
if nowait:
args.append("--nowait")
args.extend(sstables)
res = nodetool(*args, expected_requests=expected_requests)
assert task_id in res.stdout
else:
args.extend(sstables)
# wait for the completion of the restore task
expected_requests.append(
expected_request(

View File

@@ -153,6 +153,8 @@ async def test_simple_backup_and_restore(manager: ManagerClient, s3_server):
orig_res = cql.execute(f"SELECT * FROM {ks}.{cf}")
orig_rows = { x.name: x.value for x in orig_res }
toc_names = [entry.name for entry in list_sstables() if entry.name.endswith('TOC.txt')]
prefix = f'{cf}/{snap_name}'
tid = await manager.api.backup(server.ip_addr, ks, cf, snap_name, s3_server.address, s3_server.bucket_name, prefix)
status = await manager.api.wait_task(server.ip_addr, tid)
@@ -166,7 +168,7 @@ async def test_simple_backup_and_restore(manager: ManagerClient, s3_server):
assert not res
print(f'Try to restore')
tid = await manager.api.restore(server.ip_addr, ks, cf, snap_name, s3_server.address, s3_server.bucket_name)
tid = await manager.api.restore(server.ip_addr, ks, cf, s3_server.address, s3_server.bucket_name, prefix, toc_names)
status = await manager.api.wait_task(server.ip_addr, tid)
assert (status is not None) and (status['state'] == 'done')
print(f'Check that sstables came back')

View File

@@ -317,14 +317,14 @@ class ScyllaRESTAPIClient():
"snapshot": tag}
return await self.client.post_json(f"/storage_service/backup", host=node_ip, params=params)
async def restore(self, node_ip: str, ks: str, cf: str, tag: str, dest: str, bucket: str) -> str:
async def restore(self, node_ip: str, ks: str, cf: str, dest: str, bucket: str, prefix: str, sstables: list[str]) -> str:
"""Restore keyspace:table from backup"""
params = {"keyspace": ks,
"table": cf,
"endpoint": dest,
"bucket": bucket,
"snapshot": tag}
return await self.client.post_json(f"/storage_service/restore", host=node_ip, params=params)
"prefix": prefix}
return await self.client.post_json(f"/storage_service/restore", host=node_ip, params=params, json=sstables)
async def take_snapshot(self, node_ip: str, ks: str, tag: str) -> None:
"""Take keyspace snapshot"""