Files
scylladb/test/boost/s3_test.cc
Pavel Emelyanov 86bc5b11fe s3-client: Add support for lister::filter
Directory lister comes with a filter function that tells lister which
entries to skip by its .get() method. For uniformity, add the same to
S3 bucket_lister.

After this change the lister reports shorter name in the returned
directory entry (with the prefix cut), so also need to tune up the unit
test respectively.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
2024-08-27 16:15:40 +03:00

435 lines
17 KiB
C++

/*
* Copyright (C) 2022-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#include <unordered_set>
#include <boost/test/unit_test.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp>
#include <seastar/core/thread.hh>
#include <seastar/core/reactor.hh>
#include <seastar/core/file.hh>
#include <seastar/core/fstream.hh>
#include <seastar/http/exception.hh>
#include <seastar/util/closeable.hh>
#include <seastar/util/short_streams.hh>
#include <seastar/core/units.hh>
#include "test/lib/scylla_test_case.hh"
#include "test/lib/log.hh"
#include "test/lib/random_utils.hh"
#include "test/lib/test_utils.hh"
#include "test/lib/tmpdir.hh"
#include "utils/assert.hh"
#include "utils/s3/client.hh"
#include "utils/s3/creds.hh"
#include "utils/exceptions.hh"
#include "sstables/checksum_utils.hh"
#include "gc_clock.hh"
// The test can be run on real AWS-S3 bucket. For that, create a bucket with
// permissive enough policy and then run the test with env set respectively
// E.g. like this
//
// export S3_SERVER_ADDRESS_FOR_TEST=s3.us-east-2.amazonaws.com
// export S3_SERVER_PORT_FOR_TEST=443
// export S3_BUCKET_FOR_TEST=xemul
// export AWS_ACCESS_KEY_ID=${aws_access_key_id}
// export AWS_SECRET_ACCESS_KEY=${aws_secret_access_key}
// export AWS_SESSION_TOKEN=${aws_session_token}
// export AWS_DEFAULT_REGION="us-east-2"
s3::endpoint_config_ptr make_minio_config() {
s3::endpoint_config cfg = {
.port = std::stoul(tests::getenv_safe("S3_SERVER_PORT_FOR_TEST")),
.use_https = ::getenv("AWS_DEFAULT_REGION") != nullptr,
.aws = {{
.access_key_id = tests::getenv_safe("AWS_ACCESS_KEY_ID"),
.secret_access_key = tests::getenv_safe("AWS_SECRET_ACCESS_KEY"),
.session_token = ::getenv("AWS_SESSION_TOKEN") ? : "",
.region = ::getenv("AWS_DEFAULT_REGION") ? : "local",
}},
};
return make_lw_shared<s3::endpoint_config>(std::move(cfg));
}
/*
* Tests below expect minio server to be running on localhost
* with the bucket named env['S3_BUCKET_FOR_TEST'] created with
* unrestricted anonymous read-write access
*/
SEASTAR_THREAD_TEST_CASE(test_client_put_get_object) {
const sstring name(fmt::format("/{}/testobject-{}", tests::getenv_safe("S3_BUCKET_FOR_TEST"), ::getpid()));
testlog.info("Make client\n");
semaphore mem(16<<20);
auto cln = s3::client::make(tests::getenv_safe("S3_SERVER_ADDRESS_FOR_TEST"), make_minio_config(), mem);
auto close_client = deferred_close(*cln);
testlog.info("Put object {}\n", name);
temporary_buffer<char> data = sstring("1234567890").release();
cln->put_object(name, std::move(data)).get();
testlog.info("Get object size\n");
size_t sz = cln->get_object_size(name).get();
BOOST_REQUIRE_EQUAL(sz, 10);
testlog.info("Get object stats\n");
s3::stats st = cln->get_object_stats(name).get();
BOOST_REQUIRE_EQUAL(st.size, 10);
// forgive timezone difference as minio server is GMT by default
BOOST_REQUIRE(std::difftime(st.last_modified, gc_clock::to_time_t(gc_clock::now())) < 24*3600);
testlog.info("Get object content\n");
temporary_buffer<char> res = cln->get_object_contiguous(name).get();
BOOST_REQUIRE_EQUAL(to_sstring(std::move(res)), sstring("1234567890"));
testlog.info("Get object part\n");
res = cln->get_object_contiguous(name, s3::range{ 1, 3 }).get();
BOOST_REQUIRE_EQUAL(to_sstring(std::move(res)), sstring("234"));
testlog.info("Delete object\n");
cln->delete_object(name).get();
testlog.info("Verify it's gone\n");
BOOST_REQUIRE_EXCEPTION(cln->get_object_size(name).get(), storage_io_error, [] (const storage_io_error& ex) {
return ex.code().value() == ENOENT;
});
}
static auto deferred_delete_object(shared_ptr<s3::client> client, sstring name) {
return seastar::defer([client, name] {
testlog.info("Delete object: {}\n", name);
client->delete_object(name).get();
});
}
void do_test_client_multipart_upload(bool with_copy_upload) {
const sstring name(fmt::format("/{}/test{}object-{}", tests::getenv_safe("S3_BUCKET_FOR_TEST"), with_copy_upload ? "jumbo" : "large", ::getpid()));
testlog.info("Make client\n");
semaphore mem(16<<20);
auto cln = s3::client::make(tests::getenv_safe("S3_SERVER_ADDRESS_FOR_TEST"), make_minio_config(), mem);
auto close_client = deferred_close(*cln);
testlog.info("Upload object (with copy = {})\n", with_copy_upload);
auto out = output_stream<char>(
// Make it 3 parts per piece, so that 128Mb buffer below
// would be split into several 15Mb pieces
with_copy_upload ? cln->make_upload_jumbo_sink(name, 3) : cln->make_upload_sink(name)
);
auto close = seastar::deferred_close(out);
static constexpr unsigned chunk_size = 1000;
auto rnd = tests::random::get_bytes(chunk_size);
uint64_t object_size = 0;
for (unsigned ch = 0; ch < 128 * 1024; ch++) {
out.write(reinterpret_cast<char*>(rnd.begin()), rnd.size()).get();
object_size += rnd.size();
}
testlog.info("Flush multipart upload\n");
out.flush().get();
auto delete_object = deferred_delete_object(cln, name);
testlog.info("Closing\n");
close.close_now();
testlog.info("Checking file size\n");
size_t sz = cln->get_object_size(name).get();
BOOST_REQUIRE_EQUAL(sz, object_size);
testlog.info("Checking correctness\n");
for (int samples = 0; samples < 7; samples++) {
uint64_t len = tests::random::get_int(1u, chunk_size);
uint64_t off = tests::random::get_int(object_size - len);
auto s_buf = cln->get_object_contiguous(name, s3::range{ off, len }).get();
unsigned align = off % chunk_size;
testlog.info("Got [{}:{}) chunk\n", off, len);
testlog.info("Checking {} vs {} len {}\n", align, 0, std::min<uint64_t>(chunk_size - align, len));
BOOST_REQUIRE_EQUAL(memcmp(rnd.begin() + align, s_buf.get(), std::min<uint64_t>(chunk_size - align, len)), 0);
if (len > chunk_size - align) {
testlog.info("Checking {} vs {} len {}\n", 0, chunk_size - align, len - (chunk_size - align));
BOOST_REQUIRE_EQUAL(memcmp(rnd.begin(), s_buf.get() + (chunk_size - align), len - (chunk_size - align)), 0);
}
}
}
SEASTAR_THREAD_TEST_CASE(test_client_multipart_upload) {
do_test_client_multipart_upload(false);
}
SEASTAR_THREAD_TEST_CASE(test_client_multipart_copy_upload) {
do_test_client_multipart_upload(true);
}
SEASTAR_THREAD_TEST_CASE(test_client_multipart_upload_fallback) {
const sstring name(fmt::format("/{}/testfbobject-{}", tests::getenv_safe("S3_BUCKET_FOR_TEST"), ::getpid()));
testlog.info("Make client");
semaphore mem(0);
mem.broken(); // so that any attempt to use it throws
auto cln = s3::client::make(tests::getenv_safe("S3_SERVER_ADDRESS_FOR_TEST"), make_minio_config(), mem);
auto close_client = deferred_close(*cln);
testlog.info("Upload object");
auto out = output_stream<char>(cln->make_upload_sink(name));
auto close = seastar::deferred_close(out);
temporary_buffer<char> data = sstring("1A3B5C7890").release();
out.write(reinterpret_cast<const char*>(data.begin()), data.size()).get();
testlog.info("Flush upload");
out.flush().get(); // if it tries to do regular flush, memory claim would throw
auto delete_object = deferred_delete_object(cln, name);
testlog.info("Closing");
close.close_now();
testlog.info("Get object content");
temporary_buffer<char> res = cln->get_object_contiguous(name).get();
BOOST_REQUIRE_EQUAL(to_sstring(std::move(res)), to_sstring(std::move(data)));
}
using with_remainder_t = bool_class<class with_remainder_tag>;
future<> test_client_upload_file(std::string_view test_name, size_t total_size, size_t memory_size) {
tmpdir tmp;
const auto file_path = tmp.path() / "test";
uint32_t expected_checksum = crc32_utils::init_checksum();
// 1. prefill the data file to be uploaded
{
file f = co_await open_file_dma(file_path.native(), open_flags::create | open_flags::wo);
auto output = co_await make_file_output_stream(std::move(f));
std::string_view data = "1234567890ABCDEF";
// so we can test !with_remainder case properly with multiple writes
SCYLLA_ASSERT(total_size % data.size() == 0);
for (size_t bytes_written = 0;
bytes_written < total_size;
bytes_written += data.size()) {
co_await output.write(data.data(), data.size());
uint32_t chunk_checksum = crc32_utils::checksum(data.data(), data.size());
expected_checksum = checksum_combine_or_feed<crc32_utils>(
expected_checksum, chunk_checksum, data.data(), data.size());
}
co_await output.close();
}
const auto object_name = fmt::format("/{}/{}-{}",
tests::getenv_safe("S3_BUCKET_FOR_TEST"),
test_name,
::getpid());
// 2. upload the file to s3
semaphore mem{memory_size};
auto client = s3::client::make(tests::getenv_safe("S3_SERVER_ADDRESS_FOR_TEST"),
make_minio_config(),
mem);
co_await client->upload_file(file_path, object_name);
// 3. retrieve the object from s3 and retrieve the object from S3 and
// compare it with the pattern
uint32_t actual_checksum = crc32_utils::init_checksum();
auto readable_file = client->make_readable_file(object_name);
auto input = make_file_input_stream(readable_file);
size_t actual_size = 0;
for (;;) {
auto buf = co_await input.read();
if (buf.empty()) {
// empty() signifies the end of stream
break;
}
actual_size += buf.size();
bytes_view bv{reinterpret_cast<const int8_t*>(buf.get()), buf.size()};
//fmt::print("{}", fmt_hex(bv));
uint32_t chunk_checksum = crc32_utils::checksum(buf.get(), buf.size());
actual_checksum = checksum_combine_or_feed<crc32_utils>(
actual_checksum, chunk_checksum, buf.get(), buf.size());
}
BOOST_CHECK_EQUAL(total_size, actual_size);
BOOST_CHECK_EQUAL(expected_checksum, actual_checksum);
co_await readable_file.close();
co_await input.close();
co_await client->close();
}
SEASTAR_TEST_CASE(test_client_upload_file_multi_part_without_remainder) {
const size_t part_size = 5_MiB;
const size_t total_size = 4 * part_size;
const size_t memory_size = part_size;
co_await test_client_upload_file(seastar_test::get_name(), total_size, memory_size);
}
SEASTAR_TEST_CASE(test_client_upload_file_multi_part_with_remainder) {
const size_t part_size = 5_MiB;
const size_t remainder_size = part_size / 2;
const size_t total_size = 4 * part_size + remainder_size;
const size_t memory_size = part_size;
co_await test_client_upload_file(seastar_test::get_name(), total_size, memory_size);
}
SEASTAR_TEST_CASE(test_client_upload_file_single_part) {
const size_t part_size = 5_MiB;
const size_t total_size = part_size / 2;
const size_t memory_size = part_size;
co_await test_client_upload_file(seastar_test::get_name(), total_size, memory_size);
}
SEASTAR_THREAD_TEST_CASE(test_client_readable_file) {
const sstring name(fmt::format("/{}/testroobject-{}", tests::getenv_safe("S3_BUCKET_FOR_TEST"), ::getpid()));
testlog.info("Make client\n");
semaphore mem(16<<20);
auto cln = s3::client::make(tests::getenv_safe("S3_SERVER_ADDRESS_FOR_TEST"), make_minio_config(), mem);
auto close_client = deferred_close(*cln);
testlog.info("Put object {}\n", name);
temporary_buffer<char> data = sstring("1234567890ABCDEF").release();
cln->put_object(name, std::move(data)).get();
auto delete_object = deferred_delete_object(cln, name);
auto f = cln->make_readable_file(name);
auto close_readable_file = deferred_close(f);
testlog.info("Check file size\n");
size_t sz = f.size().get();
BOOST_REQUIRE_EQUAL(sz, 16);
testlog.info("Check buffer read\n");
char buffer[16];
sz = f.dma_read(4, buffer, 7).get();
BOOST_REQUIRE_EQUAL(sz, 7);
BOOST_REQUIRE_EQUAL(sstring(buffer, 7), sstring("567890A"));
testlog.info("Check iovec read\n");
std::vector<iovec> iovs;
iovs.push_back({buffer, 3});
iovs.push_back({buffer + 3, 2});
iovs.push_back({buffer + 5, 4});
sz = f.dma_read(3, std::move(iovs)).get();
BOOST_REQUIRE_EQUAL(sz, 9);
BOOST_REQUIRE_EQUAL(sstring(buffer, 3), sstring("456"));
BOOST_REQUIRE_EQUAL(sstring(buffer + 3, 2), sstring("78"));
BOOST_REQUIRE_EQUAL(sstring(buffer + 5, 4), sstring("90AB"));
testlog.info("Check bulk read\n");
auto buf = f.dma_read_bulk<char>(5, 8).get();
BOOST_REQUIRE_EQUAL(to_sstring(std::move(buf)), sstring("67890ABC"));
}
SEASTAR_THREAD_TEST_CASE(test_client_readable_file_stream) {
const sstring name(fmt::format("/{}/teststreamobject-{}", tests::getenv_safe("S3_BUCKET_FOR_TEST"), ::getpid()));
testlog.info("Make client\n");
semaphore mem(16<<20);
auto cln = s3::client::make(tests::getenv_safe("S3_SERVER_ADDRESS_FOR_TEST"), make_minio_config(), mem);
auto close_client = deferred_close(*cln);
testlog.info("Put object {}\n", name);
sstring sample("1F2E3D4C5B6A70899807A6B5C4D3E2F1");
temporary_buffer<char> data(sample.c_str(), sample.size());
cln->put_object(name, std::move(data)).get();
auto delete_object = deferred_delete_object(cln, name);
auto f = cln->make_readable_file(name);
auto close_readable_file = deferred_close(f);
auto in = make_file_input_stream(f);
auto close_stream = deferred_close(in);
testlog.info("Check input stream read\n");
auto res = seastar::util::read_entire_stream_contiguous(in).get();
BOOST_REQUIRE_EQUAL(res, sample);
}
SEASTAR_THREAD_TEST_CASE(test_client_put_get_tagging) {
const sstring name(fmt::format("/{}/testobject-{}",
tests::getenv_safe("S3_BUCKET_FOR_TEST"), ::getpid()));
semaphore mem(16<<20);
auto client = s3::client::make(tests::getenv_safe("S3_SERVER_ADDRESS_FOR_TEST"), make_minio_config(), mem);
auto close_client = deferred_close(*client);
auto data = sstring("1234567890ABCDEF").release();
client->put_object(name, std::move(data)).get();
auto delete_object = deferred_delete_object(client, name);
{
auto tagset = client->get_object_tagging(name).get();
BOOST_CHECK(tagset.empty());
}
{
s3::tag_set expected_tagset{{"1", "one"}, {"2", "two"}};
client->put_object_tagging(name, expected_tagset).get();
auto actual_tagset = client->get_object_tagging(name).get();
std::ranges::sort(actual_tagset);
std::ranges::sort(expected_tagset);
BOOST_CHECK(actual_tagset == expected_tagset);
}
{
client->delete_object_tagging(name).get();
auto tagset = client->get_object_tagging(name).get();
BOOST_CHECK(tagset.empty());
}
}
static std::unordered_set<sstring> populate_bucket(shared_ptr<s3::client> client, sstring bucket, sstring prefix, int nr_objects) {
std::unordered_set<sstring> names;
for (int i = 0; i < nr_objects; i++) {
temporary_buffer<char> data = sstring("1234567890").release();
auto name = format("obj.{}", i);
client->put_object(format("/{}/{}{}", bucket, prefix, name), std::move(data)).get();
names.insert(name);
}
return names;
}
SEASTAR_THREAD_TEST_CASE(test_client_list_objects) {
const sstring bucket = tests::getenv_safe("S3_BUCKET_FOR_TEST");
const sstring prefix(fmt::format("testprefix-{}/", ::getpid()));
semaphore mem(16<<20);
auto client = s3::client::make(tests::getenv_safe("S3_SERVER_ADDRESS_FOR_TEST"), make_minio_config(), mem);
auto close_client = deferred_close(*client);
// Put extra object to check list-by-prefix filters it out
temporary_buffer<char> data = sstring("1234567890").release();
client->put_object(format("/{}/extra-{}", bucket, ::getpid()), std::move(data)).get();
auto names = populate_bucket(client, bucket, prefix, 12);
s3::client::bucket_lister lister(client, bucket, prefix, 5);
auto close_lister = deferred_close(lister);
while (auto de = lister.get().get()) {
testlog.info("-> [{}]", de->name);
auto it = names.find(de->name);
BOOST_REQUIRE(it != names.end());
names.erase(it);
}
BOOST_REQUIRE(names.empty());
}
SEASTAR_THREAD_TEST_CASE(test_client_list_objects_incomplete) {
const sstring bucket = tests::getenv_safe("S3_BUCKET_FOR_TEST");
const sstring prefix(fmt::format("testprefix-{}/", ::getpid()));
semaphore mem(16<<20);
auto client = s3::client::make(tests::getenv_safe("S3_SERVER_ADDRESS_FOR_TEST"), make_minio_config(), mem);
auto close_client = deferred_close(*client);
populate_bucket(client, bucket, prefix, 8);
s3::client::bucket_lister lister(client, bucket, prefix, 9, 2);
auto close_lister = deferred_close(lister);
// Peek one entry to start lister work
auto de = lister.get().get();
close_lister.close_now();
}