From fa1ca8096b1a39b60281f107660be4bbfd6bc5e9 Mon Sep 17 00:00:00 2001
From: Ernest Zaslavsky
Date: Mon, 9 Feb 2026 16:06:31 +0200
Subject: [PATCH] s3_client: add tests for calc_part_size

Introduce tests that validate the corrected multipart part-size
calculation, including boundary conditions and error cases.

(cherry picked from commit 6280cb91ca2e847d3cc0a979dc19fd99316fa031)
---
 test/boost/s3_test.cc | 75 +++++++++++++++++++++++++++++++++++++++++++
 utils/s3/client.cc    | 48 +++++++++++++--------------
 utils/s3/client.hh    |  2 ++
 3 files changed, 101 insertions(+), 24 deletions(-)

diff --git a/test/boost/s3_test.cc b/test/boost/s3_test.cc
index a4cc70da1a..df177ca514 100644
--- a/test/boost/s3_test.cc
+++ b/test/boost/s3_test.cc
@@ -1007,3 +1007,78 @@ BOOST_AUTO_TEST_CASE(s3_fqn_manipulation) {
     BOOST_REQUIRE_EQUAL(bucket_name, "bucket");
     BOOST_REQUIRE_EQUAL(object_name, "prefix1/prefix2/foo.bar");
 }
+
+BOOST_AUTO_TEST_CASE(part_size_calculation_test) {
+    {
+        BOOST_REQUIRE_EXCEPTION(s3::calc_part_size(490_GiB, 5_MiB), std::runtime_error, [](const std::runtime_error& e) {
+            return std::string(e.what()).starts_with("too many parts: 100352 > 10000");
+        });
+    }
+    {
+        auto [parts, size] = s3::calc_part_size(490_GiB, 100_MiB);
+        BOOST_REQUIRE_EQUAL(size, 100_MiB);
+        BOOST_REQUIRE(parts == 5018);
+    }
+    {
+        BOOST_REQUIRE_EXCEPTION(s3::calc_part_size(490_GiB, 4_MiB), std::runtime_error, [](const std::runtime_error& e) {
+            return std::string(e.what()).starts_with("part_size too small: 4194304 is smaller than minimum part size: 5242880");
+        });
+    }
+    {
+        auto [parts, size] = s3::calc_part_size(50_MiB, 0);
+        BOOST_REQUIRE_EQUAL(size, 50_MiB);
+        BOOST_REQUIRE_EQUAL(parts, 1);
+    }
+    {
+        auto [parts, size] = s3::calc_part_size(49_MiB, 0);
+        BOOST_REQUIRE_EQUAL(size, 50_MiB);
+        BOOST_REQUIRE_EQUAL(parts, 1);
+    }
+    {
+        auto [parts, size] = s3::calc_part_size(490_GiB, 0);
+        BOOST_REQUIRE_EQUAL(size, 51_MiB);
+        BOOST_REQUIRE(parts == 9839);
+    }
+    {
+        auto [parts, size] = s3::calc_part_size(50_MiB * 10000, 0);
+        BOOST_REQUIRE_EQUAL(size, 50_MiB);
+        BOOST_REQUIRE_EQUAL(parts, 10000);
+    }
+    {
+        auto [parts, size] = s3::calc_part_size(50_MiB * 10000 + 1, 0);
+        BOOST_REQUIRE(size > 50_MiB);
+        BOOST_REQUIRE(parts <= 10000);
+    }
+    {
+        auto [parts, size] = s3::calc_part_size(10_TiB, 0);
+        BOOST_REQUIRE(parts <= 10000);
+        BOOST_REQUIRE(size >= 50_MiB);
+    }
+    {
+        auto [parts, size] = s3::calc_part_size(5_MiB * 10000, 5_MiB);
+        BOOST_REQUIRE_EQUAL(size, 5_MiB);
+        BOOST_REQUIRE_EQUAL(parts, 10000);
+    }
+    {
+        size_t total = 5_MiB * 10001; // 10001 parts at 5 MiB
+        BOOST_REQUIRE_EXCEPTION(
+            s3::calc_part_size(total, 5_MiB), std::runtime_error, [](auto& e) { return std::string(e.what()).starts_with("too many parts: 10001 > 10000"); });
+    }
+    {
+        size_t total = 500_GiB + 123; // odd size to force non-MiB alignment
+        auto [parts, size] = s3::calc_part_size(total, 0);
+
+        BOOST_REQUIRE(size % 1_MiB == 0); // aligned
+        BOOST_REQUIRE(parts <= 10000);
+    }
+    {
+        auto [parts, size] = s3::calc_part_size(6_MiB, 0);
+        BOOST_REQUIRE_EQUAL(size, 50_MiB);
+        BOOST_REQUIRE_EQUAL(parts, 1);
+    }
+    {
+        auto [parts, size] = s3::calc_part_size(100_MiB, 200_MiB);
+        BOOST_REQUIRE_EQUAL(parts, 1);
+        BOOST_REQUIRE_EQUAL(size, 200_MiB);
+    }
+}
diff --git a/utils/s3/client.cc b/utils/s3/client.cc
index 127f088ae0..0963e5f9ed 100644
--- a/utils/s3/client.cc
+++ b/utils/s3/client.cc
@@ -1522,30 +1522,6 @@ class client::do_upload_file : private multipart_upload {
         }).finally([gh = std::move(gh)] {});
     }
 
-    // returns pair<number of parts, part size>
-    static std::pair<size_t, size_t> calc_part_size(size_t total_size, size_t part_size) {
-        if (part_size > 0) {
-            if (part_size < aws_minimum_part_size) {
-                on_internal_error(s3l, fmt::format("part_size too small: {} is smaller than minimum part size: {}", part_size, aws_minimum_part_size));
-            }
-            const size_t num_parts = div_ceil(total_size, part_size);
-            if (num_parts > aws_maximum_parts_in_piece) {
-                on_internal_error(s3l, fmt::format("too many parts: {} > {}", num_parts, aws_maximum_parts_in_piece));
-            }
-            return {num_parts, part_size};
-        }
-        // if part_size is 0, this means the caller leaves it to us to decide the part_size. The default part size for multipart upload is set to 50MiB. This
-        // value was determined empirically by running `perf_s3_client` with various part sizes to find the optimal one.
-        static constexpr size_t default_part_size = 50_MiB;
-        const size_t num_parts = div_ceil(total_size, default_part_size);
-        if (num_parts <= aws_maximum_parts_in_piece) {
-            return {num_parts, default_part_size};
-        }
-
-        part_size = align_up(div_ceil(total_size, aws_maximum_parts_in_piece), 1_MiB);
-        return {div_ceil(total_size, part_size), part_size};
-    }
-
     future<> multi_part_upload(file&& f, uint64_t total_size, size_t part_size) {
         co_await start_upload();
@@ -1911,4 +1887,28 @@ future<> client::bucket_lister::close() noexcept {
     }
 }
 
+// returns pair<number of parts, part size>
+std::pair<size_t, size_t> calc_part_size(size_t total_size, size_t part_size) {
+    if (part_size > 0) {
+        if (part_size < aws_minimum_part_size) {
+            on_internal_error(s3l, fmt::format("part_size too small: {} is smaller than minimum part size: {}", part_size, aws_minimum_part_size));
+        }
+        const size_t num_parts = div_ceil(total_size, part_size);
+        if (num_parts > aws_maximum_parts_in_piece) {
+            on_internal_error(s3l, fmt::format("too many parts: {} > {}", num_parts, aws_maximum_parts_in_piece));
+        }
+        return {num_parts, part_size};
+    }
+    // if part_size is 0, this means the caller leaves it to us to decide the part_size. The default part size for multipart upload is set to 50MiB. This
+    // value was determined empirically by running `perf_s3_client` with various part sizes to find the optimal one.
+    static constexpr size_t default_part_size = 50_MiB;
+    const size_t num_parts = div_ceil(total_size, default_part_size);
+    if (num_parts <= aws_maximum_parts_in_piece) {
+        return {num_parts, default_part_size};
+    }
+
+    part_size = align_up(div_ceil(total_size, aws_maximum_parts_in_piece), 1_MiB);
+    return {div_ceil(total_size, part_size), part_size};
+}
+
 } // s3 namespace
diff --git a/utils/s3/client.hh b/utils/s3/client.hh
index 98bc66b255..ab982d9992 100644
--- a/utils/s3/client.hh
+++ b/utils/s3/client.hh
@@ -231,6 +231,8 @@ public:
     future<> close();
 };
 
+std::pair<size_t, size_t> calc_part_size(size_t total_size, size_t part_size);
+
 } // namespace s3
 
 template <>