From 0dcdec6a556188edfe98784922f4560c5eff643e Mon Sep 17 00:00:00 2001 From: "Raphael S. Carvalho" Date: Wed, 26 Apr 2023 10:35:05 -0300 Subject: [PATCH] sstable: Use filter memory footprint in filter_size() For S3, the filter size is currently set to zero, as we want to avoid "fstat-ing" each file. The on-disk representation of the bloom filter is similar to the in-memory one, so let's use the memory footprint in filter_size(). The user of filter_size() is the API implementing "nodetool cfstats", and it cares about the size of the bloom filter data (that's how it's described). This way, we provide the filter data size regardless of the underlying storage type. Refs #13649. Signed-off-by: Raphael S. Carvalho --- sstables/sstables.cc | 14 ++++---------- sstables/sstables.hh | 7 ++----- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 0c7df2b61d..71c44c88b9 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -1469,9 +1469,6 @@ future<> sstable::update_info_for_opened_data(sstable_open_config cfg) { _run_identifier = _components->scylla_metadata->get_optional_run_identifier().value_or(run_id::create_random_id()); auto stat = co_await _storage->get_stats(*this); _bytes_on_disk = stat.bytes_on_disk; - if (stat.filter_file_size != 0) { - _filter_file_size = stat.filter_file_size; - } if (cfg.load_first_and_last_position_metadata) { co_await load_first_and_last_position_in_partition(); } @@ -1499,13 +1496,6 @@ future sstable::filesystem_storage::get_stats(const ssta ret.bytes_on_disk += bytes; } - if (sst.has_component(component_type::Filter)) { - auto size = co_await io_check([&] { - return file_size(sst.filename(component_type::Filter)); - }); - ret.filter_file_size = size; - } - co_return ret; } @@ -2007,6 +1997,10 @@ uint64_t sstable::bytes_on_disk() const { return _bytes_on_disk; } +uint64_t sstable::filter_size() const { + return _components->filter->memory_size(); +} + const bool 
sstable::has_component(component_type f) const { return _recognized_components.contains(f); } diff --git a/sstables/sstables.hh b/sstables/sstables.hh index 7225af9e9e..d116a0e076 100644 --- a/sstables/sstables.hh +++ b/sstables/sstables.hh @@ -317,9 +317,8 @@ public: return _index_file; } file uncached_index_file(); - uint64_t filter_size() const { - return _filter_file_size; - } + // Returns size of bloom filter data. + uint64_t filter_size() const; db_clock::time_point data_file_write_time() const { return _data_file_write_time; @@ -496,7 +495,6 @@ public: virtual future make_component_sink(sstable& sst, component_type type, open_flags oflags, file_output_stream_options options) = 0; struct stat { uint64_t bytes_on_disk = 0; - uint64_t filter_file_size = 0; }; virtual future get_stats(const sstable& sst) = 0; @@ -537,7 +535,6 @@ private: file _data_file; uint64_t _data_file_size; uint64_t _index_file_size; - uint64_t _filter_file_size = 0; uint64_t _bytes_on_disk = 0; db_clock::time_point _data_file_write_time; position_range _min_max_position_range = position_range::all_clustered_rows();