sstable: Use filter memory footprint in filter_size()

For S3, filter size is currently set to zero, as we want to avoid
"fstat-ing" each file.

The on-disk representation of the bloom filter is similar to the in-memory
one, so let's use the memory footprint in filter_size().

The user of filter_size() is the API implementing "nodetool cfstats", which
cares about the size of the bloom filter data (that's how it's
described).

This way, we provide the filter data size regardless of the
underlying storage type.

Refs #13649.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
This commit is contained in:
Raphael S. Carvalho
2023-04-26 10:35:05 -03:00
parent 47d7939b8f
commit 0dcdec6a55
2 changed files with 6 additions and 15 deletions

View File

@@ -1469,9 +1469,6 @@ future<> sstable::update_info_for_opened_data(sstable_open_config cfg) {
_run_identifier = _components->scylla_metadata->get_optional_run_identifier().value_or(run_id::create_random_id());
auto stat = co_await _storage->get_stats(*this);
_bytes_on_disk = stat.bytes_on_disk;
if (stat.filter_file_size != 0) {
_filter_file_size = stat.filter_file_size;
}
if (cfg.load_first_and_last_position_metadata) {
co_await load_first_and_last_position_in_partition();
}
@@ -1499,13 +1496,6 @@ future<sstable::storage::stat> sstable::filesystem_storage::get_stats(const ssta
ret.bytes_on_disk += bytes;
}
if (sst.has_component(component_type::Filter)) {
auto size = co_await io_check([&] {
return file_size(sst.filename(component_type::Filter));
});
ret.filter_file_size = size;
}
co_return ret;
}
@@ -2007,6 +1997,10 @@ uint64_t sstable::bytes_on_disk() const {
return _bytes_on_disk;
}
// Returns the size of the bloom filter data, as reported by the filter
// component's memory_size().
uint64_t sstable::filter_size() const {
return _components->filter->memory_size();
}
// Returns true if component type `f` is contained in _recognized_components.
const bool sstable::has_component(component_type f) const {
return _recognized_components.contains(f);
}

View File

@@ -317,9 +317,8 @@ public:
return _index_file;
}
file uncached_index_file();
uint64_t filter_size() const {
return _filter_file_size;
}
// Returns size of bloom filter data.
uint64_t filter_size() const;
db_clock::time_point data_file_write_time() const {
return _data_file_write_time;
@@ -496,7 +495,6 @@ public:
virtual future<data_sink> make_component_sink(sstable& sst, component_type type, open_flags oflags, file_output_stream_options options) = 0;
struct stat {
uint64_t bytes_on_disk = 0;
uint64_t filter_file_size = 0;
};
virtual future<stat> get_stats(const sstable& sst) = 0;
@@ -537,7 +535,6 @@ private:
file _data_file;
uint64_t _data_file_size;
uint64_t _index_file_size;
uint64_t _filter_file_size = 0;
uint64_t _bytes_on_disk = 0;
db_clock::time_point _data_file_write_time;
position_range _min_max_position_range = position_range::all_clustered_rows();