diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 36303ad3cb..37a59ceb6a 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -2387,6 +2387,48 @@ void sstable::mark_sstable_for_deletion(const schema_ptr& schema, sstring dir, i sst.mark_for_deletion(); } +/** + * Returns a pair of positions [p1, p2) into the summary entries, delimiting the entries + * covered by the specified range, or a disengaged optional (stdx::nullopt) if no entry is covered. + */ +stdx::optional> sstable::get_sample_indexes_for_range(const nonwrapping_range& range) { + auto entries_size = _summary.entries.size(); + auto search = [this](bool before, const dht::token& token) { + auto kind = before ? key::kind::before_all_keys : key::kind::after_all_keys; + key k(kind); + // Binary search is expected to never return a non-negative value here; the arithmetic below converts its negative result into an index into the entries. + return uint64_t((binary_search(_summary.entries, k, token) + 1) * -1); + }; + uint64_t left = 0; + if (range.start()) { + left = search(range.start()->is_inclusive(), range.start()->value()); + if (left == entries_size) { + // The start of the range is past the last sampled entry. + return stdx::nullopt; + } + } + uint64_t right = entries_size; + if (range.end()) { + right = search(!range.end()->is_inclusive(), range.end()->value()); + if (right == 0) { + // The first sampled entry is already past the end of the range. + return stdx::nullopt; + } + } + if (left < right) { + return stdx::optional>(stdx::in_place_t(), left, right); + } + return stdx::nullopt; +} + +uint64_t sstable::estimated_keys_for_range(const nonwrapping_range& range) { + auto sample_index_range = get_sample_indexes_for_range(range); + uint64_t sample_key_count = sample_index_range ? 
sample_index_range->second - sample_index_range->first : 0; + // adjust for the current sampling level; NOTE(review): the division is evaluated before the multiplication by sample_key_count, so any integer truncation in it is scaled up - confirm this is intended + uint64_t estimated_keys = sample_key_count * ((downsampling::BASE_SAMPLING_LEVEL * _summary.header.min_index_interval) / _summary.header.sampling_level); + return std::max(uint64_t(1), estimated_keys); +} + std::ostream& operator<<(std::ostream& os, const sstable_to_delete& std) { return os << std.name << "(" << (std.shared ? "shared" : "unshared") << ")"; diff --git a/sstables/sstables.hh b/sstables/sstables.hh index 8debd3f59b..fba7c2b3e3 100644 --- a/sstables/sstables.hh +++ b/sstables/sstables.hh @@ -291,6 +291,8 @@ public: _summary.header.min_index_interval; } + uint64_t estimated_keys_for_range(const nonwrapping_range& range); + // mark_for_deletion() specifies that a sstable isn't relevant to the // current shard, and thus can be deleted by the deletion manager, if // all shards sharing it agree. In case the sstable is unshared, it's @@ -576,6 +578,8 @@ private: write_range_tombstone(out, start, bound_kind::incl_start, end, bound_kind::incl_end, std::move(suffix), std::move(t)); } void write_collection(file_writer& out, const composite& clustering_key, const column_definition& cdef, collection_mutation_view collection); + + stdx::optional> get_sample_indexes_for_range(const nonwrapping_range& range); public: future<> read_toc();