sstables: Get estimates for a particular range

This patch adds the estimated_keys_for_range() function, which
estimates the number of keys present between the specified range.

Signed-off-by: Duarte Nunes <duarte@scylladb.com>
This commit is contained in:
Duarte Nunes
2016-09-23 15:02:26 +02:00
parent 8c223b31c8
commit ceed09b23e
2 changed files with 46 additions and 0 deletions

View File

@@ -2387,6 +2387,48 @@ void sstable::mark_sstable_for_deletion(const schema_ptr& schema, sstring dir, i
sst.mark_for_deletion();
}
/**
* Returns a pair of positions [p1, p2) in the summary file corresponding to entries
* covered by the specified range, or a disengaged optional if no such pair exists.
*/
stdx::optional<std::pair<uint64_t, uint64_t>> sstable::get_sample_indexes_for_range(const nonwrapping_range<dht::token>& range) {
auto entries_size = _summary.entries.size();
auto search = [this](bool before, const dht::token& token) {
auto kind = before ? key::kind::before_all_keys : key::kind::after_all_keys;
key k(kind);
// Binary search will never returns positive values.
return uint64_t((binary_search(_summary.entries, k, token) + 1) * -1);
};
uint64_t left = 0;
if (range.start()) {
left = search(range.start()->is_inclusive(), range.start()->value());
if (left == entries_size) {
// left is past the end of the sampling.
return stdx::nullopt;
}
}
uint64_t right = entries_size;
if (range.end()) {
right = search(!range.end()->is_inclusive(), range.end()->value());
if (right == 0) {
// The first key is strictly greater than right.
return stdx::nullopt;
}
}
if (left < right) {
return stdx::optional<std::pair<uint64_t, uint64_t>>(stdx::in_place_t(), left, right);
}
return stdx::nullopt;
}
uint64_t sstable::estimated_keys_for_range(const nonwrapping_range<dht::token>& range) {
auto sample_index_range = get_sample_indexes_for_range(range);
uint64_t sample_key_count = sample_index_range ? sample_index_range->second - sample_index_range->first : 0;
// adjust for the current sampling level
uint64_t estimated_keys = sample_key_count * ((downsampling::BASE_SAMPLING_LEVEL * _summary.header.min_index_interval) / _summary.header.sampling_level);
return std::max(uint64_t(1), estimated_keys);
}
std::ostream&
operator<<(std::ostream& os, const sstable_to_delete& std) {
return os << std.name << "(" << (std.shared ? "shared" : "unshared") << ")";

View File

@@ -291,6 +291,8 @@ public:
_summary.header.min_index_interval;
}
uint64_t estimated_keys_for_range(const nonwrapping_range<dht::token>& range);
// mark_for_deletion() specifies that a sstable isn't relevant to the
// current shard, and thus can be deleted by the deletion manager, if
// all shards sharing it agree. In case the sstable is unshared, it's
@@ -576,6 +578,8 @@ private:
write_range_tombstone(out, start, bound_kind::incl_start, end, bound_kind::incl_end, std::move(suffix), std::move(t));
}
void write_collection(file_writer& out, const composite& clustering_key, const column_definition& cdef, collection_mutation_view collection);
stdx::optional<std::pair<uint64_t, uint64_t>> get_sample_indexes_for_range(const nonwrapping_range<dht::token>& range);
public:
future<> read_toc();