Change *_row_* names that refer to partitions

This renames some variables and functions to make it clear that they
refer to partitions and not rows.

Old versions of sstablemetadata referred to a row histogram; current
versions report a partition histogram instead.

This patch doesn't change the exposed API names; the REST handlers that
still say "row" (e.g. get_min_row_size) only gain a FIXME comment.

Signed-off-by: Rafael Ávila de Espíndola <espindola@scylladb.com>
Message-Id: <20181229223311.4184-2-espindola@scylladb.com>
This commit is contained in:
Rafael Ávila de Espíndola
2018-12-29 17:33:10 -05:00
committed by Avi Kivity
parent f00e9051ea
commit 26ac2c23ef
5 changed files with 33 additions and 25 deletions

View File

@@ -174,27 +174,27 @@ static future<json::json_return_type> get_cf_unleveled_sstables(http_context& ct
}, std::plus<int64_t>());
}
// Smallest value reported by any sstable's partition-size histogram for this
// column family.
// NOTE(review): this is a diff hunk with the +/- markers stripped; each
// *_row_* line is the pre-patch text and the *_partition_* line right below
// it is its replacement.
static int64_t min_row_size(column_family& cf) {
static int64_t min_partition_size(column_family& cf) {
// INT64_MAX is the starting point of the min fold and doubles as a sentinel.
int64_t res = INT64_MAX;
for (auto i: *cf.get_sstables() ) {
res = std::min(res, i->get_stats_metadata().estimated_row_size.min());
res = std::min(res, i->get_stats_metadata().estimated_partition_size.min());
}
// If res was never lowered (e.g. there are no sstables), report 0 rather
// than the sentinel.
return (res == INT64_MAX) ? 0 : res;
}
// Largest value reported by any sstable's partition-size histogram for this
// column family; stays 0 when the loop never raises it (e.g. no sstables).
// NOTE(review): this is a diff hunk with the +/- markers stripped; each
// *_row_* line is the pre-patch text and the *_partition_* line right below
// it is its replacement.
static int64_t max_row_size(column_family& cf) {
static int64_t max_partition_size(column_family& cf) {
int64_t res = 0;
for (auto i: *cf.get_sstables() ) {
res = std::max(i->get_stats_metadata().estimated_row_size.max(), res);
res = std::max(i->get_stats_metadata().estimated_partition_size.max(), res);
}
return res;
}
static integral_ratio_holder mean_row_size(column_family& cf) {
static integral_ratio_holder mean_partition_size(column_family& cf) {
integral_ratio_holder res;
for (auto i: *cf.get_sstables() ) {
auto c = i->get_stats_metadata().estimated_row_size.count();
res.sub += i->get_stats_metadata().estimated_row_size.mean() * c;
auto c = i->get_stats_metadata().estimated_partition_size.count();
res.sub += i->get_stats_metadata().estimated_partition_size.mean() * c;
res.total += c;
}
return res;
@@ -411,22 +411,24 @@ void set_column_family(http_context& ctx, routes& r) {
return get_cf_stats(ctx, &column_family::stats::memtable_switch_count);
});
// FIXME: this refers to partitions, not rows.
// Handler for the "estimated row size histogram" endpoint. The API name keeps
// the historical "row" wording, but the data is the per-sstable
// estimated_partition_size histogram.
// NOTE(review): diff hunk with +/- markers stripped; the estimated_row_size
// line is the pre-patch text, the estimated_partition_size line replaces it.
cf::get_estimated_row_size_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
// The column family is selected by the "name" request parameter; the mapper
// below merges the histograms of all of its sstables into one.
return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
utils::estimated_histogram res(0);
for (auto i: *cf.get_sstables() ) {
res.merge(i->get_stats_metadata().estimated_row_size);
res.merge(i->get_stats_metadata().estimated_partition_size);
}
return res;
},
// Mapper results are combined with estimated_histogram_merge (presumably one
// result per shard — confirm against map_reduce_cf) and returned as a
// utils_json::estimated_histogram.
utils::estimated_histogram_merge, utils_json::estimated_histogram());
});
// FIXME: this refers to partitions, not rows.
cf::get_estimated_row_count.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) {
uint64_t res = 0;
for (auto i: *cf.get_sstables() ) {
res += i->get_stats_metadata().estimated_row_size.count();
res += i->get_stats_metadata().estimated_partition_size.count();
}
return res;
},
@@ -554,30 +556,36 @@ void set_column_family(http_context& ctx, routes& r) {
return sum_sstable(ctx, true);
});
// Min / max / mean "row size" endpoints. The API names keep the historical
// "row" wording (hence the FIXMEs), while the mappers passed to map_reduce_cf
// are the *_partition_size helpers.
// The plain variants pass req->param["name"] to pick a column family; the
// get_all_* variants pass no name (presumably aggregating over every column
// family — confirm against map_reduce_cf).
// NOTE(review): diff hunk with +/- markers stripped; each *_row_size mapper
// line is the pre-patch text and the *_partition_size line below replaces it.
// FIXME: this refers to partitions, not rows.
cf::get_min_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
// INT64_MAX is the initial value for the min reduction.
return map_reduce_cf(ctx, req->param["name"], INT64_MAX, min_row_size, min_int64);
return map_reduce_cf(ctx, req->param["name"], INT64_MAX, min_partition_size, min_int64);
});
// FIXME: this refers to partitions, not rows.
cf::get_all_min_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, INT64_MAX, min_row_size, min_int64);
return map_reduce_cf(ctx, INT64_MAX, min_partition_size, min_int64);
});
// FIXME: this refers to partitions, not rows.
cf::get_max_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
// 0 is the initial value for the max reduction.
return map_reduce_cf(ctx, req->param["name"], int64_t(0), max_row_size, max_int64);
return map_reduce_cf(ctx, req->param["name"], int64_t(0), max_partition_size, max_int64);
});
// FIXME: this refers to partitions, not rows.
cf::get_all_max_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, int64_t(0), max_row_size, max_int64);
return map_reduce_cf(ctx, int64_t(0), max_partition_size, max_int64);
});
// FIXME: this refers to partitions, not rows.
cf::get_mean_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
// Cassandra 3.x mean values are truncated as integrals.
// Partial results are integral_ratio_holder values combined with std::plus.
return map_reduce_cf(ctx, req->param["name"], integral_ratio_holder(), mean_row_size, std::plus<integral_ratio_holder>());
return map_reduce_cf(ctx, req->param["name"], integral_ratio_holder(), mean_partition_size, std::plus<integral_ratio_holder>());
});
// FIXME: this refers to partitions, not rows.
cf::get_all_mean_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
// Cassandra 3.x mean values are truncated as integrals.
return map_reduce_cf(ctx, integral_ratio_holder(), mean_row_size, std::plus<integral_ratio_holder>());
return map_reduce_cf(ctx, integral_ratio_holder(), mean_partition_size, std::plus<integral_ratio_holder>());
});
cf::get_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<request> req) {

View File

@@ -313,7 +313,7 @@ private:
auto rp_range = as_ring_position_range(r);
for (auto&& sstable : cf.select_sstables(rp_range)) {
count += sstable->estimated_keys_for_range(r);
hist.merge(sstable->get_stats_metadata().estimated_row_size);
hist.merge(sstable->get_stats_metadata().estimated_partition_size);
}
}
return {cf.schema(), r.start, r.end, count, count > 0 ? hist.mean() : 0};

View File

@@ -114,7 +114,7 @@ public:
}
private:
// EH of 150 can track a max value of 1697806495183 (~1.7e12), i.e., > 1.5TB
// if the recorded sizes are in bytes (TODO confirm the unit) -- not PB.
// NOTE(review): diff hunk; the _estimated_row_size line is the pre-patch
// declaration and the _estimated_partition_size line is its replacement.
utils::estimated_histogram _estimated_row_size{150};
utils::estimated_histogram _estimated_partition_size{150};
// EH of 114 can track a max value of 2395318855, i.e., > 2B cells
utils::estimated_histogram _estimated_cells_count{114};
db::replay_position _replay_position;
@@ -159,8 +159,8 @@ public:
_cardinality.offer_hashed(hashed);
}
// Records one partition's size as a sample in the partition-size histogram.
// NOTE(review): diff hunk with +/- markers stripped; the add_row_size
// signature and body lines are the pre-patch text, and the
// add_partition_size lines below them are their replacements.
void add_row_size(uint64_t row_size) {
_estimated_row_size.add(row_size);
void add_partition_size(uint64_t partition_size) {
_estimated_partition_size.add(partition_size);
}
void add_cells_count(uint64_t cells_count) {
@@ -215,7 +215,7 @@ public:
_timestamp_tracker.update(stats.timestamp_tracker);
_local_deletion_time_tracker.update(stats.local_deletion_time_tracker);
_ttl_tracker.update(stats.ttl_tracker);
add_row_size(stats.partition_size);
add_partition_size(stats.partition_size);
add_cells_count(stats.cells_count);
merge_tombstone_histogram(stats.tombstone_histogram);
update_has_legacy_counter_shards(stats.has_legacy_counter_shards);
@@ -232,7 +232,7 @@ public:
}
void construct_stats(stats_metadata& m) {
m.estimated_row_size = std::move(_estimated_row_size);
m.estimated_partition_size = std::move(_estimated_partition_size);
m.estimated_cells_count = std::move(_estimated_cells_count);
m.position = _replay_position;
m.min_timestamp = _timestamp_tracker.min();

View File

@@ -297,7 +297,7 @@ struct compaction_metadata : public metadata_base<compaction_metadata> {
};
struct stats_metadata : public metadata_base<stats_metadata> {
utils::estimated_histogram estimated_row_size;
utils::estimated_histogram estimated_partition_size;
utils::estimated_histogram estimated_cells_count;
db::replay_position position;
int64_t min_timestamp;
@@ -323,7 +323,7 @@ struct stats_metadata : public metadata_base<stats_metadata> {
switch (v) {
case sstable_version_types::mc:
return f(
estimated_row_size,
estimated_partition_size,
estimated_cells_count,
position,
min_timestamp,
@@ -347,7 +347,7 @@ struct stats_metadata : public metadata_base<stats_metadata> {
case sstable_version_types::ka:
case sstable_version_types::la:
return f(
estimated_row_size,
estimated_partition_size,
estimated_cells_count,
position,
min_timestamp,

View File

@@ -3137,7 +3137,7 @@ static void validate_stats_metadata(schema_ptr s, sstable_assertions written_sst
BOOST_REQUIRE(orig_stats.max_column_names.elements == written_stats.max_column_names.elements);
BOOST_REQUIRE_EQUAL(orig_stats.columns_count, written_stats.columns_count);
BOOST_REQUIRE_EQUAL(orig_stats.rows_count, written_stats.rows_count);
check_estimated_histogram(orig_stats.estimated_row_size, written_stats.estimated_row_size);
check_estimated_histogram(orig_stats.estimated_partition_size, written_stats.estimated_partition_size);
check_estimated_histogram(orig_stats.estimated_cells_count, written_stats.estimated_cells_count);
}