mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-23 01:50:35 +00:00
sstable statistics file
Signed-off-by: Glauber Costa <glommer@cloudius-systems.com> Reviewed-by: Nadav Har'El <nyh@cloudius-systems.com>
This commit is contained in:
@@ -242,6 +242,74 @@ future<> parse(file_input_stream& in, summary& s) {
|
||||
});
|
||||
}
|
||||
|
||||
// Parse a commitlog replay position: segment id followed by the offset
// within that segment, in that (on-disk) order.
future<> parse(file_input_stream& in, struct replay_position& rp) {
    return parse(in, rp.segment, rp.position);
}
|
||||
|
||||
// Parse one estimated-histogram element: bucket offset, then the bucket
// count, matching the serialized order.
future<> parse(file_input_stream& in, estimated_histogram::eh_elem &e) {
    return parse(in, e.offset, e.bucket);
}
|
||||
|
||||
// Parse an estimated histogram as a length-prefixed array of
// (offset, bucket) elements; delegates to the disk_array parser.
future<> parse(file_input_stream& in, estimated_histogram &e) {
    return parse(in, e.elements);
}
|
||||
|
||||
// Parse a streaming histogram: the maximum bin count first, then the
// serialized bin map (point -> value).
future<> parse(file_input_stream& in, streaming_histogram &h) {
    return parse(in, h.max_bin_size, h.hash);
}
|
||||
|
||||
// Parse the Validation metadata component: partitioner class name, then
// the bloom filter false-positive chance.
future<> parse(file_input_stream& in, validation_metadata& m) {
    return parse(in, m.partitioner, m.filter_chance);
}
|
||||
|
||||
// Parse the Compaction metadata component: ancestor generation list, then
// the serialized cardinality estimator bytes.
future<> parse(file_input_stream& in, compaction_metadata& m) {
    return parse(in, m.ancestors, m.cardinality);
}
|
||||
|
||||
template <typename Child>
|
||||
future<> parse(file_input_stream& in, std::unique_ptr<metadata>& p) {
|
||||
p.reset(new Child);
|
||||
return parse(in, *static_cast<Child *>(p.get()));
|
||||
}
|
||||
|
||||
// Parse the Stats metadata component. The field order below is the on-disk
// serialization order and must not be changed.
future<> parse(file_input_stream& in, stats_metadata& m) {
    return parse(in,
        m.estimated_row_size,
        m.estimated_column_count,
        m.position,                       // commitlog replay position
        m.min_timestamp,
        m.max_timestamp,
        m.max_local_deletion_time,
        m.compression_ratio,
        m.estimated_tombstone_drop_time,
        m.sstable_level,
        m.repaired_at,
        m.min_column_names,
        m.max_column_names,
        m.has_legacy_counter_shards
    );
}
|
||||
|
||||
// Parse the whole Statistics component. First the table of contents (a
// map of metadata type -> file offset) is read; then, for each entry, we
// seek to its offset and parse the corresponding typed metadata object
// into s.contents. Unknown metadata types are logged and skipped rather
// than treated as an error, so newer formats degrade gracefully.
future<> parse(file_input_stream& in, statistics& s) {
    return parse(in, s.hash).then([&in, &s] {
        // Take the map entry by const reference: copying the pair on every
        // iteration is wasted work, and nothing here mutates it. All state
        // is captured by reference, so no 'mutable' is needed either.
        return do_for_each(s.hash.map.begin(), s.hash.map.end(), [&in, &s] (const auto& val) {
            in.seek(val.second); // val.second is the component's file offset

            switch (val.first) {
            case metadata_type::Validation:
                return parse<validation_metadata>(in, s.contents[val.first]);
            case metadata_type::Compaction:
                return parse<compaction_metadata>(in, s.contents[val.first]);
            case metadata_type::Stats:
                return parse<stats_metadata>(in, s.contents[val.first]);
            default:
                sstlog.warn("Invalid metadata type at Statistics file: {} ", int(val.first));
                return make_ready_future<>();
            }
        });
    });
}
|
||||
|
||||
// This is small enough, and well-defined. Easier to just read it all
|
||||
// at once
|
||||
future<> sstable::read_toc() {
|
||||
@@ -334,8 +402,14 @@ future<> sstable::read_compression() {
|
||||
return read_simple<compression, component_type::CompressionInfo, &sstable::_compression>();
|
||||
}
|
||||
|
||||
// Read and parse the Statistics.db component into _statistics.
future<> sstable::read_statistics() {
    return read_simple<statistics, component_type::Statistics, &sstable::_statistics>();
}
|
||||
|
||||
future<> sstable::load() {
|
||||
return read_toc().then([this] {
|
||||
return read_statistics();
|
||||
}).then([this] {
|
||||
return read_compression();
|
||||
}).then([this] {
|
||||
return read_filter();
|
||||
|
||||
@@ -53,6 +53,7 @@ private:
|
||||
compression _compression;
|
||||
filter _filter;
|
||||
summary _summary;
|
||||
statistics _statistics;
|
||||
|
||||
sstring _dir;
|
||||
unsigned long _epoch = 0;
|
||||
@@ -74,6 +75,7 @@ private:
|
||||
// Read and parse the Summary component into _summary.
future<> read_summary() {
    return read_simple<summary, component_type::Summary, &sstable::_summary>();
}
|
||||
future<> read_statistics();
|
||||
|
||||
public:
|
||||
sstable(sstring dir, unsigned long epoch, version_types v, format_types f) : _dir(dir), _epoch(epoch), _version(v), _format(f) {}
|
||||
|
||||
@@ -82,4 +82,68 @@ struct summary_la {
|
||||
disk_array<uint32_t, summary_entry> entries;
|
||||
};
|
||||
using summary = summary_la;
|
||||
|
||||
// Histogram with precomputed bucket offsets, stored on disk as a
// length-prefixed array of (offset, bucket) pairs. Member order mirrors
// the serialized layout — do not reorder.
struct estimated_histogram {
    struct eh_elem {
        uint64_t offset;  // bucket boundary
        uint64_t bucket;  // count of samples in this bucket
    };

    disk_array<uint32_t, eh_elem> elements;
};
|
||||
|
||||
// Commitlog replay position: a segment identifier plus a byte offset
// within that segment. Serialized in this member order.
struct replay_position {
    uint64_t segment;
    uint32_t position;
};
|
||||
|
||||
// Streaming histogram: a cap on the number of bins followed by the
// serialized bin map. Member order mirrors the on-disk layout.
struct streaming_histogram {
    uint32_t max_bin_size;
    disk_hash<uint32_t, double, uint64_t> hash;
};
|
||||
|
||||
// Empty base class for the typed metadata components stored in the
// Statistics file; lets statistics::contents hold them uniformly through
// unique_ptr<metadata>.
struct metadata {
};
|
||||
|
||||
// Validation metadata: the partitioner class name and the bloom filter
// false-positive chance. Member order mirrors the on-disk layout.
struct validation_metadata : public metadata {
    disk_string<uint16_t> partitioner;
    double filter_chance;
};
|
||||
|
||||
// Compaction metadata: ancestor sstable generations and the raw bytes of
// the cardinality estimator. Member order mirrors the on-disk layout.
struct compaction_metadata : public metadata {
    disk_array<uint32_t, uint32_t> ancestors;
    disk_array<uint32_t, uint8_t> cardinality;
};
|
||||
|
||||
// Stats metadata for the "la" sstable format. Member order is the exact
// on-disk serialization order (see the matching parse() overload) — do
// not reorder fields.
struct la_stats_metadata : public metadata {
    estimated_histogram estimated_row_size;
    estimated_histogram estimated_column_count;
    replay_position position;              // commitlog replay position
    uint64_t min_timestamp;
    uint64_t max_timestamp;
    uint32_t max_local_deletion_time;
    double compression_ratio;
    streaming_histogram estimated_tombstone_drop_time;
    uint32_t sstable_level;
    uint64_t repaired_at;
    disk_array<uint32_t, disk_string<uint16_t>> min_column_names;
    disk_array<uint32_t, disk_string<uint16_t>> max_column_names;
    bool has_legacy_counter_shards;
};
// The current format's stats layout; aliased so callers don't hard-code
// the versioned name.
using stats_metadata = la_stats_metadata;
|
||||
|
||||
// Numbers are found on disk, so they do matter. Also, setting their sizes of
// that of an uint32_t is a bit wasteful, but it simplifies the code a lot
// since we can now still use a strongly typed enum without introducing a
// notion of "disk-size" vs "memory-size".
enum class metadata_type : uint32_t {
    Validation = 0,
    Compaction = 1,
    Stats = 2,
};
|
||||
|
||||
// In-memory form of the Statistics file: the on-disk table of contents
// (metadata type -> file offset), plus the parsed typed components keyed
// by that same type.
struct statistics {
    disk_hash<uint32_t, metadata_type, uint32_t> hash;
    std::unordered_map<metadata_type, std::unique_ptr<metadata>> contents;
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user