sstables: add parsing of the sstable Statistics file component

Signed-off-by: Glauber Costa <glommer@cloudius-systems.com>
Reviewed-by: Nadav Har'El <nyh@cloudius-systems.com>
This commit is contained in:
Glauber Costa
2015-02-19 14:30:07 -05:00
parent 0d98caf885
commit fb3682cb4f
3 changed files with 140 additions and 0 deletions

View File

@@ -242,6 +242,74 @@ future<> parse(file_input_stream& in, summary& s) {
});
}
// Parses a replay_position: segment id followed by the position within
// that segment, in on-disk field order.
future<> parse(file_input_stream& in, struct replay_position& rp) {
return parse(in, rp.segment, rp.position);
}
// Parses one (offset, bucket) element of an estimated_histogram.
future<> parse(file_input_stream& in, estimated_histogram::eh_elem &e) {
return parse(in, e.offset, e.bucket);
}
// Parses an estimated_histogram as a length-prefixed array of elements.
future<> parse(file_input_stream& in, estimated_histogram &e) {
return parse(in, e.elements);
}
// Parses a streaming_histogram: the maximum bin count, then the
// bin -> count hash itself.
future<> parse(file_input_stream& in, streaming_histogram &h) {
return parse(in, h.max_bin_size, h.hash);
}
// Parses the Validation metadata block: the partitioner string and the
// filter chance double, in on-disk field order.
future<> parse(file_input_stream& in, validation_metadata& m) {
return parse(in, m.partitioner, m.filter_chance);
}
// Parses the Compaction metadata block: the ancestors array and the
// cardinality byte array, in on-disk field order.
future<> parse(file_input_stream& in, compaction_metadata& m) {
return parse(in, m.ancestors, m.cardinality);
}
// Allocates a fresh Child, hands ownership to p (through the base-class
// pointer), and parses the on-disk data directly into the new object.
// Child is not deducible from the arguments, so call sites always name it
// explicitly, e.g. parse<validation_metadata>(in, ...).
// Using make_unique plus a typed reference avoids both the raw `new` and
// the static_cast downcast of the original.
template <typename Child>
future<> parse(file_input_stream& in, std::unique_ptr<metadata>& p) {
    auto child = std::make_unique<Child>();
    auto& obj = *child;     // typed reference taken before ownership moves
    p = std::move(child);
    // p, held by the caller, keeps obj alive while the parse is in flight.
    return parse(in, obj);
}
// Parses the Stats metadata block. The argument list must stay in exactly
// this order: it is the member order of stats_metadata, which in turn is
// the on-disk serialization order.
future<> parse(file_input_stream& in, stats_metadata& m) {
return parse(in,
m.estimated_row_size,
m.estimated_column_count,
m.position,
m.min_timestamp,
m.max_timestamp,
m.max_local_deletion_time,
m.compression_ratio,
m.estimated_tombstone_drop_time,
m.sstable_level,
m.repaired_at,
m.min_column_names,
m.max_column_names,
m.has_legacy_counter_shards
);
}
// Parses the Statistics component: first its table of contents (a hash
// mapping each metadata_type to an absolute file offset), then each
// referenced metadata block in turn, seeking to its recorded offset.
future<> parse(file_input_stream& in, statistics& s) {
    return parse(in, s.hash).then([&in, &s] {
        return do_for_each(s.hash.map.begin(), s.hash.map.end(), [&in, &s] (const auto& entry) {
            auto type = entry.first;
            // Each block lives at the absolute offset stored in the TOC.
            in.seek(entry.second);
            if (type == metadata_type::Validation) {
                return parse<validation_metadata>(in, s.contents[type]);
            }
            if (type == metadata_type::Compaction) {
                return parse<compaction_metadata>(in, s.contents[type]);
            }
            if (type == metadata_type::Stats) {
                return parse<stats_metadata>(in, s.contents[type]);
            }
            // An unrecognized type is skipped rather than treated as fatal.
            sstlog.warn("Invalid metadata type at Statistics file: {} ", int(type));
            return make_ready_future<>();
        });
    });
}
// This is small enough, and well-defined. Easier to just read it all
// at once
future<> sstable::read_toc() {
@@ -334,8 +402,14 @@ future<> sstable::read_compression() {
return read_simple<compression, component_type::CompressionInfo, &sstable::_compression>();
}
// Reads and parses the Statistics component into _statistics.
future<> sstable::read_statistics() {
return read_simple<statistics, component_type::Statistics, &sstable::_statistics>();
}
future<> sstable::load() {
return read_toc().then([this] {
return read_statistics();
}).then([this] {
return read_compression();
}).then([this] {
return read_filter();

View File

@@ -53,6 +53,7 @@ private:
// In-memory copies of the sstable's parsed components, each filled in by
// the corresponding read_*() method (presumably all driven from load() —
// only part of load() is visible here).
compression _compression;
filter _filter;
summary _summary;
statistics _statistics;
// Identity of the sstable on disk: directory and generation number.
sstring _dir;
unsigned long _epoch = 0;
@@ -74,6 +75,7 @@ private:
// Reads and parses the Summary component into _summary.
future<> read_summary() {
return read_simple<summary, component_type::Summary, &sstable::_summary>();
}
future<> read_statistics();
public:
// Records the sstable's identity only; no I/O is performed until load().
sstable(sstring dir, unsigned long epoch, version_types v, format_types f) : _dir(dir), _epoch(epoch), _version(v), _format(f) {}

View File

@@ -82,4 +82,68 @@ struct summary_la {
disk_array<uint32_t, summary_entry> entries;
};
using summary = summary_la;
// Histogram stored on disk as a length-prefixed array of (offset, bucket)
// pairs. Presumably mirrors Cassandra's EstimatedHistogram — confirm
// against the sstable format spec.
struct estimated_histogram {
struct eh_elem {
uint64_t offset;
uint64_t bucket;
};
disk_array<uint32_t, eh_elem> elements;
};
// A position in a commit-log segment: segment id plus offset within it.
// NOTE(review): presumably marks how far the commit log is covered by
// this sstable's data — confirm against the format spec.
struct replay_position {
uint64_t segment;
uint32_t position;
};
// Histogram with a bounded number of bins, stored as a bin -> count hash.
// Presumably mirrors Cassandra's StreamingHistogram (max_bin_size caps the
// bin count) — confirm against the format spec.
struct streaming_histogram {
uint32_t max_bin_size;
disk_hash<uint32_t, double, uint64_t> hash;
};
// Polymorphic base for the typed metadata blocks in the Statistics
// component (validation/compaction/stats). Instances are owned through
// std::unique_ptr<metadata> (statistics::contents, filled by
// parse<Child>()), so the destructor MUST be virtual: destroying a derived
// block through the base pointer without one is undefined behavior.
// Parsing is member-wise, so adding a vtable does not affect the on-disk
// format.
struct metadata {
    virtual ~metadata() = default;
};
// Validation metadata block (metadata_type::Validation).
struct validation_metadata : public metadata {
// Length-prefixed partitioner string — presumably the partitioner class
// name, as in Cassandra; confirm.
disk_string<uint16_t> partitioner;
// Presumably the bloom filter false-positive chance — confirm.
double filter_chance;
};
// Compaction metadata block (metadata_type::Compaction).
struct compaction_metadata : public metadata {
// Presumably generations of the sstables compacted into this one — confirm.
disk_array<uint32_t, uint32_t> ancestors;
// Opaque serialized cardinality-estimator state (raw bytes).
disk_array<uint32_t, uint8_t> cardinality;
};
// Stats metadata block (metadata_type::Stats) for the "la" sstable format.
// Member order here IS the on-disk serialization order — do not reorder
// (see parse(file_input_stream&, stats_metadata&)).
struct la_stats_metadata : public metadata {
estimated_histogram estimated_row_size;
estimated_histogram estimated_column_count;
replay_position position;
// Timestamp units are not specified here — presumably microseconds as in
// Cassandra; confirm.
uint64_t min_timestamp;
uint64_t max_timestamp;
uint32_t max_local_deletion_time;
double compression_ratio;
streaming_histogram estimated_tombstone_drop_time;
uint32_t sstable_level;
uint64_t repaired_at;
disk_array<uint32_t, disk_string<uint16_t>> min_column_names;
disk_array<uint32_t, disk_string<uint16_t>> max_column_names;
bool has_legacy_counter_shards;
};
// Alias for the current on-disk format; update when a new format version
// introduces its own stats layout.
using stats_metadata = la_stats_metadata;
// Numbers are found on disk, so they do matter. Also, setting their sizes of
// that of an uint32_t is a bit wasteful, but it simplifies the code a lot
// since we can now still use a strongly typed enum without introducing a
// notion of "disk-size" vs "memory-size".
// Values must match those written in the Statistics file; never renumber.
enum class metadata_type : uint32_t {
Validation = 0,
Compaction = 1,
Stats = 2,
};
// The Statistics sstable component: a table of contents mapping each
// metadata type to its absolute file offset (hash), plus the parsed,
// type-erased blocks themselves (contents), both filled in by
// parse(file_input_stream&, statistics&).
struct statistics {
disk_hash<uint32_t, metadata_type, uint32_t> hash;
// NOTE(review): relies on std::hash being usable with an enum-class key
// (guaranteed by C++14) — verify the toolchain baseline.
std::unordered_map<metadata_type, std::unique_ptr<metadata>> contents;
};
}