sstables: Store raw token into summary entries
Scylla stores a dht::token in each summary entry, for convenience. But that costs us 16 bytes per summary entry, because dht::token has a kind field in addition to its data, and both are 64 bits wide. With 1 million partitions, each averaging 4 KB, a summary may end up with ~90k entries, so dht::token alone adds ~1.5 MB to the memory footprint of the summary. We know that the summary samples index keys, so no token in any summary entry can have a token kind other than 'key'. Therefore, we can save 8 bytes per summary entry by storing a 64-bit raw token and converting it back into a dht::token whenever needed. The memory footprint of the entries in a summary, measured as sizeof(summary_entry) * entries.size(), goes from 1771520 bytes to 1417216 bytes, which is explained by the 8-byte reduction per summary entry. Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
This commit is contained in:
@@ -554,7 +554,7 @@ future<> parse(const schema& schema, sstable_version_types v, random_access_read
|
||||
// position is little-endian encoded
|
||||
auto position = seastar::read_le<uint64_t>(buf.get());
|
||||
auto token = schema.get_partitioner().get_token(key_view(key_data));
|
||||
s.entries.push_back({ token, key_data, position });
|
||||
s.entries.push_back(summary_entry{ token, key_data, position });
|
||||
}
|
||||
// Delete last element which isn't part of the on-disk format.
|
||||
s.positions.pop_back();
|
||||
@@ -1689,7 +1689,7 @@ void maybe_add_summary_entry(summary& s, const dht::token& token, bytes_view key
|
||||
auto entry_size = 8 + 2 + key.size(); // offset + key_size.size + key.size
|
||||
state.next_data_offset_to_write_summary += state.summary_byte_cost * entry_size;
|
||||
auto key_data = s.add_summary_data(key);
|
||||
s.entries.push_back({ token, key_data, index_offset });
|
||||
s.entries.push_back(summary_entry{ token, key_data, index_offset });
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -126,16 +126,26 @@ inline std::ostream& operator<<(std::ostream& o, indexable_element e) {
|
||||
|
||||
class summary_entry {
|
||||
public:
|
||||
dht::token token;
|
||||
int64_t raw_token;
|
||||
bytes_view key;
|
||||
uint64_t position;
|
||||
|
||||
explicit summary_entry(dht::token token, bytes_view key, uint64_t position)
|
||||
: raw_token(dht::token::to_int64(token))
|
||||
, key(key)
|
||||
, position(position) {
|
||||
}
|
||||
|
||||
key_view get_key() const {
|
||||
return key_view{key};
|
||||
}
|
||||
|
||||
dht::token get_token() const {
|
||||
return dht::token::from_int64(raw_token);
|
||||
}
|
||||
|
||||
decorated_key_view get_decorated_key() const {
|
||||
return decorated_key_view(token, get_key());
|
||||
return decorated_key_view(get_token(), get_key());
|
||||
}
|
||||
|
||||
bool operator==(const summary_entry& x) const {
|
||||
|
||||
@@ -980,7 +980,7 @@ void dump_summary_operation(schema_ptr schema, reader_permit permit, const std::
|
||||
|
||||
auto pkey = e.get_key().to_partition_key(*schema);
|
||||
writer.Key("key");
|
||||
writer.DataKey(*schema, pkey, e.token);
|
||||
writer.DataKey(*schema, pkey, e.get_token());
|
||||
writer.Key("position");
|
||||
writer.Uint64(e.position);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user