mirror of
https://github.com/versity/scoutfs.git
synced 2026-02-08 03:30:46 +00:00
scoutfs: add dirent name fingerprint
Entries in a directory are indexed by the hash of their name, which introduces a perfectly random access pattern. This results in a copy-on-write (COW) storm once directories get large enough that the leaf blocks that store their entries are larger than our commits: each commit ends up being full of COWed leaf blocks that each contain a single new entry. The dirent name fingerprints change the dirent key to start with a fingerprint of the name. This reduces the scope of hash randomization from the entire directory to entries with the same fingerprint. On real customer dir sizes and file names we saw roughly 3x create rate improvements from being able to create more entries in leaf blocks within a commit. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -213,12 +213,44 @@ static struct scoutfs_dirent *alloc_dirent(unsigned int name_len)
|
||||
return kmalloc(dirent_bytes(name_len), GFP_NOFS);
|
||||
}
|
||||
|
||||
/*
 * Test a bit number as though an array of bytes is a large len-bit
 * big-endian value.  nr 0 is the LSB of the final byte, nr (len - 1) is
 * the MSB of the first byte.
 *
 * @nr:    bit number to test
 * @bytes: the bytes that make up the value
 * @len:   width of the value in bits, not bytes
 *
 * Nothing here range-checks @nr; callers must keep it within
 * 0..(len - 1).  The byte index and the bit mask only agree with each
 * other when @len is a multiple of 8.
 *
 * Returns the tested bit still in its position within its byte (so any
 * of 0x01..0x80 when set) and 0 when clear.  It is not normalized to
 * 0/1.
 */
static int test_be_bytes_bit(int nr, const char *bytes, int len)
{
	/* counting bits down from the top gives the byte's index from the front */
	const unsigned char byte = (unsigned char)bytes[(len - 1 - nr) >> 3];

	return byte & (1 << (nr & 7));
}
|
||||
|
||||
/*
|
||||
* Generate a 32bit "fingerprint" of the name by extracting 32 evenly
|
||||
* distributed bits from the name. The intent is to have the sort order
|
||||
* of the fingerprints reflect the memcmp() sort order of the names
|
||||
* while mapping large names down to small fs keys.
|
||||
*
|
||||
* Names that are smaller than 32bits are biased towards the high bits
|
||||
* of the fingerprint so that most significant bits of the fingerprints
|
||||
* consistently reflect the initial characters of the names.
|
||||
*/
|
||||
static u32 dirent_name_fingerprint(const char *name, unsigned int name_len)
|
||||
{
|
||||
int name_bits = name_len * 8;
|
||||
int skip = max(name_bits / 32, 1);
|
||||
u32 fp = 0;
|
||||
int f;
|
||||
int n;
|
||||
|
||||
for (f = 31, n = name_bits - 1; f >= 0 && n >= 0; f--, n -= skip)
|
||||
fp |= !!test_be_bytes_bit(n, name, name_bits) << f;
|
||||
|
||||
return fp;
|
||||
}
|
||||
|
||||
/*
 * Build the 64bit value that a dirent is indexed by.
 *
 * The high 32 bits are the name's fingerprint and the low 32 bits are
 * the crc32c of the entire name seeded with ~0.  With the fingerprint
 * in the most significant half, the crc only spreads out entries whose
 * names share a fingerprint rather than every entry in the directory.
 */
static u64 dirent_name_hash(const char *name, unsigned int name_len)
{
	/* widen before shifting so the fingerprint lands in the upper half */
	u64 fingerprint = dirent_name_fingerprint(name, name_len);

	return crc32c(~0, name, name_len) | (fingerprint << 32);
}
|
||||
|
||||
static u64 dirent_names_equal(const char *a_name, unsigned int a_len,
|
||||
|
||||
Reference in New Issue
Block a user