scoutfs: switch to using fnv1a for hashing

We had a few uses of crc for hashing.  That was fine enough for initial
testing but the huge number of xattrs that srch is recording was
seeing very bad collisions from the clumsy combination of crc32c into
a 64bit hash.  Replace it with FNV for now.

This also takes the opportunity to use 3 hash functions in the forest
bloom filter so that we can extract them from the 64bit hash of the key
rather than iterating and recalculating hashes for each function.

Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
Zach Brown
2020-07-02 15:10:53 -07:00
committed by Zach Brown
parent f48112e2a7
commit 42e7fbb4f7
5 changed files with 56 additions and 19 deletions

View File

@@ -14,7 +14,6 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/crc32c.h>
#include <linux/sort.h>
#include <linux/random.h>
@@ -29,6 +28,7 @@
#include "block.h"
#include "radix.h"
#include "avl.h"
#include "hash.h"
#include "scoutfs_trace.h"
@@ -210,7 +210,7 @@ static int cmp_key_item(void *arg, struct scoutfs_avl_node *node)
*/
static int leaf_item_hash_ind(struct scoutfs_key *key)
{
return crc32c(~0, key, sizeof(struct scoutfs_key)) %
return scoutfs_hash32(key, sizeof(struct scoutfs_key)) %
SCOUTFS_BTREE_LEAF_ITEM_HASH_NR;
}

View File

@@ -13,7 +13,6 @@
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/crc32c.h>
#include <linux/uio.h>
#include <linux/xattr.h>
#include <linux/namei.h>
@@ -31,6 +30,7 @@
#include "kvec.h"
#include "forest.h"
#include "lock.h"
#include "hash.h"
#include "counters.h"
#include "scoutfs_trace.h"
@@ -249,7 +249,7 @@ static u32 dirent_name_fingerprint(const char *name, unsigned int name_len)
static u64 dirent_name_hash(const char *name, unsigned int name_len)
{
return crc32c(~0, name, name_len) |
return scoutfs_hash32(name, name_len) |
((u64)dirent_name_fingerprint(name, name_len) << 32);
}

View File

@@ -14,7 +14,6 @@
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/random.h>
#include <linux/crc32c.h>
#include "super.h"
#include "format.h"
@@ -24,6 +23,7 @@
#include "radix.h"
#include "block.h"
#include "forest.h"
#include "hash.h"
#include "counters.h"
#include "scoutfs_trace.h"
@@ -240,18 +240,20 @@ static void read_unlock_forest_root(struct forest_info *finf,
}
}
/*
* XXX need something better.
*/
static void calc_bloom_nrs(struct forest_bloom_nrs *bloom,
struct scoutfs_key *key)
{
u32 crc = ~0;
u64 hash;
int i;
BUILD_BUG_ON((SCOUTFS_FOREST_BLOOM_FUNC_BITS *
SCOUTFS_FOREST_BLOOM_NRS) > 64);
hash = scoutfs_hash64(key, sizeof(struct scoutfs_key));
for (i = 0; i < ARRAY_SIZE(bloom->nrs); i++) {
crc = crc32c(crc, key, sizeof(struct scoutfs_key));
bloom->nrs[i] = crc % SCOUTFS_FOREST_BLOOM_BITS;
bloom->nrs[i] = (u32)hash % SCOUTFS_FOREST_BLOOM_BITS;
hash >>= SCOUTFS_FOREST_BLOOM_FUNC_BITS;
}
}

View File

@@ -328,11 +328,12 @@ struct scoutfs_bloom_block {
* before the bloom filters fill up and start returning excessive false
* positives.
*/
#define SCOUTFS_FOREST_BLOOM_NRS 7
#define SCOUTFS_FOREST_BLOOM_NRS 3
#define SCOUTFS_FOREST_BLOOM_BITS \
(((SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_bloom_block)) / \
member_sizeof(struct scoutfs_bloom_block, bits[0])) * \
member_sizeof(struct scoutfs_bloom_block, bits[0]) * 8) \
member_sizeof(struct scoutfs_bloom_block, bits[0]) * 8)
#define SCOUTFS_FOREST_BLOOM_FUNC_BITS (SCOUTFS_BLOCK_LG_SHIFT + 3)
/*
* Keys are first sorted by major key zones.

View File

@@ -1,15 +1,49 @@
#ifndef _SCOUTFS_HASH_H_
#define _SCOUTFS_HASH_H_
#include <linux/crc32c.h>
/*
* We're using FNV1a for now. It's fine. Ish.
*
* The longer term plan is xxh3 but it looks like it'll take just a bit
* more time to be declared stable and then it needs to be ported to the
* kernel.
*
* - https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
* - https://github.com/Cyan4973/xxHash/releases/tag/v0.7.4
*/
static inline u32 fnv1a32(const void *data, unsigned int len)
{
u32 hash = 0x811c9dc5;
while (len--) {
hash ^= *(u8 *)(data++);
hash *= 0x01000193;
}
return hash;
}
static inline u64 fnv1a64(const void *data, unsigned int len)
{
u64 hash = 0xcbf29ce484222325;
while (len--) {
hash ^= *(u8 *)(data++);
hash *= 0x100000001b3;
}
return hash;
}
static inline u32 scoutfs_hash32(const void *data, unsigned int len)
{
return fnv1a32(data, len);
}
/* XXX replace with xxhash */
static inline u64 scoutfs_hash64(const void *data, unsigned int len)
{
unsigned int half = (len + 1) / 2;
return crc32c(~0, data, half) |
((u64)crc32c(~0, data + len - half, half) << 32);
return fnv1a64(data, len);
}
#endif