Compare commits

...

15 Commits

Author SHA1 Message Date
Auke Kok
6b682b6651 Swap meta allocators with much more reserve.
We increase the reserve from 2x to 3x the minimum number of blocks
needed for our reserve, and change the algorithm that determines when
to swap them.

The old algorithm swaps them as soon as _freed grows just larger than
_avail. While it is the simplest algorithm, it suffers from the problem
that, in practice, once we hit ENOSPC conditions it will swap on almost
every iteration at our low water mark.

In our testing, we regularly see failures because we are effectively
starving both allocators, slowly draining them block by block while
trying to do work. The work, of course, requires us to drain more
blocks to commit changes. This cycle doesn't end until both allocators
are almost completely drained and not enough blocks remain to do any
real work.

The new algorithm will not swap allocators unless _freed is 50%
larger than _avail.

The outcome is that, during meta space pressure, _avail is still
effectively allowed to drain slowly down to the same levels as before.
However, we then swap to _freed, which is now 1.5x larger.

This lets us do a whole chunk of work without needing to swap again.
While _avail drains for longer, we allow work to commit and recycle
blocks back into _freed much more effectively.
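
As a rough sketch of the resulting decision (illustrative names, not
the scoutfs source; the real test is in the server commit diff below),
the 1.5x threshold is computed in integer math as freed + (freed >> 1):

        static bool should_swap_allocators(u64 avail, u64 freed, u64 reserved)
        {
                /* avail must be low (under 3x the reserve) and freed
                 * must exceed avail by at least 50% before we swap */
                return avail < (reserved * 3) &&
                       (freed + (freed >> 1)) > avail;
        }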

Signed-off-by: Auke Kok <auke.kok@versity.com>
2025-02-11 17:04:28 -05:00
Zach Brown
5a10c79409 Merge pull request #201 from versity/auke/fixes_pre_parallel_restore
Misc. fixes and changes to support parallel_restore and check.
2025-02-02 06:53:25 -08:00
Zach Brown
295f751aed Add test_bit to utils bitmap
Add test_bit() to the trivial utils bitmap.c implementation.

Signed-off-by: Zach Brown <zab@versity.com>
2025-01-22 09:58:58 -08:00
Zach Brown
7f6032d9b4 Add lk rbtree wrapper
Import the kernel's rbtree implementation with a wrapper so we can use
it from userspace.

Signed-off-by: Zach Brown <zab@versity.com>
2025-01-22 09:58:49 -08:00
Zach Brown
7e3a6537ec Add userspace version of our dirent name hash
Signed-off-by: Zach Brown <zab@versity.com>
2025-01-22 09:58:41 -08:00
Zach Brown
49b7b70438 Add userspace version of our mode to type
Signed-off-by: Zach Brown <zab@versity.com>
2025-01-22 09:58:31 -08:00
Zach Brown
de0fdd1f9f Promote userspace btree block initialization
Signed-off-by: Zach Brown <zab@versity.com>
2025-01-22 09:58:23 -08:00
Zach Brown
a6d7de3c00 Add fls64() alias for userspace flsll()
Signed-off-by: Zach Brown <zab@versity.com>
2025-01-22 09:58:16 -08:00
Zach Brown
2c2c127c5e Add put_unaligned_leXX() for userspace
Signed-off-by: Zach Brown <zab@versity.com>
2025-01-22 09:58:10 -08:00
Zach Brown
9491c784e7 Add srch_encode_entry() for userspace utils
Signed-off-by: Zach Brown <zab@versity.com>
2025-01-22 09:57:56 -08:00
Zach Brown
c3b30930fa Add bloom filter index calc for userspace utils
Signed-off-by: Zach Brown <zab@versity.com>
2025-01-22 09:57:46 -08:00
Zach Brown
e7e46a80e6 Add userspace NSEC_PER_SEC
Signed-off-by: Zach Brown <zab@versity.com>
2025-01-22 09:57:39 -08:00
Zach Brown
1ddf752f42 Import a few more functions to our list.h
Import a few more functions from the kernel's list.h into our imported
copy.

Signed-off-by: Zach Brown <zab@versity.com>
2025-01-22 09:57:29 -08:00
Zach Brown
14b65c6360 Fix printing alloc list block extents
The list alloc blocks have an array of blknos that are offset by a start
field in the block header.  The print code wasn't using that and was
always referencing the beginning of the array, which could miss blocks.
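
In other words (field names as in the diff below; print_extent() is a
hypothetical stand-in), the entries of a list block live at
blknos[start] through blknos[start + nr - 1], so the printer has to
offset its index:

        u64 st = le32_to_cpu(lblk->start);
        u64 nr = le32_to_cpu(lblk->nr);

        for (i = 0; i < nr; i++)
                print_extent(le64_to_cpu(lblk->blknos[st + i])); /* not blknos[i] */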

Signed-off-by: Zach Brown <zab@versity.com>
2025-01-22 09:57:21 -08:00
Zach Brown
934f6c7648 Merge pull request #199 from versity/zab/v1.23
v1.23 Release
2024-12-11 17:02:52 -08:00
20 changed files with 1567 additions and 25 deletions

View File

@@ -668,14 +668,16 @@ static void scoutfs_server_commit_func(struct work_struct *work)
         * the reserved blocks after having filled the log trees's avail
         * allocator during its transaction. To avoid prematurely
         * setting the low flag and causing enospc we make sure that the
-        * next transaction's meta_avail has 2x the reserved blocks so
+        * next transaction's meta_avail has 3x the reserved blocks so
         * that it can consume a full reserved amount and still have
         * enough to avoid enospc. We swap to freed if avail is under
-        * the buffer and freed is larger.
+        * the buffer and freed is larger by 50%. This results in much less
+        * swapping overall and allows the pools to refill naturally.
         */
        if ((le64_to_cpu(server->meta_avail->total_len) <
-            (scoutfs_server_reserved_meta_blocks(sb) * 2)) &&
-           (le64_to_cpu(server->meta_freed->total_len) >
+            (scoutfs_server_reserved_meta_blocks(sb) * 3)) &&
+           ((le64_to_cpu(server->meta_freed->total_len) +
+             (le64_to_cpu(server->meta_freed->total_len) >> 1)) >
             le64_to_cpu(server->meta_avail->total_len)))
                swap(server->meta_avail, server->meta_freed);

View File

@@ -10,6 +10,11 @@
  * Just a quick simple native bitmap.
  */
+int test_bit(unsigned long *bits, u64 nr)
+{
+       return !!(bits[nr / BITS_PER_LONG] & (1UL << (nr & (BITS_PER_LONG - 1))));
+}
+
 void set_bit(unsigned long *bits, u64 nr)
 {
        bits[nr / BITS_PER_LONG] |= 1UL << (nr & (BITS_PER_LONG - 1));

View File

@@ -1,6 +1,7 @@
 #ifndef _BITMAP_H_
 #define _BITMAP_H_
+int test_bit(unsigned long *bits, u64 nr);
 void set_bit(unsigned long *bits, u64 nr);
 void clear_bit(unsigned long *bits, u64 nr);
 u64 find_next_set_bit(unsigned long *start, u64 from, u64 total);
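
A hypothetical caller, pairing the new helper with the existing bit ops:

        unsigned long map[4] = { 0 };

        set_bit(map, 100);
        assert(test_bit(map, 100));
        clear_bit(map, 100);
        assert(!test_bit(map, 100));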

20
utils/src/bloom.c Normal file
View File

@@ -0,0 +1,20 @@
#include <errno.h>

#include "sparse.h"
#include "util.h"
#include "format.h"
#include "hash.h"
#include "bloom.h"

void calc_bloom_nrs(struct scoutfs_key *key, unsigned int *nrs)
{
        u64 hash;
        int i;

        hash = scoutfs_hash64(key, sizeof(struct scoutfs_key));

        for (i = 0; i < SCOUTFS_FOREST_BLOOM_NRS; i++) {
                nrs[i] = (u32)hash % SCOUTFS_FOREST_BLOOM_BITS;
                hash >>= SCOUTFS_FOREST_BLOOM_FUNC_BITS;
        }
}
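
A hypothetical caller derives all of a key's bloom bit numbers with one
hash call and then tests or sets them, e.g.:

        unsigned int nrs[SCOUTFS_FOREST_BLOOM_NRS];
        int i;

        calc_bloom_nrs(&key, nrs);
        for (i = 0; i < SCOUTFS_FOREST_BLOOM_NRS; i++)
                set_bit(bits, nrs[i]);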

6
utils/src/bloom.h Normal file
View File

@@ -0,0 +1,6 @@
#ifndef _BLOOM_H_
#define _BLOOM_H_

void calc_bloom_nrs(struct scoutfs_key *key, unsigned int *nrs);

#endif

View File

@@ -8,7 +8,7 @@
 #include "leaf_item_hash.h"
 #include "btree.h"

-static void init_block(struct scoutfs_btree_block *bt, int level)
+void btree_init_block(struct scoutfs_btree_block *bt, int level)
 {
        int free;

@@ -33,7 +33,7 @@ void btree_init_root_single(struct scoutfs_btree_root *root,
        memset(bt, 0, SCOUTFS_BLOCK_LG_SIZE);
-       init_block(bt, 0);
+       btree_init_block(bt, 0);
 }

 static void *alloc_val(struct scoutfs_btree_block *bt, int len)

View File

@@ -1,6 +1,7 @@
 #ifndef _BTREE_H_
 #define _BTREE_H_
+void btree_init_block(struct scoutfs_btree_block *bt, int level);
 void btree_init_root_single(struct scoutfs_btree_root *root,
                             struct scoutfs_btree_block *bt,
                             u64 seq, u64 blkno);

View File

@@ -156,6 +156,16 @@ static inline void list_move_tail(struct list_head *list,
        list_add_tail(list, head);
 }

+/**
+ * list_is_head - tests whether @list is the list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_head(const struct list_head *list, const struct list_head *head)
+{
+       return list == head;
+}
+
 /**
  * list_empty - tests whether a list is empty
  * @head: the list to test.
@@ -242,6 +252,15 @@ static inline void list_splice_init(struct list_head *list,
        for (pos = (head)->next, n = pos->next; pos != (head); \
             pos = n, n = pos->next)

+/**
+ * list_entry_is_head - test if the entry points to the head of the list
+ * @pos: the type * to cursor
+ * @head: the head for your list.
+ * @member: the name of the list_head within the struct.
+ */
+#define list_entry_is_head(pos, head, member) \
+       (&pos->member == (head))
+
 /**
  * list_for_each_entry - iterate over list of given type
  * @pos: the type * to use as a loop counter.
@@ -307,4 +326,28 @@ static inline void list_splice_init(struct list_head *list,
 #define list_next_entry(pos, member) \
        list_entry((pos)->member.next, typeof(*(pos)), member)

+/**
+ * list_prev_entry - get the prev element in list
+ * @pos: the type * to cursor
+ * @member: the name of the list_head within the struct.
+ */
+#define list_prev_entry(pos, member) \
+       list_entry((pos)->member.prev, typeof(*(pos)), member)
+
+/**
+ * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_head within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry.
+ */
+#define list_for_each_entry_safe_reverse(pos, n, head, member) \
+       for (pos = list_last_entry(head, typeof(*pos), member), \
+               n = list_prev_entry(pos, member); \
+            !list_entry_is_head(pos, head, member); \
+            pos = n, n = list_prev_entry(n, member))
+
 #endif
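
A hypothetical use of the newly imported reverse-safe iterator, tearing
a list down from the tail:

        struct widget *w, *tmp;

        list_for_each_entry_safe_reverse(w, tmp, &widgets, entry) {
                list_del(&w->entry);
                free(w);
        }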

View File

@@ -0,0 +1,24 @@
#ifndef _LK_RBTREE_WRAPPER_H_
#define _LK_RBTREE_WRAPPER_H_

/*
 * We're using this lame hack to build and use the kernel's rbtree in
 * userspace. We drop the kernel's rbtree*[ch] implementation in and
 * use them with this wrapper. We only have to remove the kernel
 * includes from the imported files.
 */

#include <stdbool.h>
#include "util.h"

#define rcu_assign_pointer(a, b) do { a = b; } while (0)
#define READ_ONCE(a) ({ a; })
#define WRITE_ONCE(a, b) do { a = b; } while (0)
#define unlikely(a) ({ a; })
#define EXPORT_SYMBOL(a) /* nop */

#include "rbtree_types.h"
#include "rbtree.h"
#include "rbtree_augmented.h"

#endif
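
A sketch of how the wrapped tree is then used from utils code (the
struct and comparator are invented for illustration; rb_add() comes
from the imported rbtree.h):

        struct item {
                struct rb_node node;
                u64 key;
        };

        static bool item_less(struct rb_node *a, const struct rb_node *b)
        {
                return container_of(a, struct item, node)->key <
                       container_of((struct rb_node *)b, struct item, node)->key;
        }

        /* link a new item into a struct rb_root and rebalance */
        rb_add(&it->node, &root, item_less);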

24
utils/src/mode_types.c Normal file
View File

@@ -0,0 +1,24 @@
#include <unistd.h>
#include <sys/stat.h>

#include "sparse.h"
#include "util.h"
#include "format.h"
#include "mode_types.h"

unsigned int mode_to_type(mode_t mode)
{
#define S_SHIFT 12
        static unsigned char mode_types[S_IFMT >> S_SHIFT] = {
                [S_IFIFO >> S_SHIFT] = SCOUTFS_DT_FIFO,
                [S_IFCHR >> S_SHIFT] = SCOUTFS_DT_CHR,
                [S_IFDIR >> S_SHIFT] = SCOUTFS_DT_DIR,
                [S_IFBLK >> S_SHIFT] = SCOUTFS_DT_BLK,
                [S_IFREG >> S_SHIFT] = SCOUTFS_DT_REG,
                [S_IFLNK >> S_SHIFT] = SCOUTFS_DT_LNK,
                [S_IFSOCK >> S_SHIFT] = SCOUTFS_DT_SOCK,
        };

        return mode_types[(mode & S_IFMT) >> S_SHIFT];
#undef S_SHIFT
}
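
A hypothetical caller mapping a stat() mode to the scoutfs dirent type
constants:

        struct stat st;
        unsigned int dt;

        if (fstat(fd, &st) == 0)
                dt = mode_to_type(st.st_mode); /* e.g. SCOUTFS_DT_REG */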

6
utils/src/mode_types.h Normal file
View File

@@ -0,0 +1,6 @@
#ifndef _MODE_TYPES_H_
#define _MODE_TYPES_H_

unsigned int mode_to_type(mode_t mode);

#endif

46
utils/src/name_hash.h Normal file
View File

@@ -0,0 +1,46 @@
#ifndef _SCOUTFS_NAME_HASH_H_
#define _SCOUTFS_NAME_HASH_H_

#include "hash.h"

/*
 * Test a bit number as though an array of bytes is a large len-bit
 * big-endian value. nr 0 is the LSB of the final byte, nr (len - 1) is
 * the MSB of the first byte.
 */
static int test_be_bytes_bit(int nr, const char *bytes, int len)
{
        return bytes[(len - 1 - nr) >> 3] & (1 << (nr & 7));
}

/*
 * Generate a 32bit "fingerprint" of the name by extracting 32 evenly
 * distributed bits from the name. The intent is to have the sort order
 * of the fingerprints reflect the memcmp() sort order of the names
 * while mapping large names down to small fs keys.
 *
 * Names that are smaller than 32bits are biased towards the high bits
 * of the fingerprint so that most significant bits of the fingerprints
 * consistently reflect the initial characters of the names.
 */
static inline u32 dirent_name_fingerprint(const char *name, unsigned int name_len)
{
        int name_bits = name_len * 8;
        int skip = max(name_bits / 32, 1);
        u32 fp = 0;
        int f;
        int n;

        for (f = 31, n = name_bits - 1; f >= 0 && n >= 0; f--, n -= skip)
                fp |= !!test_be_bytes_bit(n, name, name_bits) << f;

        return fp;
}

static inline u64 dirent_name_hash(const char *name, unsigned int name_len)
{
        return scoutfs_hash32(name, name_len) |
               ((u64)dirent_name_fingerprint(name, name_len) << 32);
}

#endif
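
A worked example (hand-computed, not from the source): for the one-byte
name "a" (0x61), name_bits is 8 and skip is max(8/32, 1) = 1, so name
bits 7..0 land in fingerprint bits 31..24, and short names fill the high
bits first, as the comment above describes:

        assert(dirent_name_fingerprint("a", 1) == 0x61000000);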

View File

@@ -645,6 +645,8 @@ static int print_alloc_list_block(int fd, char *str, struct scoutfs_block_ref *r
        u64 blkno;
        u64 start;
        u64 len;
+       u64 st;
+       u64 nr;
        int wid;
        int ret;
        int i;
@@ -663,27 +665,37 @@
               AL_REF_A(&lblk->next), le32_to_cpu(lblk->start),
               le32_to_cpu(lblk->nr));

-       if (lblk->nr) {
-               wid = printf(" exts: ");
-               start = 0;
-               len = 0;
-               for (i = 0; i < le32_to_cpu(lblk->nr); i++) {
-                       if (len == 0)
-                               start = le64_to_cpu(lblk->blknos[i]);
-                       len++;
-                       if (i == (le32_to_cpu(lblk->nr) - 1) ||
-                           start + len != le64_to_cpu(lblk->blknos[i + 1])) {
-                               if (wid >= 72)
-                                       wid = printf("\n ");
-                               wid += printf("%llu,%llu ", start, len);
-                               len = 0;
-                       }
-               }
-               printf("\n");
-       }
+       st = le32_to_cpu(lblk->start);
+       nr = le32_to_cpu(lblk->nr);
+       if (st >= SCOUTFS_ALLOC_LIST_MAX_BLOCKS ||
+           nr > SCOUTFS_ALLOC_LIST_MAX_BLOCKS ||
+           (st + nr) > SCOUTFS_ALLOC_LIST_MAX_BLOCKS) {
+               printf(" (invalid start and nr fields)\n");
+               goto out;
+       }
+
+       if (lblk->nr == 0)
+               goto out;
+
+       wid = printf(" exts: ");
+       start = 0;
+       len = 0;
+       for (i = 0; i < nr; i++) {
+               if (len == 0)
+                       start = le64_to_cpu(lblk->blknos[st + i]);
+               len++;
+               if (i == (nr - 1) || (start + len) != le64_to_cpu(lblk->blknos[st + i + 1])) {
+                       if (wid >= 72)
+                               wid = printf("\n ");
+                       wid += printf("%llu,%llu ", start, len);
+                       len = 0;
+               }
+       }
+       printf("\n");
+
+out:
        next = lblk->next;
        free(lblk);
        return print_alloc_list_block(fd, str, &next);
629
utils/src/rbtree.c Normal file
View File

@@ -0,0 +1,629 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
Red Black Trees
(C) 1999 Andrea Arcangeli <andrea@suse.de>
(C) 2002 David Woodhouse <dwmw2@infradead.org>
(C) 2012 Michel Lespinasse <walken@google.com>
linux/lib/rbtree.c
*/
#include "lk_rbtree_wrapper.h"
/*
* red-black trees properties: https://en.wikipedia.org/wiki/Rbtree
*
* 1) A node is either red or black
* 2) The root is black
* 3) All leaves (NULL) are black
* 4) Both children of every red node are black
* 5) Every simple path from root to leaves contains the same number
* of black nodes.
*
* 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
* consecutive red nodes in a path and every red node is therefore followed by
* a black. So if B is the number of black nodes on every simple path (as per
* 5), then the longest possible path due to 4 is 2B.
*
* We shall indicate color with case, where black nodes are uppercase and red
* nodes will be lowercase. Unknown color nodes shall be drawn as red within
* parentheses and have some accompanying text comment.
*/
/*
* Notes on lockless lookups:
*
* All stores to the tree structure (rb_left and rb_right) must be done using
* WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the
* tree structure as seen in program order.
*
* These two requirements will allow lockless iteration of the tree -- not
* correct iteration mind you, tree rotations are not atomic so a lookup might
* miss entire subtrees.
*
* But they do guarantee that any such traversal will only see valid elements
* and that it will indeed complete -- does not get stuck in a loop.
*
* It also guarantees that if the lookup returns an element it is the 'correct'
* one. But not returning an element does _NOT_ mean it's not present.
*
* NOTE:
*
* Stores to __rb_parent_color are not important for simple lookups so those
* are left undone as of now. Nor did I check for loops involving parent
* pointers.
*/
static inline void rb_set_black(struct rb_node *rb)
{
rb->__rb_parent_color |= RB_BLACK;
}
static inline struct rb_node *rb_red_parent(struct rb_node *red)
{
return (struct rb_node *)red->__rb_parent_color;
}
/*
* Helper function for rotations:
* - old's parent and color get assigned to new
* - old gets assigned new as a parent and 'color' as a color.
*/
static inline void
__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
struct rb_root *root, int color)
{
struct rb_node *parent = rb_parent(old);
new->__rb_parent_color = old->__rb_parent_color;
rb_set_parent_color(old, new, color);
__rb_change_child(old, new, parent, root);
}
static __always_inline void
__rb_insert(struct rb_node *node, struct rb_root *root,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
{
struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;
while (true) {
/*
* Loop invariant: node is red.
*/
if (unlikely(!parent)) {
/*
* The inserted node is root. Either this is the
* first node, or we recursed at Case 1 below and
* are no longer violating 4).
*/
rb_set_parent_color(node, NULL, RB_BLACK);
break;
}
/*
* If there is a black parent, we are done.
* Otherwise, take some corrective action as,
* per 4), we don't want a red root or two
* consecutive red nodes.
*/
if(rb_is_black(parent))
break;
gparent = rb_red_parent(parent);
tmp = gparent->rb_right;
if (parent != tmp) { /* parent == gparent->rb_left */
if (tmp && rb_is_red(tmp)) {
/*
* Case 1 - node's uncle is red (color flips).
*
* G g
* / \ / \
* p u --> P U
* / /
* n n
*
* However, since g's parent might be red, and
* 4) does not allow this, we need to recurse
* at g.
*/
rb_set_parent_color(tmp, gparent, RB_BLACK);
rb_set_parent_color(parent, gparent, RB_BLACK);
node = gparent;
parent = rb_parent(node);
rb_set_parent_color(node, parent, RB_RED);
continue;
}
tmp = parent->rb_right;
if (node == tmp) {
/*
* Case 2 - node's uncle is black and node is
* the parent's right child (left rotate at parent).
*
* G G
* / \ / \
* p U --> n U
* \ /
* n p
*
* This still leaves us in violation of 4), the
* continuation into Case 3 will fix that.
*/
tmp = node->rb_left;
WRITE_ONCE(parent->rb_right, tmp);
WRITE_ONCE(node->rb_left, parent);
if (tmp)
rb_set_parent_color(tmp, parent,
RB_BLACK);
rb_set_parent_color(parent, node, RB_RED);
augment_rotate(parent, node);
parent = node;
tmp = node->rb_right;
}
/*
* Case 3 - node's uncle is black and node is
* the parent's left child (right rotate at gparent).
*
* G P
* / \ / \
* p U --> n g
* / \
* n U
*/
WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */
WRITE_ONCE(parent->rb_right, gparent);
if (tmp)
rb_set_parent_color(tmp, gparent, RB_BLACK);
__rb_rotate_set_parents(gparent, parent, root, RB_RED);
augment_rotate(gparent, parent);
break;
} else {
tmp = gparent->rb_left;
if (tmp && rb_is_red(tmp)) {
/* Case 1 - color flips */
rb_set_parent_color(tmp, gparent, RB_BLACK);
rb_set_parent_color(parent, gparent, RB_BLACK);
node = gparent;
parent = rb_parent(node);
rb_set_parent_color(node, parent, RB_RED);
continue;
}
tmp = parent->rb_left;
if (node == tmp) {
/* Case 2 - right rotate at parent */
tmp = node->rb_right;
WRITE_ONCE(parent->rb_left, tmp);
WRITE_ONCE(node->rb_right, parent);
if (tmp)
rb_set_parent_color(tmp, parent,
RB_BLACK);
rb_set_parent_color(parent, node, RB_RED);
augment_rotate(parent, node);
parent = node;
tmp = node->rb_left;
}
/* Case 3 - left rotate at gparent */
WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */
WRITE_ONCE(parent->rb_left, gparent);
if (tmp)
rb_set_parent_color(tmp, gparent, RB_BLACK);
__rb_rotate_set_parents(gparent, parent, root, RB_RED);
augment_rotate(gparent, parent);
break;
}
}
}
/*
* Inline version for rb_erase() use - we want to be able to inline
* and eliminate the dummy_rotate callback there
*/
static __always_inline void
____rb_erase_color(struct rb_node *parent, struct rb_root *root,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
{
struct rb_node *node = NULL, *sibling, *tmp1, *tmp2;
while (true) {
/*
* Loop invariants:
* - node is black (or NULL on first iteration)
* - node is not the root (parent is not NULL)
* - All leaf paths going through parent and node have a
* black node count that is 1 lower than other leaf paths.
*/
sibling = parent->rb_right;
if (node != sibling) { /* node == parent->rb_left */
if (rb_is_red(sibling)) {
/*
* Case 1 - left rotate at parent
*
* P S
* / \ / \
* N s --> p Sr
* / \ / \
* Sl Sr N Sl
*/
tmp1 = sibling->rb_left;
WRITE_ONCE(parent->rb_right, tmp1);
WRITE_ONCE(sibling->rb_left, parent);
rb_set_parent_color(tmp1, parent, RB_BLACK);
__rb_rotate_set_parents(parent, sibling, root,
RB_RED);
augment_rotate(parent, sibling);
sibling = tmp1;
}
tmp1 = sibling->rb_right;
if (!tmp1 || rb_is_black(tmp1)) {
tmp2 = sibling->rb_left;
if (!tmp2 || rb_is_black(tmp2)) {
/*
* Case 2 - sibling color flip
* (p could be either color here)
*
* (p) (p)
* / \ / \
* N S --> N s
* / \ / \
* Sl Sr Sl Sr
*
* This leaves us violating 5) which
* can be fixed by flipping p to black
* if it was red, or by recursing at p.
* p is red when coming from Case 1.
*/
rb_set_parent_color(sibling, parent,
RB_RED);
if (rb_is_red(parent))
rb_set_black(parent);
else {
node = parent;
parent = rb_parent(node);
if (parent)
continue;
}
break;
}
/*
* Case 3 - right rotate at sibling
* (p could be either color here)
*
* (p) (p)
* / \ / \
* N S --> N sl
* / \ \
* sl Sr S
* \
* Sr
*
* Note: p might be red, and then both
* p and sl are red after rotation(which
* breaks property 4). This is fixed in
* Case 4 (in __rb_rotate_set_parents()
* which set sl the color of p
* and set p RB_BLACK)
*
* (p) (sl)
* / \ / \
* N sl --> P S
* \ / \
* S N Sr
* \
* Sr
*/
tmp1 = tmp2->rb_right;
WRITE_ONCE(sibling->rb_left, tmp1);
WRITE_ONCE(tmp2->rb_right, sibling);
WRITE_ONCE(parent->rb_right, tmp2);
if (tmp1)
rb_set_parent_color(tmp1, sibling,
RB_BLACK);
augment_rotate(sibling, tmp2);
tmp1 = sibling;
sibling = tmp2;
}
/*
* Case 4 - left rotate at parent + color flips
* (p and sl could be either color here.
* After rotation, p becomes black, s acquires
* p's color, and sl keeps its color)
*
* (p) (s)
* / \ / \
* N S --> P Sr
* / \ / \
* (sl) sr N (sl)
*/
tmp2 = sibling->rb_left;
WRITE_ONCE(parent->rb_right, tmp2);
WRITE_ONCE(sibling->rb_left, parent);
rb_set_parent_color(tmp1, sibling, RB_BLACK);
if (tmp2)
rb_set_parent(tmp2, parent);
__rb_rotate_set_parents(parent, sibling, root,
RB_BLACK);
augment_rotate(parent, sibling);
break;
} else {
sibling = parent->rb_left;
if (rb_is_red(sibling)) {
/* Case 1 - right rotate at parent */
tmp1 = sibling->rb_right;
WRITE_ONCE(parent->rb_left, tmp1);
WRITE_ONCE(sibling->rb_right, parent);
rb_set_parent_color(tmp1, parent, RB_BLACK);
__rb_rotate_set_parents(parent, sibling, root,
RB_RED);
augment_rotate(parent, sibling);
sibling = tmp1;
}
tmp1 = sibling->rb_left;
if (!tmp1 || rb_is_black(tmp1)) {
tmp2 = sibling->rb_right;
if (!tmp2 || rb_is_black(tmp2)) {
/* Case 2 - sibling color flip */
rb_set_parent_color(sibling, parent,
RB_RED);
if (rb_is_red(parent))
rb_set_black(parent);
else {
node = parent;
parent = rb_parent(node);
if (parent)
continue;
}
break;
}
/* Case 3 - left rotate at sibling */
tmp1 = tmp2->rb_left;
WRITE_ONCE(sibling->rb_right, tmp1);
WRITE_ONCE(tmp2->rb_left, sibling);
WRITE_ONCE(parent->rb_left, tmp2);
if (tmp1)
rb_set_parent_color(tmp1, sibling,
RB_BLACK);
augment_rotate(sibling, tmp2);
tmp1 = sibling;
sibling = tmp2;
}
/* Case 4 - right rotate at parent + color flips */
tmp2 = sibling->rb_right;
WRITE_ONCE(parent->rb_left, tmp2);
WRITE_ONCE(sibling->rb_right, parent);
rb_set_parent_color(tmp1, sibling, RB_BLACK);
if (tmp2)
rb_set_parent(tmp2, parent);
__rb_rotate_set_parents(parent, sibling, root,
RB_BLACK);
augment_rotate(parent, sibling);
break;
}
}
}
/* Non-inline version for rb_erase_augmented() use */
void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
{
____rb_erase_color(parent, root, augment_rotate);
}
EXPORT_SYMBOL(__rb_erase_color);
/*
* Non-augmented rbtree manipulation functions.
*
* We use dummy augmented callbacks here, and have the compiler optimize them
* out of the rb_insert_color() and rb_erase() function definitions.
*/
static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {}
static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {}
static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {}
static const struct rb_augment_callbacks dummy_callbacks = {
.propagate = dummy_propagate,
.copy = dummy_copy,
.rotate = dummy_rotate
};
void rb_insert_color(struct rb_node *node, struct rb_root *root)
{
__rb_insert(node, root, dummy_rotate);
}
EXPORT_SYMBOL(rb_insert_color);
void rb_erase(struct rb_node *node, struct rb_root *root)
{
struct rb_node *rebalance;
rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
if (rebalance)
____rb_erase_color(rebalance, root, dummy_rotate);
}
EXPORT_SYMBOL(rb_erase);
/*
* Augmented rbtree manipulation functions.
*
* This instantiates the same __always_inline functions as in the non-augmented
* case, but this time with user-defined callbacks.
*/
void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
{
__rb_insert(node, root, augment_rotate);
}
EXPORT_SYMBOL(__rb_insert_augmented);
/*
* This function returns the first node (in sort order) of the tree.
*/
struct rb_node *rb_first(const struct rb_root *root)
{
struct rb_node *n;
n = root->rb_node;
if (!n)
return NULL;
while (n->rb_left)
n = n->rb_left;
return n;
}
EXPORT_SYMBOL(rb_first);
struct rb_node *rb_last(const struct rb_root *root)
{
struct rb_node *n;
n = root->rb_node;
if (!n)
return NULL;
while (n->rb_right)
n = n->rb_right;
return n;
}
EXPORT_SYMBOL(rb_last);
struct rb_node *rb_next(const struct rb_node *node)
{
struct rb_node *parent;
if (RB_EMPTY_NODE(node))
return NULL;
/*
* If we have a right-hand child, go down and then left as far
* as we can.
*/
if (node->rb_right) {
node = node->rb_right;
while (node->rb_left)
node = node->rb_left;
return (struct rb_node *)node;
}
/*
* No right-hand children. Everything down and left is smaller than us,
* so any 'next' node must be in the general direction of our parent.
* Go up the tree; any time the ancestor is a right-hand child of its
* parent, keep going up. First time it's a left-hand child of its
* parent, said parent is our 'next' node.
*/
while ((parent = rb_parent(node)) && node == parent->rb_right)
node = parent;
return parent;
}
EXPORT_SYMBOL(rb_next);
struct rb_node *rb_prev(const struct rb_node *node)
{
struct rb_node *parent;
if (RB_EMPTY_NODE(node))
return NULL;
/*
* If we have a left-hand child, go down and then right as far
* as we can.
*/
if (node->rb_left) {
node = node->rb_left;
while (node->rb_right)
node = node->rb_right;
return (struct rb_node *)node;
}
/*
* No left-hand children. Go up till we find an ancestor which
* is a right-hand child of its parent.
*/
while ((parent = rb_parent(node)) && node == parent->rb_left)
node = parent;
return parent;
}
EXPORT_SYMBOL(rb_prev);
void rb_replace_node(struct rb_node *victim, struct rb_node *new,
struct rb_root *root)
{
struct rb_node *parent = rb_parent(victim);
/* Copy the pointers/colour from the victim to the replacement */
*new = *victim;
/* Set the surrounding nodes to point to the replacement */
if (victim->rb_left)
rb_set_parent(victim->rb_left, new);
if (victim->rb_right)
rb_set_parent(victim->rb_right, new);
__rb_change_child(victim, new, parent, root);
}
EXPORT_SYMBOL(rb_replace_node);
void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
struct rb_root *root)
{
struct rb_node *parent = rb_parent(victim);
/* Copy the pointers/colour from the victim to the replacement */
*new = *victim;
/* Set the surrounding nodes to point to the replacement */
if (victim->rb_left)
rb_set_parent(victim->rb_left, new);
if (victim->rb_right)
rb_set_parent(victim->rb_right, new);
/* Set the parent's pointer to the new node last after an RCU barrier
* so that the pointers onwards are seen to be set correctly when doing
* an RCU walk over the tree.
*/
__rb_change_child_rcu(victim, new, parent, root);
}
EXPORT_SYMBOL(rb_replace_node_rcu);
static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
{
for (;;) {
if (node->rb_left)
node = node->rb_left;
else if (node->rb_right)
node = node->rb_right;
else
return (struct rb_node *)node;
}
}
struct rb_node *rb_next_postorder(const struct rb_node *node)
{
const struct rb_node *parent;
if (!node)
return NULL;
parent = rb_parent(node);
/* If we're sitting on node, we've already seen our children */
if (parent && node == parent->rb_left && parent->rb_right) {
/* If we are the parent's left node, go to the parent's right
* node then all the way down to the left */
return rb_left_deepest_node(parent->rb_right);
} else
/* Otherwise we are the parent's right node, and the parent
* should be next */
return (struct rb_node *)parent;
}
EXPORT_SYMBOL(rb_next_postorder);
struct rb_node *rb_first_postorder(const struct rb_root *root)
{
if (!root->rb_node)
return NULL;
return rb_left_deepest_node(root->rb_node);
}
EXPORT_SYMBOL(rb_first_postorder);

328
utils/src/rbtree.h Normal file
View File

@@ -0,0 +1,328 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
Red Black Trees
(C) 1999 Andrea Arcangeli <andrea@suse.de>
linux/include/linux/rbtree.h
To use rbtrees you'll have to implement your own insert and search cores.
This will avoid us to use callbacks and to drop drammatically performances.
I know it's not the cleaner way, but in C (not in C++) to get
performances and genericity...
See Documentation/core-api/rbtree.rst for documentation and samples.
*/
#ifndef _LINUX_RBTREE_H
#define _LINUX_RBTREE_H
#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
#define rb_entry(ptr, type, member) container_of(ptr, type, member)
#define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL)
/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */
#define RB_EMPTY_NODE(node) \
((node)->__rb_parent_color == (unsigned long)(node))
#define RB_CLEAR_NODE(node) \
((node)->__rb_parent_color = (unsigned long)(node))
extern void rb_insert_color(struct rb_node *, struct rb_root *);
extern void rb_erase(struct rb_node *, struct rb_root *);
/* Find logical next and previous nodes in a tree */
extern struct rb_node *rb_next(const struct rb_node *);
extern struct rb_node *rb_prev(const struct rb_node *);
extern struct rb_node *rb_first(const struct rb_root *);
extern struct rb_node *rb_last(const struct rb_root *);
/* Postorder iteration - always visit the parent after its children */
extern struct rb_node *rb_first_postorder(const struct rb_root *);
extern struct rb_node *rb_next_postorder(const struct rb_node *);
/* Fast replacement of a single node without remove/rebalance/add/rebalance */
extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
struct rb_root *root);
extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
struct rb_root *root);
static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
struct rb_node **rb_link)
{
node->__rb_parent_color = (unsigned long)parent;
node->rb_left = node->rb_right = NULL;
*rb_link = node;
}
static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
struct rb_node **rb_link)
{
node->__rb_parent_color = (unsigned long)parent;
node->rb_left = node->rb_right = NULL;
rcu_assign_pointer(*rb_link, node);
}
#define rb_entry_safe(ptr, type, member) \
({ typeof(ptr) ____ptr = (ptr); \
____ptr ? rb_entry(____ptr, type, member) : NULL; \
})
/**
* rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of
* given type allowing the backing memory of @pos to be invalidated
*
* @pos: the 'type *' to use as a loop cursor.
* @n: another 'type *' to use as temporary storage
* @root: 'rb_root *' of the rbtree.
* @field: the name of the rb_node field within 'type'.
*
* rbtree_postorder_for_each_entry_safe() provides a similar guarantee as
* list_for_each_entry_safe() and allows the iteration to continue independent
* of changes to @pos by the body of the loop.
*
* Note, however, that it cannot handle other modifications that re-order the
* rbtree it is iterating over. This includes calling rb_erase() on @pos, as
* rb_erase() may rebalance the tree, causing us to miss some nodes.
*/
#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \
pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \
typeof(*pos), field); 1; }); \
pos = n)
/* Same as rb_first(), but O(1) */
#define rb_first_cached(root) (root)->rb_leftmost
static inline void rb_insert_color_cached(struct rb_node *node,
struct rb_root_cached *root,
bool leftmost)
{
if (leftmost)
root->rb_leftmost = node;
rb_insert_color(node, &root->rb_root);
}
static inline struct rb_node *
rb_erase_cached(struct rb_node *node, struct rb_root_cached *root)
{
struct rb_node *leftmost = NULL;
if (root->rb_leftmost == node)
leftmost = root->rb_leftmost = rb_next(node);
rb_erase(node, &root->rb_root);
return leftmost;
}
static inline void rb_replace_node_cached(struct rb_node *victim,
struct rb_node *new,
struct rb_root_cached *root)
{
if (root->rb_leftmost == victim)
root->rb_leftmost = new;
rb_replace_node(victim, new, &root->rb_root);
}
/*
* The below helper functions use 2 operators with 3 different
* calling conventions. The operators are related like:
*
* comp(a->key,b) < 0 := less(a,b)
* comp(a->key,b) > 0 := less(b,a)
* comp(a->key,b) == 0 := !less(a,b) && !less(b,a)
*
* If these operators define a partial order on the elements we make no
* guarantee on which of the elements matching the key is found. See
* rb_find().
*
* The reason for this is to allow the find() interface without requiring an
* on-stack dummy object, which might not be feasible due to object size.
*/
/**
* rb_add_cached() - insert @node into the leftmost cached tree @tree
* @node: node to insert
* @tree: leftmost cached tree to insert @node into
* @less: operator defining the (partial) node order
*
* Returns @node when it is the new leftmost, or NULL.
*/
static __always_inline struct rb_node *
rb_add_cached(struct rb_node *node, struct rb_root_cached *tree,
bool (*less)(struct rb_node *, const struct rb_node *))
{
struct rb_node **link = &tree->rb_root.rb_node;
struct rb_node *parent = NULL;
bool leftmost = true;
while (*link) {
parent = *link;
if (less(node, parent)) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
leftmost = false;
}
}
rb_link_node(node, parent, link);
rb_insert_color_cached(node, tree, leftmost);
return leftmost ? node : NULL;
}
/**
* rb_add() - insert @node into @tree
* @node: node to insert
* @tree: tree to insert @node into
* @less: operator defining the (partial) node order
*/
static __always_inline void
rb_add(struct rb_node *node, struct rb_root *tree,
bool (*less)(struct rb_node *, const struct rb_node *))
{
struct rb_node **link = &tree->rb_node;
struct rb_node *parent = NULL;
while (*link) {
parent = *link;
if (less(node, parent))
link = &parent->rb_left;
else
link = &parent->rb_right;
}
rb_link_node(node, parent, link);
rb_insert_color(node, tree);
}
/**
* rb_find_add() - find equivalent @node in @tree, or add @node
* @node: node to look-for / insert
* @tree: tree to search / modify
* @cmp: operator defining the node order
*
* Returns the rb_node matching @node, or NULL when no match is found and @node
* is inserted.
*/
static __always_inline struct rb_node *
rb_find_add(struct rb_node *node, struct rb_root *tree,
int (*cmp)(struct rb_node *, const struct rb_node *))
{
struct rb_node **link = &tree->rb_node;
struct rb_node *parent = NULL;
int c;
while (*link) {
parent = *link;
c = cmp(node, parent);
if (c < 0)
link = &parent->rb_left;
else if (c > 0)
link = &parent->rb_right;
else
return parent;
}
rb_link_node(node, parent, link);
rb_insert_color(node, tree);
return NULL;
}
/**
* rb_find() - find @key in tree @tree
* @key: key to match
* @tree: tree to search
* @cmp: operator defining the node order
*
* Returns the rb_node matching @key or NULL.
*/
static __always_inline struct rb_node *
rb_find(const void *key, const struct rb_root *tree,
int (*cmp)(const void *key, const struct rb_node *))
{
struct rb_node *node = tree->rb_node;
while (node) {
int c = cmp(key, node);
if (c < 0)
node = node->rb_left;
else if (c > 0)
node = node->rb_right;
else
return node;
}
return NULL;
}
/**
* rb_find_first() - find the first @key in @tree
* @key: key to match
* @tree: tree to search
* @cmp: operator defining node order
*
* Returns the leftmost node matching @key, or NULL.
*/
static __always_inline struct rb_node *
rb_find_first(const void *key, const struct rb_root *tree,
int (*cmp)(const void *key, const struct rb_node *))
{
struct rb_node *node = tree->rb_node;
struct rb_node *match = NULL;
while (node) {
int c = cmp(key, node);
if (c <= 0) {
if (!c)
match = node;
node = node->rb_left;
} else if (c > 0) {
node = node->rb_right;
}
}
return match;
}
/**
* rb_next_match() - find the next @key in @tree
* @key: key to match
* @tree: tree to search
* @cmp: operator defining node order
*
* Returns the next node matching @key, or NULL.
*/
static __always_inline struct rb_node *
rb_next_match(const void *key, struct rb_node *node,
int (*cmp)(const void *key, const struct rb_node *))
{
node = rb_next(node);
if (node && cmp(key, node))
node = NULL;
return node;
}
/**
* rb_for_each() - iterates a subtree matching @key
* @node: iterator
* @key: key to match
* @tree: tree to search
* @cmp: operator defining node order
*/
#define rb_for_each(node, key, tree, cmp) \
for ((node) = rb_find_first((key), (tree), (cmp)); \
(node); (node) = rb_next_match((key), (node), (cmp)))
#endif /* _LINUX_RBTREE_H */

View File

@@ -0,0 +1,313 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
Red Black Trees
(C) 1999 Andrea Arcangeli <andrea@suse.de>
(C) 2002 David Woodhouse <dwmw2@infradead.org>
(C) 2012 Michel Lespinasse <walken@google.com>
linux/include/linux/rbtree_augmented.h
*/
#ifndef _LINUX_RBTREE_AUGMENTED_H
#define _LINUX_RBTREE_AUGMENTED_H
/*
* Please note - only struct rb_augment_callbacks and the prototypes for
* rb_insert_augmented() and rb_erase_augmented() are intended to be public.
* The rest are implementation details you are not expected to depend on.
*
* See Documentation/core-api/rbtree.rst for documentation and samples.
*/
struct rb_augment_callbacks {
void (*propagate)(struct rb_node *node, struct rb_node *stop);
void (*copy)(struct rb_node *old, struct rb_node *new);
void (*rotate)(struct rb_node *old, struct rb_node *new);
};
extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
/*
* Fixup the rbtree and update the augmented information when rebalancing.
*
* On insertion, the user must update the augmented information on the path
* leading to the inserted node, then call rb_link_node() as usual and
* rb_insert_augmented() instead of the usual rb_insert_color() call.
* If rb_insert_augmented() rebalances the rbtree, it will callback into
* a user provided function to update the augmented information on the
* affected subtrees.
*/
static inline void
rb_insert_augmented(struct rb_node *node, struct rb_root *root,
const struct rb_augment_callbacks *augment)
{
__rb_insert_augmented(node, root, augment->rotate);
}
static inline void
rb_insert_augmented_cached(struct rb_node *node,
struct rb_root_cached *root, bool newleft,
const struct rb_augment_callbacks *augment)
{
if (newleft)
root->rb_leftmost = node;
rb_insert_augmented(node, &root->rb_root, augment);
}
/*
* Template for declaring augmented rbtree callbacks (generic case)
*
* RBSTATIC: 'static' or empty
* RBNAME: name of the rb_augment_callbacks structure
* RBSTRUCT: struct type of the tree nodes
* RBFIELD: name of struct rb_node field within RBSTRUCT
* RBAUGMENTED: name of field within RBSTRUCT holding data for subtree
* RBCOMPUTE: name of function that recomputes the RBAUGMENTED data
*/
#define RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, \
RBSTRUCT, RBFIELD, RBAUGMENTED, RBCOMPUTE) \
static inline void \
RBNAME ## _propagate(struct rb_node *rb, struct rb_node *stop) \
{ \
while (rb != stop) { \
RBSTRUCT *node = rb_entry(rb, RBSTRUCT, RBFIELD); \
if (RBCOMPUTE(node, true)) \
break; \
rb = rb_parent(&node->RBFIELD); \
} \
} \
static inline void \
RBNAME ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \
{ \
RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD); \
RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD); \
new->RBAUGMENTED = old->RBAUGMENTED; \
} \
static void \
RBNAME ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \
{ \
RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD); \
RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD); \
new->RBAUGMENTED = old->RBAUGMENTED; \
RBCOMPUTE(old, false); \
} \
RBSTATIC const struct rb_augment_callbacks RBNAME = { \
.propagate = RBNAME ## _propagate, \
.copy = RBNAME ## _copy, \
.rotate = RBNAME ## _rotate \
};
/*
* Template for declaring augmented rbtree callbacks,
* computing RBAUGMENTED scalar as max(RBCOMPUTE(node)) for all subtree nodes.
*
* RBSTATIC: 'static' or empty
* RBNAME: name of the rb_augment_callbacks structure
* RBSTRUCT: struct type of the tree nodes
* RBFIELD: name of struct rb_node field within RBSTRUCT
* RBTYPE: type of the RBAUGMENTED field
* RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree
* RBCOMPUTE: name of function that returns the per-node RBTYPE scalar
*/
#define RB_DECLARE_CALLBACKS_MAX(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD, \
RBTYPE, RBAUGMENTED, RBCOMPUTE) \
static inline bool RBNAME ## _compute_max(RBSTRUCT *node, bool exit) \
{ \
RBSTRUCT *child; \
RBTYPE max = RBCOMPUTE(node); \
if (node->RBFIELD.rb_left) { \
child = rb_entry(node->RBFIELD.rb_left, RBSTRUCT, RBFIELD); \
if (child->RBAUGMENTED > max) \
max = child->RBAUGMENTED; \
} \
if (node->RBFIELD.rb_right) { \
child = rb_entry(node->RBFIELD.rb_right, RBSTRUCT, RBFIELD); \
if (child->RBAUGMENTED > max) \
max = child->RBAUGMENTED; \
} \
if (exit && node->RBAUGMENTED == max) \
return true; \
node->RBAUGMENTED = max; \
return false; \
} \
RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, \
RBSTRUCT, RBFIELD, RBAUGMENTED, RBNAME ## _compute_max)
#define RB_RED 0
#define RB_BLACK 1
#define __rb_parent(pc) ((struct rb_node *)(pc & ~3))
#define __rb_color(pc) ((pc) & 1)
#define __rb_is_black(pc) __rb_color(pc)
#define __rb_is_red(pc) (!__rb_color(pc))
#define rb_color(rb) __rb_color((rb)->__rb_parent_color)
#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color)
#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color)
static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
{
rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
}
static inline void rb_set_parent_color(struct rb_node *rb,
struct rb_node *p, int color)
{
rb->__rb_parent_color = (unsigned long)p | color;
}
static inline void
__rb_change_child(struct rb_node *old, struct rb_node *new,
struct rb_node *parent, struct rb_root *root)
{
if (parent) {
if (parent->rb_left == old)
WRITE_ONCE(parent->rb_left, new);
else
WRITE_ONCE(parent->rb_right, new);
} else
WRITE_ONCE(root->rb_node, new);
}
static inline void
__rb_change_child_rcu(struct rb_node *old, struct rb_node *new,
struct rb_node *parent, struct rb_root *root)
{
if (parent) {
if (parent->rb_left == old)
rcu_assign_pointer(parent->rb_left, new);
else
rcu_assign_pointer(parent->rb_right, new);
} else
rcu_assign_pointer(root->rb_node, new);
}
extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
static __always_inline struct rb_node *
__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
const struct rb_augment_callbacks *augment)
{
struct rb_node *child = node->rb_right;
struct rb_node *tmp = node->rb_left;
struct rb_node *parent, *rebalance;
unsigned long pc;
if (!tmp) {
/*
* Case 1: node to erase has no more than 1 child (easy!)
*
* Note that if there is one child it must be red due to 5)
* and node must be black due to 4). We adjust colors locally
* so as to bypass __rb_erase_color() later on.
*/
pc = node->__rb_parent_color;
parent = __rb_parent(pc);
__rb_change_child(node, child, parent, root);
if (child) {
child->__rb_parent_color = pc;
rebalance = NULL;
} else
rebalance = __rb_is_black(pc) ? parent : NULL;
tmp = parent;
} else if (!child) {
/* Still case 1, but this time the child is node->rb_left */
tmp->__rb_parent_color = pc = node->__rb_parent_color;
parent = __rb_parent(pc);
__rb_change_child(node, tmp, parent, root);
rebalance = NULL;
tmp = parent;
} else {
struct rb_node *successor = child, *child2;
tmp = child->rb_left;
if (!tmp) {
/*
* Case 2: node's successor is its right child
*
* (n) (s)
* / \ / \
* (x) (s) -> (x) (c)
* \
* (c)
*/
parent = successor;
child2 = successor->rb_right;
augment->copy(node, successor);
} else {
/*
* Case 3: node's successor is leftmost under
* node's right child subtree
*
* (n) (s)
* / \ / \
* (x) (y) -> (x) (y)
* / /
* (p) (p)
* / /
* (s) (c)
* \
* (c)
*/
do {
parent = successor;
successor = tmp;
tmp = tmp->rb_left;
} while (tmp);
child2 = successor->rb_right;
WRITE_ONCE(parent->rb_left, child2);
WRITE_ONCE(successor->rb_right, child);
rb_set_parent(child, successor);
augment->copy(node, successor);
augment->propagate(parent, successor);
}
tmp = node->rb_left;
WRITE_ONCE(successor->rb_left, tmp);
rb_set_parent(tmp, successor);
pc = node->__rb_parent_color;
tmp = __rb_parent(pc);
__rb_change_child(node, successor, tmp, root);
if (child2) {
rb_set_parent_color(child2, parent, RB_BLACK);
rebalance = NULL;
} else {
rebalance = rb_is_black(successor) ? parent : NULL;
}
successor->__rb_parent_color = pc;
tmp = successor;
}
augment->propagate(tmp, NULL);
return rebalance;
}
static __always_inline void
rb_erase_augmented(struct rb_node *node, struct rb_root *root,
const struct rb_augment_callbacks *augment)
{
struct rb_node *rebalance = __rb_erase_augmented(node, root, augment);
if (rebalance)
__rb_erase_color(rebalance, root, augment->rotate);
}
static __always_inline void
rb_erase_augmented_cached(struct rb_node *node, struct rb_root_cached *root,
const struct rb_augment_callbacks *augment)
{
if (root->rb_leftmost == node)
root->rb_leftmost = rb_next(node);
rb_erase_augmented(node, &root->rb_root, augment);
}
#endif /* _LINUX_RBTREE_AUGMENTED_H */

34
utils/src/rbtree_types.h Normal file
View File

@@ -0,0 +1,34 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _LINUX_RBTREE_TYPES_H
#define _LINUX_RBTREE_TYPES_H

struct rb_node {
        unsigned long  __rb_parent_color;
        struct rb_node *rb_right;
        struct rb_node *rb_left;
} __attribute__((aligned(sizeof(long))));
/* The alignment might seem pointless, but allegedly CRIS needs it */

struct rb_root {
        struct rb_node *rb_node;
};

/*
 * Leftmost-cached rbtrees.
 *
 * We do not cache the rightmost node based on footprint
 * size vs number of potential users that could benefit
 * from O(1) rb_last(). Just not worth it, users that want
 * this feature can always implement the logic explicitly.
 * Furthermore, users that want to cache both pointers may
 * find it a bit asymmetric, but that's ok.
 */
struct rb_root_cached {
        struct rb_root rb_root;
        struct rb_node *rb_leftmost;
};

#define RB_ROOT (struct rb_root) { NULL, }
#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }

#endif

View File

@@ -44,3 +44,37 @@ int srch_decode_entry(void *buf, struct scoutfs_srch_entry *sre,
        return tot;
 }
+
+static int encode_u64(__le64 *buf, u64 val)
+{
+       int bytes;
+
+       val = (val << 1) ^ ((s64)val >> 63); /* shift sign extend */
+       bytes = (fls64(val) + 7) >> 3;
+       put_unaligned_le64(val, buf);
+
+       return bytes;
+}
+
+int srch_encode_entry(void *buf, struct scoutfs_srch_entry *sre, struct scoutfs_srch_entry *prev)
+{
+       u64 diffs[] = {
+               le64_to_cpu(sre->hash) - le64_to_cpu(prev->hash),
+               le64_to_cpu(sre->ino) - le64_to_cpu(prev->ino),
+               le64_to_cpu(sre->id) - le64_to_cpu(prev->id),
+       };
+       u16 lengths = 0;
+       int bytes;
+       int tot = 2;
+       int i;
+
+       for (i = 0; i < array_size(diffs); i++) {
+               bytes = encode_u64(buf + tot, diffs[i]);
+               lengths |= bytes << (i << 2);
+               tot += bytes;
+       }
+
+       put_unaligned_le16(lengths, buf);
+
+       return tot;
+}
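
encode_u64() zigzag-maps each signed delta so that small magnitudes in
either direction become small unsigned values that need few bytes, and
srch_encode_entry() packs the three byte counts four bits apiece into
the leading little-endian u16. A standalone check of the zigzag step
(illustrative only):

        static u64 zigzag(s64 v)
        {
                return ((u64)v << 1) ^ (u64)(v >> 63);
        }

        /* zigzag(0) == 0, zigzag(-1) == 1, zigzag(1) == 2, zigzag(-2) == 3 */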

View File

@@ -3,5 +3,6 @@
 int srch_decode_entry(void *buf, struct scoutfs_srch_entry *sre,
                      struct scoutfs_srch_entry *prev);
+int srch_encode_entry(void *buf, struct scoutfs_srch_entry *sre, struct scoutfs_srch_entry *prev);

 #endif

View File

@@ -70,6 +70,8 @@ do { \
 #define container_of(ptr, type, memb) \
        ((type *)((void *)(ptr) - offsetof(type, memb)))

+#define NSEC_PER_SEC 1000000000
+
 #define BITS_PER_LONG (sizeof(long) * 8)
 #define U8_MAX ((u8)~0ULL)
 #define U16_MAX ((u16)~0ULL)
@@ -82,6 +84,7 @@ do { \
                                                                        \
        (_x == 0 ? 0 : 64 - __builtin_clzll(_x));                       \
 })
+#define fls64(x) flsll(x)

 #define ilog2(x) \
 ({ \
@@ -99,6 +102,16 @@ emit_get_unaligned_le(16)
 emit_get_unaligned_le(32)
 emit_get_unaligned_le(64)

+#define emit_put_unaligned_le(nr)                                      \
+static inline void put_unaligned_le##nr(u##nr val, void *buf)          \
+{                                                                      \
+       __le##nr x = cpu_to_le##nr(val);                                \
+       memcpy(buf, &x, sizeof(x));                                     \
+}
+
+emit_put_unaligned_le(16)
+emit_put_unaligned_le(32)
+emit_put_unaligned_le(64)
+
 /*
  * return -1,0,+1 based on the memcmp comparison of the minimum of their
  * two lengths. If their min shared bytes are equal but the lengths
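
The generated put_unaligned_le{16,32,64}() helpers follow the usual
portable pattern for unaligned stores: convert to little-endian, then
memcpy() the bytes and let the compiler lower that to a plain store
where alignment allows. A hypothetical caller:

        u8 buf[10];

        put_unaligned_le32(0x11223344, buf + 1); /* unaligned destination is fine */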