mirror of
https://github.com/versity/scoutfs.git
synced 2026-04-20 21:40:29 +00:00
Add xattr .totl. tag
Add the .totl. xattr tag. When the tag is set the end of the name specifies a total name with 3 encoded u64s separated by dots. The value of the xattr is a u64 that is added to the named total. An ioctl is added to read the totals. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
136
kmod/src/btree.c
136
kmod/src/btree.c
@@ -30,6 +30,7 @@
|
||||
#include "avl.h"
|
||||
#include "hash.h"
|
||||
#include "sort_priv.h"
|
||||
#include "forest.h"
|
||||
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
@@ -1902,9 +1903,23 @@ int scoutfs_btree_insert_list(struct super_block *sb,
|
||||
do {
|
||||
item = leaf_item_hash_search(sb, bt, &lst->key);
|
||||
if (item) {
|
||||
/* try to merge delta values; a _NULL result isn't deleted here, merge will drop it */
|
||||
ret = scoutfs_forest_combine_deltas(&lst->key,
|
||||
item_val(bt, item),
|
||||
item_val_len(item),
|
||||
lst->val, lst->val_len);
|
||||
if (ret < 0) {
|
||||
scoutfs_block_put(sb, bl);
|
||||
goto out;
|
||||
}
|
||||
|
||||
item->seq = cpu_to_le64(lst->seq);
|
||||
item->flags = lst->flags;
|
||||
update_item_value(bt, item, lst->val, lst->val_len);
|
||||
|
||||
if (ret == 0)
|
||||
update_item_value(bt, item, lst->val, lst->val_len);
|
||||
else
|
||||
ret = 0;
|
||||
} else {
|
||||
scoutfs_avl_search(&bt->item_root,
|
||||
cmp_key_item, &lst->key,
|
||||
@@ -2039,6 +2054,16 @@ static struct merge_pos *first_mpos(struct rb_root *root)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct merge_pos *next_mpos(struct merge_pos *mpos)
|
||||
{
|
||||
struct rb_node *node;
|
||||
|
||||
if (mpos && (node = rb_next(&mpos->node)))
|
||||
return container_of(node, struct merge_pos, node);
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void free_mpos(struct super_block *sb, struct merge_pos *mpos)
|
||||
{
|
||||
scoutfs_block_put(sb, mpos->bl);
|
||||
@@ -2142,6 +2167,56 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller has reset all the merge positions for all the input log
|
||||
* btree roots and wants the next logged item it should try and merge
|
||||
* with the items in the fs_root.
|
||||
*
|
||||
* We look ahead in the logged item stream to see if we should merge any
|
||||
* older logged delta items into one result for the caller. We also
|
||||
* take this opportunity to skip and reset the mpos for any older
|
||||
* versions of the first item.
|
||||
*/
|
||||
static int next_resolved_mpos(struct super_block *sb, struct rb_root *pos_root,
|
||||
struct scoutfs_key *end, struct merge_pos **mpos_ret)
|
||||
{
|
||||
struct merge_pos *mpos;
|
||||
struct merge_pos *next;
|
||||
struct scoutfs_key key;
|
||||
int ret = 0;
|
||||
|
||||
while ((mpos = first_mpos(pos_root)) && (next = next_mpos(mpos)) &&
|
||||
!scoutfs_key_compare(mpos->key, next->key)) {
|
||||
|
||||
ret = scoutfs_forest_combine_deltas(mpos->key, mpos->val, mpos->val_len,
|
||||
next->val, next->val_len);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
/* reset advances to the next item */
|
||||
key = *mpos->key;
|
||||
scoutfs_key_inc(&key);
|
||||
|
||||
/* always skip next combined or older version */
|
||||
ret = reset_mpos(sb, pos_root, next, &key, end);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
if (ret == SCOUTFS_DELTA_COMBINED) {
|
||||
scoutfs_inc_counter(sb, btree_merge_delta_combined);
|
||||
} else if (ret == SCOUTFS_DELTA_COMBINED_NULL) {
|
||||
scoutfs_inc_counter(sb, btree_merge_delta_null);
|
||||
/* if merging resulted in no info, skip current */
|
||||
ret = reset_mpos(sb, pos_root, mpos, &key, end);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*mpos_ret = mpos;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Merge items from a number of read-only input roots into a writable
|
||||
* destination root. The order of the input roots doesn't matter, the
|
||||
@@ -2179,6 +2254,7 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
int walk_val_len;
|
||||
int walk_flags;
|
||||
bool is_del;
|
||||
int delta;
|
||||
int cmp;
|
||||
int ret;
|
||||
|
||||
@@ -2205,7 +2281,7 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
walk_flags |= BTW_SUBTREE;
|
||||
walk_val_len = 0;
|
||||
|
||||
while ((mpos = first_mpos(&pos_root))) {
|
||||
while ((ret = next_resolved_mpos(sb, &pos_root, end, &mpos)) == 0 && mpos) {
|
||||
|
||||
if (scoutfs_block_writer_dirty_bytes(sb, wri) >= dirty_limit) {
|
||||
scoutfs_inc_counter(sb, btree_merge_dirty_limit);
|
||||
@@ -2233,7 +2309,13 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
bt = bl->data;
|
||||
scoutfs_inc_counter(sb, btree_merge_walk);
|
||||
|
||||
for (; mpos; mpos = first_mpos(&pos_root)) {
|
||||
/* catch non-root blocks that fell under low, maybe from null deltas */
|
||||
if (root->ref.blkno != bt->hdr.blkno && !total_above_join_low_water(bt)) {
|
||||
walk_flags |= BTW_DELETE;
|
||||
continue;
|
||||
}
|
||||
|
||||
while ((ret = next_resolved_mpos(sb, &pos_root, end, &mpos)) == 0 && mpos) {
|
||||
|
||||
/* walk to new leaf if we exceed parent ref key */
|
||||
if (scoutfs_key_compare(mpos->key, &kr.end) > 0)
|
||||
@@ -2243,6 +2325,23 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
item = leaf_item_hash_search(sb, bt, mpos->key);
|
||||
is_del = !!(mpos->flags & SCOUTFS_ITEM_FLAG_DELETION);
|
||||
|
||||
/* see if we're merging delta items */
|
||||
if (item && !is_del)
|
||||
delta = scoutfs_forest_combine_deltas(mpos->key,
|
||||
item_val(bt, item),
|
||||
item_val_len(item),
|
||||
mpos->val, mpos->val_len);
|
||||
else
|
||||
delta = 0;
|
||||
if (delta < 0) {
|
||||
ret = delta;
|
||||
goto out;
|
||||
} else if (delta == SCOUTFS_DELTA_COMBINED) {
|
||||
scoutfs_inc_counter(sb, btree_merge_delta_combined);
|
||||
} else if (delta == SCOUTFS_DELTA_COMBINED_NULL) {
|
||||
scoutfs_inc_counter(sb, btree_merge_delta_null);
|
||||
}
|
||||
|
||||
trace_scoutfs_btree_merge_items(sb, mpos->root,
|
||||
mpos->key, mpos->val_len,
|
||||
item ? root : NULL,
|
||||
@@ -2250,7 +2349,7 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
item ? item_val_len(item) : 0, is_del);
|
||||
|
||||
/* rewalk and split if ins/update needs room */
|
||||
if (!is_del && !mid_free_item_room(bt, mpos->val_len)) {
|
||||
if (!is_del && !delta && !mid_free_item_room(bt, mpos->val_len)) {
|
||||
walk_flags |= BTW_INSERT;
|
||||
walk_val_len = mpos->val_len;
|
||||
break;
|
||||
@@ -2267,13 +2366,31 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
}
|
||||
|
||||
/* update existing items */
|
||||
if (item && !is_del) {
|
||||
if (item && !is_del && !delta) {
|
||||
item->seq = cpu_to_le64(mpos->seq);
|
||||
item->flags = mpos->flags;
|
||||
update_item_value(bt, item, mpos->val, mpos->val_len);
|
||||
scoutfs_inc_counter(sb, btree_merge_update);
|
||||
}
|
||||
|
||||
/* update combined delta item seq */
|
||||
if (delta == SCOUTFS_DELTA_COMBINED) {
|
||||
item->seq = cpu_to_le64(mpos->seq);
|
||||
}
|
||||
|
||||
/*
|
||||
* combined delta items that aren't needed are
|
||||
* immediately dropped. We don't back off if
|
||||
* the deletion would fall under the low water
|
||||
* mark because we've already modified the
|
||||
* value, we don't want to retry after a join
|
||||
* and apply the value a second time.
|
||||
*/
|
||||
if (delta == SCOUTFS_DELTA_COMBINED_NULL) {
|
||||
delete_item(bt, item, NULL);
|
||||
scoutfs_inc_counter(sb, btree_merge_delta_null);
|
||||
}
|
||||
|
||||
/* delete if merge item was deletion */
|
||||
if (item && is_del) {
|
||||
/* rewalk and join if non-root falls under low water mark */
|
||||
@@ -2293,12 +2410,9 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
/* finished with this key, skip any older items */
|
||||
next = *mpos->key;
|
||||
scoutfs_key_inc(&next);
|
||||
while (mpos && scoutfs_key_compare(mpos->key, &next) < 0) {
|
||||
ret = reset_mpos(sb, &pos_root, mpos, &next, end);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
mpos = first_mpos(&pos_root);
|
||||
}
|
||||
ret = reset_mpos(sb, &pos_root, mpos, &next, end);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -47,6 +47,8 @@
|
||||
EXPAND_COUNTER(btree_merge) \
|
||||
EXPAND_COUNTER(btree_merge_alloc_low) \
|
||||
EXPAND_COUNTER(btree_merge_delete) \
|
||||
EXPAND_COUNTER(btree_merge_delta_combined) \
|
||||
EXPAND_COUNTER(btree_merge_delta_null) \
|
||||
EXPAND_COUNTER(btree_merge_dirty_limit) \
|
||||
EXPAND_COUNTER(btree_merge_drop_old) \
|
||||
EXPAND_COUNTER(btree_merge_insert) \
|
||||
@@ -91,6 +93,8 @@
|
||||
EXPAND_COUNTER(item_clear_dirty) \
|
||||
EXPAND_COUNTER(item_create) \
|
||||
EXPAND_COUNTER(item_delete) \
|
||||
EXPAND_COUNTER(item_delta) \
|
||||
EXPAND_COUNTER(item_delta_written) \
|
||||
EXPAND_COUNTER(item_dirty) \
|
||||
EXPAND_COUNTER(item_invalidate) \
|
||||
EXPAND_COUNTER(item_invalidate_page) \
|
||||
@@ -188,6 +192,11 @@
|
||||
EXPAND_COUNTER(srch_search_xattrs) \
|
||||
EXPAND_COUNTER(srch_read_stale) \
|
||||
EXPAND_COUNTER(statfs) \
|
||||
EXPAND_COUNTER(totl_read_copied) \
|
||||
EXPAND_COUNTER(totl_read_finalized) \
|
||||
EXPAND_COUNTER(totl_read_fs) \
|
||||
EXPAND_COUNTER(totl_read_item) \
|
||||
EXPAND_COUNTER(totl_read_logged) \
|
||||
EXPAND_COUNTER(trans_commit_data_alloc_low) \
|
||||
EXPAND_COUNTER(trans_commit_dirty_meta_full) \
|
||||
EXPAND_COUNTER(trans_commit_fsync) \
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "hash.h"
|
||||
#include "srch.h"
|
||||
#include "counters.h"
|
||||
#include "xattr.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
/*
|
||||
@@ -221,7 +222,7 @@ out:
|
||||
}
|
||||
|
||||
struct forest_read_items_data {
|
||||
bool is_fs;
|
||||
int fic;
|
||||
scoutfs_forest_item_cb cb;
|
||||
void *cb_arg;
|
||||
};
|
||||
@@ -231,7 +232,7 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u6
|
||||
{
|
||||
struct forest_read_items_data *rid = arg;
|
||||
|
||||
return rid->cb(sb, key, seq, flags, val, val_len, rid->cb_arg);
|
||||
return rid->cb(sb, key, seq, flags, val, val_len, rid->fic, rid->cb_arg);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -247,8 +248,8 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u6
|
||||
* to reset their state and retry with a newer version of the btrees.
|
||||
*/
|
||||
int scoutfs_forest_read_items(struct super_block *sb,
|
||||
struct scoutfs_lock *lock,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_key *bloom_key,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg)
|
||||
@@ -264,11 +265,13 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_block *bl;
|
||||
struct scoutfs_key ltk;
|
||||
struct scoutfs_key orig_start = *start;
|
||||
struct scoutfs_key orig_end = *end;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
scoutfs_inc_counter(sb, forest_read_items);
|
||||
calc_bloom_nrs(&bloom, &lock->start);
|
||||
calc_bloom_nrs(&bloom, bloom_key);
|
||||
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
if (ret)
|
||||
@@ -276,16 +279,16 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
|
||||
trace_scoutfs_forest_using_roots(sb, &roots.fs_root, &roots.logs_root);
|
||||
|
||||
*start = lock->start;
|
||||
*end = lock->end;
|
||||
*start = orig_start;
|
||||
*end = orig_end;
|
||||
|
||||
/* start with fs root items */
|
||||
rid.is_fs = true;
|
||||
rid.fic |= FIC_FS_ROOT;
|
||||
ret = scoutfs_btree_read_items(sb, &roots.fs_root, key, start, end,
|
||||
forest_read_items, &rid);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
rid.is_fs = false;
|
||||
rid.fic &= ~FIC_FS_ROOT;
|
||||
|
||||
scoutfs_key_init_log_trees(<k, 0, 0);
|
||||
for (;; scoutfs_key_inc(<k)) {
|
||||
@@ -330,10 +333,15 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
|
||||
scoutfs_inc_counter(sb, forest_bloom_pass);
|
||||
|
||||
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED))
|
||||
rid.fic |= FIC_FINALIZED;
|
||||
|
||||
ret = scoutfs_btree_read_items(sb, <.item_root, key, start,
|
||||
end, forest_read_items, &rid);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
rid.fic &= ~FIC_FINALIZED;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
@@ -341,6 +349,24 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the items are deltas then combine the src with the destination
|
||||
* value and store the result in the destination.
|
||||
*
|
||||
* Returns:
|
||||
* -errno: fatal error, no change
|
||||
* 0: not delta items, no change
|
||||
* +ve: SCOUTFS_DELTA_ values indicating when dst and/or src can be dropped
|
||||
*/
|
||||
int scoutfs_forest_combine_deltas(struct scoutfs_key *key, void *dst, int dst_len,
|
||||
void *src, int src_len)
|
||||
{
|
||||
if (key->sk_zone == SCOUTFS_XATTR_TOTL_ZONE)
|
||||
return scoutfs_xattr_combine_totl(dst, dst_len, src, src_len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure that the bloom bits for the lock's start key are all set in
|
||||
* the current log's bloom block. We record the nr of our log tree in
|
||||
|
||||
@@ -8,14 +8,18 @@ struct scoutfs_block;
|
||||
#include "btree.h"
|
||||
|
||||
/* caller gives an item to the callback */
|
||||
enum {
|
||||
FIC_FS_ROOT = (1 << 0),
|
||||
FIC_FINALIZED = (1 << 1),
|
||||
};
|
||||
typedef int (*scoutfs_forest_item_cb)(struct super_block *sb, struct scoutfs_key *key, u64 seq,
|
||||
u8 flags, void *val, int val_len, void *arg);
|
||||
u8 flags, void *val, int val_len, int fic, void *arg);
|
||||
|
||||
int scoutfs_forest_next_hint(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_key *next);
|
||||
int scoutfs_forest_read_items(struct super_block *sb,
|
||||
struct scoutfs_lock *lock,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_key *bloom_key,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg);
|
||||
@@ -36,6 +40,12 @@ void scoutfs_forest_init_btrees(struct super_block *sb,
|
||||
void scoutfs_forest_get_btrees(struct super_block *sb,
|
||||
struct scoutfs_log_trees *lt);
|
||||
|
||||
/* > 0 error codes */
|
||||
#define SCOUTFS_DELTA_COMBINED 1 /* src val was combined, drop src */
|
||||
#define SCOUTFS_DELTA_COMBINED_NULL 2 /* combined val has no data, drop both */
|
||||
int scoutfs_forest_combine_deltas(struct scoutfs_key *key, void *dst, int dst_len,
|
||||
void *src, int src_len);
|
||||
|
||||
int scoutfs_forest_setup(struct super_block *sb);
|
||||
void scoutfs_forest_start(struct super_block *sb);
|
||||
void scoutfs_forest_stop(struct super_block *sb);
|
||||
|
||||
@@ -168,6 +168,11 @@ struct scoutfs_key {
|
||||
#define sko_rid _sk_first
|
||||
#define sko_ino _sk_second
|
||||
|
||||
/* xattr totl */
|
||||
#define skxt_a _sk_first
|
||||
#define skxt_b _sk_second
|
||||
#define skxt_c _sk_third
|
||||
|
||||
/* inode */
|
||||
#define ski_ino _sk_first
|
||||
|
||||
@@ -568,8 +573,9 @@ struct scoutfs_log_merge_freeing {
|
||||
*/
|
||||
#define SCOUTFS_INODE_INDEX_ZONE 1
|
||||
#define SCOUTFS_ORPHAN_ZONE 2
|
||||
#define SCOUTFS_FS_ZONE 3
|
||||
#define SCOUTFS_LOCK_ZONE 4
|
||||
#define SCOUTFS_XATTR_TOTL_ZONE 3
|
||||
#define SCOUTFS_FS_ZONE 4
|
||||
#define SCOUTFS_LOCK_ZONE 5
|
||||
/* Items only stored in server btrees */
|
||||
#define SCOUTFS_LOG_TREES_ZONE 6
|
||||
#define SCOUTFS_TRANS_SEQ_ZONE 7
|
||||
@@ -633,6 +639,17 @@ struct scoutfs_xattr {
|
||||
__u8 name[];
|
||||
};
|
||||
|
||||
/*
|
||||
* .totl. xattrs are mapped to items. The dotted u64s in the xattr name
|
||||
* map to the item key. The item value total is the sum of all the
|
||||
* xattr values. The item value count records the number of xattrs
|
||||
* contributing to the total and is used when combining logged items to
|
||||
* determine if totals are being created or destroyed.
|
||||
*/
|
||||
struct scoutfs_xattr_totl_val {
|
||||
__le64 total;
|
||||
__le64 count;
|
||||
};
|
||||
|
||||
/* XXX does this exist upstream somewhere? */
|
||||
#define member_sizeof(TYPE, MEMBER) (sizeof(((TYPE *)0)->MEMBER))
|
||||
@@ -883,6 +900,7 @@ enum scoutfs_dentry_type {
|
||||
#define SCOUTFS_XATTR_MAX_NAME_LEN 255
|
||||
#define SCOUTFS_XATTR_MAX_VAL_LEN 65535
|
||||
#define SCOUTFS_XATTR_MAX_PART_SIZE SCOUTFS_MAX_VAL_SIZE
|
||||
#define SCOUTFS_XATTR_MAX_TOTL_U64 23 /* octal U64_MAX */
|
||||
|
||||
#define SCOUTFS_XATTR_NR_PARTS(name_len, val_len) \
|
||||
DIV_ROUND_UP(sizeof(struct scoutfs_xattr) + name_len + val_len, \
|
||||
|
||||
291
kmod/src/ioctl.c
291
kmod/src/ioctl.c
@@ -21,6 +21,7 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/aio.h>
|
||||
#include <linux/list_sort.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "key.h"
|
||||
@@ -39,6 +40,7 @@
|
||||
#include "srch.h"
|
||||
#include "alloc.h"
|
||||
#include "server.h"
|
||||
#include "counters.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
/*
|
||||
@@ -1041,6 +1043,293 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Tracks everything read for one totl name while the read ioctl walks
 * the item btrees.  read_xattr_total_item() fills in one of the three
 * groups of fields depending on where each item came from and the
 * ioctl later decides which groups contribute to xt.total and
 * xt.count.
 */
struct xattr_total_entry {
	/* links the entry into the caller's rbtree, sorted by xt.name */
	struct rb_node node;
	/* the name and the resulting total and count copied to userspace */
	struct scoutfs_ioctl_xattr_total xt;
	/* item from the fs_root; a zero fs_seq is treated as no fs item seen */
	u64 fs_seq;
	u64 fs_total;
	u64 fs_count;
	/* items from finalized log btrees: last seq seen, summed total and count */
	u64 fin_seq;
	u64 fin_total;
	/* signed, unlike fs_count -- presumably logged deltas can be negative; TODO confirm */
	s64 fin_count;
	/* items from active log btrees: last seq seen, summed total and count */
	u64 log_seq;
	u64 log_total;
	s64 log_count;
};
|
||||
|
||||
static int cmp_xt_entry_name(const struct xattr_total_entry *a,
|
||||
const struct xattr_total_entry *b)
|
||||
|
||||
{
|
||||
return scoutfs_cmp_u64s(a->xt.name[0], b->xt.name[0]) ?:
|
||||
scoutfs_cmp_u64s(a->xt.name[1], b->xt.name[1]) ?:
|
||||
scoutfs_cmp_u64s(a->xt.name[2], b->xt.name[2]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Record the contribution of the three classes of logged items we can
|
||||
* see: the item in the fs_root, items from finalized log btrees, and
|
||||
* items from active log btrees. Once we have the full set the caller
|
||||
* can decide which of the items contribute to the total it sends to the
|
||||
* user.
|
||||
*/
|
||||
static int read_xattr_total_item(struct super_block *sb, struct scoutfs_key *key,
|
||||
u64 seq, u8 flags, void *val, int val_len, int fic, void *arg)
|
||||
{
|
||||
struct scoutfs_xattr_totl_val *tval = val;
|
||||
struct xattr_total_entry *ent;
|
||||
struct xattr_total_entry rd;
|
||||
struct rb_root *root = arg;
|
||||
struct rb_node *parent;
|
||||
struct rb_node **node;
|
||||
int cmp;
|
||||
|
||||
rd.xt.name[0] = le64_to_cpu(key->skxt_a);
|
||||
rd.xt.name[1] = le64_to_cpu(key->skxt_b);
|
||||
rd.xt.name[2] = le64_to_cpu(key->skxt_c);
|
||||
|
||||
/* find entry matching name */
|
||||
node = &root->rb_node;
|
||||
parent = NULL;
|
||||
cmp = -1;
|
||||
while (*node) {
|
||||
parent = *node;
|
||||
ent = container_of(*node, struct xattr_total_entry, node);
|
||||
|
||||
/* sort merge items by key then newest to oldest */
|
||||
cmp = cmp_xt_entry_name(&rd, ent);
|
||||
if (cmp < 0)
|
||||
node = &(*node)->rb_left;
|
||||
else if (cmp > 0)
|
||||
node = &(*node)->rb_right;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
/* allocate and insert new node if we need to */
|
||||
if (cmp != 0) {
|
||||
ent = kzalloc(sizeof(*ent), GFP_KERNEL);
|
||||
if (!ent)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(&ent->xt.name, &rd.xt.name, sizeof(ent->xt.name));
|
||||
|
||||
rb_link_node(&ent->node, parent, node);
|
||||
rb_insert_color(&ent->node, root);
|
||||
}
|
||||
|
||||
if (fic & FIC_FS_ROOT) {
|
||||
ent->fs_seq = seq;
|
||||
ent->fs_total = le64_to_cpu(tval->total);
|
||||
ent->fs_count = le64_to_cpu(tval->count);
|
||||
} else if (fic & FIC_FINALIZED) {
|
||||
ent->fin_seq = seq;
|
||||
ent->fin_total += le64_to_cpu(tval->total);
|
||||
ent->fin_count += le64_to_cpu(tval->count);
|
||||
} else {
|
||||
ent->log_seq = seq;
|
||||
ent->log_total += le64_to_cpu(tval->total);
|
||||
ent->log_count += le64_to_cpu(tval->count);
|
||||
}
|
||||
|
||||
scoutfs_inc_counter(sb, totl_read_item);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Iterate over the xattr_total_entry structs in an rbtree from first
 * to last, or from last to first for the _reverse variant.  ent is set
 * to the current entry and node is advanced to the following node
 * before the loop body runs, so these are always _safe: the body may
 * erase and free ent.
 */
#define for_each_xt_ent(ent, node, root)					\
	for (node = rb_first(root);						\
	     node && (ent = rb_entry(node, struct xattr_total_entry, node),	\
		      node = rb_next(node), 1); )

#define for_each_xt_ent_reverse(ent, node, root)				\
	for (node = rb_last(root);						\
	     node && (ent = rb_entry(node, struct xattr_total_entry, node),	\
		      node = rb_prev(node), 1); )
|
||||
|
||||
/* Remove an entry from the rbtree and free it. */
static void free_xt_ent(struct rb_root *root, struct xattr_total_entry *ent)
{
	rb_erase(&ent->node, root);
	kfree(ent);
}
|
||||
|
||||
static void free_all_xt_ents(struct rb_root *root)
|
||||
{
|
||||
struct xattr_total_entry *ent;
|
||||
struct rb_node *node;
|
||||
|
||||
for_each_xt_ent(ent, node, root)
|
||||
free_xt_ent(root, ent);
|
||||
}
|
||||
|
||||
/*
 * Starting from the caller's pos_name, copy the names, totals, and
 * counts for the .totl. tagged xattrs in the system sorted by their
 * name until the user's buffer is full.  This only sees xattrs that
 * have been committed.  It doesn't use locking to force commits and
 * block writers so it can be a little bit out of date with respect to
 * dirty xattrs in memory across the system.
 *
 * Our reader has to be careful because the log btree merging code can
 * write partial results to the fs_root.  This means that a reader can
 * see both cases where new finalized logs should be applied to the old
 * fs items and where old finalized logs have already been applied to
 * the partially merged fs items.  Currently active logged items are
 * always applied on top of all cases.
 *
 * These cases are differentiated with a combination of sequence numbers
 * in items, the count of contributing xattrs, and a flag
 * differentiating finalized and active logged items.  This lets us
 * recognize all cases, including when finalized logs were merged and
 * deleted the fs item.
 *
 * We're allocating a tracking struct for each totl name we see while
 * traversing the item btrees.  The forest reader is providing the items
 * it finds in leaf blocks that contain the search key.  In the worst
 * case all of these blocks are full and none of the items overlap.  At
 * most, figure order a thousand names per mount.  But in practice many
 * of these factors fall away: leaf blocks aren't full, leaf items
 * overlap, there aren't finalized log btrees, and not all mounts are
 * actively changing totals.  We're much more likely to only read a
 * leaf block's worth of totals that have been long since merged into
 * the fs_root.
 *
 * Returns the number of totals copied to the user's buffer, which can
 * be 0, or a negative errno: -EBADF if the file wasn't opened for
 * reading, -EPERM without CAP_SYS_ADMIN, -EFAULT if user memory
 * couldn't be copied, -EINVAL for an unaligned or too small totals
 * buffer, or an error from reading the items.
 */
static long scoutfs_ioc_read_xattr_totals(struct file *file, unsigned long arg)
{
	struct super_block *sb = file_inode(file)->i_sb;
	struct scoutfs_ioctl_read_xattr_totals __user *urxt = (void __user *)arg;
	struct scoutfs_ioctl_read_xattr_totals rxt;
	struct scoutfs_ioctl_xattr_total __user *uxt;
	struct xattr_total_entry *ent;
	struct scoutfs_key key;
	struct scoutfs_key bloom_key;
	struct scoutfs_key start;
	struct scoutfs_key end;
	struct rb_root root = RB_ROOT;
	struct rb_node *node;
	int count = 0;
	int ret;

	if (!(file->f_mode & FMODE_READ)) {
		ret = -EBADF;
		goto out;
	}

	if (!capable(CAP_SYS_ADMIN)) {
		ret = -EPERM;
		goto out;
	}

	if (copy_from_user(&rxt, urxt, sizeof(rxt))) {
		ret = -EFAULT;
		goto out;
	}
	uxt = (void __user *)rxt.totals_ptr;

	/* the buffer must be u64 aligned and have room for at least one total */
	if ((rxt.totals_ptr & (sizeof(__u64) - 1)) ||
	    (rxt.totals_bytes < sizeof(struct scoutfs_ioctl_xattr_total))) {
		ret = -EINVAL;
		goto out;
	}

	/* one bloom key, zeroed other than the zone, is used for every read */
	scoutfs_key_set_zeros(&bloom_key);
	bloom_key.sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
	scoutfs_xattr_init_totl_key(&start, rxt.pos_name);

	/* each pass reads one range of totals, stop once the buffer can't hold another */
	while (rxt.totals_bytes >= sizeof(struct scoutfs_ioctl_xattr_total)) {

		/* end is reset to the last key in the totl zone before each read */
		scoutfs_key_set_ones(&end);
		end.sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
		if (scoutfs_key_compare(&start, &end) > 0)
			break;

		key = start;
		ret = scoutfs_forest_read_items(sb, &key, &bloom_key, &start, &end,
						read_xattr_total_item, &root);
		if (ret < 0) {
			/* drop the partial results and read again on -ESTALE */
			if (ret == -ESTALE) {
				free_all_xt_ents(&root);
				continue;
			}
			goto out;
		}

		if (RB_EMPTY_ROOT(&root))
			break;

		/* trim totals that fall outside of the consistent range */
		for_each_xt_ent(ent, node, &root) {
			scoutfs_xattr_init_totl_key(&key, ent->xt.name);
			if (scoutfs_key_compare(&key, &start) < 0) {
				free_xt_ent(&root, ent);
			} else {
				break;
			}
		}
		for_each_xt_ent_reverse(ent, node, &root) {
			scoutfs_xattr_init_totl_key(&key, ent->xt.name);
			if (scoutfs_key_compare(&key, &end) > 0) {
				free_xt_ent(&root, ent);
			} else {
				break;
			}
		}

		/* copy resulting unique non-zero totals to userspace */
		for_each_xt_ent(ent, node, &root) {
			/* entries left behind here are freed at out */
			if (rxt.totals_bytes < sizeof(ent->xt))
				break;

			/* start with the fs item if we have it */
			if (ent->fs_seq != 0) {
				ent->xt.total = ent->fs_total;
				ent->xt.count = ent->fs_count;
				scoutfs_inc_counter(sb, totl_read_fs);
			}

			/* apply finalized logs if they're newer or creating */
			if (((ent->fs_seq != 0) && (ent->fin_seq > ent->fs_seq)) ||
			    ((ent->fs_seq == 0) && (ent->fin_count > 0))) {
				ent->xt.total += ent->fin_total;
				ent->xt.count += ent->fin_count;
				scoutfs_inc_counter(sb, totl_read_finalized);
			}

			/* always apply active logs which must be newer than fs and finalized */
			if (ent->log_seq > 0) {
				ent->xt.total += ent->log_total;
				ent->xt.count += ent->log_count;
				scoutfs_inc_counter(sb, totl_read_logged);
			}

			/* names whose total and count both came to zero aren't reported */
			if (ent->xt.total != 0 || ent->xt.count != 0) {
				if (copy_to_user(uxt, &ent->xt, sizeof(ent->xt))) {
					ret = -EFAULT;
					goto out;
				}

				uxt++;
				rxt.totals_bytes -= sizeof(ent->xt);
				count++;
				scoutfs_inc_counter(sb, totl_read_copied);
			}

			free_xt_ent(&root, ent);
		}

		/* continue after the last possible key read */
		start = end;
		scoutfs_key_inc(&start);
	}

	ret = 0;
out:
	free_all_xt_ents(&root);

	/* a negative errno wins, otherwise return the number of totals copied */
	return ret ?: count;
}
|
||||
|
||||
long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
switch (cmd) {
|
||||
@@ -1072,6 +1361,8 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
return scoutfs_ioc_move_blocks(file, arg);
|
||||
case SCOUTFS_IOC_RESIZE_DEVICES:
|
||||
return scoutfs_ioc_resize_devices(file, arg);
|
||||
case SCOUTFS_IOC_READ_XATTR_TOTALS:
|
||||
return scoutfs_ioc_read_xattr_totals(file, arg);
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
|
||||
@@ -490,4 +490,55 @@ struct scoutfs_ioctl_resize_devices {
|
||||
#define SCOUTFS_IOC_RESIZE_DEVICES \
|
||||
_IOR(SCOUTFS_IOCTL_MAGIC, 14, struct scoutfs_ioctl_resize_devices)
|
||||
|
||||
#define SCOUTFs_IOCTL_XATTR_TOTAL_NAME_NR 3
|
||||
|
||||
/*
|
||||
* Copy global totals of .totl. xattr value payloads to the user. This
|
||||
* only sees xattrs which have been committed and this doesn't force
|
||||
* commits of dirty data throughout the system. This can be out of sync
|
||||
* by the amount of xattrs that can be dirty in open transactions that
|
||||
* are being built throughout the system.
|
||||
*
|
||||
* pos_name: The array name of the first total that can be returned.
|
||||
* The name is derived from the key of the xattrs that contribute to the
|
||||
* total. For xattrs with a .totl.1.2.3 key, the pos_name[] should be
|
||||
* {1, 2, 3}.
|
||||
*
|
||||
* totals_ptr: An aligned pointer to a buffer that will be filled with
|
||||
* an array of scoutfs_ioctl_xattr_total structs for each total copied.
|
||||
*
|
||||
* totals_bytes: The size of the buffer in bytes. There must be room
|
||||
* for at least one struct element so that returning 0 can promise that
|
||||
* there were no more totals to copy after the pos_name.
|
||||
*
|
||||
* The number of copied elements is returned and 0 is returned if there
|
||||
* were no more totals to copy after the pos_name.
|
||||
*
|
||||
* In addition to the usual errnos (EIO, EINVAL, EPERM, EFAULT) this
|
||||
* adds:
|
||||
*
|
||||
* EINVAL: The totals_ buffer was not aligned or was not large enough
|
||||
* for a single struct entry.
|
||||
*/
|
||||
struct scoutfs_ioctl_read_xattr_totals {
|
||||
__u64 pos_name[SCOUTFs_IOCTL_XATTR_TOTAL_NAME_NR];
|
||||
__u64 totals_ptr;
|
||||
__u64 totals_bytes;
|
||||
};
|
||||
|
||||
/*
|
||||
* An individual total that is given to userspace. The total is the
|
||||
* sum of all the values in the xattr payloads matching the name. The
|
||||
* count is the number of xattrs, not number of files, contributing to
|
||||
* the total.
|
||||
*/
|
||||
struct scoutfs_ioctl_xattr_total {
|
||||
__u64 name[SCOUTFs_IOCTL_XATTR_TOTAL_NAME_NR];
|
||||
__u64 total;
|
||||
__u64 count;
|
||||
};
|
||||
|
||||
#define SCOUTFS_IOC_READ_XATTR_TOTALS \
|
||||
_IOR(SCOUTFS_IOCTL_MAGIC, 15, struct scoutfs_ioctl_read_xattr_totals)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -139,7 +139,8 @@ struct cached_item {
|
||||
struct list_head dirty_head;
|
||||
unsigned int dirty:1, /* needs to be written */
|
||||
persistent:1, /* in btrees, needs deletion item */
|
||||
deletion:1; /* negative del item for writing */
|
||||
deletion:1, /* negative del item for writing */
|
||||
delta:1; /* item values are combined, freed after write */
|
||||
unsigned int val_len;
|
||||
struct scoutfs_key key;
|
||||
u64 seq;
|
||||
@@ -415,6 +416,7 @@ static struct cached_item *alloc_item(struct cached_page *pg,
|
||||
item->dirty = 0;
|
||||
item->persistent = 0;
|
||||
item->deletion = !!deletion;
|
||||
item->delta = 0;
|
||||
item->val_len = val_len;
|
||||
item->key = *key;
|
||||
item->seq = seq;
|
||||
@@ -720,6 +722,7 @@ static void move_page_items(struct super_block *sb,
|
||||
}
|
||||
|
||||
to->persistent = from->persistent;
|
||||
to->delta = from->delta;
|
||||
|
||||
erase_item(left, from);
|
||||
}
|
||||
@@ -1353,7 +1356,7 @@ static void del_active_reader(struct item_cache_info *cinf, struct active_reader
|
||||
* don't have to compare seqs.
|
||||
*/
|
||||
static int read_page_item(struct super_block *sb, struct scoutfs_key *key, u64 seq, u8 flags,
|
||||
void *val, int val_len, void *arg)
|
||||
void *val, int val_len, int fic, void *arg)
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
const bool deletion = !!(flags & SCOUTFS_ITEM_FLAG_DELETION);
|
||||
@@ -1480,8 +1483,9 @@ static int read_pages(struct super_block *sb, struct item_cache_info *cinf,
|
||||
/* set active reader seq before reading persistent roots */
|
||||
add_active_reader(sb, &active);
|
||||
|
||||
ret = scoutfs_forest_read_items(sb, lock, key, &start, &end,
|
||||
read_page_item, &root);
|
||||
start = lock->start;
|
||||
end = lock->end;
|
||||
ret = scoutfs_forest_read_items(sb, key, &lock->start, &start, &end, read_page_item, &root);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -2006,6 +2010,77 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a delta item. Delta items are an incremental change relative to
|
||||
* the current persistent delta items. We never have to read the
|
||||
* current items so the caller always writes with write only locks. If
|
||||
* combining the current delta item and the caller's item results in a
|
||||
* null we can just drop it, we don't have to emit a deletion item.
|
||||
*/
|
||||
int scoutfs_item_delta(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock)
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
const u64 seq = item_seq(sb, lock);
|
||||
struct cached_item *item;
|
||||
struct cached_page *pg;
|
||||
struct rb_node **pnode;
|
||||
struct rb_node *par;
|
||||
int ret;
|
||||
|
||||
scoutfs_inc_counter(sb, item_delta);
|
||||
|
||||
if ((ret = lock_safe(lock, key, SCOUTFS_LOCK_WRITE_ONLY)))
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_forest_set_bloom_bits(sb, lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = get_cached_page(sb, cinf, lock, key, true, true, val_len, &pg);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
__acquire(pg->rwlock);
|
||||
|
||||
item = item_rbtree_walk(&pg->item_root, key, NULL, &par, &pnode);
|
||||
if (item) {
|
||||
if (!item->delta) {
|
||||
ret = -EIO;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ret = scoutfs_forest_combine_deltas(key, item->val, item->val_len, val, val_len);
|
||||
if (ret <= 0) {
|
||||
if (ret == 0)
|
||||
ret = -EIO;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (ret == SCOUTFS_DELTA_COMBINED) {
|
||||
item->seq = seq;
|
||||
mark_item_dirty(sb, cinf, pg, NULL, item);
|
||||
} else if (ret == SCOUTFS_DELTA_COMBINED_NULL) {
|
||||
clear_item_dirty(sb, cinf, pg, item);
|
||||
erase_item(pg, item);
|
||||
} else {
|
||||
ret = -EIO;
|
||||
goto unlock;
|
||||
}
|
||||
ret = 0;
|
||||
} else {
|
||||
item = alloc_item(pg, key, seq, false, val, val_len);
|
||||
rbtree_insert(&item->node, par, pnode, &pg->item_root);
|
||||
mark_item_dirty(sb, cinf, pg, NULL, item);
|
||||
item->delta = 1;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
unlock:
|
||||
write_unlock(&pg->rwlock);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Delete an item from the cache. We can leave behind a dirty deletion
|
||||
* item if there is a persistent item that needs to be overwritten.
|
||||
@@ -2280,8 +2355,11 @@ retry:
|
||||
dirty_head) {
|
||||
clear_item_dirty(sb, cinf, pg, item);
|
||||
|
||||
if (item->delta)
|
||||
scoutfs_inc_counter(sb, item_delta_written);
|
||||
|
||||
/* free deletion items */
|
||||
if (item->deletion)
|
||||
if (item->deletion || item->delta)
|
||||
erase_item(pg, item);
|
||||
else
|
||||
item->persistent = 1;
|
||||
|
||||
@@ -18,6 +18,8 @@ int scoutfs_item_create_force(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock);
|
||||
int scoutfs_item_delta(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock);
|
||||
int scoutfs_item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_item_delete_force(struct super_block *sb,
|
||||
|
||||
@@ -1237,6 +1237,20 @@ int scoutfs_lock_orphan(struct super_block *sb, enum scoutfs_lock_mode mode, int
|
||||
return lock_key_range(sb, mode, flags, &start, &end, lock);
|
||||
}
|
||||
|
||||
int scoutfs_lock_xattr_totl(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_lock **lock)
|
||||
{
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
|
||||
scoutfs_key_set_zeros(&start);
|
||||
start.sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
|
||||
scoutfs_key_set_ones(&end);
|
||||
end.sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
|
||||
|
||||
return lock_key_range(sb, mode, flags, &start, &end, lock);
|
||||
}
|
||||
|
||||
void scoutfs_unlock(struct super_block *sb, struct scoutfs_lock *lock, enum scoutfs_lock_mode mode)
|
||||
{
|
||||
DECLARE_LOCK_INFO(sb, linfo);
|
||||
|
||||
@@ -84,6 +84,8 @@ int scoutfs_lock_rename(struct super_block *sb, enum scoutfs_lock_mode mode, int
|
||||
struct scoutfs_lock **lock);
|
||||
int scoutfs_lock_orphan(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
u64 ino, struct scoutfs_lock **lock);
|
||||
int scoutfs_lock_xattr_totl(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_lock **lock);
|
||||
void scoutfs_unlock(struct super_block *sb, struct scoutfs_lock *lock,
|
||||
enum scoutfs_lock_mode mode);
|
||||
|
||||
|
||||
205
kmod/src/xattr.c
205
kmod/src/xattr.c
@@ -97,6 +97,7 @@ static int unknown_prefix(const char *name)
|
||||
|
||||
#define HIDE_TAG "hide."
|
||||
#define SRCH_TAG "srch."
|
||||
#define TOTL_TAG "totl."
|
||||
#define TAG_LEN (sizeof(HIDE_TAG) - 1)
|
||||
|
||||
int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
@@ -119,6 +120,9 @@ int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
} else if (!strncmp(name, SRCH_TAG, TAG_LEN)) {
|
||||
if (++tgs->srch == 0)
|
||||
return -EINVAL;
|
||||
} else if (!strncmp(name, TOTL_TAG, TAG_LEN)) {
|
||||
if (++tgs->totl == 0)
|
||||
return -EINVAL;
|
||||
} else {
|
||||
/* only reason to use scoutfs. is tags */
|
||||
if (!found)
|
||||
@@ -468,6 +472,100 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Initialize a key in the xattr totl zone from the three cpu-endian
 * u64s in name[], which are stored little-endian in the key's
 * skxt_a, skxt_b and skxt_c fields.  All other key fields are zeroed.
 */
void scoutfs_xattr_init_totl_key(struct scoutfs_key *key, u64 *name)
{
	const u64 a = name[0];
	const u64 b = name[1];
	const u64 c = name[2];

	scoutfs_key_set_zeros(key);

	key->sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
	key->skxt_a = cpu_to_le64(a);
	key->skxt_b = cpu_to_le64(b);
	key->skxt_c = cpu_to_le64(c);
}
|
||||
|
||||
/*
|
||||
* Parse a u64 in any base after null terminating it while forbidding
|
||||
* the leading + and trailing \n that kstrotull allows.
|
||||
*/
|
||||
static int parse_totl_u64(const char *s, int len, u64 *res)
|
||||
{
|
||||
char str[SCOUTFS_XATTR_MAX_TOTL_U64 + 1];
|
||||
|
||||
if (len <= 0 || len >= ARRAY_SIZE(str) || s[0] == '+' || s[len - 1] == '\n')
|
||||
return -EINVAL;
|
||||
|
||||
memcpy(str, s, len);
|
||||
str[len] = '\0';
|
||||
|
||||
return kstrtoull(str, 0, res) != 0 ? -EINVAL : 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* non-destructive relatively quick parse of the last 3 dotted u64s that
|
||||
* make up the name of the xattr total. -EINVAL is returned if there
|
||||
* are anything but 3 valid u64 encodings between single dots at the end
|
||||
* of the name.
|
||||
*/
|
||||
static int parse_totl_key(struct scoutfs_key *key, const char *name, int name_len)
|
||||
{
|
||||
u64 tot_name[3];
|
||||
int end = name_len;
|
||||
int nr = 0;
|
||||
int len;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
/* parse name elements in reserve order from end of xattr name string */
|
||||
for (i = name_len - 1; i >= 0 && nr < ARRAY_SIZE(tot_name); i--) {
|
||||
if (name[i] != '.')
|
||||
continue;
|
||||
|
||||
len = end - (i + 1);
|
||||
ret = parse_totl_u64(&name[i + 1], len, &tot_name[nr]);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
end = i;
|
||||
nr++;
|
||||
}
|
||||
|
||||
if (nr == ARRAY_SIZE(tot_name)) {
|
||||
/* swap to account for parsing in reverse */
|
||||
swap(tot_name[0], tot_name[2]);
|
||||
scoutfs_xattr_init_totl_key(key, tot_name);
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int apply_totl_delta(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_xattr_totl_val *tval, struct scoutfs_lock *lock)
|
||||
{
|
||||
if (tval->total == 0 && tval->count == 0)
|
||||
return 0;
|
||||
|
||||
return scoutfs_item_delta(sb, key, tval, sizeof(*tval), lock);
|
||||
}
|
||||
|
||||
int scoutfs_xattr_combine_totl(void *dst, int dst_len, void *src, int src_len)
|
||||
{
|
||||
struct scoutfs_xattr_totl_val *s_tval = src;
|
||||
struct scoutfs_xattr_totl_val *d_tval = dst;
|
||||
|
||||
if (src_len != sizeof(*s_tval) || dst_len != src_len)
|
||||
return -EIO;
|
||||
|
||||
le64_add_cpu(&d_tval->total, le64_to_cpu(s_tval->total));
|
||||
le64_add_cpu(&d_tval->count, le64_to_cpu(s_tval->count));
|
||||
|
||||
if (d_tval->total == 0 && d_tval->count == 0)
|
||||
return SCOUTFS_DELTA_COMBINED_NULL;
|
||||
|
||||
return SCOUTFS_DELTA_COMBINED;
|
||||
}
|
||||
|
||||
/*
|
||||
* The confusing swiss army knife of creating, modifying, and deleting
|
||||
* xattrs.
|
||||
@@ -486,16 +584,22 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
const u64 ino = scoutfs_ino(inode);
|
||||
struct scoutfs_xattr_totl_val tval = {0,};
|
||||
struct scoutfs_xattr_prefix_tags tgs;
|
||||
struct scoutfs_xattr *xat = NULL;
|
||||
struct scoutfs_lock *lck = NULL;
|
||||
struct scoutfs_lock *totl_lock = NULL;
|
||||
size_t name_len = strlen(name);
|
||||
struct scoutfs_key totl_key;
|
||||
struct scoutfs_key key;
|
||||
bool undo_srch = false;
|
||||
bool undo_totl = false;
|
||||
LIST_HEAD(ind_locks);
|
||||
u8 found_parts;
|
||||
unsigned int bytes;
|
||||
unsigned int val_len;
|
||||
u64 ind_seq;
|
||||
u64 total;
|
||||
u64 hash = 0;
|
||||
u64 id = 0;
|
||||
int ret;
|
||||
@@ -519,11 +623,15 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
if (scoutfs_xattr_parse_tags(name, name_len, &tgs) != 0)
|
||||
return -EINVAL;
|
||||
|
||||
if ((tgs.hide || tgs.srch) && !capable(CAP_SYS_ADMIN))
|
||||
if ((tgs.hide | tgs.srch | tgs.totl) && !capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (tgs.totl && ((ret = parse_totl_key(&totl_key, name, name_len)) != 0))
|
||||
return ret;
|
||||
|
||||
bytes = sizeof(struct scoutfs_xattr) + name_len + size;
|
||||
xat = __vmalloc(bytes, GFP_NOFS, PAGE_KERNEL);
|
||||
/* alloc enough to read old totl value */
|
||||
xat = __vmalloc(bytes + SCOUTFS_XATTR_MAX_TOTL_U64, GFP_NOFS, PAGE_KERNEL);
|
||||
if (!xat) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
@@ -536,9 +644,9 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
|
||||
down_write(&si->xattr_rwsem);
|
||||
|
||||
/* find an existing xattr to delete */
|
||||
/* find an existing xattr to delete, including possible totl value */
|
||||
ret = get_next_xattr(inode, &key, xat,
|
||||
sizeof(struct scoutfs_xattr) + name_len,
|
||||
sizeof(struct scoutfs_xattr) + name_len + SCOUTFS_XATTR_MAX_TOTL_U64,
|
||||
name, name_len, 0, 0, lck);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
goto unlock;
|
||||
@@ -558,9 +666,23 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* s64 count delta if we create or delete */
|
||||
if (tgs.totl)
|
||||
tval.count = cpu_to_le64((u64)!!(value) - (u64)!!(ret != -ENOENT));
|
||||
|
||||
/* found fields in key will also be used */
|
||||
found_parts = ret >= 0 ? xattr_nr_parts(xat) : 0;
|
||||
|
||||
if (found_parts && tgs.totl) {
|
||||
/* parse old totl value before we clobber xat buf */
|
||||
val_len = ret - offsetof(struct scoutfs_xattr, name[xat->name_len]);
|
||||
ret = parse_totl_u64(&xat->name[xat->name_len], val_len, &total);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
|
||||
le64_add_cpu(&tval.total, -total);
|
||||
}
|
||||
|
||||
/* prepare our xattr */
|
||||
if (value) {
|
||||
if (found_parts)
|
||||
@@ -572,6 +694,20 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
memset(xat->__pad, 0, sizeof(xat->__pad));
|
||||
memcpy(xat->name, name, name_len);
|
||||
memcpy(&xat->name[xat->name_len], value, size);
|
||||
|
||||
if (tgs.totl) {
|
||||
ret = parse_totl_u64(value, size, &total);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
le64_add_cpu(&tval.total, total);
|
||||
}
|
||||
|
||||
if (tgs.totl) {
|
||||
ret = scoutfs_lock_xattr_totl(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, &totl_lock);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
retry:
|
||||
@@ -597,6 +733,13 @@ retry:
|
||||
undo_srch = true;
|
||||
}
|
||||
|
||||
if (tgs.totl) {
|
||||
ret = apply_totl_delta(sb, &totl_key, &tval, totl_lock);
|
||||
if (ret < 0)
|
||||
goto release;
|
||||
undo_totl = true;
|
||||
}
|
||||
|
||||
if (found_parts && value)
|
||||
ret = change_xattr_items(inode, id, xat, bytes,
|
||||
xattr_nr_parts(xat), found_parts, lck);
|
||||
@@ -620,12 +763,20 @@ release:
|
||||
err = scoutfs_forest_srch_add(sb, hash, ino, id);
|
||||
BUG_ON(err);
|
||||
}
|
||||
if (ret < 0 && undo_totl) {
|
||||
/* _delta() on dirty items shouldn't fail */
|
||||
tval.total = cpu_to_le64(-le64_to_cpu(tval.total));
|
||||
tval.count = cpu_to_le64(-le64_to_cpu(tval.count));
|
||||
err = apply_totl_delta(sb, &totl_key, &tval, totl_lock);
|
||||
BUG_ON(err);
|
||||
}
|
||||
|
||||
scoutfs_release_trans(sb);
|
||||
scoutfs_inode_index_unlock(sb, &ind_locks);
|
||||
unlock:
|
||||
up_write(&si->xattr_rwsem);
|
||||
scoutfs_unlock(sb, lck, SCOUTFS_LOCK_WRITE);
|
||||
scoutfs_unlock(sb, totl_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
out:
|
||||
vfree(xat);
|
||||
|
||||
@@ -746,15 +897,22 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
{
|
||||
struct scoutfs_xattr_prefix_tags tgs;
|
||||
struct scoutfs_xattr *xat = NULL;
|
||||
struct scoutfs_lock *totl_lock = NULL;
|
||||
struct scoutfs_xattr_totl_val tval;
|
||||
struct scoutfs_key totl_key;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
bool release = false;
|
||||
unsigned int bytes;
|
||||
unsigned int val_len;
|
||||
void *value;
|
||||
u64 total;
|
||||
u64 hash;
|
||||
int ret;
|
||||
|
||||
/* need a buffer large enough for all possible names */
|
||||
bytes = sizeof(struct scoutfs_xattr) + SCOUTFS_XATTR_MAX_NAME_LEN;
|
||||
/* need a buffer large enough for all possible names and totl value */
|
||||
bytes = sizeof(struct scoutfs_xattr) + SCOUTFS_XATTR_MAX_NAME_LEN +
|
||||
SCOUTFS_XATTR_MAX_TOTL_U64;
|
||||
xat = kmalloc(bytes, GFP_NOFS);
|
||||
if (!xat) {
|
||||
ret = -ENOMEM;
|
||||
@@ -773,11 +931,37 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
break;
|
||||
}
|
||||
|
||||
if (key.skx_part == 0 && (ret < sizeof(struct scoutfs_xattr) ||
|
||||
ret < offsetof(struct scoutfs_xattr, name[xat->name_len]))) {
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
if (key.skx_part != 0 ||
|
||||
scoutfs_xattr_parse_tags(xat->name, xat->name_len,
|
||||
&tgs) != 0)
|
||||
memset(&tgs, 0, sizeof(tgs));
|
||||
|
||||
if (tgs.totl) {
|
||||
value = &xat->name[xat->name_len];
|
||||
val_len = ret - offsetof(struct scoutfs_xattr, name[xat->name_len]);
|
||||
if (val_len != le16_to_cpu(xat->val_len)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = parse_totl_key(&totl_key, xat->name, xat->name_len) ?:
|
||||
parse_totl_u64(value, val_len, &total);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (tgs.totl && totl_lock == NULL) {
|
||||
ret = scoutfs_lock_xattr_totl(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, &totl_lock);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
ret = scoutfs_hold_trans(sb, false);
|
||||
if (ret < 0)
|
||||
break;
|
||||
@@ -795,6 +979,14 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
break;
|
||||
}
|
||||
|
||||
if (tgs.totl) {
|
||||
tval.total = cpu_to_le64(-total);
|
||||
tval.count = cpu_to_le64(-1LL);
|
||||
ret = apply_totl_delta(sb, &totl_key, &tval, totl_lock);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
scoutfs_release_trans(sb);
|
||||
release = false;
|
||||
|
||||
@@ -803,6 +995,7 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
|
||||
if (release)
|
||||
scoutfs_release_trans(sb);
|
||||
scoutfs_unlock(sb, totl_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
kfree(xat);
|
||||
out:
|
||||
return ret;
|
||||
|
||||
@@ -16,10 +16,14 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
|
||||
struct scoutfs_xattr_prefix_tags {
|
||||
unsigned long hide:1,
|
||||
srch:1;
|
||||
srch:1,
|
||||
totl:1;
|
||||
};
|
||||
|
||||
int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
struct scoutfs_xattr_prefix_tags *tgs);
|
||||
|
||||
void scoutfs_xattr_init_totl_key(struct scoutfs_key *key, u64 *name);
|
||||
int scoutfs_xattr_combine_totl(void *dst, int dst_len, void *src, int src_len);
|
||||
|
||||
#endif
|
||||
|
||||
30
tests/golden/totl-xattr-tag
Normal file
30
tests/golden/totl-xattr-tag
Normal file
@@ -0,0 +1,30 @@
|
||||
== single file
|
||||
1.2.3 = 1, 1
|
||||
4.5.6 = 1, 1
|
||||
== multiple files add up
|
||||
1.2.3 = 2, 2
|
||||
4.5.6 = 2, 2
|
||||
== removing xattr updates total
|
||||
1.2.3 = 2, 2
|
||||
4.5.6 = 1, 1
|
||||
== updating xattr updates total
|
||||
1.2.3 = 11, 2
|
||||
4.5.6 = 1, 1
|
||||
== removing files update total
|
||||
1.2.3 = 10, 1
|
||||
== multiple files/names in one transaction
|
||||
1.2.3 = 55, 10
|
||||
== testing invalid names
|
||||
setfattr: /mnt/test/test/totl-xattr-tag/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/totl-xattr-tag/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/totl-xattr-tag/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/totl-xattr-tag/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/totl-xattr-tag/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/totl-xattr-tag/invalid: Invalid argument
|
||||
== testing invalid values
|
||||
setfattr: /mnt/test/test/totl-xattr-tag/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/totl-xattr-tag/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/totl-xattr-tag/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/totl-xattr-tag/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/totl-xattr-tag/invalid: Invalid argument
|
||||
== larger population that could merge
|
||||
@@ -10,6 +10,7 @@ move-blocks.sh
|
||||
enospc.sh
|
||||
srch-basic-functionality.sh
|
||||
simple-xattr-unit.sh
|
||||
totl-xattr-tag.sh
|
||||
lock-refleak.sh
|
||||
lock-shrink-consistency.sh
|
||||
lock-pr-cw-conflict.sh
|
||||
|
||||
126
tests/tests/totl-xattr-tag.sh
Normal file
126
tests/tests/totl-xattr-tag.sh
Normal file
@@ -0,0 +1,126 @@
|
||||
t_require_commands touch rm setfattr scoutfs find_xattrs
|
||||
|
||||
read_xattr_totals()
|
||||
{
|
||||
sync
|
||||
scoutfs read-xattr-totals -p "$T_M0"
|
||||
}
|
||||
|
||||
echo "== single file"
|
||||
touch "$T_D0/file-1"
|
||||
setfattr -n scoutfs.totl.test.1.2.3 -v 1 "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.totl.test.4.5.6 -v 1 "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
read_xattr_totals
|
||||
|
||||
echo "== multiple files add up"
|
||||
touch "$T_D0/file-2"
|
||||
setfattr -n scoutfs.totl.test.1.2.3 -v 1 "$T_D0/file-2" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.totl.test.4.5.6 -v 1 "$T_D0/file-2" 2>&1 | t_filter_fs
|
||||
read_xattr_totals
|
||||
|
||||
echo "== removing xattr updates total"
|
||||
setfattr -x scoutfs.totl.test.4.5.6 "$T_D0/file-2" 2>&1 | t_filter_fs
|
||||
read_xattr_totals
|
||||
|
||||
echo "== updating xattr updates total"
|
||||
setfattr -n scoutfs.totl.test.1.2.3 -v 10 "$T_D0/file-2" 2>&1 | t_filter_fs
|
||||
read_xattr_totals
|
||||
|
||||
echo "== removing files update total"
|
||||
rm -f "$T_D0/file-1"
|
||||
read_xattr_totals
|
||||
rm -f "$T_D0/file-2"
|
||||
read_xattr_totals
|
||||
|
||||
echo "== multiple files/names in one transaction"
|
||||
for a in $(seq 1 10); do
|
||||
touch "$T_D0/file-$a"
|
||||
setfattr -n scoutfs.totl.test.1.2.3 -v $a "$T_D0/file-$a" 2>&1 | t_filter_fs
|
||||
done
|
||||
read_xattr_totals
|
||||
rm -rf "$T_D0"/file-[0-9]*
|
||||
|
||||
echo "== testing invalid names"
|
||||
touch "$T_D0/invalid"
|
||||
setfattr -n scoutfs.totl.test... -v 10 "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.totl.test..2.3 -v 10 "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.totl.test.1..3 -v 10 "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.totl.test.1.2. -v 10 "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.totl.test.1 -v 10 "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.totl.test.1.2 -v 10 "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== testing invalid values"
|
||||
setfattr -n scoutfs.totl.test.1.2.3 -v "+1" "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.totl.test.1.2.3 -v "10." "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.totl.test.1.2.3 -v "-" "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.totl.test.1.2.3 -v "junk10" "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.totl.test.1.2.3 -v "10junk" "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
rm -f "$T_D0/invalid"
|
||||
|
||||
echo "== larger population that could merge"
|
||||
NR=5000
|
||||
TOTS=100
|
||||
CHECK=100
|
||||
PER_DIR=1000
|
||||
PER_FILE=10
|
||||
|
||||
declare -A totals counts
|
||||
LOTS="$T_D0/lots"
|
||||
|
||||
for i in $(seq 0 $PER_DIR $NR); do
|
||||
p="$LOTS/$((i / PER_DIR))"
|
||||
mkdir -p $p
|
||||
done
|
||||
for i in $(seq 0 $PER_FILE $NR); do
|
||||
p="$LOTS/$((i / PER_DIR))/file-$((i / PER_FILE))"
|
||||
touch $p
|
||||
done
|
||||
|
||||
for phase in create update remove; do
|
||||
for i in $(seq 0 $NR); do
|
||||
p="$LOTS/$((i / PER_DIR))/file-$((i / PER_FILE))"
|
||||
|
||||
t=$((i % TOTS))
|
||||
n="scoutfs.totl.test-$i.$t.0.0"
|
||||
|
||||
case $phase in
|
||||
create)
|
||||
v="$i"
|
||||
setfattr -n "$n" -v "$v" "$p" 2>&1 >> $T_TMP.sfa
|
||||
((totals[$t]+=$v))
|
||||
((counts[$t]++))
|
||||
;;
|
||||
update)
|
||||
v=$((i * 3))
|
||||
delta=$((i * 2))
|
||||
setfattr -n "$n" -v "$v" "$p" 2>&1 >> $T_TMP.sfa
|
||||
((totals[$t]+=$delta))
|
||||
;;
|
||||
remove)
|
||||
v=$((i * 3))
|
||||
setfattr -x "$n" "$p" 2>&1 >> $T_TMP.sfa
|
||||
((totals[$t]-=$v))
|
||||
((counts[$t]--))
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ "$i" -gt 0 -a "$((i % CHECK))" == "0" ]; then
|
||||
echo "checking $phase $i" > $T_TMP.check_arr
|
||||
echo "checking $phase $i" > $T_TMP.check_read
|
||||
|
||||
( for k in ${!totals[@]}; do
|
||||
echo "$k.0.0 = ${totals[$k]}, ${counts[$k]}"
|
||||
done ) | grep -v "= 0, 0$" | sort -n >> $T_TMP.check_arr
|
||||
|
||||
sync
|
||||
read_xattr_totals | sort -n >> $T_TMP.check_read
|
||||
|
||||
diff -u $T_TMP.check_arr $T_TMP.check_read || \
|
||||
t_fail "totals read didn't match expected arrays"
|
||||
fi
|
||||
done
|
||||
done
|
||||
|
||||
# remove the directory tree populated by the larger population test above
rm -rf "$LOTS"
|
||||
|
||||
t_pass
|
||||
@@ -142,6 +142,62 @@ If the
|
||||
file is written to then the server cannot make forward progress and
|
||||
shuts down. The request can similarly enter an errored state if enough
|
||||
time passes before userspace completes the request.
|
||||
|
||||
.SH EXTENDED ATTRIBUTE TAGS
|
||||
|
||||
.B scoutfs
|
||||
adds the
|
||||
.IB scoutfs.
|
||||
extended attribute namespace which uses a system of tags to extend the
|
||||
functionality of extended attributes. Immediately following the
|
||||
scoutfs. prefix are a series of tag words separated by dots.
|
||||
Any text starting after the last recognized tag is considered the xattr
|
||||
name and is not parsed.
|
||||
.sp
|
||||
Tags may be combined in any order. Specifying a tag more than once
|
||||
will return an error. There is no explicit boundary between the end of
|
||||
tags and the start of the name so unknown or incorrect tags will be
|
||||
successfully parsed as part of the name of the xattr. Tags can only be
|
||||
created, updated, or removed with the CAP_SYS_ADMIN capability.
|
||||
|
||||
The following tags are currently supported:
|
||||
|
||||
.RS
|
||||
.TP
|
||||
.B .hide.
|
||||
Attributes with the .hide. tag are not visible to the
|
||||
.BR listxattr(2)
|
||||
system call. They will instead be included in the output of the
|
||||
.IB LISTXATTR_HIDDEN
|
||||
ioctl. This is meant to be used by archival management agents to store
|
||||
metadata that is bound to a specific volume and should not be
|
||||
transferred with the file by tools that read extended attributes, like
|
||||
.BR tar(1) .
|
||||
.TP
|
||||
.B .srch.
|
||||
Attributes with the .srch. tag are indexed so that they can be
|
||||
found by the
|
||||
.IB SEARCH_XATTRS
|
||||
ioctl. The search ioctl takes an extended attribute name and returns
|
||||
the inode number of all the inodes which contain an extended attribute
|
||||
with that name. The indexing structures behind .srch. tags are designed
|
||||
to efficiently handle a large number of .srch. attributes per file with
|
||||
no limits on the number of indexed files.
|
||||
.TP
|
||||
.B .totl.
|
||||
Attributes with the .totl. tag are used to efficiently maintain counts
|
||||
across all files in the system. The attribute's name must end in three
|
||||
64bit values separated by dots that specify the global total that the
|
||||
extended attribute will contribute to. The value of the extended
|
||||
attribute is a string representation of the 64bit quantity which will be
|
||||
added to the total. As attributes are added, updated, or removed (and
|
||||
particularly as a file is finally deleted), the corresponding global
|
||||
total is also updated by the file system. All the totals with their
|
||||
name, total value, and a count of contributing attributes can be read
|
||||
with the
|
||||
.IB READ_XATTR_TOTALS
|
||||
ioctl.
|
||||
.RE
|
||||
|
||||
.SH CORRUPTION DETECTION
|
||||
A
|
||||
|
||||
@@ -75,6 +75,17 @@ static void print_orphan(struct scoutfs_key *key, void *val, int val_len)
|
||||
printf(" orphan: ino %llu\n", le64_to_cpu(key->sko_ino));
|
||||
}
|
||||
|
||||
|
||||
static void print_xattr_totl(struct scoutfs_key *key, void *val, int val_len)
|
||||
{
|
||||
struct scoutfs_xattr_totl_val *tval = val;
|
||||
|
||||
printf(" xattr totl: %llu.%llu.%llu = %lld, %lld\n",
|
||||
le64_to_cpu(key->skxt_a), le64_to_cpu(key->skxt_b),
|
||||
le64_to_cpu(key->skxt_c), le64_to_cpu(tval->total),
|
||||
le64_to_cpu(tval->count));
|
||||
}
|
||||
|
||||
static u8 *global_printable_name(u8 *name, int name_len)
|
||||
{
|
||||
static u8 name_buf[SCOUTFS_NAME_LEN + 1];
|
||||
@@ -163,6 +174,9 @@ static print_func_t find_printer(u8 zone, u8 type)
|
||||
return print_orphan;
|
||||
}
|
||||
|
||||
if (zone == SCOUTFS_XATTR_TOTL_ZONE)
|
||||
return print_xattr_totl;
|
||||
|
||||
if (zone == SCOUTFS_FS_ZONE) {
|
||||
switch(type) {
|
||||
case SCOUTFS_INODE_TYPE: return print_inode;
|
||||
|
||||
120
utils/src/read_xattr_totals.c
Normal file
120
utils/src/read_xattr_totals.c
Normal file
@@ -0,0 +1,120 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <argp.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "parse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "ioctl.h"
|
||||
#include "cmd.h"
|
||||
|
||||
struct xattr_args {
|
||||
char *path;
|
||||
};
|
||||
|
||||
static int do_read_xattr_totals(struct xattr_args *args)
|
||||
{
|
||||
struct scoutfs_ioctl_read_xattr_totals rxt;
|
||||
struct scoutfs_ioctl_xattr_total *xts = NULL;
|
||||
struct scoutfs_ioctl_xattr_total *xt;
|
||||
u64 bytes = 1024 * 1024;
|
||||
int fd = -1;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
xts = malloc(bytes);
|
||||
if (!xts) {
|
||||
fprintf(stderr, "xattr total mem alloc failed\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
fd = get_path(args->path, O_RDONLY);
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
memset(&rxt, 0, sizeof(rxt));
|
||||
rxt.totals_ptr = (unsigned long)xts;
|
||||
rxt.totals_bytes = bytes;
|
||||
|
||||
for (;;) {
|
||||
ret = ioctl(fd, SCOUTFS_IOC_READ_XATTR_TOTALS, &rxt);
|
||||
if (ret == 0)
|
||||
break;
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "read_xattr_totals ioctl failed: "
|
||||
"%s (%d)\n", strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0, xt = xts; i < ret; i++, xt++)
|
||||
printf("%llu.%llu.%llu = %lld, %lld\n",
|
||||
xt->name[0], xt->name[1], xt->name[2], xt->total, xt->count);
|
||||
|
||||
memcpy(&rxt.pos_name, &xts[ret - 1].name, sizeof(rxt.pos_name));
|
||||
if (++rxt.pos_name[2] == 0 && ++rxt.pos_name[1] == 0 && ++rxt.pos_name[0] == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
free(xts);
|
||||
|
||||
return ret;
|
||||
};
|
||||
|
||||
static int parse_opt(int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
struct xattr_args *args = state->input;
|
||||
|
||||
switch (key) {
|
||||
case 'p':
|
||||
args->path = strdup_or_error(state, arg);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct argp_option options[] = {
|
||||
{ "path", 'p', "PATH", 0, "Path to ScoutFS filesystem"},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static struct argp argp = {
|
||||
options,
|
||||
parse_opt,
|
||||
"",
|
||||
"Print global value totals of .totl. xattrs"
|
||||
};
|
||||
|
||||
static int read_xattr_totals_cmd(int argc, char **argv)
|
||||
{
|
||||
|
||||
struct xattr_args xattr_args = {NULL};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&argp, argc, argv, 0, NULL, &xattr_args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_read_xattr_totals(&xattr_args);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) read_xattr_totals_ctor(void)
|
||||
{
|
||||
cmd_register_argp("read-xattr-totals", &argp, GROUP_INFO, read_xattr_totals_cmd);
|
||||
}
|
||||
Reference in New Issue
Block a user