mirror of
https://github.com/versity/scoutfs.git
synced 2026-06-08 12:42:35 +00:00
scoutfs: remove name from dirent keys
Directory entries were the last items that had large variable length keys because they stored the entry name in the key. We'd like to have small fixed size keys so let's store dirents with small keys. Entries for lookup are stored at the hash of the name instead of the full name. The key also contains the unique readdir pos so that we don't have to deal with collisions on creation. The lookup procedure now does need to iterate over all the readdir positions for the hash value and compare the names. Entries for link backref walking are stored with the entry's position in the parent dir instead of the entry's name. The name is then stored in the value. Inode to path conversion can still walk the backref items without having to look up dirent items. These changes mean that all directory entry items are now stored at a small key with some u64s (hash, pos, parent dir, etc.) and have a value with the dirent struct and full entry name. This lets us use the same key and value format for the three entry key types. We no longer have to allocate keys; we can store them on the stack. We store the entry's hash and pos in the dirent struct in the item value so that any item has all the fields to reference all the other item keys. We store the same values in the dentry_info so that deletion (unlink and rename) can find all the entries. The ino_path ioctl can now much more clearly iterate over parent directories and entry positions instead of oh so cleverly iterating over null terminated names in the parent directories. The ioctl interface structs and implementation become simpler. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -71,17 +71,14 @@ static inline const struct scoutfs_item_count SIC_DIRTY_INODE(void)
|
||||
}
|
||||
|
||||
/*
|
||||
* Adding a dirent adds the entry key, readdir key, and backref.
|
||||
* Directory entries are stored in three items.
|
||||
*/
|
||||
static inline void __count_dirents(struct scoutfs_item_count *cnt,
|
||||
unsigned name_len)
|
||||
{
|
||||
|
||||
cnt->items += 3;
|
||||
cnt->keys += offsetof(struct scoutfs_dirent_key, name[name_len]) +
|
||||
sizeof(struct scoutfs_readdir_key) +
|
||||
offsetof(struct scoutfs_link_backref_key, name[name_len]);
|
||||
cnt->vals += 2 * offsetof(struct scoutfs_dirent, name[name_len]);
|
||||
cnt->keys += 3 * sizeof(struct scoutfs_dirent_key);
|
||||
cnt->vals += 3 * offsetof(struct scoutfs_dirent, name[name_len]);
|
||||
}
|
||||
|
||||
static inline void __count_sym_target(struct scoutfs_item_count *cnt,
|
||||
|
||||
521
kmod/src/dir.c
521
kmod/src/dir.c
@@ -35,34 +35,31 @@
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
/*
|
||||
* Directory entries are stored in entries with offsets calculated from
|
||||
* the hash of their entry name.
|
||||
* Directory entries are stored in three different items. Each has the
|
||||
* same key format and all have identical values which contain the full
|
||||
* entry name.
|
||||
*
|
||||
* Having a single index of items used for both lookup and readdir
|
||||
* iteration reduces the storage overhead of directories. It also
|
||||
* avoids having to manage the allocation of readdir positions as
|
||||
* directories age and the aggregate create count inches towards the
|
||||
* small 31 bit position limit. The downside is that dirent name
|
||||
* operations produce random item access patterns.
|
||||
* Entries for name lookup are stored at the hash of the name and the
|
||||
* readdir position. Including the position lets us create names
|
||||
* without having to read the items to check for hash collisions.
|
||||
* Lookup iterates over all the positions with the same hash values and
|
||||
* compares the names.
|
||||
*
|
||||
* Hash values are limited to 31 bits primarily to support older
|
||||
* deployed protocols that only support 31 bits of file entry offsets,
|
||||
* but also to avoid unlikely bugs in programs that store offsets in
|
||||
* signed ints.
|
||||
* Entries for readdir are stored in an increasing unique readdir
|
||||
* position. This results in returning entries in creation order which
|
||||
* matches inode allocation order and avoids random inode access
|
||||
* patterns during readdir.
|
||||
*
|
||||
* We have to worry about hash collisions. We linearly probe a fixed
|
||||
* number of hash values past the natural value. In a typical small
|
||||
* directory this search will terminate immediately because adjacent
|
||||
* items will have distant offset values. It's only as the directory
|
||||
* gets very large that hash values will start to be this dense and
|
||||
* sweeping over items in a btree leaf is reasonably efficient.
|
||||
* Entries for link backref traversal are stored at the target inode
|
||||
* sorted by the parent dir and the entry's position in the parent dir.
|
||||
* This keeps link backref users away from the higher contention area of
|
||||
* dirent items in parent dirs.
|
||||
*
|
||||
* For each directory entry item stored in a directory inode there is a
|
||||
* corresponding link backref item stored at the target inode. This
|
||||
* lets us find all the paths that refer to a given inode. The link
|
||||
* backref offset comes from an advancing counter in the inode and the
|
||||
* item value contains the dir inode and dirent offset of the referring
|
||||
* link.
|
||||
* All the entries have a dirent struct with the full name in their
|
||||
* value. The dirent struct contains the name hash and readdir position
|
||||
* so that any item use can reference all the items for a given entry.
|
||||
* This is important for deleting all the items given a dentry that was
|
||||
* populated by lookup.
|
||||
*/
|
||||
|
||||
static unsigned int mode_to_type(umode_t mode)
|
||||
@@ -102,14 +99,15 @@ static unsigned int dentry_type(unsigned int type)
|
||||
}
|
||||
|
||||
/*
|
||||
* @readdir_pos lets us remove items on final unlink without having to
|
||||
* look them up.
|
||||
* @lock_cov: tells revalidation that the dentry is still locked and valid.
|
||||
*
|
||||
* @lock_cov tells revalidation that the dentry is still locked and valid.
|
||||
* @pos, @hash: lets us remove items on final unlink without having to
|
||||
* look them up.
|
||||
*/
|
||||
struct dentry_info {
|
||||
u64 readdir_pos;
|
||||
struct scoutfs_lock_coverage lock_cov;
|
||||
u64 hash;
|
||||
u64 pos;
|
||||
};
|
||||
|
||||
static struct kmem_cache *dentry_info_cache;
|
||||
@@ -161,15 +159,26 @@ static int alloc_dentry_info(struct dentry *dentry)
|
||||
}
|
||||
|
||||
static void update_dentry_info(struct super_block *sb, struct dentry *dentry,
|
||||
u64 pos, struct scoutfs_lock *lock)
|
||||
u64 hash, u64 pos, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct dentry_info *di = dentry->d_fsdata;
|
||||
|
||||
if (WARN_ON_ONCE(di == NULL))
|
||||
return;
|
||||
|
||||
di->readdir_pos = pos;
|
||||
scoutfs_lock_add_coverage(sb, lock, &di->lock_cov);
|
||||
di->hash = hash;
|
||||
di->pos = pos;
|
||||
}
|
||||
|
||||
static u64 dentry_info_hash(struct dentry *dentry)
|
||||
{
|
||||
struct dentry_info *di = dentry->d_fsdata;
|
||||
|
||||
if (WARN_ON_ONCE(di == NULL))
|
||||
return 0;
|
||||
|
||||
return di->hash;
|
||||
}
|
||||
|
||||
static u64 dentry_info_pos(struct dentry *dentry)
|
||||
@@ -179,88 +188,102 @@ static u64 dentry_info_pos(struct dentry *dentry)
|
||||
if (WARN_ON_ONCE(di == NULL))
|
||||
return 0;
|
||||
|
||||
return di->readdir_pos;
|
||||
return di->pos;
|
||||
}
|
||||
|
||||
static struct scoutfs_key_buf *alloc_dirent_key(struct super_block *sb,
|
||||
u64 dir_ino, const char *name,
|
||||
unsigned name_len)
|
||||
static void init_dirent_key(struct scoutfs_key_buf *key,
|
||||
struct scoutfs_dirent_key *dkey, u8 type,
|
||||
u64 ino, u64 major, u64 minor)
|
||||
{
|
||||
struct scoutfs_dirent_key *dkey;
|
||||
struct scoutfs_key_buf *key;
|
||||
dkey->zone = SCOUTFS_FS_ZONE;
|
||||
dkey->ino = cpu_to_be64(ino);
|
||||
dkey->type = type;
|
||||
dkey->major = cpu_to_be64(major);
|
||||
dkey->minor = cpu_to_be64(minor);
|
||||
|
||||
key = scoutfs_key_alloc(sb, offsetof(struct scoutfs_dirent_key,
|
||||
name[name_len]));
|
||||
if (key) {
|
||||
dkey = key->data;
|
||||
dkey->zone = SCOUTFS_FS_ZONE;
|
||||
dkey->ino = cpu_to_be64(dir_ino);
|
||||
dkey->type = SCOUTFS_DIRENT_TYPE;
|
||||
memcpy(dkey->name, (void *)name, name_len);
|
||||
}
|
||||
|
||||
return key;
|
||||
scoutfs_key_init(key, dkey, sizeof(struct scoutfs_dirent_key));
|
||||
}
|
||||
|
||||
static void init_link_backref_key(struct scoutfs_key_buf *key,
|
||||
struct scoutfs_link_backref_key *lbrkey,
|
||||
u64 ino, u64 dir_ino,
|
||||
const char *name, unsigned name_len)
|
||||
static unsigned int dirent_bytes(unsigned int name_len)
|
||||
{
|
||||
lbrkey->zone = SCOUTFS_FS_ZONE;
|
||||
lbrkey->ino = cpu_to_be64(ino);
|
||||
lbrkey->type = SCOUTFS_LINK_BACKREF_TYPE;
|
||||
lbrkey->dir_ino = cpu_to_be64(dir_ino);
|
||||
if (name_len)
|
||||
memcpy(lbrkey->name, name, name_len);
|
||||
|
||||
scoutfs_key_init(key, lbrkey, offsetof(struct scoutfs_link_backref_key,
|
||||
name[name_len]));
|
||||
return offsetof(struct scoutfs_dirent, name[name_len]);
|
||||
}
|
||||
|
||||
static struct scoutfs_key_buf *alloc_link_backref_key(struct super_block *sb,
|
||||
u64 ino, u64 dir_ino,
|
||||
const char *name,
|
||||
unsigned name_len)
|
||||
static struct scoutfs_dirent *alloc_dirent(unsigned int name_len)
|
||||
{
|
||||
struct scoutfs_link_backref_key *lbkey;
|
||||
struct scoutfs_key_buf *key;
|
||||
return kmalloc(dirent_bytes(name_len), GFP_NOFS);
|
||||
}
|
||||
|
||||
key = scoutfs_key_alloc(sb, offsetof(struct scoutfs_link_backref_key,
|
||||
name[name_len]));
|
||||
if (key) {
|
||||
lbkey = key->data;
|
||||
init_link_backref_key(key, lbkey, ino, dir_ino,
|
||||
name, name_len);
|
||||
}
|
||||
static u64 dirent_name_hash(const char *name, unsigned int name_len)
|
||||
{
|
||||
unsigned int half = (name_len + 1) / 2;
|
||||
|
||||
return key;
|
||||
return crc32c(~0, name, half) |
|
||||
((u64)crc32c(~0, name + name_len - half, half) << 32);
|
||||
}
|
||||
|
||||
/*
 * Return true if the two entry names have the same length and identical
 * bytes.  Names are compared as raw byte strings of the given lengths;
 * neither needs to be null terminated.
 */
static bool dirent_names_equal(const char *a_name, unsigned int a_len,
			       const char *b_name, unsigned int b_len)
{
	/* lengths are checked first so memcmp only runs over equal-length names */
	return a_len == b_len && memcmp(a_name, b_name, a_len) == 0;
}
|
||||
|
||||
/*
|
||||
* Looks for the dirent item and fills the caller's dirent if it finds
|
||||
* it. Returns item lookup errors including -ENOENT if it's not found.
|
||||
*/
|
||||
static int lookup_dirent(struct super_block *sb, struct inode *dir,
|
||||
const char *name, unsigned name_len,
|
||||
struct scoutfs_dirent *dent,
|
||||
static int lookup_dirent(struct super_block *sb, u64 dir_ino, const char *name,
|
||||
unsigned name_len, u64 hash,
|
||||
struct scoutfs_dirent *dent_ret,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_key_buf *key = NULL;
|
||||
struct scoutfs_dirent_key last_dkey;
|
||||
struct scoutfs_dirent_key dkey;
|
||||
struct scoutfs_key_buf last_key;
|
||||
struct scoutfs_key_buf key;
|
||||
struct scoutfs_dirent *dent = NULL;
|
||||
struct kvec val;
|
||||
int ret;
|
||||
|
||||
key = alloc_dirent_key(sb, scoutfs_ino(dir), name, name_len);
|
||||
if (!key) {
|
||||
dent = alloc_dirent(SCOUTFS_NAME_LEN);
|
||||
if (!dent) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
kvec_init(&val, dent, sizeof(struct scoutfs_dirent));
|
||||
init_dirent_key(&key, &dkey, SCOUTFS_DIRENT_TYPE,
|
||||
dir_ino, hash, 0);
|
||||
init_dirent_key(&last_key, &last_dkey, SCOUTFS_DIRENT_TYPE,
|
||||
dir_ino, hash, U64_MAX);
|
||||
kvec_init(&val, dent, dirent_bytes(SCOUTFS_NAME_LEN));
|
||||
|
||||
for (;;) {
|
||||
ret = scoutfs_item_next(sb, &key, &last_key, &val, lock);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
ret -= sizeof(struct scoutfs_dirent);
|
||||
/* XXX corruption */
|
||||
if (ret < 1 || ret > SCOUTFS_NAME_LEN) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (dirent_names_equal(name, name_len, dent->name, ret)) {
|
||||
*dent_ret = *dent;
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (be64_to_cpu(dkey.minor) == U64_MAX) {
|
||||
ret = -ENOENT;
|
||||
break;
|
||||
}
|
||||
be64_add_cpu(&dkey.minor, 1);
|
||||
}
|
||||
|
||||
ret = scoutfs_item_lookup_exact(sb, key, &val, lock);
|
||||
out:
|
||||
scoutfs_key_free(sb, key);
|
||||
kfree(dent);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -318,18 +341,24 @@ static int scoutfs_d_revalidate(struct dentry *dentry, unsigned int flags)
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = lookup_dirent(sb, dir, dentry->d_name.name, dentry->d_name.len,
|
||||
ret = lookup_dirent(sb, scoutfs_ino(dir),
|
||||
dentry->d_name.name, dentry->d_name.len,
|
||||
dirent_name_hash(dentry->d_name.name,
|
||||
dentry->d_name.len),
|
||||
&dent, lock);
|
||||
if (ret == -ENOENT)
|
||||
if (ret == -ENOENT) {
|
||||
dent.ino = 0;
|
||||
else if (ret < 0)
|
||||
dent.hash = 0;
|
||||
dent.pos = 0;
|
||||
} else if (ret < 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
dentry_ino = dentry->d_inode ? scoutfs_ino(dentry->d_inode) : 0;
|
||||
|
||||
if ((dentry_ino == le64_to_cpu(dent.ino))) {
|
||||
update_dentry_info(sb, dentry, le64_to_cpu(dent.readdir_pos),
|
||||
lock);
|
||||
update_dentry_info(sb, dentry, le64_to_cpu(dent.hash),
|
||||
le64_to_cpu(dent.pos), lock);
|
||||
scoutfs_inc_counter(sb, dentry_revalidate_valid);
|
||||
ret = 1;
|
||||
} else {
|
||||
@@ -368,8 +397,11 @@ static struct dentry *scoutfs_lookup(struct inode *dir, struct dentry *dentry,
|
||||
struct scoutfs_dirent dent;
|
||||
struct inode *inode;
|
||||
u64 ino = 0;
|
||||
u64 hash;
|
||||
int ret;
|
||||
|
||||
hash = dirent_name_hash(dentry->d_name.name, dentry->d_name.len);
|
||||
|
||||
if (dentry->d_name.len > SCOUTFS_NAME_LEN) {
|
||||
ret = -ENAMETOOLONG;
|
||||
goto out;
|
||||
@@ -383,15 +415,15 @@ static struct dentry *scoutfs_lookup(struct inode *dir, struct dentry *dentry,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = lookup_dirent(sb, dir, dentry->d_name.name, dentry->d_name.len,
|
||||
&dent, dir_lock);
|
||||
ret = lookup_dirent(sb, scoutfs_ino(dir), dentry->d_name.name,
|
||||
dentry->d_name.len, hash, &dent, dir_lock);
|
||||
if (ret == -ENOENT) {
|
||||
ino = 0;
|
||||
ret = 0;
|
||||
} else if (ret == 0) {
|
||||
ino = le64_to_cpu(dent.ino);
|
||||
update_dentry_info(sb, dentry, le64_to_cpu(dent.readdir_pos),
|
||||
dir_lock);
|
||||
update_dentry_info(sb, dentry, le64_to_cpu(dent.hash),
|
||||
le64_to_cpu(dent.pos), dir_lock);
|
||||
}
|
||||
scoutfs_unlock(sb, dir_lock, DLM_LOCK_PR);
|
||||
|
||||
@@ -428,18 +460,6 @@ static int dir_emit_dots(struct file *file, void *dirent, filldir_t filldir)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void init_readdir_key(struct scoutfs_key_buf *key,
|
||||
struct scoutfs_readdir_key *rkey, u64 dir_ino,
|
||||
loff_t pos)
|
||||
{
|
||||
rkey->zone = SCOUTFS_FS_ZONE;
|
||||
rkey->ino = cpu_to_be64(dir_ino);
|
||||
rkey->type = SCOUTFS_READDIR_TYPE;
|
||||
rkey->pos = cpu_to_be64(pos);
|
||||
|
||||
scoutfs_key_init(key, rkey, sizeof(struct scoutfs_readdir_key));
|
||||
}
|
||||
|
||||
/*
|
||||
* readdir simply iterates over the dirent items for the dir inode and
|
||||
* uses their offset as the readdir position.
|
||||
@@ -454,10 +474,9 @@ static int scoutfs_readdir(struct file *file, void *dirent, filldir_t filldir)
|
||||
struct scoutfs_dirent *dent;
|
||||
struct scoutfs_key_buf key;
|
||||
struct scoutfs_key_buf last_key;
|
||||
struct scoutfs_readdir_key rkey;
|
||||
struct scoutfs_readdir_key last_rkey;
|
||||
struct scoutfs_dirent_key dkey;
|
||||
struct scoutfs_dirent_key last_dkey;
|
||||
struct scoutfs_lock *dir_lock;
|
||||
unsigned int item_len;
|
||||
unsigned int name_len;
|
||||
struct kvec val;
|
||||
u64 pos;
|
||||
@@ -466,27 +485,26 @@ static int scoutfs_readdir(struct file *file, void *dirent, filldir_t filldir)
|
||||
if (!dir_emit_dots(file, dirent, filldir))
|
||||
return 0;
|
||||
|
||||
ret = scoutfs_lock_inode(sb, DLM_LOCK_PR, 0, inode, &dir_lock);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
init_readdir_key(&last_key, &last_rkey, scoutfs_ino(inode),
|
||||
SCOUTFS_DIRENT_LAST_POS);
|
||||
|
||||
item_len = offsetof(struct scoutfs_dirent, name[SCOUTFS_NAME_LEN]);
|
||||
dent = kmalloc(item_len, GFP_KERNEL);
|
||||
dent = alloc_dirent(SCOUTFS_NAME_LEN);
|
||||
if (!dent) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
init_readdir_key(&key, &rkey, scoutfs_ino(inode), file->f_pos);
|
||||
init_dirent_key(&last_key, &last_dkey, SCOUTFS_READDIR_TYPE,
|
||||
scoutfs_ino(inode), SCOUTFS_DIRENT_LAST_POS, 0);
|
||||
kvec_init(&val, dent, dirent_bytes(SCOUTFS_NAME_LEN));
|
||||
|
||||
ret = scoutfs_lock_inode(sb, DLM_LOCK_PR, 0, inode, &dir_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
for (;;) {
|
||||
init_dirent_key(&key, &dkey, SCOUTFS_READDIR_TYPE,
|
||||
scoutfs_ino(inode), file->f_pos, 0);
|
||||
|
||||
kvec_init(&val, dent, item_len);
|
||||
ret = scoutfs_item_next_same_min(sb, &key, &last_key, &val,
|
||||
offsetof(struct scoutfs_dirent, name[1]),
|
||||
dir_lock);
|
||||
dirent_bytes(1), dir_lock);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
@@ -494,7 +512,7 @@ static int scoutfs_readdir(struct file *file, void *dirent, filldir_t filldir)
|
||||
}
|
||||
|
||||
name_len = ret - sizeof(struct scoutfs_dirent);
|
||||
pos = be64_to_cpu(rkey.pos);
|
||||
pos = be64_to_cpu(dkey.major);
|
||||
|
||||
if (filldir(dirent, dent->name, name_len, pos,
|
||||
le64_to_cpu(dent->ino), dentry_type(dent->type))) {
|
||||
@@ -519,69 +537,63 @@ out:
|
||||
*
|
||||
* If this returns an error then nothing will have changed.
|
||||
*/
|
||||
static int add_entry_items(struct super_block *sb, u64 dir_ino, u64 pos,
|
||||
const char *name, unsigned name_len, u64 ino,
|
||||
umode_t mode, struct scoutfs_lock *dir_lock,
|
||||
static int add_entry_items(struct super_block *sb, u64 dir_ino, u64 hash,
|
||||
u64 pos, const char *name, unsigned name_len,
|
||||
u64 ino, umode_t mode, struct scoutfs_lock *dir_lock,
|
||||
struct scoutfs_lock *inode_lock)
|
||||
{
|
||||
struct scoutfs_key_buf *ent_key = NULL;
|
||||
struct scoutfs_key_buf *lb_key = NULL;
|
||||
struct scoutfs_dirent *dent = NULL;
|
||||
struct scoutfs_dirent_key rdir_dkey;
|
||||
struct scoutfs_dirent_key ent_dkey;
|
||||
struct scoutfs_dirent_key lb_dkey;
|
||||
struct scoutfs_key_buf rdir_key;
|
||||
struct scoutfs_readdir_key rkey;
|
||||
struct scoutfs_key_buf ent_key;
|
||||
struct scoutfs_key_buf lb_key;
|
||||
struct scoutfs_dirent *dent;
|
||||
bool del_ent = false;
|
||||
bool del_rdir = false;
|
||||
struct kvec val;
|
||||
int ret;
|
||||
|
||||
ent_key = alloc_dirent_key(sb, dir_ino, name, name_len);
|
||||
dent = kmalloc(offsetof(struct scoutfs_dirent, name[name_len]),
|
||||
GFP_NOFS);
|
||||
if (!ent_key || !dent) {
|
||||
dent = alloc_dirent(name_len);
|
||||
if (!dent) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* initialize the dent */
|
||||
dent->ino = cpu_to_le64(ino);
|
||||
dent->readdir_pos = cpu_to_le64(pos);
|
||||
dent->hash = cpu_to_le64(hash);
|
||||
dent->pos = cpu_to_le64(pos);
|
||||
dent->type = mode_to_type(mode);
|
||||
memcpy(dent->name, name, name_len);
|
||||
|
||||
/* dirent item for lookup */
|
||||
kvec_init(&val, dent, sizeof(struct scoutfs_dirent));
|
||||
ret = scoutfs_item_create(sb, ent_key, &val, dir_lock);
|
||||
init_dirent_key(&ent_key, &ent_dkey, SCOUTFS_DIRENT_TYPE,
|
||||
dir_ino, hash, pos);
|
||||
init_dirent_key(&rdir_key, &rdir_dkey, SCOUTFS_READDIR_TYPE,
|
||||
dir_ino, pos, 0);
|
||||
init_dirent_key(&lb_key, &lb_dkey, SCOUTFS_LINK_BACKREF_TYPE,
|
||||
ino, dir_ino, pos);
|
||||
kvec_init(&val, dent, dirent_bytes(name_len));
|
||||
|
||||
ret = scoutfs_item_create(sb, &ent_key, &val, dir_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
del_ent = true;
|
||||
|
||||
/* readdir item for .. readdir */
|
||||
init_readdir_key(&rdir_key, &rkey, dir_ino, pos);
|
||||
kvec_init(&val, dent, offsetof(struct scoutfs_dirent, name[name_len]));
|
||||
|
||||
ret = scoutfs_item_create(sb, &rdir_key, &val, dir_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
del_rdir = true;
|
||||
|
||||
/* link backref item for inode to path resolution */
|
||||
lb_key = alloc_link_backref_key(sb, ino, dir_ino, name, name_len);
|
||||
if (!lb_key) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_item_create(sb, lb_key, NULL, inode_lock);
|
||||
ret = scoutfs_item_create(sb, &lb_key, &val, inode_lock);
|
||||
out:
|
||||
if (ret < 0) {
|
||||
if (del_ent)
|
||||
scoutfs_item_delete_dirty(sb, ent_key);
|
||||
scoutfs_item_delete_dirty(sb, &ent_key);
|
||||
if (del_rdir)
|
||||
scoutfs_item_delete_dirty(sb, &rdir_key);
|
||||
}
|
||||
|
||||
scoutfs_key_free(sb, ent_key);
|
||||
scoutfs_key_free(sb, lb_key);
|
||||
kfree(dent);
|
||||
|
||||
return ret;
|
||||
@@ -592,49 +604,40 @@ out:
|
||||
* Only items are modified. The caller is responsible for locking,
|
||||
* entering a transaction, dirtying items, and managing the vfs structs.
|
||||
*
|
||||
* The items match the items used in add_entry_items() but we don't have
|
||||
* to worry about values here and we can dirty all the items before
|
||||
* starting to delete them which makes cleanup a little easier.
|
||||
*
|
||||
* If this returns an error then nothing will have changed.
|
||||
*/
|
||||
static int del_entry_items(struct super_block *sb, u64 dir_ino, u64 pos,
|
||||
const char *name, unsigned name_len, u64 ino,
|
||||
struct scoutfs_lock *dir_lock,
|
||||
static int del_entry_items(struct super_block *sb, u64 dir_ino, u64 hash,
|
||||
u64 pos, u64 ino, struct scoutfs_lock *dir_lock,
|
||||
struct scoutfs_lock *inode_lock)
|
||||
{
|
||||
struct scoutfs_key_buf *ent_key;
|
||||
struct scoutfs_key_buf *lb_key;
|
||||
struct scoutfs_dirent_key rdir_dkey;
|
||||
struct scoutfs_dirent_key ent_dkey;
|
||||
struct scoutfs_dirent_key lb_dkey;
|
||||
struct scoutfs_key_buf rdir_key;
|
||||
struct scoutfs_readdir_key rkey;
|
||||
struct scoutfs_key_buf ent_key;
|
||||
struct scoutfs_key_buf lb_key;
|
||||
LIST_HEAD(dir_saved);
|
||||
LIST_HEAD(inode_saved);
|
||||
int ret;
|
||||
|
||||
ent_key = alloc_dirent_key(sb, dir_ino, name, name_len);
|
||||
if (!ent_key)
|
||||
return -ENOMEM;
|
||||
init_dirent_key(&ent_key, &ent_dkey, SCOUTFS_DIRENT_TYPE,
|
||||
dir_ino, hash, pos);
|
||||
init_dirent_key(&rdir_key, &rdir_dkey, SCOUTFS_READDIR_TYPE,
|
||||
dir_ino, pos, 0);
|
||||
init_dirent_key(&lb_key, &lb_dkey, SCOUTFS_LINK_BACKREF_TYPE,
|
||||
ino, dir_ino, pos);
|
||||
|
||||
init_readdir_key(&rdir_key, &rkey, dir_ino, pos);
|
||||
|
||||
lb_key = alloc_link_backref_key(sb, ino, dir_ino, name, name_len);
|
||||
if (!lb_key) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
ret = scoutfs_item_delete_save(sb, &ent_key, &dir_saved, dir_lock) ?:
|
||||
scoutfs_item_delete_save(sb, &rdir_key, &dir_saved, dir_lock) ?:
|
||||
scoutfs_item_delete_save(sb, &lb_key, &inode_saved, inode_lock);
|
||||
if (ret < 0) {
|
||||
scoutfs_item_restore(sb, &dir_saved, dir_lock);
|
||||
scoutfs_item_restore(sb, &inode_saved, inode_lock);
|
||||
} else {
|
||||
scoutfs_item_free_batch(sb, &dir_saved);
|
||||
scoutfs_item_free_batch(sb, &inode_saved);
|
||||
}
|
||||
|
||||
ret = scoutfs_item_dirty(sb, ent_key, dir_lock) ?:
|
||||
scoutfs_item_dirty(sb, &rdir_key, dir_lock) ?:
|
||||
scoutfs_item_dirty(sb, lb_key, inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
scoutfs_item_delete_dirty(sb, ent_key);
|
||||
scoutfs_item_delete_dirty(sb, &rdir_key);
|
||||
scoutfs_item_delete_dirty(sb, lb_key);
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
kfree(ent_key);
|
||||
kfree(lb_key);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -724,12 +727,14 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
struct scoutfs_lock *dir_lock = NULL;
|
||||
struct scoutfs_lock *inode_lock = NULL;
|
||||
LIST_HEAD(ind_locks);
|
||||
u64 hash;
|
||||
u64 pos;
|
||||
int ret;
|
||||
|
||||
if (dentry->d_name.len > SCOUTFS_NAME_LEN)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
hash = dirent_name_hash(dentry->d_name.name, dentry->d_name.len);
|
||||
inode = lock_hold_create(dir, dentry, mode, rdev,
|
||||
SIC_MKNOD(dentry->d_name.len),
|
||||
&dir_lock, &inode_lock, &ind_locks);
|
||||
@@ -738,13 +743,14 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
|
||||
pos = SCOUTFS_I(dir)->next_readdir_pos++;
|
||||
|
||||
ret = add_entry_items(sb, scoutfs_ino(dir), pos, dentry->d_name.name,
|
||||
dentry->d_name.len, scoutfs_ino(inode),
|
||||
inode->i_mode, dir_lock, inode_lock);
|
||||
ret = add_entry_items(sb, scoutfs_ino(dir), hash, pos,
|
||||
dentry->d_name.name, dentry->d_name.len,
|
||||
scoutfs_ino(inode), inode->i_mode, dir_lock,
|
||||
inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
update_dentry_info(sb, dentry, pos, dir_lock);
|
||||
update_dentry_info(sb, dentry, hash, pos, dir_lock);
|
||||
|
||||
i_size_write(dir, i_size_read(dir) + dentry->d_name.len);
|
||||
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
|
||||
@@ -795,9 +801,12 @@ static int scoutfs_link(struct dentry *old_dentry,
|
||||
LIST_HEAD(ind_locks);
|
||||
u64 dir_size;
|
||||
u64 ind_seq;
|
||||
u64 hash;
|
||||
u64 pos;
|
||||
int ret;
|
||||
|
||||
hash = dirent_name_hash(dentry->d_name.name, dentry->d_name.len);
|
||||
|
||||
if (dentry->d_name.len > SCOUTFS_NAME_LEN)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
@@ -834,12 +843,13 @@ retry:
|
||||
|
||||
pos = SCOUTFS_I(dir)->next_readdir_pos++;
|
||||
|
||||
ret = add_entry_items(sb, scoutfs_ino(dir), pos, dentry->d_name.name,
|
||||
dentry->d_name.len, scoutfs_ino(inode),
|
||||
inode->i_mode, dir_lock, inode_lock);
|
||||
ret = add_entry_items(sb, scoutfs_ino(dir), hash, pos,
|
||||
dentry->d_name.name, dentry->d_name.len,
|
||||
scoutfs_ino(inode), inode->i_mode, dir_lock,
|
||||
inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
update_dentry_info(sb, dentry, pos, dir_lock);
|
||||
update_dentry_info(sb, dentry, hash, pos, dir_lock);
|
||||
|
||||
i_size_write(dir, dir_size);
|
||||
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
|
||||
@@ -908,9 +918,9 @@ retry:
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = del_entry_items(sb, scoutfs_ino(dir), dentry_info_pos(dentry),
|
||||
dentry->d_name.name, dentry->d_name.len,
|
||||
scoutfs_ino(inode), dir_lock, inode_lock);
|
||||
ret = del_entry_items(sb, scoutfs_ino(dir), dentry_info_hash(dentry),
|
||||
dentry_info_pos(dentry), scoutfs_ino(inode),
|
||||
dir_lock, inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -1108,9 +1118,12 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
|
||||
struct scoutfs_lock *dir_lock = NULL;
|
||||
struct scoutfs_lock *inode_lock = NULL;
|
||||
LIST_HEAD(ind_locks);
|
||||
u64 hash;
|
||||
u64 pos;
|
||||
int ret;
|
||||
|
||||
hash = dirent_name_hash(dentry->d_name.name, dentry->d_name.len);
|
||||
|
||||
/* path_max includes null as does our value for nd_set_link */
|
||||
if (dentry->d_name.len > SCOUTFS_NAME_LEN ||
|
||||
name_len > PATH_MAX || name_len > SCOUTFS_SYMLINK_MAX_SIZE)
|
||||
@@ -1133,13 +1146,14 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
|
||||
|
||||
pos = SCOUTFS_I(dir)->next_readdir_pos++;
|
||||
|
||||
ret = add_entry_items(sb, scoutfs_ino(dir), pos, dentry->d_name.name,
|
||||
dentry->d_name.len, scoutfs_ino(inode),
|
||||
inode->i_mode, dir_lock, inode_lock);
|
||||
ret = add_entry_items(sb, scoutfs_ino(dir), hash, pos,
|
||||
dentry->d_name.name, dentry->d_name.len,
|
||||
scoutfs_ino(inode), inode->i_mode, dir_lock,
|
||||
inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
update_dentry_info(sb, dentry, pos, dir_lock);
|
||||
update_dentry_info(sb, dentry, hash, pos, dir_lock);
|
||||
|
||||
i_size_write(dir, i_size_read(dir) + dentry->d_name.len);
|
||||
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
|
||||
@@ -1185,7 +1199,7 @@ int scoutfs_symlink_drop(struct super_block *sb, u64 ino,
|
||||
|
||||
/*
|
||||
* Find the next link backref key for the given ino starting from the
|
||||
* given dir inode and null terminated name. If we find a backref item
|
||||
* given dir inode and final entry position. If we find a backref item
|
||||
* we add an allocated copy of it to the head of the caller's list.
|
||||
*
|
||||
* Returns 0 if we added an entry, -ENOENT if we didn't, and -errno for
|
||||
@@ -1195,40 +1209,37 @@ int scoutfs_symlink_drop(struct super_block *sb, u64 ino,
|
||||
* building up a path with individual locked backref item lookups.
|
||||
*/
|
||||
int scoutfs_dir_add_next_linkref(struct super_block *sb, u64 ino,
|
||||
u64 dir_ino, char *name, unsigned int name_len,
|
||||
u64 dir_ino, u64 dir_pos,
|
||||
struct list_head *list)
|
||||
{
|
||||
struct scoutfs_link_backref_key last_lbkey;
|
||||
struct scoutfs_link_backref_entry *ent;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
struct scoutfs_key_buf last;
|
||||
struct scoutfs_dirent_key last_dkey;
|
||||
struct scoutfs_dirent_key dkey;
|
||||
struct scoutfs_key_buf last_key;
|
||||
struct scoutfs_key_buf key;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
struct kvec val;
|
||||
int len;
|
||||
int ret;
|
||||
|
||||
ent = kmalloc(offsetof(struct scoutfs_link_backref_entry,
|
||||
lbkey.name[SCOUTFS_NAME_LEN + 1]), GFP_KERNEL);
|
||||
dent.name[SCOUTFS_NAME_LEN]), GFP_KERNEL);
|
||||
if (!ent)
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(&ent->head);
|
||||
|
||||
/* put search key in ent */
|
||||
init_link_backref_key(&key, &ent->lbkey, ino, dir_ino, name, name_len);
|
||||
/* we actually have room for a full backref item */
|
||||
scoutfs_key_init_buf_len(&key, key.data, key.key_len,
|
||||
offsetof(struct scoutfs_link_backref_key,
|
||||
name[SCOUTFS_NAME_LEN + 1]));
|
||||
init_dirent_key(&key, &dkey, SCOUTFS_LINK_BACKREF_TYPE,
|
||||
ino, dir_ino, dir_pos);
|
||||
init_dirent_key(&last_key, &last_dkey, SCOUTFS_LINK_BACKREF_TYPE,
|
||||
ino, U64_MAX, U64_MAX);
|
||||
kvec_init(&val, &ent->dent, dirent_bytes(SCOUTFS_NAME_LEN));
|
||||
|
||||
/* small last key to avoid full name copy, XXX enforce no U64_MAX ino */
|
||||
init_link_backref_key(&last, &last_lbkey, ino, U64_MAX, NULL, 0);
|
||||
|
||||
/* next backref key is now in ent */
|
||||
ret = scoutfs_lock_ino(sb, DLM_LOCK_PR, 0, ino, &lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_item_next(sb, &key, &last, NULL, lock);
|
||||
ret = scoutfs_item_next(sb, &key, &last_key, &val, lock);
|
||||
scoutfs_unlock(sb, lock, DLM_LOCK_PR);
|
||||
lock = NULL;
|
||||
|
||||
@@ -1236,15 +1247,17 @@ int scoutfs_dir_add_next_linkref(struct super_block *sb, u64 ino,
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
len = (int)key.key_len - sizeof(struct scoutfs_link_backref_key);
|
||||
len = ret - sizeof(struct scoutfs_dirent);
|
||||
/* XXX corruption */
|
||||
if (len < 1 || len > SCOUTFS_NAME_LEN) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ent->name_len = len;
|
||||
list_add(&ent->head, list);
|
||||
ent->dir_ino = be64_to_cpu(dkey.major);
|
||||
ent->dir_pos = be64_to_cpu(dkey.minor);
|
||||
ent->name_len = len;
|
||||
ret = 0;
|
||||
out:
|
||||
if (list_empty(&ent->head))
|
||||
@@ -1257,7 +1270,7 @@ static u64 first_backref_dir_ino(struct list_head *list)
|
||||
struct scoutfs_link_backref_entry *ent;
|
||||
|
||||
ent = list_first_entry(list, struct scoutfs_link_backref_entry, head);
|
||||
return be64_to_cpu(ent->lbkey.dir_ino);
|
||||
return ent->dir_ino;
|
||||
}
|
||||
|
||||
void scoutfs_dir_free_backref_path(struct super_block *sb,
|
||||
@@ -1310,8 +1323,7 @@ void scoutfs_dir_free_backref_path(struct super_block *sb,
|
||||
* sync if we see our dirty seq.
|
||||
*/
|
||||
int scoutfs_dir_get_backref_path(struct super_block *sb, u64 ino, u64 dir_ino,
|
||||
char *name, u16 name_len,
|
||||
struct list_head *list)
|
||||
u64 dir_pos, struct list_head *list)
|
||||
{
|
||||
u64 par_ino;
|
||||
int ret;
|
||||
@@ -1323,15 +1335,14 @@ retry:
|
||||
* confident we won't hit an endless loop here again.
|
||||
*/
|
||||
if (WARN_ONCE(++iters >= 4000, "scoutfs: Excessive retries in "
|
||||
"dir_get_backref_path. ino %llu dir_ino %llu name %.*s\n",
|
||||
ino, dir_ino, name_len, name)) {
|
||||
"dir_get_backref_path. ino %llu dir_ino %llu pos %llu\n",
|
||||
ino, dir_ino, dir_pos)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* get the next link name to the given inode */
|
||||
ret = scoutfs_dir_add_next_linkref(sb, ino, dir_ino, name, name_len,
|
||||
list);
|
||||
ret = scoutfs_dir_add_next_linkref(sb, ino, dir_ino, dir_pos, list);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -1339,8 +1350,7 @@ retry:
|
||||
par_ino = first_backref_dir_ino(list);
|
||||
while (par_ino != SCOUTFS_ROOT_INO) {
|
||||
|
||||
ret = scoutfs_dir_add_next_linkref(sb, par_ino, 0, NULL, 0,
|
||||
list);
|
||||
ret = scoutfs_dir_add_next_linkref(sb, par_ino, 0, 0, list);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT) {
|
||||
/* restart if there was no parent component */
|
||||
@@ -1374,26 +1384,23 @@ static int item_d_ancestor(struct super_block *sb, u64 p1, u64 p2, u64 *p_ret)
|
||||
{
|
||||
struct scoutfs_link_backref_entry *ent;
|
||||
LIST_HEAD(list);
|
||||
u64 dir_ino;
|
||||
int ret;
|
||||
u64 p;
|
||||
|
||||
*p_ret = 0;
|
||||
|
||||
ret = scoutfs_dir_get_backref_path(sb, p2, 0, NULL, 0, &list);
|
||||
ret = scoutfs_dir_get_backref_path(sb, p2, 0, 0, &list);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
p = p2;
|
||||
list_for_each_entry(ent, &list, head) {
|
||||
dir_ino = be64_to_cpu(ent->lbkey.dir_ino);
|
||||
|
||||
if (dir_ino == p1) {
|
||||
if (ent->dir_ino == p1) {
|
||||
*p_ret = p;
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
p = dir_ino;
|
||||
p = ent->dir_ino;
|
||||
}
|
||||
|
||||
out:
|
||||
@@ -1434,27 +1441,18 @@ static int verify_ancestors(struct super_block *sb, u64 p1, u64 p2,
|
||||
* The caller has the name locked in the dir.
|
||||
*/
|
||||
static int verify_entry(struct super_block *sb, u64 dir_ino, const char *name,
|
||||
unsigned name_len, u64 ino,
|
||||
unsigned name_len, u64 hash, u64 ino,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_key_buf *key = NULL;
|
||||
struct scoutfs_dirent dent;
|
||||
struct kvec val;
|
||||
int ret;
|
||||
|
||||
key = alloc_dirent_key(sb, dir_ino, name, name_len);
|
||||
if (!key)
|
||||
return -ENOMEM;
|
||||
|
||||
kvec_init(&val, &dent, sizeof(dent));
|
||||
|
||||
ret = scoutfs_item_lookup_exact(sb, key, &val, lock);
|
||||
ret = lookup_dirent(sb, dir_ino, name, name_len, hash, &dent, lock);
|
||||
if (ret == 0 && le64_to_cpu(dent.ino) != ino)
|
||||
ret = -ENOENT;
|
||||
else if (ret == -ENOENT && ino == 0)
|
||||
ret = 0;
|
||||
|
||||
scoutfs_key_free(sb, key);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1503,12 +1501,19 @@ static int scoutfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
bool ins_old = false;
|
||||
LIST_HEAD(ind_locks);
|
||||
u64 ind_seq;
|
||||
u64 old_hash;
|
||||
u64 new_hash;
|
||||
u64 new_pos;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
trace_scoutfs_rename(sb, old_dir, old_dentry, new_dir, new_dentry);
|
||||
|
||||
old_hash = dirent_name_hash(old_dentry->d_name.name,
|
||||
old_dentry->d_name.len);
|
||||
new_hash = dirent_name_hash(new_dentry->d_name.name,
|
||||
new_dentry->d_name.len);
|
||||
|
||||
if (new_dentry->d_name.len > SCOUTFS_NAME_LEN)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
@@ -1545,10 +1550,10 @@ static int scoutfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
|
||||
/* make sure that the entries assumed by the argument still exist */
|
||||
ret = verify_entry(sb, scoutfs_ino(old_dir), old_dentry->d_name.name,
|
||||
old_dentry->d_name.len, scoutfs_ino(old_inode),
|
||||
old_dir_lock) ?:
|
||||
old_dentry->d_name.len, old_hash,
|
||||
scoutfs_ino(old_inode), old_dir_lock) ?:
|
||||
verify_entry(sb, scoutfs_ino(new_dir), new_dentry->d_name.name,
|
||||
new_dentry->d_name.len,
|
||||
new_dentry->d_name.len, new_hash,
|
||||
new_inode ? scoutfs_ino(new_inode) : 0,
|
||||
new_dir_lock);
|
||||
if (ret)
|
||||
@@ -1586,9 +1591,8 @@ retry:
|
||||
/* remove the new entry if it exists */
|
||||
if (new_inode) {
|
||||
ret = del_entry_items(sb, scoutfs_ino(new_dir),
|
||||
dentry_info_hash(new_dentry),
|
||||
dentry_info_pos(new_dentry),
|
||||
new_dentry->d_name.name,
|
||||
new_dentry->d_name.len,
|
||||
scoutfs_ino(new_inode),
|
||||
new_dir_lock, new_inode_lock);
|
||||
if (ret)
|
||||
@@ -1597,7 +1601,7 @@ retry:
|
||||
}
|
||||
|
||||
/* create the new entry */
|
||||
ret = add_entry_items(sb, scoutfs_ino(new_dir), new_pos,
|
||||
ret = add_entry_items(sb, scoutfs_ino(new_dir), new_hash, new_pos,
|
||||
new_dentry->d_name.name, new_dentry->d_name.len,
|
||||
scoutfs_ino(old_inode), old_inode->i_mode,
|
||||
new_dir_lock, old_inode_lock);
|
||||
@@ -1607,9 +1611,8 @@ retry:
|
||||
|
||||
/* remove the old entry */
|
||||
ret = del_entry_items(sb, scoutfs_ino(old_dir),
|
||||
dentry_info_hash(old_dentry),
|
||||
dentry_info_pos(old_dentry),
|
||||
old_dentry->d_name.name,
|
||||
old_dentry->d_name.len,
|
||||
scoutfs_ino(old_inode),
|
||||
old_dir_lock, old_inode_lock);
|
||||
if (ret)
|
||||
@@ -1625,7 +1628,7 @@ retry:
|
||||
/* won't fail from here on out, update all the vfs structs */
|
||||
|
||||
/* the caller will use d_move to move the old_dentry into place */
|
||||
update_dentry_info(sb, old_dentry, new_pos, new_dir_lock);
|
||||
update_dentry_info(sb, old_dentry, new_hash, new_pos, new_dir_lock);
|
||||
|
||||
i_size_write(old_dir, i_size_read(old_dir) - old_dentry->d_name.len);
|
||||
if (!new_inode)
|
||||
@@ -1664,7 +1667,6 @@ retry:
|
||||
if (new_inode)
|
||||
scoutfs_update_inode_item(new_inode, new_inode_lock,
|
||||
&ind_locks);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (ret) {
|
||||
@@ -1677,10 +1679,14 @@ out:
|
||||
* succeed. Maybe we could have an item replace call
|
||||
* that gives us the dupe to re-insert on cleanup? Not
|
||||
* sure.
|
||||
*
|
||||
* It's safe to use dentry_info here 'cause they haven't
|
||||
* been updated if we saw an error.
|
||||
*/
|
||||
err = 0;
|
||||
if (ins_old)
|
||||
err = add_entry_items(sb, scoutfs_ino(old_dir),
|
||||
dentry_info_hash(old_dentry),
|
||||
dentry_info_pos(old_dentry),
|
||||
old_dentry->d_name.name,
|
||||
old_dentry->d_name.len,
|
||||
@@ -1691,14 +1697,13 @@ out:
|
||||
|
||||
if (del_new && err == 0)
|
||||
err = del_entry_items(sb, scoutfs_ino(new_dir),
|
||||
new_pos,
|
||||
new_dentry->d_name.name,
|
||||
new_dentry->d_name.len,
|
||||
new_hash, new_pos,
|
||||
scoutfs_ino(old_inode),
|
||||
new_dir_lock, old_inode_lock);
|
||||
|
||||
if (ins_new && err == 0)
|
||||
err = add_entry_items(sb, scoutfs_ino(new_dir),
|
||||
dentry_info_hash(new_dentry),
|
||||
dentry_info_pos(new_dentry),
|
||||
new_dentry->d_name.name,
|
||||
new_dentry->d_name.len,
|
||||
|
||||
@@ -10,18 +10,20 @@ extern const struct inode_operations scoutfs_symlink_iops;
|
||||
|
||||
struct scoutfs_link_backref_entry {
|
||||
struct list_head head;
|
||||
u64 dir_ino;
|
||||
u64 dir_pos;
|
||||
u16 name_len;
|
||||
struct scoutfs_link_backref_key lbkey;
|
||||
struct scoutfs_dirent dent;
|
||||
/* the full name is allocated and stored in dent.name[0] */
|
||||
};
|
||||
|
||||
int scoutfs_dir_get_backref_path(struct super_block *sb, u64 target_ino,
|
||||
u64 dir_ino, char *name, u16 name_len,
|
||||
struct list_head *list);
|
||||
int scoutfs_dir_get_backref_path(struct super_block *sb, u64 ino, u64 dir_ino,
|
||||
u64 dir_pos, struct list_head *list);
|
||||
void scoutfs_dir_free_backref_path(struct super_block *sb,
|
||||
struct list_head *list);
|
||||
|
||||
int scoutfs_dir_add_next_linkref(struct super_block *sb, u64 ino,
|
||||
u64 dir_ino, char *name, unsigned int name_len,
|
||||
u64 dir_ino, u64 dir_pos,
|
||||
struct list_head *list);
|
||||
|
||||
int scoutfs_symlink_drop(struct super_block *sb, u64 ino,
|
||||
|
||||
@@ -114,13 +114,12 @@ static struct dentry *scoutfs_get_parent(struct dentry *child)
|
||||
int ret;
|
||||
u64 ino;
|
||||
|
||||
ret = scoutfs_dir_add_next_linkref(sb, scoutfs_ino(inode), 0, NULL, 0,
|
||||
&list);
|
||||
ret = scoutfs_dir_add_next_linkref(sb, scoutfs_ino(inode), 0, 0, &list);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
ent = list_first_entry(&list, struct scoutfs_link_backref_entry, head);
|
||||
ino = be64_to_cpu(ent->lbkey.dir_ino);
|
||||
ino = ent->dir_ino;
|
||||
scoutfs_dir_free_backref_path(sb, &list);
|
||||
trace_scoutfs_get_parent(sb, inode, ino);
|
||||
|
||||
@@ -140,16 +139,16 @@ static int scoutfs_get_name(struct dentry *parent, char *name,
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_dir_add_next_linkref(sb, scoutfs_ino(inode), dir_ino,
|
||||
NULL, 0, &list);
|
||||
0, &list);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = -ENOENT;
|
||||
ent = list_first_entry(&list, struct scoutfs_link_backref_entry, head);
|
||||
if (be64_to_cpu(ent->lbkey.ino) == scoutfs_ino(inode) &&
|
||||
be64_to_cpu(ent->lbkey.dir_ino) == dir_ino &&
|
||||
if (le64_to_cpu(ent->dent.ino) == scoutfs_ino(inode) &&
|
||||
ent->dir_ino == dir_ino &&
|
||||
ent->name_len <= NAME_MAX) {
|
||||
memcpy(name, ent->lbkey.name, ent->name_len);
|
||||
memcpy(name, ent->dent.name, ent->name_len);
|
||||
name[ent->name_len] = '\0';
|
||||
ret = 0;
|
||||
trace_scoutfs_get_name(sb, parent->d_inode, inode, name);
|
||||
|
||||
@@ -266,29 +266,13 @@ struct scoutfs_inode_key {
|
||||
__u8 type;
|
||||
} __packed;
|
||||
|
||||
/* value is struct scoutfs_dirent without the name */
|
||||
/* value is struct scoutfs_dirent with the name */
|
||||
struct scoutfs_dirent_key {
|
||||
__u8 zone;
|
||||
__be64 ino;
|
||||
__u8 type;
|
||||
__u8 name[0];
|
||||
} __packed;
|
||||
|
||||
/* value is struct scoutfs_dirent with the name */
|
||||
struct scoutfs_readdir_key {
|
||||
__u8 zone;
|
||||
__be64 ino;
|
||||
__u8 type;
|
||||
__be64 pos;
|
||||
} __packed;
|
||||
|
||||
/* value is empty */
|
||||
struct scoutfs_link_backref_key {
|
||||
__u8 zone;
|
||||
__be64 ino;
|
||||
__u8 type;
|
||||
__be64 dir_ino;
|
||||
__u8 name[0];
|
||||
__be64 major;
|
||||
__be64 minor;
|
||||
} __packed;
|
||||
|
||||
/* key is bytes of encoded block mapping */
|
||||
@@ -494,13 +478,17 @@ struct scoutfs_inode {
|
||||
#define SCOUTFS_SYMLINK_MAX_SIZE 4096
|
||||
|
||||
/*
|
||||
* Dirents are stored in items with an offset of the hash of their name.
|
||||
* Colliding names are packed into the value.
|
||||
* Dirents are stored in multiple places to isolate contention when
|
||||
* performing different operations: hashed by name for creation and
|
||||
* lookup, at incrementing positions for readdir and resolving inodes to
|
||||
* paths. Each entry has all the metadata needed to reference all the
|
||||
* items (so an entry cached by lookup can be used to unlink all the
|
||||
* items).
|
||||
*/
|
||||
struct scoutfs_dirent {
|
||||
__le64 ino;
|
||||
__le64 counter;
|
||||
__le64 readdir_pos;
|
||||
__le64 hash;
|
||||
__le64 pos;
|
||||
__u8 type;
|
||||
__u8 name[0];
|
||||
} __packed;
|
||||
@@ -526,9 +514,8 @@ enum {
|
||||
SCOUTFS_DT_WHT,
|
||||
};
|
||||
|
||||
/* ino_path can search for backref items with a null term */
|
||||
#define SCOUTFS_MAX_KEY_SIZE \
|
||||
offsetof(struct scoutfs_link_backref_key, name[SCOUTFS_NAME_LEN + 1])
|
||||
sizeof(struct scoutfs_dirent_key)
|
||||
|
||||
#define SCOUTFS_MAX_VAL_SIZE SCOUTFS_BLOCK_MAPPING_MAX_BYTES
|
||||
|
||||
|
||||
@@ -201,116 +201,79 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct ino_path_cursor {
|
||||
__u64 dir_ino;
|
||||
__u8 name[SCOUTFS_NAME_LEN + 1];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* see the definition of scoutfs_ioctl_ino_path for ioctl semantics.
|
||||
*
|
||||
* The null termination of the cursor name is a trick to skip past the
|
||||
* last name we read without having to try and "increment" the name.
|
||||
* Adding a null sorts the cursor after the non-null name and before all
|
||||
* the next names because the item names aren't null terminated.
|
||||
* See the comment above the definition of struct scoutfs_ioctl_ino_path
|
||||
* for ioctl semantics.
|
||||
*/
|
||||
static long scoutfs_ioc_ino_path(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_ioctl_ino_path __user *uargs;
|
||||
struct scoutfs_ioctl_ino_path_result __user *ures;
|
||||
struct scoutfs_link_backref_entry *last_ent;
|
||||
struct scoutfs_link_backref_entry *ent;
|
||||
struct ino_path_cursor __user *ucurs;
|
||||
struct scoutfs_ioctl_ino_path args;
|
||||
char __user *upath;
|
||||
LIST_HEAD(list);
|
||||
u64 dir_ino;
|
||||
u16 name_len;
|
||||
u16 copied;
|
||||
char term;
|
||||
char *name;
|
||||
int ret;
|
||||
|
||||
BUILD_BUG_ON(SCOUTFS_IOC_INO_PATH_CURSOR_BYTES !=
|
||||
sizeof(struct ino_path_cursor));
|
||||
|
||||
if (!capable(CAP_DAC_READ_SEARCH))
|
||||
return -EPERM;
|
||||
|
||||
uargs = (void __user *)arg;
|
||||
if (copy_from_user(&args, uargs, sizeof(args)))
|
||||
if (copy_from_user(&args, (void __user *)arg, sizeof(args)))
|
||||
return -EFAULT;
|
||||
|
||||
if (args.cursor_bytes != sizeof(struct ino_path_cursor))
|
||||
return -EINVAL;
|
||||
ures = (void __user *)(unsigned long)args.result_ptr;
|
||||
|
||||
ucurs = (void __user *)(unsigned long)args.cursor_ptr;
|
||||
upath = (void __user *)(unsigned long)args.path_ptr;
|
||||
|
||||
if (get_user(dir_ino, &ucurs->dir_ino))
|
||||
return -EFAULT;
|
||||
|
||||
/* alloc/copy the small cursor name, requires and includes null */
|
||||
name_len = strnlen_user(ucurs->name, sizeof(ucurs->name));
|
||||
if (name_len < 1 || name_len > sizeof(ucurs->name))
|
||||
return -EINVAL;
|
||||
|
||||
name = kmalloc(name_len, GFP_KERNEL);
|
||||
if (!name)
|
||||
return -ENOMEM;
|
||||
|
||||
if (copy_from_user(name, ucurs->name, name_len)) {
|
||||
ret = -EFAULT;
|
||||
ret = scoutfs_dir_get_backref_path(sb, args.ino, args.dir_ino,
|
||||
args.dir_pos, &list);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_dir_get_backref_path(sb, args.ino, dir_ino, name,
|
||||
name_len, &list);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
last_ent = list_last_entry(&list, struct scoutfs_link_backref_entry,
|
||||
head);
|
||||
copied = 0;
|
||||
list_for_each_entry(ent, &list, head) {
|
||||
if (ret + ent->name_len + 1 > args.path_bytes) {
|
||||
|
||||
if (offsetof(struct scoutfs_ioctl_ino_path_result,
|
||||
path[copied + ent->name_len + 1])
|
||||
> args.result_bytes) {
|
||||
ret = -ENAMETOOLONG;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (copy_to_user(upath, ent->lbkey.name, ent->name_len)) {
|
||||
if (copy_to_user(&ures->path[copied],
|
||||
ent->dent.name, ent->name_len)) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
upath += ent->name_len;
|
||||
ret += ent->name_len;
|
||||
copied += ent->name_len;
|
||||
|
||||
if (ent->head.next == &list)
|
||||
if (ent == last_ent)
|
||||
term = '\0';
|
||||
else
|
||||
term = '/';
|
||||
|
||||
if (put_user(term, upath)) {
|
||||
if (put_user(term, &ures->path[copied])) {
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
upath++;
|
||||
ret++;
|
||||
copied++;
|
||||
}
|
||||
|
||||
/* copy the last entry into the cursor */
|
||||
ent = list_last_entry(&list, struct scoutfs_link_backref_entry, head);
|
||||
|
||||
if (put_user(be64_to_cpu(ent->lbkey.dir_ino), &ucurs->dir_ino) ||
|
||||
copy_to_user(ucurs->name, ent->lbkey.name, ent->name_len) ||
|
||||
put_user('\0', &ucurs->name[ent->name_len])) {
|
||||
/* fill the result header now that we know the copied path length */
|
||||
if (put_user(last_ent->dir_ino, &ures->dir_ino) ||
|
||||
put_user(last_ent->dir_pos, &ures->dir_pos) ||
|
||||
put_user(copied, &ures->path_bytes)) {
|
||||
ret = -EFAULT;
|
||||
} else {
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
out:
|
||||
scoutfs_dir_free_backref_path(sb, &list);
|
||||
kfree(name);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -64,24 +64,36 @@ enum {
|
||||
struct scoutfs_ioctl_walk_inodes)
|
||||
|
||||
/*
|
||||
* Fill the path buffer with the next path to the target inode. An
|
||||
* iteration cursor is stored in the cursor buffer which advances
|
||||
* through the paths to the inode at each call.
|
||||
* Fill the result buffer with the next absolute path to the target
|
||||
* inode searching from a given position in a parent directory.
|
||||
*
|
||||
* @ino: The target ino that we're finding paths to. Constant across
|
||||
* all the calls that make up an iteration over all the inode's paths.
|
||||
*
|
||||
* @cursor_ptr: A pointer to the buffer that will hold the iteration
|
||||
* cursor. It must be initialized to 0 before iterating. Each call
|
||||
* modifies it to skip past the result of that call.
|
||||
* @dir_ino: The inode number of the directory containing the entry to
|
||||
* our inode to search from. If this parent directory contains no more
|
||||
* entries to our inode then we'll search through other parent directory
|
||||
* inodes in inode order.
|
||||
*
|
||||
* @cursor_bytes: The length of the cursor buffer. Must be
|
||||
* SCOUTFS_IOC_INO_PATH_CURSOR_BYTES.
|
||||
* @dir_pos: The position in the dir_ino parent directory of the entry
|
||||
* to our inode to search from. If there is no entry at this position
|
||||
* then we'll search through other entry positions in increasing order.
|
||||
* If we exhaust the parent directory then we'll search through
|
||||
* additional parent directories in inode order.
|
||||
*
|
||||
* @path_ptr: The buffer to store each found path.
|
||||
* @result_ptr: A pointer to the buffer where the result struct and
|
||||
* absolute path will be stored.
|
||||
*
|
||||
* @path_bytes: The size of the buffer that will hold the found path
|
||||
* including null termination. (PATH_MAX is a solid choice.)
|
||||
* @result_bytes: The size of the buffer that will contain the result
|
||||
* struct and the null terminated absolute path name.
|
||||
*
|
||||
* To start iterating set the desired target inode, dir_ino to 0,
|
||||
* dir_pos to 0, and set result_ptr and _bytes to a sufficiently large
|
||||
* buffer (sizeof(result) + PATH_MAX is a solid choice).
|
||||
*
|
||||
* After each returned result set the next search dir_ino and dir_pos to
|
||||
* the returned dir_ino and dir_pos. Then increment the search dir_pos,
|
||||
* and if it wrapped to 0, increment dir_ino.
|
||||
*
|
||||
* This only walks back through full hard links. None of the returned
|
||||
* paths will reflect symlinks to components in the path.
|
||||
@@ -90,28 +102,39 @@ enum {
|
||||
* returned paths to the inode. It requires CAP_DAC_READ_SEARCH which
|
||||
* bypasses permissions checking.
|
||||
*
|
||||
* ENAMETOOLONG is returned when the next path found from the cursor
|
||||
* doesn't fit in the path buffer.
|
||||
*
|
||||
* This call is not serialized with any modification (create, rename,
|
||||
* unlink) of the path components. It will return all the paths that
|
||||
* were stable both before and after the call. It may or may not return
|
||||
* paths which are created or unlinked during the call.
|
||||
*
|
||||
* The number of bytes in the path, including the null terminator, are
|
||||
* returned when a path is found. 0 is returned when there are no more
|
||||
* paths to the link to the inode from the cursor.
|
||||
* On success 0 is returned and the result struct is filled with the next
|
||||
* absolute path. The path_bytes length of the path includes a null
|
||||
* terminating byte. dir_ino and dir_pos refer to the position of the
|
||||
* final component in its parent directory and can be advanced to search
|
||||
* for the next terminal entry whose path is then built by walking up
|
||||
* parent directories.
|
||||
*
|
||||
* ENOENT is returned when no paths are found.
|
||||
*
|
||||
* ENAMETOOLONG is returned when the result struct and path found
|
||||
* doesn't fit in the result buffer.
|
||||
*
|
||||
* Many other errnos indicate hard failure to find the next path.
|
||||
*/
|
||||
struct scoutfs_ioctl_ino_path {
|
||||
__u64 ino;
|
||||
__u64 cursor_ptr;
|
||||
__u64 path_ptr;
|
||||
__u16 cursor_bytes;
|
||||
__u16 path_bytes;
|
||||
__u64 dir_ino;
|
||||
__u64 dir_pos;
|
||||
__u64 result_ptr;
|
||||
__u16 result_bytes;
|
||||
} __packed;
|
||||
|
||||
#define SCOUTFS_IOC_INO_PATH_CURSOR_BYTES \
|
||||
(sizeof(__u64) + SCOUTFS_NAME_LEN + 1)
|
||||
struct scoutfs_ioctl_ino_path_result {
|
||||
__u64 dir_ino;
|
||||
__u64 dir_pos;
|
||||
__u16 path_bytes;
|
||||
__u8 path[0];
|
||||
} __packed;
|
||||
|
||||
/* Get a single path from the root to the given inode number */
|
||||
#define SCOUTFS_IOC_INO_PATH _IOW(SCOUTFS_IOCTL_MAGIC, 2, \
|
||||
|
||||
@@ -276,35 +276,17 @@ static int pr_xattr(char *buf, struct scoutfs_key_buf *key, size_t size)
|
||||
static int pr_dirent(char *buf, struct scoutfs_key_buf *key, size_t size)
|
||||
{
|
||||
struct scoutfs_dirent_key *dkey = key->data;
|
||||
int len = (int)key->key_len - sizeof(struct scoutfs_dirent_key);
|
||||
char *which = dkey->type == SCOUTFS_DIRENT_TYPE ? "dnt" :
|
||||
dkey->type == SCOUTFS_READDIR_TYPE ? "rdr" :
|
||||
dkey->type == SCOUTFS_LINK_BACKREF_TYPE ? "lbr" :
|
||||
"unk";
|
||||
|
||||
return snprintf_key(buf, size, key,
|
||||
sizeof(struct scoutfs_dirent_key), key->key_len,
|
||||
"fs.%llu.dnt.%.*s",
|
||||
be64_to_cpu(dkey->ino), len, dkey->name);
|
||||
}
|
||||
|
||||
static int pr_readdir(char *buf, struct scoutfs_key_buf *key, size_t size)
|
||||
{
|
||||
struct scoutfs_readdir_key *rkey = key->data;
|
||||
|
||||
return snprintf_key(buf, size, key,
|
||||
sizeof(struct scoutfs_readdir_key), 0,
|
||||
"fs.%llu.rdr.%llu",
|
||||
be64_to_cpu(rkey->ino), be64_to_cpu(rkey->pos));
|
||||
}
|
||||
|
||||
static int pr_link_backref(char *buf, struct scoutfs_key_buf *key, size_t size)
|
||||
{
|
||||
struct scoutfs_link_backref_key *lkey = key->data;
|
||||
int len = (int)key->key_len - sizeof(*lkey);
|
||||
|
||||
return snprintf_key(buf, size, key,
|
||||
sizeof(struct scoutfs_link_backref_key),
|
||||
key->key_len,
|
||||
"fs.%llu.lbr.%llu.%.*s",
|
||||
be64_to_cpu(lkey->ino), be64_to_cpu(lkey->dir_ino),
|
||||
len, lkey->name);
|
||||
"fs.%llu.%s.%llu.%llu",
|
||||
be64_to_cpu(dkey->ino), which,
|
||||
be64_to_cpu(dkey->major),
|
||||
be64_to_cpu(dkey->minor));
|
||||
}
|
||||
|
||||
static int pr_symlink(char *buf, struct scoutfs_key_buf *key, size_t size)
|
||||
@@ -339,8 +321,8 @@ const static key_printer_t key_printers[SCOUTFS_MAX_ZONE][SCOUTFS_MAX_TYPE] = {
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_INODE_TYPE] = pr_inode,
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_XATTR_TYPE] = pr_xattr,
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_DIRENT_TYPE] = pr_dirent,
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_READDIR_TYPE] = pr_readdir,
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_LINK_BACKREF_TYPE] = pr_link_backref,
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_READDIR_TYPE] = pr_dirent,
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_LINK_BACKREF_TYPE] = pr_dirent,
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_SYMLINK_TYPE] = pr_symlink,
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_BLOCK_MAPPING_TYPE] = pr_block_mapping,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user