mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-03 02:45:20 +00:00
Raise the nlink limit
A few xfstests tests were failing because they tried to create a decent number of hard links to a file. We had a small nlink limit because the inode_paths ioctl copied all the paths for all the hard links to a userspace buffer which could be enormous if there was a larger nlink limit. The hard link backref disk format already has a natural counter that could be used as a cursor to iterate over all the hard links that point to a given inode. This refactors the inode_paths ioctl into an ino_path ioctl that returns a single path for the given counter and returns the counter for the next path that links to the inode. Happily this lets us get rid of all the weird path component lists and allocations. Now there's just the kernel path buffer that gets null terminated path components and the userspace buffer that those are copied to. We don't fully relax the nlink limit. stat(2) returns the link count as a u32. We go a step further and limit it to S32_MAX so that apps might avoid sign bugs. That still gives us a more generous limit than ext4 and btrfs which are around U16_MAX. Signed-off-by: Zach Brown <zab@versity.com> Reviewed-by: Mark Fasheh <mfasheh@versity.com>
This commit is contained in:
107
kmod/src/dir.c
107
kmod/src/dir.c
@@ -810,8 +810,8 @@ int scoutfs_symlink_drop(struct super_block *sb, u64 ino)
|
||||
}
|
||||
|
||||
/*
|
||||
* Add an allocated path component to the callers list which links to
|
||||
* the target inode at a counter past the given counter.
|
||||
* Store the null terminated path component that links to the inode at
|
||||
* the given counter in the caller's buffer.
|
||||
*
|
||||
* This is implemented by searching for link backrefs on the inode
|
||||
* starting from the given counter. Those contain references to the
|
||||
@@ -827,11 +827,10 @@ int scoutfs_symlink_drop(struct super_block *sb, u64 ino)
|
||||
* Backref counters are never reused and rename only modifies the
|
||||
* existing backref counter under the dir's mutex.
|
||||
*/
|
||||
static int add_linkref_name(struct super_block *sb, u64 *dir_ino, u64 ino,
|
||||
u64 *ctr, struct list_head *list)
|
||||
static int append_linkref_name(struct super_block *sb, u64 *dir_ino, u64 ino,
|
||||
u64 *ctr, char *path, unsigned int bytes)
|
||||
{
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
struct scoutfs_path_component *comp;
|
||||
struct scoutfs_link_backref lref;
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_dirent dent;
|
||||
@@ -844,10 +843,6 @@ static int add_linkref_name(struct super_block *sb, u64 *dir_ino, u64 ino,
|
||||
int len;
|
||||
int ret;
|
||||
|
||||
comp = kmalloc(sizeof(struct scoutfs_path_component), GFP_KERNEL);
|
||||
if (!comp)
|
||||
return -ENOMEM;
|
||||
|
||||
retry:
|
||||
scoutfs_set_key(&first, ino, SCOUTFS_LINK_BACKREF_KEY, *ctr);
|
||||
scoutfs_set_key(&last, ino, SCOUTFS_LINK_BACKREF_KEY, ~0ULL);
|
||||
@@ -900,69 +895,52 @@ retry:
|
||||
}
|
||||
|
||||
scoutfs_set_key(&key, *dir_ino, SCOUTFS_DIRENT_KEY, off);
|
||||
scoutfs_btree_init_val(&val, &dent, sizeof(dent),
|
||||
comp->name, SCOUTFS_NAME_LEN);
|
||||
scoutfs_btree_init_val(&val, &dent, sizeof(dent), path, bytes - 1);
|
||||
val.check_size_lte = 1;
|
||||
|
||||
ret = scoutfs_btree_lookup(sb, meta, &key, &val);
|
||||
if (ret < 0) {
|
||||
/* XXX corruption, should always have dirent for backref */
|
||||
if (ret == -ENOENT)
|
||||
ret = -EIO;
|
||||
else if (ret == -EOVERFLOW)
|
||||
ret = -ENAMETOOLONG;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* XXX corruption */
|
||||
if (ret < sizeof(dent)) {
|
||||
if (ret <= sizeof(dent)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
len = ret - sizeof(dent); /* just name len, no null term */
|
||||
|
||||
/* XXX corruption */
|
||||
if (len > SCOUTFS_NAME_LEN || le64_to_cpu(dent.ino) != ino) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
len = ret - sizeof(dent);
|
||||
trace_printk("dent ino %llu len %d\n", le64_to_cpu(dent.ino), len);
|
||||
|
||||
/* XXX corruption */
|
||||
if (len < 1 || len > SCOUTFS_NAME_LEN) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* XXX corruption, dirents should always match link backref */
|
||||
if (le64_to_cpu(dent.ino) != ino) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
(*ctr)++;
|
||||
comp->len = len;
|
||||
list_add(&comp->head, list);
|
||||
comp = NULL; /* won't be freed */
|
||||
|
||||
ret = 1;
|
||||
path[len] = '\0';
|
||||
ret = len + 1;
|
||||
out:
|
||||
if (inode) {
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
iput(inode);
|
||||
}
|
||||
|
||||
kfree(comp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void scoutfs_dir_free_path(struct list_head *list)
|
||||
{
|
||||
struct scoutfs_path_component *comp;
|
||||
struct scoutfs_path_component *tmp;
|
||||
|
||||
list_for_each_entry_safe(comp, tmp, list, head) {
|
||||
list_del_init(&comp->head);
|
||||
kfree(comp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill the list with the allocated path components that link the root
|
||||
* to the target inode. The caller's ctr gives the link counter to
|
||||
* start from.
|
||||
* Fill the caller's buffer with the null terminated path components
|
||||
* from the target inode to the root. These will be in the opposite
|
||||
* order of a typical slash delimited path. The caller's ctr gives the
|
||||
* specific link to start from.
|
||||
*
|
||||
* This is racing with modification of components in the path. We can
|
||||
* traverse a partial path only to find that it's been blown away
|
||||
@@ -970,44 +948,53 @@ void scoutfs_dir_free_path(struct list_head *list)
|
||||
* the final link to the inode should prevent repeatedly traversing
|
||||
* paths that no longer exist.
|
||||
*
|
||||
* Returns > 0 and *ctr is updated if an allocated name was added to the
|
||||
* list, 0 if no name past *ctr was found, or -errno on errors.
|
||||
* Returns > 0 and *ctr is updated if a full path from the link to the
|
||||
* root dir was filled, 0 if no name past *ctr was found, or -errno on
|
||||
* errors.
|
||||
*/
|
||||
int scoutfs_dir_next_path(struct super_block *sb, u64 ino, u64 *ctr,
|
||||
struct list_head *list)
|
||||
int scoutfs_dir_get_ino_path(struct super_block *sb, u64 ino, u64 *ctr,
|
||||
char *path, unsigned int bytes)
|
||||
{
|
||||
u64 our_ctr;
|
||||
u64 final_ctr;
|
||||
u64 par_ctr;
|
||||
u64 par_ino;
|
||||
int ret;
|
||||
int nr;
|
||||
|
||||
if (*ctr == U64_MAX)
|
||||
return 0;
|
||||
|
||||
retry:
|
||||
our_ctr = *ctr;
|
||||
final_ctr = *ctr;
|
||||
ret = 0;
|
||||
|
||||
/* get the next link name to the given inode */
|
||||
ret = add_linkref_name(sb, &par_ino, ino, &our_ctr, list);
|
||||
if (ret <= 0)
|
||||
nr = append_linkref_name(sb, &par_ino, ino, &final_ctr, path, bytes);
|
||||
if (nr <= 0) {
|
||||
ret = nr;
|
||||
goto out;
|
||||
}
|
||||
ret += nr;
|
||||
|
||||
/* then get the names of all the parent dirs */
|
||||
while (par_ino != SCOUTFS_ROOT_INO) {
|
||||
par_ctr = 0;
|
||||
ret = add_linkref_name(sb, &par_ino, par_ino, &par_ctr, list);
|
||||
if (ret < 0)
|
||||
nr = append_linkref_name(sb, &par_ino, par_ino, &par_ctr,
|
||||
path + ret, bytes - ret);
|
||||
if (nr < 0) {
|
||||
ret = nr;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* restart if there was no parent component */
|
||||
if (ret == 0) {
|
||||
scoutfs_dir_free_path(list);
|
||||
if (nr == 0)
|
||||
goto retry;
|
||||
}
|
||||
|
||||
ret += nr;
|
||||
}
|
||||
|
||||
out:
|
||||
if (ret > 0)
|
||||
*ctr = our_ctr;
|
||||
*ctr = final_ctr;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -16,9 +16,8 @@ struct scoutfs_path_component {
|
||||
char name[SCOUTFS_NAME_LEN];
|
||||
};
|
||||
|
||||
int scoutfs_dir_next_path(struct super_block *sb, u64 ino, u64 *ctr,
|
||||
struct list_head *list);
|
||||
void scoutfs_dir_free_path(struct list_head *list);
|
||||
int scoutfs_dir_get_ino_path(struct super_block *sb, u64 ino, u64 *ctr,
|
||||
char *path, unsigned int bytes);
|
||||
|
||||
int scoutfs_symlink_drop(struct super_block *sb, u64 ino);
|
||||
|
||||
|
||||
@@ -254,14 +254,8 @@ struct scoutfs_dirent {
|
||||
|
||||
#define SCOUTFS_NAME_LEN 255
|
||||
|
||||
/*
|
||||
* This is arbitrarily limiting the max size of the single buffer
|
||||
* that's needed in the inode_paths ioctl to return all the paths
|
||||
* that link to an inode. The structures could easily support much
|
||||
* more than this but then we'd need to grow a more thorough interface
|
||||
* for iterating over referring paths. That sounds horrible.
|
||||
*/
|
||||
#define SCOUTFS_LINK_MAX 255
|
||||
/* S32_MAX avoids the (int) sign bit and might avoid sloppy bugs */
|
||||
#define SCOUTFS_LINK_MAX S32_MAX
|
||||
|
||||
/*
|
||||
* We only use 31 bits for readdir positions so that we don't confuse
|
||||
|
||||
110
kmod/src/ioctl.c
110
kmod/src/ioctl.c
@@ -99,26 +99,13 @@ static long scoutfs_ioc_inodes_since(struct file *file, unsigned long arg,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int copy_to_ptr(char __user **to, const void *from,
|
||||
unsigned long n, int space)
|
||||
{
|
||||
if (n > space)
|
||||
return -EOVERFLOW;
|
||||
|
||||
if (copy_to_user(*to, from, n))
|
||||
return -EFAULT;
|
||||
|
||||
*to += n;
|
||||
return space - n;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill the caller's buffer with all the paths from the on-disk root
|
||||
* directory to the target inode. It will provide as many full paths as
|
||||
* there are final links to the target inode.
|
||||
* Fill the caller's buffer with one of the paths from the on-disk root
|
||||
* directory to the target inode.
|
||||
*
|
||||
* The null terminated paths are stored consecutively in the buffer. A
|
||||
* final zero length null terminated string follows the last path.
|
||||
* Userspace provides a u64 counter used to choose which path to return.
|
||||
* It should be initialized to zero to start iterating. After each path
|
||||
* it is set to the next counter to search from.
|
||||
*
|
||||
* This only walks back through full hard links. None of the returned
|
||||
* paths will reflect symlinks to components in the path.
|
||||
@@ -127,35 +114,32 @@ static int copy_to_ptr(char __user **to, const void *from,
|
||||
* returned paths to the inode. It requires CAP_DAC_READ_SEARCH which
|
||||
* bypasses permissions checking.
|
||||
*
|
||||
* If the provided buffer isn't large enough EOVERFLOW will be returned.
|
||||
* The buffer can be approximately sized by multiplying the inode's
|
||||
* nlink by PATH_MAX.
|
||||
* ENAMETOOLONG is returned when the next path from the given counter
|
||||
* doesn't fit in the buffer. Providing a buffer of PATH_MAX should
|
||||
* succeed.
|
||||
*
|
||||
* This call is not serialized with any modification (create, rename,
|
||||
* unlink) of the path components. It will return all the paths that
|
||||
* were stable both before and after the call. It may or may not return
|
||||
* paths which are created or unlinked during the call.
|
||||
*
|
||||
* This will return failure if it fails to read any path. An empty
|
||||
* buffer is returned if the target inode doesn't exist or is
|
||||
* disconnected from the root.
|
||||
* The number of bytes in the path, including the null terminator, is
|
||||
* returned when a path is found. 0 is returned when there are no more
|
||||
* paths to the link from the given counter. -errno is returned on
|
||||
* errors.
|
||||
*
|
||||
* XXX
|
||||
* - we may want to support partial failure
|
||||
* - can dir renaming trick us into returning garbage paths? seems likely.
|
||||
*/
|
||||
static long scoutfs_ioc_inode_paths(struct file *file, unsigned long arg)
|
||||
static long scoutfs_ioc_ino_path(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_ioctl_inode_paths __user *uargs = (void __user *)arg;
|
||||
struct scoutfs_ioctl_inode_paths args;
|
||||
struct scoutfs_path_component *comp;
|
||||
struct scoutfs_path_component *tmp;
|
||||
static char slash = '/';
|
||||
static char null = '\0';
|
||||
char __user *ptr;
|
||||
LIST_HEAD(list);
|
||||
u64 ctr;
|
||||
struct scoutfs_ioctl_ino_path __user *uargs = (void __user *)arg;
|
||||
struct scoutfs_ioctl_ino_path args;
|
||||
unsigned int bytes;
|
||||
char __user *upath;
|
||||
char *comp;
|
||||
char *path;
|
||||
int ret;
|
||||
int len;
|
||||
|
||||
@@ -165,42 +149,40 @@ static long scoutfs_ioc_inode_paths(struct file *file, unsigned long arg)
|
||||
if (copy_from_user(&args, uargs, sizeof(args)))
|
||||
return -EFAULT;
|
||||
|
||||
if (args.buf_len > INT_MAX)
|
||||
if (args.path_bytes <= 1)
|
||||
return -EINVAL;
|
||||
|
||||
ptr = (void __user *)(unsigned long)args.buf_ptr;
|
||||
len = args.buf_len;
|
||||
bytes = min_t(unsigned int, args.path_bytes, PATH_MAX);
|
||||
path = kmalloc(bytes, GFP_KERNEL);
|
||||
if (path == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
ctr = 0;
|
||||
while ((ret = scoutfs_dir_next_path(sb, args.ino, &ctr, &list)) > 0) {
|
||||
ret = 0;
|
||||
/* positive ret is len of all components including null terminators */
|
||||
ret = scoutfs_dir_get_ino_path(sb, args.ino, &args.ctr, path, bytes);
|
||||
if (ret <= 0)
|
||||
goto out;
|
||||
|
||||
/* copy the components out as a path */
|
||||
list_for_each_entry_safe(comp, tmp, &list, head) {
|
||||
len = copy_to_ptr(&ptr, comp->name, comp->len, len);
|
||||
if (len < 0)
|
||||
goto out;
|
||||
/* reverse the components from backref order to path/ order */
|
||||
comp = path;
|
||||
upath = (void __user *)((unsigned long)args.path_ptr + ret);
|
||||
while (comp < (path + ret)) {
|
||||
len = strlen(comp);
|
||||
if (comp != path)
|
||||
comp[len] = '/';
|
||||
len++;
|
||||
|
||||
list_del_init(&comp->head);
|
||||
kfree(comp);
|
||||
|
||||
if (!list_empty(&list)) {
|
||||
len = copy_to_ptr(&ptr, &slash, 1, len);
|
||||
if (len < 0)
|
||||
goto out;
|
||||
}
|
||||
upath -= len;
|
||||
if (copy_to_user(upath, comp, len)) {
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
}
|
||||
len = copy_to_ptr(&ptr, &null, 1, len);
|
||||
if (len < 0)
|
||||
goto out;
|
||||
comp += len;
|
||||
}
|
||||
|
||||
len = copy_to_ptr(&ptr, &null, 1, len);
|
||||
if (ret > 0 && put_user(args.ctr, &uargs->ctr))
|
||||
ret = -EFAULT;
|
||||
out:
|
||||
scoutfs_dir_free_path(&list);
|
||||
|
||||
if (ret == 0 && len < 0)
|
||||
ret = len;
|
||||
kfree(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -297,8 +279,8 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
switch (cmd) {
|
||||
case SCOUTFS_IOC_INODES_SINCE:
|
||||
return scoutfs_ioc_inodes_since(file, arg, SCOUTFS_INODE_KEY);
|
||||
case SCOUTFS_IOC_INODE_PATHS:
|
||||
return scoutfs_ioc_inode_paths(file, arg);
|
||||
case SCOUTFS_IOC_INO_PATH:
|
||||
return scoutfs_ioc_ino_path(file, arg);
|
||||
case SCOUTFS_IOC_FIND_XATTR_NAME:
|
||||
return scoutfs_ioc_find_xattr(file, arg, true);
|
||||
case SCOUTFS_IOC_FIND_XATTR_VAL:
|
||||
|
||||
@@ -26,18 +26,17 @@ struct scoutfs_ioctl_inodes_since {
|
||||
#define SCOUTFS_IOC_INODES_SINCE _IOW(SCOUTFS_IOCTL_MAGIC, 1, \
|
||||
struct scoutfs_ioctl_inodes_since)
|
||||
|
||||
struct scoutfs_ioctl_inode_paths {
|
||||
/* returns bytes of path buffer set starting at _off, including null */
|
||||
struct scoutfs_ioctl_ino_path {
|
||||
__u64 ino;
|
||||
__u64 buf_ptr;
|
||||
__u32 buf_len;
|
||||
__u64 ctr; /* init to 0, set to next */
|
||||
__u64 path_ptr;
|
||||
__u16 path_bytes; /* total buffer space, including null term */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Fills the callers buffer with all the paths from the root to the
|
||||
* target inode.
|
||||
*/
|
||||
#define SCOUTFS_IOC_INODE_PATHS _IOW(SCOUTFS_IOCTL_MAGIC, 2, \
|
||||
struct scoutfs_ioctl_inode_paths)
|
||||
/* Get a single path from the root to the given inode number */
|
||||
#define SCOUTFS_IOC_INO_PATH _IOW(SCOUTFS_IOCTL_MAGIC, 2, \
|
||||
struct scoutfs_ioctl_ino_path)
|
||||
|
||||
/* XXX might as well include a seq? 0 for current behaviour? */
|
||||
struct scoutfs_ioctl_find_xattr {
|
||||
|
||||
Reference in New Issue
Block a user