scoutfs: add fallocate

Add an fallocate operation.

This changes the possible combinations of flags in extents and makes it
possible to create extents beyond i_size.  This will confuse the rest of
the code in a few places and that will be fixed up next.

Signed-off-by: Zach Brown <zab@versity.com>
Zach Brown authored 2018-06-15 15:16:48 -07:00; committed by Zach Brown
parent dab0fd7d9a
commit 1fca13b092
6 changed files with 279 additions and 0 deletions
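As a quick illustration of what the new operation enables from userspace, here is a minimal sketch (the mount path and sizes are made up; per the diff below, only mode 0 and FALLOC_FL_KEEP_SIZE are accepted and anything else returns EOPNOTSUPP):

/* minimal userspace sketch; the path and sizes are illustrative only */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/scoutfs/file", O_CREAT | O_RDWR, 0644);

	if (fd < 0)
		return 1;

	/* mode 0: preallocate and extend i_size to 1MiB */
	if (fallocate(fd, 0, 0, 1 << 20))
		perror("fallocate");

	/* KEEP_SIZE: preallocate a second MiB beyond i_size without changing it */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 1 << 20, 1 << 20))
		perror("fallocate keep size");

	close(fd);
	return 0;
}
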

View File

@@ -247,4 +247,25 @@ SIC_TRUNC_EXTENT(struct inode *inode)
	return cnt;
}

/*
 * Fallocating an extent can, at most:
 * - allocate from the server: delete two free and insert merged
 * - free an allocated extent: delete one and create two split
 * - remove an unallocated file extent: delete one and create two split
 * - add an fallocated file extent: delete two and insert one merged
 */
static inline const struct scoutfs_item_count SIC_FALLOCATE_ONE(void)
{
	struct scoutfs_item_count cnt = {0,};
	unsigned int nr_free = ((1 + 2) * 2) * 2;
	unsigned int nr_file = (1 + 2) * 2;

	__count_dirty_inode(&cnt);

	cnt.items += nr_free + nr_file;
	cnt.vals += nr_file * sizeof(struct scoutfs_file_extent);

	return cnt;
}

#endif
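
One way to read those constants, offered as an editor's sketch rather than something stated in the commit: each case in the comment touches at most three items (one inserted or merged item plus two deleted or split items), free extents are indexed under two item types (the blkno- and length-indexed items used in data.c below), and the free-extent budget covers both the forward allocation and its possible cleanup. The grouping of the factors is an assumption; only the totals come from the code above:

/* standalone sketch: the grouping is an editor's reading, not from the commit */
enum {
	EXTENT_OP_ITEMS  = 1 + 2, /* one inserted/merged item plus two deleted/split items */
	FREE_INDEX_TYPES = 2,     /* free extents are indexed by blkno and by length */
	FREE_CASES       = 2,     /* allocate from the server, free again on cleanup */
	FILE_CASES       = 2,     /* remove an unallocated extent, add the fallocated one */
};

_Static_assert(EXTENT_OP_ITEMS * FREE_INDEX_TYPES * FREE_CASES == ((1 + 2) * 2) * 2,
	       "matches nr_free");
_Static_assert(EXTENT_OP_ITEMS * FILE_CASES == (1 + 2) * 2,
	       "matches nr_file");
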

View File

@@ -27,6 +27,7 @@
	EXPAND_COUNTER(corrupt_btree_no_child_ref) \
	EXPAND_COUNTER(corrupt_data_extent_trunc_cleanup) \
	EXPAND_COUNTER(corrupt_data_extent_alloc_cleanup) \
	EXPAND_COUNTER(corrupt_data_extent_fallocate_cleanup) \
	EXPAND_COUNTER(corrupt_dirent_backref_name_len) \
	EXPAND_COUNTER(corrupt_dirent_name_len) \
	EXPAND_COUNTER(corrupt_dirent_readdir_name_len) \

View File

@@ -19,6 +19,7 @@
#include <linux/buffer_head.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/falloc.h>
#include "format.h"
#include "super.h"
@@ -820,6 +821,230 @@ static int scoutfs_write_end(struct file *file, struct address_space *mapping,
	return ret;
}

/*
 * Update one extent on behalf of fallocate.
 *
 * The caller has searched for the next extent that intersects with the
 * region including first_block and last_block. The next extent will be
 * zeroed if it wasn't found. We don't know the state of the offsets
 * past the next extent.
 *
 * The caller has held transactions and acquired locks. We only ever
 * make one extent modification here.
 *
 * If this returns 0 then the caller's extent is clobbered. It is set
 * to the newly fallocated extent so that the caller can continue with
 * the fallocate operation.
 */
static int fallocate_one_extent(struct super_block *sb, u64 ino, u64 start,
				u64 len, u8 flags, u8 rem_flags,
				struct scoutfs_lock *lock)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct scoutfs_extent fal;
	struct scoutfs_extent rem;
	struct scoutfs_extent fr;
	bool add_rem = false;
	bool add_fr = false;
	int ret;

	if (WARN_ON_ONCE(len == 0) ||
	    WARN_ON_ONCE(start + len < start)) {
		ret = -EINVAL;
		goto out;
	}
	/* find a sufficiently large free extent */
	scoutfs_extent_init(&fr, SCOUTFS_FREE_EXTENT_BLOCKS_TYPE,
			    sbi->node_id, 0, len, 0, 0);
	ret = scoutfs_extent_next(sb, data_extent_io, &fr,
				  sbi->node_id_lock);
	if (ret == -ENOENT) {
		/* try to get allocation from the server if we're out */
		ret = get_server_extent(sb, SERVER_ALLOC_BLOCKS);
		if (ret == 0)
			ret = scoutfs_extent_next(sb, data_extent_io, &fr,
						  sbi->node_id_lock);
		/* XXX try to find smaller free extents */
	}
	if (ret < 0) {
		if (ret == -ENOENT)
			ret = -ENOSPC;
		goto out;
	}

	/* trim our allocation from the length indexed extent */
	scoutfs_extent_init(&fr, SCOUTFS_FREE_EXTENT_BLKNO_TYPE,
			    sbi->node_id, fr.start, min(fr.len, len), 0, 0);

	ret = scoutfs_extent_init(&fal, SCOUTFS_FILE_EXTENT_TYPE, ino,
				  start, fr.len, fr.start, flags);
	if (WARN_ON_ONCE(ret))
		goto out;

	ret = scoutfs_extent_remove(sb, data_extent_io, &fr, sbi->node_id_lock);
	if (ret)
		goto out;
	add_fr = true;

	/* remove a region of the existing extent */
	if (rem_flags) {
		scoutfs_extent_init(&rem, SCOUTFS_FILE_EXTENT_TYPE, ino,
				    fal.start, fal.len, 0, rem_flags);
		ret = scoutfs_extent_remove(sb, data_extent_io, &rem, lock);
		if (ret)
			goto out;
		add_rem = true;
	}

	ret = scoutfs_extent_add(sb, data_extent_io, &fal, lock);
out:
	scoutfs_extent_cleanup(ret < 0 && add_rem, scoutfs_extent_add, sb,
			       data_extent_io, &rem, lock,
			       SC_DATA_EXTENT_FALLOCATE_CLEANUP,
			       corrupt_data_extent_fallocate_cleanup, &fal);
	scoutfs_extent_cleanup(ret < 0 && add_fr, scoutfs_extent_add, sb,
			       data_extent_io, &fr, sbi->node_id_lock,
			       SC_DATA_EXTENT_FALLOCATE_CLEANUP,
			       corrupt_data_extent_alloc_cleanup, &fal);

	return ret;
}

/*
 * Modify the extents that map the blocks that store the len byte region
 * starting at offset.
 *
 * The caller has only prevented freezing by entering a fs write
 * context. We're responsible for all other locking and consistency.
 */
long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	const u64 ino = scoutfs_ino(inode);
	struct scoutfs_lock *lock = NULL;
	DECLARE_DATA_INFO(sb, datinf);
	struct scoutfs_extent ext;
	LIST_HEAD(ind_locks);
	u64 last_block;
	u64 iblock;
	u64 blocks;
	loff_t end;
	u8 rem_flags;
	u8 flags;
	int ret;

	mutex_lock(&inode->i_mutex);

	/* XXX support more flags */
	if (mode & ~(FALLOC_FL_KEEP_SIZE)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	/* catch wrapping */
	if (offset + len < offset) {
		ret = -EINVAL;
		goto out;
	}

	if (len == 0) {
		ret = 0;
		goto out;
	}

	ret = scoutfs_lock_inode(sb, DLM_LOCK_EX, SCOUTFS_LKF_REFRESH_INODE,
				 inode, &lock);
	if (ret)
		goto out;

	inode_dio_wait(inode);

	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
	    (offset + len > i_size_read(inode))) {
		ret = inode_newsize_ok(inode, offset + len);
		if (ret)
			goto out;
	}

	iblock = offset >> SCOUTFS_BLOCK_SHIFT;
	last_block = (offset + len - 1) >> SCOUTFS_BLOCK_SHIFT;

	for (; iblock <= last_block; iblock = ext.start + ext.len) {
		scoutfs_extent_init(&ext, SCOUTFS_FILE_EXTENT_TYPE,
				    ino, iblock, 1, 0, 0);
		ret = scoutfs_extent_next(sb, data_extent_io, &ext, lock);
		if (ret < 0 && ret != -ENOENT)
			goto out;

		blocks = last_block - iblock + 1;
		flags = SEF_UNWRITTEN;
		rem_flags = 0;

		if (ret == -ENOENT || ext.start > last_block) {
			/* no next extent or past us, all remaining blocks */
		} else if (iblock < ext.start) {
			/* sparse region until next extent */
			blocks = min(blocks, ext.start - iblock);
		} else if (ext.map > 0) {
			/* skip past an allocated extent */
			blocks = min(blocks, (ext.start + ext.len) - iblock);
			iblock += blocks;
			blocks = 0;
		} else {
			/* allocating a portion of an unallocated extent */
			blocks = min(blocks, (ext.start + ext.len) - iblock);
			flags |= ext.flags;
			rem_flags = ext.flags;

			/* XXX corruption; why'd we store map == flags == 0? */
			if (rem_flags == 0) {
				ret = -EIO;
				goto out;
			}
		}

		ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false,
						    SIC_FALLOCATE_ONE());
		if (ret)
			goto out;

		if (blocks > 0) {
			down_write(&datinf->alloc_rwsem);
			ret = fallocate_one_extent(sb, ino, iblock, blocks,
						   flags, rem_flags, lock);
			up_write(&datinf->alloc_rwsem);
		}

		if (ret == 0 && !(mode & FALLOC_FL_KEEP_SIZE)) {
			end = (iblock + blocks) << SCOUTFS_BLOCK_SHIFT;
			if (end == 0 || end > offset + len)
				end = offset + len;
			if (end > i_size_read(inode))
				i_size_write(inode, end);
			scoutfs_update_inode_item(inode, lock, &ind_locks);
		}

		scoutfs_release_trans(sb);
		scoutfs_inode_index_unlock(sb, &ind_locks);
		if (ret)
			goto out;

		iblock += blocks;
	}

	ret = 0;
out:
	scoutfs_unlock(sb, lock, DLM_LOCK_EX);
	mutex_unlock(&inode->i_mutex);

	trace_scoutfs_data_fallocate(sb, ino, mode, offset, len, ret);

	return ret;
}
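
To make the block math above concrete, here is a small worked example that treats the whole requested range as a single fallocated extent; the 4KiB block size (SCOUTFS_BLOCK_SHIFT == 12) is an assumption for illustration, the real value comes from format.h:

/* standalone check of the iblock/last_block/end arithmetic above,
 * with an assumed 12-bit block shift */
#include <assert.h>
#include <stdint.h>

#define BLOCK_SHIFT 12	/* assumed; scoutfs defines SCOUTFS_BLOCK_SHIFT */

int main(void)
{
	uint64_t offset = 6000, len = 10000;
	uint64_t iblock = offset >> BLOCK_SHIFT;		  /* 1 */
	uint64_t last_block = (offset + len - 1) >> BLOCK_SHIFT;  /* 3 */
	uint64_t blocks = last_block - iblock + 1;		  /* 3 blocks cover bytes 4096..16383 */

	/* i_size is advanced to the end of the fallocated blocks but
	 * clamped to the end of the requested byte region */
	uint64_t end = (iblock + blocks) << BLOCK_SHIFT;	  /* 16384 */

	if (end == 0 || end > offset + len)
		end = offset + len;				  /* 16000 */

	assert(iblock == 1 && last_block == 3 && blocks == 3);
	assert(end == 16000);
	return 0;
}
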
/*
 * Return all the file's extents whose blocks overlap with the caller's
 * byte region. We set _LAST on the last extent and _UNKNOWN on offline
@@ -910,6 +1135,7 @@ const struct file_operations scoutfs_file_fops = {
	.unlocked_ioctl = scoutfs_ioctl,
	.fsync = scoutfs_file_fsync,
	.llseek = scoutfs_file_llseek,
	.fallocate = scoutfs_fallocate,
};
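
For context on how this entry point is reached: the fallocate(2) syscall lands on the .fallocate pointer through the generic VFS dispatch, roughly as in this abridged paraphrase (not scoutfs code, and with most of the VFS's own mode and range validation omitted):

/* abridged paraphrase of the VFS side of fallocate dispatch */
static long vfs_fallocate_sketch(struct file *file, int mode,
				 loff_t offset, loff_t len)
{
	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!file->f_op->fallocate)
		return -EOPNOTSUPP;

	return file->f_op->fallocate(file, mode, offset, len);
}
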

View File

@@ -9,6 +9,7 @@ int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
				struct scoutfs_lock *lock);
int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
			u64 start, u64 len);
long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len);

int scoutfs_data_setup(struct super_block *sb);
void scoutfs_data_destroy(struct super_block *sb);

View File

@@ -622,6 +622,7 @@ enum {
	SC_DATA_EXTENT_TRUNC_CLEANUP,
	SC_DATA_EXTENT_ALLOC_CLEANUP,
	SC_SERVER_EXTENT_CLEANUP,
	SC_DATA_EXTENT_FALLOCATE_CLEANUP,
	SC_NR_SOURCES,
};

View File

@@ -370,6 +370,35 @@ TRACE_EVENT(scoutfs_erase_item,
	TP_printk(FSID_FMT" erasing item %p", __entry->fsid, __entry->item)
);

TRACE_EVENT(scoutfs_data_fallocate,
	TP_PROTO(struct super_block *sb, u64 ino, int mode, loff_t offset,
		 loff_t len, int ret),

	TP_ARGS(sb, ino, mode, offset, len, ret),

	TP_STRUCT__entry(
		__field(__u64, fsid)
		__field(__u64, ino)
		__field(int, mode)
		__field(__u64, offset)
		__field(__u64, len)
		__field(int, ret)
	),

	TP_fast_assign(
		__entry->fsid = FSID_ARG(sb);
		__entry->ino = ino;
		__entry->mode = mode;
		__entry->offset = offset;
		__entry->len = len;
		__entry->ret = ret;
	),

	TP_printk("fsid "FSID_FMT" ino %llu mode 0x%x offset %llu len %llu ret %d",
		  __entry->fsid, __entry->ino, __entry->mode, __entry->offset,
		  __entry->len, __entry->ret)
);

TRACE_EVENT(scoutfs_data_fiemap,
	TP_PROTO(struct super_block *sb, __u64 off, int i, __u64 blkno),