Compare commits

...

4 Commits

Author SHA1 Message Date
Bryant G. Duffy-Ly
1029d5a0fe Enable and Disable correct unit tests for O_DIRECT
Signed-off-by: Bryant G. Duffy-Ly <bduffyly@versity.com>
2022-03-25 09:50:16 -05:00
Bryant Duffy-Ly
9fc759ce47 Fix truncate for O_DIRECT
In the buffered case page tail zeroing happens automatically.
In the O_DIRECT case it does not so we need to add it in our setattr
path just like EXT2. We want to zero the end of the block that contains
i_size during truncate, so we just call block_truncate_page in
set_inode_size.
2022-03-25 09:50:16 -05:00
Bryant Duffy-Ly
bb11617fe3 Fix EOF extent in last block
Currently if there is an extent on the last block the code will only set
EOF on ENOENT. In the case that the last block has an extent it wont go
to the next iteration due to iblock <= last. This then doesnt set the EOF
on the last block in these cases. We want to just allow the loop to keep
looping and rely on if (ext.start > last) to protect us from infinite loop.
2022-03-25 09:50:16 -05:00
Bryant Duffy-Ly
20370c6573 Add O_DIRECT support
We want to first pass a mapping of unwritten extents to the blockdev_direct_IO
call. Then based upon the amount of bytes written we want to convert those
unwritten extents into written.
2022-03-25 09:50:16 -05:00
6 changed files with 321 additions and 27 deletions

View File

@@ -21,6 +21,7 @@
#include <linux/log2.h>
#include <linux/falloc.h>
#include <linux/writeback.h>
#include <linux/aio.h>
#include "format.h"
#include "super.h"
@@ -471,6 +472,7 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
ext->map = blkno;
ext->flags = 0;
ret = 0;
out:
if (ret < 0 && blkno > 0) {
err = scoutfs_free_data(sb, datinf->alloc, datinf->wri,
@@ -488,8 +490,68 @@ out:
return ret;
}
static int alloc_block_dio(struct super_block *sb, struct inode *inode,
struct scoutfs_extent *ext, struct buffer_head *bh,
u64 iblock, struct scoutfs_lock *lock)
{
DECLARE_DATA_INFO(sb, datinf);
const u64 ino = scoutfs_ino(inode);
struct data_ext_args args = {
.ino = ino,
.inode = inode,
.lock = lock,
};
u64 blkno = 0;
u64 blocks = 0;
u64 count = 0;
u64 last;
u8 ext_fl = 0;
int ret = 0;
bool first = true;
int err;
last = (bh->b_size - 1) >> SCOUTFS_BLOCK_SM_SHIFT;
while(blocks < last) {
if (ext->len >= last && first)
count = min_t(u64, last, SCOUTFS_FALLOCATE_ALLOC_LIMIT);
else
count = min_t(u64, last - blocks, SCOUTFS_FALLOCATE_ALLOC_LIMIT);
mutex_lock(&datinf->mutex);
ret = scoutfs_alloc_data(sb, datinf->alloc, datinf->wri,
&datinf->dalloc, count, &blkno, &count);
if (ret == 0) {
ret = scoutfs_ext_set(sb, &data_ext_ops, &args, iblock,
count, blkno,
ext_fl | SEF_UNWRITTEN);
if (ret < 0) {
err = scoutfs_free_data(sb, datinf->alloc,
datinf->wri,
&datinf->data_freed,
blkno, count);
BUG_ON(err); /* inconsistent */
}
}
mutex_unlock(&datinf->mutex);
if (ret < 0)
break;
blocks += count;
first = false;
ret = scoutfs_ext_next(sb, &data_ext_ops, &args,
iblock, 1, ext);
if (ret < 0)
break;
}
return ret;
}
static int scoutfs_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
struct buffer_head *bh, int create, bool dio_flag)
{
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
const u64 ino = scoutfs_ino(inode);
@@ -530,7 +592,7 @@ static int scoutfs_get_block(struct inode *inode, sector_t iblock,
}
/* convert unwritten to written, could be staging */
if (create && ext.map && (ext.flags & SEF_UNWRITTEN)) {
if (create && ext.map && !dio_flag && (ext.flags & SEF_UNWRITTEN)) {
un.start = iblock;
un.len = 1;
un.map = ext.map + (iblock - ext.start);
@@ -542,11 +604,26 @@ static int scoutfs_get_block(struct inode *inode, sector_t iblock,
set_buffer_new(bh);
}
goto out;
} else if (create && ext.map && dio_flag) {
un.start = iblock;
un.len = 1;
un.map = ext.map + (iblock - ext.start);
un.flags = ext.flags;
ret = scoutfs_ext_set(sb, &data_ext_ops, &args,
un.start, un.len, un.map, un.flags);
if (ret == 0) {
ext = un;
set_buffer_new(bh);
}
goto out;
}
/* allocate and map blocks containing our logical block */
if (create && !ext.map) {
ret = alloc_block(sb, inode, &ext, iblock, lock);
if (dio_flag)
ret = alloc_block_dio(sb, inode, &ext, bh, iblock, lock);
else
ret = alloc_block(sb, inode, &ext, iblock, lock);
if (ret == 0)
set_buffer_new(bh);
} else {
@@ -580,25 +657,75 @@ static int scoutfs_get_block_read(struct inode *inode, sector_t iblock,
int ret;
down_read(&si->extent_sem);
ret = scoutfs_get_block(inode, iblock, bh, create);
ret = scoutfs_get_block(inode, iblock, bh, create, false);
up_read(&si->extent_sem);
return ret;
}
static int scoutfs_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
int scoutfs_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
int ret;
down_write(&si->extent_sem);
ret = scoutfs_get_block(inode, iblock, bh, create);
ret = scoutfs_get_block(inode, iblock, bh, create, false);
up_write(&si->extent_sem);
return ret;
}
static int scoutfs_get_block_write_dio(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *lock = NULL;
LIST_HEAD(ind_locks);
int ret;
lock = scoutfs_per_task_get(&si->pt_data_lock);
if (WARN_ON_ONCE(!lock)) {
return -EINVAL;
}
if (inode)
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks,
true, false);
else
ret = scoutfs_hold_trans(sb, false);
if (ret)
goto out;
if (inode)
ret = scoutfs_dirty_inode_item(inode, lock);
if (ret < 0)
goto out_unlock;
down_write(&si->extent_sem);
ret = scoutfs_get_block(inode, iblock, bh, create, true);
up_write(&si->extent_sem);
if (inode) {
scoutfs_inode_set_data_seq(inode);
scoutfs_inode_inc_data_version(inode);
inode_inc_iversion(inode);
if (ret > 0)
i_size_write(inode, ret);
scoutfs_update_inode_item(inode, lock, &ind_locks);
}
out_unlock:
scoutfs_release_trans(sb);
scoutfs_inode_index_unlock(sb, &ind_locks);
out:
return ret;
}
/*
* This is almost never used. We can't block on a cluster lock while
* holding the page lock because lock invalidation gets the page lock
@@ -861,6 +988,154 @@ static int scoutfs_write_end(struct file *file, struct address_space *mapping,
return ret;
}
static s64 convert_unwritten_items(struct super_block *sb, struct inode *inode,
u64 ino, u64 iblock, u64 last,
struct scoutfs_lock *lock)
{
struct data_ext_args args = {
.ino = ino,
.inode = inode,
.lock = lock,
};
struct scoutfs_extent ext;
struct scoutfs_extent un;
u64 offset;
s64 ret;
int i;
ret = 0;
for (i = 0; iblock <= last; i++) {
if (i == EXTENTS_PER_HOLD) {
ret = iblock;
break;
}
ret = scoutfs_ext_next(sb, &data_ext_ops, &args,
iblock, 1, &ext);
if (ret < 0) {
if (ret == -ENOENT)
ret = 0;
break;
}
/* done if we went past the region */
if (ext.start > last) {
ret = 0;
break;
}
/* nothing to do when already marked written */
if (!(ext.flags & SEF_UNWRITTEN)) {
iblock = ext.start + ext.len;
continue;
}
iblock = max(ext.start, iblock);
offset = iblock - ext.start;
un.start = iblock;
un.map = ext.map ? ext.map + offset : 0;
un.len = min(ext.len - offset, last - iblock + 1);
un.flags = ext.flags & ~(SEF_OFFLINE|SEF_UNWRITTEN);
ret = scoutfs_ext_set(sb, &data_ext_ops, &args,
un.start, un.len, un.map, un.flags);
if (ret < 0)
break;
iblock += un.len;
}
return ret;
}
static ssize_t
convert_unwritten_extent(struct inode *inode, loff_t offset, ssize_t count)
{
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
const u64 ino = scoutfs_ino(inode);
struct scoutfs_lock *lock = NULL;
LIST_HEAD(ind_locks);
u64 iblock;
u64 last;
ssize_t ret = 0;
lock = scoutfs_per_task_get(&si->pt_data_lock);
if (WARN_ON_ONCE(!lock)) {
ret = -EINVAL;
goto out;
}
iblock = offset >> SCOUTFS_BLOCK_SM_SHIFT;
last = (offset + count - 1) >> SCOUTFS_BLOCK_SM_SHIFT;
while(iblock <= last) {
if (inode)
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks,
true, false);
else
ret = scoutfs_hold_trans(sb, false);
if (ret)
break;
if (inode)
ret = scoutfs_dirty_inode_item(inode, lock);
else
ret = 0;
if (ret == 0) {
down_write(&si->extent_sem);
ret = convert_unwritten_items(sb, inode, ino, iblock,
last, lock);
up_write(&si->extent_sem);
}
if (ret < 0)
goto out;
if (inode) {
scoutfs_inode_set_data_seq(inode);
scoutfs_inode_inc_data_version(inode);
inode_inc_iversion(inode);
if (ret > 0)
i_size_write(inode, ret);
scoutfs_update_inode_item(inode, lock, &ind_locks);
}
scoutfs_release_trans(sb);
if (inode)
scoutfs_inode_index_unlock(sb, &ind_locks);
if (ret <= 0)
break;
iblock = ret;
}
out:
return ret;
}
static ssize_t
scoutfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
loff_t offset, unsigned long nr_segs)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
scoutfs_get_block_write_dio);
if (ret > 0 && (rw & WRITE))
{
ret = convert_unwritten_extent(inode, offset, ret);
}
return ret;
}
/*
* Try to allocate unwritten extents for any unallocated regions of the
* logical block extent from the caller. The caller manages locks and
@@ -1473,13 +1748,14 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
iblock = start >> SCOUTFS_BLOCK_SM_SHIFT;
last = (start + len - 1) >> SCOUTFS_BLOCK_SM_SHIFT;
while (iblock <= last) {
while (true) {
ret = scoutfs_ext_next(sb, &data_ext_ops, &args,
iblock, 1, &ext);
if (ret < 0) {
if (ret == -ENOENT)
if (ret == -ENOENT) {
ret = 0;
last_flags = FIEMAP_EXTENT_LAST;
last_flags = FIEMAP_EXTENT_LAST;
}
break;
}
@@ -1804,6 +2080,7 @@ const struct address_space_operations scoutfs_file_aops = {
.writepages = scoutfs_writepages,
.write_begin = scoutfs_write_begin,
.write_end = scoutfs_write_end,
.direct_IO = scoutfs_direct_IO,
};
const struct file_operations scoutfs_file_fops = {

View File

@@ -49,6 +49,8 @@ int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len);
int scoutfs_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create);
int scoutfs_data_init_offline_extent(struct inode *inode, u64 size,
struct scoutfs_lock *lock);
int scoutfs_data_move_blocks(struct inode *from, u64 from_off,

View File

@@ -47,6 +47,9 @@ ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
DECLARE_DATA_WAIT(dw);
int ret;
if (!is_sync_kiocb(iocb))
return -EINVAL;
retry:
/* protect checked extents from release */
mutex_lock(&inode->i_mutex);
@@ -97,6 +100,9 @@ ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
DECLARE_DATA_WAIT(dw);
int ret;
if (!is_sync_kiocb(iocb))
return -EINVAL;
if (iocb->ki_left == 0) /* Does this even happen? */
return 0;

View File

@@ -19,6 +19,7 @@
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/list_sort.h>
#include <linux/buffer_head.h>
#include "format.h"
#include "super.h"
@@ -354,15 +355,22 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
{
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
LIST_HEAD(ind_locks);
int ret;
if (!S_ISREG(inode->i_mode))
return 0;
scoutfs_per_task_add(&si->pt_data_lock, &pt_ent, lock);
ret = block_truncate_page(inode->i_mapping, new_size, scoutfs_get_block_write);
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
if (ret)
goto out;
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true, false);
if (ret)
return ret;
goto out;
if (new_size != i_size_read(inode))
scoutfs_inode_inc_data_version(inode);
@@ -378,6 +386,7 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
scoutfs_release_trans(sb);
scoutfs_inode_index_unlock(sb, &ind_locks);
out:
return ret;
}

View File

@@ -45,9 +45,14 @@ generic/107
generic/117
generic/124
generic/129
generic/130
generic/131
generic/135
generic/169
generic/184
generic/211
generic/212
generic/214
generic/221
generic/228
generic/236
@@ -100,13 +105,9 @@ generic/078
generic/079
generic/081
generic/082
generic/091
generic/094
generic/096
generic/110
generic/111
generic/113
generic/114
generic/115
generic/116
generic/118
@@ -115,9 +116,7 @@ generic/121
generic/122
generic/123
generic/128
generic/130
generic/134
generic/135
generic/136
generic/138
generic/139
@@ -165,7 +164,6 @@ generic/194
generic/195
generic/196
generic/197
generic/198
generic/199
generic/200
generic/201
@@ -173,11 +171,6 @@ generic/202
generic/203
generic/205
generic/206
generic/207
generic/210
generic/211
generic/212
generic/214
generic/216
generic/217
generic/218
@@ -185,13 +178,11 @@ generic/219
generic/220
generic/222
generic/223
generic/225
generic/227
generic/229
generic/230
generic/235
generic/238
generic/240
generic/244
generic/250
generic/252
@@ -203,7 +194,6 @@ generic/259
generic/260
generic/261
generic/262
generic/263
generic/264
generic/265
generic/266
@@ -282,4 +272,4 @@ shared/004
shared/032
shared/051
shared/289
Passed all 75 tests
Passed all 80 tests

View File

@@ -64,19 +64,29 @@ generic/029 # mmap missing
generic/030 # mmap missing
generic/075 # file content mismatch failures (fds, etc)
generic/080 # mmap missing
generic/091 # skip fsx tests
generic/094 # odirect streaming pre-alloc treated as failure in xfstests
generic/103 # enospc causes trans commit failures
generic/105 # needs trigage: something about acls
generic/108 # mount fails on failing device?
generic/112 # file content mismatch failures (fds, etc)
generic/113 # block aio dio runs
generic/114 # block aio dio runs
generic/120 # (can't exec 'cause no mmap)
generic/126 # (can't exec 'cause no mmap)
generic/141 # mmap missing
generic/198 # block aio dio runs
generic/207 # block aio dio runs
generic/210 # block aio dio runs
generic/213 # enospc causes trans commit failures
generic/215 # mmap missing
generic/225 # odirect streaming pre-alloc treated as failure in xfstests
generic/237 # wrong error return from failing setfacl?
generic/240 # block aio dio runs
generic/246 # mmap missing
generic/247 # mmap missing
generic/248 # mmap missing
generic/263 # do not support allocate mode FALLOC_FL_PUNCH_HOLE, FALLOC_FL_KEEP_SIZE, FALLOC_FL_ZERO_RANGE...
generic/319 # utils output change? update branch?
generic/321 # requires selinux enabled for '+' in ls?
generic/325 # mmap missing