mirror of https://github.com/versity/scoutfs.git

Compare commits: clk/scoutf...clk/trunca (1 commit)

commit 417261a051

@@ -296,28 +296,39 @@ static s64 truncate_extents(struct super_block *sb, struct inode *inode,
  * and offline blocks.  If it's not provided then the inode is being
  * destroyed and isn't reachable, we don't need to update it.
  *
+ * If 'pause' is set, then we are destroying the inode and we should take
+ * breaks occasionally to allow other nodes access to this inode lock shard.
+ *
  * The caller is in charge of locking the inode and data, but we may
  * have to modify far more items than fit in a transaction so we're in
  * charge of batching updates into transactions.  If the inode is
  * provided then we're responsible for updating its item as we go.
  */
 int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
-				u64 ino, u64 iblock, u64 last, bool offline,
-				struct scoutfs_lock *lock)
+				u64 ino, u64 *iblock, u64 last, bool offline,
+				struct scoutfs_lock *lock, bool pause)
 {
 	struct scoutfs_inode_info *si = NULL;
 	LIST_HEAD(ind_locks);
+	u64 cur_seq;
 	s64 ret = 0;
 
 	WARN_ON_ONCE(inode && !inode_is_locked(inode));
 
+	/*
+	 * If the inode is provided, then we aren't destroying it.  So it's not
+	 * safe to pause while removing items- it needs to be done in one chunk.
+	 */
+	if (WARN_ON_ONCE(pause && inode))
+		return -EINVAL;
+
 	/* clamp last to the last possible block? */
 	if (last > SCOUTFS_BLOCK_SM_MAX)
 		last = SCOUTFS_BLOCK_SM_MAX;
 
-	trace_scoutfs_data_truncate_items(sb, iblock, last, offline);
+	trace_scoutfs_data_truncate_items(sb, *iblock, last, offline, pause);
 
-	if (WARN_ON_ONCE(last < iblock))
+	if (WARN_ON_ONCE(last < *iblock))
 		return -EINVAL;
 
 	if (inode) {
@@ -325,7 +336,9 @@ int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
 		down_write(&si->extent_sem);
 	}
 
-	while (iblock <= last) {
+	cur_seq = scoutfs_trans_sample_seq(sb);
+
+	while (*iblock <= last) {
 		if (inode)
 			ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true, false);
 		else
@@ -339,7 +352,7 @@ int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
 			ret = 0;
 
 		if (ret == 0)
-			ret = truncate_extents(sb, inode, ino, iblock, last,
+			ret = truncate_extents(sb, inode, ino, *iblock, last,
 					       offline, lock);
 
 		if (inode)
@@ -351,8 +364,19 @@ int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
 		if (ret <= 0)
 			break;
 
-		iblock = ret;
+		*iblock = ret;
 		ret = 0;
+
+		/*
+		 * We know there's more to do because truncate_extents()
+		 * pauses every EXTENTS_PER_HOLD extents and it returned the
+		 * next starting block.  Our caller might also want us to pause,
+		 * which we will do whenever we cross a transaction boundary.
+		 */
+		if (pause && (cur_seq != scoutfs_trans_sample_seq(sb))) {
+			ret = -EINPROGRESS;
+			break;
+		}
 	}
 
 	if (si)
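
The pause logic added above boils down to a small pattern: sample the transaction sequence once before the batching loop, then bail out with -EINPROGRESS as soon as work spills into a new transaction, leaving the resume point behind the caller's pointer. A minimal userspace sketch of that shape, not scoutfs code: sample_trans_seq() and process_batch() are hypothetical stand-ins for scoutfs_trans_sample_seq() and truncate_extents().

#include <stdbool.h>
#include <stdint.h>
#include <errno.h>

typedef uint64_t u64;

/* hypothetical stand-ins for the scoutfs internals */
extern u64 sample_trans_seq(void);
extern int64_t process_batch(u64 pos, u64 last);	/* > 0: next block */

int process_items_pausable(u64 *pos, u64 last, bool pause)
{
	u64 cur_seq = sample_trans_seq();	/* seq when we started */
	int64_t ret = 0;

	while (*pos <= last) {
		ret = process_batch(*pos, last);
		if (ret <= 0)
			break;
		*pos = (u64)ret;	/* remember where to resume */
		ret = 0;

		/* crossed a transaction boundary: stop so other nodes
		 * can get at the lock, caller resumes from *pos */
		if (pause && cur_seq != sample_trans_seq()) {
			ret = -EINPROGRESS;
			break;
		}
	}
	return (int)ret;
}

Because the position persists behind the pointer, a paused caller can simply call again with the same argument to pick up where it left off.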

@@ -47,8 +47,8 @@ int scoutfs_get_block_write(struct inode *inode, sector_t iblock, struct buffer_
 			    int create);
 
 int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
-				u64 ino, u64 iblock, u64 last, bool offline,
-				struct scoutfs_lock *lock);
+				u64 ino, u64 *iblock, u64 last, bool offline,
+				struct scoutfs_lock *lock, bool pause);
 int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			u64 start, u64 len);
 long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len);

@@ -468,8 +468,8 @@ int scoutfs_complete_truncate(struct inode *inode, struct scoutfs_lock *lock)
 	start = (i_size_read(inode) + SCOUTFS_BLOCK_SM_SIZE - 1) >>
 		SCOUTFS_BLOCK_SM_SHIFT;
 	ret = scoutfs_data_truncate_items(inode->i_sb, inode,
-					  scoutfs_ino(inode), start, ~0ULL,
-					  false, lock);
+					  scoutfs_ino(inode), &start, ~0ULL,
+					  false, lock, false);
 	err = clear_truncate_flag(inode, lock);
 
 	return ret ? ret : err;
@@ -1635,7 +1635,8 @@ int scoutfs_inode_orphan_delete(struct super_block *sb, u64 ino, struct scoutfs_
  * partial deletion until all deletion is complete and the orphan item
  * is removed.
  */
-static int delete_inode_items(struct super_block *sb, u64 ino, struct scoutfs_inode *sinode,
+static int delete_inode_items(struct super_block *sb, u64 ino,
+			      struct scoutfs_inode *sinode, u64 *start,
 			      struct scoutfs_lock *lock, struct scoutfs_lock *orph_lock)
 {
 	struct scoutfs_key key;
@@ -1654,8 +1655,8 @@ static int delete_inode_items(struct super_block *sb, u64 ino, struct scoutfs_in
 
 	/* remove data items in their own transactions */
 	if (S_ISREG(mode)) {
-		ret = scoutfs_data_truncate_items(sb, NULL, ino, 0, ~0ULL,
-						  false, lock);
+		ret = scoutfs_data_truncate_items(sb, NULL, ino, start, ~0ULL,
+						  false, lock, true);
 		if (ret)
 			goto out;
 	}
@@ -1803,16 +1804,23 @@ out:
  */
 static int try_delete_inode_items(struct super_block *sb, u64 ino)
 {
-	struct inode_deletion_lock_data *ldata = NULL;
-	struct scoutfs_lock *orph_lock = NULL;
-	struct scoutfs_lock *lock = NULL;
+	struct inode_deletion_lock_data *ldata;
+	struct scoutfs_lock *orph_lock;
+	struct scoutfs_lock *lock;
 	struct scoutfs_inode sinode;
 	struct scoutfs_key key;
 	bool clear_trying = false;
+	bool more = false;
 	u64 group_nr;
+	u64 start = 0;
 	int bit_nr;
 	int ret;
 
+again:
+	ldata = NULL;
+	orph_lock = NULL;
+	lock = NULL;
+
 	ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_WRITE, 0, ino, &lock);
 	if (ret < 0)
 		goto out;
@@ -1824,11 +1832,12 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
 		goto out;
 
 	/* only one local attempt per inode at a time */
-	if (test_and_set_bit(bit_nr, ldata->trying)) {
+	if (!more && test_and_set_bit(bit_nr, ldata->trying)) {
 		ret = 0;
 		goto out;
 	}
 	clear_trying = true;
+	more = false;
 
 	/* can't delete if it's cached in local or remote mounts */
 	if (scoutfs_omap_test(sb, ino) || test_bit_le(bit_nr, ldata->map.bits)) {
@@ -1853,7 +1862,15 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
 	if (ret < 0)
 		goto out;
 
-	ret = delete_inode_items(sb, ino, &sinode, lock, orph_lock);
+	ret = delete_inode_items(sb, ino, &sinode, &start, lock, orph_lock);
+
+	if (ret == -EINPROGRESS) {
+		more = true;
+		clear_trying = false;
+	} else {
+		more = false;
+	}
 
 out:
 	if (clear_trying)
 		clear_bit(bit_nr, ldata->trying);
@@ -1861,6 +1878,9 @@ out:
 	scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
 	scoutfs_unlock(sb, orph_lock, SCOUTFS_LOCK_WRITE_ONLY);
 
+	if (more)
+		goto again;
+
 	return ret;
 }
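
On the deletion side, the retry loop re-takes every lock from scratch on each pass: when delete_inode_items() returns -EINPROGRESS, try_delete_inode_items() drops its locks, keeps the saved start block, and jumps back to again:. A stripped-down sketch of that shape, with hypothetical stand-ins (take_locks, drop_locks, delete_some) rather than the real scoutfs locking calls:

#include <stdbool.h>
#include <stdint.h>
#include <errno.h>

typedef uint64_t u64;

/* hypothetical stand-ins for the locking and deletion internals */
extern int take_locks(u64 ino);
extern void drop_locks(u64 ino);
extern int delete_some(u64 ino, u64 *start);	/* -EINPROGRESS: more left */

int delete_all_items(u64 ino)
{
	u64 start = 0;	/* resume point, advanced by delete_some() */
	bool more;
	int ret;

	do {
		ret = take_locks(ino);
		if (ret < 0)
			return ret;

		ret = delete_some(ino, &start);
		more = (ret == -EINPROGRESS);

		/* fully unlock between passes so other nodes can use
		 * this inode lock shard while a big deletion runs */
		drop_locks(ino);
	} while (more);

	return ret;
}

The same idea shows in the diff above: on -EINPROGRESS, 'more' stays true and clear_trying stays false, so the "trying" bit is held across passes and the !more test skips the test_and_set_bit() guard on re-entry.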

@@ -372,9 +372,9 @@ static long scoutfs_ioc_release(struct file *file, unsigned long arg)
 	sblock = args.offset >> SCOUTFS_BLOCK_SM_SHIFT;
 	eblock = (args.offset + args.length - 1) >> SCOUTFS_BLOCK_SM_SHIFT;
 	ret = scoutfs_data_truncate_items(sb, inode, scoutfs_ino(inode),
-					  sblock, eblock, true, lock);
+					  &sblock, eblock, true, lock, false);
 	if (ret == 0) {
 		scoutfs_inode_get_onoff(inode, &online, &offline);
 		isize = i_size_read(inode);
@@ -383,8 +383,8 @@ static long scoutfs_ioc_release(struct file *file, unsigned long arg)
 			>> SCOUTFS_BLOCK_SM_SHIFT;
 		ret = scoutfs_data_truncate_items(sb, inode,
 						  scoutfs_ino(inode),
-						  sblock, U64_MAX,
-						  false, lock);
+						  &sblock, U64_MAX,
+						  false, lock, false);
 	}
 }

@@ -378,15 +378,16 @@ DEFINE_EVENT(scoutfs_data_file_extent_class, scoutfs_data_fiemap_extent,
 );
 
 TRACE_EVENT(scoutfs_data_truncate_items,
-	TP_PROTO(struct super_block *sb, __u64 iblock, __u64 last, int offline),
+	TP_PROTO(struct super_block *sb, __u64 iblock, __u64 last, int offline, bool pause),
 
-	TP_ARGS(sb, iblock, last, offline),
+	TP_ARGS(sb, iblock, last, offline, pause),
 
 	TP_STRUCT__entry(
 		SCSB_TRACE_FIELDS
 		__field(__u64, iblock)
 		__field(__u64, last)
 		__field(int, offline)
+		__field(bool, pause)
 	),
 
 	TP_fast_assign(
@@ -394,10 +395,12 @@ TRACE_EVENT(scoutfs_data_truncate_items,
 		__entry->iblock = iblock;
 		__entry->last = last;
 		__entry->offline = offline;
+		__entry->pause = pause;
 	),
 
-	TP_printk(SCSBF" iblock %llu last %llu offline %u", SCSB_TRACE_ARGS,
-		  __entry->iblock, __entry->last, __entry->offline)
+	TP_printk(SCSBF" iblock %llu last %llu offline %u pause %d",
+		  SCSB_TRACE_ARGS, __entry->iblock, __entry->last,
+		  __entry->offline, __entry->pause)
 );
 
 TRACE_EVENT(scoutfs_data_wait_check,

@@ -1,4 +1,3 @@
-== setting longer hung task timeout
 == creating fragmented extents
 == unlink file with moved extents to free extents per block
 == cleanup

@@ -10,30 +10,6 @@ EXTENTS_PER_BTREE_BLOCK=600
 EXTENTS_PER_LIST_BLOCK=8192
 FREED_EXTENTS=$((EXTENTS_PER_BTREE_BLOCK * EXTENTS_PER_LIST_BLOCK))
 
-#
-# This test specifically creates a pathologically sparse file that will
-# be as expensive as possible to free.  This is usually fine on
-# dedicated or reasonable hardware, but trying to run this in
-# virtualized debug kernels can take a very long time.  This test is
-# about making sure that the server doesn't fail, not that the platform
-# can handle the scale of work that our btree formats happen to require
-# while execution is bogged down with use-after-free memory reference
-# tracking.  So we give the test a lot more breathing room before
-# deciding that its hung.
-#
-echo "== setting longer hung task timeout"
-if [ -w /proc/sys/kernel/hung_task_timeout_secs ]; then
-	secs=$(cat /proc/sys/kernel/hung_task_timeout_secs)
-	test "$secs" -gt 0 || \
-		t_fail "confusing value '$secs' from /proc/sys/kernel/hung_task_timeout_secs"
-	restore_hung_task_timeout()
-	{
-		echo "$secs" > /proc/sys/kernel/hung_task_timeout_secs
-	}
-	trap restore_hung_task_timeout EXIT
-	echo "$((secs * 5))" > /proc/sys/kernel/hung_task_timeout_secs
-fi
-
 echo "== creating fragmented extents"
 fragmented_data_extents $FREED_EXTENTS $EXTENTS_PER_BTREE_BLOCK "$T_D0/alloc" "$T_D0/move"