mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-10 05:37:25 +00:00
Compare commits
1 Commits
clk/trunca
...
auke/estal
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
68f7d1f2d0 |
@@ -1,27 +1,6 @@
|
||||
Versity ScoutFS Release Notes
|
||||
=============================
|
||||
|
||||
---
|
||||
v1.25
|
||||
\
|
||||
*Jun 3, 2025*
|
||||
|
||||
Fix a bug that could cause indefinite retries of failed client commits.
|
||||
Under specific error conditions the client and server's understanding of
|
||||
the current client commit could get out of sync. The client would retry
|
||||
commits indefinitely that could never succeed. This manifested as
|
||||
infinite "critical transaction commit failure" messages in the kernel
|
||||
log on the client and matching "error <nr> committing client logs" on
|
||||
the server.
|
||||
|
||||
Fix a bug in a specific case of server error handling that could result
|
||||
in sending references to unwritten blocks to the client. The client
|
||||
would try to read blocks that hadn't been written and return spurious
|
||||
errors. This was seen under low free space conditions on the server and
|
||||
resulted in error messages with error code 116 (The errno enum for
|
||||
ESTALE, the client's indication that it couldn't read the blocks that it
|
||||
expected.)
|
||||
|
||||
---
|
||||
v1.24
|
||||
\
|
||||
|
||||
@@ -296,39 +296,28 @@ static s64 truncate_extents(struct super_block *sb, struct inode *inode,
|
||||
* and offline blocks. If it's not provided then the inode is being
|
||||
* destroyed and isn't reachable, we don't need to update it.
|
||||
*
|
||||
* If 'pause' is set, then we are destroying the inode and we should take
|
||||
* breaks occasionally to allow other nodes access to this inode lock shard.
|
||||
*
|
||||
* The caller is in charge of locking the inode and data, but we may
|
||||
* have to modify far more items than fit in a transaction so we're in
|
||||
* charge of batching updates into transactions. If the inode is
|
||||
* provided then we're responsible for updating its item as we go.
|
||||
*/
|
||||
int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
|
||||
u64 ino, u64 *iblock, u64 last, bool offline,
|
||||
struct scoutfs_lock *lock, bool pause)
|
||||
u64 ino, u64 iblock, u64 last, bool offline,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_inode_info *si = NULL;
|
||||
LIST_HEAD(ind_locks);
|
||||
u64 cur_seq;
|
||||
s64 ret = 0;
|
||||
|
||||
WARN_ON_ONCE(inode && !inode_is_locked(inode));
|
||||
|
||||
/*
|
||||
* If the inode is provided, then we aren't destroying it. So it's not
|
||||
* safe to pause while removing items- it needs to be done in one chunk.
|
||||
*/
|
||||
if (WARN_ON_ONCE(pause && inode))
|
||||
return -EINVAL;
|
||||
|
||||
/* clamp last to the last possible block? */
|
||||
if (last > SCOUTFS_BLOCK_SM_MAX)
|
||||
last = SCOUTFS_BLOCK_SM_MAX;
|
||||
|
||||
trace_scoutfs_data_truncate_items(sb, *iblock, last, offline, pause);
|
||||
trace_scoutfs_data_truncate_items(sb, iblock, last, offline);
|
||||
|
||||
if (WARN_ON_ONCE(last < *iblock))
|
||||
if (WARN_ON_ONCE(last < iblock))
|
||||
return -EINVAL;
|
||||
|
||||
if (inode) {
|
||||
@@ -336,9 +325,7 @@ int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
|
||||
down_write(&si->extent_sem);
|
||||
}
|
||||
|
||||
cur_seq = scoutfs_trans_sample_seq(sb);
|
||||
|
||||
while (*iblock <= last) {
|
||||
while (iblock <= last) {
|
||||
if (inode)
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true, false);
|
||||
else
|
||||
@@ -352,7 +339,7 @@ int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
|
||||
ret = 0;
|
||||
|
||||
if (ret == 0)
|
||||
ret = truncate_extents(sb, inode, ino, *iblock, last,
|
||||
ret = truncate_extents(sb, inode, ino, iblock, last,
|
||||
offline, lock);
|
||||
|
||||
if (inode)
|
||||
@@ -364,19 +351,8 @@ int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
|
||||
if (ret <= 0)
|
||||
break;
|
||||
|
||||
*iblock = ret;
|
||||
iblock = ret;
|
||||
ret = 0;
|
||||
|
||||
/*
|
||||
* We know there's more to do because truncate_extents()
|
||||
* pauses every EXTENTS_PER_HOLD extents and it returned the
|
||||
* next starting block. Our caller might also want us to pause,
|
||||
* which we will do whenever we cross a transaction boundary.
|
||||
*/
|
||||
if (pause && (cur_seq != scoutfs_trans_sample_seq(sb))) {
|
||||
ret = -EINPROGRESS;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (si)
|
||||
|
||||
@@ -47,8 +47,8 @@ int scoutfs_get_block_write(struct inode *inode, sector_t iblock, struct buffer_
|
||||
int create);
|
||||
|
||||
int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
|
||||
u64 ino, u64 *iblock, u64 last, bool offline,
|
||||
struct scoutfs_lock *lock, bool pause);
|
||||
u64 ino, u64 iblock, u64 last, bool offline,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
u64 start, u64 len);
|
||||
long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len);
|
||||
|
||||
@@ -468,8 +468,8 @@ int scoutfs_complete_truncate(struct inode *inode, struct scoutfs_lock *lock)
|
||||
start = (i_size_read(inode) + SCOUTFS_BLOCK_SM_SIZE - 1) >>
|
||||
SCOUTFS_BLOCK_SM_SHIFT;
|
||||
ret = scoutfs_data_truncate_items(inode->i_sb, inode,
|
||||
scoutfs_ino(inode), &start, ~0ULL,
|
||||
false, lock, false);
|
||||
scoutfs_ino(inode), start, ~0ULL,
|
||||
false, lock);
|
||||
err = clear_truncate_flag(inode, lock);
|
||||
|
||||
return ret ? ret : err;
|
||||
@@ -1635,8 +1635,7 @@ int scoutfs_inode_orphan_delete(struct super_block *sb, u64 ino, struct scoutfs_
|
||||
* partial deletion until all deletion is complete and the orphan item
|
||||
* is removed.
|
||||
*/
|
||||
static int delete_inode_items(struct super_block *sb, u64 ino,
|
||||
struct scoutfs_inode *sinode, u64 *start,
|
||||
static int delete_inode_items(struct super_block *sb, u64 ino, struct scoutfs_inode *sinode,
|
||||
struct scoutfs_lock *lock, struct scoutfs_lock *orph_lock)
|
||||
{
|
||||
struct scoutfs_key key;
|
||||
@@ -1655,8 +1654,8 @@ static int delete_inode_items(struct super_block *sb, u64 ino,
|
||||
|
||||
/* remove data items in their own transactions */
|
||||
if (S_ISREG(mode)) {
|
||||
ret = scoutfs_data_truncate_items(sb, NULL, ino, start, ~0ULL,
|
||||
false, lock, true);
|
||||
ret = scoutfs_data_truncate_items(sb, NULL, ino, 0, ~0ULL,
|
||||
false, lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
@@ -1804,23 +1803,16 @@ out:
|
||||
*/
|
||||
static int try_delete_inode_items(struct super_block *sb, u64 ino)
|
||||
{
|
||||
struct inode_deletion_lock_data *ldata;
|
||||
struct scoutfs_lock *orph_lock;
|
||||
struct scoutfs_lock *lock;
|
||||
struct inode_deletion_lock_data *ldata = NULL;
|
||||
struct scoutfs_lock *orph_lock = NULL;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
struct scoutfs_inode sinode;
|
||||
struct scoutfs_key key;
|
||||
bool clear_trying = false;
|
||||
bool more = false;
|
||||
u64 group_nr;
|
||||
u64 start = 0;
|
||||
int bit_nr;
|
||||
int ret;
|
||||
|
||||
again:
|
||||
ldata = NULL;
|
||||
orph_lock = NULL;
|
||||
lock = NULL;
|
||||
|
||||
ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_WRITE, 0, ino, &lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
@@ -1832,12 +1824,11 @@ again:
|
||||
goto out;
|
||||
|
||||
/* only one local attempt per inode at a time */
|
||||
if (!more && test_and_set_bit(bit_nr, ldata->trying)) {
|
||||
if (test_and_set_bit(bit_nr, ldata->trying)) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
clear_trying = true;
|
||||
more = false;
|
||||
|
||||
/* can't delete if it's cached in local or remote mounts */
|
||||
if (scoutfs_omap_test(sb, ino) || test_bit_le(bit_nr, ldata->map.bits)) {
|
||||
@@ -1862,15 +1853,7 @@ again:
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = delete_inode_items(sb, ino, &sinode, &start, lock, orph_lock);
|
||||
|
||||
if (ret == -EINPROGRESS) {
|
||||
more = true;
|
||||
clear_trying = false;
|
||||
} else {
|
||||
more = false;
|
||||
}
|
||||
|
||||
ret = delete_inode_items(sb, ino, &sinode, lock, orph_lock);
|
||||
out:
|
||||
if (clear_trying)
|
||||
clear_bit(bit_nr, ldata->trying);
|
||||
@@ -1878,9 +1861,6 @@ out:
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
scoutfs_unlock(sb, orph_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
|
||||
if (more)
|
||||
goto again;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -372,9 +372,9 @@ static long scoutfs_ioc_release(struct file *file, unsigned long arg)
|
||||
sblock = args.offset >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
eblock = (args.offset + args.length - 1) >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
ret = scoutfs_data_truncate_items(sb, inode, scoutfs_ino(inode),
|
||||
&sblock,
|
||||
sblock,
|
||||
eblock, true,
|
||||
lock, false);
|
||||
lock);
|
||||
if (ret == 0) {
|
||||
scoutfs_inode_get_onoff(inode, &online, &offline);
|
||||
isize = i_size_read(inode);
|
||||
@@ -383,8 +383,8 @@ static long scoutfs_ioc_release(struct file *file, unsigned long arg)
|
||||
>> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
ret = scoutfs_data_truncate_items(sb, inode,
|
||||
scoutfs_ino(inode),
|
||||
&sblock, U64_MAX,
|
||||
false, lock, false);
|
||||
sblock, U64_MAX,
|
||||
false, lock);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -378,16 +378,15 @@ DEFINE_EVENT(scoutfs_data_file_extent_class, scoutfs_data_fiemap_extent,
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_data_truncate_items,
|
||||
TP_PROTO(struct super_block *sb, __u64 iblock, __u64 last, int offline, bool pause),
|
||||
TP_PROTO(struct super_block *sb, __u64 iblock, __u64 last, int offline),
|
||||
|
||||
TP_ARGS(sb, iblock, last, offline, pause),
|
||||
TP_ARGS(sb, iblock, last, offline),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, iblock)
|
||||
__field(__u64, last)
|
||||
__field(int, offline)
|
||||
__field(bool, pause)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
@@ -395,12 +394,10 @@ TRACE_EVENT(scoutfs_data_truncate_items,
|
||||
__entry->iblock = iblock;
|
||||
__entry->last = last;
|
||||
__entry->offline = offline;
|
||||
__entry->pause = pause;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" iblock %llu last %llu offline %u pause %d",
|
||||
SCSB_TRACE_ARGS, __entry->iblock, __entry->last,
|
||||
__entry->offline, __entry->pause)
|
||||
TP_printk(SCSBF" iblock %llu last %llu offline %u", SCSB_TRACE_ARGS,
|
||||
__entry->iblock, __entry->last, __entry->offline)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_data_wait_check,
|
||||
|
||||
@@ -15,7 +15,8 @@ BIN := src/createmany \
|
||||
src/o_tmpfile_umask \
|
||||
src/o_tmpfile_linkat \
|
||||
src/mmap_stress \
|
||||
src/mmap_validate
|
||||
src/mmap_validate \
|
||||
src/walk_inodes_for_estale
|
||||
|
||||
DEPS := $(wildcard src/*.d)
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
== setting longer hung task timeout
|
||||
== creating fragmented extents
|
||||
== unlink file with moved extents to free extents per block
|
||||
== cleanup
|
||||
|
||||
464
tests/src/walk_inodes_for_estale.c
Normal file
464
tests/src/walk_inodes_for_estale.c
Normal file
@@ -0,0 +1,464 @@
|
||||
|
||||
/*
|
||||
* Copyright (C) 2025 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <inttypes.h>
|
||||
#include <limits.h>
|
||||
#include <signal.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <linux/types.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "ioctl.h"
|
||||
|
||||
#define array_size(arr) (sizeof(arr) / sizeof(arr[0]))
|
||||
|
||||
#define FILEID_SCOUTFS 0x81
|
||||
#define FILEID_SCOUTFS_WITH_PARENT 0x82
|
||||
|
||||
static uint64_t meta_seq = 0;
|
||||
static bool sig_received = false;
|
||||
static bool tracing_on = false;
|
||||
static bool exit_on_current = false;
|
||||
static bool exiting = false;
|
||||
static uint64_t count = 0;
|
||||
|
||||
struct our_handle {
|
||||
struct file_handle handle;
|
||||
/*
|
||||
* scoutfs file handle can be ino or ino/parent. The
|
||||
* handle_type field of struct file_handle denotes which
|
||||
* version is in use. We only use the ino variant here.
|
||||
*/
|
||||
__le64 scoutfs_ino;
|
||||
};
|
||||
|
||||
static void exit_usage(void)
|
||||
{
|
||||
printf(
|
||||
" -e exit once stable meta_seq has been reached\n"
|
||||
" -m <string> scoutfs mount path string for seq walk\n"
|
||||
" -s <number> start from meta_seq number, instead of 0\n"
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static int write_at(int tracefd, char *path, char *val)
|
||||
{
|
||||
int fd = -1;
|
||||
int ret;
|
||||
|
||||
fd = openat(tracefd, path, O_TRUNC | O_RDWR);
|
||||
if (fd < 0)
|
||||
return errno;
|
||||
ret = write(fd, val, strlen(val));
|
||||
if (ret < 0)
|
||||
ret = errno;
|
||||
|
||||
close(fd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int do_trace(int fd, uint64_t ino)
|
||||
{
|
||||
struct our_handle handle;
|
||||
int tracefd = -1;
|
||||
int targetfd = -1;
|
||||
int outfd = -1;
|
||||
int infd = -1;
|
||||
char *pidstr;
|
||||
char *name;
|
||||
char *buf;
|
||||
ssize_t bytes;
|
||||
ssize_t written;
|
||||
ssize_t off = 0;
|
||||
unsigned long e = 0;
|
||||
int ret;
|
||||
|
||||
if (asprintf(&pidstr, "%u", getpid()) < 0)
|
||||
return ENOMEM;
|
||||
|
||||
if (asprintf(&name, "trace.scoutfs.open_by_handle_at.ino-%lu", ino) < 0)
|
||||
return ENOMEM;
|
||||
|
||||
buf = malloc(4096);
|
||||
if (!buf)
|
||||
return ENOMEM;
|
||||
|
||||
handle.handle.handle_bytes = sizeof(struct our_handle);
|
||||
handle.handle.handle_type = FILEID_SCOUTFS;
|
||||
handle.scoutfs_ino = htole64(ino);
|
||||
|
||||
/* keep a quick dirfd around for easy writing sysfs files */
|
||||
tracefd = open("/sys/kernel/debug/tracing", 0);
|
||||
if (tracefd < 0)
|
||||
return errno;
|
||||
|
||||
/* start tracing */
|
||||
ret = write_at(tracefd, "current_tracer", "nop") ?:
|
||||
write_at(tracefd, "current_tracer", "function_graph") ?:
|
||||
write_at(tracefd, "set_ftrace_pid", pidstr) ?:
|
||||
write_at(tracefd, "tracing_on", "1");
|
||||
|
||||
tracing_on = true;
|
||||
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
targetfd = open_by_handle_at(fd, &handle.handle, O_RDWR);
|
||||
e = errno;
|
||||
|
||||
out:
|
||||
/* turn off tracing first */
|
||||
ret = write_at(tracefd, "tracing_on", "0");
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
tracing_on = false;
|
||||
|
||||
if (targetfd != -1) {
|
||||
close(targetfd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (e == ESTALE) {
|
||||
/* capture trace */
|
||||
outfd = open(name, O_CREAT | O_TRUNC | O_RDWR, 0644);
|
||||
if (outfd < 0) {
|
||||
fprintf(stderr, "Error opening trace\n");
|
||||
return errno;
|
||||
}
|
||||
infd = openat(tracefd, "trace", O_RDONLY);
|
||||
if (infd < 0) {
|
||||
fprintf(stderr, "Error opening trace output\n");
|
||||
return errno;
|
||||
}
|
||||
for (;;) {
|
||||
bytes = pread(infd, buf, 4096, off);
|
||||
if (bytes < 0)
|
||||
return errno;
|
||||
if (bytes == 0)
|
||||
break;
|
||||
written = pwrite(outfd, buf, bytes, off);
|
||||
if (written < 0)
|
||||
return errno;
|
||||
if (written != bytes)
|
||||
return EIO;
|
||||
off += bytes;
|
||||
}
|
||||
close(outfd);
|
||||
close(infd);
|
||||
|
||||
fprintf(stderr, "Wrote \"%s\"\n", name);
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
ret = write_at(tracefd, "current_tracer", "nop");
|
||||
|
||||
free(pidstr);
|
||||
free(name);
|
||||
free(buf);
|
||||
close(tracefd);
|
||||
/* collect trace output */
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* lookup path for ino using ino_path
|
||||
*/
|
||||
struct ino_args {
|
||||
char *path;
|
||||
__u64 ino;
|
||||
};
|
||||
|
||||
static int do_resolve(int fd, uint64_t ino, char **path)
|
||||
{
|
||||
struct scoutfs_ioctl_ino_path ioctl_args = {0};
|
||||
struct scoutfs_ioctl_ino_path_result *res;
|
||||
unsigned int result_bytes;
|
||||
int ret;
|
||||
|
||||
result_bytes = offsetof(struct scoutfs_ioctl_ino_path_result,
|
||||
path[PATH_MAX]);
|
||||
|
||||
res = malloc(result_bytes);
|
||||
if (!res)
|
||||
return ENOMEM;
|
||||
|
||||
ioctl_args.ino = ino;
|
||||
ioctl_args.dir_ino = 0;
|
||||
ioctl_args.dir_pos = 0;
|
||||
ioctl_args.result_ptr = (intptr_t)res;
|
||||
ioctl_args.result_bytes = result_bytes;
|
||||
|
||||
ret = ioctl(fd, SCOUTFS_IOC_INO_PATH, &ioctl_args);
|
||||
if (ret < 0) {
|
||||
if (errno == ENOENT) {
|
||||
*path = NULL;
|
||||
return 0;
|
||||
}
|
||||
return errno;
|
||||
}
|
||||
|
||||
ret = asprintf(path, "%.*s", res->path_bytes, res->path);
|
||||
if (ret <= 0)
|
||||
return ENOMEM;
|
||||
|
||||
free(res);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int do_test_ino(int fd, uint64_t ino)
|
||||
{
|
||||
struct our_handle handle = {{0}};
|
||||
struct stat sb = {0};
|
||||
char *path = NULL;
|
||||
int targetfd = -1;
|
||||
int ret;
|
||||
|
||||
/* filter: open_by_handle_at() must fail */
|
||||
handle.handle.handle_bytes = sizeof(struct our_handle);
|
||||
handle.handle.handle_type = FILEID_SCOUTFS;
|
||||
handle.scoutfs_ino = htole64(ino);
|
||||
|
||||
targetfd = open_by_handle_at(fd, &handle.handle, O_RDWR);
|
||||
if (targetfd != -1) {
|
||||
close(targetfd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* filter: errno must be ESTALE */
|
||||
if (errno != ESTALE)
|
||||
return 0;
|
||||
|
||||
/* filter: path resolution succeeds to an actual file entry */
|
||||
ret = do_resolve(fd, ino, &path);
|
||||
if (path == NULL)
|
||||
return 0;
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* filter: stat() must succeed on resolved path */
|
||||
ret = fstatat(fd, path, &sb, AT_SYMLINK_NOFOLLOW);
|
||||
free(path);
|
||||
if (ret != 0) {
|
||||
if (errno == ENOENT)
|
||||
/* doesn't exist */
|
||||
return 0;
|
||||
return errno;
|
||||
}
|
||||
|
||||
return do_trace(fd, ino);
|
||||
}
|
||||
|
||||
static uint64_t do_get_meta_seq_stable(int fd)
|
||||
{
|
||||
struct scoutfs_ioctl_stat_more stm;
|
||||
|
||||
if (ioctl(fd, SCOUTFS_IOC_STAT_MORE, &stm) < 0)
|
||||
return errno;
|
||||
|
||||
return stm.meta_seq;
|
||||
}
|
||||
|
||||
static int do_walk_seq(int fd)
|
||||
{
|
||||
struct scoutfs_ioctl_walk_inodes_entry ents[128];
|
||||
struct scoutfs_ioctl_walk_inodes walk = {{0}};
|
||||
struct timespec ts;
|
||||
time_t seconds;
|
||||
int ret;
|
||||
uint64_t total = 0;
|
||||
uint64_t stable;
|
||||
int i;
|
||||
int j;
|
||||
|
||||
walk.index = SCOUTFS_IOC_WALK_INODES_META_SEQ;
|
||||
|
||||
/* make sure not to advance to stable meta_seq, we can just trail behind */
|
||||
stable = do_get_meta_seq_stable(fd);
|
||||
if (stable == 0)
|
||||
return 0;
|
||||
if (meta_seq >= stable - 1) {
|
||||
if (exit_on_current)
|
||||
exiting = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
meta_seq = meta_seq ? meta_seq + 1 : 0;
|
||||
|
||||
walk.first.major = meta_seq;
|
||||
walk.first.minor = 0;
|
||||
walk.first.ino = 0;
|
||||
|
||||
walk.last.major = stable - 1;
|
||||
walk.last.minor = ~0;
|
||||
walk.last.ino = ~0ULL;
|
||||
|
||||
walk.entries_ptr = (unsigned long)ents;
|
||||
walk.nr_entries = array_size(ents);
|
||||
|
||||
clock_gettime(CLOCK_REALTIME, &ts);
|
||||
seconds = ts.tv_sec;
|
||||
|
||||
for (j = 0;; j++) {
|
||||
if (sig_received)
|
||||
return 0;
|
||||
|
||||
ret = ioctl(fd, SCOUTFS_IOC_WALK_INODES, &walk);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (ret == 0)
|
||||
break;
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
meta_seq = ents[i].major;
|
||||
if (ents[i].ino == 1)
|
||||
continue;
|
||||
|
||||
/* poke at it */
|
||||
ret = do_test_ino(fd, ents[i].ino);
|
||||
|
||||
count++;
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
total += i;
|
||||
|
||||
walk.first = ents[i - 1];
|
||||
if (++walk.first.ino == 0 && ++walk.first.minor == 0)
|
||||
walk.first.major++;
|
||||
|
||||
/* yield once in a while */
|
||||
if (j % 32 == 0) {
|
||||
clock_gettime(CLOCK_REALTIME, &ts);
|
||||
if (ts.tv_sec > seconds + 1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void handle_signal(int sig)
|
||||
{
|
||||
int tracefd = -1;
|
||||
|
||||
sig_received = true;
|
||||
|
||||
if (!tracing_on)
|
||||
return;
|
||||
|
||||
tracefd = open("/sys/kernel/debug/tracing", 0);
|
||||
write_at(tracefd, "tracing_on", "0");
|
||||
close(tracefd);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
char *mnt = NULL;
|
||||
char c;
|
||||
int mntfd;
|
||||
int ret;
|
||||
|
||||
meta_seq = 0;
|
||||
|
||||
/* All we need is the mount point arg */
|
||||
while ((c = getopt(argc, argv, "+em:s:")) != -1) {
|
||||
switch (c) {
|
||||
case 'e':
|
||||
exit_on_current = true;
|
||||
break;
|
||||
case 'm':
|
||||
mnt = strdup(optarg);
|
||||
break;
|
||||
case 's':
|
||||
meta_seq = strtoull(optarg, NULL, 0);
|
||||
break;
|
||||
case '?':
|
||||
printf("unknown argument: %c\n", optind);
|
||||
case 'h':
|
||||
exit_usage();
|
||||
}
|
||||
}
|
||||
|
||||
if (!mnt) {
|
||||
fprintf(stderr, "Must provide a mount point with -m\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (meta_seq > 0)
|
||||
fprintf(stdout, "Starting from meta_seq = %lu\n", meta_seq);
|
||||
|
||||
/* lower prio */
|
||||
ret = nice(10);
|
||||
if (ret == -1)
|
||||
fprintf(stderr, "Error setting nice value\n");
|
||||
ret = syscall(SYS_ioprio_set, 1, 0, 0); /* IOPRIO_WHO_PROCESS = 1, IOPRIO_PRIO_CLASS(IOPRIO_CLASS_IDLE) = 0 */
|
||||
if (ret == -1)
|
||||
fprintf(stderr, "Error setting ioprio value\n");
|
||||
|
||||
signal(SIGINT, handle_signal);
|
||||
signal(SIGTERM, handle_signal);
|
||||
|
||||
for (;;) {
|
||||
if (sig_received)
|
||||
break;
|
||||
|
||||
mntfd = open(mnt, O_RDONLY);
|
||||
if (mntfd == -1) {
|
||||
perror("open(mntfd)");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
ret = do_walk_seq(mntfd);
|
||||
/* handle unmounts? EAGAIN? */
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
close(mntfd);
|
||||
|
||||
if (exiting)
|
||||
break;
|
||||
|
||||
/* yield */
|
||||
if (!sig_received)
|
||||
sleep(5);
|
||||
}
|
||||
|
||||
free(mnt);
|
||||
|
||||
fprintf(stdout, "Last meta_seq = %lu\n", meta_seq);
|
||||
|
||||
if (ret)
|
||||
fprintf(stderr, "Error walking inodes: %s(%d)\n", strerror(errno), ret);
|
||||
|
||||
exit(ret);
|
||||
}
|
||||
@@ -10,6 +10,30 @@ EXTENTS_PER_BTREE_BLOCK=600
|
||||
EXTENTS_PER_LIST_BLOCK=8192
|
||||
FREED_EXTENTS=$((EXTENTS_PER_BTREE_BLOCK * EXTENTS_PER_LIST_BLOCK))
|
||||
|
||||
#
|
||||
# This test specifically creates a pathologically sparse file that will
|
||||
# be as expensive as possible to free. This is usually fine on
|
||||
# dedicated or reasonable hardware, but trying to run this in
|
||||
# virtualized debug kernels can take a very long time. This test is
|
||||
# about making sure that the server doesn't fail, not that the platform
|
||||
# can handle the scale of work that our btree formats happen to require
|
||||
# while execution is bogged down with use-after-free memory reference
|
||||
# tracking. So we give the test a lot more breathing room before
|
||||
# deciding that its hung.
|
||||
#
|
||||
echo "== setting longer hung task timeout"
|
||||
if [ -w /proc/sys/kernel/hung_task_timeout_secs ]; then
|
||||
secs=$(cat /proc/sys/kernel/hung_task_timeout_secs)
|
||||
test "$secs" -gt 0 || \
|
||||
t_fail "confusing value '$secs' from /proc/sys/kernel/hung_task_timeout_secs"
|
||||
restore_hung_task_timeout()
|
||||
{
|
||||
echo "$secs" > /proc/sys/kernel/hung_task_timeout_secs
|
||||
}
|
||||
trap restore_hung_task_timeout EXIT
|
||||
echo "$((secs * 5))" > /proc/sys/kernel/hung_task_timeout_secs
|
||||
fi
|
||||
|
||||
echo "== creating fragmented extents"
|
||||
fragmented_data_extents $FREED_EXTENTS $EXTENTS_PER_BTREE_BLOCK "$T_D0/alloc" "$T_D0/move"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user