scoutfs: incremental srch compaction

Previously the srch compaction work would output the entire compacted
file and delete the input files in one atomic commit.  The server would
send the input files and an allocator to the client, and the client
would send back an output file and an allocator that included the
deletion of the input files.  The server would merge in the allocator
and replace the input file items with the output file item.

Doing it this way required giving an enormous allocation pool to the
client in a radix, which would deal with recursive operations
(allocating from and freeing to the radix that is being modified).  We
no longer have the radix allocator, and we use single block avail/free
lists instead of recursively modifying the btrees with free extent
items.  The compaction RPC needs to work with a finite amount of
allocator resources that can be stored in an alloc list block.

The compaction work now does a fixed amount of work and a compaction
operation spans multiple work iterations.

A single compaction struct is now sent between the client and server in
the get_compact and commit_compact messages.  The client records any
partial progress in the struct.  The server writes that position into
PENDING items.  It first searches for pending items to give to clients
before searching for files to start a new compaction operation.

The compact struct has flags to indicate whether the output file is
being written or the input files are being deleted.  The server manages
the flags and sets the input file deletion flag only once the result of
the compaction has been reflected in the btree items which record srch
files.

We added the progress fields to the compaction struct, making it even
bigger than it already was, so we take the time to allocate them rather
than declaring them on the stack.

It's worth mentioning that each operation now takes a reasonably bounded
amount of time, which will make it feasible to decide that it has failed
and needs to be fenced.

Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
Zach Brown
2020-10-14 14:10:50 -07:00
committed by Zach Brown
parent d589881855
commit 7a3749d591
8 changed files with 555 additions and 289 deletions

View File

@@ -1121,15 +1121,15 @@ int scoutfs_alloc_foreach(struct super_block *sb,
struct scoutfs_btree_ref stale_refs[2] = {{0,}};
struct scoutfs_btree_ref refs[2] = {{0,}};
struct scoutfs_super_block *super = NULL;
struct scoutfs_srch_compact_input *scin;
struct scoutfs_srch_compact *sc;
struct scoutfs_log_trees_val ltv;
SCOUTFS_BTREE_ITEM_REF(iref);
struct scoutfs_key key;
int ret;
super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS);
scin = kmalloc(sizeof(struct scoutfs_srch_compact_input), GFP_NOFS);
if (!super || !scin) {
sc = kmalloc(sizeof(struct scoutfs_srch_compact), GFP_NOFS);
if (!super || !sc) {
ret = -ENOMEM;
goto out;
}
@@ -1200,17 +1200,17 @@ retry:
/* srch compaction allocators */
memset(&key, 0, sizeof(key));
key.sk_zone = SCOUTFS_SRCH_ZONE;
key.sk_type = SCOUTFS_SRCH_BUSY_TYPE;
key.sk_type = SCOUTFS_SRCH_PENDING_TYPE;
for (;;) {
/* _BUSY_ is last type, _next won't see other types */
/* _PENDING_ and _BUSY_ are last, _next won't see other types */
ret = scoutfs_btree_next(sb, &super->srch_root, &key, &iref);
if (ret == -ENOENT)
break;
if (ret == 0) {
if (iref.val_len == sizeof(scin)) {
if (iref.val_len == sizeof(*sc)) {
key = *iref.key;
memcpy(scin, iref.val, iref.val_len);
memcpy(sc, iref.val, iref.val_len);
} else {
ret = -EIO;
}
@@ -1220,11 +1220,11 @@ retry:
goto out;
ret = cb(sb, arg, SCOUTFS_ALLOC_OWNER_SRCH,
le64_to_cpu(scin->id), true, true,
le64_to_cpu(scin->meta_avail.total_nr)) ?:
le64_to_cpu(sc->id), true, true,
le64_to_cpu(sc->meta_avail.total_nr)) ?:
cb(sb, arg, SCOUTFS_ALLOC_OWNER_SRCH,
le64_to_cpu(scin->id), true, false,
le64_to_cpu(scin->meta_freed.total_nr));
le64_to_cpu(sc->id), true, false,
le64_to_cpu(sc->meta_freed.total_nr));
if (ret < 0)
goto out;
@@ -1244,6 +1244,6 @@ out:
}
kfree(super);
kfree(scin);
kfree(sc);
return ret;
}

View File

@@ -201,24 +201,24 @@ int scoutfs_client_lock_recover_response(struct super_block *sb, u64 net_id,
/* Find srch files that need to be compacted. */
int scoutfs_client_srch_get_compact(struct super_block *sb,
struct scoutfs_srch_compact_input *scin)
struct scoutfs_srch_compact *sc)
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return scoutfs_net_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
NULL, 0, scin, sizeof(*scin));
NULL, 0, sc, sizeof(*sc));
}
/* Commit the result of a srch file compaction. */
int scoutfs_client_srch_commit_compact(struct super_block *sb,
struct scoutfs_srch_compact_result *scres)
struct scoutfs_srch_compact *res)
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return scoutfs_net_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
scres, sizeof(*scres), NULL, 0);
res, sizeof(*res), NULL, 0);
}
/* The client is receiving a invalidation request from the server */

View File

@@ -19,9 +19,9 @@ int scoutfs_client_lock_response(struct super_block *sb, u64 net_id,
int scoutfs_client_lock_recover_response(struct super_block *sb, u64 net_id,
struct scoutfs_net_lock_recover *nlr);
int scoutfs_client_srch_get_compact(struct super_block *sb,
struct scoutfs_srch_compact_input *scin);
struct scoutfs_srch_compact *sc);
int scoutfs_client_srch_commit_compact(struct super_block *sb,
struct scoutfs_srch_compact_result *scres);
struct scoutfs_srch_compact *res);
int scoutfs_client_setup(struct super_block *sb);
void scoutfs_client_destroy(struct super_block *sb);

View File

@@ -155,7 +155,6 @@
EXPAND_COUNTER(srch_compact_dirty_block) \
EXPAND_COUNTER(srch_compact_entry) \
EXPAND_COUNTER(srch_compact_flush) \
EXPAND_COUNTER(srch_compact_free_block) \
EXPAND_COUNTER(srch_compact_log_page) \
EXPAND_COUNTER(srch_compact_removed_entry) \
EXPAND_COUNTER(srch_inconsistent_ref) \

View File

@@ -374,27 +374,39 @@ struct scoutfs_srch_block {
#define SCOUTFS_SRCH_COMPACT_ORDER 3
#define SCOUTFS_SRCH_COMPACT_NR (1 << SCOUTFS_SRCH_COMPACT_ORDER)
struct scoutfs_srch_compact_input {
/*
* A persistent record of a srch file compaction operation in progress.
*
* When compacting log files blk and pos aren't used. When compacting
* sorted files blk is the logical block number and pos is the byte
* offset of the next entry. When deleting files pos is the height of
* the level that we're deleting, and blk is the logical block offset of
* the next parent ref array index to descend through.
*/
struct scoutfs_srch_compact {
struct scoutfs_alloc_list_head meta_avail;
struct scoutfs_alloc_list_head meta_freed;
__le64 id;
__u8 nr;
__u8 flags;
struct scoutfs_srch_file sfl[SCOUTFS_SRCH_COMPACT_NR];
struct scoutfs_srch_file out;
struct scoutfs_srch_compact_input {
struct scoutfs_srch_file sfl;
__le64 blk;
__le64 pos;
} in[SCOUTFS_SRCH_COMPACT_NR] __packed;
} __packed;
struct scoutfs_srch_compact_result {
struct scoutfs_alloc_list_head meta_avail;
struct scoutfs_alloc_list_head meta_freed;
__le64 id;
__u8 flags;
struct scoutfs_srch_file sfl;
} __packed;
/* files are insorted logs */
#define SCOUTFS_SRCH_COMPACT_FLAG_LOG (1 << 0)
/* compaction failed, release inputs */
#define SCOUTFS_SRCH_COMPACT_FLAG_ERROR (1 << 1)
/* server -> client: combine input log file entries into output file */
#define SCOUTFS_SRCH_COMPACT_FLAG_LOG (1 << 0)
/* server -> client: combine input sorted file entries into output file */
#define SCOUTFS_SRCH_COMPACT_FLAG_SORTED (1 << 1)
/* server -> client: delete input files */
#define SCOUTFS_SRCH_COMPACT_FLAG_DELETE (1 << 2)
/* client -> server: compaction phase (LOG,SORTED,DELETE) done */
#define SCOUTFS_SRCH_COMPACT_FLAG_DONE (1 << 4)
/* client -> server: compaction failed */
#define SCOUTFS_SRCH_COMPACT_FLAG_ERROR (1 << 5)
/*
* XXX I imagine we should rename these now that they've evolved to track
@@ -496,7 +508,8 @@ struct scoutfs_bloom_block {
/* srch zone, only in server btrees */
#define SCOUTFS_SRCH_LOG_TYPE 1
#define SCOUTFS_SRCH_BLOCKS_TYPE 2
#define SCOUTFS_SRCH_BUSY_TYPE 3
#define SCOUTFS_SRCH_PENDING_TYPE 3
#define SCOUTFS_SRCH_BUSY_TYPE 4
/* free extents in allocator btrees in client and server, by blkno or len */
#define SCOUTFS_FREE_EXTENT_BLKNO_TYPE 1

View File

@@ -925,55 +925,57 @@ static int server_srch_get_compact(struct super_block *sb,
u64 rid = scoutfs_net_client_rid(conn);
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_super_block *super = &sbi->super;
struct scoutfs_srch_compact_input scin;
u64 blocks;
struct scoutfs_srch_compact *sc = NULL;
int ret;
int i;
memset(&scin, 0, sizeof(scin));
if (arg_len != 0) {
ret = -EINVAL;
goto out;
}
sc = kzalloc(sizeof(struct scoutfs_srch_compact), GFP_NOFS);
if (sc == NULL) {
ret = -ENOMEM;
goto out;
}
ret = scoutfs_server_hold_commit(sb);
if (ret)
goto out;
mutex_lock(&server->srch_mutex);
ret = scoutfs_srch_get_compact(sb, &server->alloc, &server->wri,
&super->srch_root, rid, &scin);
&super->srch_root, rid, sc);
mutex_unlock(&server->srch_mutex);
if (ret == 0 && scin.nr == 0)
if (ret == 0 && sc->nr == 0)
ret = -ENOENT;
if (ret < 0)
goto apply;
/* provide ~3x input blocks to allocate, write+delete+cow */
blocks = 0;
for (i = 0; i < scin.nr; i++)
blocks += le64_to_cpu(scin.sfl[i].blocks);
blocks *= 3;
mutex_lock(&server->alloc_mutex);
ret = scoutfs_alloc_fill_list(sb, &server->alloc, &server->wri,
&scin.meta_avail, server->meta_avail,
blocks, blocks);
&sc->meta_avail, server->meta_avail,
SCOUTFS_SERVER_META_FILL_LO,
SCOUTFS_SERVER_META_FILL_TARGET) ?:
scoutfs_alloc_splice_list(sb, &server->alloc, &server->wri,
server->other_freed, &sc->meta_freed);
mutex_unlock(&server->alloc_mutex);
if (ret < 0)
goto apply;
mutex_lock(&server->srch_mutex);
ret = scoutfs_srch_update_compact(sb, &server->alloc, &server->wri,
&super->srch_root, rid, &scin);
&super->srch_root, rid, sc);
mutex_unlock(&server->srch_mutex);
apply:
ret = scoutfs_server_apply_commit(sb, ret);
WARN_ON_ONCE(ret < 0 && ret != -ENOENT); /* XXX leaked busy item */
out:
return scoutfs_net_response(sb, conn, cmd, id, ret,
&scin, sizeof(scin));
ret = scoutfs_net_response(sb, conn, cmd, id, ret,
sc, sizeof(struct scoutfs_srch_compact));
kfree(sc);
return ret;
}
/*
@@ -990,16 +992,16 @@ static int server_srch_commit_compact(struct super_block *sb,
u64 rid = scoutfs_net_client_rid(conn);
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_super_block *super = &sbi->super;
struct scoutfs_srch_compact_result *scres;
struct scoutfs_srch_compact *sc;
struct scoutfs_alloc_list_head av;
struct scoutfs_alloc_list_head fr;
int ret;
scres = arg;
if (arg_len != sizeof(*scres)) {
if (arg_len != sizeof(struct scoutfs_srch_compact)) {
ret = -EINVAL;
goto out;
}
sc = arg;
ret = scoutfs_server_hold_commit(sb);
if (ret)
@@ -1007,12 +1009,13 @@ static int server_srch_commit_compact(struct super_block *sb,
mutex_lock(&server->srch_mutex);
ret = scoutfs_srch_commit_compact(sb, &server->alloc, &server->wri,
&super->srch_root, rid, scres,
&super->srch_root, rid, sc,
&av, &fr);
mutex_unlock(&server->srch_mutex);
if (ret < 0) /* XXX very bad, leaks allocators */
goto apply;
/* reclaim allocators if they were set by _srch_commit_ */
mutex_lock(&server->alloc_mutex);
ret = scoutfs_alloc_splice_list(sb, &server->alloc, &server->wri,
server->other_freed, &av) ?:

File diff suppressed because it is too large Load Diff

View File

@@ -42,18 +42,17 @@ int scoutfs_srch_get_compact(struct super_block *sb,
struct scoutfs_alloc *alloc,
struct scoutfs_block_writer *wri,
struct scoutfs_btree_root *root,
u64 rid,
struct scoutfs_srch_compact_input *scin_ret);
u64 rid, struct scoutfs_srch_compact *sc);
int scoutfs_srch_update_compact(struct super_block *sb,
struct scoutfs_alloc *alloc,
struct scoutfs_block_writer *wri,
struct scoutfs_btree_root *root, u64 rid,
struct scoutfs_srch_compact_input *scin);
struct scoutfs_srch_compact *sc);
int scoutfs_srch_commit_compact(struct super_block *sb,
struct scoutfs_alloc *alloc,
struct scoutfs_block_writer *wri,
struct scoutfs_btree_root *root, u64 rid,
struct scoutfs_srch_compact_result *scres,
struct scoutfs_srch_compact *res,
struct scoutfs_alloc_list_head *av,
struct scoutfs_alloc_list_head *fr);
int scoutfs_srch_cancel_compact(struct super_block *sb,