mirror of
https://github.com/versity/scoutfs.git
synced 2026-06-08 12:42:35 +00:00
scoutfs: incremental srch compaction
Previously the srch compaction work would output the entire compacted file and delete the input files in one atomic commit. The server would send the input files and an allocator to the client, and the client would send back an output file and an allocator that included the deletion of the input files. The server would merge in the allocator and replace the input file items with the output file item. Doing it this way required giving an enormous allocation pool to the client in a radix, which would deal with recursive operations (allocating from and freeing to the radix that is being modified). We no longer have the radix allocator, and we use single block avail/free lists instead of recursively modifying the btrees with free extent items. The compaction RPC needs to work with a finite amount of allocator resources that can be stored in an alloc list block. The compaction work now does a fixed amount of work and a compaction operation spans multiple work iterations. A single compaction struct is now sent between the client and server in the get_compact and commit_compact messages. The client records any partial progress in the struct. The server writes that position into PENDING items. It first searches for pending items to give to clients before searching for files to start a new compaction operation. The compact struct has flags to indicate whether the output file is being written or the input files are being deleted. The server manages the flags and sets the input file deletion flag only once the result of the compaction has been reflected in the btree items which record srch files. We added the progress fields to the compaction struct, making it even bigger than it already was, so we take the time to allocate them rather than declaring them on the stack. It's worth mentioning that each operation now taking a reasonably bounded amount of time will make it feasible to decide that it has failed and needs to be fenced. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -1121,15 +1121,15 @@ int scoutfs_alloc_foreach(struct super_block *sb,
|
||||
struct scoutfs_btree_ref stale_refs[2] = {{0,}};
|
||||
struct scoutfs_btree_ref refs[2] = {{0,}};
|
||||
struct scoutfs_super_block *super = NULL;
|
||||
struct scoutfs_srch_compact_input *scin;
|
||||
struct scoutfs_srch_compact *sc;
|
||||
struct scoutfs_log_trees_val ltv;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
|
||||
super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS);
|
||||
scin = kmalloc(sizeof(struct scoutfs_srch_compact_input), GFP_NOFS);
|
||||
if (!super || !scin) {
|
||||
sc = kmalloc(sizeof(struct scoutfs_srch_compact), GFP_NOFS);
|
||||
if (!super || !sc) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
@@ -1200,17 +1200,17 @@ retry:
|
||||
/* srch compaction allocators */
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.sk_zone = SCOUTFS_SRCH_ZONE;
|
||||
key.sk_type = SCOUTFS_SRCH_BUSY_TYPE;
|
||||
key.sk_type = SCOUTFS_SRCH_PENDING_TYPE;
|
||||
|
||||
for (;;) {
|
||||
/* _BUSY_ is last type, _next won't see other types */
|
||||
/* _PENDING_ and _BUSY_ are last, _next won't see other types */
|
||||
ret = scoutfs_btree_next(sb, &super->srch_root, &key, &iref);
|
||||
if (ret == -ENOENT)
|
||||
break;
|
||||
if (ret == 0) {
|
||||
if (iref.val_len == sizeof(scin)) {
|
||||
if (iref.val_len == sizeof(*sc)) {
|
||||
key = *iref.key;
|
||||
memcpy(scin, iref.val, iref.val_len);
|
||||
memcpy(sc, iref.val, iref.val_len);
|
||||
} else {
|
||||
ret = -EIO;
|
||||
}
|
||||
@@ -1220,11 +1220,11 @@ retry:
|
||||
goto out;
|
||||
|
||||
ret = cb(sb, arg, SCOUTFS_ALLOC_OWNER_SRCH,
|
||||
le64_to_cpu(scin->id), true, true,
|
||||
le64_to_cpu(scin->meta_avail.total_nr)) ?:
|
||||
le64_to_cpu(sc->id), true, true,
|
||||
le64_to_cpu(sc->meta_avail.total_nr)) ?:
|
||||
cb(sb, arg, SCOUTFS_ALLOC_OWNER_SRCH,
|
||||
le64_to_cpu(scin->id), true, false,
|
||||
le64_to_cpu(scin->meta_freed.total_nr));
|
||||
le64_to_cpu(sc->id), true, false,
|
||||
le64_to_cpu(sc->meta_freed.total_nr));
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -1244,6 +1244,6 @@ out:
|
||||
}
|
||||
|
||||
kfree(super);
|
||||
kfree(scin);
|
||||
kfree(sc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -201,24 +201,24 @@ int scoutfs_client_lock_recover_response(struct super_block *sb, u64 net_id,
|
||||
|
||||
/* Find srch files that need to be compacted. */
|
||||
int scoutfs_client_srch_get_compact(struct super_block *sb,
|
||||
struct scoutfs_srch_compact_input *scin)
|
||||
struct scoutfs_srch_compact *sc)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
|
||||
NULL, 0, scin, sizeof(*scin));
|
||||
NULL, 0, sc, sizeof(*sc));
|
||||
}
|
||||
|
||||
/* Commit the result of a srch file compaction. */
|
||||
int scoutfs_client_srch_commit_compact(struct super_block *sb,
|
||||
struct scoutfs_srch_compact_result *scres)
|
||||
struct scoutfs_srch_compact *res)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
|
||||
scres, sizeof(*scres), NULL, 0);
|
||||
res, sizeof(*res), NULL, 0);
|
||||
}
|
||||
|
||||
/* The client is receiving an invalidation request from the server */
|
||||
|
||||
@@ -19,9 +19,9 @@ int scoutfs_client_lock_response(struct super_block *sb, u64 net_id,
|
||||
int scoutfs_client_lock_recover_response(struct super_block *sb, u64 net_id,
|
||||
struct scoutfs_net_lock_recover *nlr);
|
||||
int scoutfs_client_srch_get_compact(struct super_block *sb,
|
||||
struct scoutfs_srch_compact_input *scin);
|
||||
struct scoutfs_srch_compact *sc);
|
||||
int scoutfs_client_srch_commit_compact(struct super_block *sb,
|
||||
struct scoutfs_srch_compact_result *scres);
|
||||
struct scoutfs_srch_compact *res);
|
||||
|
||||
int scoutfs_client_setup(struct super_block *sb);
|
||||
void scoutfs_client_destroy(struct super_block *sb);
|
||||
|
||||
@@ -155,7 +155,6 @@
|
||||
EXPAND_COUNTER(srch_compact_dirty_block) \
|
||||
EXPAND_COUNTER(srch_compact_entry) \
|
||||
EXPAND_COUNTER(srch_compact_flush) \
|
||||
EXPAND_COUNTER(srch_compact_free_block) \
|
||||
EXPAND_COUNTER(srch_compact_log_page) \
|
||||
EXPAND_COUNTER(srch_compact_removed_entry) \
|
||||
EXPAND_COUNTER(srch_inconsistent_ref) \
|
||||
|
||||
@@ -374,27 +374,39 @@ struct scoutfs_srch_block {
|
||||
#define SCOUTFS_SRCH_COMPACT_ORDER 3
|
||||
#define SCOUTFS_SRCH_COMPACT_NR (1 << SCOUTFS_SRCH_COMPACT_ORDER)
|
||||
|
||||
struct scoutfs_srch_compact_input {
|
||||
/*
|
||||
* A persistent record of a srch file compaction operation in progress.
|
||||
*
|
||||
* When compacting log files blk and pos aren't used. When compacting
|
||||
* sorted files blk is the logical block number and pos is the byte
|
||||
* offset of the next entry. When deleting files pos is the height of
|
||||
* the level that we're deleting, and blk is the logical block offset of
|
||||
* the next parent ref array index to descend through.
|
||||
*/
|
||||
struct scoutfs_srch_compact {
|
||||
struct scoutfs_alloc_list_head meta_avail;
|
||||
struct scoutfs_alloc_list_head meta_freed;
|
||||
__le64 id;
|
||||
__u8 nr;
|
||||
__u8 flags;
|
||||
struct scoutfs_srch_file sfl[SCOUTFS_SRCH_COMPACT_NR];
|
||||
struct scoutfs_srch_file out;
|
||||
struct scoutfs_srch_compact_input {
|
||||
struct scoutfs_srch_file sfl;
|
||||
__le64 blk;
|
||||
__le64 pos;
|
||||
} in[SCOUTFS_SRCH_COMPACT_NR] __packed;
|
||||
} __packed;
|
||||
|
||||
struct scoutfs_srch_compact_result {
|
||||
struct scoutfs_alloc_list_head meta_avail;
|
||||
struct scoutfs_alloc_list_head meta_freed;
|
||||
__le64 id;
|
||||
__u8 flags;
|
||||
struct scoutfs_srch_file sfl;
|
||||
} __packed;
|
||||
|
||||
/* files are unsorted logs */
|
||||
#define SCOUTFS_SRCH_COMPACT_FLAG_LOG (1 << 0)
|
||||
/* compaction failed, release inputs */
|
||||
#define SCOUTFS_SRCH_COMPACT_FLAG_ERROR (1 << 1)
|
||||
/* server -> client: combine input log file entries into output file */
|
||||
#define SCOUTFS_SRCH_COMPACT_FLAG_LOG (1 << 0)
|
||||
/* server -> client: combine input sorted file entries into output file */
|
||||
#define SCOUTFS_SRCH_COMPACT_FLAG_SORTED (1 << 1)
|
||||
/* server -> client: delete input files */
|
||||
#define SCOUTFS_SRCH_COMPACT_FLAG_DELETE (1 << 2)
|
||||
/* client -> server: compaction phase (LOG,SORTED,DELETE) done */
|
||||
#define SCOUTFS_SRCH_COMPACT_FLAG_DONE (1 << 4)
|
||||
/* client -> server: compaction failed */
|
||||
#define SCOUTFS_SRCH_COMPACT_FLAG_ERROR (1 << 5)
|
||||
|
||||
/*
|
||||
* XXX I imagine we should rename these now that they've evolved to track
|
||||
@@ -496,7 +508,8 @@ struct scoutfs_bloom_block {
|
||||
/* srch zone, only in server btrees */
|
||||
#define SCOUTFS_SRCH_LOG_TYPE 1
|
||||
#define SCOUTFS_SRCH_BLOCKS_TYPE 2
|
||||
#define SCOUTFS_SRCH_BUSY_TYPE 3
|
||||
#define SCOUTFS_SRCH_PENDING_TYPE 3
|
||||
#define SCOUTFS_SRCH_BUSY_TYPE 4
|
||||
|
||||
/* free extents in allocator btrees in client and server, by blkno or len */
|
||||
#define SCOUTFS_FREE_EXTENT_BLKNO_TYPE 1
|
||||
|
||||
@@ -925,55 +925,57 @@ static int server_srch_get_compact(struct super_block *sb,
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_srch_compact_input scin;
|
||||
u64 blocks;
|
||||
struct scoutfs_srch_compact *sc = NULL;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
memset(&scin, 0, sizeof(scin));
|
||||
|
||||
if (arg_len != 0) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
sc = kzalloc(sizeof(struct scoutfs_srch_compact), GFP_NOFS);
|
||||
if (sc == NULL) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_server_hold_commit(sb);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
mutex_lock(&server->srch_mutex);
|
||||
ret = scoutfs_srch_get_compact(sb, &server->alloc, &server->wri,
|
||||
&super->srch_root, rid, &scin);
|
||||
&super->srch_root, rid, sc);
|
||||
mutex_unlock(&server->srch_mutex);
|
||||
if (ret == 0 && scin.nr == 0)
|
||||
if (ret == 0 && sc->nr == 0)
|
||||
ret = -ENOENT;
|
||||
if (ret < 0)
|
||||
goto apply;
|
||||
|
||||
/* provide ~3x input blocks to allocate, write+delete+cow */
|
||||
blocks = 0;
|
||||
for (i = 0; i < scin.nr; i++)
|
||||
blocks += le64_to_cpu(scin.sfl[i].blocks);
|
||||
blocks *= 3;
|
||||
mutex_lock(&server->alloc_mutex);
|
||||
ret = scoutfs_alloc_fill_list(sb, &server->alloc, &server->wri,
|
||||
&scin.meta_avail, server->meta_avail,
|
||||
blocks, blocks);
|
||||
&sc->meta_avail, server->meta_avail,
|
||||
SCOUTFS_SERVER_META_FILL_LO,
|
||||
SCOUTFS_SERVER_META_FILL_TARGET) ?:
|
||||
scoutfs_alloc_splice_list(sb, &server->alloc, &server->wri,
|
||||
server->other_freed, &sc->meta_freed);
|
||||
mutex_unlock(&server->alloc_mutex);
|
||||
if (ret < 0)
|
||||
goto apply;
|
||||
|
||||
mutex_lock(&server->srch_mutex);
|
||||
ret = scoutfs_srch_update_compact(sb, &server->alloc, &server->wri,
|
||||
&super->srch_root, rid, &scin);
|
||||
&super->srch_root, rid, sc);
|
||||
mutex_unlock(&server->srch_mutex);
|
||||
|
||||
apply:
|
||||
ret = scoutfs_server_apply_commit(sb, ret);
|
||||
WARN_ON_ONCE(ret < 0 && ret != -ENOENT); /* XXX leaked busy item */
|
||||
out:
|
||||
return scoutfs_net_response(sb, conn, cmd, id, ret,
|
||||
&scin, sizeof(scin));
|
||||
ret = scoutfs_net_response(sb, conn, cmd, id, ret,
|
||||
sc, sizeof(struct scoutfs_srch_compact));
|
||||
kfree(sc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -990,16 +992,16 @@ static int server_srch_commit_compact(struct super_block *sb,
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_srch_compact_result *scres;
|
||||
struct scoutfs_srch_compact *sc;
|
||||
struct scoutfs_alloc_list_head av;
|
||||
struct scoutfs_alloc_list_head fr;
|
||||
int ret;
|
||||
|
||||
scres = arg;
|
||||
if (arg_len != sizeof(*scres)) {
|
||||
if (arg_len != sizeof(struct scoutfs_srch_compact)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
sc = arg;
|
||||
|
||||
ret = scoutfs_server_hold_commit(sb);
|
||||
if (ret)
|
||||
@@ -1007,12 +1009,13 @@ static int server_srch_commit_compact(struct super_block *sb,
|
||||
|
||||
mutex_lock(&server->srch_mutex);
|
||||
ret = scoutfs_srch_commit_compact(sb, &server->alloc, &server->wri,
|
||||
&super->srch_root, rid, scres,
|
||||
&super->srch_root, rid, sc,
|
||||
&av, &fr);
|
||||
mutex_unlock(&server->srch_mutex);
|
||||
if (ret < 0) /* XXX very bad, leaks allocators */
|
||||
goto apply;
|
||||
|
||||
/* reclaim allocators if they were set by _srch_commit_ */
|
||||
mutex_lock(&server->alloc_mutex);
|
||||
ret = scoutfs_alloc_splice_list(sb, &server->alloc, &server->wri,
|
||||
server->other_freed, &av) ?:
|
||||
|
||||
712
kmod/src/srch.c
712
kmod/src/srch.c
File diff suppressed because it is too large
Load Diff
@@ -42,18 +42,17 @@ int scoutfs_srch_get_compact(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
u64 rid,
|
||||
struct scoutfs_srch_compact_input *scin_ret);
|
||||
u64 rid, struct scoutfs_srch_compact *sc);
|
||||
int scoutfs_srch_update_compact(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root, u64 rid,
|
||||
struct scoutfs_srch_compact_input *scin);
|
||||
struct scoutfs_srch_compact *sc);
|
||||
int scoutfs_srch_commit_compact(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root, u64 rid,
|
||||
struct scoutfs_srch_compact_result *scres,
|
||||
struct scoutfs_srch_compact *res,
|
||||
struct scoutfs_alloc_list_head *av,
|
||||
struct scoutfs_alloc_list_head *fr);
|
||||
int scoutfs_srch_cancel_compact(struct super_block *sb,
|
||||
|
||||
Reference in New Issue
Block a user