Consistently sample data alloc total_len

With many concurrent writers we were seeing excessive commits forced
because the transaction code thought the data allocator was running
low.  The transaction was checking the raw total_len value in the
data_avail alloc_root for the number of free data blocks.  But this
read wasn't locked, and allocators could completely remove a large free
extent and then re-insert a slightly smaller free extent as they
perform their allocation.  The transaction could see a transient, very
small total_len and trigger a commit.
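
The window is easy to reproduce outside the kernel.  Here's a minimal
userspace sketch of the race; C11 atomics and pthreads stand in for the
kernel's mutex and extent items, and all names are invented for
illustration:

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	static _Atomic uint64_t total_len = 1000;  /* free blocks in the root */
	static pthread_mutex_t alloc_mutex = PTHREAD_MUTEX_INITIALIZER;

	/* allocator: remove a large free extent, re-insert the remainder */
	static void *alloc_worker(void *arg)
	{
		(void)arg;
		pthread_mutex_lock(&alloc_mutex);
		atomic_store(&total_len, 0);    /* extent removed from root */
		atomic_store(&total_len, 999);  /* smaller extent re-inserted */
		pthread_mutex_unlock(&alloc_mutex);
		return NULL;
	}

	/* transaction: unlocked sample can land between the two stores */
	static void *sampler(void *arg)
	{
		(void)arg;
		if (atomic_load(&total_len) == 0)
			printf("spurious low-space commit\n");
		return NULL;
	}

	int main(void)
	{
		pthread_t a, s;

		pthread_create(&a, NULL, alloc_worker, NULL);
		pthread_create(&s, NULL, sampler, NULL);
		pthread_join(a, NULL);
		pthread_join(s, NULL);
		return 0;
	}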

Data allocations are serialized by a heavy mutex, so we don't want the
reader to have to take that just to see a consistent total_len.
Instead we create a run-time data allocator struct with a consistent
total_len that is updated after all the extent items have been
manipulated.  This also gives us a place to put the caller's cached
extent so that it can be included in the total_len; previously it
wasn't included in the free total that the transaction saw.
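
Reduced to its essentials, this is a publish/sample pattern: all extent
and cached-extent manipulation happens under the allocation mutex, and
once it's finished a single atomic store publishes the sum that readers
can then sample with a single atomic load.  A simplified sketch, where
the types and names are stand-ins for the kernel structures in the diff
below:

	#include <stdatomic.h>
	#include <stdint.h>

	struct data_alloc {
		uint64_t root_total_len;    /* extent items; only under mutex */
		uint64_t cached_len;        /* cached extent; only under mutex */
		_Atomic uint64_t total_len; /* published consistent sum */
	};

	/* writer: call after all extent manipulation is finished */
	static void update_total_len(struct data_alloc *da)
	{
		atomic_store(&da->total_len,
			     da->root_total_len + da->cached_len);
	}

	/* reader: no mutex needed, never sees intermediate states */
	static uint64_t sample_total_len(struct data_alloc *da)
	{
		return atomic_load(&da->total_len);
	}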

The file data allocator can then initialize and use this struct instead
of working with the root and cached extent directly.  The transaction,
in turn, can sample a consistent total_len that reflects both the root
and the cached extent, as the sketch below shows.
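
Continuing the sketch above, the transaction-side check collapses to
one consistent, unlocked read.  The threshold and 4KB block shift here
are invented for illustration; the real sample is taken through
scoutfs_data_alloc_free_bytes() in the diff below:

	#define BLOCK_SHIFT	12		/* assumed 4KB blocks */
	#define LOW_WATER	(64ULL << 20)	/* invented threshold */

	/* hypothetical commit trigger: one atomic sample, no mutex */
	static int data_running_low(struct data_alloc *da)
	{
		return (sample_total_len(da) << BLOCK_SHIFT) < LOW_WATER;
	}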

A subtle detail is that fallocate can't use _free_data to return an
allocated extent to the avail pool on error.  It instead frees into the
data_freed pool like normal frees do.  It doesn't really matter that
this could prematurely drain the avail pool, because it only happens in
an error path.

Signed-off-by: Zach Brown <zab@versity.com>

--- a/src/alloc.c
+++ b/src/alloc.c
@@ -657,6 +657,60 @@ out:
 	return ret;
 }
 
+void scoutfs_dalloc_init(struct scoutfs_data_alloc *dalloc,
+			 struct scoutfs_alloc_root *data_avail)
+{
+	dalloc->root = *data_avail;
+	memset(&dalloc->cached, 0, sizeof(dalloc->cached));
+	atomic64_set(&dalloc->total_len, le64_to_cpu(dalloc->root.total_len));
+}
+
+void scoutfs_dalloc_get_root(struct scoutfs_data_alloc *dalloc,
+			     struct scoutfs_alloc_root *data_avail)
+{
+	*data_avail = dalloc->root;
+}
+
+static void dalloc_update_total_len(struct scoutfs_data_alloc *dalloc)
+{
+	atomic64_set(&dalloc->total_len, le64_to_cpu(dalloc->root.total_len) +
+					 dalloc->cached.len);
+}
+
+u64 scoutfs_dalloc_total_len(struct scoutfs_data_alloc *dalloc)
+{
+	return atomic64_read(&dalloc->total_len);
+}
+
+/*
+ * Return the current in-memory cached free extent to extent items in
+ * the avail root.  This should be locked by the caller just like
+ * _alloc_data and _free_data.
+ */
+int scoutfs_dalloc_return_cached(struct super_block *sb,
+				 struct scoutfs_alloc *alloc,
+				 struct scoutfs_block_writer *wri,
+				 struct scoutfs_data_alloc *dalloc)
+{
+	struct alloc_ext_args args = {
+		.alloc = alloc,
+		.wri = wri,
+		.root = &dalloc->root,
+		.type = SCOUTFS_FREE_EXTENT_BLKNO_TYPE,
+	};
+	int ret = 0;
+
+	if (dalloc->cached.len) {
+		ret = scoutfs_ext_insert(sb, &alloc_ext_ops, &args,
+					 dalloc->cached.start,
+					 dalloc->cached.len, 0, 0);
+		if (ret == 0)
+			memset(&dalloc->cached, 0, sizeof(dalloc->cached));
+	}
+
+	return ret;
+}
+
 /*
  * Allocate a data extent.  An extent that's smaller than the requested
  * size can be returned.
@@ -671,14 +725,13 @@ out:
  */
 int scoutfs_alloc_data(struct super_block *sb, struct scoutfs_alloc *alloc,
 		       struct scoutfs_block_writer *wri,
-		       struct scoutfs_alloc_root *root,
-		       struct scoutfs_extent *cached, u64 count,
+		       struct scoutfs_data_alloc *dalloc, u64 count,
 		       u64 *blkno_ret, u64 *count_ret)
 {
 	struct alloc_ext_args args = {
 		.alloc = alloc,
 		.wri = wri,
-		.root = root,
+		.root = &dalloc->root,
 		.type = SCOUTFS_FREE_EXTENT_LEN_TYPE,
 	};
 	struct scoutfs_extent ext;
@@ -699,20 +752,21 @@ int scoutfs_alloc_data(struct super_block *sb, struct scoutfs_alloc *alloc,
 	}
 
 	/* smaller allocations come from a cached extent */
-	if (cached->len == 0) {
+	if (dalloc->cached.len == 0) {
 		ret = scoutfs_ext_alloc(sb, &alloc_ext_ops, &args, 0, 0,
-					SCOUTFS_ALLOC_DATA_LG_THRESH, cached);
+					SCOUTFS_ALLOC_DATA_LG_THRESH,
+					&dalloc->cached);
 		if (ret < 0)
 			goto out;
 	}
 
-	len = min(count, cached->len);
+	len = min(count, dalloc->cached.len);
 
-	*blkno_ret = cached->start;
+	*blkno_ret = dalloc->cached.start;
 	*count_ret = len;
-	cached->start += len;
-	cached->len -= len;
+	dalloc->cached.start += len;
+	dalloc->cached.len -= len;
 	ret = 0;
 out:
 	if (ret < 0) {
@@ -720,6 +774,8 @@ out:
 		ret = -ENOSPC;
 		*blkno_ret = 0;
 		*count_ret = 0;
+	} else {
+		dalloc_update_total_len(dalloc);
 	}
 
 	scoutfs_inc_counter(sb, alloc_alloc_data);

--- a/src/alloc.h
+++ b/src/alloc.h

@@ -80,6 +80,18 @@ struct scoutfs_alloc {
 	struct scoutfs_alloc_list_head freed;
 };
 
+/*
+ * A run-time data allocator.  We have a cached extent in memory that is
+ * a lot cheaper to work with than the extent items, and we have a
+ * consistent record of the total_len that can be sampled outside of the
+ * usual heavy serialization of the extent modifications.
+ */
+struct scoutfs_data_alloc {
+	struct scoutfs_alloc_root root;
+	struct scoutfs_extent cached;
+	atomic64_t total_len;
+};
+
 void scoutfs_alloc_init(struct scoutfs_alloc *alloc,
 			struct scoutfs_alloc_list_head *avail,
 			struct scoutfs_alloc_list_head *freed);
@@ -92,10 +104,18 @@ int scoutfs_alloc_meta(struct super_block *sb, struct scoutfs_alloc *alloc,
 int scoutfs_free_meta(struct super_block *sb, struct scoutfs_alloc *alloc,
 		      struct scoutfs_block_writer *wri, u64 blkno);
 
+void scoutfs_dalloc_init(struct scoutfs_data_alloc *dalloc,
+			 struct scoutfs_alloc_root *data_avail);
+void scoutfs_dalloc_get_root(struct scoutfs_data_alloc *dalloc,
+			     struct scoutfs_alloc_root *data_avail);
+u64 scoutfs_dalloc_total_len(struct scoutfs_data_alloc *dalloc);
+int scoutfs_dalloc_return_cached(struct super_block *sb,
+				 struct scoutfs_alloc *alloc,
+				 struct scoutfs_block_writer *wri,
+				 struct scoutfs_data_alloc *dalloc);
 int scoutfs_alloc_data(struct super_block *sb, struct scoutfs_alloc *alloc,
 		       struct scoutfs_block_writer *wri,
-		       struct scoutfs_alloc_root *root,
-		       struct scoutfs_extent *cached, u64 count,
+		       struct scoutfs_data_alloc *dalloc, u64 count,
 		       u64 *blkno_ret, u64 *count_ret);
 int scoutfs_free_data(struct super_block *sb, struct scoutfs_alloc *alloc,
 		      struct scoutfs_block_writer *wri,

--- a/src/data.c
+++ b/src/data.c

@@ -53,9 +53,8 @@ struct data_info {
 	struct mutex mutex;
 	struct scoutfs_alloc *alloc;
 	struct scoutfs_block_writer *wri;
-	struct scoutfs_alloc_root data_avail;
 	struct scoutfs_alloc_root data_freed;
-	struct scoutfs_extent cached_ext;
+	struct scoutfs_data_alloc dalloc;
 };
 
 #define DECLARE_DATA_INFO(sb, name) \
@@ -432,8 +431,7 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
 	count = 1;
 
 	ret = scoutfs_alloc_data(sb, datinf->alloc, datinf->wri,
-				 &datinf->data_avail, &datinf->cached_ext,
-				 count, &blkno, &count);
+				 &datinf->dalloc, count, &blkno, &count);
 	if (ret < 0)
 		goto out;
 
@@ -916,9 +914,8 @@ static s64 fallocate_extents(struct super_block *sb, struct inode *inode,
 		mutex_lock(&datinf->mutex);
 
 		ret = scoutfs_alloc_data(sb, datinf->alloc, datinf->wri,
-					 &datinf->data_avail,
-					 &datinf->cached_ext,
-					 count, &blkno, &count);
+					 &datinf->dalloc, count,
+					 &blkno, &count);
 		if (ret == 0) {
 			ret = scoutfs_ext_set(sb, &data_ext_ops, &args, iblock,
 					      count, blkno,
@@ -926,7 +923,7 @@ static s64 fallocate_extents(struct super_block *sb, struct inode *inode,
 			if (ret < 0) {
 				err = scoutfs_free_data(sb, datinf->alloc,
 							datinf->wri,
-							&datinf->data_avail,
+							&datinf->data_freed,
 							blkno, count);
 				BUG_ON(err); /* inconsistent */
 			}
@@ -1532,7 +1529,7 @@ void scoutfs_data_init_btrees(struct super_block *sb,
 
 	datinf->alloc = alloc;
 	datinf->wri = wri;
-	datinf->data_avail = lt->data_avail;
+	scoutfs_dalloc_init(&datinf->dalloc, &lt->data_avail);
 	datinf->data_freed = lt->data_freed;
 
 	mutex_unlock(&datinf->mutex);
@@ -1545,7 +1542,7 @@ void scoutfs_data_get_btrees(struct super_block *sb,
 
 	mutex_lock(&datinf->mutex);
 
-	lt->data_avail = datinf->data_avail;
+	scoutfs_dalloc_get_root(&datinf->dalloc, &lt->data_avail);
 	lt->data_freed = datinf->data_freed;
 
 	mutex_unlock(&datinf->mutex);
@@ -1561,31 +1558,20 @@ int scoutfs_data_prepare_commit(struct super_block *sb)
 	int ret;
 
 	mutex_lock(&datinf->mutex);
-	if (datinf->cached_ext.len) {
-		ret = scoutfs_free_data(sb, datinf->alloc, datinf->wri,
-					&datinf->data_avail,
-					datinf->cached_ext.start,
-					datinf->cached_ext.len);
-		if (ret == 0)
-			memset(&datinf->cached_ext, 0,
-			       sizeof(datinf->cached_ext));
-	} else {
-		ret = 0;
-	}
+	ret = scoutfs_dalloc_return_cached(sb, datinf->alloc, datinf->wri,
+					   &datinf->dalloc);
 	mutex_unlock(&datinf->mutex);
 
 	return ret;
 }
 
 /*
  * This isn't serializing with allocators so it can be a bit racey.
  */
 u64 scoutfs_data_alloc_free_bytes(struct super_block *sb)
 {
 	DECLARE_DATA_INFO(sb, datinf);
 
-	return le64_to_cpu(datinf->data_avail.total_len) <<
-	       SCOUTFS_BLOCK_SM_SHIFT;
+	return scoutfs_dalloc_total_len(&datinf->dalloc) <<
+	       SCOUTFS_BLOCK_SM_SHIFT;
 }
 
 int scoutfs_data_setup(struct super_block *sb)