mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-05 03:44:05 +00:00
Consistently sample data alloc total_len
With many concurrent writers we were seeing excessive commits forced because the transaction thought the data allocator was running low. The transaction was checking the raw total_len value in the data_avail alloc_root for the number of free data blocks. But this read wasn't locked, and allocators could completely remove a large free extent and then re-insert a slightly smaller free extent as they perform their allocation. The transaction could see a temporarily very small total_len and trigger a commit. Data allocations are serialized by a heavy mutex so we don't want to have the reader try and use that to see a consistent total_len. Instead we create a data allocator run-time struct that has a consistent total_len that is updated after all the extent items are manipulated. This also gives us a place to put the caller's cached extent so that it can be included in the total_len; previously it wasn't included in the free total that the transaction saw. The file data allocator can then initialize and use this struct instead of its raw use of the root and cached extent. Then the transaction can sample its consistent total_len that reflects the root and cached extent. A subtle detail is that fallocate can't use _free_data to return an allocated extent on error to the avail pool. It instead frees into the data_free pool like normal frees. It doesn't really matter that this could prematurely drain the avail pool because it's in an error path. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -657,6 +657,60 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Initialize a run-time data allocator from the caller's avail root.
 * The root is copied by value into the allocator, the cached extent
 * starts out empty, and the sampled total_len starts as the root's
 * total_len converted from its little-endian form.
 */
void scoutfs_dalloc_init(struct scoutfs_data_alloc *dalloc,
|
||||
struct scoutfs_alloc_root *data_avail)
|
||||
{
|
||||
dalloc->root = *data_avail;
|
||||
memset(&dalloc->cached, 0, sizeof(dalloc->cached));
|
||||
atomic64_set(&dalloc->total_len, le64_to_cpu(dalloc->root.total_len));
|
||||
}
|
||||
|
||||
/*
 * Copy the allocator's current avail root out to the caller by value.
 * Only the root struct is copied; the in-memory cached extent and the
 * sampled total_len are not part of what is handed back.
 */
void scoutfs_dalloc_get_root(struct scoutfs_data_alloc *dalloc,
|
||||
struct scoutfs_alloc_root *data_avail)
|
||||
{
|
||||
*data_avail = dalloc->root;
|
||||
}
|
||||
|
||||
/*
 * Publish a new sampled total_len: the root's total_len plus the
 * length of the in-memory cached extent, stored with a single atomic
 * set so that readers never see a partially updated value.
 */
static void dalloc_update_total_len(struct scoutfs_data_alloc *dalloc)
|
||||
{
|
||||
atomic64_set(&dalloc->total_len, le64_to_cpu(dalloc->root.total_len) +
|
||||
dalloc->cached.len);
|
||||
}
|
||||
|
||||
/*
 * Return the most recently published total_len using an atomic read.
 * This reads only the atomic counter and does not look at the root or
 * the cached extent directly.
 */
u64 scoutfs_dalloc_total_len(struct scoutfs_data_alloc *dalloc)
|
||||
{
|
||||
return atomic64_read(&dalloc->total_len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the current in-memory cached free extent to extent items in
|
||||
* the avail root. This should be locked by the caller just like
|
||||
* _alloc_data and _free_data.
|
||||
 *
 * Returns 0 if there was no cached extent or if it was inserted into
 * the root.  Any other return from scoutfs_ext_insert() is passed back
 * and the cached extent is left as it was.
 *
 * NOTE(review): the sampled total_len is not updated here; presumably
 * the insert grows root.total_len by cached.len so the published sum is
 * unchanged -- confirm against scoutfs_ext_insert().
*/
|
||||
int scoutfs_dalloc_return_cached(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_data_alloc *dalloc)
|
||||
{
|
||||
struct alloc_ext_args args = {
|
||||
.alloc = alloc,
|
||||
.wri = wri,
|
||||
.root = &dalloc->root,
|
||||
.type = SCOUTFS_FREE_EXTENT_BLKNO_TYPE,
|
||||
};
|
||||
int ret = 0;
|
||||
|
||||
/* an empty cached extent means there is nothing to give back */
if (dalloc->cached.len) {
|
||||
ret = scoutfs_ext_insert(sb, &alloc_ext_ops, &args,
|
||||
dalloc->cached.start,
|
||||
dalloc->cached.len, 0, 0);
|
||||
/* only forget the cached extent once the insert has succeeded */
if (ret == 0)
|
||||
memset(&dalloc->cached, 0, sizeof(dalloc->cached));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a data extent. An extent that's smaller than the requested
|
||||
* size can be returned.
|
||||
@@ -671,14 +725,13 @@ out:
|
||||
*/
|
||||
int scoutfs_alloc_data(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_alloc_root *root,
|
||||
struct scoutfs_extent *cached, u64 count,
|
||||
struct scoutfs_data_alloc *dalloc, u64 count,
|
||||
u64 *blkno_ret, u64 *count_ret)
|
||||
{
|
||||
struct alloc_ext_args args = {
|
||||
.alloc = alloc,
|
||||
.wri = wri,
|
||||
.root = root,
|
||||
.root = &dalloc->root,
|
||||
.type = SCOUTFS_FREE_EXTENT_LEN_TYPE,
|
||||
};
|
||||
struct scoutfs_extent ext;
|
||||
@@ -699,20 +752,21 @@ int scoutfs_alloc_data(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
}
|
||||
|
||||
/* smaller allocations come from a cached extent */
|
||||
if (cached->len == 0) {
|
||||
if (dalloc->cached.len == 0) {
|
||||
ret = scoutfs_ext_alloc(sb, &alloc_ext_ops, &args, 0, 0,
|
||||
SCOUTFS_ALLOC_DATA_LG_THRESH, cached);
|
||||
SCOUTFS_ALLOC_DATA_LG_THRESH,
|
||||
&dalloc->cached);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
len = min(count, cached->len);
|
||||
len = min(count, dalloc->cached.len);
|
||||
|
||||
*blkno_ret = cached->start;
|
||||
*blkno_ret = dalloc->cached.start;
|
||||
*count_ret = len;
|
||||
|
||||
cached->start += len;
|
||||
cached->len -= len;
|
||||
dalloc->cached.start += len;
|
||||
dalloc->cached.len -= len;
|
||||
ret = 0;
|
||||
out:
|
||||
if (ret < 0) {
|
||||
@@ -720,6 +774,8 @@ out:
|
||||
ret = -ENOSPC;
|
||||
*blkno_ret = 0;
|
||||
*count_ret = 0;
|
||||
} else {
|
||||
dalloc_update_total_len(dalloc);
|
||||
}
|
||||
|
||||
scoutfs_inc_counter(sb, alloc_alloc_data);
|
||||
|
||||
@@ -80,6 +80,18 @@ struct scoutfs_alloc {
|
||||
struct scoutfs_alloc_list_head freed;
|
||||
};
|
||||
|
||||
/*
|
||||
* A run-time data allocator. We have a cached extent in memory that is
|
||||
* a lot cheaper to work with than the extent items, and we have a
|
||||
* consistent record of the total_len that can be sampled outside of the
|
||||
* usual heavy serialization of the extent modifications.
|
||||
*/
|
||||
struct scoutfs_data_alloc {
|
||||
struct scoutfs_alloc_root root; /* avail root copied in by scoutfs_dalloc_init() */
|
||||
struct scoutfs_extent cached; /* in-memory free extent; len == 0 means empty */
|
||||
atomic64_t total_len; /* published root.total_len + cached.len */
|
||||
};
|
||||
|
||||
void scoutfs_alloc_init(struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_alloc_list_head *avail,
|
||||
struct scoutfs_alloc_list_head *freed);
|
||||
@@ -92,10 +104,18 @@ int scoutfs_alloc_meta(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
int scoutfs_free_meta(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri, u64 blkno);
|
||||
|
||||
void scoutfs_dalloc_init(struct scoutfs_data_alloc *dalloc,
|
||||
struct scoutfs_alloc_root *data_avail);
|
||||
void scoutfs_dalloc_get_root(struct scoutfs_data_alloc *dalloc,
|
||||
struct scoutfs_alloc_root *data_avail);
|
||||
u64 scoutfs_dalloc_total_len(struct scoutfs_data_alloc *dalloc);
|
||||
int scoutfs_dalloc_return_cached(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_data_alloc *dalloc);
|
||||
int scoutfs_alloc_data(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_alloc_root *root,
|
||||
struct scoutfs_extent *cached, u64 count,
|
||||
struct scoutfs_data_alloc *dalloc, u64 count,
|
||||
u64 *blkno_ret, u64 *count_ret);
|
||||
int scoutfs_free_data(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
|
||||
@@ -53,9 +53,8 @@ struct data_info {
|
||||
struct mutex mutex;
|
||||
struct scoutfs_alloc *alloc;
|
||||
struct scoutfs_block_writer *wri;
|
||||
struct scoutfs_alloc_root data_avail;
|
||||
struct scoutfs_alloc_root data_freed;
|
||||
struct scoutfs_extent cached_ext;
|
||||
struct scoutfs_data_alloc dalloc;
|
||||
};
|
||||
|
||||
#define DECLARE_DATA_INFO(sb, name) \
|
||||
@@ -432,8 +431,7 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
|
||||
count = 1;
|
||||
|
||||
ret = scoutfs_alloc_data(sb, datinf->alloc, datinf->wri,
|
||||
&datinf->data_avail, &datinf->cached_ext,
|
||||
count, &blkno, &count);
|
||||
&datinf->dalloc, count, &blkno, &count);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -916,9 +914,8 @@ static s64 fallocate_extents(struct super_block *sb, struct inode *inode,
|
||||
mutex_lock(&datinf->mutex);
|
||||
|
||||
ret = scoutfs_alloc_data(sb, datinf->alloc, datinf->wri,
|
||||
&datinf->data_avail,
|
||||
&datinf->cached_ext,
|
||||
count, &blkno, &count);
|
||||
&datinf->dalloc, count,
|
||||
&blkno, &count);
|
||||
if (ret == 0) {
|
||||
ret = scoutfs_ext_set(sb, &data_ext_ops, &args, iblock,
|
||||
count, blkno,
|
||||
@@ -926,7 +923,7 @@ static s64 fallocate_extents(struct super_block *sb, struct inode *inode,
|
||||
if (ret < 0) {
|
||||
err = scoutfs_free_data(sb, datinf->alloc,
|
||||
datinf->wri,
|
||||
&datinf->data_avail,
|
||||
&datinf->data_freed,
|
||||
blkno, count);
|
||||
BUG_ON(err); /* inconsistent */
|
||||
}
|
||||
@@ -1532,7 +1529,7 @@ void scoutfs_data_init_btrees(struct super_block *sb,
|
||||
|
||||
datinf->alloc = alloc;
|
||||
datinf->wri = wri;
|
||||
datinf->data_avail = lt->data_avail;
|
||||
scoutfs_dalloc_init(&datinf->dalloc, &lt->data_avail);
|
||||
datinf->data_freed = lt->data_freed;
|
||||
|
||||
mutex_unlock(&datinf->mutex);
|
||||
@@ -1545,7 +1542,7 @@ void scoutfs_data_get_btrees(struct super_block *sb,
|
||||
|
||||
mutex_lock(&datinf->mutex);
|
||||
|
||||
lt->data_avail = datinf->data_avail;
|
||||
scoutfs_dalloc_get_root(&datinf->dalloc, &lt->data_avail);
|
||||
lt->data_freed = datinf->data_freed;
|
||||
|
||||
mutex_unlock(&datinf->mutex);
|
||||
@@ -1561,31 +1558,20 @@ int scoutfs_data_prepare_commit(struct super_block *sb)
|
||||
int ret;
|
||||
|
||||
mutex_lock(&datinf->mutex);
|
||||
if (datinf->cached_ext.len) {
|
||||
ret = scoutfs_free_data(sb, datinf->alloc, datinf->wri,
|
||||
&datinf->data_avail,
|
||||
datinf->cached_ext.start,
|
||||
datinf->cached_ext.len);
|
||||
if (ret == 0)
|
||||
memset(&datinf->cached_ext, 0,
|
||||
sizeof(datinf->cached_ext));
|
||||
} else {
|
||||
ret = 0;
|
||||
}
|
||||
ret = scoutfs_dalloc_return_cached(sb, datinf->alloc, datinf->wri,
|
||||
&datinf->dalloc);
|
||||
mutex_unlock(&datinf->mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This isn't serializing with allocators so it can be a bit racy.
|
||||
*/
|
||||
u64 scoutfs_data_alloc_free_bytes(struct super_block *sb)
|
||||
{
|
||||
DECLARE_DATA_INFO(sb, datinf);
|
||||
|
||||
return le64_to_cpu(datinf->data_avail.total_len) <<
|
||||
SCOUTFS_BLOCK_SM_SHIFT;
|
||||
return scoutfs_dalloc_total_len(&datinf->dalloc) <<
|
||||
SCOUTFS_BLOCK_SM_SHIFT;
|
||||
|
||||
}
|
||||
|
||||
int scoutfs_data_setup(struct super_block *sb)
|
||||
|
||||
Reference in New Issue
Block a user