scoutfs: commit trans before premature enospc

File data allocations come from radix allocators which are populated by
the server before each client transaction.  It's possible to fully
consume the data allocator within one transaction if the number of dirty
metadata blocks is kept low.  This could result in premature ENOSPC.
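
For illustration only, a toy userspace simulation of the failure mode (none of
these names or numbers come from scoutfs): the data allocator level was not,
by itself, a reason to commit, so a writer that dirties little metadata can
drain the per-transaction pool of data blocks to zero and see ENOSPC even
though the filesystem still has free space to hand out.

	#include <stdio.h>

	/* toy numbers: data blocks handed to a transaction, and the
	 * dirty-metadata threshold that forces a commit in this model */
	#define DATA_BLOCKS_PER_TRANS	1024
	#define DIRTY_META_LIMIT	512

	int main(void)
	{
		int data_avail = DATA_BLOCKS_PER_TRANS;
		int dirty_meta = 0;
		int i;

		for (i = 0; i < 100000; i++) {
			if (dirty_meta >= DIRTY_META_LIMIT) {
				/* commit forced only by dirty metadata; refill on commit */
				data_avail = DATA_BLOCKS_PER_TRANS;
				dirty_meta = 0;
			}
			if (data_avail == 0) {
				/* allocator empty mid-transaction */
				printf("premature ENOSPC after %d blocks\n", i);
				return 1;
			}
			data_avail--;
			if (i % 100 == 0)
				dirty_meta++;	/* streaming data dirties little metadata */
		}
		printf("wrote everything without hitting ENOSPC\n");
		return 0;
	}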

This was happening to the archive-light-cycle test.  If the transactions
performed by previous tests lined up just right then the creation of the
initial test files could see ENOSPC and cause all sorts of nonsense in
the rest of the test, culminating in cmp commands stuck in offline
waits.

This introduces high and low data allocator water marks for
transactions.  The server tries to fill data allocators for each
transaction to the high water mark and the client forces the commit of a
transaction if its data allocator falls below the low water mark.
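
For a sense of scale, a standalone arithmetic sketch (the block sizes below
are example parameters, not values taken from the scoutfs headers) of what the
byte-valued water marks work out to as per-transaction block counts, mirroring
the target = SCOUTFS_TRANS_DATA_ALLOC_HWM / SCOUTFS_BLOCK_SIZE calculation the
server does below:

	#include <stdio.h>

	/* water mark values from trans.h in this commit */
	#define HWM_BYTES (2ULL * 1024 * 1024 * 1024)	/* 2 GiB fill target */
	#define LWM_BYTES (256ULL * 1024 * 1024)	/* 256 MiB forced-commit floor */

	int main(void)
	{
		/* example block sizes only; the real divisor is SCOUTFS_BLOCK_SIZE */
		unsigned long long sizes[] = { 4096, 65536 };
		int i;

		for (i = 0; i < 2; i++)
			printf("block size %llu: fill to %llu blocks, commit below %llu blocks\n",
			       sizes[i], HWM_BYTES / sizes[i], LWM_BYTES / sizes[i]);
		return 0;
	}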

The archive-light-cycle test now passes easily and we see the
trans_commit_data_alloc_low counter increasing during the test.

Signed-off-by: Zach Brown <zab@versity.com>
commit 9ad86d4d29 (parent 7da8ddb8a1)
Author:    Zach Brown
Committer: Zach Brown
Date:      2020-04-22 14:04:04 -07:00

8 changed files with 41 additions and 1 deletion

counters.h

@@ -85,6 +85,7 @@
 	EXPAND_COUNTER(quorum_write_block) \
 	EXPAND_COUNTER(quorum_write_block_error) \
 	EXPAND_COUNTER(quorum_fenced) \
+	EXPAND_COUNTER(trans_commit_data_alloc_low) \
 	EXPAND_COUNTER(trans_commit_fsync) \
 	EXPAND_COUNTER(trans_commit_full) \
 	EXPAND_COUNTER(trans_commit_sync_fs) \

data.c

@@ -2017,6 +2017,16 @@ void scoutfs_data_get_btrees(struct super_block *sb,
 	up_read(&datinf->alloc_rwsem);
 }
 
+/*
+ * This isn't serializing with allocators so it can be a bit racey.
+ */
+u64 scoutfs_data_alloc_free_bytes(struct super_block *sb)
+{
+	DECLARE_DATA_INFO(sb, datinf);
+
+	return scoutfs_radix_root_free_bytes(sb, &datinf->data_avail);
+}
+
 int scoutfs_data_setup(struct super_block *sb)
 {
 	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);

data.h

@@ -78,6 +78,7 @@ void scoutfs_data_init_btrees(struct super_block *sb,
 			      struct scoutfs_log_trees *lt);
 void scoutfs_data_get_btrees(struct super_block *sb,
 			     struct scoutfs_log_trees *lt);
+u64 scoutfs_data_alloc_free_bytes(struct super_block *sb);
 int scoutfs_data_setup(struct super_block *sb);
 void scoutfs_data_destroy(struct super_block *sb);

radix.c

@@ -1526,6 +1526,12 @@ void scoutfs_radix_root_init(struct super_block *sb,
 	init_ref(&root->ref, 0, false);
 }
 
+u64 scoutfs_radix_root_free_bytes(struct super_block *sb,
+				  struct scoutfs_radix_root *root)
+{
+	return le64_to_cpu(root->ref.sm_total) << SCOUTFS_BLOCK_SHIFT;
+}
+
 /*
  * The first bit nr in a leaf containing the bit, used by callers to
  * identify regions that span leafs and would need to be freed in

radix.h

@@ -38,6 +38,8 @@ void scoutfs_radix_init_alloc(struct scoutfs_radix_allocator *alloc,
 			      struct scoutfs_radix_root *freed);
 void scoutfs_radix_root_init(struct super_block *sb,
 			     struct scoutfs_radix_root *root, bool meta);
+u64 scoutfs_radix_root_free_bytes(struct super_block *sb,
+				  struct scoutfs_radix_root *root);
 u64 scoutfs_radix_bit_leaf_nr(u64 bit);
 
 #endif

server.c

@@ -35,6 +35,7 @@
#include "lock_server.h"
#include "endian_swap.h"
#include "quorum.h"
#include "trans.h"
/*
* Every active mount can act as the server that listens on a net
@@ -412,7 +413,7 @@ static int server_get_log_trees(struct super_block *sb,
 	}
 
 	/* ensure client has enough free data blocks for a transaction */
-	target = (2ULL*1024*1024*1024) / SCOUTFS_BLOCK_SIZE;
+	target = SCOUTFS_TRANS_DATA_ALLOC_HWM / SCOUTFS_BLOCK_SIZE;
 	if (le64_to_cpu(ltv.data_avail.ref.sm_total) < target) {
 		count = target - le64_to_cpu(ltv.data_avail.ref.sm_total);

trans.c

@@ -315,6 +315,13 @@ struct scoutfs_reservation {
  * we piggy back on their hold.  We wait if the writer is trying to
  * write out the transation.  And if our items won't fit then we kick off
  * a write.
+ *
+ * This is called as a condition for wait_event.  It is very limited in
+ * the locking (blocking) it can do because the caller has set the task
+ * state before testing the condition safely race with waking after
+ * setting the condition.  Our checking the amount of dirty metadata
+ * blocks and free data blocks is racy, but we don't mind the risk of
+ * delaying or prematurely forcing commits.
  */
 static bool acquired_hold(struct super_block *sb,
 			  struct scoutfs_reservation *rsv,
@@ -354,6 +361,13 @@ static bool acquired_hold(struct super_block *sb,
 		goto out;
 	}
 
+	/* Try to refill data allocator before premature enospc */
+	if (scoutfs_data_alloc_free_bytes(sb) <= SCOUTFS_TRANS_DATA_ALLOC_LWM) {
+		scoutfs_inc_counter(sb, trans_commit_data_alloc_low);
+		queue_trans_work(sbi);
+		goto out;
+	}
+
 	tri->reserved_items = items;
 	tri->reserved_vals = vals;

trans.h

@@ -1,6 +1,11 @@
 #ifndef _SCOUTFS_TRANS_H_
 #define _SCOUTFS_TRANS_H_
 
+/* the server will attempt to fill data allocs for each trans */
+#define SCOUTFS_TRANS_DATA_ALLOC_HWM (2ULL * 1024 * 1024 * 1024)
+/* the client will force commits if data allocators get too low */
+#define SCOUTFS_TRANS_DATA_ALLOC_LWM (256ULL * 1024 * 1024)
+
 #include "count.h"
 
 void scoutfs_trans_write_func(struct work_struct *work);