mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-08 04:55:21 +00:00
Item seq is max of trans and lock write_seq
Rename the item version to seq and set it to the max of the transaction seq and the lock's write_seq. This lets btree item merging chose a seq at which all dirty items written in future commits must have greater seqs. It can drop the seqs from items written to the fs tree during btree merging knowing that there aren't any older items out in transactions that could be mistaken for newer items. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -37,9 +37,9 @@
|
||||
*
|
||||
* The log btrees are modified by multiple transactions over time so
|
||||
* there is no consistent ordering relationship between the items in
|
||||
* different btrees. Each item in a log btree stores a version number
|
||||
* for the item. Readers check log btrees for the most recent version
|
||||
* that it should use.
|
||||
* different btrees. Each item in a log btree stores a seq for the
|
||||
* item. Readers check log btrees for the most recent seq that it
|
||||
* should use.
|
||||
*
|
||||
* The item cache reads items in bulk from stable btrees, and writes a
|
||||
* transaction's worth of dirty items into the item log btree.
|
||||
@@ -249,7 +249,7 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key,
|
||||
* If we hit stale blocks and retry we can call the callback for
|
||||
* duplicate items. This is harmless because the items are stable while
|
||||
* the caller holds their cluster lock and the caller has to filter out
|
||||
* item versions anyway.
|
||||
* item seqs anyway.
|
||||
*/
|
||||
int scoutfs_forest_read_items(struct super_block *sb,
|
||||
struct scoutfs_lock *lock,
|
||||
@@ -426,29 +426,29 @@ out:
|
||||
|
||||
/*
|
||||
* The caller is commiting items in the transaction and has found the
|
||||
* greatest item version amongst them. We store it in the log_trees root
|
||||
* greatest item seq amongst them. We store it in the log_trees root
|
||||
* to send to the server.
|
||||
*/
|
||||
void scoutfs_forest_set_max_vers(struct super_block *sb, u64 max_vers)
|
||||
void scoutfs_forest_set_max_seq(struct super_block *sb, u64 max_seq)
|
||||
{
|
||||
DECLARE_FOREST_INFO(sb, finf);
|
||||
|
||||
finf->our_log.max_item_vers = cpu_to_le64(max_vers);
|
||||
finf->our_log.max_item_seq = cpu_to_le64(max_seq);
|
||||
}
|
||||
|
||||
/*
|
||||
* The server is calling during setup to find the greatest item version
|
||||
* The server is calling during setup to find the greatest item seq
|
||||
* amongst all the log tree roots. They have the authoritative current
|
||||
* super.
|
||||
*
|
||||
* Item versions are only used to compare items in log trees, not in the
|
||||
* main fs tree. All we have to do is find the greatest version amongst
|
||||
* the log_trees so that new locks will have a write_version greater
|
||||
* than all the items in the log_trees.
|
||||
* Item seqs are only used to compare items in log trees, not in the
|
||||
* main fs tree. All we have to do is find the greatest seq amongst the
|
||||
* log_trees so that the core seq will have a greater seq than all the
|
||||
* items in the log_trees.
|
||||
*/
|
||||
int scoutfs_forest_get_max_vers(struct super_block *sb,
|
||||
struct scoutfs_super_block *super,
|
||||
u64 *vers)
|
||||
int scoutfs_forest_get_max_seq(struct super_block *sb,
|
||||
struct scoutfs_super_block *super,
|
||||
u64 *seq)
|
||||
{
|
||||
struct scoutfs_log_trees *lt;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
@@ -456,7 +456,7 @@ int scoutfs_forest_get_max_vers(struct super_block *sb,
|
||||
int ret;
|
||||
|
||||
scoutfs_key_init_log_trees(<k, 0, 0);
|
||||
*vers = 0;
|
||||
*seq = 0;
|
||||
|
||||
for (;; scoutfs_key_inc(<k)) {
|
||||
ret = scoutfs_btree_next(sb, &super->logs_root, <k, &iref);
|
||||
@@ -464,8 +464,7 @@ int scoutfs_forest_get_max_vers(struct super_block *sb,
|
||||
if (iref.val_len == sizeof(struct scoutfs_log_trees)) {
|
||||
ltk = *iref.key;
|
||||
lt = iref.val;
|
||||
*vers = max(*vers,
|
||||
le64_to_cpu(lt->max_item_vers));
|
||||
*seq = max(*seq, le64_to_cpu(lt->max_item_seq));
|
||||
} else {
|
||||
ret = -EIO;
|
||||
}
|
||||
@@ -534,7 +533,7 @@ void scoutfs_forest_init_btrees(struct super_block *sb,
|
||||
memset(&finf->our_log, 0, sizeof(finf->our_log));
|
||||
finf->our_log.item_root = lt->item_root;
|
||||
finf->our_log.bloom_ref = lt->bloom_ref;
|
||||
finf->our_log.max_item_vers = lt->max_item_vers;
|
||||
finf->our_log.max_item_seq = lt->max_item_seq;
|
||||
finf->our_log.rid = lt->rid;
|
||||
finf->our_log.nr = lt->nr;
|
||||
finf->srch_file = lt->srch_file;
|
||||
@@ -564,7 +563,7 @@ void scoutfs_forest_get_btrees(struct super_block *sb,
|
||||
lt->item_root = finf->our_log.item_root;
|
||||
lt->bloom_ref = finf->our_log.bloom_ref;
|
||||
lt->srch_file = finf->srch_file;
|
||||
lt->max_item_vers = finf->our_log.max_item_vers;
|
||||
lt->max_item_seq = finf->our_log.max_item_seq;
|
||||
|
||||
scoutfs_block_put(sb, finf->srch_bl);
|
||||
finf->srch_bl = NULL;
|
||||
|
||||
@@ -23,10 +23,10 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
scoutfs_forest_item_cb cb, void *arg);
|
||||
int scoutfs_forest_set_bloom_bits(struct super_block *sb,
|
||||
struct scoutfs_lock *lock);
|
||||
void scoutfs_forest_set_max_vers(struct super_block *sb, u64 max_vers);
|
||||
int scoutfs_forest_get_max_vers(struct super_block *sb,
|
||||
struct scoutfs_super_block *super,
|
||||
u64 *vers);
|
||||
void scoutfs_forest_set_max_seq(struct super_block *sb, u64 max_seq);
|
||||
int scoutfs_forest_get_max_seq(struct super_block *sb,
|
||||
struct scoutfs_super_block *super,
|
||||
u64 *seq);
|
||||
int scoutfs_forest_insert_list(struct super_block *sb,
|
||||
struct scoutfs_btree_item_list *lst);
|
||||
int scoutfs_forest_srch_add(struct super_block *sb, u64 hash, u64 ino, u64 id);
|
||||
|
||||
@@ -449,13 +449,13 @@ struct scoutfs_log_trees {
|
||||
struct scoutfs_srch_file srch_file;
|
||||
__le64 data_alloc_zone_blocks;
|
||||
__le64 data_alloc_zones[SCOUTFS_DATA_ALLOC_ZONE_LE64S];
|
||||
__le64 max_item_vers;
|
||||
__le64 max_item_seq;
|
||||
__le64 rid;
|
||||
__le64 nr;
|
||||
};
|
||||
|
||||
struct scoutfs_log_item_value {
|
||||
__le64 vers;
|
||||
__le64 seq;
|
||||
__u8 flags;
|
||||
__u8 __pad[7];
|
||||
__u8 data[];
|
||||
|
||||
@@ -1308,10 +1308,10 @@ static struct active_reader *active_rbtree_walk(struct rb_root *root,
|
||||
* on our root and aren't in dirty or lru lists.
|
||||
*
|
||||
* We need to store deletion items here as we read items from all the
|
||||
* btrees so that they can override older versions of the items. The
|
||||
* deletion items will be deleted before we insert the pages into the
|
||||
* cache. We don't insert old versions of items into the tree here so
|
||||
* that the trees don't have to compare versions.
|
||||
* btrees so that they can override older items. The deletion items
|
||||
* will be deleted before we insert the pages into the cache. We don't
|
||||
* insert old versions of items into the tree here so that the trees
|
||||
* don't have to compare seqs.
|
||||
*/
|
||||
static int read_page_item(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_log_item_value *liv, void *val,
|
||||
@@ -1331,7 +1331,7 @@ static int read_page_item(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
pg = page_rbtree_walk(sb, root, key, key, NULL, NULL, &p_par, &p_pnode);
|
||||
found = item_rbtree_walk(&pg->item_root, key, NULL, &par, &pnode);
|
||||
if (found && (le64_to_cpu(found->liv.vers) >= le64_to_cpu(liv->vers)))
|
||||
if (found && (le64_to_cpu(found->liv.seq) >= le64_to_cpu(liv->seq)))
|
||||
return 0;
|
||||
|
||||
if (!page_has_room(pg, val_len)) {
|
||||
@@ -1783,6 +1783,21 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* An item's seq is greater of the client transaction's seq and the
|
||||
* lock's write_seq. This ensures that multiple commits in one lock
|
||||
* grant will have increasing seqs, and new locks in open commits will
|
||||
* also increase the seqs. It lets us limit the inputs of item merging
|
||||
* to the last stable seq and ensure that all the items in open
|
||||
* transactions and granted locks will have greater seqs.
|
||||
*/
|
||||
static __le64 item_seq(struct super_block *sb, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
return cpu_to_le64(max(sbi->trans_seq, lock->write_seq));
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark the item dirty. Dirtying while holding a transaction pins the
|
||||
* page holding the item and guarantees that the item can be deleted or
|
||||
@@ -1816,7 +1831,7 @@ int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key *key,
|
||||
ret = -ENOENT;
|
||||
} else {
|
||||
mark_item_dirty(sb, cinf, pg, NULL, item);
|
||||
item->liv.vers = cpu_to_le64(lock->write_seq);
|
||||
item->liv.seq = item_seq(sb, lock);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
@@ -1836,7 +1851,7 @@ static int item_create(struct super_block *sb, struct scoutfs_key *key,
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
struct scoutfs_log_item_value liv = {
|
||||
.vers = cpu_to_le64(lock->write_seq),
|
||||
.seq = item_seq(sb, lock),
|
||||
};
|
||||
struct cached_item *found;
|
||||
struct cached_item *item;
|
||||
@@ -1911,7 +1926,7 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
struct scoutfs_log_item_value liv = {
|
||||
.vers = cpu_to_le64(lock->write_seq),
|
||||
.seq = item_seq(sb, lock),
|
||||
};
|
||||
struct cached_item *item;
|
||||
struct cached_item *found;
|
||||
@@ -1946,7 +1961,7 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,
|
||||
if (val_len < found->val_len)
|
||||
pg->erased_bytes += found->val_len - val_len;
|
||||
found->val_len = val_len;
|
||||
found->liv.vers = liv.vers;
|
||||
found->liv.seq = liv.seq;
|
||||
mark_item_dirty(sb, cinf, pg, NULL, found);
|
||||
} else {
|
||||
item = alloc_item(pg, key, &liv, val, val_len);
|
||||
@@ -1978,7 +1993,7 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
struct scoutfs_log_item_value liv = {
|
||||
.vers = cpu_to_le64(lock->write_seq),
|
||||
.seq = item_seq(sb, lock),
|
||||
};
|
||||
struct cached_item *item;
|
||||
struct cached_page *pg;
|
||||
@@ -2020,7 +2035,7 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
erase_item(pg, item);
|
||||
} else {
|
||||
/* must emit deletion to clobber old persistent item */
|
||||
item->liv.vers = cpu_to_le64(lock->write_seq);
|
||||
item->liv.seq = liv.seq;
|
||||
item->liv.flags |= SCOUTFS_LOG_ITEM_FLAG_DELETION;
|
||||
item->deletion = 1;
|
||||
pg->erased_bytes += item->val_len;
|
||||
@@ -2106,7 +2121,7 @@ int scoutfs_item_write_dirty(struct super_block *sb)
|
||||
struct page *page;
|
||||
LIST_HEAD(pages);
|
||||
LIST_HEAD(pos);
|
||||
u64 max_vers = 0;
|
||||
u64 max_seq = 0;
|
||||
int val_len;
|
||||
int bytes;
|
||||
int off;
|
||||
@@ -2171,7 +2186,7 @@ int scoutfs_item_write_dirty(struct super_block *sb)
|
||||
val_len = sizeof(item->liv) + item->val_len;
|
||||
bytes = offsetof(struct scoutfs_btree_item_list,
|
||||
val[val_len]);
|
||||
max_vers = max(max_vers, le64_to_cpu(item->liv.vers));
|
||||
max_seq = max(max_seq, le64_to_cpu(item->liv.seq));
|
||||
|
||||
if (off + bytes > PAGE_SIZE) {
|
||||
page = second;
|
||||
@@ -2201,8 +2216,8 @@ int scoutfs_item_write_dirty(struct super_block *sb)
|
||||
read_unlock(&pg->rwlock);
|
||||
}
|
||||
|
||||
/* store max item vers in forest's log_trees */
|
||||
scoutfs_forest_set_max_vers(sb, max_vers);
|
||||
/* store max item seq in forest's log_trees */
|
||||
scoutfs_forest_set_max_seq(sb, max_seq);
|
||||
|
||||
/* write all the dirty items into log btree blocks */
|
||||
ret = scoutfs_forest_insert_list(sb, first);
|
||||
|
||||
@@ -518,6 +518,7 @@ static int process_waiting_requests(struct super_block *sb,
|
||||
|
||||
nl.key = snode->key;
|
||||
nl.new_mode = req->mode;
|
||||
nl.write_seq = 0;
|
||||
|
||||
/* see if there's an existing compatible grant to replace */
|
||||
gr = find_entry(snode, &snode->granted, req->rid);
|
||||
|
||||
@@ -2250,7 +2250,7 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
struct scoutfs_net_connection *conn = NULL;
|
||||
DECLARE_WAIT_QUEUE_HEAD(waitq);
|
||||
struct sockaddr_in sin;
|
||||
u64 max_vers;
|
||||
u64 max_seq;
|
||||
int ret;
|
||||
|
||||
trace_scoutfs_server_work_enter(sb, 0, 0);
|
||||
@@ -2314,12 +2314,12 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
le64_to_cpu(server->meta_avail->total_len))
|
||||
swap(server->meta_avail, server->meta_freed);
|
||||
|
||||
ret = scoutfs_forest_get_max_vers(sb, super, &max_vers);
|
||||
ret = scoutfs_forest_get_max_seq(sb, super, &max_seq);
|
||||
if (ret) {
|
||||
scoutfs_err(sb, "server couldn't find max item vers: %d", ret);
|
||||
scoutfs_err(sb, "server couldn't find max item seq: %d", ret);
|
||||
goto shutdown;
|
||||
}
|
||||
scoutfs_server_set_seq_if_greater(sb, max_vers);
|
||||
scoutfs_server_set_seq_if_greater(sb, max_seq);
|
||||
|
||||
ret = scoutfs_lock_server_setup(sb, &server->alloc, &server->wri) ?:
|
||||
start_recovery(sb);
|
||||
|
||||
Reference in New Issue
Block a user