From 7eddeb82abcb62233cdfe576bb080ed527bebe7d Mon Sep 17 00:00:00 2001 From: Vladislav Bolkhovitin Date: Fri, 13 Aug 2010 18:12:45 +0000 Subject: [PATCH] - Some iSCSI performance fixes - __attribute__((aligned(sizeof(long))) added to all field with different protection than the previous field to make sure they don't share the same bus line. Just in case, actually, compiler should do the alignment by default. - Small docs updates. git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@1948 d57e44dd-8a1f-0410-8b47-8ef2f437770f --- iscsi-scst/include/iscsi_scst.h | 2 +- iscsi-scst/kernel/conn.c | 27 +++++--- iscsi-scst/kernel/iscsi.c | 99 ++++++++++++++++++++---------- iscsi-scst/kernel/iscsi.h | 28 +++++++-- iscsi-scst/kernel/nthread.c | 37 ++++------- scst/README | 6 +- scst/include/scst.h | 33 ++++++---- scst/src/dev_handlers/scst_vdisk.c | 3 +- 8 files changed, 149 insertions(+), 86 deletions(-) diff --git a/iscsi-scst/include/iscsi_scst.h b/iscsi-scst/include/iscsi_scst.h index 3f119e649..3542c3506 100644 --- a/iscsi-scst/include/iscsi_scst.h +++ b/iscsi-scst/include/iscsi_scst.h @@ -187,7 +187,7 @@ struct iscsi_kern_initiator_info { #define MAX_NR_QUEUED_CMNDS 256 #define DEFAULT_RSP_TIMEOUT 30 -#define MIN_RSP_TIMEOUT 10 +#define MIN_RSP_TIMEOUT 2 #define MAX_RSP_TIMEOUT 65535 #define DEFAULT_NOP_IN_INTERVAL 30 diff --git a/iscsi-scst/kernel/conn.c b/iscsi-scst/kernel/conn.c index a7c5f1d18..b39b2c83c 100644 --- a/iscsi-scst/kernel/conn.c +++ b/iscsi-scst/kernel/conn.c @@ -382,6 +382,24 @@ static void iscsi_data_ready(struct sock *sk, int len) return; } +void __iscsi_write_space_ready(struct iscsi_conn *conn) +{ + TRACE_ENTRY(); + + spin_lock_bh(&iscsi_wr_lock); + conn->wr_space_ready = 1; + if ((conn->wr_state == ISCSI_CONN_WR_STATE_SPACE_WAIT)) { + TRACE_DBG("wr space ready (conn %p)", conn); + list_add_tail(&conn->wr_list_entry, &iscsi_wr_list); + conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST; + wake_up(&iscsi_wr_waitQ); + } + spin_unlock_bh(&iscsi_wr_lock); + + TRACE_EXIT(); + return; +} + static void iscsi_write_space_ready(struct sock *sk) { struct iscsi_conn *conn = sk->sk_user_data; @@ -390,14 +408,7 @@ static void iscsi_write_space_ready(struct sock *sk) TRACE_DBG("Write space ready for conn %p", conn); - spin_lock_bh(&iscsi_wr_lock); - conn->wr_space_ready = 1; - if ((conn->wr_state == ISCSI_CONN_WR_STATE_SPACE_WAIT)) { - list_add_tail(&conn->wr_list_entry, &iscsi_wr_list); - conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST; - wake_up(&iscsi_wr_waitQ); - } - spin_unlock_bh(&iscsi_wr_lock); + __iscsi_write_space_ready(conn); conn->old_write_space(sk); diff --git a/iscsi-scst/kernel/iscsi.c b/iscsi-scst/kernel/iscsi.c index 1af866c5f..6fd84da1b 100644 --- a/iscsi-scst/kernel/iscsi.c +++ b/iscsi-scst/kernel/iscsi.c @@ -447,6 +447,8 @@ void cmnd_done(struct iscsi_cmnd *cmnd) cmnd_free(cmnd); } else { + struct iscsi_cmnd *parent = cmnd->parent_req; + if (cmnd->own_sg) { TRACE_DBG("own_sg for rsp %p", cmnd); if ((cmnd->sg != &dummy_sg) && (cmnd->sg != cmnd->rsp_sg)) @@ -460,15 +462,15 @@ void cmnd_done(struct iscsi_cmnd *cmnd) EXTRACHECKS_BUG_ON(cmnd->dec_active_cmnds); - if (cmnd == cmnd->parent_req->main_rsp) { + if (cmnd == parent->main_rsp) { TRACE_DBG("Finishing main rsp %p (req %p)", cmnd, - cmnd->parent_req); - cmnd->parent_req->main_rsp = NULL; + parent); + parent->main_rsp = NULL; } - cmnd_put(cmnd->parent_req); + cmnd_put(parent); /* - * rsp will be freed on the last parent's put and can already + * cmnd will be freed on the last parent's put and can already * be freed!! */ } @@ -554,11 +556,7 @@ void req_cmnd_release_force(struct iscsi_cmnd *req) return; } -/* - * Corresponding conn may also get destroyed after this function, except only - * if it's called from the read thread! - */ -static void req_cmnd_release(struct iscsi_cmnd *req) +static void req_cmnd_pre_release(struct iscsi_cmnd *req) { struct iscsi_cmnd *c, *t; @@ -619,6 +617,19 @@ static void req_cmnd_release(struct iscsi_cmnd *req) #endif } + TRACE_EXIT(); + return; +} + +/* + * Corresponding conn may also get destroyed after this function, except only + * if it's called from the read thread! + */ +static void req_cmnd_release(struct iscsi_cmnd *req) +{ + TRACE_ENTRY(); + + req_cmnd_pre_release(req); cmnd_put(req); TRACE_EXIT(); @@ -3142,16 +3153,11 @@ static void iscsi_preprocessing_done(struct scst_cmd *scst_cmd) return; } -/* - * No locks. - * - * IMPORTANT! Connection conn must be protected by additional conn_get() - * upon entrance in this function, because otherwise it could be destroyed - * inside as a result of iscsi_send(), which releases sent commands. - */ -static void iscsi_try_local_processing(struct iscsi_conn *conn) +/* No locks */ +static void iscsi_try_local_processing(struct iscsi_cmnd *req) { - int local; + struct iscsi_conn *conn = req->conn; + bool local; TRACE_ENTRY(); @@ -3166,10 +3172,10 @@ static void iscsi_try_local_processing(struct iscsi_conn *conn) #endif conn->wr_state = ISCSI_CONN_WR_STATE_PROCESSING; conn->wr_space_ready = 0; - local = 1; + local = true; break; default: - local = 0; + local = false; break; } spin_unlock_bh(&iscsi_wr_lock); @@ -3177,14 +3183,21 @@ static void iscsi_try_local_processing(struct iscsi_conn *conn) if (local) { int rc = 1; - if (test_write_ready(conn)) + do { rc = iscsi_send(conn); + if (rc <= 0) + break; + } while (req->not_processed_rsp_cnt != 0); spin_lock_bh(&iscsi_wr_lock); #ifdef CONFIG_SCST_EXTRACHECKS conn->wr_task = NULL; #endif - if ((rc <= 0) || test_write_ready(conn)) { + if ((rc == -EAGAIN) && !conn->wr_space_ready) { + TRACE_DBG("EAGAIN, setting WR_STATE_SPACE_WAIT " + "(conn %p)", conn); + conn->wr_state = ISCSI_CONN_WR_STATE_SPACE_WAIT; + } else if (test_write_ready(conn)) { list_add_tail(&conn->wr_list_entry, &iscsi_wr_list); conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST; wake_up(&iscsi_wr_waitQ); @@ -3206,6 +3219,7 @@ static int iscsi_xmit_response(struct scst_cmd *scst_cmd) int status = scst_cmd_get_status(scst_cmd); u8 *sense = scst_cmd_get_sense_buffer(scst_cmd); int sense_len = scst_cmd_get_sense_buffer_len(scst_cmd); + struct iscsi_cmnd *wr_rsp, *our_rsp; EXTRACHECKS_BUG_ON(scst_cmd_atomic(scst_cmd)); @@ -3296,18 +3310,41 @@ static int iscsi_xmit_response(struct scst_cmd *scst_cmd) #endif /* - * "_ordered" here to protect from reorder, which can lead to - * preliminary connection destroy in req_cmnd_release(). Just in - * case, actually, because reordering shouldn't go so far, but who - * knows.. + * There's no need for protection, since we are not going to + * dereference them. */ - conn_get_ordered(conn); - req_cmnd_release(req); - iscsi_try_local_processing(conn); - conn_put(conn); + wr_rsp = list_entry(conn->write_list.next, struct iscsi_cmnd, + write_list_entry); + our_rsp = list_entry(req->rsp_cmd_list.next, struct iscsi_cmnd, + rsp_cmd_list_entry); + if (wr_rsp == our_rsp) { + /* + * This is our rsp, so let's try to process it locally to + * decrease latency. We need to call pre_release before + * processing to handle some error recovery cases. + */ + if (scst_get_active_cmd_count(scst_cmd) <= 2) { + req_cmnd_pre_release(req); + iscsi_try_local_processing(req); + cmnd_put(req); + } else { + /* + * There's too much backend activity, so it could be + * better to push it to the write thread. + */ + goto out_push_to_wr_thread; + } + } else + goto out_push_to_wr_thread; out: return SCST_TGT_RES_SUCCESS; + +out_push_to_wr_thread: + TRACE_DBG("Waking up write thread (conn %p)", conn); + req_cmnd_release(req); + iscsi_make_conn_wr_active(conn); + goto out; } /* Called under sn_lock */ diff --git a/iscsi-scst/kernel/iscsi.h b/iscsi-scst/kernel/iscsi.h index 0caf3e8cc..3e316d24a 100644 --- a/iscsi-scst/kernel/iscsi.h +++ b/iscsi-scst/kernel/iscsi.h @@ -174,7 +174,7 @@ struct iscsi_conn { #define ISCSI_CONN_REINSTATING 1 #define ISCSI_CONN_SHUTTINGDOWN 2 - unsigned long conn_aflags; + unsigned long conn_aflags __attribute__((aligned(sizeof(long)))); spinlock_t cmd_list_lock; /* BH lock */ @@ -184,9 +184,9 @@ struct iscsi_conn { atomic_t conn_ref_cnt; spinlock_t write_list_lock; - /* List of data pdus to be sent, protected by write_list_lock */ + /* List of data pdus to be sent. Protected by write_list_lock */ struct list_head write_list; - /* List of data pdus being sent, protected by write_list_lock */ + /* List of data pdus being sent. Protected by write_list_lock */ struct list_head write_timeout_list; /* Protected by write_list_lock */ @@ -194,7 +194,7 @@ struct iscsi_conn { unsigned int rsp_timeout; /* in jiffies */ /* All 2 protected by iscsi_wr_lock */ - unsigned short wr_state; + unsigned short wr_state __attribute__((aligned(sizeof(long)))); unsigned short wr_space_ready:1; struct list_head wr_list_entry; @@ -361,7 +361,7 @@ struct iscsi_cmnd { */ #define ISCSI_CMD_ABORTED 0 #define ISCSI_CMD_PRELIM_COMPLETED 1 - unsigned long prelim_compl_flags; + unsigned long prelim_compl_flags __attribute__((aligned(sizeof(long)))); struct list_head hash_list_entry; @@ -420,6 +420,10 @@ struct iscsi_cmnd { }; struct iscsi_cmnd *main_rsp; + + /* Protected on modify by conn->write_list_lock */ + int not_processed_rsp_cnt + __attribute__((aligned(sizeof(long)))); }; /* Response only fields */ @@ -518,6 +522,7 @@ extern void conn_info_show(struct seq_file *, struct iscsi_session *); #endif extern void iscsi_check_tm_data_wait_timeouts(struct iscsi_conn *conn, bool force); +extern void __iscsi_write_space_ready(struct iscsi_conn *conn); /* nthread.c */ extern int iscsi_send(struct iscsi_conn *conn); @@ -668,20 +673,33 @@ static inline void cmnd_put(struct iscsi_cmnd *cmnd) static inline void cmd_add_on_write_list(struct iscsi_conn *conn, struct iscsi_cmnd *cmnd) { + struct iscsi_cmnd *parent = cmnd->parent_req; + TRACE_DBG("cmnd %p", cmnd); /* See comment in iscsi_restart_cmnd() */ EXTRACHECKS_BUG_ON(cmnd->parent_req->hashed && (cmnd_opcode(cmnd) != ISCSI_OP_R2T)); list_add_tail(&cmnd->write_list_entry, &conn->write_list); cmnd->on_write_list = 1; + + parent->not_processed_rsp_cnt++; + TRACE_DBG("not processed rsp cnt %d (parent %p)", + parent->not_processed_rsp_cnt, parent); } /* conn->write_list_lock supposed to be locked and BHs off */ static inline void cmd_del_from_write_list(struct iscsi_cmnd *cmnd) { + struct iscsi_cmnd *parent = cmnd->parent_req; + TRACE_DBG("%p", cmnd); list_del(&cmnd->write_list_entry); cmnd->on_write_list = 0; + + parent->not_processed_rsp_cnt--; + TRACE_DBG("not processed rsp cnt %d (parent %p)", + parent->not_processed_rsp_cnt, parent); + EXTRACHECKS_BUG_ON(parent->not_processed_rsp_cnt < 0); } static inline void cmd_add_on_rx_ddigest_list(struct iscsi_cmnd *req, diff --git a/iscsi-scst/kernel/nthread.c b/iscsi-scst/kernel/nthread.c index 5de59215b..563cff6bb 100644 --- a/iscsi-scst/kernel/nthread.c +++ b/iscsi-scst/kernel/nthread.c @@ -529,6 +529,9 @@ static void close_conn(struct iscsi_conn *conn) trace_conn_close(conn); + /* It might never be called for being closed conn */ + __iscsi_write_space_ready(conn); + iscsi_check_closewait(conn); } @@ -653,6 +656,9 @@ static struct iscsi_cmnd *iscsi_get_send_cmnd(struct iscsi_conn *conn) write_list_entry); cmd_del_from_write_list(cmnd); cmnd->write_processing_started = 1; + } else { + spin_unlock_bh(&conn->write_list_lock); + goto out; } spin_unlock_bh(&conn->write_list_lock); @@ -676,6 +682,7 @@ static struct iscsi_cmnd *iscsi_get_send_cmnd(struct iscsi_conn *conn) #endif } +out: return cmnd; } @@ -1528,7 +1535,6 @@ static int exit_tx(struct iscsi_conn *conn, int res) switch (res) { case -EAGAIN: case -ERESTARTSYS: - res = 0; break; default: #ifndef CONFIG_SCST_DEBUG @@ -1719,25 +1725,6 @@ out: return res; } -/* No locks, conn is wr processing. - * - * IMPORTANT! Connection conn must be protected by additional conn_get() - * upon entrance in this function, because otherwise it could be destroyed - * inside as a result of iscsi_send(), which releases sent commands. - */ -static int process_write_queue(struct iscsi_conn *conn) -{ - int res = 0; - - TRACE_ENTRY(); - - if (likely(test_write_ready(conn))) - res = iscsi_send(conn); - - TRACE_EXIT_RES(res); - return res; -} - /* * Called under iscsi_wr_lock and BHs disabled, but will drop it inside, * then reaquire. @@ -1774,24 +1761,22 @@ static void scst_do_job_wr(void) conn_get(conn); - rc = process_write_queue(conn); + rc = iscsi_send(conn); spin_lock_bh(&iscsi_wr_lock); #ifdef CONFIG_SCST_EXTRACHECKS conn->wr_task = NULL; #endif if ((rc == -EAGAIN) && !conn->wr_space_ready) { + TRACE_DBG("EAGAIN, setting WR_STATE_SPACE_WAIT " + "(conn %p)", conn); conn->wr_state = ISCSI_CONN_WR_STATE_SPACE_WAIT; - goto cont; - } - - if (test_write_ready(conn)) { + } else if (test_write_ready(conn)) { list_add_tail(&conn->wr_list_entry, &iscsi_wr_list); conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST; } else conn->wr_state = ISCSI_CONN_WR_STATE_IDLE; -cont: conn_put(conn); } diff --git a/scst/README b/scst/README index ef54390ce..5c7380d4a 100644 --- a/scst/README +++ b/scst/README @@ -1449,7 +1449,7 @@ IMPORTANT: Kernels starting from 2.6.32 have a problem, which prevents BLOCKIO Hopefully, it will be fixed soon. Meanwhile, it is recommended to use kernels below 2.6.32 for BLOCKIO on RAID5/DM devices. -IMPORTANT: If SCST 1.x BLOCKIO worked by default in NV_CACHE mode, when +IMPORTANT: In SCST 1.x BLOCKIO worked by default in NV_CACHE mode, when ========= each device reported to remote initiators as having write through caching. But if your backend block device has internal write back caching it might create a possibility for data loss of @@ -1458,9 +1458,9 @@ IMPORTANT: If SCST 1.x BLOCKIO worked by default in NV_CACHE mode, when non-NV_CACHE mode, when each device reported to remote initiators as having write back caching, and synchronizes the internal device's cache on each SYNCHRONIZE_CACHE command - from the initiators. It might lead to some PERFORMANCE LOSS, + from the initiators. It might lead to some *PERFORMANCE LOSS*, so if you are are sure in your power supply and want to - restore 1.x behavior, your should recreate your BLOCKIO + restore the 1.x behavior, your should recreate your BLOCKIO devices in NV_CACHE mode. Pass-through mode diff --git a/scst/include/scst.h b/scst/include/scst.h index 64a591354..4352129fd 100644 --- a/scst/include/scst.h +++ b/scst/include/scst.h @@ -1395,7 +1395,8 @@ struct scst_session { /* Used for storage of target driver private stuff */ void *tgt_priv; - unsigned long sess_aflags; /* session's async flags */ + /* session's async flags */ + unsigned long sess_aflags __attribute__((aligned(sizeof(long)))); /* * Hash list of tgt_dev's for this session, protected by scst_mutex @@ -1410,7 +1411,7 @@ struct scst_session { * very beginning, because otherwise they can be missed during * TM processing. */ - struct list_head sess_cmd_list; + struct list_head sess_cmd_list __attribute__((aligned(sizeof(long)))); spinlock_t sess_list_lock; /* protects sess_cmd_list, etc */ @@ -1701,7 +1702,8 @@ struct scst_cmd { /**************************************************************/ - unsigned long cmd_flags; /* cmd's async flags */ + /* cmd's async flags */ + unsigned long cmd_flags __attribute__((aligned(sizeof(long)))); /* Keeps status of cmd's status/data delivery to remote initiator */ int delivery_status; @@ -2021,7 +2023,7 @@ struct scst_device { ** neighbour fields. *************************************************************/ - unsigned long queue_alg:4; + unsigned long queue_alg:4 __attribute__((aligned(sizeof(long)))); unsigned long tst:3; unsigned long tas:1; unsigned long swp:1; @@ -2037,7 +2039,7 @@ struct scst_device { /**************************************************************/ /* Set if dev is persistently reserved. Protected by dev_pr_mutex. */ - unsigned short pr_is_set:1; + unsigned short pr_is_set:1 __attribute__((aligned(sizeof(long)))); /* * Set if there is a thread changing or going to change PR state(s). @@ -2093,7 +2095,7 @@ struct scst_device { *************************************************************/ /* True if persist through power loss is activated */ - unsigned short pr_aptpl:1; + unsigned short pr_aptpl:1 __attribute__((aligned(sizeof(long)))); /* Persistent reservation type */ uint8_t pr_type; @@ -2134,7 +2136,7 @@ struct scst_device { wait_queue_head_t on_dev_waitQ; /* A list entry used during TM, protected by scst_mutex */ - struct list_head tm_dev_list_entry; + struct list_head tm_dev_list_entry __attribute__((aligned(sizeof(long)))); /* Virtual device internal ID */ int virt_id; @@ -2210,7 +2212,8 @@ struct scst_tgt_dev { struct sgv_pool *pool; int max_sg_cnt; - unsigned long tgt_dev_flags; /* tgt_dev's async flags */ + /* tgt_dev's async flags */ + unsigned long tgt_dev_flags __attribute__((aligned(sizeof(long)))); /* Used for storage of dev handler private stuff */ void *dh_priv; @@ -2235,10 +2238,10 @@ struct scst_tgt_dev { struct list_head skipped_sn_list; /* - * Set if the prev cmd was ORDERED. Size must allow unprotected - * modifications independant to the neighbour fields. + * Set if the prev cmd was ORDERED. Size and alignment must allow + * unprotected modifications independant to the neighbour fields. */ - unsigned long prev_cmd_ordered; + unsigned long prev_cmd_ordered __attribute__((aligned(sizeof(long)))); int num_free_sn_slots; /* if it's <0, then all slots are busy */ atomic_t *cur_sn_slot; @@ -3147,6 +3150,14 @@ static inline void scst_set_delivery_status(struct scst_cmd *cmd, cmd->delivery_status = delivery_status; } +static inline unsigned int scst_get_active_cmd_count(struct scst_cmd *cmd) +{ + if (likely(cmd->tgt_dev != NULL)) + return atomic_read(&cmd->tgt_dev->tgt_dev_cmd_count); + else + return (unsigned int)-1; +} + /* * Get/Set function for mgmt cmd's target private data */ diff --git a/scst/src/dev_handlers/scst_vdisk.c b/scst/src/dev_handlers/scst_vdisk.c index cbc250a70..b990dfa34 100644 --- a/scst/src/dev_handlers/scst_vdisk.c +++ b/scst/src/dev_handlers/scst_vdisk.c @@ -631,7 +631,8 @@ static void vdisk_blockio_check_flush_support(struct scst_vdisk_dev *virt_dev) if (blockio_flush(inode->i_bdev) != 0) { PRINT_WARNING("Device %s doesn't support barriers, switching " - "to NV_CACHE mode", virt_dev->filename); + "to NV_CACHE mode. Read README for more details.", + virt_dev->filename); virt_dev->nv_cache = 1; }