diff --git a/iscsi-scst/include/iscsi_scst.h b/iscsi-scst/include/iscsi_scst.h index 3f119e649..3542c3506 100644 --- a/iscsi-scst/include/iscsi_scst.h +++ b/iscsi-scst/include/iscsi_scst.h @@ -187,7 +187,7 @@ struct iscsi_kern_initiator_info { #define MAX_NR_QUEUED_CMNDS 256 #define DEFAULT_RSP_TIMEOUT 30 -#define MIN_RSP_TIMEOUT 10 +#define MIN_RSP_TIMEOUT 2 #define MAX_RSP_TIMEOUT 65535 #define DEFAULT_NOP_IN_INTERVAL 30 diff --git a/iscsi-scst/kernel/conn.c b/iscsi-scst/kernel/conn.c index a7c5f1d18..b39b2c83c 100644 --- a/iscsi-scst/kernel/conn.c +++ b/iscsi-scst/kernel/conn.c @@ -382,6 +382,24 @@ static void iscsi_data_ready(struct sock *sk, int len) return; } +void __iscsi_write_space_ready(struct iscsi_conn *conn) +{ + TRACE_ENTRY(); + + spin_lock_bh(&iscsi_wr_lock); + conn->wr_space_ready = 1; + if ((conn->wr_state == ISCSI_CONN_WR_STATE_SPACE_WAIT)) { + TRACE_DBG("wr space ready (conn %p)", conn); + list_add_tail(&conn->wr_list_entry, &iscsi_wr_list); + conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST; + wake_up(&iscsi_wr_waitQ); + } + spin_unlock_bh(&iscsi_wr_lock); + + TRACE_EXIT(); + return; +} + static void iscsi_write_space_ready(struct sock *sk) { struct iscsi_conn *conn = sk->sk_user_data; @@ -390,14 +408,7 @@ static void iscsi_write_space_ready(struct sock *sk) TRACE_DBG("Write space ready for conn %p", conn); - spin_lock_bh(&iscsi_wr_lock); - conn->wr_space_ready = 1; - if ((conn->wr_state == ISCSI_CONN_WR_STATE_SPACE_WAIT)) { - list_add_tail(&conn->wr_list_entry, &iscsi_wr_list); - conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST; - wake_up(&iscsi_wr_waitQ); - } - spin_unlock_bh(&iscsi_wr_lock); + __iscsi_write_space_ready(conn); conn->old_write_space(sk); diff --git a/iscsi-scst/kernel/iscsi.c b/iscsi-scst/kernel/iscsi.c index 1af866c5f..6fd84da1b 100644 --- a/iscsi-scst/kernel/iscsi.c +++ b/iscsi-scst/kernel/iscsi.c @@ -447,6 +447,8 @@ void cmnd_done(struct iscsi_cmnd *cmnd) cmnd_free(cmnd); } else { + struct iscsi_cmnd *parent = cmnd->parent_req; + if (cmnd->own_sg) { TRACE_DBG("own_sg for rsp %p", cmnd); if ((cmnd->sg != &dummy_sg) && (cmnd->sg != cmnd->rsp_sg)) @@ -460,15 +462,15 @@ void cmnd_done(struct iscsi_cmnd *cmnd) EXTRACHECKS_BUG_ON(cmnd->dec_active_cmnds); - if (cmnd == cmnd->parent_req->main_rsp) { + if (cmnd == parent->main_rsp) { TRACE_DBG("Finishing main rsp %p (req %p)", cmnd, - cmnd->parent_req); - cmnd->parent_req->main_rsp = NULL; + parent); + parent->main_rsp = NULL; } - cmnd_put(cmnd->parent_req); + cmnd_put(parent); /* - * rsp will be freed on the last parent's put and can already + * cmnd will be freed on the last parent's put and can already * be freed!! */ } @@ -554,11 +556,7 @@ void req_cmnd_release_force(struct iscsi_cmnd *req) return; } -/* - * Corresponding conn may also get destroyed after this function, except only - * if it's called from the read thread! - */ -static void req_cmnd_release(struct iscsi_cmnd *req) +static void req_cmnd_pre_release(struct iscsi_cmnd *req) { struct iscsi_cmnd *c, *t; @@ -619,6 +617,19 @@ static void req_cmnd_release(struct iscsi_cmnd *req) #endif } + TRACE_EXIT(); + return; +} + +/* + * Corresponding conn may also get destroyed after this function, except only + * if it's called from the read thread! + */ +static void req_cmnd_release(struct iscsi_cmnd *req) +{ + TRACE_ENTRY(); + + req_cmnd_pre_release(req); cmnd_put(req); TRACE_EXIT(); @@ -3142,16 +3153,11 @@ static void iscsi_preprocessing_done(struct scst_cmd *scst_cmd) return; } -/* - * No locks. - * - * IMPORTANT! Connection conn must be protected by additional conn_get() - * upon entrance in this function, because otherwise it could be destroyed - * inside as a result of iscsi_send(), which releases sent commands. - */ -static void iscsi_try_local_processing(struct iscsi_conn *conn) +/* No locks */ +static void iscsi_try_local_processing(struct iscsi_cmnd *req) { - int local; + struct iscsi_conn *conn = req->conn; + bool local; TRACE_ENTRY(); @@ -3166,10 +3172,10 @@ static void iscsi_try_local_processing(struct iscsi_conn *conn) #endif conn->wr_state = ISCSI_CONN_WR_STATE_PROCESSING; conn->wr_space_ready = 0; - local = 1; + local = true; break; default: - local = 0; + local = false; break; } spin_unlock_bh(&iscsi_wr_lock); @@ -3177,14 +3183,21 @@ static void iscsi_try_local_processing(struct iscsi_conn *conn) if (local) { int rc = 1; - if (test_write_ready(conn)) + do { rc = iscsi_send(conn); + if (rc <= 0) + break; + } while (req->not_processed_rsp_cnt != 0); spin_lock_bh(&iscsi_wr_lock); #ifdef CONFIG_SCST_EXTRACHECKS conn->wr_task = NULL; #endif - if ((rc <= 0) || test_write_ready(conn)) { + if ((rc == -EAGAIN) && !conn->wr_space_ready) { + TRACE_DBG("EAGAIN, setting WR_STATE_SPACE_WAIT " + "(conn %p)", conn); + conn->wr_state = ISCSI_CONN_WR_STATE_SPACE_WAIT; + } else if (test_write_ready(conn)) { list_add_tail(&conn->wr_list_entry, &iscsi_wr_list); conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST; wake_up(&iscsi_wr_waitQ); @@ -3206,6 +3219,7 @@ static int iscsi_xmit_response(struct scst_cmd *scst_cmd) int status = scst_cmd_get_status(scst_cmd); u8 *sense = scst_cmd_get_sense_buffer(scst_cmd); int sense_len = scst_cmd_get_sense_buffer_len(scst_cmd); + struct iscsi_cmnd *wr_rsp, *our_rsp; EXTRACHECKS_BUG_ON(scst_cmd_atomic(scst_cmd)); @@ -3296,18 +3310,41 @@ static int iscsi_xmit_response(struct scst_cmd *scst_cmd) #endif /* - * "_ordered" here to protect from reorder, which can lead to - * preliminary connection destroy in req_cmnd_release(). Just in - * case, actually, because reordering shouldn't go so far, but who - * knows.. + * There's no need for protection, since we are not going to + * dereference them. */ - conn_get_ordered(conn); - req_cmnd_release(req); - iscsi_try_local_processing(conn); - conn_put(conn); + wr_rsp = list_entry(conn->write_list.next, struct iscsi_cmnd, + write_list_entry); + our_rsp = list_entry(req->rsp_cmd_list.next, struct iscsi_cmnd, + rsp_cmd_list_entry); + if (wr_rsp == our_rsp) { + /* + * This is our rsp, so let's try to process it locally to + * decrease latency. We need to call pre_release before + * processing to handle some error recovery cases. + */ + if (scst_get_active_cmd_count(scst_cmd) <= 2) { + req_cmnd_pre_release(req); + iscsi_try_local_processing(req); + cmnd_put(req); + } else { + /* + * There's too much backend activity, so it could be + * better to push it to the write thread. + */ + goto out_push_to_wr_thread; + } + } else + goto out_push_to_wr_thread; out: return SCST_TGT_RES_SUCCESS; + +out_push_to_wr_thread: + TRACE_DBG("Waking up write thread (conn %p)", conn); + req_cmnd_release(req); + iscsi_make_conn_wr_active(conn); + goto out; } /* Called under sn_lock */ diff --git a/iscsi-scst/kernel/iscsi.h b/iscsi-scst/kernel/iscsi.h index 0caf3e8cc..3e316d24a 100644 --- a/iscsi-scst/kernel/iscsi.h +++ b/iscsi-scst/kernel/iscsi.h @@ -174,7 +174,7 @@ struct iscsi_conn { #define ISCSI_CONN_REINSTATING 1 #define ISCSI_CONN_SHUTTINGDOWN 2 - unsigned long conn_aflags; + unsigned long conn_aflags __attribute__((aligned(sizeof(long)))); spinlock_t cmd_list_lock; /* BH lock */ @@ -184,9 +184,9 @@ struct iscsi_conn { atomic_t conn_ref_cnt; spinlock_t write_list_lock; - /* List of data pdus to be sent, protected by write_list_lock */ + /* List of data pdus to be sent. Protected by write_list_lock */ struct list_head write_list; - /* List of data pdus being sent, protected by write_list_lock */ + /* List of data pdus being sent. Protected by write_list_lock */ struct list_head write_timeout_list; /* Protected by write_list_lock */ @@ -194,7 +194,7 @@ struct iscsi_conn { unsigned int rsp_timeout; /* in jiffies */ /* All 2 protected by iscsi_wr_lock */ - unsigned short wr_state; + unsigned short wr_state __attribute__((aligned(sizeof(long)))); unsigned short wr_space_ready:1; struct list_head wr_list_entry; @@ -361,7 +361,7 @@ struct iscsi_cmnd { */ #define ISCSI_CMD_ABORTED 0 #define ISCSI_CMD_PRELIM_COMPLETED 1 - unsigned long prelim_compl_flags; + unsigned long prelim_compl_flags __attribute__((aligned(sizeof(long)))); struct list_head hash_list_entry; @@ -420,6 +420,10 @@ struct iscsi_cmnd { }; struct iscsi_cmnd *main_rsp; + + /* Protected on modify by conn->write_list_lock */ + int not_processed_rsp_cnt + __attribute__((aligned(sizeof(long)))); }; /* Response only fields */ @@ -518,6 +522,7 @@ extern void conn_info_show(struct seq_file *, struct iscsi_session *); #endif extern void iscsi_check_tm_data_wait_timeouts(struct iscsi_conn *conn, bool force); +extern void __iscsi_write_space_ready(struct iscsi_conn *conn); /* nthread.c */ extern int iscsi_send(struct iscsi_conn *conn); @@ -668,20 +673,33 @@ static inline void cmnd_put(struct iscsi_cmnd *cmnd) static inline void cmd_add_on_write_list(struct iscsi_conn *conn, struct iscsi_cmnd *cmnd) { + struct iscsi_cmnd *parent = cmnd->parent_req; + TRACE_DBG("cmnd %p", cmnd); /* See comment in iscsi_restart_cmnd() */ EXTRACHECKS_BUG_ON(cmnd->parent_req->hashed && (cmnd_opcode(cmnd) != ISCSI_OP_R2T)); list_add_tail(&cmnd->write_list_entry, &conn->write_list); cmnd->on_write_list = 1; + + parent->not_processed_rsp_cnt++; + TRACE_DBG("not processed rsp cnt %d (parent %p)", + parent->not_processed_rsp_cnt, parent); } /* conn->write_list_lock supposed to be locked and BHs off */ static inline void cmd_del_from_write_list(struct iscsi_cmnd *cmnd) { + struct iscsi_cmnd *parent = cmnd->parent_req; + TRACE_DBG("%p", cmnd); list_del(&cmnd->write_list_entry); cmnd->on_write_list = 0; + + parent->not_processed_rsp_cnt--; + TRACE_DBG("not processed rsp cnt %d (parent %p)", + parent->not_processed_rsp_cnt, parent); + EXTRACHECKS_BUG_ON(parent->not_processed_rsp_cnt < 0); } static inline void cmd_add_on_rx_ddigest_list(struct iscsi_cmnd *req, diff --git a/iscsi-scst/kernel/nthread.c b/iscsi-scst/kernel/nthread.c index 5de59215b..563cff6bb 100644 --- a/iscsi-scst/kernel/nthread.c +++ b/iscsi-scst/kernel/nthread.c @@ -529,6 +529,9 @@ static void close_conn(struct iscsi_conn *conn) trace_conn_close(conn); + /* It might never be called for being closed conn */ + __iscsi_write_space_ready(conn); + iscsi_check_closewait(conn); } @@ -653,6 +656,9 @@ static struct iscsi_cmnd *iscsi_get_send_cmnd(struct iscsi_conn *conn) write_list_entry); cmd_del_from_write_list(cmnd); cmnd->write_processing_started = 1; + } else { + spin_unlock_bh(&conn->write_list_lock); + goto out; } spin_unlock_bh(&conn->write_list_lock); @@ -676,6 +682,7 @@ static struct iscsi_cmnd *iscsi_get_send_cmnd(struct iscsi_conn *conn) #endif } +out: return cmnd; } @@ -1528,7 +1535,6 @@ static int exit_tx(struct iscsi_conn *conn, int res) switch (res) { case -EAGAIN: case -ERESTARTSYS: - res = 0; break; default: #ifndef CONFIG_SCST_DEBUG @@ -1719,25 +1725,6 @@ out: return res; } -/* No locks, conn is wr processing. - * - * IMPORTANT! Connection conn must be protected by additional conn_get() - * upon entrance in this function, because otherwise it could be destroyed - * inside as a result of iscsi_send(), which releases sent commands. - */ -static int process_write_queue(struct iscsi_conn *conn) -{ - int res = 0; - - TRACE_ENTRY(); - - if (likely(test_write_ready(conn))) - res = iscsi_send(conn); - - TRACE_EXIT_RES(res); - return res; -} - /* * Called under iscsi_wr_lock and BHs disabled, but will drop it inside, * then reaquire. @@ -1774,24 +1761,22 @@ static void scst_do_job_wr(void) conn_get(conn); - rc = process_write_queue(conn); + rc = iscsi_send(conn); spin_lock_bh(&iscsi_wr_lock); #ifdef CONFIG_SCST_EXTRACHECKS conn->wr_task = NULL; #endif if ((rc == -EAGAIN) && !conn->wr_space_ready) { + TRACE_DBG("EAGAIN, setting WR_STATE_SPACE_WAIT " + "(conn %p)", conn); conn->wr_state = ISCSI_CONN_WR_STATE_SPACE_WAIT; - goto cont; - } - - if (test_write_ready(conn)) { + } else if (test_write_ready(conn)) { list_add_tail(&conn->wr_list_entry, &iscsi_wr_list); conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST; } else conn->wr_state = ISCSI_CONN_WR_STATE_IDLE; -cont: conn_put(conn); } diff --git a/scst/README b/scst/README index ef54390ce..5c7380d4a 100644 --- a/scst/README +++ b/scst/README @@ -1449,7 +1449,7 @@ IMPORTANT: Kernels starting from 2.6.32 have a problem, which prevents BLOCKIO Hopefully, it will be fixed soon. Meanwhile, it is recommended to use kernels below 2.6.32 for BLOCKIO on RAID5/DM devices. -IMPORTANT: If SCST 1.x BLOCKIO worked by default in NV_CACHE mode, when +IMPORTANT: In SCST 1.x BLOCKIO worked by default in NV_CACHE mode, when ========= each device reported to remote initiators as having write through caching. But if your backend block device has internal write back caching it might create a possibility for data loss of @@ -1458,9 +1458,9 @@ IMPORTANT: If SCST 1.x BLOCKIO worked by default in NV_CACHE mode, when non-NV_CACHE mode, when each device reported to remote initiators as having write back caching, and synchronizes the internal device's cache on each SYNCHRONIZE_CACHE command - from the initiators. It might lead to some PERFORMANCE LOSS, + from the initiators. It might lead to some *PERFORMANCE LOSS*, so if you are are sure in your power supply and want to - restore 1.x behavior, your should recreate your BLOCKIO + restore the 1.x behavior, your should recreate your BLOCKIO devices in NV_CACHE mode. Pass-through mode diff --git a/scst/include/scst.h b/scst/include/scst.h index 64a591354..4352129fd 100644 --- a/scst/include/scst.h +++ b/scst/include/scst.h @@ -1395,7 +1395,8 @@ struct scst_session { /* Used for storage of target driver private stuff */ void *tgt_priv; - unsigned long sess_aflags; /* session's async flags */ + /* session's async flags */ + unsigned long sess_aflags __attribute__((aligned(sizeof(long)))); /* * Hash list of tgt_dev's for this session, protected by scst_mutex @@ -1410,7 +1411,7 @@ struct scst_session { * very beginning, because otherwise they can be missed during * TM processing. */ - struct list_head sess_cmd_list; + struct list_head sess_cmd_list __attribute__((aligned(sizeof(long)))); spinlock_t sess_list_lock; /* protects sess_cmd_list, etc */ @@ -1701,7 +1702,8 @@ struct scst_cmd { /**************************************************************/ - unsigned long cmd_flags; /* cmd's async flags */ + /* cmd's async flags */ + unsigned long cmd_flags __attribute__((aligned(sizeof(long)))); /* Keeps status of cmd's status/data delivery to remote initiator */ int delivery_status; @@ -2021,7 +2023,7 @@ struct scst_device { ** neighbour fields. *************************************************************/ - unsigned long queue_alg:4; + unsigned long queue_alg:4 __attribute__((aligned(sizeof(long)))); unsigned long tst:3; unsigned long tas:1; unsigned long swp:1; @@ -2037,7 +2039,7 @@ struct scst_device { /**************************************************************/ /* Set if dev is persistently reserved. Protected by dev_pr_mutex. */ - unsigned short pr_is_set:1; + unsigned short pr_is_set:1 __attribute__((aligned(sizeof(long)))); /* * Set if there is a thread changing or going to change PR state(s). @@ -2093,7 +2095,7 @@ struct scst_device { *************************************************************/ /* True if persist through power loss is activated */ - unsigned short pr_aptpl:1; + unsigned short pr_aptpl:1 __attribute__((aligned(sizeof(long)))); /* Persistent reservation type */ uint8_t pr_type; @@ -2134,7 +2136,7 @@ struct scst_device { wait_queue_head_t on_dev_waitQ; /* A list entry used during TM, protected by scst_mutex */ - struct list_head tm_dev_list_entry; + struct list_head tm_dev_list_entry __attribute__((aligned(sizeof(long)))); /* Virtual device internal ID */ int virt_id; @@ -2210,7 +2212,8 @@ struct scst_tgt_dev { struct sgv_pool *pool; int max_sg_cnt; - unsigned long tgt_dev_flags; /* tgt_dev's async flags */ + /* tgt_dev's async flags */ + unsigned long tgt_dev_flags __attribute__((aligned(sizeof(long)))); /* Used for storage of dev handler private stuff */ void *dh_priv; @@ -2235,10 +2238,10 @@ struct scst_tgt_dev { struct list_head skipped_sn_list; /* - * Set if the prev cmd was ORDERED. Size must allow unprotected - * modifications independant to the neighbour fields. + * Set if the prev cmd was ORDERED. Size and alignment must allow + * unprotected modifications independant to the neighbour fields. */ - unsigned long prev_cmd_ordered; + unsigned long prev_cmd_ordered __attribute__((aligned(sizeof(long)))); int num_free_sn_slots; /* if it's <0, then all slots are busy */ atomic_t *cur_sn_slot; @@ -3147,6 +3150,14 @@ static inline void scst_set_delivery_status(struct scst_cmd *cmd, cmd->delivery_status = delivery_status; } +static inline unsigned int scst_get_active_cmd_count(struct scst_cmd *cmd) +{ + if (likely(cmd->tgt_dev != NULL)) + return atomic_read(&cmd->tgt_dev->tgt_dev_cmd_count); + else + return (unsigned int)-1; +} + /* * Get/Set function for mgmt cmd's target private data */ diff --git a/scst/src/dev_handlers/scst_vdisk.c b/scst/src/dev_handlers/scst_vdisk.c index cbc250a70..b990dfa34 100644 --- a/scst/src/dev_handlers/scst_vdisk.c +++ b/scst/src/dev_handlers/scst_vdisk.c @@ -631,7 +631,8 @@ static void vdisk_blockio_check_flush_support(struct scst_vdisk_dev *virt_dev) if (blockio_flush(inode->i_bdev) != 0) { PRINT_WARNING("Device %s doesn't support barriers, switching " - "to NV_CACHE mode", virt_dev->filename); + "to NV_CACHE mode. Read README for more details.", + virt_dev->filename); virt_dev->nv_cache = 1; }