From ff2c24c1c3b75df5b3e2c6fb0341e693cb111b83 Mon Sep 17 00:00:00 2001 From: Vladislav Bolkhovitin Date: Thu, 30 Sep 2010 18:07:27 +0000 Subject: [PATCH] Assigning CPU affinity to threads and connections git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@2317 d57e44dd-8a1f-0410-8b47-8ef2f437770f --- iscsi-scst/ChangeLog | 6 + iscsi-scst/README | 20 +-- iscsi-scst/README_in-tree | 20 +-- iscsi-scst/kernel/conn.c | 44 +++--- iscsi-scst/kernel/iscsi.c | 257 +++++++++++++++++++++++++++--------- iscsi-scst/kernel/iscsi.h | 45 +++++-- iscsi-scst/kernel/nthread.c | 122 +++++++++-------- iscsi-scst/kernel/param.c | 4 +- iscsi-scst/kernel/session.c | 41 ++++-- scst/ChangeLog | 13 ++ scst/README | 23 ++-- scst/README_in-tree | 23 ++-- scst/include/scst.h | 9 ++ scst/src/scst_lib.c | 5 +- scst/src/scst_main.c | 9 ++ scst/src/scst_priv.h | 6 + scst/src/scst_sysfs.c | 229 +++++++++++++++++++++++++++++++- 17 files changed, 681 insertions(+), 195 deletions(-) diff --git a/iscsi-scst/ChangeLog b/iscsi-scst/ChangeLog index b0041db1d..4130231b5 100644 --- a/iscsi-scst/ChangeLog +++ b/iscsi-scst/ChangeLog @@ -1,3 +1,9 @@ +Summary of changes between versions 2.0.0 and 2.1.0 +--------------------------------------------------- + + - Assigning CPU affinity to RD/WR threads added + + Summary of changes in iSCSI-SCST between versions 1.0.1 and 1.0.2 ----------------------------------------------------------------- diff --git a/iscsi-scst/README b/iscsi-scst/README index 2daa63e76..151980c3d 100644 --- a/iscsi-scst/README +++ b/iscsi-scst/README @@ -313,15 +313,6 @@ Each target subdirectory contains the following entries: configuring it before it starts accepting new connections. 0 by default. - - rel_tgt_id - allows to read or write SCSI Relative Target Port - Identifier attribute. This identifier is used to identify SCSI Target - Ports by some SCSI commands, mainly by Persistent Reservations - commands. 
This identifier must be unique among all SCST targets, but - for convenience SCST allows disabled targets to have not unique - rel_tgt_id. In this case SCST will not allow to enable this target - until rel_tgt_id becomes unique. This attribute initialized unique by - SCST by default. - - redirect - allows to temporarily or permanently redirect login to the target to another portal. Discovery sessions will not be impacted, but normal sessions will be redirected before security negotiation. @@ -367,6 +358,8 @@ Each connection subdirectory contains the following entries: - state - contains processing state of this connection. +See SCST README for info about other attributes. + Below is a sample script, which configures 1 virtual disk "disk1" using /disk1 image and one target iqn.2006-10.net.vlnb:tgt with all default parameters: @@ -865,7 +858,14 @@ load, including IRQ processing load. Note, many tools like vmstat give aggregate load on all CPUs, so with 4 cores 25% corresponds to 100% load of any single CPU. -7. See SCST core's README for more advices. Especially pay attention to +7. For high speed network adapters it can be better if you configure +them to serve connections, e.g., from initiator X on CPU0 and from +initiator Y on CPU1. Then you can bind threads processing them also to +CPU0 and CPU1 correspondingly using cpu_mask attribute of their targets +or security groups. In NUMA-like configurations it can significantly +boost IOPS performance. + +8. See SCST core's README for more advices. Especially pay attention to have io_grouping_type option set correctly. diff --git a/iscsi-scst/README_in-tree b/iscsi-scst/README_in-tree index 8f1f8cf56..b7abbb90d 100644 --- a/iscsi-scst/README_in-tree +++ b/iscsi-scst/README_in-tree @@ -155,15 +155,6 @@ Each target subdirectory contains the following entries: configuring it before it starts accepting new connections. 0 by default. - - rel_tgt_id - allows to read or write SCSI Relative Target Port - Identifier attribute. 
This identifier is used to identify SCSI Target - Ports by some SCSI commands, mainly by Persistent Reservations - commands. This identifier must be unique among all SCST targets, but - for convenience SCST allows disabled targets to have not unique - rel_tgt_id. In this case SCST will not allow to enable this target - until rel_tgt_id becomes unique. This attribute initialized unique by - SCST by default. - - redirect - allows to temporarily or permanently redirect login to the target to another portal. Discovery sessions will not be impacted, but normal sessions will be redirected before security negotiation. @@ -209,6 +200,8 @@ Each connection subdirectory contains the following entries: - state - contains processing state of this connection. +See SCST README for info about other attributes. + Below is a sample script, which configures 1 virtual disk "disk1" using /disk1 image and one target iqn.2006-10.net.vlnb:tgt with all default parameters: @@ -705,7 +698,14 @@ load, including IRQ processing load. Note, many tools like vmstat give aggregate load on all CPUs, so with 4 cores 25% corresponds to 100% load of any single CPU. -7. See SCST core's README for more advices. Especially pay attention to +7. For high speed network adapters it can be better if you configure +them to serve connections, e.g., from initiator X on CPU0 and from +initiator Y on CPU1. Then you can bind threads processing them also to +CPU0 and CPU1 correspondingly using cpu_mask attribute of their targets +or security groups. In NUMA-like configurations it can significantly +boost IOPS performance. + +8. See SCST core's README for more advices. Especially pay attention to have io_grouping_type option set correctly. 
diff --git a/iscsi-scst/kernel/conn.c b/iscsi-scst/kernel/conn.c index f2fbda8a1..711b5162e 100644 --- a/iscsi-scst/kernel/conn.c +++ b/iscsi-scst/kernel/conn.c @@ -350,9 +350,11 @@ struct iscsi_conn *conn_lookup(struct iscsi_session *session, u16 cid) void iscsi_make_conn_rd_active(struct iscsi_conn *conn) { + struct iscsi_thread_pool *p = conn->conn_thr_pool; + TRACE_ENTRY(); - spin_lock_bh(&iscsi_rd_lock); + spin_lock_bh(&p->rd_lock); TRACE_DBG("conn %p, rd_state %x, rd_data_ready %d", conn, conn->rd_state, conn->rd_data_ready); @@ -367,12 +369,12 @@ void iscsi_make_conn_rd_active(struct iscsi_conn *conn) conn->rd_data_ready = 1; if (conn->rd_state == ISCSI_CONN_RD_STATE_IDLE) { - list_add_tail(&conn->rd_list_entry, &iscsi_rd_list); + list_add_tail(&conn->rd_list_entry, &p->rd_list); conn->rd_state = ISCSI_CONN_RD_STATE_IN_LIST; - wake_up(&iscsi_rd_waitQ); + wake_up(&p->rd_waitQ); } - spin_unlock_bh(&iscsi_rd_lock); + spin_unlock_bh(&p->rd_lock); TRACE_EXIT(); return; @@ -380,9 +382,11 @@ void iscsi_make_conn_rd_active(struct iscsi_conn *conn) void iscsi_make_conn_wr_active(struct iscsi_conn *conn) { + struct iscsi_thread_pool *p = conn->conn_thr_pool; + TRACE_ENTRY(); - spin_lock_bh(&iscsi_wr_lock); + spin_lock_bh(&p->wr_lock); TRACE_DBG("conn %p, wr_state %x, wr_space_ready %d", conn, conn->wr_state, conn->wr_space_ready); @@ -395,12 +399,12 @@ void iscsi_make_conn_wr_active(struct iscsi_conn *conn) */ if (conn->wr_state == ISCSI_CONN_WR_STATE_IDLE) { - list_add_tail(&conn->wr_list_entry, &iscsi_wr_list); + list_add_tail(&conn->wr_list_entry, &p->wr_list); conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST; - wake_up(&iscsi_wr_waitQ); + wake_up(&p->wr_waitQ); } - spin_unlock_bh(&iscsi_wr_lock); + spin_unlock_bh(&p->wr_lock); TRACE_EXIT(); return; @@ -408,13 +412,13 @@ void iscsi_make_conn_wr_active(struct iscsi_conn *conn) void __mark_conn_closed(struct iscsi_conn *conn, int flags) { - spin_lock_bh(&iscsi_rd_lock); + spin_lock_bh(&conn->conn_thr_pool->rd_lock); 
conn->closing = 1; if (flags & ISCSI_CONN_ACTIVE_CLOSE) conn->active_close = 1; if (flags & ISCSI_CONN_DELETING) conn->deleting = 1; - spin_unlock_bh(&iscsi_rd_lock); + spin_unlock_bh(&conn->conn_thr_pool->rd_lock); iscsi_make_conn_rd_active(conn); } @@ -472,17 +476,19 @@ static void iscsi_data_ready(struct sock *sk, int len) void __iscsi_write_space_ready(struct iscsi_conn *conn) { + struct iscsi_thread_pool *p = conn->conn_thr_pool; + TRACE_ENTRY(); - spin_lock_bh(&iscsi_wr_lock); + spin_lock_bh(&p->wr_lock); conn->wr_space_ready = 1; if ((conn->wr_state == ISCSI_CONN_WR_STATE_SPACE_WAIT)) { TRACE_DBG("wr space ready (conn %p)", conn); - list_add_tail(&conn->wr_list_entry, &iscsi_wr_list); + list_add_tail(&conn->wr_list_entry, &p->wr_list); conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST; - wake_up(&iscsi_wr_waitQ); + wake_up(&p->wr_waitQ); } - spin_unlock_bh(&iscsi_wr_lock); + spin_unlock_bh(&p->wr_lock); TRACE_EXIT(); return; @@ -535,7 +541,7 @@ static void conn_rsp_timer_fn(unsigned long arg) /* * We must call mark_conn_closed() outside of * write_list_lock or we will have a circular - * locking dependency with iscsi_rd_lock. + * locking dependency with rd_lock. 
*/ spin_unlock_bh(&conn->write_list_lock); mark_conn_closed(conn); @@ -618,7 +624,7 @@ void iscsi_check_tm_data_wait_timeouts(struct iscsi_conn *conn, bool force) iscsi_extracheck_is_rd_thread(conn); again: - spin_lock_bh(&iscsi_rd_lock); + spin_lock_bh(&conn->conn_thr_pool->rd_lock); spin_lock(&conn->write_list_lock); aborted_cmds_pending = false; @@ -635,7 +641,7 @@ again: (time_after_eq(j, cmnd->write_start + ISCSI_TM_DATA_WAIT_TIMEOUT) || force)) { spin_unlock(&conn->write_list_lock); - spin_unlock_bh(&iscsi_rd_lock); + spin_unlock_bh(&conn->conn_thr_pool->rd_lock); iscsi_fail_data_waiting_cmnd(cmnd); goto again; } @@ -657,7 +663,7 @@ again: } spin_unlock(&conn->write_list_lock); - spin_unlock_bh(&iscsi_rd_lock); + spin_unlock_bh(&conn->conn_thr_pool->rd_lock); TRACE_EXIT(); return; @@ -863,6 +869,8 @@ static int iscsi_conn_alloc(struct iscsi_session *session, INIT_LIST_HEAD(&conn->nop_req_list); spin_lock_init(&conn->nop_req_list_lock); + conn->conn_thr_pool = session->sess_thr_pool; + conn->nop_in_ttt = 0; #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20)) INIT_DELAYED_WORK(&conn->nop_in_delayed_work, diff --git a/iscsi-scst/kernel/iscsi.c b/iscsi-scst/kernel/iscsi.c index 8262e1e44..ff594c7c6 100644 --- a/iscsi-scst/kernel/iscsi.c +++ b/iscsi-scst/kernel/iscsi.c @@ -47,24 +47,14 @@ unsigned long iscsi_trace_flag = ISCSI_DEFAULT_LOG_FLAGS; static struct kmem_cache *iscsi_cmnd_cache; -DEFINE_SPINLOCK(iscsi_rd_lock); -LIST_HEAD(iscsi_rd_list); -DECLARE_WAIT_QUEUE_HEAD(iscsi_rd_waitQ); +static DEFINE_MUTEX(iscsi_threads_pool_mutex); +static LIST_HEAD(iscsi_thread_pools_list); -DEFINE_SPINLOCK(iscsi_wr_lock); -LIST_HEAD(iscsi_wr_list); -DECLARE_WAIT_QUEUE_HEAD(iscsi_wr_waitQ); +struct iscsi_thread_pool *iscsi_main_thread_pool; static struct page *dummy_page; static struct scatterlist dummy_sg; -struct iscsi_thread_t { - struct task_struct *thr; - struct list_head threads_list_entry; -}; - -static LIST_HEAD(iscsi_threads_list); - static void 
cmnd_remove_data_wait_hash(struct iscsi_cmnd *cmnd); static void iscsi_send_task_mgmt_resp(struct iscsi_cmnd *req, int status); static void iscsi_check_send_delayed_tm_resp(struct iscsi_session *sess); @@ -2241,7 +2231,7 @@ static void __cmnd_abort(struct iscsi_cmnd *cmnd) * Lock to sync with iscsi_check_tm_data_wait_timeouts(), including * CMD_ABORTED bit set. */ - spin_lock_bh(&iscsi_rd_lock); + spin_lock_bh(&conn->conn_thr_pool->rd_lock); /* * We suppose that preliminary commands completion is tested by @@ -2255,7 +2245,7 @@ static void __cmnd_abort(struct iscsi_cmnd *cmnd) TRACE_MGMT_DBG("Setting conn_tm_active for conn %p", conn); conn->conn_tm_active = 1; - spin_unlock_bh(&iscsi_rd_lock); + spin_unlock_bh(&conn->conn_thr_pool->rd_lock); /* * We need the lock to sync with req_add_to_write_timeout_list() and @@ -3197,11 +3187,12 @@ static void iscsi_preprocessing_done(struct scst_cmd *scst_cmd) static void iscsi_try_local_processing(struct iscsi_cmnd *req) { struct iscsi_conn *conn = req->conn; + struct iscsi_thread_pool *p = conn->conn_thr_pool; bool local; TRACE_ENTRY(); - spin_lock_bh(&iscsi_wr_lock); + spin_lock_bh(&p->wr_lock); switch (conn->wr_state) { case ISCSI_CONN_WR_STATE_IN_LIST: list_del(&conn->wr_list_entry); @@ -3218,7 +3209,7 @@ static void iscsi_try_local_processing(struct iscsi_cmnd *req) local = false; break; } - spin_unlock_bh(&iscsi_wr_lock); + spin_unlock_bh(&p->wr_lock); if (local) { int rc = 1; @@ -3229,7 +3220,7 @@ static void iscsi_try_local_processing(struct iscsi_cmnd *req) break; } while (req->not_processed_rsp_cnt != 0); - spin_lock_bh(&iscsi_wr_lock); + spin_lock_bh(&p->wr_lock); #ifdef CONFIG_SCST_EXTRACHECKS conn->wr_task = NULL; #endif @@ -3238,12 +3229,12 @@ static void iscsi_try_local_processing(struct iscsi_cmnd *req) "(conn %p)", conn); conn->wr_state = ISCSI_CONN_WR_STATE_SPACE_WAIT; } else if (test_write_ready(conn)) { - list_add_tail(&conn->wr_list_entry, &iscsi_wr_list); + list_add_tail(&conn->wr_list_entry, 
&p->wr_list); conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST; - wake_up(&iscsi_wr_waitQ); + wake_up(&p->wr_waitQ); } else conn->wr_state = ISCSI_CONN_WR_STATE_IDLE; - spin_unlock_bh(&iscsi_wr_lock); + spin_unlock_bh(&p->wr_lock); } TRACE_EXIT(); @@ -3646,6 +3637,27 @@ out_err: goto out; } +static int iscsi_cpu_mask_changed_aen(struct scst_aen *aen) +{ + int res = SCST_AEN_RES_SUCCESS; + struct scst_session *scst_sess = scst_aen_get_sess(aen); + struct iscsi_session *sess = scst_sess_get_tgt_priv(scst_sess); + + TRACE_ENTRY(); + + TRACE_MGMT_DBG("CPU mask changed AEN to sess %p (initiator %s)", sess, + sess->initiator_name); + + mutex_lock(&sess->target->target_mutex); + iscsi_sess_force_close(sess); + mutex_unlock(&sess->target->target_mutex); + + scst_aen_done(aen); + + TRACE_EXIT_RES(res); + return res; +} + static int iscsi_report_aen(struct scst_aen *aen) { int res; @@ -3657,6 +3669,9 @@ static int iscsi_report_aen(struct scst_aen *aen) case SCST_AEN_SCSI: res = iscsi_scsi_aen(aen); break; + case SCST_AEN_CPU_MASK_CHANGED: + res = iscsi_cpu_mask_changed_aen(aen); + break; default: TRACE_MGMT_DBG("Unsupported AEN %d", event_fn); res = SCST_AEN_RES_NOT_SUPPORTED; @@ -3835,52 +3850,179 @@ struct scst_tgt_template iscsi_template = { .get_scsi_transport_version = iscsi_get_scsi_transport_version, }; -static __init int iscsi_run_threads(int count, char *name, int (*fn)(void *)) +int iscsi_threads_pool_get(const struct cpumask *cpu_mask, + struct iscsi_thread_pool **out_pool) { - int res = 0; - int i; - struct iscsi_thread_t *thr; + int res; + struct iscsi_thread_pool *p; + struct iscsi_thread *t, *tt; + int i, j, count; - for (i = 0; i < count; i++) { - thr = kmalloc(sizeof(*thr), GFP_KERNEL); - if (!thr) { - res = -ENOMEM; - PRINT_ERROR("Failed to allocate thr %d", res); - goto out; + TRACE_ENTRY(); + + mutex_lock(&iscsi_threads_pool_mutex); + + list_for_each_entry(p, &iscsi_thread_pools_list, + thread_pools_list_entry) { + if ((cpu_mask == NULL) || + 
__cpus_equal(cpu_mask, &p->cpu_mask, nr_cpumask_bits)) { + p->thread_pool_ref++; + TRACE_DBG("iSCSI thread pool %p found (new ref %d)", + p, p->thread_pool_ref); + res = 0; + goto out_unlock; } - thr->thr = kthread_run(fn, NULL, "%s%d", name, i); - if (IS_ERR(thr->thr)) { - res = PTR_ERR(thr->thr); - PRINT_ERROR("kthread_create() failed: %d", res); - kfree(thr); - goto out; - } - list_add_tail(&thr->threads_list_entry, &iscsi_threads_list); } -out: + TRACE_DBG("%s", "Creating new iSCSI thread pool"); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) { + PRINT_ERROR("Unable to allocate iSCSI thread pool (size %zd)", + sizeof(*p)); + res = -ENOMEM; + if (!list_empty(&iscsi_thread_pools_list)) { + PRINT_WARNING("%s", "Using global iSCSI thread pool " + "instead"); + p = list_entry(iscsi_thread_pools_list.next, + struct iscsi_thread_pool, + thread_pools_list_entry); + } else + res = -ENOMEM; + goto out_unlock; + } + + spin_lock_init(&p->rd_lock); + INIT_LIST_HEAD(&p->rd_list); + init_waitqueue_head(&p->rd_waitQ); + spin_lock_init(&p->wr_lock); + INIT_LIST_HEAD(&p->wr_list); + init_waitqueue_head(&p->wr_waitQ); + if (cpu_mask == NULL) + cpus_setall(p->cpu_mask); + else { + cpus_clear(p->cpu_mask); + for_each_cpu(i, cpu_mask) + cpu_set(i, p->cpu_mask); + } + p->thread_pool_ref = 1; + INIT_LIST_HEAD(&p->threads_list); + + if (cpu_mask == NULL) + count = max((int)num_online_cpus(), 2); + else { + count = 0; + for_each_cpu(i, cpu_mask) + count++; + } + + for (j = 0; j < 2; j++) { + int (*fn)(void *); + char name[25]; + static int major; + + if (j == 0) + fn = istrd; + else + fn = istwr; + + major++; + + for (i = 0; i < count; i++) { + if (j == 0) { + if (cpu_mask == NULL) + snprintf(name, sizeof(name), "iscsird%d", i); + else + snprintf(name, sizeof(name), "iscsird%d_%d", + major, i); + } else { + if (cpu_mask == NULL) + snprintf(name, sizeof(name), "iscsiwr%d", i); + else + snprintf(name, sizeof(name), "iscsiwr%d_%d", + major, i); + } + + t = kmalloc(sizeof(*t), 
GFP_KERNEL); + if (t == NULL) { + res = -ENOMEM; + PRINT_ERROR("Failed to allocate thread %s " + "(size %zd)", name, sizeof(*t)); + goto out_free; + } + + t->thr = kthread_run(fn, p, name); + if (IS_ERR(t->thr)) { + res = PTR_ERR(t->thr); + PRINT_ERROR("kthread_run() for thread %s failed: %d", + name, res); + kfree(t); + goto out_free; + } + list_add_tail(&t->threads_list_entry, &p->threads_list); + } + } + + list_add_tail(&p->thread_pools_list_entry, &iscsi_thread_pools_list); + res = 0; + + TRACE_DBG("Created iSCSI thread pool %p", p); + +out_unlock: + mutex_unlock(&iscsi_threads_pool_mutex); + + if (out_pool != NULL) + *out_pool = p; + + TRACE_EXIT_RES(res); return res; -} -static void iscsi_stop_threads(void) -{ - struct iscsi_thread_t *t, *tmp; - - list_for_each_entry_safe(t, tmp, &iscsi_threads_list, - threads_list_entry) { - int rc = kthread_stop(t->thr); - if (rc < 0) - TRACE_MGMT_DBG("kthread_stop() failed: %d", rc); +out_free: + list_for_each_entry_safe(t, tt, &p->threads_list, threads_list_entry) { + kthread_stop(t->thr); list_del(&t->threads_list_entry); kfree(t); } + goto out_unlock; +} + +void iscsi_threads_pool_put(struct iscsi_thread_pool *p) +{ + struct iscsi_thread *t, *tt; + + TRACE_ENTRY(); + + mutex_lock(&iscsi_threads_pool_mutex); + + p->thread_pool_ref--; + if (p->thread_pool_ref > 0) { + TRACE_DBG("iSCSI thread pool %p still has %d references)", + p, p->thread_pool_ref); + goto out_unlock; + } + + TRACE_DBG("Freeing iSCSI thread pool %p", p); + + list_for_each_entry_safe(t, tt, &p->threads_list, threads_list_entry) { + kthread_stop(t->thr); + list_del(&t->threads_list_entry); + kfree(t); + } + + list_del(&p->thread_pools_list_entry); + + kfree(p); + +out_unlock: + mutex_unlock(&iscsi_threads_pool_mutex); + + TRACE_EXIT(); return; } static int __init iscsi_init(void) { int err = 0; - int num; PRINT_INFO("iSCSI SCST Target - version %s", ISCSI_VERSION_STRING); @@ -3939,13 +4081,7 @@ static int __init iscsi_init(void) iscsi_conn_ktype.sysfs_ops 
= scst_sysfs_get_sysfs_ops(); #endif - num = max((int)num_online_cpus(), 2); - - err = iscsi_run_threads(num, "iscsird", istrd); - if (err != 0) - goto out_thr; - - err = iscsi_run_threads(num, "iscsiwr", istwr); + err = iscsi_threads_pool_get(NULL, &iscsi_main_thread_pool); if (err != 0) goto out_thr; @@ -3956,7 +4092,6 @@ out_thr: #ifdef CONFIG_SCST_PROC iscsi_procfs_exit(); #endif - iscsi_stop_threads(); #ifdef CONFIG_SCST_PROC out_reg_tmpl: @@ -3984,7 +4119,9 @@ out_free_dummy: static void __exit iscsi_exit(void) { - iscsi_stop_threads(); + iscsi_threads_pool_put(iscsi_main_thread_pool); + + sBUG_ON(!list_empty(&iscsi_thread_pools_list)); unregister_chrdev(ctr_major, ctr_name); diff --git a/iscsi-scst/kernel/iscsi.h b/iscsi-scst/kernel/iscsi.h index 945959de2..d8532013e 100644 --- a/iscsi-scst/kernel/iscsi.h +++ b/iscsi-scst/kernel/iscsi.h @@ -64,11 +64,30 @@ struct iscsi_tgt_params { unsigned int nop_in_interval; }; -struct network_thread_info { - struct task_struct *task; - unsigned int ready; +struct iscsi_thread { + struct task_struct *thr; + struct list_head threads_list_entry; }; +struct iscsi_thread_pool { + spinlock_t rd_lock; + struct list_head rd_list; + wait_queue_head_t rd_waitQ; + + spinlock_t wr_lock; + struct list_head wr_list; + wait_queue_head_t wr_waitQ; + + struct cpumask cpu_mask; + + int thread_pool_ref; + + struct list_head threads_list; + + struct list_head thread_pools_list_entry; +}; + + struct iscsi_target; struct iscsi_cmnd; @@ -151,6 +170,8 @@ struct iscsi_session { unsigned int sess_reinstating:1; unsigned int sess_shutting_down:1; + struct iscsi_thread_pool *sess_thr_pool; + /* All don't need any protection */ char *initiator_name; u64 sid; @@ -196,7 +217,7 @@ struct iscsi_conn { unsigned int rsp_timeout; /* in jiffies */ /* - * All 2 protected by iscsi_wr_lock. Modified independently to the + * All 2 protected by wr_lock. Modified independently to the * above field, hence the alignment. 
*/ unsigned short wr_state __attribute__((aligned(sizeof(long)))); @@ -232,7 +253,9 @@ struct iscsi_conn { int hdigest_type; int ddigest_type; - /* All 6 protected by iscsi_rd_lock */ + struct iscsi_thread_pool *conn_thr_pool; + + /* All 6 protected by rd_lock */ unsigned short rd_state; unsigned short rd_data_ready:1; /* Let's save some cache footprint by putting them here */ @@ -485,14 +508,6 @@ extern struct mutex target_mgmt_mutex; extern int ctr_open_state; extern const struct file_operations ctr_fops; -extern spinlock_t iscsi_rd_lock; -extern struct list_head iscsi_rd_list; -extern wait_queue_head_t iscsi_rd_waitQ; - -extern spinlock_t iscsi_wr_lock; -extern struct list_head iscsi_wr_list; -extern wait_queue_head_t iscsi_wr_waitQ; - /* iscsi.c */ extern struct iscsi_cmnd *cmnd_alloc(struct iscsi_conn *, struct iscsi_cmnd *parent); @@ -512,6 +527,9 @@ extern int iscsi_preliminary_complete(struct iscsi_cmnd *req, struct iscsi_cmnd *orig_req, bool get_data); extern int set_scst_preliminary_status_rsp(struct iscsi_cmnd *req, bool get_data, int key, int asc, int ascq); +extern int iscsi_threads_pool_get(const struct cpumask *cpu_mask, + struct iscsi_thread_pool **out_pool); +extern void iscsi_threads_pool_put(struct iscsi_thread_pool *p); /* conn.c */ #ifndef CONFIG_SCST_PROC @@ -591,6 +609,7 @@ extern int __add_session(struct iscsi_target *, struct iscsi_kern_session_info *); extern int __del_session(struct iscsi_target *, u64); extern int session_free(struct iscsi_session *session, bool del); +extern void iscsi_sess_force_close(struct iscsi_session *sess); /* params.c */ extern const char *iscsi_get_digest_name(int val, char *res); diff --git a/iscsi-scst/kernel/nthread.c b/iscsi-scst/kernel/nthread.c index 662145f89..6a6501a05 100644 --- a/iscsi-scst/kernel/nthread.c +++ b/iscsi-scst/kernel/nthread.c @@ -544,9 +544,9 @@ static void close_conn(struct iscsi_conn *conn) while (1) { bool t; - spin_lock_bh(&iscsi_wr_lock); + 
spin_lock_bh(&conn->conn_thr_pool->wr_lock); t = (conn->wr_state == ISCSI_CONN_WR_STATE_IDLE); - spin_unlock_bh(&iscsi_wr_lock); + spin_unlock_bh(&conn->conn_thr_pool->wr_lock); if (t && (atomic_read(&conn->conn_ref_cnt) == 0)) break; @@ -1001,12 +1001,12 @@ out: } /* - * Called under iscsi_rd_lock and BHs disabled, but will drop it inside, + * Called under rd_lock and BHs disabled, but will drop it inside, * then reaquire. */ -static void scst_do_job_rd(void) - __acquires(&iscsi_rd_lock) - __releases(&iscsi_rd_lock) +static void scst_do_job_rd(struct iscsi_thread_pool *p) + __acquires(&rd_lock) + __releases(&rd_lock) { TRACE_ENTRY(); @@ -1014,9 +1014,9 @@ static void scst_do_job_rd(void) * We delete/add to tail connections to maintain fairness between them. */ - while (!list_empty(&iscsi_rd_list)) { + while (!list_empty(&p->rd_list)) { int closed = 0, rc; - struct iscsi_conn *conn = list_entry(iscsi_rd_list.next, + struct iscsi_conn *conn = list_entry(p->rd_list.next, typeof(*conn), rd_list_entry); list_del(&conn->rd_list_entry); @@ -1027,26 +1027,26 @@ static void scst_do_job_rd(void) #ifdef CONFIG_SCST_EXTRACHECKS conn->rd_task = current; #endif - spin_unlock_bh(&iscsi_rd_lock); + spin_unlock_bh(&p->rd_lock); rc = process_read_io(conn, &closed); - spin_lock_bh(&iscsi_rd_lock); + spin_lock_bh(&p->rd_lock); if (unlikely(closed)) continue; if (unlikely(conn->conn_tm_active)) { - spin_unlock_bh(&iscsi_rd_lock); + spin_unlock_bh(&p->rd_lock); iscsi_check_tm_data_wait_timeouts(conn, false); - spin_lock_bh(&iscsi_rd_lock); + spin_lock_bh(&p->rd_lock); } #ifdef CONFIG_SCST_EXTRACHECKS conn->rd_task = NULL; #endif if ((rc == 0) || conn->rd_data_ready) { - list_add_tail(&conn->rd_list_entry, &iscsi_rd_list); + list_add_tail(&conn->rd_list_entry, &p->rd_list); conn->rd_state = ISCSI_CONN_RD_STATE_IN_LIST; } else conn->rd_state = ISCSI_CONN_RD_STATE_IDLE; @@ -1056,50 +1056,56 @@ static void scst_do_job_rd(void) return; } -static inline int test_rd_list(void) +static inline 
int test_rd_list(struct iscsi_thread_pool *p) { - int res = !list_empty(&iscsi_rd_list) || + int res = !list_empty(&p->rd_list) || unlikely(kthread_should_stop()); return res; } int istrd(void *arg) { + struct iscsi_thread_pool *p = arg; + int rc; + TRACE_ENTRY(); - PRINT_INFO("Read thread started, PID %d", current->pid); + PRINT_INFO("Read thread for pool %p started, PID %d", p, current->pid); current->flags |= PF_NOFREEZE; + rc = set_cpus_allowed_ptr(current, &p->cpu_mask); + if (rc != 0) + PRINT_ERROR("Setting CPU affinity failed: %d", rc); - spin_lock_bh(&iscsi_rd_lock); + spin_lock_bh(&p->rd_lock); while (!kthread_should_stop()) { wait_queue_t wait; init_waitqueue_entry(&wait, current); - if (!test_rd_list()) { - add_wait_queue_exclusive_head(&iscsi_rd_waitQ, &wait); + if (!test_rd_list(p)) { + add_wait_queue_exclusive_head(&p->rd_waitQ, &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); - if (test_rd_list()) + if (test_rd_list(p)) break; - spin_unlock_bh(&iscsi_rd_lock); + spin_unlock_bh(&p->rd_lock); schedule(); - spin_lock_bh(&iscsi_rd_lock); + spin_lock_bh(&p->rd_lock); } set_current_state(TASK_RUNNING); - remove_wait_queue(&iscsi_rd_waitQ, &wait); + remove_wait_queue(&p->rd_waitQ, &wait); } - scst_do_job_rd(); + scst_do_job_rd(p); } - spin_unlock_bh(&iscsi_rd_lock); + spin_unlock_bh(&p->rd_lock); /* * If kthread_should_stop() is true, we are guaranteed to be - * on the module unload, so iscsi_rd_list must be empty. + * on the module unload, so rd_list must be empty. */ - sBUG_ON(!list_empty(&iscsi_rd_list)); + sBUG_ON(!list_empty(&p->rd_list)); - PRINT_INFO("Read thread PID %d finished", current->pid); + PRINT_INFO("Read thread for PID %d for pool %p finished", current->pid, p); TRACE_EXIT(); return 0; @@ -1240,13 +1246,13 @@ void req_add_to_write_timeout_list(struct iscsi_cmnd *req) /* * conn_tm_active can be already cleared by * iscsi_check_tm_data_wait_timeouts(). write_list_lock is an inner - * lock for iscsi_rd_lock. + * lock for rd_lock. 
*/ if (unlikely(set_conn_tm_active)) { - spin_lock_bh(&iscsi_rd_lock); + spin_lock_bh(&conn->conn_thr_pool->rd_lock); TRACE_MGMT_DBG("Setting conn_tm_active for conn %p", conn); conn->conn_tm_active = 1; - spin_unlock_bh(&iscsi_rd_lock); + spin_unlock_bh(&conn->conn_thr_pool->rd_lock); } out: @@ -1726,12 +1732,12 @@ out: } /* - * Called under iscsi_wr_lock and BHs disabled, but will drop it inside, + * Called under wr_lock and BHs disabled, but will drop it inside, * then reaquire. */ -static void scst_do_job_wr(void) - __acquires(&iscsi_wr_lock) - __releases(&iscsi_wr_lock) +static void scst_do_job_wr(struct iscsi_thread_pool *p) + __acquires(&wr_lock) + __releases(&wr_lock) { TRACE_ENTRY(); @@ -1739,9 +1745,9 @@ static void scst_do_job_wr(void) * We delete/add to tail connections to maintain fairness between them. */ - while (!list_empty(&iscsi_wr_list)) { + while (!list_empty(&p->wr_list)) { int rc; - struct iscsi_conn *conn = list_entry(iscsi_wr_list.next, + struct iscsi_conn *conn = list_entry(p->wr_list.next, typeof(*conn), wr_list_entry); TRACE_DBG("conn %p, wr_state %x, wr_space_ready %d, " @@ -1757,13 +1763,13 @@ static void scst_do_job_wr(void) #ifdef CONFIG_SCST_EXTRACHECKS conn->wr_task = current; #endif - spin_unlock_bh(&iscsi_wr_lock); + spin_unlock_bh(&p->wr_lock); conn_get(conn); rc = iscsi_send(conn); - spin_lock_bh(&iscsi_wr_lock); + spin_lock_bh(&p->wr_lock); #ifdef CONFIG_SCST_EXTRACHECKS conn->wr_task = NULL; #endif @@ -1772,7 +1778,7 @@ static void scst_do_job_wr(void) "(conn %p)", conn); conn->wr_state = ISCSI_CONN_WR_STATE_SPACE_WAIT; } else if (test_write_ready(conn)) { - list_add_tail(&conn->wr_list_entry, &iscsi_wr_list); + list_add_tail(&conn->wr_list_entry, &p->wr_list); conn->wr_state = ISCSI_CONN_WR_STATE_IN_LIST; } else conn->wr_state = ISCSI_CONN_WR_STATE_IDLE; @@ -1784,50 +1790,56 @@ static void scst_do_job_wr(void) return; } -static inline int test_wr_list(void) +static inline int test_wr_list(struct iscsi_thread_pool *p) { - int 
res = !list_empty(&iscsi_wr_list) || + int res = !list_empty(&p->wr_list) || unlikely(kthread_should_stop()); return res; } int istwr(void *arg) { + struct iscsi_thread_pool *p = arg; + int rc; + TRACE_ENTRY(); - PRINT_INFO("Write thread started, PID %d", current->pid); + PRINT_INFO("Write thread for pool %p started, PID %d", p, current->pid); current->flags |= PF_NOFREEZE; + rc = set_cpus_allowed_ptr(current, &p->cpu_mask); + if (rc != 0) + PRINT_ERROR("Setting CPU affinity failed: %d", rc); - spin_lock_bh(&iscsi_wr_lock); + spin_lock_bh(&p->wr_lock); while (!kthread_should_stop()) { wait_queue_t wait; init_waitqueue_entry(&wait, current); - if (!test_wr_list()) { - add_wait_queue_exclusive_head(&iscsi_wr_waitQ, &wait); + if (!test_wr_list(p)) { + add_wait_queue_exclusive_head(&p->wr_waitQ, &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); - if (test_wr_list()) + if (test_wr_list(p)) break; - spin_unlock_bh(&iscsi_wr_lock); + spin_unlock_bh(&p->wr_lock); schedule(); - spin_lock_bh(&iscsi_wr_lock); + spin_lock_bh(&p->wr_lock); } set_current_state(TASK_RUNNING); - remove_wait_queue(&iscsi_wr_waitQ, &wait); + remove_wait_queue(&p->wr_waitQ, &wait); } - scst_do_job_wr(); + scst_do_job_wr(p); } - spin_unlock_bh(&iscsi_wr_lock); + spin_unlock_bh(&p->wr_lock); /* * If kthread_should_stop() is true, we are guaranteed to be - * on the module unload, so iscsi_wr_list must be empty. + * on the module unload, so wr_list must be empty. 
*/ - sBUG_ON(!list_empty(&iscsi_wr_list)); + sBUG_ON(!list_empty(&p->wr_list)); - PRINT_INFO("Write thread PID %d finished", current->pid); + PRINT_INFO("Write thread PID %d for pool %p finished", current->pid, p); TRACE_EXIT(); return 0; diff --git a/iscsi-scst/kernel/param.c b/iscsi-scst/kernel/param.c index 9ad4060d9..fb1f7ebfb 100644 --- a/iscsi-scst/kernel/param.c +++ b/iscsi-scst/kernel/param.c @@ -231,13 +231,13 @@ static int iscsi_tgt_params_set(struct iscsi_session *session, conn_list_entry) { conn->rsp_timeout = session->tgt_params.rsp_timeout * HZ; conn->nop_in_interval = session->tgt_params.nop_in_interval * HZ; - spin_lock_bh(&iscsi_rd_lock); + spin_lock_bh(&conn->conn_thr_pool->rd_lock); if (!conn->closing && (conn->nop_in_interval > 0)) { TRACE_DBG("Schedule Nop-In work for conn %p", conn); schedule_delayed_work(&conn->nop_in_delayed_work, conn->nop_in_interval + ISCSI_ADD_SCHED_TIME); } - spin_unlock_bh(&iscsi_rd_lock); + spin_unlock_bh(&conn->conn_thr_pool->rd_lock); } } else { GET_PARAM(params, info, iparams, queued_cmnds); diff --git a/iscsi-scst/kernel/session.c b/iscsi-scst/kernel/session.c index a9ee50dec..4cbbacd06 100644 --- a/iscsi-scst/kernel/session.c +++ b/iscsi-scst/kernel/session.c @@ -88,6 +88,11 @@ static int iscsi_session_alloc(struct iscsi_target *target, goto err; } + err = iscsi_threads_pool_get(&session->scst_sess->acg->acg_cpu_mask, + &session->sess_thr_pool); + if (err != 0) + goto err_unreg; + #ifdef CONFIG_SCST_PROC kfree(name); #endif @@ -98,6 +103,9 @@ static int iscsi_session_alloc(struct iscsi_target *target, *result = session; return 0; +err_unreg: + scst_unregister_session(session->scst_sess, 1, NULL); + err: if (session) { kfree(session->initiator_name); @@ -318,6 +326,11 @@ int session_free(struct iscsi_session *session, bool del) if (del) list_del(&session->session_list_entry); + if (session->sess_thr_pool != NULL) { + iscsi_threads_pool_put(session->sess_thr_pool); + session->sess_thr_pool = NULL; + } + if 
(session->scst_sess != NULL) { /* * We must NOT call scst_unregister_session() in the waiting @@ -352,6 +365,25 @@ int __del_session(struct iscsi_target *target, u64 sid) return session_free(session, true); } +/* Must be called under target_mutex */ +void iscsi_sess_force_close(struct iscsi_session *sess) +{ + struct iscsi_conn *conn; + + TRACE_ENTRY(); + + PRINT_INFO("Deleting session %llx with initiator %s (%p)", + (long long unsigned int)sess->sid, sess->initiator_name, sess); + + list_for_each_entry(conn, &sess->conn_list, conn_list_entry) { + TRACE_MGMT_DBG("Deleting connection with initiator %p", conn); + __mark_conn_closed(conn, ISCSI_CONN_ACTIVE_CLOSE|ISCSI_CONN_DELETING); + } + + TRACE_EXIT(); + return; +} + #ifdef CONFIG_SCST_PROC /* target_mutex supposed to be locked */ @@ -509,7 +541,6 @@ static ssize_t iscsi_sess_force_close_store(struct kobject *kobj, int res; struct scst_session *scst_sess; struct iscsi_session *sess; - struct iscsi_conn *conn; TRACE_ENTRY(); @@ -521,13 +552,7 @@ static ssize_t iscsi_sess_force_close_store(struct kobject *kobj, goto out; } - PRINT_INFO("Deleting session %llx with initiator %s (%p)", - (long long unsigned int)sess->sid, sess->initiator_name, sess); - - list_for_each_entry(conn, &sess->conn_list, conn_list_entry) { - TRACE_MGMT_DBG("Deleting connection with initiator %p", conn); - __mark_conn_closed(conn, ISCSI_CONN_ACTIVE_CLOSE|ISCSI_CONN_DELETING); - } + iscsi_sess_force_close(sess); mutex_unlock(&sess->target->target_mutex); diff --git a/scst/ChangeLog b/scst/ChangeLog index 7c8aefff2..f3cae8e67 100644 --- a/scst/ChangeLog +++ b/scst/ChangeLog @@ -1,3 +1,16 @@ +Summary of changes between versions 2.0.0 and 2.1.0 +--------------------------------------------------- + + - Assigning CPU affinity to threads and connections + + - Splitting read/write CDBs for pass-through devices with sg_tablesize + and max_sectors limitations added to scst_disk. + + - Support for thin provisioning commands added in scst_vdisk. 
+ + - Bug fixes and other minor improvements. + + Summary of changes between versions 1.0.1 and 1.0.2 --------------------------------------------------- diff --git a/scst/README b/scst/README index 87c1c2c4e..f22bd7bd4 100644 --- a/scst/README +++ b/scst/README @@ -515,6 +515,10 @@ Every target should have at least the following entries: initiators security groups, so you can assign the addressing method on per-initiator basis. + - cpu_mask - defines CPU affinity mask for threads serving this target. + For threads serving LUNs it is used only for devices with + threads_pool_type "per_initiator". + - io_grouping_type - defines how I/O from sessions to this target are grouped together. This I/O grouping is very important for performance. By setting this attribute in a right value, you can @@ -668,7 +672,8 @@ commands by looking inside this file. - "del GROUP_NAME" - deletes a new security group. Each security group's subdirectory contains 2 subdirectories: initiators -and luns. +and luns as well as the following attributes: addr_method, cpu_mask and +io_grouping_type. See above description of them. Each "initiators" subdirectory contains list of added to this groups initiator as well as as well as file "mgmt". This file has the following @@ -1487,13 +1492,15 @@ IMPORTANT: If you use on initiator some versions of Windows (at least W2K) for VDISK FILEIO devices above. -9. In some cases, for instance working with SSD devices, which consume 100% -of a single CPU load for data transfers in their internal threads, to -maximize IOPS it can be needed to assign for those threads dedicated -CPUs using Linux CPU affinity facilities. No IRQ processing should be -done on those CPUs. Check that using /proc/interrupts. See taskset -command and Documentation/IRQ-affinity.txt in your kernel's source tree -for how to assign IRQ affinity to tasks and IRQs. +9. 
In some cases, for instance when working with SSD devices that +consume 100% of a single CPU for data transfers in their internal +threads, it may be necessary to assign dedicated CPUs to those threads +to maximize IOPS. Consider using the cpu_mask attribute for devices with +threads_pool_type "per_initiator", or Linux CPU affinity facilities for +other threads_pool_types. No IRQ processing should be done on those +CPUs. Check that using /proc/interrupts. See the taskset command and +Documentation/IRQ-affinity.txt in your kernel's source tree for how to +assign CPU affinity to tasks and IRQs. The reason for that is that processing of coming commands in SIRQ context might be done on the same CPUs as SSD devices' threads doing data diff --git a/scst/README_in-tree b/scst/README_in-tree index 86bcac436..c0ecfedc0 100644 --- a/scst/README_in-tree +++ b/scst/README_in-tree @@ -383,6 +383,10 @@ Every target should have at least the following entries: initiators security groups, so you can assign the addressing method on per-initiator basis. + - cpu_mask - defines CPU affinity mask for threads serving this target. + For threads serving LUNs it is used only for devices with + threads_pool_type "per_initiator". + - io_grouping_type - defines how I/O from sessions to this target are grouped together. This I/O grouping is very important for performance. By setting this attribute in a right value, you can @@ -536,7 +540,8 @@ commands by looking inside this file. - "del GROUP_NAME" - deletes a new security group. Each security group's subdirectory contains 2 subdirectories: initiators -and luns. +and luns as well as the following attributes: addr_method, cpu_mask and +io_grouping_type. See above description of them. Each "initiators" subdirectory contains list of added to this groups initiator as well as as well as file "mgmt".
This file has the following @@ -1306,13 +1311,15 @@ IMPORTANT: If you use on initiator some versions of Windows (at least W2K) See also important notes about setting block sizes >512 bytes for VDISK FILEIO devices above. -9. In some cases, for instance working with SSD devices, which consume 100% -of a single CPU load for data transfers in their internal threads, to -maximize IOPS it can be needed to assign for those threads dedicated -CPUs using Linux CPU affinity facilities. No IRQ processing should be -done on those CPUs. Check that using /proc/interrupts. See taskset -command and Documentation/IRQ-affinity.txt in your kernel's source tree -for how to assign IRQ affinity to tasks and IRQs. +9. In some cases, for instance when working with SSD devices that +consume 100% of a single CPU for data transfers in their internal +threads, it may be necessary to assign dedicated CPUs to those threads +to maximize IOPS. Consider using the cpu_mask attribute for devices with +threads_pool_type "per_initiator", or Linux CPU affinity facilities for +other threads_pool_types. No IRQ processing should be done on those +CPUs. Check that using /proc/interrupts. See the taskset command and +Documentation/IRQ-affinity.txt in your kernel's source tree for how to +assign CPU affinity to tasks and IRQs.
The reason for that is that processing of coming commands in SIRQ context might be done on the same CPUs as SSD devices' threads doing data diff --git a/scst/include/scst.h b/scst/include/scst.h index b19192eb0..d80132ba3 100644 --- a/scst/include/scst.h +++ b/scst/include/scst.h @@ -335,6 +335,11 @@ enum scst_exec_context { */ #define SCST_AEN_SCSI 0 +/* + * Notifies that CPU affinity mask on the corresponding session changed + */ +#define SCST_AEN_CPU_MASK_CHANGED 1 + /************************************************************* ** Allowed return/status codes for report_aen() callback and ** scst_set_aen_delivery_status() function @@ -2390,6 +2395,9 @@ struct scst_acg { /* Type of I/O initiators groupping */ int acg_io_grouping_type; + /* CPU affinity for threads in this ACG */ + struct cpumask acg_cpu_mask; + unsigned int tgt_acg:1; /* sysfs release completion */ @@ -3887,6 +3895,7 @@ struct scst_sysfs_work_item { bool is_tgt_kobj; int io_grouping_type; bool enable; + struct cpumask cpu_mask; }; }; struct { diff --git a/scst/src/scst_lib.c b/scst/src/scst_lib.c index 6c2ddf4d6..64455c117 100644 --- a/scst/src/scst_lib.c +++ b/scst/src/scst_lib.c @@ -1325,7 +1325,7 @@ void scst_set_initial_UA(struct scst_session *sess, int key, int asc, int ascq) } EXPORT_SYMBOL(scst_set_initial_UA); -static struct scst_aen *scst_alloc_aen(struct scst_session *sess, +struct scst_aen *scst_alloc_aen(struct scst_session *sess, uint64_t unpacked_lun) { struct scst_aen *aen; @@ -1351,7 +1351,7 @@ out: return aen; } -static void scst_free_aen(struct scst_aen *aen) +void scst_free_aen(struct scst_aen *aen) { TRACE_ENTRY(); @@ -2814,6 +2814,7 @@ struct scst_acg *scst_alloc_add_acg(struct scst_tgt *tgt, INIT_LIST_HEAD(&acg->acg_dev_list); INIT_LIST_HEAD(&acg->acg_sess_list); INIT_LIST_HEAD(&acg->acn_list); + cpumask_copy(&acg->acg_cpu_mask, &default_cpu_mask); acg->acg_name = kstrdup(acg_name, GFP_KERNEL); if (acg->acg_name == NULL) { PRINT_ERROR("%s", "Allocation of acg_name failed"); 
diff --git a/scst/src/scst_main.c b/scst/src/scst_main.c index 7ae3c7c44..6392bfa52 100644 --- a/scst/src/scst_main.c +++ b/scst/src/scst_main.c @@ -158,6 +158,8 @@ static int suspend_count; static int scst_virt_dev_last_id; /* protected by scst_mutex */ +struct cpumask default_cpu_mask; + static unsigned int scst_max_cmd_mem; unsigned int scst_max_dev_cmd_mem; @@ -1588,9 +1590,15 @@ int scst_add_threads(struct scst_cmd_threads *cmd_threads, cmd_threads, "%s%d", nm, n++); } else if (tgt_dev != NULL) { char nm[11]; /* to limit the name's len */ + int rc; strlcpy(nm, tgt_dev->dev->virt_name, ARRAY_SIZE(nm)); thr->cmd_thread = kthread_create(scst_cmd_thread, cmd_threads, "%s%d_%d", nm, tgt_dev_num, n++); + rc = set_cpus_allowed_ptr(thr->cmd_thread, + &tgt_dev->sess->acg->acg_cpu_mask); + if (rc != 0) + PRINT_ERROR("Setting CPU affinity failed: " + "%d", rc); } else thr->cmd_thread = kthread_create(scst_cmd_thread, cmd_threads, "scstd%d", n++); @@ -2182,6 +2190,7 @@ static int __init init_scst(void) init_waitqueue_head(&scst_dev_cmd_waitQ); mutex_init(&scst_suspend_mutex); INIT_LIST_HEAD(&scst_cmd_threads_list); + cpus_setall(default_cpu_mask); scst_init_threads(&scst_main_cmd_threads); diff --git a/scst/src/scst_priv.h b/scst/src/scst_priv.h index 7cc159eb6..897804af0 100644 --- a/scst/src/scst_priv.h +++ b/scst/src/scst_priv.h @@ -193,6 +193,8 @@ extern spinlock_t scst_mgmt_lock; extern struct list_head scst_sess_init_list; extern struct list_head scst_sess_shut_list; +extern struct cpumask default_cpu_mask; + struct scst_cmd_thread_t { struct task_struct *cmd_thread; struct list_head thread_list_entry; @@ -550,6 +552,10 @@ void scst_process_reset(struct scst_device *dev, bool scst_is_ua_global(const uint8_t *sense, int len); void scst_requeue_ua(struct scst_cmd *cmd); +struct scst_aen *scst_alloc_aen(struct scst_session *sess, + uint64_t unpacked_lun); +void scst_free_aen(struct scst_aen *aen); + void scst_gen_aen_or_ua(struct scst_tgt_dev *tgt_dev, int key, int 
asc, int ascq); diff --git a/scst/src/scst_sysfs.c b/scst/src/scst_sysfs.c index c92b81bb5..5dc03ee19 100644 --- a/scst/src/scst_sysfs.c +++ b/scst/src/scst_sysfs.c @@ -123,6 +123,12 @@ static ssize_t scst_tgt_io_grouping_type_show(struct kobject *kobj, static ssize_t scst_tgt_io_grouping_type_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count); +static ssize_t scst_tgt_cpu_mask_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf); +static ssize_t scst_tgt_cpu_mask_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count); static ssize_t scst_ini_group_mgmt_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf); @@ -156,6 +162,12 @@ static ssize_t scst_acg_io_grouping_type_show(struct kobject *kobj, static ssize_t scst_acg_io_grouping_type_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count); +static ssize_t scst_acg_cpu_mask_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf); +static ssize_t scst_acg_cpu_mask_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count); static ssize_t scst_acn_file_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf); @@ -964,6 +976,11 @@ static struct kobj_attribute scst_tgt_io_grouping_type = scst_tgt_io_grouping_type_show, scst_tgt_io_grouping_type_store); +static struct kobj_attribute scst_tgt_cpu_mask = + __ATTR(cpu_mask, S_IRUGO | S_IWUSR, + scst_tgt_cpu_mask_show, + scst_tgt_cpu_mask_store); + static struct kobj_attribute scst_rel_tgt_id = __ATTR(rel_tgt_id, S_IRUGO | S_IWUSR, scst_rel_tgt_id_show, scst_rel_tgt_id_store); @@ -977,6 +994,11 @@ static struct kobj_attribute scst_acg_io_grouping_type = scst_acg_io_grouping_type_show, scst_acg_io_grouping_type_store); +static struct kobj_attribute scst_acg_cpu_mask = + __ATTR(cpu_mask, S_IRUGO | S_IWUSR, + scst_acg_cpu_mask_show, + scst_acg_cpu_mask_store); + static ssize_t 
scst_tgt_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -1183,6 +1205,13 @@ int scst_tgt_sysfs_create(struct scst_tgt *tgt) goto out_err; } + res = sysfs_create_file(&tgt->tgt_kobj, &scst_tgt_cpu_mask.attr); + if (res != 0) { + PRINT_ERROR("Can't add attribute %s for tgt %s", + scst_tgt_cpu_mask.attr.name, tgt->tgt_name); + goto out_err; + } + pattr = tgt->tgtt->tgt_attrs; if (pattr != NULL) { while (*pattr != NULL) { @@ -3143,6 +3172,168 @@ out: return res; } +static ssize_t __scst_acg_cpu_mask_show(struct scst_acg *acg, char *buf) +{ + int res; + + res = cpumask_scnprintf(buf, SCST_SYSFS_BLOCK_SIZE, + &acg->acg_cpu_mask); + if (!cpus_equal(acg->acg_cpu_mask, default_cpu_mask)) + res += sprintf(&buf[res], "\n%s\n", SCST_SYSFS_KEY_MARK); + + return res; +} + +static int __scst_acg_process_cpu_mask_store(struct scst_tgt *tgt, + struct scst_acg *acg, struct cpumask *cpu_mask) +{ + int res = 0; + struct scst_session *sess; + + TRACE_DBG("tgt %p, acg %p", tgt, acg); + + if (mutex_lock_interruptible(&scst_mutex) != 0) { + res = -EINTR; + goto out; + } + + /* Check if tgt and acg not already freed while we were coming here */ + if (scst_check_tgt_acg_ptrs(tgt, acg) != 0) + goto out_unlock; + + cpumask_copy(&acg->acg_cpu_mask, cpu_mask); + + list_for_each_entry(sess, &acg->acg_sess_list, acg_sess_list_entry) { + int i; + for (i = 0; i < SESS_TGT_DEV_LIST_HASH_SIZE; i++) { + struct scst_tgt_dev *tgt_dev; + struct list_head *head = &sess->sess_tgt_dev_list[i]; + list_for_each_entry(tgt_dev, head, + sess_tgt_dev_list_entry) { + struct scst_cmd_thread_t *thr; + if (tgt_dev->active_cmd_threads != &tgt_dev->tgt_dev_cmd_threads) + continue; + list_for_each_entry(thr, + &tgt_dev->active_cmd_threads->threads_list, + thread_list_entry) { + int rc; + rc = set_cpus_allowed_ptr(thr->cmd_thread, cpu_mask); + if (rc != 0) + PRINT_ERROR("Setting CPU " + "affinity failed: %d", rc); + } + } + } + if (tgt->tgtt->report_aen != NULL) { + struct scst_aen *aen; + 
int rc; + + aen = scst_alloc_aen(sess, 0); + if (aen == NULL) { + PRINT_ERROR("Unable to notify target driver %s " + "about cpu_mask change", tgt->tgt_name); + continue; + } + + aen->event_fn = SCST_AEN_CPU_MASK_CHANGED; + + TRACE_DBG("Calling target's %s report_aen(%p)", + tgt->tgtt->name, aen); + rc = tgt->tgtt->report_aen(aen); + TRACE_DBG("Target's %s report_aen(%p) returned %d", + tgt->tgtt->name, aen, rc); + if (rc != SCST_AEN_RES_SUCCESS) + scst_free_aen(aen); + } + } + + +out_unlock: + mutex_unlock(&scst_mutex); + +out: + return res; +} + +static int __scst_acg_cpu_mask_store_work_fn(struct scst_sysfs_work_item *work) +{ + return __scst_acg_process_cpu_mask_store(work->tgt, work->acg, + &work->cpu_mask); +} + +static ssize_t __scst_acg_cpu_mask_store(struct scst_acg *acg, + const char *buf, size_t count) +{ + int res; + struct scst_sysfs_work_item *work; + + /* cpumask might be too big for stack */ + + res = scst_alloc_sysfs_work(__scst_acg_cpu_mask_store_work_fn, + false, &work); + if (res != 0) + goto out; + + /* + * We can't use cpumask_parse_user() here, because it expects + * buffer in the user space. 
+ */ + res = __bitmap_parse(buf, count, 0, cpumask_bits(&work->cpu_mask), + nr_cpumask_bits); + if (res != 0) { + PRINT_ERROR("__bitmap_parse() failed: %d", res); + goto out_release; + } + + if (cpus_equal(acg->acg_cpu_mask, work->cpu_mask)) + goto out; + + work->tgt = acg->tgt; + work->acg = acg; + + res = scst_sysfs_queue_wait_work(work); + +out: + return res; + +out_release: + scst_sysfs_work_release(&work->sysfs_work_kref); + goto out; +} + +static ssize_t scst_tgt_cpu_mask_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct scst_acg *acg; + struct scst_tgt *tgt; + + tgt = container_of(kobj, struct scst_tgt, tgt_kobj); + acg = tgt->default_acg; + + return __scst_acg_cpu_mask_show(acg, buf); +} + +static ssize_t scst_tgt_cpu_mask_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int res; + struct scst_acg *acg; + struct scst_tgt *tgt; + + tgt = container_of(kobj, struct scst_tgt, tgt_kobj); + acg = tgt->default_acg; + + res = __scst_acg_cpu_mask_store(acg, buf, count); + if (res != 0) + goto out; + + res = count; + +out: + TRACE_EXIT_RES(res); + return res; +} + /* * Called with scst_mutex held. 
* @@ -3241,6 +3432,13 @@ int scst_acg_sysfs_create(struct scst_tgt *tgt, goto out_del; } + res = sysfs_create_file(&acg->acg_kobj, &scst_acg_cpu_mask.attr); + if (res != 0) { + PRINT_ERROR("Can't add tgt attr %s for tgt %s", + scst_acg_cpu_mask.attr.name, tgt->tgt_name); + goto out_del; + } + out: TRACE_EXIT_RES(res); return res; @@ -3303,6 +3501,35 @@ out: return res; } +static ssize_t scst_acg_cpu_mask_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct scst_acg *acg; + + acg = container_of(kobj, struct scst_acg, acg_kobj); + + return __scst_acg_cpu_mask_show(acg, buf); +} + +static ssize_t scst_acg_cpu_mask_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int res; + struct scst_acg *acg; + + acg = container_of(kobj, struct scst_acg, acg_kobj); + + res = __scst_acg_cpu_mask_store(acg, buf, count); + if (res != 0) + goto out; + + res = count; + +out: + TRACE_EXIT_RES(res); + return res; +} + static ssize_t scst_ini_group_mgmt_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -5199,7 +5426,7 @@ int __init scst_sysfs_init(void) NULL, "scst_uid"); if (IS_ERR(sysfs_work_thread)) { res = PTR_ERR(sysfs_work_thread); - PRINT_ERROR("kthread_create() for user interface thread " + PRINT_ERROR("kthread_run() for user interface thread " "failed: %d", res); sysfs_work_thread = NULL; goto out;