From 355fa36d34b8198c7dfdd133ae9cae4010bcbcf6 Mon Sep 17 00:00:00 2001
From: Vladislav Bolkhovitin
Date: Fri, 6 May 2016 01:53:30 +0000
Subject: [PATCH] scst, EXPERIMENTAL: improve commands' CPU affinity

Commands on the fast path are now processed by the same thread
throughout their entire lifetime.

git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@6886 d57e44dd-8a1f-0410-8b47-8ef2f437770f
---
 scst/include/scst.h  |   3 +
 scst/src/scst_main.c |  26 +++-
 scst/src/scst_priv.h |   5 +-
 scst/src/scst_targ.c | 165 ++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 175 insertions(+), 24 deletions(-)

diff --git a/scst/include/scst.h b/scst/include/scst.h
index 85b0f0aee..215800533 100644
--- a/scst/include/scst.h
+++ b/scst/include/scst.h
@@ -2123,6 +2123,9 @@ struct scst_cmd {
 	/* Pointer to lists of commands with the lock */
 	struct scst_cmd_threads *cmd_threads;
 
+	/* Assigned processing thread (for better CPU affinity) */
+	struct scst_cmd_thread_t *cmd_thr;
+
 	atomic_t cmd_ref;
 
 	struct scst_session *sess;	/* corresponding session */
diff --git a/scst/src/scst_main.c b/scst/src/scst_main.c
index dbf6431b0..8aeee5342 100644
--- a/scst/src/scst_main.c
+++ b/scst/src/scst_main.c
@@ -104,6 +104,7 @@ struct kmem_cache *scst_dev_cachep;
 struct kmem_cache *scst_tgtd_cachep;
 struct kmem_cache *scst_sess_cachep;
 struct kmem_cache *scst_acgd_cachep;
+static struct kmem_cache *scst_thr_cachep;
 
 #ifdef CONFIG_SCST_PROC
 struct list_head scst_acg_list;
@@ -1922,23 +1923,26 @@ int scst_add_threads(struct scst_cmd_threads *cmd_threads,
 	}
 
 	for (i = 0; i < num; i++) {
-		thr = kzalloc(sizeof(*thr), GFP_KERNEL);
+		thr = kmem_cache_zalloc(scst_thr_cachep, GFP_KERNEL);
 		if (!thr) {
 			res = -ENOMEM;
 			PRINT_ERROR("Fail to allocate thr %d", res);
 			goto out_wait;
 		}
 
+		INIT_LIST_HEAD(&thr->thr_active_cmd_list);
+		spin_lock_init(&thr->thr_cmd_list_lock);
+		thr->thr_cmd_threads = cmd_threads;
 		if (dev != NULL) {
 			thr->cmd_thread = kthread_create(scst_cmd_thread,
-				cmd_threads, "%.13s%d", dev->virt_name, n++);
+				thr, "%.13s%d", dev->virt_name, n++);
 		} else if (tgt_dev != NULL) {
 			thr->cmd_thread = kthread_create(scst_cmd_thread,
-				cmd_threads, "%.10s%d_%d",
+				thr, "%.10s%d_%d",
 				tgt_dev->dev->virt_name, tgt_dev_num, n++);
 		} else
 			thr->cmd_thread = kthread_create(scst_cmd_thread,
-				cmd_threads, "scstd%d", n++);
+				thr, "scstd%d", n++);
 
 		if (IS_ERR(thr->cmd_thread)) {
 			res = PTR_ERR(thr->cmd_thread);
@@ -1990,6 +1994,10 @@ out:
 	return res;
 }
 
+/*
+ * The threads being stopped must not have any commands assigned to them,
+ * which usually implies that activity has been suspended.
+ */
 void scst_del_threads(struct scst_cmd_threads *cmd_threads, int num)
 {
 	TRACE_ENTRY();
@@ -2018,7 +2026,7 @@ void scst_del_threads(struct scst_cmd_threads *cmd_threads, int num)
 		if (rc != 0 && rc != -EINTR)
 			TRACE_MGMT_DBG("kthread_stop() failed: %d", rc);
 
-		kfree(ct);
+		kmem_cache_free(scst_thr_cachep, ct);
 	}
 
 	EXTRACHECKS_BUG_ON((cmd_threads->nr_threads == 0) &&
@@ -2590,12 +2598,14 @@ static int __init init_scst(void)
 #endif
 	if (!INIT_CACHEP(scst_acgd_cachep, scst_acg_dev)) /* read-mostly */
 		goto out_destroy_tgtd_cache;
+	if (!INIT_CACHEP_ALIGN(scst_thr_cachep, scst_cmd_thread_t))
+		goto out_destroy_acg_cache;
 
 	scst_mgmt_mempool = mempool_create(64, mempool_alloc_slab,
 		mempool_free_slab, scst_mgmt_cachep);
 	if (scst_mgmt_mempool == NULL) {
 		res = -ENOMEM;
-		goto out_destroy_acg_cache;
+		goto out_destroy_thr_cache;
 	}
 
 	/*
@@ -2765,6 +2775,9 @@ out_destroy_mgmt_stub_mempool:
 out_destroy_mgmt_mempool:
 	mempool_destroy(scst_mgmt_mempool);
 
+out_destroy_thr_cache:
+	kmem_cache_destroy(scst_thr_cachep);
+
 out_destroy_acg_cache:
 	kmem_cache_destroy(scst_acgd_cachep);
 
@@ -2857,6 +2870,7 @@ static void __exit exit_scst(void)
 	DEINIT_CACHEP(scst_dev_cachep);
 	DEINIT_CACHEP(scst_tgt_cachep);
 	DEINIT_CACHEP(scst_acgd_cachep);
+	DEINIT_CACHEP(scst_thr_cachep);
 
 	scst_lib_exit();
 
diff --git a/scst/src/scst_priv.h b/scst/src/scst_priv.h
index 77711de83..a16e59024 100644
--- a/scst/src/scst_priv.h
+++ b/scst/src/scst_priv.h
@@ -212,9 +212,12 @@ extern struct list_head scst_sess_shut_list;
 extern cpumask_t default_cpu_mask;
 
 struct scst_cmd_thread_t {
+	struct list_head thr_active_cmd_list;
+	spinlock_t thr_cmd_list_lock;
 	struct task_struct *cmd_thread;
+	struct scst_cmd_threads *thr_cmd_threads;
 	struct list_head thread_list_entry;
-	bool being_stopped;
+	bool being_stopped;
 };
 
 static inline bool scst_set_io_context(struct scst_cmd *cmd,
diff --git a/scst/src/scst_targ.c b/scst/src/scst_targ.c
index 52aa7066c..fa17b3cd8 100644
--- a/scst/src/scst_targ.c
+++ b/scst/src/scst_targ.c
@@ -1917,18 +1917,32 @@ static void scst_process_redirect_cmd(struct scst_cmd *cmd,
 			context);
 		/* go through */
 	case SCST_CONTEXT_THREAD:
-		spin_lock_irqsave(&cmd->cmd_threads->cmd_list_lock, flags);
+	{
+		struct list_head *active_cmd_list;
+		if (cmd->cmd_thr != NULL) {
+			TRACE_DBG("Using assigned thread %p for cmd %p",
+				cmd->cmd_thr, cmd);
+			active_cmd_list = &cmd->cmd_thr->thr_active_cmd_list;
+			spin_lock_irqsave(&cmd->cmd_thr->thr_cmd_list_lock, flags);
+		} else {
+			active_cmd_list = &cmd->cmd_threads->active_cmd_list;
+			spin_lock_irqsave(&cmd->cmd_threads->cmd_list_lock, flags);
+		}
 		TRACE_DBG("Adding cmd %p to active cmd list", cmd);
 		if (unlikely(cmd->queue_type == SCST_CMD_QUEUE_HEAD_OF_QUEUE))
-			list_add(&cmd->cmd_list_entry,
-				&cmd->cmd_threads->active_cmd_list);
+			list_add(&cmd->cmd_list_entry, active_cmd_list);
 		else
-			list_add_tail(&cmd->cmd_list_entry,
-				&cmd->cmd_threads->active_cmd_list);
-		wake_up(&cmd->cmd_threads->cmd_list_waitQ);
-		spin_unlock_irqrestore(&cmd->cmd_threads->cmd_list_lock, flags);
+			list_add_tail(&cmd->cmd_list_entry, active_cmd_list);
+		if (cmd->cmd_thr != NULL) {
+			wake_up_process(cmd->cmd_thr->cmd_thread);
+			spin_unlock_irqrestore(&cmd->cmd_thr->thr_cmd_list_lock, flags);
+		} else {
+			wake_up(&cmd->cmd_threads->cmd_list_waitQ);
+			spin_unlock_irqrestore(&cmd->cmd_threads->cmd_list_lock, flags);
+		}
 		break;
 	}
+	}
 
 	TRACE_EXIT();
 	return;
@@ -5453,17 +5467,20 @@ static void scst_do_job_active(struct list_head *cmd_list,
 	return;
 }
 
-static inline int test_cmd_threads(struct scst_cmd_threads *p_cmd_threads)
+static inline int test_cmd_threads(struct scst_cmd_thread_t *thr)
 {
-	int res = !list_empty(&p_cmd_threads->active_cmd_list) ||
-		unlikely(kthread_should_stop()) ||
-		tm_dbg_is_release();
+	int res = !list_empty(&thr->thr_active_cmd_list) ||
+		!list_empty(&thr->thr_cmd_threads->active_cmd_list) ||
+		unlikely(kthread_should_stop()) ||
+		tm_dbg_is_release();
 	return res;
 }
 
 int scst_cmd_thread(void *arg)
 {
-	struct scst_cmd_threads *p_cmd_threads = arg;
+	struct scst_cmd_thread_t *thr = arg;
+	struct scst_cmd_threads *p_cmd_threads = thr->thr_cmd_threads;
+	bool someth_done, p_locked, thr_locked;
 
 	TRACE_ENTRY();
 
@@ -5479,10 +5496,24 @@ int scst_cmd_thread(void *arg)
 	wake_up_all(&p_cmd_threads->ioctx_wq);
 
 	spin_lock_irq(&p_cmd_threads->cmd_list_lock);
+	spin_lock(&thr->thr_cmd_list_lock);
 	while (!kthread_should_stop()) {
-		wait_event_locked(p_cmd_threads->cmd_list_waitQ,
-			test_cmd_threads(p_cmd_threads), lock_irq,
-			p_cmd_threads->cmd_list_lock);
+		if (!test_cmd_threads(thr)) {
+			DEFINE_WAIT(wait);
+			do {
+				prepare_to_wait_exclusive_head(
+					&p_cmd_threads->cmd_list_waitQ,
+					&wait, TASK_INTERRUPTIBLE);
+				if (test_cmd_threads(thr))
+					break;
+				spin_unlock(&thr->thr_cmd_list_lock);
+				spin_unlock_irq(&p_cmd_threads->cmd_list_lock);
+				schedule();
+				spin_lock_irq(&p_cmd_threads->cmd_list_lock);
+				spin_lock(&thr->thr_cmd_list_lock);
+			} while (!test_cmd_threads(thr));
+			finish_wait(&p_cmd_threads->cmd_list_waitQ, &wait);
+		}
 
 		if (tm_dbg_is_release()) {
 			spin_unlock_irq(&p_cmd_threads->cmd_list_lock);
@@ -5490,9 +5521,109 @@
 			spin_lock_irq(&p_cmd_threads->cmd_list_lock);
 		}
 
-		scst_do_job_active(&p_cmd_threads->active_cmd_list,
-			&p_cmd_threads->cmd_list_lock, false);
+		/*
+		 * The idea of this code is to give the local queue roughly
+		 * 2:1 priority over the more global queue, and to keep the
+		 * local processing from writing to the more global data
+		 * during its iterations while the more global queue is
+		 * empty. Why 2:1? Because 2 is the average number of
+		 * intermediate command states that reach this point.
+		 */
+
+		p_locked = true;
+		thr_locked = true;
+		do {
+			int thr_cnt;
+
+			someth_done = false;
+again:
+			if (!list_empty(&p_cmd_threads->active_cmd_list)) {
+				struct scst_cmd *cmd;
+
+				if (!p_locked) {
+					if (thr_locked) {
+						spin_unlock_irq(&thr->thr_cmd_list_lock);
+						thr_locked = false;
+					}
+					spin_lock_irq(&p_cmd_threads->cmd_list_lock);
+					p_locked = true;
+					goto again;
+				}
+
+				cmd = list_first_entry(&p_cmd_threads->active_cmd_list,
+					typeof(*cmd), cmd_list_entry);
+
+				TRACE_DBG("Deleting cmd %p from active cmd list", cmd);
+				list_del(&cmd->cmd_list_entry);
+
+				if (thr_locked) {
+					spin_unlock(&thr->thr_cmd_list_lock);
+					thr_locked = false;
+				}
+				spin_unlock_irq(&p_cmd_threads->cmd_list_lock);
+				p_locked = false;
+
+				if (cmd->cmd_thr == NULL) {
+					TRACE_DBG("Assigning thread %p on cmd %p",
+						thr, cmd);
+					cmd->cmd_thr = thr;
+				}
+
+				scst_process_active_cmd(cmd, false);
+				someth_done = true;
+			}
+
+			if (thr_locked && p_locked) {
+				/* We need to maintain order of locks and unlocks */
+				spin_unlock(&thr->thr_cmd_list_lock);
+				spin_unlock(&p_cmd_threads->cmd_list_lock);
+				spin_lock(&thr->thr_cmd_list_lock);
+				p_locked = false;
+			} else if (!thr_locked) {
+				if (p_locked) {
+					spin_unlock_irq(&p_cmd_threads->cmd_list_lock);
+					p_locked = false;
+				}
+				spin_lock_irq(&thr->thr_cmd_list_lock);
+				thr_locked = true;
+			}
+
+			thr_cnt = 0;
+			while (!list_empty(&thr->thr_active_cmd_list)) {
+				struct scst_cmd *cmd = list_first_entry(
+					&thr->thr_active_cmd_list,
+					typeof(*cmd), cmd_list_entry);
+
+				TRACE_DBG("Deleting cmd %p from thr active cmd list", cmd);
+				list_del(&cmd->cmd_list_entry);
+
+				spin_unlock_irq(&thr->thr_cmd_list_lock);
+				thr_locked = false;
+
+				scst_process_active_cmd(cmd, false);
+
+				someth_done = true;
+
+				if (++thr_cnt == 3)
+					break;
+				else {
+					spin_lock_irq(&thr->thr_cmd_list_lock);
+					thr_locked = true;
+				}
+			}
+		} while (someth_done);
+
+		EXTRACHECKS_BUG_ON(p_locked);
+
+		if (thr_locked) {
+			spin_unlock_irq(&thr->thr_cmd_list_lock);
+			thr_locked = false;
+		}
+
+		spin_lock_irq(&p_cmd_threads->cmd_list_lock);
+		spin_lock(&thr->thr_cmd_list_lock);
 	}
+	spin_unlock(&thr->thr_cmd_list_lock);
 	spin_unlock_irq(&p_cmd_threads->cmd_list_lock);
 
 	scst_ioctx_put(p_cmd_threads);
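
A user-space model of the dispatch policy in the scst_cmd_thread() loop above;
this is NOT SCST code. The queue helpers, struct cmd, NWORKERS and LOCAL_BATCH
are invented for illustration, plain arrays stand in for the kernel lists and
spinlocks, and a round-robin loop stands in for concurrent kernel threads. It
demonstrates the two properties the patch is after: a command gets pinned to
whichever worker first takes it from the shared queue, and each worker serves
one shared-queue command and then at most LOCAL_BATCH pinned commands per
round, so neither queue starves.

/*
 * Standalone model of the local/global queue policy; not SCST code.
 */
#include <stdio.h>

#define NCMDS		8
#define NWORKERS	2
#define LOCAL_BATCH	3	/* mirrors the "++thr_cnt == 3" cut-off */

struct cmd {
	int id;
	int owner;		/* -1 until a worker claims it (cmd->cmd_thr) */
	int states_left;	/* intermediate states before completion */
};

static struct cmd cmds[NCMDS];
static int global_q[NCMDS * 4], g_head, g_tail;
static int local_q[NWORKERS][NCMDS * 4], l_head[NWORKERS], l_tail[NWORKERS];

static void push_global(int id) { global_q[g_tail++] = id; }
static int pop_global(void) { return g_head < g_tail ? global_q[g_head++] : -1; }
static void push_local(int w, int id) { local_q[w][l_tail[w]++] = id; }
static int pop_local(int w) { return l_head[w] < l_tail[w] ? local_q[w][l_head[w]++] : -1; }

/*
 * One processing step: the first worker to touch a command becomes its
 * owner, and every later state of the command is re-queued on the owner's
 * local queue (what scst_process_redirect_cmd() does once cmd_thr is set).
 */
static void process(int w, int id)
{
	struct cmd *c = &cmds[id];

	if (c->owner == -1)
		c->owner = w;	/* "cmd->cmd_thr = thr" */
	printf("worker %d: cmd %d (owner %d), %d state(s) left\n",
		w, c->id, c->owner, c->states_left - 1);
	if (--c->states_left > 0)
		push_local(c->owner, id);
}

int main(void)
{
	int w, id, i, busy = 1;

	for (i = 0; i < NCMDS; i++) {
		cmds[i] = (struct cmd){ .id = i, .owner = -1, .states_left = 2 };
		push_global(i);
	}
	while (busy) {
		busy = 0;
		for (w = 0; w < NWORKERS; w++) {
			int n = 0;

			/* one command from the shared queue... */
			if ((id = pop_global()) >= 0) {
				process(w, id);
				busy = 1;
			}
			/* ...then up to LOCAL_BATCH pinned commands */
			while (n++ < LOCAL_BATCH && (id = pop_local(w)) >= 0) {
				process(w, id);
				busy = 1;
			}
		}
	}
	return 0;
}

Built with e.g. "cc -std=c99 model.c", the output shows every command staying
on its owning worker after the first touch.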
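What makes the pinning effective is the directed wakeup in
scst_process_redirect_cmd(): with cmd->cmd_thr set, it calls wake_up_process()
on the owning kthread instead of wake_up() on the shared cmd_list_waitQ, so a
command is continued by the thread (and hence CPU cache) that already handled
it, rather than by whichever worker the waitqueue happens to pick. A minimal
pthread analogue of the directed style; the names are invented and this is
neither SCST nor kernel API code:

#include <pthread.h>
#include <stdio.h>

struct worker {
	pthread_t thread;
	pthread_mutex_t lock;
	pthread_cond_t wakeup;	/* private: plays the role of wake_up_process() */
	int has_work;
};

static void *worker_fn(void *arg)
{
	struct worker *w = arg;

	pthread_mutex_lock(&w->lock);
	while (!w->has_work)
		pthread_cond_wait(&w->wakeup, &w->lock);
	pthread_mutex_unlock(&w->lock);
	printf("woken directly by the producer that queued the work\n");
	return NULL;
}

/*
 * Directed wakeup: exactly the thread that owns the work runs. Signaling
 * one shared condvar instead (the wake_up() analogue) could wake an
 * arbitrary worker, which is useless when the command sits on another
 * thread's private list.
 */
static void kick(struct worker *w)
{
	pthread_mutex_lock(&w->lock);
	w->has_work = 1;
	pthread_cond_signal(&w->wakeup);
	pthread_mutex_unlock(&w->lock);
}

int main(void)
{
	struct worker w = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.wakeup = PTHREAD_COND_INITIALIZER,
	};

	pthread_create(&w.thread, NULL, worker_fn, &w);
	kick(&w);
	pthread_join(w.thread, NULL);
	return 0;
}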