diff --git a/scripts/generate-kernel-patch b/scripts/generate-kernel-patch index 00dc397bc..f923d7893 100755 --- a/scripts/generate-kernel-patch +++ b/scripts/generate-kernel-patch @@ -138,6 +138,7 @@ echo "" kpatch=( \ "scst/kernel/scst_exec_req_fifo-${kernel_version}.patch" \ + "scst/kernel/alloc_io_context-${kernel_version}.patch" \ "iscsi-scst/kernel/patches/put_page_callback-${kernel_version}.patch" \ ) diff --git a/scst/README b/scst/README index e156470ac..99c599480 100644 --- a/scst/README +++ b/scst/README @@ -723,11 +723,9 @@ using debug2perf Makefile target. directory, they also affect performance. If you find the best values, please share them with us. - - On the target deadline IO scheduler with read_expire and - write_expire increased on all exported devices to 5000 and 15000 - correspondingly should be the fastest for BLOCKIO, but for FILEIO - seems CFQ often outperforms it. So, try on your load and use the best - one. + - On the target CFQ IO scheduler. In most cases it has performance + advantage over other IO schedulers, sometimes huge (2+ times + aggregate throughput increase). - It is recommended to turn the kernel preemption off, i.e. set the kernel preemption model to "No Forced Preemption (Server)". 
diff --git a/scst/include/scst_debug.h b/scst/include/scst_debug.h index 04b3203b4..5296e10ed 100644 --- a/scst/include/scst_debug.h +++ b/scst/include/scst_debug.h @@ -56,7 +56,7 @@ #endif #endif -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 19) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18) #define WARN_ON_ONCE(condition) ({ \ static int __warned; \ typeof(condition) __ret_warn_once = (condition); \ diff --git a/scst/kernel/alloc_io_context-2.6.27.patch b/scst/kernel/alloc_io_context-2.6.27.patch new file mode 100644 index 000000000..a051ea04e --- /dev/null +++ b/scst/kernel/alloc_io_context-2.6.27.patch @@ -0,0 +1,22 @@ +diff -upkr linux-2.6.27.2/block/blk-ioc.c linux-2.6.27.2/block/blk-ioc.c +--- linux-2.6.27.2/block/blk-ioc.c 2008-10-10 02:13:53.000000000 +0400 ++++ linux-2.6.27.2/block/blk-ioc.c 2008-11-25 21:27:01.000000000 +0300 +@@ -105,6 +105,7 @@ struct io_context *alloc_io_context(gfp_ + + return ret; + } ++EXPORT_SYMBOL(alloc_io_context); + + /* + * If the current task has no IO context then create one and initialise it. 
+diff -upkr linux-2.6.27.2/include/linux/iocontext.h linux-2.6.27.2/include/linux/iocontext.h +--- linux-2.6.27.2/include/linux/iocontext.h 2008-10-10 02:13:53.000000000 +0400 ++++ linux-2.6.27.2/include/linux/iocontext.h 2008-11-26 13:23:03.000000000 +0300 +@@ -103,6 +103,7 @@ static inline struct io_context *ioc_tas + int put_io_context(struct io_context *ioc); + void exit_io_context(void); + struct io_context *get_io_context(gfp_t gfp_flags, int node); ++#define SCST_ALLOC_IO_CONTEXT_EXPORTED + struct io_context *alloc_io_context(gfp_t gfp_flags, int node); + void copy_io_context(struct io_context **pdst, struct io_context **psrc); + #else diff --git a/scst/src/dev_handlers/scst_user.c b/scst/src/dev_handlers/scst_user.c index e86913d90..8d87750d8 100644 --- a/scst/src/dev_handlers/scst_user.c +++ b/scst/src/dev_handlers/scst_user.c @@ -1056,6 +1056,7 @@ static int dev_user_map_buf(struct scst_user_cmd *ucmd, unsigned long ubuff, { int res = 0, rc; int i; + struct task_struct *tsk = current; TRACE_ENTRY(); @@ -1081,10 +1082,10 @@ static int dev_user_map_buf(struct scst_user_cmd *ucmd, unsigned long ubuff, ucmd->num_data_pages, (int)(ubuff & ~PAGE_MASK), ucmd->cmd->bufflen); - down_read(&current->mm->mmap_sem); - rc = get_user_pages(current, current->mm, ubuff, ucmd->num_data_pages, + down_read(&tsk->mm->mmap_sem); + rc = get_user_pages(tsk, tsk->mm, ubuff, ucmd->num_data_pages, 1/*writable*/, 0/*don't force*/, ucmd->data_pages, NULL); - up_read(&current->mm->mmap_sem); + up_read(&tsk->mm->mmap_sem); /* get_user_pages() flushes dcache */ diff --git a/scst/src/dev_handlers/scst_vdisk.c b/scst/src/dev_handlers/scst_vdisk.c index b5f9facba..47231a67c 100644 --- a/scst/src/dev_handlers/scst_vdisk.c +++ b/scst/src/dev_handlers/scst_vdisk.c @@ -712,6 +712,7 @@ static int vdisk_do_job(struct scst_cmd *cmd) int opcode = cdb[0]; loff_t loff; struct scst_device *dev = cmd->dev; + struct scst_tgt_dev *tgt_dev = cmd->tgt_dev; struct scst_vdisk_dev *virt_dev = (struct scst_vdisk_dev 
*)dev->dh_priv; struct scst_thr_data_hdr *d; @@ -741,9 +742,9 @@ static int vdisk_do_job(struct scst_cmd *cmd) cmd->driver_status = 0; if (!virt_dev->nullio) { - d = scst_find_thr_data(cmd->tgt_dev); + d = scst_find_thr_data(tgt_dev); if (unlikely(d == NULL)) { - thr = vdisk_init_thr_data(cmd->tgt_dev); + thr = vdisk_init_thr_data(tgt_dev); if (thr == NULL) { scst_set_busy(cmd); goto out_compl; @@ -855,7 +856,7 @@ static int vdisk_do_job(struct scst_cmd *cmd) int do_fsync = vdisk_sync_queue_type(cmd->queue_type); struct scst_vdisk_tgt_dev *ftgt_dev = (struct scst_vdisk_tgt_dev *) - cmd->tgt_dev->dh_priv; + tgt_dev->dh_priv; enum scst_cmd_queue_type last_queue_type = ftgt_dev->last_write_cmd_queue_type; ftgt_dev->last_write_cmd_queue_type = cmd->queue_type; @@ -892,7 +893,7 @@ static int vdisk_do_job(struct scst_cmd *cmd) int do_fsync = vdisk_sync_queue_type(cmd->queue_type); struct scst_vdisk_tgt_dev *ftgt_dev = (struct scst_vdisk_tgt_dev *) - cmd->tgt_dev->dh_priv; + tgt_dev->dh_priv; enum scst_cmd_queue_type last_queue_type = ftgt_dev->last_write_cmd_queue_type; ftgt_dev->last_write_cmd_queue_type = cmd->queue_type; @@ -2709,7 +2710,7 @@ static void vdisk_report_registering(const char *type, j = i; if (virt_dev->wt_flag) - i += snprintf(&buf[i], sizeof(buf) - i, " (WRITE_THROUGH"); + i += snprintf(&buf[i], sizeof(buf) - i, "(WRITE_THROUGH"); if (virt_dev->nv_cache) i += snprintf(&buf[i], sizeof(buf) - i, "%sNV_CACHE", @@ -3520,7 +3521,7 @@ static int __init init_scst_vdisk_driver(void) goto out; } - num_threads = num_online_cpus() + 2; + num_threads = 5; vdisk_file_devtype.threads_num = num_threads; vcdrom_devtype.threads_num = num_threads; diff --git a/scst/src/scst_lib.c b/scst/src/scst_lib.c index 8f8d8c1b8..51703d249 100644 --- a/scst/src/scst_lib.c +++ b/scst/src/scst_lib.c @@ -781,8 +781,7 @@ int scst_acg_add_dev(struct scst_acg *acg, struct scst_device *dev, list_add_tail(&acg_dev->acg_dev_list_entry, &acg->acg_dev_list); 
list_add_tail(&acg_dev->dev_acg_dev_list_entry, &dev->dev_acg_dev_list); - list_for_each_entry(sess, &acg->acg_sess_list, acg_sess_list_entry) - { + list_for_each_entry(sess, &acg->acg_sess_list, acg_sess_list_entry) { tgt_dev = scst_alloc_add_tgt_dev(sess, acg_dev); if (tgt_dev == NULL) { res = -ENOMEM; @@ -1728,7 +1727,7 @@ void scst_copy_sg(struct scst_cmd *cmd, enum scst_sg_copy_dir copy_dir) struct scatterlist *src_sg, *dst_sg; unsigned int src_sg_cnt, src_len, dst_len, src_offs, dst_offs; struct page *src, *dst; - int s, d, to_copy; + unsigned int s, d, to_copy; TRACE_ENTRY(); @@ -3082,10 +3081,11 @@ EXPORT_SYMBOL(scst_dev_del_all_thr_data); struct scst_thr_data_hdr *scst_find_thr_data(struct scst_tgt_dev *tgt_dev) { struct scst_thr_data_hdr *res = NULL, *d; + struct task_struct *tsk = current; spin_lock(&tgt_dev->thr_data_lock); list_for_each_entry(d, &tgt_dev->thr_data_list, thr_data_list_entry) { - if (d->pid == current->pid) { + if (d->pid == tsk->pid) { res = d; scst_thr_data_get(res); break; diff --git a/scst/src/scst_main.c b/scst/src/scst_main.c index 08aad2d12..35192f7ad 100644 --- a/scst/src/scst_main.c +++ b/scst/src/scst_main.c @@ -47,6 +47,13 @@ Pass-through dev handlers will not be supported." #endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25) +#if !defined(SCST_ALLOC_IO_CONTEXT_EXPORTED) +#warning "Patch export_alloc_io_context-.patch was not applied \ + on your kernel. SCST will be working with not the best performance." +#endif +#endif + /** ** SCST global variables. They are all uninitialized to have their layout in ** memory be exactly as specified. 
Otherwise compiler puts zero-initialized @@ -131,6 +138,12 @@ static int scst_virt_dev_last_id; /* protected by scst_mutex */ spinlock_t scst_temp_UA_lock; uint8_t scst_temp_UA[SCST_SENSE_BUFFERSIZE]; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25) +#if defined(CONFIG_BLOCK) && defined(SCST_ALLOC_IO_CONTEXT_EXPORTED) +static struct io_context *scst_ioc; +#endif +#endif + unsigned int scst_max_cmd_mem; unsigned int scst_max_dev_cmd_mem; @@ -1069,6 +1082,7 @@ int scst_add_dev_threads(struct scst_device *dev, int num) int i, res = 0; int n = 0; struct scst_cmd_thread_t *thr; + struct io_context *ioc = NULL; char nm[12]; TRACE_ENTRY(); @@ -1086,7 +1100,7 @@ int scst_add_dev_threads(struct scst_device *dev, int num) } strncpy(nm, dev->handler->name, ARRAY_SIZE(nm)-1); nm[ARRAY_SIZE(nm)-1] = '\0'; - thr->cmd_thread = kthread_run(scst_cmd_thread, + thr->cmd_thread = kthread_create(scst_cmd_thread, &dev->cmd_lists, "%sd%d_%d", nm, dev->dev_num, n++); if (IS_ERR(thr->cmd_thread)) { res = PTR_ERR(thr->cmd_thread); @@ -1094,10 +1108,38 @@ int scst_add_dev_threads(struct scst_device *dev, int num) kfree(thr); goto out; } + list_add(&thr->thread_list_entry, &dev->threads_list); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25) +#if defined(CONFIG_BLOCK) && defined(SCST_ALLOC_IO_CONTEXT_EXPORTED) + /* + * It would be better to keep io_context in tgt_dev and + * dynamically assign it to the current thread on the IO + * submission time to let each initiator have own + * io_context. But, unfortunately, CFQ doesn't + * support if a task has dynamically switched + * io_context, it oopses on BUG_ON(!cic->dead_key) in + * cic_free_func(). So, we have to have the same io_context + * for all initiators. 
+ */ + if (ioc == NULL) { + ioc = alloc_io_context(GFP_KERNEL, -1); + TRACE_DBG("ioc %p (thr %d)", ioc, thr->cmd_thread->pid); + } + + put_io_context(thr->cmd_thread->io_context); + thr->cmd_thread->io_context = ioc_task_link(ioc); + TRACE_DBG("Setting ioc %p on thr %d", ioc, + thr->cmd_thread->pid); +#endif +#endif + wake_up_process(thr->cmd_thread); } out: + put_io_context(ioc); + TRACE_EXIT_RES(res); return res; } @@ -1349,7 +1391,7 @@ int __scst_add_cmd_threads(int num) PRINT_ERROR("fail to allocate thr %d", res); goto out_error; } - thr->cmd_thread = kthread_run(scst_cmd_thread, + thr->cmd_thread = kthread_create(scst_cmd_thread, &scst_main_cmd_lists, "scsi_tgt%d", scst_thread_num++); if (IS_ERR(thr->cmd_thread)) { @@ -1358,9 +1400,27 @@ int __scst_add_cmd_threads(int num) kfree(thr); goto out_error; } + list_add(&thr->thread_list_entry, &scst_threads_info.cmd_threads_list); scst_threads_info.nr_cmd_threads++; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25) +#if defined(CONFIG_BLOCK) && defined(SCST_ALLOC_IO_CONTEXT_EXPORTED) + /* See comment in scst_add_dev_threads() */ + if (scst_ioc == NULL) { + scst_ioc = alloc_io_context(GFP_KERNEL, -1); + TRACE_DBG("scst_ioc %p (thr %d)", scst_ioc, + thr->cmd_thread->pid); + } + + put_io_context(thr->cmd_thread->io_context); + thr->cmd_thread->io_context = ioc_task_link(scst_ioc); + TRACE_DBG("Setting scst_ioc %p on thr %d", + scst_ioc, thr->cmd_thread->pid); +#endif +#endif + wake_up_process(thr->cmd_thread); } res = 0; @@ -1889,6 +1949,12 @@ static void __exit exit_scst(void) DEINIT_CACHEP(scst_tgtd_cachep); DEINIT_CACHEP(scst_acgd_cachep); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25) +#if defined(CONFIG_BLOCK) && defined(SCST_ALLOC_IO_CONTEXT_EXPORTED) + put_io_context(scst_ioc); +#endif +#endif + PRINT_INFO("%s", "SCST unloaded"); TRACE_EXIT(); diff --git a/scst/src/scst_targ.c b/scst/src/scst_targ.c index e6bac2b11..a55cce6ca 100644 --- a/scst/src/scst_targ.c +++ b/scst/src/scst_targ.c @@ -1260,7 
+1260,6 @@ static void scst_cmd_done(struct scsi_cmnd *scsi_cmd) { struct scsi_request *req = NULL; struct scst_cmd *cmd; - enum scst_exec_context context; TRACE_ENTRY(); @@ -1801,7 +1800,10 @@ static struct scst_cmd *scst_post_exec_sn(struct scst_cmd *cmd, /* cmd must be additionally referenced to not die inside */ static int scst_do_real_exec(struct scst_cmd *cmd) { - int res = SCST_EXEC_NOT_COMPLETED, rc; + int res = SCST_EXEC_NOT_COMPLETED; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18) + int rc; +#endif struct scst_device *dev = cmd->dev; struct scst_dev_type *handler = dev->handler; @@ -2933,7 +2935,7 @@ static void scst_cmd_set_sn(struct scst_cmd *cmd) switch (cmd->queue_type) { case SCST_CMD_QUEUE_SIMPLE: case SCST_CMD_QUEUE_UNTAGGED: -#if 1 /* temporary, ToDo */ +#if 0 /* left for future performance investigations */ if (scst_cmd_is_expected_set(cmd)) { if ((cmd->expected_data_direction == SCST_DATA_READ) && (atomic_read(&cmd->dev->write_cmd_count) == 0))