From a34ec0f7ff540befd09cc45e7c9cc3848d891afb Mon Sep 17 00:00:00 2001 From: Vladislav Bolkhovitin Date: Wed, 4 Feb 2009 18:45:32 +0000 Subject: [PATCH] A bunch of pending fixes/cleanups: - Docs about limitation of having initiator and target on the same host updated + cleanups - Minor local thread storage improvements - TRACE_MGMT_MINOR excluded from the default set of trace flags to not confuse people - Dedicated kmem_cache for blockio created git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@662 d57e44dd-8a1f-0410-8b47-8ef2f437770f --- iscsi-scst/README | 2 +- iscsi-scst/README_in-tree | 2 +- scst/README | 22 ++++++++++++++-------- scst/README_in-tree | 22 ++++++++++++++-------- scst/include/scst.h | 12 ++++++++++-- scst/src/dev_handlers/scst_vdisk.c | 25 ++++++++++++++++++------- scst/src/scst_lib.c | 18 +++++++++--------- scst/src/scst_main.c | 19 +++++++++++++++++-- scst/src/scst_priv.h | 12 +++++------- scst/src/scst_targ.c | 16 ++++++++-------- scst_local/README | 5 +++++ 11 files changed, 102 insertions(+), 53 deletions(-) diff --git a/iscsi-scst/README b/iscsi-scst/README index f262848bc..d44dd91ac 100644 --- a/iscsi-scst/README +++ b/iscsi-scst/README @@ -136,7 +136,7 @@ IMPORTANT: All LUN information (access control) MUST be configured Also see SCST README file how to tune for the best performance. -CAUTION: Working of target and initiator on the same host isn't +CAUTION: Working of target and initiator on the same host isn't fully ======= supported. See SCST README file for details. diff --git a/iscsi-scst/README_in-tree b/iscsi-scst/README_in-tree index def03e451..8cb04d8c1 100644 --- a/iscsi-scst/README_in-tree +++ b/iscsi-scst/README_in-tree @@ -71,7 +71,7 @@ IMPORTANT: All LUN information (access control) MUST be configured Also see SCST README file how to tune for the best performance. -CAUTION: Working of target and initiator on the same host isn't +CAUTION: Working of target and initiator on the same host isn't fully ======= supported. See SCST README file for details. diff --git a/scst/README b/scst/README index fb87f6ba6..c0d6383ff 100644 --- a/scst/README +++ b/scst/README @@ -112,13 +112,19 @@ IMPORTANT: Without loading appropriate device handler, corresponding devices 'echo "- - -" >/sys/class/scsi_host/hostX/scan', where X - is the host number. -IMPORTANT: Working of target and initiator on the same host isn't -========= supported. This is a limitation of the Linux memory/cache - manager, because in this case an OOM deadlock like: system +IMPORTANT: Working of target and initiator on the same host is +========= supported, except the following 2 cases: swap over target exported + device and using a writable mmap over a file from target + exported device. The latter means you can't mount a file + system over target exported device. In other words, you can + freely use any sg, sd, st, etc. devices imported from target + on the same host, but you can't mount file systems or put + swap on them. This is a limitation of Linux memory/cache + manager, because in this case an OOM deadlock like: system needs some memory -> it decides to clear some cache -> cache - needs to write on a target exported device -> initiator sends - request to the target -> target needs memory -> problem is - possible. + needs to write on target exported device -> initiator sends + request to the target -> target needs memory -> system needs + even more memory -> deadlock. IMPORTANT: In the current version simultaneous access to local SCSI devices ========= via standard high-level SCSI drivers (sd, st, sg, etc.) and @@ -863,7 +869,7 @@ Work if target's backstorage or link is too slow ------------------------------------------------ Under high I/O load, when your target's backstorage gets overloaded, or -working over a slow link between inititor and target, when the link +working over a slow link between initiator and target, when the link can't serve all the queued commands on time, you can experience I/O stalls or see in the kernel log abort or reset messages. @@ -947,7 +953,7 @@ Note, that logged messages about QUEUE_FULL status are quite different by nature. This is a normal work, just SCSI flow control in action. Simply don't enable "mgmt_minor" logging level, or, alternatively, if you are confident in the worst case performance of your back-end storage -or inititor-target link, you can increase SCST_MAX_TGT_DEV_COMMANDS in +or initiator-target link, you can increase SCST_MAX_TGT_DEV_COMMANDS in scst_priv.h to 64. Usually initiators don't try to push more commands on the target. diff --git a/scst/README_in-tree b/scst/README_in-tree index 7e1566d03..54faf04db 100644 --- a/scst/README_in-tree +++ b/scst/README_in-tree @@ -59,13 +59,19 @@ IMPORTANT: Without loading appropriate device handler, corresponding devices 'echo "- - -" >/sys/class/scsi_host/hostX/scan', where X - is the host number. -IMPORTANT: Working of target and initiator on the same host isn't -========= supported. This is a limitation of the Linux memory/cache - manager, because in this case an OOM deadlock like: system +IMPORTANT: Working of target and initiator on the same host is +========= supported, except the following 2 cases: swap over target exported + device and using a writable mmap over a file from target + exported device. The latter means you can't mount a file + system over target exported device. In other words, you can + freely use any sg, sd, st, etc. devices imported from target + on the same host, but you can't mount file systems or put + swap on them. This is a limitation of Linux memory/cache + manager, because in this case an OOM deadlock like: system needs some memory -> it decides to clear some cache -> cache - needs to write on a target exported device -> initiator sends - request to the target -> target needs memory -> problem is - possible. + needs to write on target exported device -> initiator sends + request to the target -> target needs memory -> system needs + even more memory -> deadlock. IMPORTANT: In the current version simultaneous access to local SCSI devices ========= via standard high-level SCSI drivers (sd, st, sg, etc.) and @@ -798,7 +804,7 @@ Work if target's backstorage or link is too slow ------------------------------------------------ Under high I/O load, when your target's backstorage gets overloaded, or -working over a slow link between inititor and target, when the link +working over a slow link between initiator and target, when the link can't serve all the queued commands on time, you can experience I/O stalls or see in the kernel log abort or reset messages. @@ -882,7 +888,7 @@ Note, that logged messages about QUEUE_FULL status are quite different by nature. This is a normal work, just SCSI flow control in action. Simply don't enable "mgmt_minor" logging level, or, alternatively, if you are confident in the worst case performance of your back-end storage -or inititor-target link, you can increase SCST_MAX_TGT_DEV_COMMANDS in +or initiator-target link, you can increase SCST_MAX_TGT_DEV_COMMANDS in scst_priv.h to 64. Usually initiators don't try to push more commands on the target. diff --git a/scst/include/scst.h b/scst/include/scst.h index 9227377ef..d630a77eb 100644 --- a/scst/include/scst.h +++ b/scst/include/scst.h @@ -1455,7 +1455,7 @@ struct scst_device { struct scst_thr_data_hdr { /* List entry in tgt_dev->thr_data_list */ struct list_head thr_data_list_entry; - pid_t pid; /* PID of the owner thread */ + struct task_struct *owner_thr; /* the owner thread */ atomic_t ref; /* Function that will be called on the tgt_dev destruction */ void (*free_fn) (struct scst_thr_data_hdr *data); @@ -2658,8 +2658,16 @@ void scst_del_all_thr_data(struct scst_tgt_dev *tgt_dev); /* Deletes all local to threads data from all tgt_dev's of the dev */ void scst_dev_del_all_thr_data(struct scst_device *dev); +/* Finds local to the thread data. Returns NULL, if they not found. */ +struct scst_thr_data_hdr *__scst_find_thr_data(struct scst_tgt_dev *tgt_dev, + struct task_struct *tsk); + /* Finds local to the current thread data. Returns NULL, if they not found. */ -struct scst_thr_data_hdr *scst_find_thr_data(struct scst_tgt_dev *tgt_dev); +static inline struct scst_thr_data_hdr *scst_find_thr_data( + struct scst_tgt_dev *tgt_dev) +{ + return __scst_find_thr_data(tgt_dev, current); +} static inline void scst_thr_data_get(struct scst_thr_data_hdr *data) { diff --git a/scst/src/dev_handlers/scst_vdisk.c b/scst/src/dev_handlers/scst_vdisk.c index d982144ef..418cf3eaa 100644 --- a/scst/src/dev_handlers/scst_vdisk.c +++ b/scst/src/dev_handlers/scst_vdisk.c @@ -226,6 +226,7 @@ struct scst_vdisk_thr { }; static struct kmem_cache *vdisk_thr_cachep; +static struct kmem_cache *blockio_work_cachep; #define DEF_NUM_THREADS 5 static int num_threads = DEF_NUM_THREADS; @@ -2326,19 +2327,19 @@ out: return; } -struct blockio_work { +struct scst_blockio_work { atomic_t bios_inflight; struct scst_cmd *cmd; }; -static inline void blockio_check_finish(struct blockio_work *blockio_work) +static inline void blockio_check_finish(struct scst_blockio_work *blockio_work) { /* Decrement the bios in processing, and if zero signal completion */ if (atomic_dec_and_test(&blockio_work->bios_inflight)) { blockio_work->cmd->completed = 1; blockio_work->cmd->scst_cmd_done(blockio_work->cmd, SCST_CMD_STATE_DEFAULT, SCST_CONTEXT_DIRECT_ATOMIC); - kfree(blockio_work); + kmem_cache_free(blockio_work_cachep, blockio_work); } return; } @@ -2349,7 +2350,7 @@ static int blockio_endio(struct bio *bio, unsigned int bytes_done, int error) static void blockio_endio(struct bio *bio, int error) #endif { - struct blockio_work *blockio_work = bio->bi_private; + struct scst_blockio_work *blockio_work = bio->bi_private; #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24) if (bio->bi_size) @@ -2395,7 +2396,7 @@ static void blockio_exec_rw(struct scst_cmd *cmd, struct scst_vdisk_thr *thr, uint8_t *address; struct bio *bio = NULL, *hbio = NULL, *tbio = NULL; int need_new_bio; - struct blockio_work *blockio_work; + struct scst_blockio_work *blockio_work; int bios = 0; TRACE_ENTRY(); @@ -2404,7 +2405,7 @@ static void blockio_exec_rw(struct scst_cmd *cmd, struct scst_vdisk_thr *thr, goto out; /* Allocate and initialize blockio_work struct */ - blockio_work = kmalloc(sizeof(*blockio_work), GFP_KERNEL); + blockio_work = kmem_cache_alloc(blockio_work_cachep, GFP_KERNEL); if (blockio_work == NULL) goto out_no_mem; @@ -2506,7 +2507,7 @@ out_no_bio: hbio = hbio->bi_next; bio_put(bio); } - kfree(blockio_work); + kmem_cache_free(blockio_work_cachep, blockio_work); out_no_mem: scst_set_busy(cmd); @@ -3561,6 +3562,12 @@ static int __init init_scst_vdisk_driver(void) goto out; } + blockio_work_cachep = KMEM_CACHE(scst_blockio_work, SCST_SLAB_FLAGS); + if (blockio_work_cachep == NULL) { + res = -ENOMEM; + goto out_free_vdisk_cache; + } + if (num_threads < 1) { PRINT_ERROR("num_threads can not be less than 1, use " "default %d", DEF_NUM_THREADS); @@ -3601,6 +3608,9 @@ out_free_vdisk: exit_scst_vdisk(&vdisk_file_devtype, &vdisk_dev_list); out_free_slab: + kmem_cache_destroy(blockio_work_cachep); + +out_free_vdisk_cache: kmem_cache_destroy(vdisk_thr_cachep); goto out; } @@ -3611,6 +3621,7 @@ static void __exit exit_scst_vdisk_driver(void) exit_scst_vdisk(&vdisk_blk_devtype, &vdisk_dev_list); exit_scst_vdisk(&vdisk_file_devtype, &vdisk_dev_list); exit_scst_vdisk(&vcdrom_devtype, &vcdrom_dev_list); + kmem_cache_destroy(blockio_work_cachep); kmem_cache_destroy(vdisk_thr_cachep); } diff --git a/scst/src/scst_lib.c b/scst/src/scst_lib.c index e5251d80f..5d6a80020 100644 --- a/scst/src/scst_lib.c +++ b/scst/src/scst_lib.c @@ -2724,8 +2724,8 @@ void scst_process_reset(struct scst_device *dev, if (dev->dev_reserved) { list_for_each_entry(tgt_dev, &dev->dev_tgt_dev_list, dev_tgt_dev_list_entry) { - TRACE(TRACE_MGMT, "Clearing RESERVE'ation for tgt_dev " - "lun %lld", + TRACE(TRACE_MGMT_MINOR, "Clearing RESERVE'ation for " + "tgt_dev lun %lld", (long long unsigned int)tgt_dev->lun); clear_bit(SCST_TGT_DEV_RESERVED, &tgt_dev->tgt_dev_flags); @@ -2800,7 +2800,7 @@ int scst_set_pending_UA(struct scst_cmd *cmd) TRACE_ENTRY(); - TRACE(TRACE_MGMT, "Setting pending UA cmd %p", cmd); + TRACE(TRACE_MGMT_MINOR, "Setting pending UA cmd %p", cmd); spin_lock_bh(&cmd->tgt_dev->tgt_dev_lock); @@ -2935,7 +2935,7 @@ void __scst_dev_check_set_UA(struct scst_device *dev, { TRACE_ENTRY(); - TRACE(TRACE_MGMT, "Processing UA dev %p", dev); + TRACE(TRACE_MGMT_MINOR, "Processing UA dev %p", dev); /* Check for reset UA */ if (sense[12] == SCST_SENSE_ASC_UA_RESET) @@ -3044,7 +3044,7 @@ void scst_add_thr_data(struct scst_tgt_dev *tgt_dev, struct scst_thr_data_hdr *data, void (*free_fn) (struct scst_thr_data_hdr *data)) { - data->pid = current->pid; + data->owner_thr = current; atomic_set(&data->ref, 1); EXTRACHECKS_BUG_ON(free_fn == NULL); data->free_fn = free_fn; @@ -3091,14 +3091,14 @@ void scst_dev_del_all_thr_data(struct scst_device *dev) } EXPORT_SYMBOL(scst_dev_del_all_thr_data); -struct scst_thr_data_hdr *scst_find_thr_data(struct scst_tgt_dev *tgt_dev) +struct scst_thr_data_hdr *__scst_find_thr_data(struct scst_tgt_dev *tgt_dev, + struct task_struct *tsk) { struct scst_thr_data_hdr *res = NULL, *d; - struct task_struct *tsk = current; spin_lock(&tgt_dev->thr_data_lock); list_for_each_entry(d, &tgt_dev->thr_data_list, thr_data_list_entry) { - if (d->pid == tsk->pid) { + if (d->owner_thr == tsk) { res = d; scst_thr_data_get(res); break; @@ -3107,7 +3107,7 @@ struct scst_thr_data_hdr *scst_find_thr_data(struct scst_tgt_dev *tgt_dev) spin_unlock(&tgt_dev->thr_data_lock); return res; } -EXPORT_SYMBOL(scst_find_thr_data); +EXPORT_SYMBOL(__scst_find_thr_data); /* dev_lock supposed to be held and BH disabled */ void __scst_block_dev(struct scst_device *dev) diff --git a/scst/src/scst_main.c b/scst/src/scst_main.c index 681078ef8..c6062a644 100644 --- a/scst/src/scst_main.c +++ b/scst/src/scst_main.c @@ -1209,13 +1209,28 @@ void scst_del_dev_threads(struct scst_device *dev, int num) TRACE_ENTRY(); - list_for_each_entry_safe(ct, tmp, &dev->threads_list, + list_for_each_entry_safe_reverse(ct, tmp, &dev->threads_list, thread_list_entry) { - int rc = kthread_stop(ct->cmd_thread); + int rc; + struct scst_tgt_dev *tgt_dev; + + list_for_each_entry(tgt_dev, &dev->dev_tgt_dev_list, + dev_tgt_dev_list_entry) { + struct scst_thr_data_hdr *td; + td = __scst_find_thr_data(tgt_dev, ct->cmd_thread); + if (td != NULL) { + scst_thr_data_put(td); + break; + } + } + + rc = kthread_stop(ct->cmd_thread); if (rc < 0) TRACE_MGMT_DBG("kthread_stop() failed: %d", rc); + list_del(&ct->thread_list_entry); kfree(ct); + if ((num > 0) && (++i >= num)) break; } diff --git a/scst/src/scst_priv.h b/scst/src/scst_priv.h index e1cd186b5..57df67749 100644 --- a/scst/src/scst_priv.h +++ b/scst/src/scst_priv.h @@ -52,14 +52,11 @@ extern unsigned long scst_trace_flag; #endif #ifdef CONFIG_SCST_DEBUG -/*#define SCST_DEFAULT_LOG_FLAGS (TRACE_ALL & ~TRACE_MEMORY & ~TRACE_BUFF \ - & ~TRACE_FUNCTION) -#define SCST_DEFAULT_LOG_FLAGS (TRACE_ALL & ~TRACE_MEMORY & ~TRACE_BUFF & \ - ~TRACE_SCSI & ~TRACE_SCSI_SERIALIZING & ~TRACE_DEBUG) -*/ + +/* TRACE_MGMT_MINOR disabled to not confuse regular users */ #define SCST_DEFAULT_LOG_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR | TRACE_PID | \ TRACE_LINE | TRACE_FUNCTION | TRACE_SPECIAL | TRACE_MGMT | \ - TRACE_MGMT_MINOR | TRACE_MGMT_DEBUG | TRACE_RTRY) + /*TRACE_MGMT_MINOR |*/ TRACE_MGMT_DEBUG | TRACE_RTRY) #define TRACE_RETRY(args...) __TRACE(TRACE_RTRY, args) #define TRACE_SN(args...) __TRACE(TRACE_SCSI_SERIALIZING, args) @@ -71,7 +68,8 @@ extern unsigned long scst_trace_flag; #else /* CONFIG_SCST_DEBUG */ # ifdef CONFIG_SCST_TRACING -#define SCST_DEFAULT_LOG_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR | TRACE_SPECIAL) +#define SCST_DEFAULT_LOG_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR | TRACE_MGMT | \ + TRACE_SPECIAL) # else #define SCST_DEFAULT_LOG_FLAGS 0 # endif diff --git a/scst/src/scst_targ.c b/scst/src/scst_targ.c index 7e87ff20d..17daad9a8 100644 --- a/scst/src/scst_targ.c +++ b/scst/src/scst_targ.c @@ -4475,11 +4475,11 @@ static int scst_abort_all_nexus_loss_sess(struct scst_mgmt_cmd *mcmd, TRACE_ENTRY(); if (nexus_loss) { - TRACE(TRACE_MGMT, "Nexus loss for sess %p (mcmd %p)", sess, - mcmd); + TRACE(TRACE_MGMT_MINOR, "Nexus loss for sess %p (mcmd %p)", + sess, mcmd); } else { - TRACE(TRACE_MGMT, "Aborting all from sess %p (mcmd %p)", sess, - mcmd); + TRACE(TRACE_MGMT_MINOR, "Aborting all from sess %p (mcmd %p)", + sess, mcmd); } if (mcmd->fn != SCST_UNREG_SESS_TM) @@ -4557,11 +4557,11 @@ static int scst_abort_all_nexus_loss_tgt(struct scst_mgmt_cmd *mcmd, TRACE_ENTRY(); if (nexus_loss) { - TRACE(TRACE_MGMT, "I_T Nexus loss (tgt %p, mcmd %p)", tgt, - mcmd); + TRACE(TRACE_MGMT_MINOR, "I_T Nexus loss (tgt %p, mcmd %p)", + tgt, mcmd); } else { - TRACE(TRACE_MGMT, "Aborting all from tgt %p (mcmd %p)", tgt, - mcmd); + TRACE(TRACE_MGMT_MINOR, "Aborting all from tgt %p (mcmd %p)", + tgt, mcmd); } mcmd->needs_unblocking = 1; diff --git a/scst_local/README b/scst_local/README index d819d65eb..dc2fa7b75 100644 --- a/scst_local/README +++ b/scst_local/README @@ -9,6 +9,11 @@ No assumptions are made in the code about the device types on the target, so any device handlers that you load in SCST should be visible, including tapes and so forth. +You can freely use any sg, sd, st, etc. devices imported from target, +except the following: you can't mount file systems or put swap on them. +This is a limitation of Linux memory/cache manager. See SCST README file +for details. + To build, simply issue 'make' in the scst-local directory. Try 'modinfo scst_local' for a listing of module parameters so far.