diff --git a/scst/README b/scst/README index 76edf9689..787b3eecd 100644 --- a/scst/README +++ b/scst/README @@ -154,11 +154,12 @@ IMPORTANT: Working of target and initiator on the same host is freely use any sg, sd, st, etc. devices imported from target on the same host, but you can't mount file systems or put swap on them. This is a limitation of Linux memory/cache - manager, because in this case an OOM deadlock like: system - needs some memory -> it decides to clear some cache -> cache - needs to write on target exported device -> initiator sends - request to the target -> target needs memory -> system needs - even more memory -> deadlock. + manager, because in this case a memory allocation deadlock is + possible, like this: the system needs some memory -> it decides to + clear some cache -> the cache needs to be written to a + target-exported device -> the initiator sends a request to the + target located on the same system -> the target needs memory + -> the system needs even more memory -> deadlock. IMPORTANT: In the current version simultaneous access to local SCSI devices ========= via standard high-level SCSI drivers (sd, st, sg, etc.) and diff --git a/scst/README_in-tree b/scst/README_in-tree index c177add76..e0778b877 100644 --- a/scst/README_in-tree +++ b/scst/README_in-tree @@ -75,11 +75,12 @@ IMPORTANT: Working of target and initiator on the same host is freely use any sg, sd, st, etc. devices imported from target on the same host, but you can't mount file systems or put swap on them. This is a limitation of Linux memory/cache - manager, because in this case an OOM deadlock like: system - needs some memory -> it decides to clear some cache -> cache - needs to write on target exported device -> initiator sends - request to the target -> target needs memory -> system needs - even more memory -> deadlock. 
+ manager, because in this case a memory allocation deadlock is + possible like: system needs some memory -> it decides to + clear some cache -> the cache is needed to be written on a + target exported device -> initiator sends request to the + target located on the same system -> the target needs memory + -> the system needs even more memory -> deadlock. IMPORTANT: In the current version simultaneous access to local SCSI devices ========= via standard high-level SCSI drivers (sd, st, sg, etc.) and diff --git a/scst/include/scst.h b/scst/include/scst.h index 120134cf7..02a0f0522 100644 --- a/scst/include/scst.h +++ b/scst/include/scst.h @@ -1754,6 +1754,14 @@ struct scst_cmd { /* Set if cmd is queued as hw pending */ unsigned int cmd_hw_pending:1; + /* + * Set if memory buffer allocations for this cmd must not cause any IO + * or FS calls (GFP_NOIO instead of GFP_KERNEL), at least for READ and + * WRITE commands. Needed for cases like file systems mounted over + * scst_local's devices. + */ + unsigned noio_mem_alloc:1; + /* * Set if the target driver wants to alloc data buffers on its own. * In this case alloc_data_buf() must be provided in the target driver @@ -3223,6 +3231,19 @@ static inline void scst_cmd_set_tgt_sn(struct scst_cmd *cmd, uint32_t tgt_sn) cmd->tgt_sn = tgt_sn; } +/* + * Accessors for noio_mem_alloc; the setter only sets the flag, never clears it + */ +static inline bool scst_cmd_get_noio_mem_alloc(struct scst_cmd *cmd) +{ + return cmd->noio_mem_alloc; +} + +static inline void scst_cmd_set_noio_mem_alloc(struct scst_cmd *cmd) +{ + cmd->noio_mem_alloc = 1; +} + /* * Returns 1 if the cmd was aborted, so its status is invalid and no * reply shall be sent to the remote initiator. 
A target driver should diff --git a/scst/src/dev_handlers/scst_vdisk.c b/scst/src/dev_handlers/scst_vdisk.c index 6b772bb72..7381b274f 100644 --- a/scst/src/dev_handlers/scst_vdisk.c +++ b/scst/src/dev_handlers/scst_vdisk.c @@ -272,7 +272,7 @@ static void vdisk_exec_write(struct scst_cmd *cmd, struct scst_vdisk_thr *thr, loff_t loff); static void blockio_exec_rw(struct scst_cmd *cmd, struct scst_vdisk_thr *thr, u64 lba_start, int write); -static int blockio_flush(struct block_device *bdev); +static int vdisk_blockio_flush(struct block_device *bdev, gfp_t gfp_mask); static void vdisk_exec_verify(struct scst_cmd *cmd, struct scst_vdisk_thr *thr, loff_t loff); static void vdisk_exec_read_capacity(struct scst_cmd *cmd); @@ -652,7 +652,7 @@ static void vdisk_blockio_check_flush_support(struct scst_vdisk_dev *virt_dev) goto out_close; } - if (blockio_flush(inode->i_bdev) != 0) { + if (vdisk_blockio_flush(inode->i_bdev, GFP_KERNEL) != 0) { PRINT_WARNING("Device %s doesn't support barriers, switching " "to NV_CACHE mode. Read README for more details.", virt_dev->filename); @@ -896,7 +896,7 @@ static void vdisk_free_thr_data(struct scst_thr_data_hdr *d) } static struct scst_vdisk_thr *vdisk_init_thr_data( - struct scst_tgt_dev *tgt_dev) + struct scst_tgt_dev *tgt_dev, gfp_t gfp_mask) { struct scst_vdisk_thr *res; struct scst_vdisk_dev *virt_dev = tgt_dev->dev->dh_priv; @@ -905,7 +905,7 @@ static struct scst_vdisk_thr *vdisk_init_thr_data( EXTRACHECKS_BUG_ON(virt_dev->nullio); - res = kmem_cache_zalloc(vdisk_thr_cachep, GFP_KERNEL); + res = kmem_cache_zalloc(vdisk_thr_cachep, gfp_mask); if (res == NULL) { TRACE(TRACE_OUT_OF_MEM, "%s", "Unable to allocate struct " "scst_vdisk_thr"); @@ -1000,7 +1000,8 @@ static int vdisk_do_job(struct scst_cmd *cmd) if (!virt_dev->nullio) { d = scst_find_thr_data(tgt_dev); if (unlikely(d == NULL)) { - thr = vdisk_init_thr_data(tgt_dev); + thr = vdisk_init_thr_data(tgt_dev, + cmd->noio_mem_alloc ? 
GFP_NOIO : GFP_KERNEL); if (thr == NULL) { scst_set_busy(cmd); goto out_compl; @@ -1387,21 +1388,22 @@ static void vdisk_exec_unmap(struct scst_cmd *cmd, struct scst_vdisk_thr *thr) (unsigned long long)start, len); if (virt_dev->blockio) { + gfp_t gfp = cmd->noio_mem_alloc ? GFP_NOIO : GFP_KERNEL; #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 27) #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 31) err = blkdev_issue_discard(inode->i_bdev, start, len, - GFP_KERNEL); + gfp); #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) \ && !(LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 34) \ && defined(CONFIG_SUSE_KERNEL)) err = blkdev_issue_discard(inode->i_bdev, start, len, - GFP_KERNEL, DISCARD_FL_WAIT); + gfp, DISCARD_FL_WAIT); #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37) err = blkdev_issue_discard(inode->i_bdev, start, len, - GFP_KERNEL, BLKDEV_IFL_WAIT); + gfp, BLKDEV_IFL_WAIT); #else err = blkdev_issue_discard(inode->i_bdev, start, len, - GFP_KERNEL, 0); + gfp, 0); #endif if (unlikely(err != 0)) { PRINT_ERROR("blkdev_issue_discard() for " @@ -2519,7 +2521,8 @@ static int vdisk_fsync(struct scst_vdisk_thr *thr, loff_t loff, goto out; if (virt_dev->blockio) { - res = blockio_flush(thr->bdev); + res = vdisk_blockio_flush(thr->bdev, + (cmd->noio_mem_alloc ? GFP_NOIO : GFP_KERNEL)); goto out; } @@ -2914,6 +2917,7 @@ static void blockio_exec_rw(struct scst_cmd *cmd, struct scst_vdisk_thr *thr, int need_new_bio; struct scst_blockio_work *blockio_work; int bios = 0; + gfp_t gfp_mask = (cmd->noio_mem_alloc ? 
GFP_NOIO : GFP_KERNEL); TRACE_ENTRY(); @@ -2921,7 +2925,7 @@ static void blockio_exec_rw(struct scst_cmd *cmd, struct scst_vdisk_thr *thr, goto out; /* Allocate and initialize blockio_work struct */ - blockio_work = kmem_cache_alloc(blockio_work_cachep, GFP_KERNEL); + blockio_work = kmem_cache_alloc(blockio_work_cachep, gfp_mask); if (blockio_work == NULL) goto out_no_mem; @@ -2951,9 +2955,9 @@ static void blockio_exec_rw(struct scst_cmd *cmd, struct scst_vdisk_thr *thr, if (need_new_bio) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30) - bio = bio_kmalloc(GFP_KERNEL, max_nr_vecs); + bio = bio_kmalloc(gfp_mask, max_nr_vecs); #else - bio = bio_alloc(GFP_KERNEL, max_nr_vecs); + bio = bio_alloc(gfp_mask, max_nr_vecs); #endif if (!bio) { PRINT_ERROR("Failed to create bio " @@ -3048,7 +3052,7 @@ out_no_mem: goto out; } -static int blockio_flush(struct block_device *bdev) +static int vdisk_blockio_flush(struct block_device *bdev, gfp_t gfp_mask) { int res = 0; @@ -3059,9 +3063,9 @@ static int blockio_flush(struct block_device *bdev) && LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 34)) res = blkdev_issue_flush(bdev, NULL); #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37) - res = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT); + res = blkdev_issue_flush(bdev, gfp_mask, NULL, BLKDEV_IFL_WAIT); #else - res = blkdev_issue_flush(bdev, GFP_KERNEL, NULL); + res = blkdev_issue_flush(bdev, gfp_mask, NULL); #endif if (res != 0) PRINT_ERROR("blkdev_issue_flush() failed: %d", res); diff --git a/scst/src/scst_lib.c b/scst/src/scst_lib.c index 50585209e..98a8fc405 100644 --- a/scst/src/scst_lib.c +++ b/scst/src/scst_lib.c @@ -4686,7 +4686,7 @@ int scst_scsi_exec_async(struct scst_cmd *cmd, void *data, struct request *rq; struct scsi_io_context *sioc; int write = (cmd->data_direction & SCST_DATA_WRITE) ? WRITE : READ; - gfp_t gfp = GFP_KERNEL; + gfp_t gfp = cmd->noio_mem_alloc ? 
GFP_NOIO : GFP_KERNEL; int cmd_len = cmd->cdb_len; sioc = kmem_cache_zalloc(scsi_io_context_cache, gfp); diff --git a/scst_local/README b/scst_local/README index b46b3929a..18fe5dc05 100644 --- a/scst_local/README +++ b/scst_local/README @@ -10,9 +10,12 @@ any device handlers that you load in SCST should be visible, including tapes and so forth. You can freely use any sg, sd, st, etc. devices imported from target, -except the following: you can't mount file systems or put swap on them. -This is a limitation of Linux memory/cache manager. See SCST README file -for details. +except the following: you can't mount file systems or put swap on them +with any dev handler other than BLOCKIO and pass-through, because it can +lead to a recursive memory allocation deadlock. This is a limitation of +the Linux memory/cache manager. See the SCST README file for details. For +BLOCKIO and pass-through dev handlers there's no such limitation, so you +can freely mount file systems over them. To build, simply issue 'make' in the scst_local directory. 
diff --git a/scst_local/scst_local.c b/scst_local/scst_local.c index fabd0d93e..692f6b3cc 100644 --- a/scst_local/scst_local.c +++ b/scst_local/scst_local.c @@ -1033,11 +1033,13 @@ static int scst_local_queuecommand_lck(struct scsi_cmnd *SCpnt, case DMA_TO_DEVICE: dir = SCST_DATA_WRITE; scst_cmd_set_expected(scst_cmd, dir, scsi_bufflen(SCpnt)); + scst_cmd_set_noio_mem_alloc(scst_cmd); scst_cmd_set_tgt_sg(scst_cmd, sgl, sgl_count); break; case DMA_FROM_DEVICE: dir = SCST_DATA_READ; scst_cmd_set_expected(scst_cmd, dir, scsi_bufflen(SCpnt)); + scst_cmd_set_noio_mem_alloc(scst_cmd); scst_cmd_set_tgt_sg(scst_cmd, sgl, sgl_count); break; case DMA_BIDIRECTIONAL: @@ -1047,6 +1049,7 @@ static int scst_local_queuecommand_lck(struct scsi_cmnd *SCpnt, scst_cmd_set_expected(scst_cmd, dir, scsi_bufflen(SCpnt)); scst_cmd_set_expected_out_transfer_len(scst_cmd, scsi_in(SCpnt)->length); + scst_cmd_set_noio_mem_alloc(scst_cmd); scst_cmd_set_tgt_sg(scst_cmd, scsi_in(SCpnt)->table.sgl, scsi_in(SCpnt)->table.nents); scst_cmd_set_tgt_out_sg(scst_cmd, sgl, sgl_count);