scst_local: Avoid deadlock during module removal with kernel 3.6

Since kernel 3.6, a SYNCHRONIZE CACHE command is sent to SCSI disks
during the scsi_remove_host() call. That command is sent after command
processing has been suspended in SCST and hence causes a deadlock.
Avoid this deadlock by failing all commands issued after
scst_unregister_target() has been invoked.

The output of "echo w >/proc/sysrq-trigger", captured just after removal
of the scst_local module had started, is as follows:

SysRq : Show Blocked State
  task                        PC stack   pid father
kworker/2:1     D ffff88003d08cf40     0    37      2 0x00000000
 ffff88003d0bd830 0000000000000046 ffffffff814a849e ffff88003d08cf40
 ffff88003d08cf40 ffff88003d0bdfd8 ffff88003d0bdfd8 ffff88003d0bdfd8
 ffff88003d9a3b70 ffff88003d08cf40 0000000000000282 ffff88003d0bd860
Call Trace:
 [<ffffffff814a849e>] ? schedule_timeout+0x12e/0x230
 [<ffffffff814aabc9>] schedule+0x29/0x70
 [<ffffffff814a84a3>] schedule_timeout+0x133/0x230
 [<ffffffff8104cc00>] ? __internal_add_timer+0x140/0x140
 [<ffffffff814aaa72>] wait_for_common+0x122/0x170
 [<ffffffff81074050>] ? try_to_wake_up+0x2c0/0x2c0
 [<ffffffff814aab73>] wait_for_completion_timeout+0x13/0x20
 [<ffffffff811edc2b>] blk_execute_rq+0x17b/0x200
 [<ffffffff814aa993>] ? wait_for_common+0x43/0x170
 [<ffffffff81180a51>] ? bio_phys_segments+0x21/0x30
 [<ffffffff811e8cf0>] ? blk_rq_bio_prep+0x30/0xb0
 [<ffffffff812e46ee>] scsi_execute+0xee/0x180
 [<ffffffff812e58ed>] scsi_execute_req+0xbd/0x130
 [<ffffffff812f55c0>] sr_check_events+0xc0/0x2d0
 [<ffffffff81076218>] ? sched_clock_cpu+0xa8/0x110
 [<ffffffff8131e1cc>] cdrom_check_events+0x1c/0x40
 [<ffffffff812f5a39>] sr_block_check_events+0x19/0x20
 [<ffffffff811f0e32>] disk_events_workfn+0x62/0x160
 [<ffffffff8105affe>] process_one_work+0x1ae/0x620
 [<ffffffff8105af9d>] ? process_one_work+0x14d/0x620
 [<ffffffff811f0dd0>] ? __disk_unblock_events+0x130/0x130
 [<ffffffff8105ce97>] worker_thread+0x187/0x4e0
 [<ffffffff8105cd10>] ? manage_workers+0x320/0x320
 [<ffffffff81062a07>] kthread+0xb7/0xc0
 [<ffffffff81097b35>] ? trace_hardirqs_on_caller+0x105/0x190
 [<ffffffff814b5d04>] kernel_thread_helper+0x4/0x10
 [<ffffffff814ac870>] ? retint_restore_args+0x13/0x13
 [<ffffffff81062950>] ? flush_kthread_work+0x1e0/0x1e0
 [<ffffffff814b5d00>] ? gs_change+0x13/0x13
rmmod           D ffff88001d46a7a0     0 10456  10437 0x00000004
 ffff8800393cd7a8 0000000000000046 ffffffff814a849e ffff88001d46a7a0
 ffff88001d46a7a0 ffff8800393cdfd8 ffff8800393cdfd8 ffff8800393cdfd8
 ffffffff81a13420 ffff88001d46a7a0 0000000000000282 ffff8800393cd7d8
Call Trace:
 [<ffffffff814a849e>] ? schedule_timeout+0x12e/0x230
 [<ffffffff814aabc9>] schedule+0x29/0x70
 [<ffffffff814a84a3>] schedule_timeout+0x133/0x230
 [<ffffffff8104cc00>] ? __internal_add_timer+0x140/0x140
 [<ffffffff814aaa72>] wait_for_common+0x122/0x170
 [<ffffffff81074050>] ? try_to_wake_up+0x2c0/0x2c0
 [<ffffffff814aab73>] wait_for_completion_timeout+0x13/0x20
 [<ffffffff811edc2b>] blk_execute_rq+0x17b/0x200
 [<ffffffff814aa993>] ? wait_for_common+0x43/0x170
 [<ffffffff81063000>] ? __init_waitqueue_head+0x60/0x60
 [<ffffffff812e46ee>] scsi_execute+0xee/0x180
 [<ffffffff812e58ed>] scsi_execute_req+0xbd/0x130
 [<ffffffff812f07a8>] sd_sync_cache+0x98/0x120
 [<ffffffff812f0a40>] sd_shutdown+0xd0/0x150
 [<ffffffff812f0b3c>] sd_remove+0x7c/0xc0
 [<ffffffff812c3e6c>] __device_release_driver+0x7c/0xf0
 [<ffffffff812c3f0e>] device_release_driver+0x2e/0x40
 [<ffffffff812c380f>] bus_remove_device+0xff/0x170
 [<ffffffff812c099d>] device_del+0x12d/0x1c0
 [<ffffffff812eae1d>] __scsi_remove_device+0x10d/0x120
 [<ffffffff812e97ef>] scsi_forget_host+0x6f/0x80
 [<ffffffff812deb8a>] scsi_remove_host+0x7a/0x130
 [<ffffffffa0287bea>] scst_local_driver_remove+0x5a/0x120 [scst_local]
 [<ffffffff812c3e6c>] __device_release_driver+0x7c/0xf0
 [<ffffffff812c3f0e>] device_release_driver+0x2e/0x40
 [<ffffffff812c380f>] bus_remove_device+0xff/0x170
 [<ffffffff812c099d>] device_del+0x12d/0x1c0
 [<ffffffffa028ae21>] ? scst_local_exit+0x6b/0x24a [scst_local]
 [<ffffffff812c0a52>] device_unregister+0x22/0x60
 [<ffffffffa0287649>] scst_local_remove_adapter+0x59/0xd0 [scst_local]
 [<ffffffffa02880ab>] __scst_local_remove_target+0x7b/0x130 [scst_local]
 [<ffffffffa028ae3a>] scst_local_exit+0x84/0x24a [scst_local]
 [<ffffffff810a2d26>] sys_delete_module+0x1a6/0x2b0
 [<ffffffff814ac855>] ? retint_swapgs+0x13/0x1b
 [<ffffffff8120ebfe>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff814b4b29>] system_call_fastpath+0x16/0x1b

Signed-off-by: Bart Van Assche <bvanassche@acm.org>




git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@4566 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
Vladislav Bolkhovitin
2012-10-23 01:08:47 +00:00
parent 6b29b72695
commit fab0164c55

View File

@@ -953,6 +953,12 @@ static int scst_local_queuecommand_lck(struct scsi_cmnd *SCpnt,
sess = to_scst_lcl_sess(scsi_get_device(SCpnt->device->host));
if (sess->unregistering) {
SCpnt->result = DID_BAD_TARGET << 16;
SCpnt->scsi_done(SCpnt);
return 0;
}
scsi_set_resid(SCpnt, 0);
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25))
@@ -1576,7 +1582,7 @@ static void scst_local_release_adapter(struct device *dev)
goto out;
spin_lock(&sess->aen_lock);
sess->unregistering = 1;
WARN_ON_ONCE(!sess->unregistering);
scst_process_aens(sess, true);
spin_unlock(&sess->aen_lock);
@@ -1766,6 +1772,10 @@ static void __scst_local_remove_target(struct scst_local_tgt *tgt)
list_for_each_entry_safe(sess, ts, &tgt->sessions_list,
sessions_list_entry) {
spin_lock(&sess->aen_lock);
sess->unregistering = 1;
spin_unlock(&sess->aen_lock);
scst_local_remove_adapter(sess);
}