- Memory barriers cleanup. Comments for them improved

- Small docs update
 - srpt/README_in-tree added



git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@614 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
Vladislav Bolkhovitin
2008-12-10 10:55:01 +00:00
parent 76d0a69737
commit 41c3515516
12 changed files with 174 additions and 25 deletions

View File

@@ -2512,7 +2512,9 @@ static inline void iscsi_set_state_wake_up(struct iscsi_cmnd *req,
* We use wait_event() to wait for the state change, but it checks its
* condition without any protection, so without cmnd_get() it is
* possible that req will die "immediately" after the state assignment
* and wake_up() will operate on dead data.
* and wake_up() will operate on dead data. We use the ordered version
* of cmnd_get(), because "get" must be done before the state
* assignment.
*/
cmnd_get_ordered(req);
req->scst_state = new_state;
@@ -2710,6 +2712,12 @@ static int iscsi_xmit_response(struct scst_cmd *scst_cmd)
sBUG();
#endif
/*
* "_ordered" here to protect from reorder, which can lead to
* preliminary connection destroy in req_cmnd_release(). Just in
* case, actually, because reordering shouldn't go so far, but who
* knows..
*/
conn_get_ordered(conn);
req_cmnd_release(req);
iscsi_try_local_processing(conn);

View File

@@ -475,6 +475,7 @@ static inline void cmnd_get(struct iscsi_cmnd *cmnd)
static inline void cmnd_get_ordered(struct iscsi_cmnd *cmnd)
{
cmnd_get(cmnd);
/* See comments for each cmnd_get_ordered() use */
smp_mb__after_atomic_inc();
}
@@ -546,6 +547,7 @@ static inline void conn_get(struct iscsi_conn *conn)
static inline void conn_get_ordered(struct iscsi_conn *conn)
{
conn_get(conn);
/* See comments for each conn_get_ordered() use */
smp_mb__after_atomic_inc();
}
@@ -556,8 +558,9 @@ static inline void conn_put(struct iscsi_conn *conn)
sBUG_ON(atomic_read(&conn->conn_ref_cnt) == 0);
/*
* It always ordered to protect from undesired side effects like
* accessing just destroyed object because of this *_dec() reordering.
* Make it always ordered to protect from undesired side effects like
* accessing just destroyed by close_conn() conn caused by reordering
* of this atomic_dec().
*/
smp_mb__before_atomic_dec();
atomic_dec(&conn->conn_ref_cnt);

View File

@@ -359,7 +359,7 @@ else
fi \
| process_patch "srpt.diff"
add_file "srpt/README" "Documentation/scst/README.srpt" \
add_file "srpt/README_in-tree" "Documentation/scst/README.srpt" \
| process_patch "srpt-doc.diff"

View File

@@ -274,6 +274,8 @@ For changing VMSPLIT option (CONFIG_VMSPLIT to be precise) you should in
- General setup->Configure standard kernel features (for small systems): ON
- General setup->Prompt for development and/or incomplete code/drivers: ON
- Processor type and features->High Memory Support: OFF
- Processor type and features->Memory split: according to amount of

View File

@@ -222,6 +222,8 @@ For changing VMSPLIT option (CONFIG_VMSPLIT to be precise) you should in
- General setup->Configure standard kernel features (for small systems): ON
- General setup->Prompt for development and/or incomplete code/drivers: ON
- Processor type and features->High Memory Support: OFF
- Processor type and features->Memory split: according to amount of

View File

@@ -1270,6 +1270,11 @@ static int dev_user_process_reply_exec(struct scst_user_cmd *ucmd,
if (unlikely((cmd->data_direction == SCST_DATA_READ) ||
(cmd->resp_data_len != 0)))
goto out_inval;
/*
* background_exec assignment must be after ucmd get.
* Otherwise, due to reordering, in dev_user_process_reply()
* it is possible that ucmd is destroyed before it "got" here.
*/
ucmd_get_ordered(ucmd);
ucmd->background_exec = 1;
TRACE_DBG("Background ucmd %p", ucmd);
@@ -1383,6 +1388,8 @@ static int dev_user_process_reply(struct scst_user_dev *dev,
goto out_unlock;
}
/* To sync. with dev_user_process_reply_exec(). See comment there. */
smp_mb();
if (ucmd->background_exec) {
state = UCMD_STATE_EXECING;
goto unlock_process;

View File

@@ -2450,7 +2450,6 @@ static void blockio_exec_rw(struct scst_cmd *cmd, struct scst_vdisk_thr *thr,
/* +1 to prevent erroneous too early command completion */
atomic_set(&blockio_work->bios_inflight, bios+1);
smp_wmb();
while (hbio) {
bio = hbio;

View File

@@ -1528,9 +1528,10 @@ void scst_check_retries(struct scst_tgt *tgt)
/*
* We don't worry about overflow of finished_cmds, because we check
* only for its change
* only for its change.
*/
atomic_inc(&tgt->finished_cmds);
/* See comment in scst_queue_retry_cmd() */
smp_mb__after_atomic_inc();
if (unlikely(tgt->retry_cmds > 0)) {
struct scst_cmd *c, *tc;
@@ -1541,8 +1542,7 @@ void scst_check_retries(struct scst_tgt *tgt)
spin_lock_irqsave(&tgt->tgt_lock, flags);
list_for_each_entry_safe(c, tc, &tgt->retry_cmd_list,
cmd_list_entry)
{
cmd_list_entry) {
tgt->retry_cmds--;
TRACE_RETRY("Moving retry cmd %p to head of active "
@@ -3125,7 +3125,13 @@ static void scst_block_dev(struct scst_device *dev, int outstanding)
__scst_block_dev(dev);
spin_unlock_bh(&dev->dev_lock);
/* spin_unlock_bh() doesn't provide the necessary memory barrier */
/*
* Memory barrier is necessary here, because we need to read
* on_dev_count in wait_event() below after we increased block_count.
* Otherwise, we can miss wake up in scst_dec_on_dev_cmd().
* We use the explicit barrier, because spin_unlock_bh() doesn't
* provide the necessary memory barrier functionality.
*/
smp_mb();
TRACE_MGMT_DBG("Waiting during blocking outstanding %d (on_dev_count "
@@ -3211,7 +3217,6 @@ repeat:
spin_lock_bh(&dev->dev_lock);
if (unlikely(test_bit(SCST_CMD_ABORTED, &cmd->cmd_flags)))
goto out_unlock;
barrier(); /* to reread block_count */
if (dev->block_count > 0) {
scst_dec_on_dev_cmd(cmd);
TRACE_MGMT_DBG("Delaying cmd %p due to blocking or "
@@ -3230,7 +3235,6 @@ repeat:
}
if (unlikely(dev->dev_serialized)) {
spin_lock_bh(&dev->dev_lock);
barrier(); /* to reread block_count */
if (dev->block_count == 0) {
TRACE_MGMT_DBG("cmd %p (tag %llu), blocking further "
"cmds due to serializing (dev %p)", cmd,
@@ -3405,7 +3409,6 @@ void scst_xmit_process_aborted_cmd(struct scst_cmd *cmd)
scst_done_cmd_mgmt(cmd);
smp_rmb();
if (test_bit(SCST_CMD_ABORTED_OTHER, &cmd->cmd_flags)) {
if (cmd->completed) {
/* It's completed and it's OK to return its result */
@@ -3548,6 +3551,7 @@ static void tm_dbg_timer_fn(unsigned long arg)
{
TRACE_MGMT_DBG("%s", "delayed cmd timer expired");
tm_dbg_flags.tm_dbg_release = 1;
/* Used to make sure that all woken up threads see the new value */
smp_wmb();
wake_up_all(tm_dbg_p_cmd_list_waitQ);
}
@@ -3763,6 +3767,10 @@ void tm_dbg_task_mgmt(struct scst_device *dev, const char *fn, int force)
tm_dbg_delayed_cmds_count);
tm_dbg_change_state();
tm_dbg_flags.tm_dbg_release = 1;
/*
* Used to make sure that all woken up threads see the new
* value.
*/
smp_wmb();
if (tm_dbg_p_cmd_list_waitQ != NULL)
wake_up_all(tm_dbg_p_cmd_list_waitQ);

View File

@@ -501,13 +501,18 @@ int scst_suspend_activity(bool interruptible)
set_bit(SCST_FLAG_SUSPENDING, &scst_flags);
set_bit(SCST_FLAG_SUSPENDED, &scst_flags);
/*
* Assignment of SCST_FLAG_SUSPENDING and SCST_FLAG_SUSPENDED must be
* ordered with scst_cmd_count. Otherwise lockless logic in
* scst_translate_lun() and scst_mgmt_translate_lun() won't work.
*/
smp_mb__after_set_bit();
/*
* See comment in scst_user.c::dev_user_task_mgmt_fn() for more
* information about scst_user behavior.
*
* ToDo: make the global suspending unneeded (Switch to per-device
* ToDo: make the global suspending unneeded (switch to per-device
* reference counting? That would mean to switch off from lockless
* implementation of scst_translate_lun().. )
*/
@@ -530,6 +535,7 @@ int scst_suspend_activity(bool interruptible)
goto out_clear;
clear_bit(SCST_FLAG_SUSPENDING, &scst_flags);
/* See comment about smp_mb() above */
smp_mb__after_clear_bit();
TRACE_MGMT_DBG("Waiting for %d active commands finally to complete",
@@ -551,6 +557,7 @@ out:
out_clear:
clear_bit(SCST_FLAG_SUSPENDING, &scst_flags);
/* See comment about smp_mb() above */
smp_mb__after_clear_bit();
goto out_up;
}
@@ -568,6 +575,10 @@ static void __scst_resume_activity(void)
goto out;
clear_bit(SCST_FLAG_SUSPENDED, &scst_flags);
/*
* The barrier is needed to make sure all woken up threads see the
* cleared flag. Not sure if it's really needed, but let's be safe.
*/
smp_mb__after_clear_bit();
list_for_each_entry(l, &scst_cmd_lists_list, lists_list_entry) {

View File

@@ -420,6 +420,7 @@ static inline void scst_dec_on_dev_cmd(struct scst_cmd *cmd)
scst_unblock_dev(dev);
atomic_dec(&dev->on_dev_count);
/* See comment in scst_block_dev() */
smp_mb__after_atomic_dec();
TRACE_DBG("New on_dev_count %d", atomic_read(&dev->on_dev_count));
@@ -438,6 +439,7 @@ static inline void __scst_get(int barrier)
TRACE_DBG("Incrementing scst_cmd_count(%d)",
atomic_read(&scst_cmd_count));
/* See comment about smp_mb() in scst_suspend_activity() */
if (barrier)
smp_mb__after_atomic_inc();
}
@@ -446,6 +448,7 @@ static inline void __scst_put(void)
{
int f;
f = atomic_dec_and_test(&scst_cmd_count);
/* See comment about smp_mb() in scst_suspend_activity() */
if (f && unlikely(test_bit(SCST_FLAG_SUSPENDED, &scst_flags))) {
TRACE_MGMT_DBG("%s", "Waking up scst_dev_cmd_waitQ");
wake_up_all(&scst_dev_cmd_waitQ);

View File

@@ -120,7 +120,8 @@ static int scst_init_cmd(struct scst_cmd *cmd, enum scst_exec_context *context)
}
/*
* Memory barrier isn't necessary here, because CPU appears to
* be self-consistent
* be self-consistent and we don't care about the race, described
* in comment in scst_do_job_init().
*/
rc = __scst_init_cmd(cmd);
@@ -885,6 +886,12 @@ static int scst_queue_retry_cmd(struct scst_cmd *cmd, int finished_cmds)
spin_lock_irqsave(&tgt->tgt_lock, flags);
tgt->retry_cmds++;
/*
* Memory barrier is needed here, because we need the exact ordering
* of the retry_cmds write and the finished_cmds read, so that we do
* not miss the case when a command finished while we were queuing it
* for retry after the finished_cmds check.
*/
smp_mb();
TRACE_RETRY("TGT QUEUE FULL: incrementing retry_cmds %d",
tgt->retry_cmds);
@@ -1684,7 +1691,6 @@ int scst_check_local_events(struct scst_cmd *cmd)
* was_reset.
*/
spin_lock_bh(&dev->dev_lock);
barrier(); /* to reread was_reset */
if (dev->scsi_dev->was_reset) {
TRACE(TRACE_MGMT, "was_reset is %d", 1);
scst_set_cmd_error(cmd,
@@ -1764,12 +1770,16 @@ void scst_inc_expected_sn(struct scst_tgt_dev *tgt_dev, atomic_t *slot)
inc:
/*
* No locks is needed, because only one thread at time can
* be here (serialized by sn). Also it is supposed that there
* could not be half-incremented halves.
* No protection of expected_sn is needed, because only one thread
* at a time can be here (serialized by sn). Also it is supposed that
* there could not be half-incremented halves.
*/
tgt_dev->expected_sn++;
smp_mb(); /* write must be before def_cmd_count read */
/*
* Write must be before def_cmd_count read to be in sync. with
* scst_post_exec_sn(). See comment in scst_send_for_exec().
*/
smp_mb();
TRACE_SN("Next expected_sn: %ld", tgt_dev->expected_sn);
out:
@@ -2168,6 +2178,17 @@ static int scst_send_for_exec(struct scst_cmd **active_cmd)
spin_lock_irq(&tgt_dev->sn_lock);
tgt_dev->def_cmd_count++;
/*
* Memory barrier is needed here to implement lockless fast
* path. We need the exact order of read and write between
* def_cmd_count and expected_sn. Otherwise, we can miss the case
* when expected_sn was changed to be equal to cmd->sn while
* we are queuing cmd to the deferred list after the expected_sn
* check below. It would lead to a forever stuck command. But with
* the barrier in such case __scst_check_deferred_commands()
* will be called and it will take sn_lock, so we will be
* synchronized.
*/
smp_mb();
expected_sn = tgt_dev->expected_sn;
@@ -3044,6 +3065,7 @@ static int scst_translate_lun(struct scst_cmd *cmd)
TRACE_ENTRY();
/* See comment about smp_mb() in scst_suspend_activity() */
__scst_get(1);
if (likely(!test_bit(SCST_FLAG_SUSPENDED, &scst_flags))) {
@@ -3209,11 +3231,11 @@ restart:
* it's inserting to it, but another command at the same time
* seeing init cmd list empty and goes directly, because it
* could affect only commands from the same initiator to the
* same tgt_dev, but init_cmd_done() doesn't guarantee the order
* in case of simultaneous such calls anyway.
* same tgt_dev, but scst_cmd_init_done*() doesn't guarantee
* the order in case of simultaneous such calls anyway.
*/
TRACE_MGMT_DBG("Deleting cmd %p from init cmd list", cmd);
smp_wmb();
smp_wmb(); /* enforce the required order */
list_del(&cmd->cmd_list_entry);
spin_unlock(&scst_init_lock);
@@ -3571,6 +3593,7 @@ static int scst_mgmt_translate_lun(struct scst_mgmt_cmd *mcmd)
TRACE_DBG("Finding tgt_dev for mgmt cmd %p (lun %lld)", mcmd,
(long long unsigned int)mcmd->lun);
/* See comment about smp_mb() in scst_suspend_activity() */
__scst_get(1);
if (unlikely(test_bit(SCST_FLAG_SUSPENDED, &scst_flags) &&
@@ -3817,10 +3840,8 @@ void scst_abort_cmd(struct scst_cmd *cmd, struct scst_mgmt_cmd *mcmd,
if (other_ini) {
/* Might be necessary if command aborted several times */
if (!test_bit(SCST_CMD_ABORTED, &cmd->cmd_flags)) {
if (!test_bit(SCST_CMD_ABORTED, &cmd->cmd_flags))
set_bit(SCST_CMD_ABORTED_OTHER, &cmd->cmd_flags);
smp_mb__after_set_bit();
}
} else {
/* Might be necessary if command aborted several times */
clear_bit(SCST_CMD_ABORTED_OTHER, &cmd->cmd_flags);

85
srpt/README_in-tree Normal file
View File

@@ -0,0 +1,85 @@
SCSI RDMA Protocol (SRP) Target driver for Linux
=================================================
The SRP Target driver is designed to work directly on top of the
OpenFabrics OFED-1.x software stack (http://www.openfabrics.org) or
the Infiniband drivers in the Linux kernel tree
(http://www.kernel.org). The SRP target driver also interfaces with
the generic SCSI target mid-level driver called SCST
(http://scst.sourceforge.net).
How-to run
-----------
A. On srp target machine
1. Please refer to SCST's README for loading scst driver and its
dev_handlers drivers (scst_disk, scst_vdisk block or file IO mode, nullio, ...)
Example 1: working with real back-end scsi disks
a. modprobe scst
b. modprobe scst_disk
c. cat /proc/scsi_tgt/scsi_tgt
ibstor00:~ # cat /proc/scsi_tgt/scsi_tgt
Device (host:ch:id:lun or name) Device handler
0:0:0:0 dev_disk
4:0:0:0 dev_disk
5:0:0:0 dev_disk
6:0:0:0 dev_disk
7:0:0:0 dev_disk
Now you want to exclude the first scsi disk and expose the last 4 scsi disks as
IB/SRP luns for I/O
echo "add 4:0:0:0 0" >/proc/scsi_tgt/groups/Default/devices
echo "add 5:0:0:0 1" >/proc/scsi_tgt/groups/Default/devices
echo "add 6:0:0:0 2" >/proc/scsi_tgt/groups/Default/devices
echo "add 7:0:0:0 3" >/proc/scsi_tgt/groups/Default/devices
Example 2: working with VDISK FILEIO mode (using md0 device and file 10G-file)
a. modprobe scst
b. modprobe scst_vdisk
c. echo "open vdisk0 /dev/md0" > /proc/scsi_tgt/vdisk/vdisk
d. echo "open vdisk1 /10G-file" > /proc/scsi_tgt/vdisk/vdisk
e. echo "add vdisk0 0" >/proc/scsi_tgt/groups/Default/devices
f. echo "add vdisk1 1" >/proc/scsi_tgt/groups/Default/devices
Example 3: working with VDISK BLOCKIO mode (using md0 device, sda, and cciss/c1d0)
a. modprobe scst
b. modprobe scst_vdisk
c. echo "open vdisk0 /dev/md0 BLOCKIO" > /proc/scsi_tgt/vdisk/vdisk
d. echo "open vdisk1 /dev/sda BLOCKIO" > /proc/scsi_tgt/vdisk/vdisk
e. echo "open vdisk2 /dev/cciss/c1d0 BLOCKIO" > /proc/scsi_tgt/vdisk/vdisk
f. echo "add vdisk0 0" >/proc/scsi_tgt/groups/Default/devices
g. echo "add vdisk1 1" >/proc/scsi_tgt/groups/Default/devices
h. echo "add vdisk2 2" >/proc/scsi_tgt/groups/Default/devices
2. modprobe ib_srpt
B. On initiator machines you can manually do the following steps:
1. modprobe ib_srp
2. ibsrpdm -c (to discover new SRP target)
3. echo <new target info> > /sys/class/infiniband_srp/srp-mthca0-1/add_target
4. fdisk -l (will show new discovered scsi disks)
Example:
Assume that you use port 1 of the first HCA in the system, i.e. mthca0
[root@lab104 ~]# ibsrpdm -c -d /dev/infiniband/umad0
id_ext=0002c90200226cf4,ioc_guid=0002c90200226cf4,
dgid=fe800000000000000002c90200226cf5,pkey=ffff,service_id=0002c90200226cf4
[root@lab104 ~]# echo id_ext=0002c90200226cf4,ioc_guid=0002c90200226cf4,
dgid=fe800000000000000002c90200226cf5,pkey=ffff,service_id=0002c90200226cf4 >
/sys/class/infiniband_srp/srp-mthca0-1/add_target
OR
+ You can edit /etc/infiniband/openib.conf to load srp driver and srp HA daemon
automatically, i.e. set SRP_LOAD=yes and SRPHA_ENABLE=yes
+ To set up and use high availability feature you need dm-multipath driver
and multipath tool
+ Please refer to OFED-1.x SRP's user manual for more in-details instructions
on how-to enable/use HA feature
To minimize QUEUEFULL conditions, you can apply scst_increase_max_tgt_cmds
patch from SRPT package from http://sourceforge.net/project/showfiles.php?group_id=110471