From 8ebd2bf6bbd9936d0205e1a767b2f567a0821d0e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 25 Oct 2010 18:29:44 +0000 Subject: [PATCH] ib_srpt: Merged from trunk (svn merge -r2072:2459 https://scst.svn.sourceforge.net/svnroot/scst/trunk/srpt). git-svn-id: http://svn.code.sf.net/p/scst/svn/branches/2.0.0.x@2468 d57e44dd-8a1f-0410-8b47-8ef2f437770f --- srpt/README | 27 +- srpt/Testing.txt | 90 ++- srpt/src/ib_srpt.c | 1394 +++++++++++++++++++------------------------- srpt/src/ib_srpt.h | 192 +++--- 4 files changed, 769 insertions(+), 934 deletions(-) diff --git a/srpt/README b/srpt/README index 924f18110..2acf741cf 100644 --- a/srpt/README +++ b/srpt/README @@ -62,27 +62,31 @@ Proceed as follows to compile and install the SRP target driver: chkconfig scst on The ib_srpt kernel module supports the following parameters: -* srp_max_message_size (number) +* srp_max_req_size (number) Maximum size of an SRP control message in bytes. Examples of SRP control messages are: login request, logout request, data transfer request, ... The larger this parameter, the more scatter/gather list elements can be sent at once. Use the following formula to compute an appropriate value for this parameter: 68 + 16 * (sg_tablesize). The default value of this parameter is 2116, which corresponds to an sg table size of 128. +* srp_max_rsp_size (number) + Maximum size of an SRP response message in bytes. Sense data is sent back + via these messages towards the initiator. The default size is 256 bytes. + With this value there remains (256-36) = 220 bytes for sense data. * srp_max_rdma_size (number) Maximum number of bytes that may be transferred at once via RDMA. Defaults to 65536 bytes, which is sufficient to use the full bandwidth of low-latency HCAs. Increasing this value may decrease latency for applications transferring large amounts of data at once. -* srpt_autodetect_cred_req (y or n, default n) - Whether or not to autodetect initiator support for SRP_CRED_REQ (initiators - with Linux kernel 2.6.37 or later only). The use of SRP_CRED_REQ allows - ib_srpt to process workloads with large I/O depths more efficiently. - Note: enabling this mode causes the Windows SRP initiator to stop working. * srpt_srq_size (number, default 4095) ib_srpt uses a shared receive queue (SRQ) for processing incoming SRP requests. This number may have to be increased when a large number of initiator systems is accessing a single SRP target system. +* srpt_sq_size (number, default 4096) + Per-channel InfiniBand send queue size. The default setting is sufficient + for a credit limit of 128. Changing this parameter to a smaller value may + cause RDMA requests to be retried and hence may slow down data transfer + severely. * thread (0, 1 or 2, default 1) Defines the context on which SRP requests are processed: * thread=0: do as much processing in IRQ context as possible. Results in @@ -180,14 +184,23 @@ For more information, see also: * http://www.linux-ha.org/wiki/Main_Page +Notes about ib_srpt +------------------- + +* Unloading the kernel module ib_srpt while I/O is ongoing is supported. + However, it can take up to two minutes before unloading finishes. During + that time CPU usage will be high. + + Performance Notes - Initiator Side ---------------------------------- * For latency sensitive applications, using the noop scheduler at the initiator side can give significantly better results than with other schedulers. 
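  A minimal sketch of selecting the noop scheduler for one SRP block device at
  run time via sysfs (the device name sdb is only an example; use the device
  node created by the SRP initiator):

    # "sdb" is an example device name, not part of the driver documentation.
    echo noop > /sys/class/block/sdb/queue/scheduler
    cat /sys/class/block/sdb/queue/scheduler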
-* The following parameters have a small but measureable impact on SRP +* The following parameters have a small but measurable impact on SRP performance: + * /sys/class/block/${dev}/queue/rotational * /sys/class/block/${dev}/queue/rq_affinity * /proc/irq/${ib_int_no}/smp_affinity diff --git a/srpt/Testing.txt b/srpt/Testing.txt index bc5d0eedf..f4475e32e 100644 --- a/srpt/Testing.txt +++ b/srpt/Testing.txt @@ -1,4 +1,7 @@ -The following tests must be run at least before releasing a new SRPT version: +ib_srpt Test Procedure +====================== + +At least the following tests must be run before releasing a new SRPT version: * Make sure that SRPT compiles and installs without triggering any compiler warning. Use the following command to compile and install SRPT: @@ -18,36 +21,50 @@ The following tests must be run at least before releasing a new SRPT version: * Run the following command on the target system: ${SCST_TRUNK}/scripts/monitor-memory-usage | tee memlog.txt * Run the following command on the initiator system: - for ((i=0;i<100000;i++)); do echo 'id_ext=0002c9030003cca2,ioc_guid=0002c9030003cca2,pkey=ffff,dgid=fe800000000000000002c9030003cca3,service_id=0002c9030003cca3' >/sys/class/infiniband_srp/srp-mlx4_0-1/add_target ; done + target_id="$(/usr/sbin/ibsrpdm -c -d /dev/infiniband/umad0)" + for ((i=0;i<100000;i++)); do echo "$target_id" >/sys/class/infiniband_srp/srp-mlx4_0-1/add_target; done -* Verify that an I/O stress test runs fine by running the following command - where $dev1 and $dev2 are device nodes created by the SRP initiator and go - over different IB connections to the target: +* Verify that the following I/O stress test does not report any errors even + when left running for several hours: - for dev in $dev1 $dev2 + dev=... + umount /mnt + mkfs.ext4 -O ^has_journal $dev + while \ + mount $dev /mnt && \ + rm -rf /mnt/test* && \ + fio --verify=md5 -rw=randwrite --size=10m --bs=4k \ + --loops=1000000 --iodepth=64 --group_reporting --sync=1 --direct=1 \ + --norandommap --ioengine=aio --directory=/mnt --name=test --thread \ + --numjobs=80 --runtime=30 && \ + fsck -N $dev do - fio --bs=4K --buffered=0 --rw=write --ioengine=psync --verify=sha256 \ - --verify_fatal=1 --loops=1000 \ - --name=partition1 --filename=$dev & + true done -* Verify that the following I/O stress test runs fine: +* Repeat the above test with SCST_MAX_TGT_DEV_COMMANDS set to 48 and after + having applied the patch below on ib_srpt.c. The expected result is that no + data corruption occurs but that on the target error messages are logged + about a negative req_lim value. The lowest expected value for req_lim is -15. 
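  One way to observe these messages while the test runs is to follow the
  kernel log on the target and filter on the req_lim keyword that appears in
  the ib_srpt error messages (a sketch only -- the log file path depends on
  the distribution; dmesg can be used instead):

    # Log file path is an assumption; adjust for the distribution in use.
    tail -f /var/log/messages | grep req_lim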
- mkfs.ext2 $dev - mount $dev /mnt - while true; do { sleep 30; killall fio; } & { rm -rf /mnt/test* && fio --verify=md5 -rw=randwrite --size=10m --bs=4k --loops=1000000 --iodepth=64 --group_reporting --sync=1 --direct=1 --norandommap --ioengine=psync --directory=/mnt --name=test --thread --numjobs=80; }; done - -* Another I/O stress test: - (initiator) Write data pattern: - lmdd if=internal of=/dev/sdb opat=1 bs=1M count=1000 - (target) Verify data: - lmdd if=/dev/exported-block of=internal ipat=1 bs=1M count=1000 mismatch=1 - (initiator) Verify data: - lmdd if=/dev/sdb of=internal ipat=1 bs=1M count=1000 mismatch=1 - for ((i=0;i<40;i++)); do lmdd if=/dev/sdb of=internal ipat=1 bs=1M count=1000 mismatch=1 & done +Index: srpt/src/ib_srpt.c +=================================================================== +--- srpt/src/ib_srpt.c (revision 2412) ++++ srpt/src/ib_srpt.c (working copy) +@@ -2411,7 +2411,7 @@ + ch->max_ti_iu_len = it_iu_len; + rsp->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); +- rsp->req_lim_delta = cpu_to_be32(ch->rq_size); ++ rsp->req_lim_delta = cpu_to_be32(ch->rq_size + 16); + atomic_set(&ch->req_lim, ch->rq_size); + + /* create cm reply */ * Verify that a SCSI reset works properly by running the following command - on an initiator system: + on an initiator system (note: even with the latest Linux SRP initiator, the + command below triggers a kernel bug -- see also + https://bugzilla.kernel.org/show_bug.cgi?id=13893): sg_reset -d ${initiator_device} @@ -57,7 +74,7 @@ The following tests must be run at least before releasing a new SRPT version: and the following commands on an initiator system: - target_id="id_ext=0002c9030003cca2,ioc_guid=0002c9030003cca2,dgid=fe800000000000000002c9030003cca3,pkey=ffff,service_id=0009030003cca2,ioc_guid=0002c9030003cca2,dgid=fe800000000000000002c9030003cca3,pkey=ffff,service_id=0002c9030003cca2" + target_id="$(/usr/sbin/ibsrpdm -c -d /dev/infiniband/umad0)" while true; do date; rmmod ib_srp; modprobe ib_srp; echo "${target_id}" > /sys/class/infiniband_srp/srp-mlx4_0-1/add_target; sleep 2; done and verify that nothing unexpected happens. @@ -65,7 +82,7 @@ The following tests must be run at least before releasing a new SRPT version: * Log in twice from an initiator system, and verify that the first session receives a DREQ upon the second login: - target_id="id_ext=0002c9030003cca2,ioc_guid=0002c9030003cca2,dgid=fe800000000000000002c9030003cca3,pkey=ffff,service_id=0009030003cca2,ioc_guid=0002c9030003cca2,dgid=fe800000000000000002c9030003cca3,pkey=ffff,service_id=0002c9030003cca2" + target_id="$(/usr/sbin/ibsrpdm -c -d /dev/infiniband/umad0)" dmesg -c echo "${target_id}" > /sys/class/infiniband_srp/srp-mlx4_0-1/add_target dmesg -c @@ -85,11 +102,24 @@ The following tests must be run at least before releasing a new SRPT version: * Test with multiple values of ib_srp_tablesize in the range 1..128. -* Verify that the initiator does not lock up while running the command: +* Verify that the initiator does not lock up while running the command below + (see also https://bugzilla.kernel.org/show_bug.cgi?id=14235): -fio --bs=512 --buffered=0 --ioengine=sg --rw=read --invalidate=1 --thread --numjobs=8 --loops=10 --gtod_reduce=1 --group_reporting --name=/dev/sdb --filename=/dev/sdb + dev=... 
+ fio --bs=512 --buffered=0 --ioengine=libaio --rw=read --invalidate=1 \ + --thread --numjobs=8 --loops=10 --gtod_reduce=1 --group_reporting \ + --name=$dev --filename=$dev - when target processing is delayed by the following command: - -echo 15 > /sys/module/ib_srpt/parameters/interrupt_processing_delay_in_us +* Test whether queue overflow recovery works correctly as follows: + - On the target, reload ib_srpt with srpt_sq_size set to 64. + - On the initiator, run a direct I/O test with large block sizes, e.g. + scripts/blockdev-perftest -f -d -j -m 12 -M 24 /dev/sdb + - On the initiator, run the following two commands in parallel: + scripts/blockdev-perftest -r -d -j -m 12 -M 24 /dev/sdb + fio --verify=md5 -rw=randwrite --size=10m --bs=4k \ + --loops=1000000 --iodepth=64 --group_reporting --sync=1 --direct=1 \ + --norandommap --ioengine=aio --directory=/mnt --name=test --thread \ + --numjobs=80 --runtime=30 +* Repeat the above tests for all three threading modes: thread=0, thread=1 + and thread=2. diff --git a/srpt/src/ib_srpt.c b/srpt/src/ib_srpt.c index 5965e4a3d..cca83dac3 100644 --- a/srpt/src/ib_srpt.c +++ b/srpt/src/ib_srpt.c @@ -59,7 +59,7 @@ /* Name of this kernel module. */ #define DRV_NAME "ib_srpt" #define DRV_VERSION "2.0.0" -#define DRV_RELDATE "September 17, 2010" +#define DRV_RELDATE "October 25, 2010" #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) /* Flags to be used in SCST debug tracing statements. */ #define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \ @@ -96,15 +96,7 @@ static atomic_t srpt_device_count; #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS; module_param(trace_flag, long, 0644); -MODULE_PARM_DESC(trace_flag, - "Trace flags for the ib_srpt kernel module."); -#endif -#if defined(CONFIG_SCST_DEBUG) -static unsigned long processing_delay_in_us; -module_param(processing_delay_in_us, long, 0744); -MODULE_PARM_DESC(processing_delay_in_us, - "SRP_CMD processing delay in microseconds. 
Useful for" - " testing the initiator lockup avoidance algorithm."); +MODULE_PARM_DESC(trace_flag, "SCST trace flags."); #endif static int thread = 1; @@ -121,10 +113,15 @@ module_param(srp_max_rdma_size, int, 0744); MODULE_PARM_DESC(srp_max_rdma_size, "Maximum size of SRP RDMA transfers for new connections."); -static unsigned srp_max_message_size = DEFAULT_MAX_MESSAGE_SIZE; -module_param(srp_max_message_size, int, 0444); -MODULE_PARM_DESC(srp_max_message_size, - "Maximum size of SRP control messages in bytes."); +static unsigned srp_max_req_size = DEFAULT_MAX_REQ_SIZE; +module_param(srp_max_req_size, int, 0444); +MODULE_PARM_DESC(srp_max_req_size, + "Maximum size of SRP request messages in bytes."); + +static unsigned int srp_max_rsp_size = DEFAULT_MAX_RSP_SIZE; +module_param(srp_max_rsp_size, int, 0444); +MODULE_PARM_DESC(thread, + "Maximum size of SRP response messages in bytes."); static int srpt_srq_size = DEFAULT_SRPT_SRQ_SIZE; module_param(srpt_srq_size, int, 0444); @@ -136,19 +133,12 @@ module_param(srpt_sq_size, int, 0444); MODULE_PARM_DESC(srpt_sq_size, "Per-channel send queue (SQ) size."); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 27) \ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \ || defined(RHEL_MAJOR) && RHEL_MAJOR -0 <= 5 -static int srpt_autodetect_cred_req; -module_param(srpt_autodetect_cred_req, int, 0444); -#else -static bool srpt_autodetect_cred_req; -module_param(srpt_autodetect_cred_req, bool, 0444); -#endif -MODULE_PARM_DESC(srpt_autodetect_cred_req, - "Whether or not to autodetect whether the initiator supports" - " SRP_CRED_REQ."); - static int use_port_guid_in_session_name; +#else +static bool use_port_guid_in_session_name; +#endif module_param(use_port_guid_in_session_name, bool, 0444); MODULE_PARM_DESC(use_port_guid_in_session_name, "Use target port ID in the SCST session name such that" @@ -171,7 +161,7 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev); static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt); #endif /*CONFIG_SCST_PROC*/ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, - struct srpt_ioctx *ioctx); + struct srpt_send_ioctx *ioctx); static void srpt_release_channel(struct scst_session *scst_sess); static struct ib_client srpt_client = { @@ -352,7 +342,7 @@ static void srpt_get_iou(struct ib_dm_mad *mad) int i; ioui = (struct ib_dm_iou_info *)mad->data; - ioui->change_id = cpu_to_be16(1); + ioui->change_id = __constant_cpu_to_be16(1); ioui->max_controllers = 16; /* set present for slot 1 and empty for the rest */ @@ -378,12 +368,14 @@ static void srpt_get_ioc(struct srpt_device *sdev, u32 slot, iocp = (struct ib_dm_ioc_profile *)mad->data; if (!slot || slot > 16) { - mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + mad->mad_hdr.status + = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); return; } if (slot > 2) { - mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC); + mad->mad_hdr.status + = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC); return; } @@ -395,13 +387,13 @@ static void srpt_get_ioc(struct srpt_device *sdev, u32 slot, iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver); iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id); iocp->subsys_device_id = 0x0; - iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS); - iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS); - iocp->protocol = cpu_to_be16(SRP_PROTOCOL); - iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION); + iocp->io_class = __constant_cpu_to_be16(SRP_REV16A_IB_IO_CLASS); + 
iocp->io_subclass = __constant_cpu_to_be16(SRP_IO_SUBCLASS); + iocp->protocol = __constant_cpu_to_be16(SRP_PROTOCOL); + iocp->protocol_version = __constant_cpu_to_be16(SRP_PROTOCOL_VERSION); iocp->send_queue_depth = cpu_to_be16(sdev->srq_size); iocp->rdma_read_depth = 4; - iocp->send_size = cpu_to_be32(srp_max_message_size); + iocp->send_size = cpu_to_be32(srp_max_req_size); iocp->rdma_size = cpu_to_be32(min(max(srp_max_rdma_size, 256U), 1U << 24)); iocp->num_svc_entries = 1; @@ -425,12 +417,14 @@ static void srpt_get_svc_entries(u64 ioc_guid, WARN_ON(!ioc_guid); if (!slot || slot > 16) { - mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + mad->mad_hdr.status + = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); return; } if (slot > 2 || lo > hi || hi > 1) { - mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC); + mad->mad_hdr.status + = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC); return; } @@ -481,7 +475,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad, break; default: rsp_mad->mad_hdr.status = - cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); break; } } @@ -537,11 +531,11 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, break; case IB_MGMT_METHOD_SET: dm_mad->mad_hdr.status = - cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); break; default: dm_mad->mad_hdr.status = - cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD); + __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD); break; } @@ -660,45 +654,46 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev) } /** - * srpt_alloc_ioctx() - Allocate and initialize an SRPT I/O context structure. + * srpt_alloc_ioctx() - Allocate an SRPT I/O context structure. */ -static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev) +static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev, + int ioctx_size, int dma_size, + enum dma_data_direction dir) { struct srpt_ioctx *ioctx; - ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL); + ioctx = kmalloc(ioctx_size, GFP_KERNEL); if (!ioctx) - goto out; + goto err; - ioctx->buf = kzalloc(srp_max_message_size, GFP_KERNEL); + ioctx->buf = kmalloc(dma_size, GFP_KERNEL); if (!ioctx->buf) - goto out_free_ioctx; + goto err_free_ioctx; - ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf, - srp_max_message_size, DMA_BIDIRECTIONAL); + ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf, dma_size, dir); if (ib_dma_mapping_error(sdev->device, ioctx->dma)) - goto out_free_buf; + goto err_free_buf; return ioctx; -out_free_buf: +err_free_buf: kfree(ioctx->buf); -out_free_ioctx: +err_free_ioctx: kfree(ioctx); -out: +err: return NULL; } /** - * srpt_free_ioctx() - Deallocate an SRPT I/O context structure. + * srpt_free_ioctx() - Free an SRPT I/O context structure. */ -static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx) +static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx, + int dma_size, enum dma_data_direction dir) { if (!ioctx) return; - ib_dma_unmap_single(sdev->device, ioctx->dma, - srp_max_message_size, DMA_BIDIRECTIONAL); + ib_dma_unmap_single(sdev->device, ioctx->dma, dma_size, dir); kfree(ioctx->buf); kfree(ioctx); } @@ -706,117 +701,64 @@ static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx) /** * srpt_alloc_ioctx_ring() - Allocate a ring of SRPT I/O context structures. * @sdev: Device to allocate the I/O context ring for. 
- * @ioctx_ring: Pointer to an array of I/O contexts. * @ring_size: Number of elements in the I/O context ring. - * @flags: Flags to be set in the ring index. + * @ioctx_size: I/O context size. + * @dma_size: DMA buffer size. + * @dir: DMA data direction. */ -static int srpt_alloc_ioctx_ring(struct srpt_device *sdev, - struct srpt_ioctx **ioctx_ring, - int ring_size, - int flags) +static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, + int ring_size, int ioctx_size, + int dma_size, enum dma_data_direction dir) { - int res; + struct srpt_ioctx **ring; int i; TRACE_ENTRY(); - res = -ENOMEM; + WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) + && ioctx_size != sizeof(struct srpt_send_ioctx)); + WARN_ON(dma_size != srp_max_req_size && dma_size != srp_max_rsp_size); + + ring = kmalloc(ring_size * sizeof(ring[0]), GFP_KERNEL); + if (!ring) + goto out; for (i = 0; i < ring_size; ++i) { - ioctx_ring[i] = srpt_alloc_ioctx(sdev); - - if (!ioctx_ring[i]) + ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, dma_size, dir); + if (!ring[i]) goto err; - - EXTRACHECKS_WARN_ON(i & flags); - ioctx_ring[i]->index = i | flags; + ring[i]->index = i; } - res = 0; goto out; err: - while (--i >= 0) { - srpt_free_ioctx(sdev, ioctx_ring[i]); - ioctx_ring[i] = NULL; - } + while (--i >= 0) + srpt_free_ioctx(sdev, ring[i], dma_size, dir); + kfree(ring); out: - TRACE_EXIT_RES(res); - return res; + TRACE_EXIT_RES(ring); + return ring; } /** * srpt_free_ioctx_ring() - Free the ring of SRPT I/O context structures. */ -static void srpt_free_ioctx_ring(struct srpt_device *sdev, - struct srpt_ioctx **ioctx_ring, - int ring_size) +static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, + struct srpt_device *sdev, int ring_size, + int dma_size, enum dma_data_direction dir) { int i; - for (i = 0; i < ring_size; ++i) { - srpt_free_ioctx(sdev, ioctx_ring[i]); - ioctx_ring[i] = NULL; - } -} + WARN_ON(dma_size != srp_max_req_size && dma_size != srp_max_rsp_size); -/** - * srpt_alloc_tti_ring() - Allocate target-to-initiator I/O contexts. - */ -static int srpt_alloc_tti_ioctx(struct srpt_rdma_ch *ch) -{ - return srpt_alloc_ioctx_ring(ch->sport->sdev, ch->tti_ioctx, - ARRAY_SIZE(ch->tti_ioctx), - SRPT_OP_TTI); -} - -/** - * srpt_free_tti_ring() - Free target-to-initiator I/O contexts. - */ -static void srpt_free_tti_ioctx(struct srpt_rdma_ch *ch) -{ - srpt_free_ioctx_ring(ch->sport->sdev, ch->tti_ioctx, - ARRAY_SIZE(ch->tti_ioctx)); -} - -/** - * srpt_get_tti_ioctx() - Get a target-to-initiator I/O context. - */ -static struct srpt_ioctx *srpt_get_tti_ioctx(struct srpt_rdma_ch *ch) -{ - struct srpt_ioctx *ioctx; - struct srpt_device *sdev; - unsigned long flags; - - sdev = ch->sport->sdev; - spin_lock_irqsave(&sdev->spinlock, flags); - EXTRACHECKS_WARN_ON(ch->tti_head - ch->tti_tail < 0); - if (ch->tti_head - ch->tti_tail < TTI_IOCTX_COUNT) - ioctx = ch->tti_ioctx[ch->tti_head++ & TTI_IOCTX_MASK]; - else - ioctx = NULL; - spin_unlock_irqrestore(&sdev->spinlock, flags); - return ioctx; -} - -/** - * srpt_put_tti_ioctx() - Put back a target-to-initiator I/O context. 
- */ -static void srpt_put_tti_ioctx(struct srpt_rdma_ch *ch) -{ - struct srpt_device *sdev; - unsigned long flags; - - sdev = ch->sport->sdev; - spin_lock_irqsave(&sdev->spinlock, flags); - EXTRACHECKS_WARN_ON(ch->tti_head - ch->tti_tail < 0); - ch->tti_tail++; - EXTRACHECKS_WARN_ON(ch->tti_head - ch->tti_tail < 0); - spin_unlock_irqrestore(&sdev->spinlock, flags); + for (i = 0; i < ring_size; ++i) + srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir); + kfree(ioctx_ring); } /** * srpt_get_cmd_state() - Get the state of a SCSI command. */ -static enum srpt_command_state srpt_get_cmd_state(struct srpt_ioctx *ioctx) +static enum srpt_command_state srpt_get_cmd_state(struct srpt_send_ioctx *ioctx) { BUG_ON(!ioctx); @@ -830,7 +772,7 @@ static enum srpt_command_state srpt_get_cmd_state(struct srpt_ioctx *ioctx) * Does not modify the state of aborted commands. Returns the previous command * state. */ -static enum srpt_command_state srpt_set_cmd_state(struct srpt_ioctx *ioctx, +static enum srpt_command_state srpt_set_cmd_state(struct srpt_send_ioctx *ioctx, enum srpt_command_state new) { enum srpt_command_state previous; @@ -853,7 +795,7 @@ static enum srpt_command_state srpt_set_cmd_state(struct srpt_ioctx *ioctx, * Returns the previous command state. */ static enum srpt_command_state -srpt_test_and_set_cmd_state(struct srpt_ioctx *ioctx, +srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx, enum srpt_command_state old, enum srpt_command_state new) { @@ -867,15 +809,17 @@ srpt_test_and_set_cmd_state(struct srpt_ioctx *ioctx, /** * srpt_post_recv() - Post an IB receive request. */ -static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx) +static int srpt_post_recv(struct srpt_device *sdev, + struct srpt_recv_ioctx *ioctx) { struct ib_sge list; struct ib_recv_wr wr, *bad_wr; - wr.wr_id = ioctx->index | SRPT_OP_RECV; + BUG_ON(!sdev); + wr.wr_id = encode_wr_id(IB_WC_RECV, ioctx->ioctx.index); - list.addr = ioctx->dma; - list.length = srp_max_message_size; + list.addr = ioctx->ioctx.dma; + list.length = srp_max_req_size; list.lkey = sdev->mr->lkey; wr.next = NULL; @@ -893,8 +837,8 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx) * * Returns zero upon success and a non-zero value upon failure. */ -static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx, - int len) +static int srpt_post_send(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, int len) { struct ib_sge list; struct ib_send_wr wr, *bad_wr; @@ -903,19 +847,19 @@ static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx, ret = -ENOMEM; if (atomic_dec_return(&ch->sq_wr_avail) < 0) { - PRINT_ERROR("%s[%d]: send queue full", __func__, __LINE__); + PRINT_WARNING("%s", "IB send queue full (needed 1)"); goto out; } - ib_dma_sync_single_for_device(sdev->device, ioctx->dma, - len, DMA_TO_DEVICE); + ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, len, + DMA_TO_DEVICE); - list.addr = ioctx->dma; + list.addr = ioctx->ioctx.dma; list.length = len; list.lkey = sdev->mr->lkey; wr.next = NULL; - wr.wr_id = ioctx->index; + wr.wr_id = encode_wr_id(IB_WC_SEND, ioctx->ioctx.index); wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IB_WR_SEND; @@ -943,7 +887,8 @@ out: * Returns -EINVAL when the SRP_CMD request contains inconsistent descriptors; * -ENOMEM when memory allocation fails and zero upon success. 
*/ -static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd, +static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, + struct srp_cmd *srp_cmd, scst_data_direction *dir, u64 *data_len) { struct srp_indirect_buf *idb; @@ -1127,100 +1072,55 @@ out: } /** - * srpt_req_lim_delta() - Compute req_lim delta. - * - * Compute by how much req_lim changed since the last time this function has - * been called. This value is necessary for filling in the REQUEST LIMIT DELTA - * field of an SRP_RSP response. - * - * Side Effect: - * Resets ch->req_lim_delta. - * - * Note: - * The caller must either pass the returned value to the initiator in the - * REQUEST LIMIT DELTA field of an SRP information unit or pass the returned - * value to srpt_undo_req_lim_delta(). Any other approach will result in an - * SRP protocol violation. + * srpt_get_send_ioctx() - Obtain an I/O context for sending to the initiator. */ -static int srpt_req_lim_delta(struct srpt_rdma_ch *ch) +static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch) { - return atomic_xchg(&ch->req_lim_delta, 0); -} - -/** - * srpt_undo_req_lim_delta() - Undo the side effect of srpt_req_lim_delta(). - * @ch: Channel pointer. - * @delta: return value of srpt_req_lim_delta(). - */ -static void srpt_undo_req_lim_delta(struct srpt_rdma_ch *ch, int delta) -{ - atomic_add(delta, &ch->req_lim_delta); -} - -/** - * srpt_send_cred_req() - Send an SRP_CRED_REQ IU to the initiator. - * - * The previous value of ch->req_lim_delta is restored if sending fails - * synchronously or asynchronously. - */ -static void srpt_send_cred_req(struct srpt_rdma_ch *ch, s32 req_lim_delta) -{ - struct srpt_ioctx *ioctx; - struct srp_cred_req *srp_cred_req; - int res; - - ioctx = srpt_get_tti_ioctx(ch); - if (!ioctx) { - PRINT_ERROR("%s", - "Sending SRP_CRED_REQ failed -- no I/O context" - " available ! This will sooner or later result" - " in an initiator lockup."); - goto err; - } + struct srpt_send_ioctx *ioctx; + unsigned long flags; BUG_ON(!ch); - srp_cred_req = ioctx->buf; - BUG_ON(!srp_cred_req); - memset(srp_cred_req, 0, sizeof(*srp_cred_req)); - srp_cred_req->opcode = SRP_CRED_REQ; - srp_cred_req->req_lim_delta = cpu_to_be32(req_lim_delta); - srp_cred_req->tag = cpu_to_be64(0); - res = srpt_post_send(ch, ioctx, sizeof(*srp_cred_req)); - if (res) { - PRINT_ERROR("sending SRP_CRED_REQ failed (res = %d)", res); - goto err_put; + + ioctx = NULL; + spin_lock_irqsave(&ch->spinlock, flags); + if (!list_empty(&ch->free_list)) { + ioctx = list_first_entry(&ch->free_list, + struct srpt_send_ioctx, free_list); + list_del(&ioctx->free_list); } + spin_unlock_irqrestore(&ch->spinlock, flags); - TRACE_DBG("Sent SRP_CRED_REQ with req_lim_delta = %d and tag %lld", - req_lim_delta, 0ULL); + if (!ioctx) + return ioctx; - goto out; + BUG_ON(ioctx->ch != ch); + atomic_set(&ioctx->state, SRPT_STATE_NEW); + ioctx->n_rbuf = 0; + ioctx->rbufs = NULL; + ioctx->n_rdma = 0; + ioctx->n_rdma_ius = 0; + ioctx->rdma_ius = NULL; + ioctx->mapped_sg_count = 0; + ioctx->scmnd = NULL; -err_put: - srpt_put_tti_ioctx(ch); -err: - srpt_undo_req_lim_delta(ch, req_lim_delta); -out: - return; + return ioctx; } /** - * srpt_reset_ioctx() - Free up resources and post again for receiving. - * - * Note: Do NOT modify *ioctx after this function has finished. Otherwise a - * race condition will be triggered between srpt_rcv_completion() and the - * caller of this function on *ioctx. + * srpt_put_send_ioctx() - Free up resources. 
*/ -static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx, - bool inc_req_lim) +static void srpt_put_send_ioctx(struct srpt_send_ioctx *ioctx) { - BUG_ON(!ch); + struct srpt_rdma_ch *ch; + unsigned long flags; + BUG_ON(!ioctx); + ch = ioctx->ch; + BUG_ON(!ch); WARN_ON(srpt_get_cmd_state(ioctx) != SRPT_STATE_DONE); ioctx->scmnd = NULL; - ioctx->ch = NULL; /* * If the WARN_ON() below gets triggered this means that @@ -1235,25 +1135,9 @@ static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx, ioctx->n_rbuf = 0; } - if (srpt_post_recv(ch->sport->sdev, ioctx)) - PRINT_ERROR("%s", "SRQ post_recv failed - this is serious."); - else if (inc_req_lim) { - int req_lim; - - atomic_inc(&ch->req_lim_delta); - req_lim = atomic_inc_return(&ch->req_lim); - if (req_lim < 0 || req_lim > ch->rq_size) - PRINT_ERROR("req_lim = %d out of range %d .. %d", - req_lim, 0, ch->rq_size); - if (atomic_read(&ch->supports_cred_req)) { - if (req_lim == ch->rq_size / 2 - && atomic_read(&ch->req_lim_delta) > ch->rq_size/4) - srpt_send_cred_req(ch, srpt_req_lim_delta(ch)); - } else { - if (atomic_add_unless(&ch->req_lim_waiter_count, -1, 0)) - complete(&ch->req_lim_compl); - } - } + spin_lock_irqsave(&ch->spinlock, flags); + list_add(&ioctx->free_list, &ch->free_list); + spin_unlock_irqrestore(&ch->spinlock, flags); } /** @@ -1261,7 +1145,7 @@ static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx, * @ioctx: I/O context associated with the SCSI command. * @context: Preferred execution context. */ -static void srpt_abort_scst_cmd(struct srpt_ioctx *ioctx, +static void srpt_abort_scst_cmd(struct srpt_send_ioctx *ioctx, enum scst_exec_context context) { struct scst_cmd *scmnd; @@ -1274,7 +1158,7 @@ static void srpt_abort_scst_cmd(struct srpt_ioctx *ioctx, /* * If the command is in a state where the SCST core is waiting for the * ib_srpt driver, change the state to the next state. Changing the - * state of the command from SRPT_NEED_DATA to SRPT_STATE_DATA_IN + * state of the command from SRPT_STATE_NEED_DATA to SRPT_STATE_DATA_IN * ensures that srpt_xmit_response() will call this function a second * time. */ @@ -1286,10 +1170,6 @@ static void srpt_abort_scst_cmd(struct srpt_ioctx *ioctx, if (state != SRPT_STATE_DATA_IN) { state = srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_CMD_RSP_SENT, SRPT_STATE_DONE); - if (state != SRPT_STATE_CMD_RSP_SENT) - state = srpt_test_and_set_cmd_state(ioctx, - SRPT_STATE_MGMT_RSP_SENT, - SRPT_STATE_DONE); } } if (state == SRPT_STATE_DONE) @@ -1307,21 +1187,13 @@ static void srpt_abort_scst_cmd(struct srpt_ioctx *ioctx, switch (state) { case SRPT_STATE_NEW: + case SRPT_STATE_DATA_IN: /* * Do nothing - defer abort processing until * srpt_xmit_response() is invoked. */ WARN_ON(!scst_cmd_aborted(scmnd)); break; - case SRPT_STATE_DATA_IN: - /* - * Invocation of srpt_pending_cmd_timeout() after - * srpt_handle_rdma_comp() set the state to SRPT_STATE_DATA_IN - * and before srpt_xmit_response() set the state to - * SRPT_STATE_CMD_RSP_SENT. Ignore the timeout and let - * srpt_handle_xmit_response() proceed. - */ - break; case SRPT_STATE_NEED_DATA: /* SCST_DATA_WRITE - RDMA read error or RDMA read timeout. */ scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_ERROR, context); @@ -1332,6 +1204,7 @@ static void srpt_abort_scst_cmd(struct srpt_ioctx *ioctx, * not been received in time. 
*/ srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); + srpt_put_send_ioctx(ioctx); scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED); scst_tgt_cmd_done(scmnd, context); break; @@ -1356,57 +1229,49 @@ out: } /** - * srpt_handle_send_err_comp() - Process an IB_WC_SEND or RDMA error completion. + * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion. */ static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id, enum scst_exec_context context) { - struct srpt_ioctx *ioctx; - struct srpt_device *sdev = ch->sport->sdev; + struct srpt_send_ioctx *ioctx; enum srpt_command_state state; struct scst_cmd *scmnd; + u32 index; - EXTRACHECKS_WARN_ON(wr_id & SRPT_OP_RECV); + atomic_inc(&ch->sq_wr_avail); - ioctx = sdev->ioctx_ring[wr_id & ~SRPT_OP_TTI]; + index = idx_from_wr_id(wr_id); + ioctx = ch->ioctx_ring[index]; + state = srpt_get_cmd_state(ioctx); + scmnd = ioctx->scmnd; - if ((wr_id & SRPT_OP_TTI) == 0) { - state = srpt_get_cmd_state(ioctx); - scmnd = ioctx->scmnd; + EXTRACHECKS_WARN_ON(state != SRPT_STATE_CMD_RSP_SENT + && state != SRPT_STATE_MGMT_RSP_SENT + && state != SRPT_STATE_NEED_DATA + && state != SRPT_STATE_DONE); - EXTRACHECKS_WARN_ON(state != SRPT_STATE_CMD_RSP_SENT - && state != SRPT_STATE_MGMT_RSP_SENT - && state != SRPT_STATE_NEED_DATA - && state != SRPT_STATE_DONE); - - if (state != SRPT_STATE_DONE) { - if (scmnd) - srpt_abort_scst_cmd(ioctx, context); - else { - srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - srpt_reset_ioctx(ch, ioctx, 1); - } - } else - PRINT_ERROR("Received more than one IB error completion" - " for wr_id = %u.", (unsigned)wr_id); - } else { - struct srp_cred_req *srp_cred_req; - s32 req_lim_delta; - - srp_cred_req = ioctx->buf; - req_lim_delta = be32_to_cpu(srp_cred_req->req_lim_delta); - srpt_undo_req_lim_delta(ch, req_lim_delta); - srpt_put_tti_ioctx(ch); - PRINT_ERROR("Sending SRP_CRED_REQ with delta = %d failed.", - req_lim_delta); - } + /* If SRP_RSP sending failed, undo the ch->req_lim change. */ + if (state == SRPT_STATE_CMD_RSP_SENT + || state == SRPT_STATE_MGMT_RSP_SENT) + atomic_dec(&ch->req_lim); + if (state != SRPT_STATE_DONE) { + if (scmnd) + srpt_abort_scst_cmd(ioctx, context); + else { + srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); + srpt_put_send_ioctx(ioctx); + } + } else + PRINT_ERROR("Received more than one IB error completion" + " for wr_id = %u.", (unsigned)index); } /** * srpt_handle_send_comp() - Process an IB send completion notification. */ static void srpt_handle_send_comp(struct srpt_rdma_ch *ch, - struct srpt_ioctx *ioctx, + struct srpt_send_ioctx *ioctx, enum scst_exec_context context) { enum srpt_command_state state; @@ -1427,12 +1292,13 @@ static void srpt_handle_send_comp(struct srpt_rdma_ch *ch, != (scmnd == NULL)); if (scmnd) { srpt_unmap_sg_to_ib_sge(ch, ioctx); + srpt_put_send_ioctx(ioctx); scst_tgt_cmd_done(scmnd, context); } else - srpt_reset_ioctx(ch, ioctx, 1); + srpt_put_send_ioctx(ioctx); } else { PRINT_ERROR("IB completion has been received too late for" - " wr_id = %u.", ioctx->index); + " wr_id = %u.", ioctx->ioctx.index); } } @@ -1440,7 +1306,7 @@ static void srpt_handle_send_comp(struct srpt_rdma_ch *ch, * srpt_handle_rdma_comp() - Process an IB RDMA completion notification. 
*/ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch, - struct srpt_ioctx *ioctx, + struct srpt_send_ioctx *ioctx, enum scst_exec_context context) { enum srpt_command_state state; @@ -1453,10 +1319,54 @@ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch, if (scmnd) { state = srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA, SRPT_STATE_DATA_IN); + if (state == SRPT_STATE_NEED_DATA) + scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS, + context); + else + PRINT_ERROR("%s[%d]: wrong state = %d", __func__, + __LINE__, state); + } else + PRINT_ERROR("%s[%d]: scmnd == NULL", __func__, __LINE__); +} - EXTRACHECKS_WARN_ON(state != SRPT_STATE_NEED_DATA); +/** + * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion. + */ +static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, + u8 opcode, + enum scst_exec_context context) +{ + struct scst_cmd *scmnd; + enum srpt_command_state state; - scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS, context); + scmnd = ioctx->scmnd; + state = srpt_get_cmd_state(ioctx); + if (scmnd) { + switch (opcode) { + case IB_WC_RDMA_READ: + if (ioctx->n_rdma <= 0) { + PRINT_ERROR("Received invalid RDMA read error" + " completion with idx %d", + ioctx->ioctx.index); + break; + } + atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); + if (state == SRPT_STATE_NEED_DATA) + srpt_abort_scst_cmd(ioctx, context); + else + PRINT_ERROR("%s[%d]: wrong state = %d", + __func__, __LINE__, state); + break; + case IB_WC_RDMA_WRITE: + scst_set_delivery_status(scmnd, + SCST_CMD_DELIVERY_ABORTED); + break; + default: + PRINT_ERROR("%s[%d]: opcode = %u", __func__, __LINE__, + opcode); + break; + } } else PRINT_ERROR("%s[%d]: scmnd == NULL", __func__, __LINE__); } @@ -1479,9 +1389,9 @@ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch, * response. See also SPC-2 for more information about sense data. */ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, - struct srpt_ioctx *ioctx, s32 req_lim_delta, - u64 tag, int status, - const u8 *sense_data, int sense_data_len) + struct srpt_send_ioctx *ioctx, u64 tag, + int status, const u8 *sense_data, + int sense_data_len) { struct srp_rsp *srp_rsp; int max_sense_len; @@ -1492,24 +1402,23 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, */ EXTRACHECKS_WARN_ON(status & 1); - srp_rsp = ioctx->buf; + srp_rsp = ioctx->ioctx.buf; BUG_ON(!srp_rsp); memset(srp_rsp, 0, sizeof *srp_rsp); srp_rsp->opcode = SRP_RSP; - srp_rsp->req_lim_delta = cpu_to_be32(req_lim_delta); + srp_rsp->req_lim_delta = __constant_cpu_to_be32(1); srp_rsp->tag = tag; srp_rsp->status = status; if (!SCST_SENSE_VALID(sense_data)) sense_data_len = 0; else { - BUILD_BUG_ON(MIN_MAX_MESSAGE_SIZE <= sizeof(*srp_rsp)); + BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp)); max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp); if (sense_data_len > max_sense_len) { PRINT_WARNING("truncated sense data from %d to %d" - " bytes", sense_data_len, - max_sense_len); + " bytes", sense_data_len, max_sense_len); sense_data_len = max_sense_len; } @@ -1535,7 +1444,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, * response. */ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, - struct srpt_ioctx *ioctx, s32 req_lim_delta, + struct srpt_send_ioctx *ioctx, u8 rsp_code, u64 tag) { struct srp_rsp *srp_rsp; @@ -1545,11 +1454,12 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 
0 : 4; resp_len = sizeof(*srp_rsp) + resp_data_len; - srp_rsp = ioctx->buf; + srp_rsp = ioctx->ioctx.buf; + BUG_ON(!srp_rsp); memset(srp_rsp, 0, sizeof *srp_rsp); srp_rsp->opcode = SRP_RSP; - srp_rsp->req_lim_delta = cpu_to_be32(req_lim_delta); + srp_rsp->req_lim_delta = __constant_cpu_to_be32(1); srp_rsp->tag = tag; if (rsp_code != SRP_TSK_MGMT_SUCCESS) { @@ -1564,7 +1474,9 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, /** * srpt_handle_cmd() - Process SRP_CMD. */ -static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx, +static int srpt_handle_cmd(struct srpt_rdma_ch *ch, + struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *send_ioctx, enum scst_exec_context context) { struct scst_cmd *scmnd; @@ -1573,20 +1485,27 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx, u64 data_len; int ret; - srp_cmd = ioctx->buf; + BUG_ON(!send_ioctx); + + srp_cmd = recv_ioctx->ioctx.buf; scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun, - sizeof srp_cmd->lun, srp_cmd->cdb, 16, context); - if (!scmnd) + sizeof srp_cmd->lun, srp_cmd->cdb, + sizeof srp_cmd->cdb, context); + if (!scmnd) { + PRINT_ERROR("0x%llx: allocation of an SCST command failed", + srp_cmd->tag); goto err; + } - ioctx->scmnd = scmnd; + send_ioctx->scmnd = scmnd; - ret = srpt_get_desc_tbl(ioctx, srp_cmd, &dir, &data_len); + ret = srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len); if (ret) { + PRINT_ERROR("0x%llx: parsing SRP descriptor table failed.", + srp_cmd->tag); scst_set_cmd_error(scmnd, SCST_LOAD_SENSE(scst_sense_invalid_field_in_cdb)); - goto err; } switch (srp_cmd->task_attr) { @@ -1608,20 +1527,21 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx, } scst_cmd_set_tag(scmnd, srp_cmd->tag); - scst_cmd_set_tgt_priv(scmnd, ioctx); + scst_cmd_set_tgt_priv(scmnd, send_ioctx); scst_cmd_set_expected(scmnd, dir, data_len); scst_cmd_init_done(scmnd, context); return 0; err: + srpt_put_send_ioctx(send_ioctx); return -1; } /** * srpt_handle_tsk_mgmt() - Process an SRP_TSK_MGMT information unit. * - * Returns SRP_TSK_MGMT_SUCCESS upon success. + * Returns SCST_MGMT_STATUS_SUCCESS upon success. * * Each task management function is performed by calling one of the * scst_rx_mgmt_fn*() functions. These functions will either report failure @@ -1635,27 +1555,35 @@ err: * 6.7 in the SRP r16a document. */ static u8 srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, - struct srpt_ioctx *ioctx) + struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *send_ioctx) { struct srp_tsk_mgmt *srp_tsk; struct srpt_mgmt_ioctx *mgmt_ioctx; int ret; - u8 srp_tsk_mgmt_status; - srp_tsk = ioctx->buf; + ret = SCST_MGMT_STATUS_FAILED; + + BUG_ON(!send_ioctx); + + srp_tsk = recv_ioctx->ioctx.buf; TRACE_DBG("recv_tsk_mgmt= %d for task_tag= %lld" " using tag= %lld cm_id= %p sess= %p", srp_tsk->tsk_mgmt_func, srp_tsk->task_tag, srp_tsk->tag, ch->cm_id, ch->scst_sess); - srp_tsk_mgmt_status = SRP_TSK_MGMT_FAILED; mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC); - if (!mgmt_ioctx) + if (!mgmt_ioctx) { + PRINT_ERROR("tag 0x%llx: memory allocation for task management" + " function failed. 
Ignoring task management request" + " (func %d).", srp_tsk->task_tag, + srp_tsk->tsk_mgmt_func); goto err; + } - mgmt_ioctx->ioctx = ioctx; - mgmt_ioctx->ch = ch; + mgmt_ioctx->ioctx = send_ioctx; + BUG_ON(mgmt_ioctx->ioctx->ch != ch); mgmt_ioctx->tag = srp_tsk->tag; switch (srp_tsk->tsk_mgmt_func) { @@ -1700,48 +1628,33 @@ static u8 srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, break; default: TRACE_DBG("%s", "Unsupported task management function."); - srp_tsk_mgmt_status = SRP_TSK_MGMT_FUNC_NOT_SUPP; - goto err; + ret = SCST_MGMT_STATUS_FN_NOT_SUPPORTED; } - if (ret) { - TRACE_DBG("Processing task management function failed" - " (ret = %d).", ret); + if (ret != SCST_MGMT_STATUS_SUCCESS) goto err; - } - return SRP_TSK_MGMT_SUCCESS; + return ret; err: kfree(mgmt_ioctx); - return srp_tsk_mgmt_status; + return ret; } -static void srpt_handle_cred_rsp(struct srpt_rdma_ch *ch, - struct srpt_ioctx *ioctx) +static u8 scst_to_srp_tsk_mgmt_status(const int scst_mgmt_status) { - int max_lun_commands; - int req_lim_delta; - - if (!atomic_read(&ch->supports_cred_req)) { - atomic_set(&ch->supports_cred_req, true); - PRINT_INFO("Enabled SRP_CRED_REQ support for session %s", - ch->sess_name); - - max_lun_commands = scst_get_max_lun_commands(NULL, 0); - if (4 <= max_lun_commands && max_lun_commands < ch->rq_size) { - req_lim_delta = ch->rq_size - max_lun_commands; - PRINT_INFO("Decreasing initiator request limit from %d" - " to %d", ch->rq_size, max_lun_commands); - /* - * Note: at least in theory this may make the req_lim - * variable managed by the initiator temporarily - * negative. - */ - ch->rq_size -= req_lim_delta; - atomic_sub(req_lim_delta, &ch->req_lim); - atomic_sub(req_lim_delta, &ch->req_lim_delta); - } + switch (scst_mgmt_status) { + case SCST_MGMT_STATUS_SUCCESS: + return SRP_TSK_MGMT_SUCCESS; + case SCST_MGMT_STATUS_FN_NOT_SUPPORTED: + return SRP_TSK_MGMT_FUNC_NOT_SUPP; + case SCST_MGMT_STATUS_TASK_NOT_EXIST: + case SCST_MGMT_STATUS_LUN_NOT_EXIST: + case SCST_MGMT_STATUS_REJECTED: + case SCST_MGMT_STATUS_FAILED: + default: + break; } + return SRP_TSK_MGMT_FAILED; } /** @@ -1750,148 +1663,71 @@ static void srpt_handle_cred_rsp(struct srpt_rdma_ch *ch, * @ioctx: SRPT I/O context associated with the information unit. 
*/ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, - struct srpt_ioctx *ioctx, + struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *send_ioctx, enum scst_exec_context context) { struct srp_cmd *srp_cmd; - struct scst_cmd *scmnd; enum rdma_ch_state ch_state; - u8 srp_response_status; - u8 srp_tsk_mgmt_status; - int len; - int send_rsp_res; + + BUG_ON(!ch); + BUG_ON(!recv_ioctx); + + ib_dma_sync_single_for_cpu(ch->sport->sdev->device, + recv_ioctx->ioctx.dma, srp_max_req_size, + DMA_FROM_DEVICE); ch_state = atomic_read(&ch->state); - if (ch_state == RDMA_CHANNEL_CONNECTING) { - list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list); - return; + srp_cmd = recv_ioctx->ioctx.buf; + if (unlikely(ch_state == RDMA_CHANNEL_CONNECTING)) { + list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list); + goto out; } - ioctx->n_rbuf = 0; - ioctx->rbufs = NULL; - ioctx->n_rdma = 0; - ioctx->n_rdma_ius = 0; - ioctx->rdma_ius = NULL; - ioctx->mapped_sg_count = 0; - ioctx->scmnd = NULL; - ioctx->ch = ch; - atomic_set(&ioctx->state, SRPT_STATE_NEW); + if (unlikely(ch_state == RDMA_CHANNEL_DISCONNECTING)) + goto post_recv; - if (unlikely(ch_state == RDMA_CHANNEL_DISCONNECTING)) { - srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - srpt_reset_ioctx(ch, ioctx, 0); - return; + if (srp_cmd->opcode == SRP_CMD || srp_cmd->opcode == SRP_TSK_MGMT) { + if (!send_ioctx) + send_ioctx = srpt_get_send_ioctx(ch); + if (unlikely(!send_ioctx)) { + list_add_tail(&recv_ioctx->wait_list, + &ch->cmd_wait_list); + goto out; + } } WARN_ON(ch_state != RDMA_CHANNEL_LIVE); - scmnd = NULL; - - srp_response_status = SAM_STAT_BUSY; - /* To keep the compiler happy. */ - srp_tsk_mgmt_status = -1; - - ib_dma_sync_single_for_cpu(ch->sport->sdev->device, - ioctx->dma, srp_max_message_size, - DMA_FROM_DEVICE); - - srp_cmd = ioctx->buf; - - if (srp_cmd->opcode == SRP_CMD || srp_cmd->opcode == SRP_TSK_MGMT - || srp_cmd->opcode == SRP_I_LOGOUT) { - int req_lim; - - req_lim = atomic_dec_return(&ch->req_lim); - if (unlikely(req_lim < 0)) - PRINT_ERROR("req_lim = %d < 0", req_lim); - } - switch (srp_cmd->opcode) { case SRP_CMD: - if (srpt_handle_cmd(ch, ioctx, context) < 0) { - scmnd = ioctx->scmnd; - if (scmnd) - srp_response_status = - scst_cmd_get_status(scmnd); - goto err; - } + srpt_handle_cmd(ch, recv_ioctx, send_ioctx, context); break; - case SRP_TSK_MGMT: - srp_tsk_mgmt_status = srpt_handle_tsk_mgmt(ch, ioctx); - if (srp_tsk_mgmt_status != SRP_TSK_MGMT_SUCCESS) - goto err; + srpt_handle_tsk_mgmt(ch, recv_ioctx, send_ioctx); break; - case SRP_I_LOGOUT: - goto err; - + PRINT_ERROR("%s", "Not yet implemented: SRP_I_LOGOUT"); + break; case SRP_CRED_RSP: TRACE_DBG("%s", "received SRP_CRED_RSP"); - srpt_handle_cred_rsp(ch, ioctx); - srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - srpt_reset_ioctx(ch, ioctx, 0); break; - case SRP_AER_RSP: TRACE_DBG("%s", "received SRP_AER_RSP"); - srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - srpt_reset_ioctx(ch, ioctx, 0); break; - case SRP_RSP: + PRINT_ERROR("%s", "Received SRP_RSP"); + break; default: PRINT_ERROR("received IU with unknown opcode 0x%x", srp_cmd->opcode); - srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - srpt_reset_ioctx(ch, ioctx, 0); break; } +post_recv: + srpt_post_recv(ch->sport->sdev, recv_ioctx); +out: return; - -err: - send_rsp_res = -ENOTCONN; - - if (atomic_read(&ch->state) != RDMA_CHANNEL_LIVE) { - /* Give up if another thread modified the channel state. 
*/ - PRINT_ERROR("%s", "channel is no longer in connected state."); - } else { - s32 req_lim_delta; - - req_lim_delta = srpt_req_lim_delta(ch); - if (srp_cmd->opcode == SRP_TSK_MGMT) - len = srpt_build_tskmgmt_rsp(ch, ioctx, req_lim_delta, - srp_tsk_mgmt_status, - ((struct srp_tsk_mgmt *)srp_cmd)->tag); - else if (scmnd) - len = srpt_build_cmd_rsp(ch, ioctx, req_lim_delta, - srp_cmd->tag, srp_response_status, - scst_cmd_get_sense_buffer(scmnd), - scst_cmd_get_sense_buffer_len(scmnd)); - else - len = srpt_build_cmd_rsp(ch, ioctx, srp_cmd->tag, - req_lim_delta, - srp_response_status, - NULL, 0); - srpt_set_cmd_state(ioctx, - srp_cmd->opcode == SRP_TSK_MGMT - ? SRPT_STATE_MGMT_RSP_SENT - : SRPT_STATE_CMD_RSP_SENT); - send_rsp_res = srpt_post_send(ch, ioctx, len); - if (send_rsp_res) { - PRINT_ERROR("%s", "Sending SRP_RSP response failed."); - srpt_undo_req_lim_delta(ch, req_lim_delta - 1); - } - } - if (send_rsp_res) { - if (scmnd) - srpt_abort_scst_cmd(ioctx, context); - else { - srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - srpt_reset_ioctx(ch, ioctx, 1); - } - } } static void srpt_process_rcv_completion(struct ib_cq *cq, @@ -1900,47 +1736,84 @@ static void srpt_process_rcv_completion(struct ib_cq *cq, struct ib_wc *wc) { struct srpt_device *sdev = ch->sport->sdev; - struct srpt_ioctx *ioctx; - - EXTRACHECKS_WARN_ON((wc->wr_id & SRPT_OP_RECV) == 0); - EXTRACHECKS_WARN_ON((wc->wr_id & SRPT_OP_TTI) != 0); + struct srpt_recv_ioctx *ioctx; + u32 index; + index = idx_from_wr_id(wc->wr_id); if (wc->status == IB_WC_SUCCESS) { - ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV]; - srpt_handle_new_iu(ch, ioctx, context); + int req_lim; + + req_lim = atomic_dec_return(&ch->req_lim); + if (unlikely(req_lim < 0)) + PRINT_ERROR("req_lim = %d < 0", req_lim); + ioctx = sdev->ioctx_ring[index]; + srpt_handle_new_iu(ch, ioctx, NULL, context); } else { - PRINT_INFO("receiving wr_id %u failed with status %d", - (unsigned)(wc->wr_id & ~SRPT_OP_RECV), wc->status); + PRINT_INFO("receiving failed for idx %u with status %d", + index, wc->status); } } +/** + * srpt_process_send_completion() - Process an IB send completion. + * + * Note: Although this has not yet been observed during tests, at least in + * theory it is possible that the srpt_get_send_ioctx() call invoked by + * srpt_handle_new_iu() fails. This is possible because the req_lim_delta + * value in each response is set to one, and it is possible that this response + * makes the initiator send a new request before the send completion for that + * response has been processed. This could e.g. happen if the call to + * srpt_put_send_iotcx() is delayed because of a higher priority interrupt or + * if IB retransmission causes generation of the send completion to be + * delayed. Incoming information units for which srpt_get_send_ioctx() fails + * are queued on cmd_wait_list. The code below processes these delayed + * requests one at a time. 
+ */ static void srpt_process_send_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch, enum scst_exec_context context, struct ib_wc *wc) { - struct srpt_device *sdev = ch->sport->sdev; - struct srpt_ioctx *ioctx; - - EXTRACHECKS_WARN_ON((wc->wr_id & SRPT_OP_RECV) != 0); + struct srpt_send_ioctx *send_ioctx; + uint32_t index; + u8 opcode; + index = idx_from_wr_id(wc->wr_id); + opcode = opcode_from_wr_id(wc->wr_id); + send_ioctx = ch->ioctx_ring[index]; if (wc->status == IB_WC_SUCCESS) { - if ((wc->wr_id & SRPT_OP_TTI) == 0) { - ioctx = sdev->ioctx_ring[wc->wr_id]; - if (wc->opcode == IB_WC_SEND) - srpt_handle_send_comp(ch, ioctx, context); - else { - EXTRACHECKS_WARN_ON(wc->opcode - != IB_WC_RDMA_READ); - srpt_handle_rdma_comp(ch, ioctx, context); - } - } else - srpt_put_tti_ioctx(ch); + if (opcode == IB_WC_SEND) + srpt_handle_send_comp(ch, send_ioctx, context); + else { + EXTRACHECKS_WARN_ON(wc->opcode != IB_WC_RDMA_READ); + srpt_handle_rdma_comp(ch, send_ioctx, context); + } } else { - PRINT_INFO("sending %s for wr_id %u failed with status %d", - wc->wr_id & SRPT_OP_TTI ? "request" : "response", - (unsigned)(wc->wr_id & ~SRPT_OP_FLAGS), wc->status); - srpt_handle_send_err_comp(ch, wc->wr_id, context); + if (opcode == IB_WC_SEND) { + PRINT_INFO("sending response for idx %u failed with" + " status %d", index, wc->status); + srpt_handle_send_err_comp(ch, wc->wr_id, context); + } else { + PRINT_INFO("RDMA %s for idx %u failed with status %d", + opcode == IB_WC_RDMA_READ ? "read" + : opcode == IB_WC_RDMA_WRITE ? "write" + : "???", index, wc->status); + srpt_handle_rdma_err_comp(ch, send_ioctx, opcode, + context); + } + } + + while (unlikely(opcode == IB_WC_SEND + && !list_empty(&ch->cmd_wait_list) + && atomic_read(&ch->state) == RDMA_CHANNEL_LIVE + && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) { + struct srpt_recv_ioctx *recv_ioctx; + + recv_ioctx = list_first_entry(&ch->cmd_wait_list, + struct srpt_recv_ioctx, + wait_list); + list_del(&recv_ioctx->wait_list); + srpt_handle_new_iu(ch, recv_ioctx, send_ioctx, context); } } @@ -1948,21 +1821,22 @@ static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch, enum scst_exec_context context) { - struct ib_wc wc; + struct ib_wc *const wc = ch->wc; + int i, n; EXTRACHECKS_WARN_ON(cq != ch->cq); - do { - while (ib_poll_cq(cq, 1, &wc) > 0) { - if (wc.wr_id & SRPT_OP_RECV) + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) { + for (i = 0; i < n; i++) { + if (opcode_from_wr_id(wc[i].wr_id) & IB_WC_RECV) srpt_process_rcv_completion(cq, ch, context, - &wc); + &wc[i]); else srpt_process_send_completion(cq, ch, context, - &wc); + &wc[i]); } - } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP - | IB_CQ_REPORT_MISSED_EVENTS) > 0); + } } /** @@ -1980,6 +1854,7 @@ static void srpt_completion(struct ib_cq *cq, void *ctx) { struct srpt_rdma_ch *ch = ctx; + BUG_ON(!ch); atomic_inc(&ch->processing_compl); switch (thread) { case MODE_IB_COMPLETION_IN_THREAD: @@ -2107,20 +1982,9 @@ err_destroy_cq: static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) { - struct ib_qp_attr qp_attr; - int ret; - if (ch->thread) kthread_stop(ch->thread); - qp_attr.qp_state = IB_QPS_RESET; - ret = ib_modify_qp(ch->qp, &qp_attr, IB_QP_STATE); - if (ret < 0) - PRINT_ERROR("Resetting queue pair state failed: %d", ret); - - while (atomic_read(&ch->processing_compl)) - ; - ib_destroy_qp(ch->qp); ib_destroy_cq(ch->cq); } @@ -2135,12 +1999,23 @@ static void srpt_unregister_channel(struct srpt_rdma_ch *ch) 
__releases(&ch->sport->sdev->spinlock) { struct srpt_device *sdev; + struct ib_qp_attr qp_attr; + int ret; sdev = ch->sport->sdev; list_del(&ch->list); atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING); spin_unlock_irq(&sdev->spinlock); + qp_attr.qp_state = IB_QPS_ERR; + ret = ib_modify_qp(ch->qp, &qp_attr, IB_QP_STATE); + if (ret < 0) + PRINT_ERROR("Setting queue pair in error state failed: %d", + ret); + + while (atomic_read(&ch->processing_compl)) + ; + /* * At this point it is guaranteed that no new commands will be sent to * the SCST core for channel ch, which is a requirement for @@ -2243,7 +2118,10 @@ static void srpt_release_channel(struct scst_session *scst_sess) ib_destroy_cm_id(ch->cm_id); srpt_destroy_ch_ib(ch); - srpt_free_tti_ioctx(ch); + + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, + ch->sport->sdev, ch->rq_size, + srp_max_rsp_size, DMA_TO_DEVICE); kfree(ch); @@ -2312,6 +2190,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, struct ib_cm_rep_param *rep_param; struct srpt_rdma_ch *ch, *tmp_ch; u32 it_iu_len; + int i; int ret = 0; EXTRACHECKS_WARN_ON_ONCE(irqs_disabled()); @@ -2350,19 +2229,19 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, goto out; } - if (it_iu_len > srp_max_message_size || it_iu_len < 64) { - rej->reason = - cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); + if (it_iu_len > srp_max_req_size || it_iu_len < 64) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); ret = -EINVAL; PRINT_ERROR("rejected SRP_LOGIN_REQ because its" " length (%d bytes) is out of range (%d .. %d)", - it_iu_len, 64, srp_max_message_size); + it_iu_len, 64, srp_max_req_size); goto reject; } if (!srpt_is_target_enabled(sdev->scst_tgt)) { - rej->reason = - cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); ret = -EINVAL; PRINT_ERROR("rejected SRP_LOGIN_REQ because the target %s" " has not yet been enabled", sdev->device->name); @@ -2425,9 +2304,9 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid) || *(__be64 *)(req->target_port_id + 8) != - cpu_to_be64(srpt_service_guid)) { - rej->reason = - cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); + cpu_to_be64(srpt_service_guid)) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); ret = -ENOMEM; PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because it" " has an invalid target port identifier."); @@ -2436,7 +2315,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ch = kzalloc(sizeof *ch, GFP_KERNEL); if (!ch) { - rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because out of memory."); ret = -ENOMEM; @@ -2447,30 +2327,42 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, memcpy(ch->t_port_id, req->target_port_id, 16); ch->sport = &sdev->port[param->port - 1]; ch->cm_id = cm_id; - ch->rq_size = max(SRPT_RQ_SIZE, scst_get_max_lun_commands(NULL, 0)); + /* + * Avoid QUEUE_FULL conditions by limiting the number of buffers used + * for the SRP protocol to the SCST SCSI command queue size. 
+ */ + ch->rq_size = min(SRPT_RQ_SIZE, scst_get_max_lun_commands(NULL, 0)); atomic_set(&ch->processing_compl, 0); atomic_set(&ch->state, RDMA_CHANNEL_CONNECTING); INIT_LIST_HEAD(&ch->cmd_wait_list); - ch->tti_head = 0; - ch->tti_tail = 0; - ret = srpt_alloc_tti_ioctx(ch); - if (ret) { - PRINT_ERROR("%s", "send ring allocation failed"); + spin_lock_init(&ch->spinlock); + ch->ioctx_ring = (struct srpt_send_ioctx **) + srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, + sizeof(*ch->ioctx_ring[0]), + srp_max_rsp_size, DMA_TO_DEVICE); + if (!ch->ioctx_ring) goto free_ch; + + INIT_LIST_HEAD(&ch->free_list); + for (i = 0; i < ch->rq_size; i++) { + ch->ioctx_ring[i]->ch = ch; + list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list); } ret = srpt_create_ch_ib(ch); if (ret) { - rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because creating" " a new RDMA channel failed."); - goto free_req_ring; + goto free_ring; } ret = srpt_ch_qp_rtr(ch, ch->qp); if (ret) { - rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); PRINT_ERROR("rejected SRP_LOGIN_REQ because enabling" " RTR failed (error code = %d)", ret); goto destroy_ib; @@ -2506,8 +2398,9 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name, ch, NULL, NULL); if (!ch->scst_sess) { - rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); - TRACE_DBG("%s", "Failed to create scst sess"); + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + TRACE_DBG("%s", "Failed to create SCST session"); goto release_channel; } @@ -2520,14 +2413,10 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, rsp->max_it_iu_len = req->req_it_iu_len; rsp->max_ti_iu_len = req->req_it_iu_len; ch->max_ti_iu_len = it_iu_len; - atomic_set(&ch->supports_cred_req, false); - rsp->buf_fmt = - cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); + rsp->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); rsp->req_lim_delta = cpu_to_be32(ch->rq_size); atomic_set(&ch->req_lim, ch->rq_size); - atomic_set(&ch->req_lim_delta, 0); - atomic_set(&ch->req_lim_waiter_count, 0); - init_completion(&ch->req_lim_compl); /* create cm reply */ rep_param->qp_num = ch->qp->qp_num; @@ -2561,8 +2450,10 @@ release_channel: destroy_ib: srpt_destroy_ch_ib(ch); -free_req_ring: - srpt_free_tti_ioctx(ch); +free_ring: + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, + ch->sport->sdev, ch->rq_size, + srp_max_rsp_size, DMA_TO_DEVICE); free_ch: kfree(ch); @@ -2570,8 +2461,8 @@ free_ch: reject: rej->opcode = SRP_LOGIN_REJ; rej->tag = req->tag; - rej->buf_fmt = - cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); + rej->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, (void *)rej, sizeof *rej); @@ -2608,17 +2499,15 @@ static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id) if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_CONNECTING, RDMA_CHANNEL_LIVE) == RDMA_CHANNEL_CONNECTING) { - struct srpt_ioctx *ioctx, *ioctx_tmp; + struct srpt_recv_ioctx *ioctx, *ioctx_tmp; ret = srpt_ch_qp_rts(ch, ch->qp); - if (srpt_autodetect_cred_req) - srpt_send_cred_req(ch, 0); - list_for_each_entry_safe(ioctx, ioctx_tmp, 
&ch->cmd_wait_list, wait_list) { list_del(&ioctx->wait_list); - srpt_handle_new_iu(ch, ioctx, SCST_CONTEXT_THREAD); + srpt_handle_new_iu(ch, ioctx, NULL, + SCST_CONTEXT_THREAD); } if (ret && srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_LIVE, @@ -2745,10 +2634,11 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) * srpt_map_sg_to_ib_sge() - Map an SG list to an IB SGE list. */ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, - struct srpt_ioctx *ioctx, + struct srpt_send_ioctx *ioctx, struct scst_cmd *scmnd) { - struct scatterlist *scat; + struct scatterlist *sg; + int sg_cnt; scst_data_direction dir; struct rdma_iu *riu; struct srp_direct_buf *db; @@ -2764,8 +2654,6 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, BUG_ON(!ch); BUG_ON(!ioctx); BUG_ON(!scmnd); - scat = scst_cmd_get_sg(scmnd); - WARN_ON(!scat); dir = scst_cmd_get_data_direction(scmnd); BUG_ON(dir == SCST_DATA_NONE); /* @@ -2773,8 +2661,17 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, * and because scst_set_cmd_error_status() resets scmnd->data_direction. */ ioctx->dir = dir; - count = ib_dma_map_sg(ch->sport->sdev->device, scat, - scst_cmd_get_sg_cnt(scmnd), + if (dir == SCST_DATA_WRITE) { + scst_cmd_get_write_fields(scmnd, &sg, &sg_cnt); + WARN_ON(!sg); + } else { + sg = scst_cmd_get_sg(scmnd); + sg_cnt = scst_cmd_get_sg_cnt(scmnd); + WARN_ON(!sg); + } + ioctx->sg = sg; + ioctx->sg_cnt = sg_cnt; + count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt, scst_to_tgt_dma_dir(dir)); if (unlikely(!count)) return -EBUSY; @@ -2796,9 +2693,10 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, } db = ioctx->rbufs; - tsize = (dir == SCST_DATA_READ) ? - scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd); - dma_len = sg_dma_len(&scat[0]); + tsize = (dir == SCST_DATA_READ) + ? scst_cmd_get_adjusted_resp_data_len(scmnd) + : scst_cmd_get_bufflen(scmnd); + dma_len = sg_dma_len(&sg[0]); riu = ioctx->rdma_ius; /* @@ -2828,7 +2726,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, if (tsize > 0) { ++j; if (j < count) - dma_len = sg_dma_len(&scat[j]); + dma_len = sg_dma_len(&sg[j]); } } else { tsize -= rsize; @@ -2863,12 +2761,12 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, } db = ioctx->rbufs; - scat = scst_cmd_get_sg(scmnd); - tsize = (dir == SCST_DATA_READ) ? - scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd); + tsize = (dir == SCST_DATA_READ) + ? scst_cmd_get_adjusted_resp_data_len(scmnd) + : scst_cmd_get_bufflen(scmnd); riu = ioctx->rdma_ius; - dma_len = sg_dma_len(&scat[0]); - dma_addr = sg_dma_address(&scat[0]); + dma_len = sg_dma_len(&sg[0]); + dma_addr = sg_dma_address(&sg[0]); /* this second loop is really mapped sg_addres to rdma_iu->ib_sge */ for (i = 0, j = 0; @@ -2890,9 +2788,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, if (tsize > 0) { ++j; if (j < count) { - dma_len = sg_dma_len(&scat[j]); + dma_len = sg_dma_len(&sg[j]); dma_addr = - sg_dma_address(&scat[j]); + sg_dma_address(&sg[j]); } } } else { @@ -2925,10 +2823,10 @@ free_mem: * srpt_unmap_sg_to_ib_sge() - Unmap an IB SGE list. 
*/ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, - struct srpt_ioctx *ioctx) + struct srpt_send_ioctx *ioctx) { struct scst_cmd *scmnd; - struct scatterlist *scat; + struct scatterlist *sg; scst_data_direction dir; EXTRACHECKS_BUG_ON(!ch); @@ -2946,12 +2844,11 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, EXTRACHECKS_BUG_ON(!scmnd); EXTRACHECKS_WARN_ON(ioctx->scmnd != scmnd); EXTRACHECKS_WARN_ON(ioctx != scst_cmd_get_tgt_priv(scmnd)); - scat = scst_cmd_get_sg(scmnd); - EXTRACHECKS_WARN_ON(!scat); + sg = ioctx->sg; + EXTRACHECKS_WARN_ON(!sg); dir = ioctx->dir; EXTRACHECKS_BUG_ON(dir == SCST_DATA_NONE); - ib_dma_unmap_sg(ch->sport->sdev->device, scat, - scst_cmd_get_sg_cnt(scmnd), + ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt, scst_to_tgt_dma_dir(dir)); ioctx->mapped_sg_count = 0; } @@ -2959,8 +2856,11 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, /** * srpt_perform_rdmas() - Perform IB RDMA. + * + * Returns zero upon success or a negative number upon failure. */ -static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx, +static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, scst_data_direction dir) { struct ib_send_wr wr; @@ -2975,9 +2875,8 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx, sq_wr_avail = atomic_sub_return(ioctx->n_rdma, &ch->sq_wr_avail); if (sq_wr_avail < 0) { - atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); - PRINT_ERROR("%s[%d]: send queue full", - __func__, __LINE__); + PRINT_WARNING("IB send queue full (needed %d)", + ioctx->n_rdma); goto out; } } @@ -2987,10 +2886,16 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx, memset(&wr, 0, sizeof wr); for (i = 0; i < ioctx->n_rdma; ++i, ++riu) { - wr.opcode = (dir == SCST_DATA_READ) ? - IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; + if (dir == SCST_DATA_READ) { + wr.opcode = IB_WR_RDMA_WRITE; + wr.wr_id = encode_wr_id(IB_WC_RDMA_WRITE, + ioctx->ioctx.index); + } else { + wr.opcode = IB_WR_RDMA_READ; + wr.wr_id = encode_wr_id(IB_WC_RDMA_READ, + ioctx->ioctx.index); + } wr.next = NULL; - wr.wr_id = ioctx->index; wr.wr.rdma.remote_addr = riu->raddr; wr.wr.rdma.rkey = riu->rkey; wr.num_sge = riu->sge_cnt; @@ -3006,15 +2911,20 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx, } out: + if (unlikely(dir == SCST_DATA_WRITE && ret < 0)) + atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); return ret; } /** * srpt_xfer_data() - Start data transfer from initiator to target. * + * Returns an SCST_TGT_RES_... status code. + * * Note: Must not block. */ -static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx, +static int srpt_xfer_data(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, struct scst_cmd *scmnd) { int ret; @@ -3050,14 +2960,14 @@ out_unmap: } /** - * srpt_pending_cmd_timeout() - SCST command hw processing timeout callback. + * srpt_pending_cmd_timeout() - SCST command HCA processing timeout callback. * * Called by the SCST core if no IB completion notification has been received * within max_hw_pending_time seconds. 
*/ static void srpt_pending_cmd_timeout(struct scst_cmd *scmnd) { - struct srpt_ioctx *ioctx; + struct srpt_send_ioctx *ioctx; enum srpt_command_state state; ioctx = scst_cmd_get_tgt_priv(scmnd); @@ -3079,9 +2989,9 @@ static void srpt_pending_cmd_timeout(struct scst_cmd *scmnd) case SRPT_STATE_CMD_RSP_SENT: case SRPT_STATE_MGMT_RSP_SENT: default: - PRINT_ERROR("Command %p: IB completion for wr_id %u has not" + PRINT_ERROR("Command %p: IB completion for idx %u has not" " been received in time (SRPT command state %d)", - scmnd, ioctx->index, state); + scmnd, ioctx->ioctx.index, state); break; } @@ -3089,15 +2999,15 @@ static void srpt_pending_cmd_timeout(struct scst_cmd *scmnd) } /** - * srpt_rdy_to_xfer() - SCST callback that fetches data from the initiator. + * srpt_rdy_to_xfer() - Transfers data from initiator to target. * - * Called by the SCST core to inform ib_srpt that data reception from the - * initiator to the target should start (SCST_DATA_WRITE). Must not block. + * Called by the SCST core to transfer data from the initiator to the target + * (SCST_DATA_WRITE). Must not block. */ static int srpt_rdy_to_xfer(struct scst_cmd *scmnd) { struct srpt_rdma_ch *ch; - struct srpt_ioctx *ioctx; + struct srpt_send_ioctx *ioctx; enum srpt_command_state new_state; enum rdma_ch_state ch_state; int ret; @@ -3129,115 +3039,16 @@ out: } /** - * srpt_must_wait_for_cred() - Whether or not the target must wait with - * sending a response towards the initiator in order to avoid that the - * initiator locks up. The Linux SRP initiator locks up when - * initiator.req_lim <= req_lim_min (req_lim_min equals 1 for SRP_CMD and - * equals 0 for SRP_TSK_MGMT) and either no new SRP_RSP will be received by the - * initiator or none of the received SRP_RSP responses increases - * initiator.req_lim. One possible strategy to avoid an initiator lockup is - * that the target does not send an SRP_RSP that makes initiator.req_lim <= - * req_lim_min. While the target does not know the value of initiator.req_lim, - * one can deduce from the credit mechanism specified in the SRP standard that - * when target.req_lim == req_lim_min, initiator.req_lim must also equal - * req_lim_min. Hence wait with sending a response when target.req_lim <= - * req_lim_min if that response would not increase initiator.req_lim. The last - * condition is equivalent to srpt_req_lim_delta(ch) <= 0. + * srpt_xmit_response() - Transmits the response to a SCSI command. * - * If this function returns false, the caller must either send a response to - * the initiator with the REQUEST LIMIT DELTA field set to delta or call - * srpt_undo_req_lim_delta(ch, delta); where delta is the value written to - * the address that is the third argument of this function. - * - * Note: The constant 'compensation' compensates for the fact that multiple - * threads are processing SRP commands simultaneously. - * - * See also: For more information about how to reproduce the initiator lockup, - * see also http://bugzilla.kernel.org/show_bug.cgi?id=14235. 
- */ -static bool srpt_must_wait_for_cred(struct srpt_rdma_ch *ch, int req_lim_min, - int *req_lim_delta) -{ - int delta; - bool res; - int compensation; - enum { default_vdisk_threads = 8 }; - - EXTRACHECKS_BUG_ON(!req_lim_delta); - - compensation = min_t(int, default_vdisk_threads, num_online_cpus()) + 1; - res = true; - if (atomic_read(&ch->supports_cred_req) - || atomic_read(&ch->req_lim) > req_lim_min + compensation) { - res = false; - *req_lim_delta = srpt_req_lim_delta(ch); - } else { - bool again; - do { - again = false; - delta = atomic_read(&ch->req_lim_delta); - if (delta > 0) { - if (atomic_cmpxchg(&ch->req_lim_delta, delta, 0) - == delta) { - res = false; - *req_lim_delta = delta; - } else - again = true; - } - } while (again); - } - return res; -} - -/** - * srpt_wait_for_cred() - Wait until sending a response won't lock up the - * initiator. - * - * The caller must either send a response to the initiator with the REQUEST - * LIMIT DELTA field set to delta + 1 or call srpt_undo_req_lim_delta(ch, - * delta); where delta is the return value of this function. - */ -static int srpt_wait_for_cred(struct srpt_rdma_ch *ch, int req_lim_min) -{ - int delta; - -#if 0 - bool debug_print = atomic_read(&ch->req_lim) <= req_lim_min + 1; - if (debug_print) - PRINT_INFO("srpt_wait_for_cred(): min %d, req_lim %d," - " req_lim_delta %d", req_lim_min, - atomic_read(&ch->req_lim), - atomic_read(&ch->req_lim_delta)); -#endif -#if defined(CONFIG_SCST_DEBUG) - if (processing_delay_in_us <= MAX_UDELAY_MS * 1000) - udelay(processing_delay_in_us); -#endif - delta = 0; /* superfluous -- to keep sparse happy */ - while (unlikely(srpt_must_wait_for_cred(ch, req_lim_min, &delta))) { - atomic_inc(&ch->req_lim_waiter_count); - wait_for_completion(&ch->req_lim_compl); - } -#if 0 - if (debug_print) - PRINT_INFO("srpt_wait_for_cred() returns %d", delta); -#endif - return delta; -} - -/** - * srpt_xmit_response() - SCST callback function that transmits the response - * to a SCSI command. - * - * Must not block. Must ensure that scst_tgt_cmd_done() will get invoked when - * returning SCST_TGT_RES_SUCCESS. + * Callback function called by the SCST core. Must not block. Must ensure that + * scst_tgt_cmd_done() will get invoked when returning SCST_TGT_RES_SUCCESS. */ static int srpt_xmit_response(struct scst_cmd *scmnd) { struct srpt_rdma_ch *ch; - struct srpt_ioctx *ioctx; + struct srpt_send_ioctx *ioctx; enum srpt_command_state state; - s32 req_lim_delta; int ret; scst_data_direction dir; int resp_len; @@ -3270,18 +3081,24 @@ static int srpt_xmit_response(struct scst_cmd *scmnd) dir = scst_cmd_get_data_direction(scmnd); /* For read commands, transfer the data to the initiator. 
*/ - if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) { + if (dir == SCST_DATA_READ + && scst_cmd_get_adjusted_resp_data_len(scmnd)) { ret = srpt_xfer_data(ch, ioctx, scmnd); - if (ret != SCST_TGT_RES_SUCCESS) { - PRINT_ERROR("%s: tag= %llu xfer_data failed", - __func__, scst_cmd_get_tag(scmnd)); + if (ret == SCST_TGT_RES_QUEUE_FULL) { + srpt_set_cmd_state(ioctx, state); + PRINT_WARNING("xfer_data failed for tag %llu" + " - retrying", scst_cmd_get_tag(scmnd)); + goto out; + } else if (ret != SCST_TGT_RES_SUCCESS) { + PRINT_ERROR("xfer_data failed for tag %llu", + scst_cmd_get_tag(scmnd)); goto out; } } - req_lim_delta = srpt_wait_for_cred(ch, 1); + atomic_inc(&ch->req_lim); - resp_len = srpt_build_cmd_rsp(ch, ioctx, req_lim_delta, + resp_len = srpt_build_cmd_rsp(ch, ioctx, scst_cmd_get_tag(scmnd), scst_cmd_get_status(scmnd), scst_cmd_get_sense_buffer(scmnd), @@ -3290,11 +3107,9 @@ static int srpt_xmit_response(struct scst_cmd *scmnd) if (srpt_post_send(ch, ioctx, resp_len)) { srpt_unmap_sg_to_ib_sge(ch, ioctx); srpt_set_cmd_state(ioctx, state); - scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_FAILED); - PRINT_ERROR("%s[%d]: ch->state %d cmd state %d tag %llu", - __func__, __LINE__, atomic_read(&ch->state), - state, scst_cmd_get_tag(scmnd)); - srpt_undo_req_lim_delta(ch, req_lim_delta - 1); + atomic_dec(&ch->req_lim); + PRINT_WARNING("sending response failed for tag %llu - retrying", + scst_cmd_get_tag(scmnd)); ret = SCST_TGT_RES_QUEUE_FULL; } @@ -3312,20 +3127,19 @@ static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd) { struct srpt_rdma_ch *ch; struct srpt_mgmt_ioctx *mgmt_ioctx; - struct srpt_ioctx *ioctx; + struct srpt_send_ioctx *ioctx; enum srpt_command_state new_state; - s32 req_lim_delta; int rsp_len; mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd); BUG_ON(!mgmt_ioctx); - ch = mgmt_ioctx->ch; - BUG_ON(!ch); - ioctx = mgmt_ioctx->ioctx; BUG_ON(!ioctx); + ch = ioctx->ch; + BUG_ON(!ch); + TRACE_DBG("%s: tsk_mgmt_done for tag= %lld status=%d", __func__, mgmt_ioctx->tag, scst_mgmt_cmd_get_status(mcmnd)); @@ -3334,23 +3148,23 @@ static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd) new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_MGMT_RSP_SENT); WARN_ON(new_state == SRPT_STATE_DONE); - req_lim_delta = srpt_wait_for_cred(ch, 0); + atomic_inc(&ch->req_lim); - rsp_len = srpt_build_tskmgmt_rsp(ch, ioctx, req_lim_delta, - (scst_mgmt_cmd_get_status(mcmnd) == - SCST_MGMT_STATUS_SUCCESS) ? - SRP_TSK_MGMT_SUCCESS : - SRP_TSK_MGMT_FAILED, + rsp_len = srpt_build_tskmgmt_rsp(ch, ioctx, + scst_to_srp_tsk_mgmt_status( + scst_mgmt_cmd_get_status(mcmnd)), mgmt_ioctx->tag); /* * Note: the srpt_post_send() call below sends the task management * response asynchronously. It is possible that the SCST core has * already freed the struct scst_mgmt_cmd structure before the - * response is sent. This is fine. + * response is sent. This is fine however. */ if (srpt_post_send(ch, ioctx, rsp_len)) { PRINT_ERROR("%s", "Sending SRP_RSP response failed."); - srpt_undo_req_lim_delta(ch, req_lim_delta - 1); + srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); + srpt_put_send_ioctx(ioctx); + atomic_dec(&ch->req_lim); } scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL); @@ -3406,25 +3220,12 @@ out: } /** - * srpt_on_free_cmd() - SCST on_free_cmd callback. + * srpt_on_free_cmd() - Free command-private data. * - * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about - * to be freed. May be called in IRQ context. + * Called by the SCST core. May be called in IRQ context. 
*/ static void srpt_on_free_cmd(struct scst_cmd *scmnd) { - struct srpt_rdma_ch *ch; - struct srpt_ioctx *ioctx; - - ioctx = scst_cmd_get_tgt_priv(scmnd); - BUG_ON(!ioctx); - - WARN_ON(srpt_get_cmd_state(ioctx) != SRPT_STATE_DONE); - - ch = ioctx->ch; - BUG_ON(!ch); - - srpt_reset_ioctx(ch, ioctx, 1); } #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19) @@ -3444,10 +3245,9 @@ static void srpt_refresh_port_work(struct work_struct *work) } /** - * srpt_detect() - SCST detect callback function. + * srpt_detect() - Returns the number of target adapters. * - * Called by the SCST core to detect target adapters. Returns the number of - * detected target adapters. + * Callback function called by the SCST core. */ static int srpt_detect(struct scst_tgt_template *tp) { @@ -3463,10 +3263,9 @@ static int srpt_detect(struct scst_tgt_template *tp) } /** - * srpt_release() - SCST release callback function. + * srpt_release() - Free the resources associated with an SCST target. * - * Callback function called by the SCST core from scst_unregister_target() to - * free up the resources associated with device scst_tgt. + * Callback function called by the SCST core from scst_unregister_target(). */ static int srpt_release(struct scst_tgt *scst_tgt) { @@ -3501,6 +3300,11 @@ static int srpt_release(struct scst_tgt *scst_tgt) return 0; } +/** + * srpt_get_scsi_transport_version() - Returns the SCSI transport version. + * This function is called from scst_pres.c, the code that implements + * persistent reservation support. + */ static uint16_t srpt_get_scsi_transport_version(struct scst_tgt *scst_tgt) { return 0x0940; /* SRP */ @@ -3508,41 +3312,41 @@ static uint16_t srpt_get_scsi_transport_version(struct scst_tgt *scst_tgt) /* SCST target template for the SRP target implementation. */ static struct scst_tgt_template srpt_template = { - .name = DRV_NAME, - .sg_tablesize = SRPT_DEF_SG_TABLESIZE, - .max_hw_pending_time = 60/*seconds*/, + .name = DRV_NAME, + .sg_tablesize = SRPT_DEF_SG_TABLESIZE, + .max_hw_pending_time = 60/*seconds*/, #if !defined(CONFIG_SCST_PROC) - .enable_target = srpt_enable_target, - .is_target_enabled = srpt_is_target_enabled, + .enable_target = srpt_enable_target, + .is_target_enabled = srpt_is_target_enabled, #endif #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) - .default_trace_flags = DEFAULT_SRPT_TRACE_FLAGS, - .trace_flags = &trace_flag, + .default_trace_flags = DEFAULT_SRPT_TRACE_FLAGS, + .trace_flags = &trace_flag, #endif - .detect = srpt_detect, - .release = srpt_release, - .xmit_response = srpt_xmit_response, - .rdy_to_xfer = srpt_rdy_to_xfer, - .on_hw_pending_cmd_timeout = srpt_pending_cmd_timeout, - .on_free_cmd = srpt_on_free_cmd, - .task_mgmt_fn_done = srpt_tsk_mgmt_done, + .detect = srpt_detect, + .release = srpt_release, + .xmit_response = srpt_xmit_response, + .rdy_to_xfer = srpt_rdy_to_xfer, + .on_hw_pending_cmd_timeout = srpt_pending_cmd_timeout, + .on_free_cmd = srpt_on_free_cmd, + .task_mgmt_fn_done = srpt_tsk_mgmt_done, .get_initiator_port_transport_id = srpt_get_initiator_port_transport_id, - .get_scsi_transport_version = srpt_get_scsi_transport_version, + .get_scsi_transport_version = srpt_get_scsi_transport_version, }; /** - * srpt_release_class_dev() - Device release callback function. + * srpt_dev_release() - Device release callback function. 
* - * The callback function srpt_release_class_dev() is called whenever a + * The callback function srpt_dev_release() is called whenever a * device is removed from the /sys/class/infiniband_srpt device class. * Although this function has been left empty, a release function has been * defined such that upon module removal no complaint is logged about a * missing release function. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) -static void srpt_release_class_dev(struct class_device *class_dev) +static void srpt_dev_release(struct class_device *dev) #else -static void srpt_release_class_dev(struct device *dev) +static void srpt_dev_release(struct device *dev) #endif { } @@ -3571,22 +3375,19 @@ static struct scst_proc_data srpt_log_proc_data = { #endif /* CONFIG_SCST_PROC */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) -static ssize_t show_login_info(struct class_device *class_dev, char *buf) +static ssize_t show_login_info(struct class_device *dev, char *buf) #else static ssize_t show_login_info(struct device *dev, struct device_attribute *attr, char *buf) #endif { - struct srpt_device *sdev = -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) - container_of(class_dev, struct srpt_device, class_dev); -#else - container_of(dev, struct srpt_device, dev); -#endif + struct srpt_device *sdev; struct srpt_port *sport; int i; - int len = 0; + int len; + sdev = container_of(dev, struct srpt_device, dev); + len = 0; for (i = 0; i < sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i]; @@ -3626,9 +3427,9 @@ static struct device_attribute srpt_dev_attrs[] = { static struct class srpt_class = { .name = "infiniband_srpt", #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) - .release = srpt_release_class_dev, + .release = srpt_dev_release, #else - .dev_release = srpt_release_class_dev, + .dev_release = srpt_dev_release, #endif .class_attrs = srpt_class_attrs, #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) @@ -3667,23 +3468,22 @@ static void srpt_add_one(struct ib_device *device) sdev->device = device; -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) - sdev->class_dev.class = &srpt_class; - sdev->class_dev.dev = device->dma_device; - snprintf(sdev->class_dev.class_id, BUS_ID_SIZE, - "srpt-%s", device->name); -#else sdev->dev.class = &srpt_class; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) + sdev->dev.dev = device->dma_device; +#else sdev->dev.parent = device->dma_device; -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30) +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) + snprintf(sdev->dev.class_id, BUS_ID_SIZE, "srpt-%s", device->name); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30) snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name); #else dev_set_name(&sdev->dev, "srpt-%s", device->name); #endif -#endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) - if (class_device_register(&sdev->class_dev)) + if (class_device_register(&sdev->dev)) goto unregister_tgt; #else if (device_register(&sdev->dev)) @@ -3743,13 +3543,13 @@ static void srpt_add_one(struct ib_device *device) if (ib_register_event_handler(&sdev->event_handler)) goto err_cm; - sdev->ioctx_ring = kmalloc(sdev->srq_size * sizeof sdev->ioctx_ring[0], - GFP_KERNEL); + sdev->ioctx_ring = (struct srpt_recv_ioctx **) + srpt_alloc_ioctx_ring(sdev, sdev->srq_size, + sizeof(*sdev->ioctx_ring[0]), + srp_max_req_size, DMA_FROM_DEVICE); if (!sdev->ioctx_ring) goto err_event; - if (srpt_alloc_ioctx_ring(sdev, sdev->ioctx_ring, sdev->srq_size, 0)) - goto err_alloc_ring; 
INIT_LIST_HEAD(&sdev->rch_list); spin_lock_init(&sdev->spinlock); @@ -3790,9 +3590,9 @@ static void srpt_add_one(struct ib_device *device) err_ring: ib_set_client_data(device, &srpt_client, NULL); - srpt_free_ioctx_ring(sdev, sdev->ioctx_ring, sdev->srq_size); -err_alloc_ring: - kfree(sdev->ioctx_ring); + srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, + sdev->srq_size, srp_max_req_size, + DMA_FROM_DEVICE); err_event: ib_unregister_event_handler(&sdev->event_handler); err_cm: @@ -3805,7 +3605,7 @@ err_pd: ib_dealloc_pd(sdev->pd); err_dev: #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) - class_device_unregister(&sdev->class_dev); + class_device_unregister(&sdev->dev); #else device_unregister(&sdev->dev); #endif @@ -3860,7 +3660,7 @@ static void srpt_remove_one(struct ib_device *device) ib_dealloc_pd(sdev->pd); #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) - class_device_unregister(&sdev->class_dev); + class_device_unregister(&sdev->dev); #else device_unregister(&sdev->dev); #endif @@ -3873,8 +3673,8 @@ static void srpt_remove_one(struct ib_device *device) scst_unregister_target(sdev->scst_tgt); sdev->scst_tgt = NULL; - srpt_free_ioctx_ring(sdev, sdev->ioctx_ring, sdev->srq_size); - kfree(sdev->ioctx_ring); + srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, + sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE); sdev->ioctx_ring = NULL; kfree(sdev); @@ -3945,11 +3745,19 @@ static int __init srpt_init_module(void) int ret; ret = -EINVAL; - if (srp_max_message_size < MIN_MAX_MESSAGE_SIZE) { + if (srp_max_req_size < MIN_MAX_REQ_SIZE) { PRINT_ERROR("invalid value %d for kernel module parameter" - " srp_max_message_size -- must be at least %d.", - srp_max_message_size, - MIN_MAX_MESSAGE_SIZE); + " srp_max_req_size -- must be at least %d.", + srp_max_req_size, + MIN_MAX_REQ_SIZE); + goto out; + } + + if (srp_max_rsp_size < MIN_MAX_RSP_SIZE) { + PRINT_ERROR("invalid value %d for kernel module parameter" + " srp_max_rsp_size -- must be at least %d.", + srp_max_rsp_size, + MIN_MAX_RSP_SIZE); goto out; } diff --git a/srpt/src/ib_srpt.h b/srpt/src/ib_srpt.h index 0288409ae..c3d920c82 100644 --- a/srpt/src/ib_srpt.h +++ b/srpt/src/ib_srpt.h @@ -74,7 +74,7 @@ enum { SRP_RDMA_WRITE_FROM_IOC = 0x20, /* - * srp_login_cmd::req_flags bitmasks. See also table 9 in the SRP r16a + * srp_login_cmd.req_flags bitmasks. See also table 9 in the SRP r16a * document. */ SRP_MTCH_ACTION = 0x03, /* MULTI-CHANNEL ACTION */ @@ -83,14 +83,14 @@ enum { SRP_AESOLNT = 0x40, /* asynchronous event solicited notification */ /* - * srp_cmd::sol_nt / srp_tsk_mgmt::sol_not bitmasks. See also tables + * srp_cmd.sol_nt / srp_tsk_mgmt.sol_not bitmasks. See also tables * 18 and 20 in the T10 r16a document. */ SRP_SCSOLNT = 0x02, /* SCSOLNT = successful solicited notification */ SRP_UCSOLNT = 0x04, /* UCSOLNT = unsuccessful solicited notification */ /* - * srp_rsp::sol_not / srp_t_logout::sol_not bitmasks. See also tables + * srp_rsp.sol_not / srp_t_logout.sol_not bitmasks. See also tables * 16 and 22 in the T10 r16a document. 
*/ SRP_SOLNT = 0x01, /* SOLNT = solicited notification */ @@ -120,45 +120,24 @@ enum { DEFAULT_SRPT_SRQ_SIZE = 4095, MAX_SRPT_SRQ_SIZE = 65535, - MIN_MAX_MESSAGE_SIZE = 996, - DEFAULT_MAX_MESSAGE_SIZE + MIN_MAX_REQ_SIZE = 996, + DEFAULT_MAX_REQ_SIZE = sizeof(struct srp_cmd)/*48*/ + sizeof(struct srp_indirect_buf)/*20*/ + 128 * sizeof(struct srp_direct_buf)/*16*/, + MIN_MAX_RSP_SIZE = sizeof(struct srp_rsp)/*36*/ + 4, + DEFAULT_MAX_RSP_SIZE = 256, /* leaves 220 bytes for sense data */ + DEFAULT_MAX_RDMA_SIZE = 65536, - - /* - * Number of I/O contexts to be allocated for sending back requests - * from the target to the initiator. Must be a power of two. - */ - TTI_IOCTX_COUNT = 2, - TTI_IOCTX_MASK = TTI_IOCTX_COUNT - 1, }; -/** - * @SRPT_OP_TTI: wr_id flag for marking requests sent by the target to the - * initiator. - * @SRPT_OP_RECV: wr_id flag for marking receive operations. - */ -enum { - SRPT_OP_TTI = (1 << 30), - SRPT_OP_RECV = (1 << 31), - - SRPT_OP_FLAGS = SRPT_OP_TTI | SRPT_OP_RECV, -}; - -/* - * SRP_CRED_REQ information unit, as defined in section 6.10 of the T10 SRP - * r16a document. - */ -struct srp_cred_req { - u8 opcode; - u8 sol_not; - u8 reserved[2]; - __be32 req_lim_delta; - __be64 tag; -}; +static inline u64 encode_wr_id(u8 opcode, u32 idx) +{ return ((u64)opcode << 32) | idx; } +static inline u8 opcode_from_wr_id(u64 wr_id) +{ return wr_id >> 32; } +static inline u32 idx_from_wr_id(u64 wr_id) +{ return (u32)wr_id; } struct rdma_iu { u64 raddr; @@ -169,7 +148,7 @@ struct rdma_iu { }; /** - * enum srpt_command_state - SCSI command states managed by SRPT. + * enum srpt_command_state - SCSI command state managed by SRPT. * @SRPT_STATE_NEW: New command arrived and is being processed. * @SRPT_STATE_NEED_DATA: Processing a write or bidir command and waiting * for data arrival. @@ -191,45 +170,60 @@ enum srpt_command_state { }; /** - * struct srpt_ioctx - SRPT-private data associated with a struct scst_cmd. - * @index: Index of the I/O context in ioctx_ring. - * @buf: Pointer to the message transferred via this I/O context. - * @dma: DMA address of buf. - * @wait_list: Node for insertion in srpt_rdma_ch::cmd_wait_list. - * @state: I/O context state. See also enum srpt_command_state. + * struct srpt_ioctx - Shared SRPT I/O context information. + * @buf: Pointer to the buffer. + * @dma: DMA address of the buffer. + * @index: Index of the I/O context in its ioctx_ring array. */ struct srpt_ioctx { - int index; - void *buf; - dma_addr_t dma; - struct rdma_iu *rdma_ius; - struct srp_direct_buf *rbufs; - struct srp_direct_buf single_rbuf; - struct list_head wait_list; - int mapped_sg_count; - u16 n_rdma_ius; - u8 n_rdma; - u8 n_rbuf; + void *buf; + dma_addr_t dma; + uint32_t index; +}; - u64 wr_id; - enum ib_wc_status status; - enum ib_wc_opcode opcode; - struct srpt_rdma_ch *ch; - struct scst_cmd *scmnd; - scst_data_direction dir; - atomic_t state; +/** + * struct srpt_recv_ioctx - SRPT receive I/O context. + * @ioctx: See above. + * @wait_list: Node for insertion in srpt_rdma_ch.cmd_wait_list. + */ +struct srpt_recv_ioctx { + struct srpt_ioctx ioctx; + struct list_head wait_list; +}; + +/** + * struct srpt_send_ioctx - SRPT send I/O context. + * @ioctx: See above. + * @free_list: Allows to make this struct an entry in srpt_rdma_ch.free_list. + * @state: I/O context state. See also enum srpt_command_state. 
+ */ +struct srpt_send_ioctx { + struct srpt_ioctx ioctx; + struct srpt_rdma_ch *ch; + struct rdma_iu *rdma_ius; + struct srp_direct_buf *rbufs; + struct srp_direct_buf single_rbuf; + struct scatterlist *sg; + struct list_head free_list; + int sg_cnt; + int mapped_sg_count; + u16 n_rdma_ius; + u8 n_rdma; + u8 n_rbuf; + + struct scst_cmd *scmnd; + scst_data_direction dir; + atomic_t state; }; /** * struct srpt_mgmt_ioctx - SCST management command context information. * @ioctx: SRPT I/O context associated with the management command. - * @ch: RDMA channel over which the management command has been received. * @tag: SCSI tag of the management command. */ struct srpt_mgmt_ioctx { - struct srpt_ioctx *ioctx; - struct srpt_rdma_ch *ch; - u64 tag; + struct srpt_send_ioctx *ioctx; + u64 tag; }; /** @@ -259,51 +253,41 @@ enum rdma_ch_state { * @max_ti_iu_len: maximum target-to-initiator information unit length. * @supports_cred_req: whether or not the initiator supports SRP_CRED_REQ. * @req_lim: request limit: maximum number of requests that may be sent - * by the initiator without having received a response or - * SRP_CRED_REQ. - * @req_lim_delta: req_lim_delta to be sent in the next SRP_RSP. - * @req_lim_waiter_count: number of threads waiting on req_lim_wait. - * @req_lim_compl: completion variable that is signalled every time req_lim - * has been incremented. + * by the initiator without having received a response. * @state: channel state. See also enum rdma_ch_state. - * @list: node for insertion in the srpt_device::rch_list list. + * @list: node for insertion in the srpt_device.rch_list list. * @cmd_wait_list: list of SCST commands that arrived before the RTU event. This * list contains struct srpt_ioctx elements and is protected * against concurrent modification by the cm_id spinlock. - * @tti_head: Index of first element of tti_ioctx that is not in use. - * @tti_tail: Index of first element of tti_ioctx that is in use. - * @tti_ioctx: Circular buffer with I/O contexts for sending requests from - * target to initiator. + * @spinlock: Protects free_list. + * @free_list: Head of list with free send I/O contexts. * @scst_sess: SCST session information associated with this SRP channel. * @sess_name: SCST session name. 
*/ struct srpt_rdma_ch { - wait_queue_head_t wait_queue; - struct task_struct *thread; - struct ib_cm_id *cm_id; - struct ib_qp *qp; - int rq_size; - atomic_t processing_compl; - struct ib_cq *cq; - atomic_t sq_wr_avail; - struct srpt_port *sport; - u8 i_port_id[16]; - u8 t_port_id[16]; - int max_ti_iu_len; - atomic_t supports_cred_req; - atomic_t req_lim; - atomic_t req_lim_delta; - atomic_t req_lim_waiter_count; - struct completion req_lim_compl; - atomic_t state; - struct list_head list; - struct list_head cmd_wait_list; - int tti_head; - int tti_tail; - struct srpt_ioctx *tti_ioctx[TTI_IOCTX_COUNT]; + wait_queue_head_t wait_queue; + struct task_struct *thread; + struct ib_cm_id *cm_id; + struct ib_qp *qp; + int rq_size; + atomic_t processing_compl; + struct ib_cq *cq; + atomic_t sq_wr_avail; + struct srpt_port *sport; + u8 i_port_id[16]; + u8 t_port_id[16]; + int max_ti_iu_len; + atomic_t req_lim; + spinlock_t spinlock; + struct list_head free_list; + struct srpt_send_ioctx **ioctx_ring; + struct ib_wc wc[16]; + atomic_t state; + struct list_head list; + struct list_head cmd_wait_list; - struct scst_session *scst_sess; - u8 sess_name[36]; + struct scst_session *scst_sess; + u8 sess_name[36]; }; /** @@ -334,9 +318,9 @@ struct srpt_port { * @srq: Per-HCA SRQ (shared receive queue). * @cm_id: connection identifier. * @dev_attr: attributes of the InfiniBand device as obtained during the - * ib_client::add() callback. + * ib_client.add() callback. * @ioctx_ring: Per-HCA I/O context ring. - * @rch_list: per-device channel list -- see also srpt_rdma_ch::list. + * @rch_list: per-device channel list -- see also srpt_rdma_ch.list. * @spinlock: protects rch_list. * @srpt_port: information about the ports owned by this HCA. * @event_handler: per-HCA asynchronous IB event handler. @@ -352,13 +336,13 @@ struct srpt_device { struct ib_cm_id *cm_id; struct ib_device_attr dev_attr; int srq_size; - struct srpt_ioctx **ioctx_ring; + struct srpt_recv_ioctx **ioctx_ring; struct list_head rch_list; spinlock_t spinlock; struct srpt_port port[2]; struct ib_event_handler event_handler; #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) - struct class_device class_dev; + struct class_device dev; #else struct device dev; #endif
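
Note on the wr_id encoding used above: the inline helpers pack the IB work-completion opcode into the upper 32 bits of the 64-bit wr_id and the ioctx ring index into the lower 32 bits, which is why srpt_process_completion() can recover both via opcode_from_wr_id() and idx_from_wr_id() and test the IB_WC_RECV flag bit directly on the decoded opcode. Below is a minimal stand-alone sketch of that round trip; it is only an illustration, with <stdint.h> types substituted for the kernel u8/u32/u64 types and a placeholder numeric value standing in for an IB_WC_* opcode constant:

	#include <assert.h>
	#include <stdint.h>

	/* Same packing as the encode_wr_id()/opcode_from_wr_id()/idx_from_wr_id()
	 * helpers in ib_srpt.h, rewritten with stdint types so the sketch builds
	 * outside the kernel. */
	static inline uint64_t encode_wr_id(uint8_t opcode, uint32_t idx)
	{ return ((uint64_t)opcode << 32) | idx; }
	static inline uint8_t opcode_from_wr_id(uint64_t wr_id)
	{ return wr_id >> 32; }
	static inline uint32_t idx_from_wr_id(uint64_t wr_id)
	{ return (uint32_t)wr_id; }

	int main(void)
	{
		/* 0x80 merely plays the role of an IB_WC_* opcode value here;
		 * 1023 is an arbitrary send-ioctx ring index. */
		uint64_t wr_id = encode_wr_id(0x80, 1023);

		assert(opcode_from_wr_id(wr_id) == 0x80);
		assert(idx_from_wr_id(wr_id) == 1023);
		return 0;
	}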