ib_srpt: Merge r4045:4083 from trunk

git-svn-id: http://svn.code.sf.net/p/scst/svn/branches/2.2.x@4084 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
Bart Van Assche
2012-01-21 17:54:32 +00:00
parent 1f65365675
commit 3b2db8bb5a
5 changed files with 110 additions and 40 deletions

View File

@@ -283,14 +283,6 @@ For more information, see also:
* http://www.linux-ha.org/wiki/Main_Page
Notes about ib_srpt
-------------------
* Unloading the kernel module ib_srpt while I/O is ongoing is supported.
However, it can take up to two minutes before unloading finishes. During
that time CPU usage will be high.
Performance Notes - Target Side
-------------------------------
@@ -314,5 +306,32 @@ Performance Notes - Initiator Side
* /proc/irq/${ib_int_no}/smp_affinity
Frequently Asked Questions
--------------------------
Q: Loading the kernel module ib_srpt triggers a kernel panic with a call trace
like the one below. What causes this and how can it be solved?
Call Trace:
[<ffffffffa02f2a50>] srpt_alloc_ioctx+0x60/0xb0 [ib_srpt]
[<ffffffffa02f2f0a>] srpt_alloc_ioctx_ring+0xea/0x1e0 [ib_srpt]
[<ffffffffa02f32e9>] srpt_add_one+0x2e9/0x670 [ib_srpt]
[<ffffffffa015a480>] ib_register_client+0x80/0xa0 [ib_core]
[<ffffffffa02421eb>] srpt_init_module+0x1eb/0x235 [ib_srpt]
[<ffffffff81000344>] do_one_initcall+0x34/0x1a0
[<ffffffff8107a63c>] sys_init_module+0xdc/0x260
[<ffffffff81002e3b>] system_call_fastpath+0x16/0x1b
A: This means that OFED has been installed on the system but that ib_srpt has
been compiled against the non-OFED kernel headers instead of the OFED kernel
headers. You can fix this by rebuilding ib_srpt against the OFED kernel
headers. The ib_srpt makefile should detect the OFED kernel headers
automatically, provided that ib_srpt is built after OFED has been installed.
Feedback
--------
Send questions about this driver to scst-devel@lists.sourceforge.net, CC:
Vu Pham <vuhuong@mellanox.com> and Bart Van Assche <bvanassche@acm.org>.

View File

@@ -118,7 +118,9 @@ Index: srpt/src/ib_srpt.c
--name=$dev --filename=$dev
* Test whether queue overflow recovery works correctly as follows:
- On the target, reload ib_srpt with srpt_sq_size set to 64.
- On the target, reload ib_srpt with srpt_sq_size set to 64, e.g. by adding
the following line at the end of /etc/modprobe.d/99-local.conf:
options ib_srpt srpt_sq_size=64
- On the initiator, run a direct I/O test with large block sizes, e.g.
scripts/blockdev-perftest -f -d -j -m 12 -M 24 /dev/sdb
- On the initiator, run the following two commands in parallel:

View File

@@ -0,0 +1,14 @@
diff --git a/Makefile b/Makefile
index d018956..3c3b936 100644
--- a/Makefile
+++ b/Makefile
@@ -357,7 +357,8 @@ CFLAGS_GCOV = -fprofile-arcs -ftest-coverage
# Use LINUXINCLUDE when you must reference the include/ directory.
# Needed to be compatible with the O= option
-LINUXINCLUDE := -I$(srctree)/arch/$(hdr-arch)/include \
+LINUXINCLUDE := $(PRE_CFLAGS) \
+ -I$(srctree)/arch/$(hdr-arch)/include \
-Iarch/$(hdr-arch)/include/generated -Iinclude \
$(if $(KBUILD_SRC), -I$(srctree)/include) \
-include include/generated/autoconf.h

View File

@@ -89,7 +89,7 @@ MODULE_PARM_DESC(trace_flag, "SCST trace flags.");
#endif
static unsigned srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE;
module_param(srp_max_rdma_size, int, 0744);
module_param(srp_max_rdma_size, int, 0644);
MODULE_PARM_DESC(srp_max_rdma_size,
"Maximum size of SRP RDMA transfers for new connections.");
@@ -166,6 +166,7 @@ static enum rdma_ch_state srpt_set_ch_state_to_disc(struct srpt_rdma_ch *ch)
case CH_CONNECTING:
case CH_LIVE:
ch->state = CH_DISCONNECTING;
wake_up_process(ch->thread);
changed = true;
break;
default:
@@ -173,9 +174,6 @@ static enum rdma_ch_state srpt_set_ch_state_to_disc(struct srpt_rdma_ch *ch)
}
spin_unlock_irqrestore(&ch->spinlock, flags);
if (changed)
wake_up_process(ch->thread);
return prev;
}
@@ -190,6 +188,7 @@ static bool srpt_set_ch_state_to_draining(struct srpt_rdma_ch *ch)
case CH_LIVE:
case CH_DISCONNECTING:
ch->state = CH_DRAINING;
wake_up_process(ch->thread);
changed = true;
break;
default:
@@ -197,8 +196,6 @@ static bool srpt_set_ch_state_to_draining(struct srpt_rdma_ch *ch)
}
spin_unlock_irqrestore(&ch->spinlock, flags);
if (changed)
wake_up_process(ch->thread);
return changed;
}
@@ -217,12 +214,11 @@ static bool srpt_test_and_set_ch_state(struct srpt_rdma_ch *ch,
spin_lock_irqsave(&ch->spinlock, flags);
if (ch->state == old) {
ch->state = new;
wake_up_process(ch->thread);
changed = true;
}
spin_unlock_irqrestore(&ch->spinlock, flags);
if (changed)
wake_up_process(ch->thread);
return changed;
}
@@ -286,6 +282,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
{
struct srpt_device *sdev;
struct srpt_port *sport;
u8 port_num;
TRACE_ENTRY();
@@ -298,10 +295,15 @@ static void srpt_event_handler(struct ib_event_handler *handler,
switch (event->event) {
case IB_EVENT_PORT_ERR:
if (event->element.port_num <= sdev->device->phys_port_cnt) {
sport = &sdev->port[event->element.port_num - 1];
port_num = event->element.port_num - 1;
if (port_num < sdev->device->phys_port_cnt) {
sport = &sdev->port[port_num];
sport->lid = 0;
sport->sm_lid = 0;
} else {
WARN(true, "event %d: port_num %d out of range 1..%d\n",
event->event, port_num + 1,
sdev->device->phys_port_cnt);
}
break;
case IB_EVENT_PORT_ACTIVE:
@@ -310,10 +312,15 @@ static void srpt_event_handler(struct ib_event_handler *handler,
case IB_EVENT_SM_CHANGE:
case IB_EVENT_CLIENT_REREGISTER:
/* Refresh port data asynchronously. */
if (event->element.port_num <= sdev->device->phys_port_cnt) {
sport = &sdev->port[event->element.port_num - 1];
port_num = event->element.port_num - 1;
if (port_num < sdev->device->phys_port_cnt) {
sport = &sdev->port[port_num];
if (!sport->lid && !sport->sm_lid)
schedule_work(&sport->work);
} else {
WARN(true, "event %d: port_num %d out of range 1..%d\n",
event->event, port_num + 1,
sdev->device->phys_port_cnt);
}
break;
default:
@@ -343,6 +350,8 @@ static const char *get_ch_state_name(enum rdma_ch_state s)
return "disconnecting";
case CH_DRAINING:
return "draining";
case CH_FREEING:
return "freeing";
}
return "???";
}
@@ -370,6 +379,7 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
TRACE_DBG("%s, state %s: received Last WQE event.",
ch->sess_name, get_ch_state_name(ch->state));
ch->last_wqe_received = true;
BUG_ON(!ch->thread);
wake_up_process(ch->thread);
break;
default:
@@ -1959,19 +1969,18 @@ static void srpt_process_completion(struct ib_cq *cq,
EXTRACHECKS_WARN_ON(cq != ch->cq);
do {
while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
for (i = 0; i < n; i++) {
if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
srpt_process_rcv_completion(cq, ch,
rcv_context, &wc[i]);
else
srpt_process_send_completion(cq, ch,
send_context, &wc[i]);
}
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
for (i = 0; i < n; i++) {
if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
srpt_process_rcv_completion(cq, ch, rcv_context,
&wc[i]);
else
srpt_process_send_completion(cq, ch,
send_context,
&wc[i]);
}
} while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
IB_CQ_REPORT_MISSED_EVENTS) > 0);
}
}
/**
@@ -1981,6 +1990,7 @@ static void srpt_completion(struct ib_cq *cq, void *ctx)
{
struct srpt_rdma_ch *ch = ctx;
BUG_ON(!ch->thread);
wake_up_process(ch->thread);
}
@@ -2012,13 +2022,22 @@ static int srpt_compl_thread(void *arg)
ch->sess_name);
scst_unregister_session(ch->scst_sess, false, srpt_free_ch);
while (!kthread_should_stop()) {
/*
* Some HCAs can queue send completions after the Last WQE
* event. Make sure to process these work completions.
*/
while (ch->state < CH_FREEING) {
set_current_state(TASK_INTERRUPTIBLE);
srpt_process_completion(ch->cq, ch, SCST_CONTEXT_THREAD,
SCST_CONTEXT_DIRECT);
schedule();
}
complete(&ch->finished_processing_completions);
while (!kthread_should_stop())
schedule();
return 0;
}
@@ -2155,6 +2174,7 @@ static bool __srpt_close_ch(struct srpt_rdma_ch *ch)
break;
case CH_DISCONNECTING:
case CH_DRAINING:
case CH_FREEING:
break;
}
@@ -2214,13 +2234,15 @@ static void srpt_free_ch(struct scst_session *sess)
sdev = ch->sport->sdev;
BUG_ON(!sdev);
WARN_ON(ch->state != CH_DRAINING);
WARN_ON(!srpt_test_and_set_ch_state(ch, CH_DRAINING, CH_FREEING));
WARN_ON(!ch->last_wqe_received);
BUG_ON(!ch->thread);
BUG_ON(ch->thread == current);
kthread_stop(ch->thread);
ch->thread = NULL;
while (wait_for_completion_timeout(&ch->finished_processing_completions,
10 * HZ) == 0)
PRINT_INFO("Waiting for completion processing thread ...");
srpt_destroy_ch_ib(ch);
@@ -2234,10 +2256,13 @@ static void srpt_free_ch(struct scst_session *sess)
ib_destroy_cm_id(ch->cm_id);
wake_up(&sdev->ch_releaseQ);
kthread_stop(ch->thread);
ch->thread = NULL;
kfree(ch);
wake_up(&sdev->ch_releaseQ);
TRACE_EXIT();
}
@@ -2403,6 +2428,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
ch->state = CH_CONNECTING;
INIT_LIST_HEAD(&ch->cmd_wait_list);
init_waitqueue_head(&ch->state_wq);
init_completion(&ch->finished_processing_completions);
ch->max_rsp_size = max_t(uint32_t, srp_max_rsp_size, MIN_MAX_RSP_SIZE);
ch->ioctx_ring = (struct srpt_send_ioctx **)
srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size,
@@ -2583,6 +2609,7 @@ free_ring:
ch->max_rsp_size, DMA_TO_DEVICE);
free_ch:
cm_id->context = NULL;
kfree(ch);
reject:
@@ -2697,6 +2724,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
int ret;
BUG_ON(!cm_id->context);
ret = 0;
switch (event->event) {
case IB_CM_REQ_RECEIVED:
@@ -3750,11 +3779,15 @@ static void srpt_add_one(struct ib_device *device)
sdev->srq_size = min(max(srpt_srq_size, MIN_SRPT_SRQ_SIZE),
sdev->dev_attr.max_srq_wr);
memset(&srq_attr, 0, sizeof(srq_attr));
srq_attr.event_handler = srpt_srq_event;
srq_attr.srq_context = (void *)sdev;
srq_attr.attr.max_wr = sdev->srq_size;
srq_attr.attr.max_sge = 1;
srq_attr.attr.srq_limit = 0;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 0)
srq_attr.srq_type = IB_SRQT_BASIC;
#endif
sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
if (IS_ERR(sdev->srq)) {
@@ -3815,8 +3848,7 @@ static void srpt_add_one(struct ib_device *device)
for (i = 0; i < sdev->srq_size; ++i)
srpt_post_recv(sdev, sdev->ioctx_ring[i]);
WARN_ON(sdev->device->phys_port_cnt
> sizeof(sdev->port)/sizeof(sdev->port[0]));
WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port));
for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
sport = &sdev->port[i - 1];

View File

@@ -272,12 +272,14 @@ struct srpt_send_ioctx {
* been sent and waiting for DREP or channel is being closed
* for another reason.
* @CH_DRAINING: QP is in ERR state.
* @CH_FREEING: QP resources are being freed.
*/
enum rdma_ch_state {
CH_CONNECTING,
CH_LIVE,
CH_DISCONNECTING,
CH_DRAINING,
CH_FREEING,
};
/**
@@ -335,6 +337,7 @@ struct srpt_rdma_ch {
wait_queue_head_t state_wq;
struct list_head list;
struct list_head cmd_wait_list;
struct completion finished_processing_completions;
bool last_wqe_received;
struct scst_session *scst_sess;