From 3b2db8bb5a03417f361a8e1e175b46d7586bfa48 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 21 Jan 2012 17:54:32 +0000 Subject: [PATCH] ib_srpt: Merge r4045:4083 from trunk git-svn-id: http://svn.code.sf.net/p/scst/svn/branches/2.2.x@4084 d57e44dd-8a1f-0410-8b47-8ef2f437770f --- srpt/README | 35 +++++++-- srpt/Testing.txt | 4 +- srpt/patches/kernel-3.2-pre-cflags.patch | 14 ++++ srpt/src/ib_srpt.c | 94 ++++++++++++++++-------- srpt/src/ib_srpt.h | 3 + 5 files changed, 110 insertions(+), 40 deletions(-) create mode 100644 srpt/patches/kernel-3.2-pre-cflags.patch diff --git a/srpt/README b/srpt/README index 5f5bbaa0b..27d39c91f 100644 --- a/srpt/README +++ b/srpt/README @@ -283,14 +283,6 @@ For more information, see also: * http://www.linux-ha.org/wiki/Main_Page -Notes about ib_srpt -------------------- - -* Unloading the kernel module ib_srpt while I/O is ongoing is supported. - However, it can take up to two minutes before unloading finishes. During - that time CPU usage will be high. - - Performance Notes - Target Side ------------------------------- @@ -314,5 +306,32 @@ Performance Notes - Initiator Side * /proc/irq/${ib_int_no}/smp_affinity +Frequently Asked Questions +-------------------------- + +Q: Loading the kernel module ib_srpt triggers a kernel panic with a call trace + like the one below. What is the cause of this and how can this be solved ? + + Call Trace: + [] srpt_alloc_ioctx+0x60/0xb0 [ib_srpt] + [] srpt_alloc_ioctx_ring+0xea/0x1e0 [ib_srpt] + [] srpt_add_one+0x2e9/0x670 [ib_srpt] + [] ib_register_client+0x80/0xa0 [ib_core] + [] srpt_init_module+0x1eb/0x235 [ib_srpt] + [] do_one_initcall+0x34/0x1a0 + [] sys_init_module+0xdc/0x260 + [] system_call_fastpath+0x16/0x1b + +A: This means that you are using a system on which OFED has been installed but + that ib_srpt has been compiled against the non-OFED kernel headers instead + of the OFED kernel headers. You can fix this by rebuilding ib_srpt against + the OFED kernel headers. The ib_srpt makefile should detect the OFED kernel + headers automatically - at least if ib_srpt is built after OFED has been + installed. + + +Feedback +-------- + Send questions about this driver to scst-devel@lists.sourceforge.net, CC: Vu Pham and Bart Van Assche . diff --git a/srpt/Testing.txt b/srpt/Testing.txt index 6513ff7fc..274c6e4c6 100644 --- a/srpt/Testing.txt +++ b/srpt/Testing.txt @@ -118,7 +118,9 @@ Index: srpt/src/ib_srpt.c --name=$dev --filename=$dev * Test whether queue overflow recovery works correctly as follows: - - On the target, reload ib_srpt with srpt_sq_size set to 64. + - On the target, reload ib_srpt with srpt_sq_size set to 64. Add e.g. the + following line at the end of /etc/modprobe.d/99-local.conf: + options ib_srpt srpt_sq_size=64 - On the initiator, run a direct I/O test with large block sizes, e.g. scripts/blockdev-perftest -f -d -j -m 12 -M 24 /dev/sdb - On the initiator, run the following two commands in parallel: diff --git a/srpt/patches/kernel-3.2-pre-cflags.patch b/srpt/patches/kernel-3.2-pre-cflags.patch new file mode 100644 index 000000000..f73c5e686 --- /dev/null +++ b/srpt/patches/kernel-3.2-pre-cflags.patch @@ -0,0 +1,14 @@ +diff --git a/Makefile b/Makefile +index d018956..3c3b936 100644 +--- a/Makefile ++++ b/Makefile +@@ -357,7 +357,8 @@ CFLAGS_GCOV = -fprofile-arcs -ftest-coverage + + # Use LINUXINCLUDE when you must reference the include/ directory. + # Needed to be compatible with the O= option +-LINUXINCLUDE := -I$(srctree)/arch/$(hdr-arch)/include \ ++LINUXINCLUDE := $(PRE_CFLAGS) \ ++ -I$(srctree)/arch/$(hdr-arch)/include \ + -Iarch/$(hdr-arch)/include/generated -Iinclude \ + $(if $(KBUILD_SRC), -I$(srctree)/include) \ + -include include/generated/autoconf.h diff --git a/srpt/src/ib_srpt.c b/srpt/src/ib_srpt.c index 5bd684bc7..d8706c70d 100644 --- a/srpt/src/ib_srpt.c +++ b/srpt/src/ib_srpt.c @@ -89,7 +89,7 @@ MODULE_PARM_DESC(trace_flag, "SCST trace flags."); #endif static unsigned srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE; -module_param(srp_max_rdma_size, int, 0744); +module_param(srp_max_rdma_size, int, 0644); MODULE_PARM_DESC(srp_max_rdma_size, "Maximum size of SRP RDMA transfers for new connections."); @@ -166,6 +166,7 @@ static enum rdma_ch_state srpt_set_ch_state_to_disc(struct srpt_rdma_ch *ch) case CH_CONNECTING: case CH_LIVE: ch->state = CH_DISCONNECTING; + wake_up_process(ch->thread); changed = true; break; default: @@ -173,9 +174,6 @@ static enum rdma_ch_state srpt_set_ch_state_to_disc(struct srpt_rdma_ch *ch) } spin_unlock_irqrestore(&ch->spinlock, flags); - if (changed) - wake_up_process(ch->thread); - return prev; } @@ -190,6 +188,7 @@ static bool srpt_set_ch_state_to_draining(struct srpt_rdma_ch *ch) case CH_LIVE: case CH_DISCONNECTING: ch->state = CH_DRAINING; + wake_up_process(ch->thread); changed = true; break; default: @@ -197,8 +196,6 @@ static bool srpt_set_ch_state_to_draining(struct srpt_rdma_ch *ch) } spin_unlock_irqrestore(&ch->spinlock, flags); - if (changed) - wake_up_process(ch->thread); return changed; } @@ -217,12 +214,11 @@ static bool srpt_test_and_set_ch_state(struct srpt_rdma_ch *ch, spin_lock_irqsave(&ch->spinlock, flags); if (ch->state == old) { ch->state = new; + wake_up_process(ch->thread); changed = true; } spin_unlock_irqrestore(&ch->spinlock, flags); - if (changed) - wake_up_process(ch->thread); return changed; } @@ -286,6 +282,7 @@ static void srpt_event_handler(struct ib_event_handler *handler, { struct srpt_device *sdev; struct srpt_port *sport; + u8 port_num; TRACE_ENTRY(); @@ -298,10 +295,15 @@ static void srpt_event_handler(struct ib_event_handler *handler, switch (event->event) { case IB_EVENT_PORT_ERR: - if (event->element.port_num <= sdev->device->phys_port_cnt) { - sport = &sdev->port[event->element.port_num - 1]; + port_num = event->element.port_num - 1; + if (port_num < sdev->device->phys_port_cnt) { + sport = &sdev->port[port_num]; sport->lid = 0; sport->sm_lid = 0; + } else { + WARN(true, "event %d: port_num %d out of range 1..%d\n", + event->event, port_num + 1, + sdev->device->phys_port_cnt); } break; case IB_EVENT_PORT_ACTIVE: @@ -310,10 +312,15 @@ static void srpt_event_handler(struct ib_event_handler *handler, case IB_EVENT_SM_CHANGE: case IB_EVENT_CLIENT_REREGISTER: /* Refresh port data asynchronously. */ - if (event->element.port_num <= sdev->device->phys_port_cnt) { - sport = &sdev->port[event->element.port_num - 1]; + port_num = event->element.port_num - 1; + if (port_num < sdev->device->phys_port_cnt) { + sport = &sdev->port[port_num]; if (!sport->lid && !sport->sm_lid) schedule_work(&sport->work); + } else { + WARN(true, "event %d: port_num %d out of range 1..%d\n", + event->event, port_num + 1, + sdev->device->phys_port_cnt); } break; default: @@ -343,6 +350,8 @@ static const char *get_ch_state_name(enum rdma_ch_state s) return "disconnecting"; case CH_DRAINING: return "draining"; + case CH_FREEING: + return "freeing"; } return "???"; } @@ -370,6 +379,7 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) TRACE_DBG("%s, state %s: received Last WQE event.", ch->sess_name, get_ch_state_name(ch->state)); ch->last_wqe_received = true; + BUG_ON(!ch->thread); wake_up_process(ch->thread); break; default: @@ -1959,19 +1969,18 @@ static void srpt_process_completion(struct ib_cq *cq, EXTRACHECKS_WARN_ON(cq != ch->cq); - do { - while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) { - for (i = 0; i < n; i++) { - if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV) - srpt_process_rcv_completion(cq, ch, - rcv_context, &wc[i]); - else - srpt_process_send_completion(cq, ch, - send_context, &wc[i]); - } + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) { + for (i = 0; i < n; i++) { + if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV) + srpt_process_rcv_completion(cq, ch, rcv_context, + &wc[i]); + else + srpt_process_send_completion(cq, ch, + send_context, + &wc[i]); } - } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | - IB_CQ_REPORT_MISSED_EVENTS) > 0); + } } /** @@ -1981,6 +1990,7 @@ static void srpt_completion(struct ib_cq *cq, void *ctx) { struct srpt_rdma_ch *ch = ctx; + BUG_ON(!ch->thread); wake_up_process(ch->thread); } @@ -2012,13 +2022,22 @@ static int srpt_compl_thread(void *arg) ch->sess_name); scst_unregister_session(ch->scst_sess, false, srpt_free_ch); - while (!kthread_should_stop()) { + /* + * Some HCAs can queue send completions after the Last WQE + * event. Make sure to process these work completions. + */ + while (ch->state < CH_FREEING) { set_current_state(TASK_INTERRUPTIBLE); srpt_process_completion(ch->cq, ch, SCST_CONTEXT_THREAD, SCST_CONTEXT_DIRECT); schedule(); } + complete(&ch->finished_processing_completions); + + while (!kthread_should_stop()) + schedule(); + return 0; } @@ -2155,6 +2174,7 @@ static bool __srpt_close_ch(struct srpt_rdma_ch *ch) break; case CH_DISCONNECTING: case CH_DRAINING: + case CH_FREEING: break; } @@ -2214,13 +2234,15 @@ static void srpt_free_ch(struct scst_session *sess) sdev = ch->sport->sdev; BUG_ON(!sdev); - WARN_ON(ch->state != CH_DRAINING); + WARN_ON(!srpt_test_and_set_ch_state(ch, CH_DRAINING, CH_FREEING)); WARN_ON(!ch->last_wqe_received); BUG_ON(!ch->thread); BUG_ON(ch->thread == current); - kthread_stop(ch->thread); - ch->thread = NULL; + + while (wait_for_completion_timeout(&ch->finished_processing_completions, + 10 * HZ) == 0) + PRINT_INFO("Waiting for completion processing thread ..."); srpt_destroy_ch_ib(ch); @@ -2234,10 +2256,13 @@ static void srpt_free_ch(struct scst_session *sess) ib_destroy_cm_id(ch->cm_id); - wake_up(&sdev->ch_releaseQ); + kthread_stop(ch->thread); + ch->thread = NULL; kfree(ch); + wake_up(&sdev->ch_releaseQ); + TRACE_EXIT(); } @@ -2403,6 +2428,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ch->state = CH_CONNECTING; INIT_LIST_HEAD(&ch->cmd_wait_list); init_waitqueue_head(&ch->state_wq); + init_completion(&ch->finished_processing_completions); ch->max_rsp_size = max_t(uint32_t, srp_max_rsp_size, MIN_MAX_RSP_SIZE); ch->ioctx_ring = (struct srpt_send_ioctx **) srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, @@ -2583,6 +2609,7 @@ free_ring: ch->max_rsp_size, DMA_TO_DEVICE); free_ch: + cm_id->context = NULL; kfree(ch); reject: @@ -2697,6 +2724,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) { int ret; + BUG_ON(!cm_id->context); + ret = 0; switch (event->event) { case IB_CM_REQ_RECEIVED: @@ -3750,11 +3779,15 @@ static void srpt_add_one(struct ib_device *device) sdev->srq_size = min(max(srpt_srq_size, MIN_SRPT_SRQ_SIZE), sdev->dev_attr.max_srq_wr); + memset(&srq_attr, 0, sizeof(srq_attr)); srq_attr.event_handler = srpt_srq_event; srq_attr.srq_context = (void *)sdev; srq_attr.attr.max_wr = sdev->srq_size; srq_attr.attr.max_sge = 1; srq_attr.attr.srq_limit = 0; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 0) + srq_attr.srq_type = IB_SRQT_BASIC; +#endif sdev->srq = ib_create_srq(sdev->pd, &srq_attr); if (IS_ERR(sdev->srq)) { @@ -3815,8 +3848,7 @@ static void srpt_add_one(struct ib_device *device) for (i = 0; i < sdev->srq_size; ++i) srpt_post_recv(sdev, sdev->ioctx_ring[i]); - WARN_ON(sdev->device->phys_port_cnt - > sizeof(sdev->port)/sizeof(sdev->port[0])); + WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port)); for (i = 1; i <= sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i - 1]; diff --git a/srpt/src/ib_srpt.h b/srpt/src/ib_srpt.h index a5a2178ed..2a5afacd8 100644 --- a/srpt/src/ib_srpt.h +++ b/srpt/src/ib_srpt.h @@ -272,12 +272,14 @@ struct srpt_send_ioctx { * been sent and waiting for DREP or channel is being closed * for another reason. * @CH_DRAINING: QP is in ERR state. + * @CH_FREEING: QP resources are being freed. */ enum rdma_ch_state { CH_CONNECTING, CH_LIVE, CH_DISCONNECTING, CH_DRAINING, + CH_FREEING, }; /** @@ -335,6 +337,7 @@ struct srpt_rdma_ch { wait_queue_head_t state_wq; struct list_head list; struct list_head cmd_wait_list; + struct completion finished_processing_completions; bool last_wqe_received; struct scst_session *scst_sess;