Backported some 2.6.32 IB bug fixes to the 2.6.30 kernel.

git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@1442 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
Bart Van Assche
2010-01-10 13:14:28 +00:00
parent 5cc522712e
commit 048a338747
3 changed files with 186 additions and 0 deletions

View File

@@ -0,0 +1,60 @@
This patch was introduced in kernel 2.6.31 with the following description:
commit 4e49627b9bc29a14b393c480e8c979e3bc922ef7
Author: Oleg Nesterov <oleg@redhat.com>
Date: Sat Sep 5 11:17:06 2009 -0700
workqueues: introduce __cancel_delayed_work()
cancel_delayed_work() has to use del_timer_sync() to guarantee the timer
function is not running after return. But most users doesn't actually
need this, and del_timer_sync() has problems: it is not useable from
interrupt, and it depends on every lock which could be taken from irq.
Introduce __cancel_delayed_work() which calls del_timer() instead.
The immediate reason for this patch is
http://bugzilla.kernel.org/show_bug.cgi?id=13757
but hopefully this helper makes sense anyway.
As for 13757 bug, actually we need requeue_delayed_work(), but its
semantics are not yet clear.
Merge this patch early to resolves cross-tree interdependencies between
input and infiniband.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 13e1adf..6273fa9 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -240,6 +240,21 @@ static inline int cancel_delayed_work(struct delayed_work *work)
return ret;
}
+/*
+ * Like above, but uses del_timer() instead of del_timer_sync(). This means,
+ * if it returns 0 the timer function may be running and the queueing is in
+ * progress.
+ */
+static inline int __cancel_delayed_work(struct delayed_work *work)
+{
+ int ret;
+
+ ret = del_timer(&work->timer);
+ if (ret)
+ work_clear_pending(&work->work);
+ return ret;
+}
+
extern int cancel_delayed_work_sync(struct delayed_work *work);
/* Obsolete. use cancel_delayed_work_sync() */

View File

@@ -0,0 +1,63 @@
commit 6b2eef8fd78ff909c3396b8671d57c42559cc51d
Author: Roland Dreier <rolandd@cisco.com>
Date: Mon Sep 7 08:27:50 2009 -0700
IB/mad: Fix possible lock-lock-timer deadlock
Lockdep reported a possible deadlock with cm_id_priv->lock,
mad_agent_priv->lock and mad_agent_priv->timed_work.timer; this
happens because the mad module does
cancel_delayed_work(&mad_agent_priv->timed_work);
while holding mad_agent_priv->lock. cancel_delayed_work() internally
does del_timer_sync(&mad_agent_priv->timed_work.timer).
This can turn into a deadlock because mad_agent_priv->lock is taken
inside cm_id_priv->lock, so we can get the following set of contexts
that deadlock each other:
A: holding cm_id_priv->lock, waiting for mad_agent_priv->lock
B: holding mad_agent_priv->lock, waiting for del_timer_sync()
C: interrupt during mad_agent_priv->timed_work.timer that takes
cm_id_priv->lock
Fix this by using the new __cancel_delayed_work() interface (which
internally does del_timer() instead of del_timer_sync()) in all the
places where we are holding a lock.
Addresses: http://bugzilla.kernel.org/show_bug.cgi?id=13757
Reported-by: Bart Van Assche <bart.vanassche@gmail.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index de922a0..bc30c00 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1974,7 +1974,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
unsigned long delay;
if (list_empty(&mad_agent_priv->wait_list)) {
- cancel_delayed_work(&mad_agent_priv->timed_work);
+ __cancel_delayed_work(&mad_agent_priv->timed_work);
} else {
mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
struct ib_mad_send_wr_private,
@@ -1983,7 +1983,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
if (time_after(mad_agent_priv->timeout,
mad_send_wr->timeout)) {
mad_agent_priv->timeout = mad_send_wr->timeout;
- cancel_delayed_work(&mad_agent_priv->timed_work);
+ __cancel_delayed_work(&mad_agent_priv->timed_work);
delay = mad_send_wr->timeout - jiffies;
if ((long)delay <= 0)
delay = 1;
@@ -2023,7 +2023,7 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
/* Reschedule a work item if we have a shorter timeout */
if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
- cancel_delayed_work(&mad_agent_priv->timed_work);
+ __cancel_delayed_work(&mad_agent_priv->timed_work);
queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
&mad_agent_priv->timed_work, delay);
}

View File

@@ -0,0 +1,63 @@
This is a backported version of a 2.6.32 patch with the following description:
commit 721d67cdca5b7642b380ca0584de8dceecf6102f
Author: Roland Dreier <rolandd@cisco.com>
Date: Sat Sep 5 20:23:40 2009 -0700
IPoIB: Drop priv->lock before calling ipoib_send()
IPoIB currently must use irqsave locking for priv->lock, since it is
taken from interrupt context in one path. However, ipoib_send() does
skb_orphan(), and the network stack locking is not IRQ-safe.
Therefore we need to make sure we don't hold priv->lock when calling
ipoib_send() to avoid lockdep warnings (the code was almost certainly
safe in practice, since the only code path that takes priv->lock from
interrupt context would never call into the network stack).
Addresses: http://bugzilla.kernel.org/show_bug.cgi?id=13757
Reported-by: Bart Van Assche <bart.vanassche@gmail.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index e319d91..2bf5116 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -604,8 +604,11 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
skb_queue_len(&neigh->queue));
goto err_drop;
}
- } else
+ } else {
+ spin_unlock_irqrestore(&priv->lock, flags);
ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+ return;
+ }
} else {
neigh->ah = NULL;
@@ -688,7 +691,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
ipoib_dbg(priv, "Send unicast ARP to %04x\n",
be16_to_cpu(path->pathrec.dlid));
+ spin_unlock_irqrestore(&priv->lock, flags);
ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+ return;
} else if ((path->query || !path_rec_start(dev, path)) &&
skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
/* put pseudoheader back on for next time */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index a0e9753..a0825fe 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -720,7 +720,9 @@ out:
}
}
+ spin_unlock_irqrestore(&priv->lock, flags);
ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+ return;
}
unlock: