From: Shlomo Pongratz Date: Wed, 29 Aug 2012 15:14:34 +0000 (+0000) Subject: IPoIB: Fix AB-BA deadlock when deleting neighbours X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=b5120a6e11e90d98d8a752545ac60bfa1ea95f1a;p=linux-beck.git IPoIB: Fix AB-BA deadlock when deleting neighbours Lockdep points out a circular locking dependency betwwen the ipoib device priv spinlock (priv->lock) and the neighbour table rwlock (ntbl->rwlock). In the normal path, ie neigbour garbage collection task, the neigh table rwlock is taken first and then if the neighbour needs to be deleted, priv->lock is taken. However in some error paths, such as in ipoib_cm_handle_tx_wc(), priv->lock is taken first and then ipoib_neigh_free routine is called which in turn takes the neighbour table ntbl->rwlock. The solution is to get rid the neigh table rwlock completely and use only priv->lock. Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index e6bbeae1c309..0af216d21f87 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -274,7 +274,6 @@ struct ipoib_neigh_hash { struct ipoib_neigh_table { struct ipoib_neigh_hash __rcu *htbl; - rwlock_t rwlock; atomic_t entries; struct completion flushed; struct completion deleted; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 72c1fc28f079..1e19b5ae7c47 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -546,15 +546,15 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, struct ipoib_neigh *neigh; unsigned long flags; + spin_lock_irqsave(&priv->lock, flags); neigh = ipoib_neigh_alloc(daddr, dev); if (!neigh) { + spin_unlock_irqrestore(&priv->lock, flags); ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); return; } - spin_lock_irqsave(&priv->lock, flags); - path = __path_find(dev, daddr + 4); if (!path) { path = path_rec_create(dev, daddr + 4); @@ -863,10 +863,10 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) return; - write_lock_bh(&ntbl->rwlock); + spin_lock_irqsave(&priv->lock, flags); htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) goto out_unlock; @@ -883,16 +883,14 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) struct ipoib_neigh __rcu **np = &htbl->buckets[i]; while ((neigh = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock))) != NULL) { + lockdep_is_held(&priv->lock))) != NULL) { /* was the neigh idle for two GC periods */ if (time_after(neigh_obsolete, neigh->alive)) { rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); /* remove from path/mc list */ - spin_lock_irqsave(&priv->lock, flags); list_del(&neigh->list); - spin_unlock_irqrestore(&priv->lock, flags); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); } else { np = &neigh->hnext; @@ -902,7 +900,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) } out_unlock: - write_unlock_bh(&ntbl->rwlock); + spin_unlock_irqrestore(&priv->lock, flags); } static void ipoib_reap_neigh(struct work_struct *work) @@ -947,10 +945,8 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, struct ipoib_neigh *neigh; u32 hash_val; - write_lock_bh(&ntbl->rwlock); - htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) { neigh = NULL; goto out_unlock; @@ -961,10 +957,10 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, */ hash_val = ipoib_addr_hash(htbl, daddr); for (neigh = rcu_dereference_protected(htbl->buckets[hash_val], - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); neigh != NULL; neigh = rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))) { + lockdep_is_held(&priv->lock))) { if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { /* found, take one ref on behalf of the caller */ if (!atomic_inc_not_zero(&neigh->refcnt)) { @@ -987,12 +983,11 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, /* put in hash */ rcu_assign_pointer(neigh->hnext, rcu_dereference_protected(htbl->buckets[hash_val], - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); rcu_assign_pointer(htbl->buckets[hash_val], neigh); atomic_inc(&ntbl->entries); out_unlock: - write_unlock_bh(&ntbl->rwlock); return neigh; } @@ -1040,35 +1035,29 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh) struct ipoib_neigh *n; u32 hash_val; - write_lock_bh(&ntbl->rwlock); - htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) - goto out_unlock; + return; hash_val = ipoib_addr_hash(htbl, neigh->daddr); np = &htbl->buckets[hash_val]; for (n = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); n != NULL; n = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock))) { + lockdep_is_held(&priv->lock))) { if (n == neigh) { /* found */ rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); - goto out_unlock; + return; } else { np = &n->hnext; } } - -out_unlock: - write_unlock_bh(&ntbl->rwlock); - } static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) @@ -1080,7 +1069,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); ntbl->htbl = NULL; - rwlock_init(&ntbl->rwlock); htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); if (!htbl) return -ENOMEM; @@ -1128,10 +1116,10 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) int i; /* remove all neigh connected to a given path or mcast */ - write_lock_bh(&ntbl->rwlock); + spin_lock_irqsave(&priv->lock, flags); htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) goto out_unlock; @@ -1141,16 +1129,14 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) struct ipoib_neigh __rcu **np = &htbl->buckets[i]; while ((neigh = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock))) != NULL) { + lockdep_is_held(&priv->lock))) != NULL) { /* delete neighs belong to this parent */ if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) { rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); /* remove from parent list */ - spin_lock_irqsave(&priv->lock, flags); list_del(&neigh->list); - spin_unlock_irqrestore(&priv->lock, flags); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); } else { np = &neigh->hnext; @@ -1159,7 +1145,7 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) } } out_unlock: - write_unlock_bh(&ntbl->rwlock); + spin_unlock_irqrestore(&priv->lock, flags); } static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) @@ -1171,10 +1157,10 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) init_completion(&priv->ntbl.flushed); - write_lock_bh(&ntbl->rwlock); + spin_lock_irqsave(&priv->lock, flags); htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) goto out_unlock; @@ -1187,14 +1173,12 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) struct ipoib_neigh __rcu **np = &htbl->buckets[i]; while ((neigh = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock))) != NULL) { + lockdep_is_held(&priv->lock))) != NULL) { rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); /* remove from path/mc list */ - spin_lock_irqsave(&priv->lock, flags); list_del(&neigh->list); - spin_unlock_irqrestore(&priv->lock, flags); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); } } @@ -1204,7 +1188,7 @@ free_htbl: call_rcu(&htbl->rcu, neigh_hash_free_rcu); out_unlock: - write_unlock_bh(&ntbl->rwlock); + spin_unlock_irqrestore(&priv->lock, flags); if (wait_flushed) wait_for_completion(&priv->ntbl.flushed); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 13f4aa7593c8..75367249f447 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -707,9 +707,7 @@ out: neigh = ipoib_neigh_get(dev, daddr); spin_lock_irqsave(&priv->lock, flags); if (!neigh) { - spin_unlock_irqrestore(&priv->lock, flags); neigh = ipoib_neigh_alloc(daddr, dev); - spin_lock_irqsave(&priv->lock, flags); if (neigh) { kref_get(&mcast->ah->ref); neigh->ah = mcast->ah;