IPoIB/cm: Partial error clean up unmaps wrong address

[mv-sheeva.git] / drivers / infiniband / ulp / ipoib / ipoib_cm.c
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c

index ffec794b7913d6470bc2f3409a9fadd93c446cb2..ea74d1eaf0046c15c78e6a819067b8743a572a71 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -56,21 +56,15 @@ MODULE_PARM_DESC(cm_data_debug_level,
  #define IPOIB_CM_RX_DELAY       (3 * 256 * HZ)
  #define IPOIB_CM_RX_UPDATE_MASK (0x3)
  
-struct ipoib_cm_id {
-       struct ib_cm_id *id;
-       int flags;
-       u32 remote_qpn;
-       u32 remote_mtu;
-};
-
  static struct ib_qp_attr ipoib_cm_err_attr = {
         .qp_state = IB_QPS_ERR
  };
  
  #define IPOIB_CM_RX_DRAIN_WRID 0x7fffffff
  
-static struct ib_recv_wr ipoib_cm_rx_drain_wr = {
-       .wr_id = IPOIB_CM_RX_DRAIN_WRID
+static struct ib_send_wr ipoib_cm_rx_drain_wr = {
+       .wr_id = IPOIB_CM_RX_DRAIN_WRID,
+       .opcode = IB_WR_SEND,
  };
  
  static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
@@ -154,8 +148,8 @@ partial_error:
  
         ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
  
-       for (; i >= 0; --i)
-               ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
+       for (; i > 0; --i)
+               ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);
  
         dev_kfree_skb_any(skb);
         return NULL;
@@ -163,16 +157,22 @@ partial_error:
  
  static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv)
  {
-       struct ib_recv_wr *bad_wr;
+       struct ib_send_wr *bad_wr;
+       struct ipoib_cm_rx *p;
  
-       /* rx_drain_qp send queue depth is 1, so
+       /* We only reserved 1 extra slot in CQ for drain WRs, so
          * make sure we have at most 1 outstanding WR. */
         if (list_empty(&priv->cm.rx_flush_list) ||
             !list_empty(&priv->cm.rx_drain_list))
                 return;
  
-       if (ib_post_recv(priv->cm.rx_drain_qp, &ipoib_cm_rx_drain_wr, &bad_wr))
-               ipoib_warn(priv, "failed to post rx_drain wr\n");
+       /*
+        * QPs on the flush list are in the error state.  This way, a
+        * "flush error" WC is generated immediately for each WR we post.
+        */
+       p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
+       if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
+               ipoib_warn(priv, "failed to post drain wr\n");
  
         list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
  }
@@ -199,10 +199,10 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct ib_qp_init_attr attr = {
                 .event_handler = ipoib_cm_rx_event_handler,
-               .send_cq = priv->cq, /* does not matter, we never send anything */
+               .send_cq = priv->cq, /* For drain WR */
                 .recv_cq = priv->cq,
                 .srq = priv->cm.srq,
-               .cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */
+               .cap.max_send_wr = 1, /* For drain WR */
                 .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
                 .sq_sig_type = IB_SIGNAL_ALL_WR,
                 .qp_type = IB_QPT_RC,
@@ -242,6 +242,27 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
                 ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
                 return ret;
         }
+
+       /*
+        * Current Mellanox HCA firmware won't generate completions
+        * with error for drain WRs unless the QP has been moved to
+        * RTS first. This work-around leaves a window where a QP has
+        * moved to error asynchronously, but this will eventually get
+        * fixed in firmware, so let's not error out if modify QP
+        * fails.
+        */
+       qp_attr.qp_state = IB_QPS_RTS;
+       ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+       if (ret) {
+               ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
+               return 0;
+       }
+       ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+       if (ret) {
+               ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
+               return 0;
+       }
+
         return 0;
  }
  
@@ -281,6 +302,11 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
                 return -ENOMEM;
         p->dev = dev;
         p->id = cm_id;
+       cm_id->context = p;
+       p->state = IPOIB_CM_RX_LIVE;
+       p->jiffies = jiffies;
+       INIT_LIST_HEAD(&p->list);
+
         p->qp = ipoib_cm_create_rx_qp(dev, p);
         if (IS_ERR(p->qp)) {
                 ret = PTR_ERR(p->qp);
@@ -292,24 +318,24 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
         if (ret)
                 goto err_modify;
  
+       spin_lock_irq(&priv->lock);
+       queue_delayed_work(ipoib_workqueue,
+                          &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
+       /* Add this entry to passive ids list head, but do not re-add it
+        * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
+       p->jiffies = jiffies;
+       if (p->state == IPOIB_CM_RX_LIVE)
+               list_move(&p->list, &priv->cm.passive_ids);
+       spin_unlock_irq(&priv->lock);
+
         ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
         if (ret) {
                 ipoib_warn(priv, "failed to send REP: %d\n", ret);
-               goto err_rep;
+               if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
+                       ipoib_warn(priv, "unable to move qp to error state\n");
         }
-
-       cm_id->context = p;
-       p->jiffies = jiffies;
-       p->state = IPOIB_CM_RX_LIVE;
-       spin_lock_irq(&priv->lock);
-       if (list_empty(&priv->cm.passive_ids))
-               queue_delayed_work(ipoib_workqueue,
-                                  &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
-       list_add(&p->list, &priv->cm.passive_ids);
-       spin_unlock_irq(&priv->lock);
         return 0;
  
-err_rep:
  err_modify:
         ib_destroy_qp(p->qp);
  err_qp:
@@ -623,38 +649,11 @@ static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
  int ipoib_cm_dev_open(struct net_device *dev)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
-       struct ib_qp_init_attr qp_init_attr = {
-               .send_cq = priv->cq,   /* does not matter, we never send anything */
-               .recv_cq = priv->cq,
-               .cap.max_send_wr = 1,  /* FIXME: 0 Seems not to work */
-               .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
-               .cap.max_recv_wr = 1,
-               .cap.max_recv_sge = 1, /* FIXME: 0 Seems not to work */
-               .sq_sig_type = IB_SIGNAL_ALL_WR,
-               .qp_type = IB_QPT_UC,
-       };
         int ret;
  
         if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
                 return 0;
  
-       priv->cm.rx_drain_qp = ib_create_qp(priv->pd, &qp_init_attr);
-       if (IS_ERR(priv->cm.rx_drain_qp)) {
-               printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
-               ret = PTR_ERR(priv->cm.rx_drain_qp);
-               return ret;
-       }
-
-       /*
-        * We put the QP in error state directly.  This way, a "flush
-        * error" WC will be immediately generated for each WR we post.
-        */
-       ret = ib_modify_qp(priv->cm.rx_drain_qp, &ipoib_cm_err_attr, IB_QP_STATE);
-       if (ret) {
-               ipoib_warn(priv, "failed to modify drain QP to error: %d\n", ret);
-               goto err_qp;
-       }
-
         priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
         if (IS_ERR(priv->cm.id)) {
                 printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
@@ -676,8 +675,6 @@ err_listen:
         ib_destroy_cm_id(priv->cm.id);
  err_cm:
         priv->cm.id = NULL;
-err_qp:
-       ib_destroy_qp(priv->cm.rx_drain_qp);
         return ret;
  }
  
@@ -713,7 +710,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
         while (!list_empty(&priv->cm.rx_error_list) ||
                !list_empty(&priv->cm.rx_flush_list) ||
                !list_empty(&priv->cm.rx_drain_list)) {
-               if (!time_after(jiffies, begin + 5 * HZ)) {
+               if (time_after(jiffies, begin + 5 * HZ)) {
                         ipoib_warn(priv, "RX drain timing out\n");
  
                         /*
@@ -726,6 +723,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
                 }
                 spin_unlock_irq(&priv->lock);
                 msleep(1);
+               ipoib_drain_cq(dev);
                 spin_lock_irq(&priv->lock);
         }
  
@@ -739,7 +737,6 @@ void ipoib_cm_dev_stop(struct net_device *dev)
                 kfree(p);
         }
  
-       ib_destroy_qp(priv->cm.rx_drain_qp);
         cancel_delayed_work(&priv->cm.stale_task);
  }
  
@@ -755,9 +752,9 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
  
         p->mtu = be32_to_cpu(data->mtu);
  
-       if (p->mtu < priv->dev->mtu + IPOIB_ENCAP_LEN) {
-               ipoib_warn(priv, "Rejecting connection: mtu %d < device mtu %d + 4\n",
-                          p->mtu, priv->dev->mtu);
+       if (p->mtu <= IPOIB_ENCAP_LEN) {
+               ipoib_warn(priv, "Rejecting connection: mtu %d <= %d\n",
+                          p->mtu, IPOIB_ENCAP_LEN);
                 return -EINVAL;
         }