]> git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - drivers/infiniband/ulp/ipoib/ipoib_main.c
IB/ipoib: Limit call to free rdma_netdev for capable devices
[karo-tx-linux.git] / drivers / infiniband / ulp / ipoib / ipoib_main.c
index d1d3fb7a6127c5d585009267d8379c401b2ea4ff..91fae34bdd4f1b2f5ac5d813d6387ed131dfd45a 100644 (file)
@@ -108,9 +108,36 @@ static struct ib_client ipoib_client = {
        .get_net_dev_by_params = ipoib_get_net_dev_by_params,
 };
 
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+/*
+ * Netdevice notifier callback that keeps the IPoIB debug files in step
+ * with the registration state and name of an interface.
+ *
+ * Devices whose ndo_open is not ipoib_open are ignored. The callback
+ * always returns NOTIFY_DONE.
+ */
+static int ipoib_netdev_event(struct notifier_block *this,
+                             unsigned long event, void *ptr)
+{
+       struct netdev_notifier_info *info = ptr;
+       struct net_device *ndev = info->dev;
+
+       /* only react to interfaces driven by this module */
+       if (ndev->netdev_ops->ndo_open != ipoib_open)
+               return NOTIFY_DONE;
+
+       if (event == NETDEV_REGISTER) {
+               ipoib_create_debug_files(ndev);
+       } else if (event == NETDEV_CHANGENAME) {
+               /* drop the old entries and create them again after a rename */
+               ipoib_delete_debug_files(ndev);
+               ipoib_create_debug_files(ndev);
+       } else if (event == NETDEV_UNREGISTER) {
+               ipoib_delete_debug_files(ndev);
+       }
+
+       return NOTIFY_DONE;
+}
+#endif
+
 int ipoib_open(struct net_device *dev)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
        ipoib_dbg(priv, "bringing up interface\n");
 
@@ -157,7 +184,7 @@ err_disable:
 
 static int ipoib_stop(struct net_device *dev)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
        ipoib_dbg(priv, "stopping interface\n");
 
@@ -195,7 +222,7 @@ static void ipoib_uninit(struct net_device *dev)
 
 static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
        if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
                features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
@@ -205,7 +232,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu
 
 static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
        /* dev->mtu > 2K ==> connected mode */
        if (ipoib_cm_admin_enabled(dev)) {
@@ -468,7 +495,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
 
 int ipoib_set_mode(struct net_device *dev, const char *buf)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
        if ((test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) &&
             !strcmp(buf, "connected\n")) ||
@@ -505,7 +532,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
 
 struct ipoib_path *__path_find(struct net_device *dev, void *gid)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct rb_node *n = priv->path_tree.rb_node;
        struct ipoib_path *path;
        int ret;
@@ -529,7 +556,7 @@ struct ipoib_path *__path_find(struct net_device *dev, void *gid)
 
 static int __path_add(struct net_device *dev, struct ipoib_path *path)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct rb_node **n = &priv->path_tree.rb_node;
        struct rb_node *pn = NULL;
        struct ipoib_path *tpath;
@@ -564,7 +591,7 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
        while ((skb = __skb_dequeue(&path->queue)))
                dev_kfree_skb_irq(skb);
 
-       ipoib_dbg(netdev_priv(dev), "path_free\n");
+       ipoib_dbg(ipoib_priv(dev), "path_free\n");
 
        /* remove all neigh connected to this path */
        ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
@@ -598,7 +625,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
 
 int ipoib_path_iter_next(struct ipoib_path_iter *iter)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
        struct rb_node *n;
        struct ipoib_path *path;
        int ret = 1;
@@ -635,92 +662,21 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
 
 void ipoib_mark_paths_invalid(struct net_device *dev)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct ipoib_path *path, *tp;
 
        spin_lock_irq(&priv->lock);
 
        list_for_each_entry_safe(path, tp, &priv->path_list, list) {
-               ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 invalid\n",
-                       be16_to_cpu(path->pathrec.dlid),
-                       path->pathrec.dgid.raw);
+               ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n",
+                         be32_to_cpu(sa_path_get_dlid(&path->pathrec)),
+                         path->pathrec.dgid.raw);
                path->valid =  0;
        }
 
        spin_unlock_irq(&priv->lock);
 }
 
-struct classport_info_context {
-       struct ipoib_dev_priv   *priv;
-       struct completion       done;
-       struct ib_sa_query      *sa_query;
-};
-
-static void classport_info_query_cb(int status, struct ib_class_port_info *rec,
-                                   void *context)
-{
-       struct classport_info_context *cb_ctx = context;
-       struct ipoib_dev_priv *priv;
-
-       WARN_ON(!context);
-
-       priv = cb_ctx->priv;
-
-       if (status || !rec) {
-               pr_debug("device: %s failed query classport_info status: %d\n",
-                        priv->dev->name, status);
-               /* keeps the default, will try next mcast_restart */
-               priv->sm_fullmember_sendonly_support = false;
-               goto out;
-       }
-
-       if (ib_get_cpi_capmask2(rec) &
-           IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) {
-               pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n",
-                        priv->dev->name);
-               priv->sm_fullmember_sendonly_support = true;
-       } else {
-               pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n",
-                        priv->dev->name);
-               priv->sm_fullmember_sendonly_support = false;
-       }
-
-out:
-       complete(&cb_ctx->done);
-}
-
-int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv)
-{
-       struct classport_info_context *callback_context;
-       int ret;
-
-       callback_context = kmalloc(sizeof(*callback_context), GFP_KERNEL);
-       if (!callback_context)
-               return -ENOMEM;
-
-       callback_context->priv = priv;
-       init_completion(&callback_context->done);
-
-       ret = ib_sa_classport_info_rec_query(&ipoib_sa_client,
-                                            priv->ca, priv->port, 3000,
-                                            GFP_KERNEL,
-                                            classport_info_query_cb,
-                                            callback_context,
-                                            &callback_context->sa_query);
-       if (ret < 0) {
-               pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n",
-                       priv->dev->name, ret);
-               kfree(callback_context);
-               return ret;
-       }
-
-       /* waiting for the callback to finish before returnning */
-       wait_for_completion(&callback_context->done);
-       kfree(callback_context);
-
-       return ret;
-}
-
 static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
 {
        struct ipoib_pseudo_header *phdr;
@@ -731,7 +687,7 @@ static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
 
 void ipoib_flush_paths(struct net_device *dev)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct ipoib_path *path, *tp;
        LIST_HEAD(remove_list);
        unsigned long flags;
@@ -760,12 +716,12 @@ void ipoib_flush_paths(struct net_device *dev)
 }
 
 static void path_rec_completion(int status,
-                               struct ib_sa_path_rec *pathrec,
+                               struct sa_path_rec *pathrec,
                                void *path_ptr)
 {
        struct ipoib_path *path = path_ptr;
        struct net_device *dev = path->dev;
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct ipoib_ah *ah = NULL;
        struct ipoib_ah *old_ah = NULL;
        struct ipoib_neigh *neigh, *tn;
@@ -775,7 +731,8 @@ static void path_rec_completion(int status,
 
        if (!status)
                ipoib_dbg(priv, "PathRec LID 0x%04x for GID %pI6\n",
-                         be16_to_cpu(pathrec->dlid), pathrec->dgid.raw);
+                         be32_to_cpu(sa_path_get_dlid(pathrec)),
+                         pathrec->dgid.raw);
        else
                ipoib_dbg(priv, "PathRec status %d for GID %pI6\n",
                          status, path->pathrec.dgid.raw);
@@ -783,7 +740,7 @@ static void path_rec_completion(int status,
        skb_queue_head_init(&skqueue);
 
        if (!status) {
-               struct ib_ah_attr av;
+               struct rdma_ah_attr av;
 
                if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
                        ah = ipoib_create_ah(dev, priv->pd, &av);
@@ -798,7 +755,8 @@ static void path_rec_completion(int status,
                path->ah = ah;
 
                ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
-                         ah, be16_to_cpu(pathrec->dlid), pathrec->sl);
+                         ah, be32_to_cpu(sa_path_get_dlid(pathrec)),
+                         pathrec->sl);
 
                while ((skb = __skb_dequeue(&path->queue)))
                        __skb_queue_tail(&skqueue, skb);
@@ -858,7 +816,7 @@ static void path_rec_completion(int status,
 
 static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct ipoib_path *path;
 
        if (!priv->broadcast)
@@ -874,6 +832,10 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
 
        INIT_LIST_HEAD(&path->neigh_list);
 
+       if (rdma_cap_opa_ah(priv->ca, priv->port))
+               path->pathrec.rec_type = SA_PATH_REC_TYPE_OPA;
+       else
+               path->pathrec.rec_type = SA_PATH_REC_TYPE_IB;
        memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
        path->pathrec.sgid          = priv->local_gid;
        path->pathrec.pkey          = cpu_to_be16(priv->pkey);
@@ -886,7 +848,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
 static int path_rec_start(struct net_device *dev,
                          struct ipoib_path *path)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
        ipoib_dbg(priv, "Start path record lookup for %pI6\n",
                  path->pathrec.dgid.raw);
@@ -917,7 +879,8 @@ static int path_rec_start(struct net_device *dev,
 static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
                           struct net_device *dev)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
+       struct rdma_netdev *rn = netdev_priv(dev);
        struct ipoib_path *path;
        struct ipoib_neigh *neigh;
        unsigned long flags;
@@ -964,7 +927,8 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
                        }
                } else {
                        spin_unlock_irqrestore(&priv->lock, flags);
-                       ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr));
+                       path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+                                                      IPOIB_QPN(daddr));
                        ipoib_neigh_put(neigh);
                        return;
                }
@@ -998,7 +962,8 @@ err_drop:
 static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
                             struct ipoib_pseudo_header *phdr)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
+       struct rdma_netdev *rn = netdev_priv(dev);
        struct ipoib_path *path;
        unsigned long flags;
 
@@ -1038,11 +1003,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
        }
 
        if (path->ah) {
-               ipoib_dbg(priv, "Send unicast ARP to %04x\n",
-                         be16_to_cpu(path->pathrec.dlid));
+               ipoib_dbg(priv, "Send unicast ARP to %08x\n",
+                         be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
 
                spin_unlock_irqrestore(&priv->lock, flags);
-               ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+               path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+                                              IPOIB_QPN(phdr->hwaddr));
                return;
        } else if ((path->query || !path_rec_start(dev, path)) &&
                   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
@@ -1058,7 +1024,8 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 
 static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
+       struct rdma_netdev *rn = netdev_priv(dev);
        struct ipoib_neigh *neigh;
        struct ipoib_pseudo_header *phdr;
        struct ipoib_header *header;
@@ -1122,7 +1089,8 @@ send_using_neigh:
                        goto unref;
                }
        } else if (neigh->ah) {
-               ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(phdr->hwaddr));
+               neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
+                                               IPOIB_QPN(phdr->hwaddr));
                goto unref;
        }
 
@@ -1144,7 +1112,7 @@ unref:
 
 static void ipoib_timeout(struct net_device *dev)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
        ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
                   jiffies_to_msecs(jiffies - dev_trans_start(dev)));
@@ -1178,7 +1146,7 @@ static int ipoib_hard_header(struct sk_buff *skb,
 
 static void ipoib_set_mcast_list(struct net_device *dev)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
        if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
                ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set");
@@ -1190,7 +1158,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
 
 static int ipoib_get_iflink(const struct net_device *dev)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
        /* parent interface */
        if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
@@ -1218,7 +1186,7 @@ static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
 
 struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct ipoib_neigh_table *ntbl = &priv->ntbl;
        struct ipoib_neigh_hash *htbl;
        struct ipoib_neigh *neigh = NULL;
@@ -1347,7 +1315,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
 struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
                                      struct net_device *dev)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct ipoib_neigh_table *ntbl = &priv->ntbl;
        struct ipoib_neigh_hash *htbl;
        struct ipoib_neigh *neigh;
@@ -1404,7 +1372,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
 {
        /* neigh reference count was dropprd to zero */
        struct net_device *dev = neigh->dev;
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct sk_buff *skb;
        if (neigh->ah)
                ipoib_put_ah(neigh->ah);
@@ -1414,7 +1382,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
        }
        if (ipoib_cm_get(neigh))
                ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
-       ipoib_dbg(netdev_priv(dev),
+       ipoib_dbg(ipoib_priv(dev),
                  "neigh free for %06x %pI6\n",
                  IPOIB_QPN(neigh->daddr),
                  neigh->daddr + 4);
@@ -1436,7 +1404,7 @@ static void ipoib_neigh_reclaim(struct rcu_head *rp)
 void ipoib_neigh_free(struct ipoib_neigh *neigh)
 {
        struct net_device *dev = neigh->dev;
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct ipoib_neigh_table *ntbl = &priv->ntbl;
        struct ipoib_neigh_hash *htbl;
        struct ipoib_neigh __rcu **np;
@@ -1519,7 +1487,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
 
 void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct ipoib_neigh_table *ntbl = &priv->ntbl;
        struct ipoib_neigh_hash *htbl;
        unsigned long flags;
@@ -1605,7 +1573,7 @@ out_unlock:
 
 static void ipoib_neigh_hash_uninit(struct net_device *dev)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
        int stopped;
 
        ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
@@ -1622,10 +1590,26 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
        wait_for_completion(&priv->ntbl.deleted);
 }
 
+static void ipoib_dev_uninit_default(struct net_device *dev)
+{
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
-int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+       ipoib_transport_dev_cleanup(dev);
+
+       ipoib_cm_dev_cleanup(dev);
+
+       kfree(priv->rx_ring);
+       vfree(priv->tx_ring);
+
+       priv->rx_ring = NULL;
+       priv->tx_ring = NULL;
+}
+
+static int ipoib_dev_init_default(struct net_device *dev)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+       netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
 
        /* Allocate RX/TX "rings" to hold queued skbs */
        priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
@@ -1636,46 +1620,111 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
        priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
        if (!priv->tx_ring) {
                printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
-                      ca->name, ipoib_sendq_size);
+                      priv->ca->name, ipoib_sendq_size);
                goto out_rx_ring_cleanup;
        }
 
        /* priv->tx_head, tx_tail & tx_outstanding are already 0 */
 
-       if (ipoib_ib_dev_init(dev, ca, port))
+       if (ipoib_transport_dev_init(dev, priv->ca)) {
+               pr_warn("%s: ipoib_transport_dev_init failed\n",
+                       priv->ca->name);
                goto out_tx_ring_cleanup;
+       }
+
+       /* after qp created set dev address */
+       priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
+       priv->dev->dev_addr[2] = (priv->qp->qp_num >>  8) & 0xff;
+       priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff;
+
+       setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
+                   (unsigned long)dev);
+
+       return 0;
+
+out_tx_ring_cleanup:
+       vfree(priv->tx_ring);
+
+out_rx_ring_cleanup:
+       kfree(priv->rx_ring);
+
+out:
+       return -ENOMEM;
+}
+
+/*
+ * ipoib_dev_init - set up the per-device resources of an IPoIB interface
+ * @dev:  net_device being initialised
+ * @ca:   IB device the interface runs on
+ * @port: port number on @ca
+ *
+ * Records @ca and @port in the private data, allocates the ordered
+ * workqueue and the PD, runs the ndo_init hook saved in priv->rn_ops,
+ * initialises the neighbour hash and, if the interface is already
+ * IFF_UP, opens the IB device.
+ *
+ * Returns 0 on success or a negative errno on failure.
+ */
+int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+{
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
+       int ret = -ENOMEM;
+
+       priv->ca = ca;
+       priv->port = port;
+       priv->qp = NULL;
 
        /*
-        * Must be after ipoib_ib_dev_init so we can allocate a per
-        * device wq there and use it here
+        * the various IPoIB tasks assume they will never race against
+        * themselves, so always use a single thread workqueue
         */
-       if (ipoib_neigh_hash_init(priv) < 0)
+       priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM);
+       if (!priv->wq) {
+               pr_warn("%s: failed to allocate device WQ\n", dev->name);
+               goto out;
+       }
+
+       /* create the PD, which is used for both control and datapath */
+       priv->pd = ib_alloc_pd(priv->ca, 0);
+       if (IS_ERR(priv->pd)) {
+               pr_warn("%s: failed to allocate PD\n", ca->name);
+               /* don't leave an ERR_PTR behind for "if (priv->pd)" checks */
+               priv->pd = NULL;
+               goto clean_wq;
+       }
+
+       ret = priv->rn_ops->ndo_init(dev);
+       if (ret) {
+               pr_warn("%s failed to init HW resource\n", dev->name);
+               goto out_free_pd;
+       }
+
+       /*
+        * ret is 0 after a successful ndo_init, so the result must be
+        * captured here or a failure would be reported as success.
+        */
+       ret = ipoib_neigh_hash_init(priv);
+       if (ret < 0) {
+               pr_warn("%s failed to init neigh hash\n", dev->name);
                goto out_dev_uninit;
+       }
+
+       if (dev->flags & IFF_UP) {
+               if (ipoib_ib_dev_open(dev)) {
+                       pr_warn("%s failed to open device\n", dev->name);
+                       ret = -ENODEV;
+                       goto out_dev_uninit;
+               }
+       }
 
        return 0;
 
 out_dev_uninit:
        ipoib_ib_dev_cleanup(dev);
 
-out_tx_ring_cleanup:
-       vfree(priv->tx_ring);
+out_free_pd:
+       if (priv->pd) {
+               ib_dealloc_pd(priv->pd);
+               priv->pd = NULL;
+       }
 
-out_rx_ring_cleanup:
-       kfree(priv->rx_ring);
+clean_wq:
+       if (priv->wq) {
+               destroy_workqueue(priv->wq);
+               priv->wq = NULL;
+       }
 
 out:
-       return -ENOMEM;
+       return ret;
 }
 
 void ipoib_dev_cleanup(struct net_device *dev)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
+       struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv;
        LIST_HEAD(head);
 
        ASSERT_RTNL();
 
-       ipoib_delete_debug_files(dev);
-
        /* Delete any child interfaces first */
        list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
                /* Stop GC on child */
@@ -1685,24 +1734,21 @@ void ipoib_dev_cleanup(struct net_device *dev)
        }
        unregister_netdevice_many(&head);
 
-       /*
-        * Must be before ipoib_ib_dev_cleanup or we delete an in use
-        * work queue
-        */
        ipoib_neigh_hash_uninit(dev);
 
        ipoib_ib_dev_cleanup(dev);
 
-       kfree(priv->rx_ring);
-       vfree(priv->tx_ring);
-
-       priv->rx_ring = NULL;
-       priv->tx_ring = NULL;
+       /* no more works over the priv->wq */
+       if (priv->wq) {
+               flush_workqueue(priv->wq);
+               destroy_workqueue(priv->wq);
+               priv->wq = NULL;
+       }
 }
 
 static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
        return ib_set_vf_link_state(priv->ca, vf, priv->port, link_state);
 }
@@ -1710,7 +1756,7 @@ static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_stat
 static int ipoib_get_vf_config(struct net_device *dev, int vf,
                               struct ifla_vf_info *ivf)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
        int err;
 
        err = ib_get_vf_config(priv->ca, vf, priv->port, ivf);
@@ -1724,7 +1770,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf,
 
 static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
        if (type != IFLA_VF_IB_NODE_GUID && type != IFLA_VF_IB_PORT_GUID)
                return -EINVAL;
@@ -1735,7 +1781,7 @@ static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
 static int ipoib_get_vf_stats(struct net_device *dev, int vf,
                              struct ifla_vf_stats *vf_stats)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
        return ib_get_vf_stats(priv->ca, vf, priv->port, vf_stats);
 }
@@ -1773,21 +1819,12 @@ static const struct net_device_ops ipoib_netdev_ops_vf = {
        .ndo_get_iflink          = ipoib_get_iflink,
 };
 
-void ipoib_setup(struct net_device *dev)
+void ipoib_setup_common(struct net_device *dev)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-       if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
-               dev->netdev_ops = &ipoib_netdev_ops_vf;
-       else
-               dev->netdev_ops = &ipoib_netdev_ops_pf;
-
        dev->header_ops          = &ipoib_header_ops;
 
        ipoib_set_ethtool_ops(dev);
 
-       netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
-
        dev->watchdog_timeo      = HZ;
 
        dev->flags              |= IFF_BROADCAST | IFF_MULTICAST;
@@ -1801,11 +1838,14 @@ void ipoib_setup(struct net_device *dev)
        netif_keep_dst(dev);
 
        memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
+}
 
-       priv->dev = dev;
+static void ipoib_build_priv(struct net_device *dev)
+{
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
+       priv->dev = dev;
        spin_lock_init(&priv->lock);
-
        init_rwsem(&priv->vlan_rwsem);
 
        INIT_LIST_HEAD(&priv->path_list);
@@ -1823,22 +1863,99 @@ void ipoib_setup(struct net_device *dev)
        INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
 }
 
-struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
+/*
+ * Resource init/uninit and open/stop hooks of the default IPoIB netdev.
+ * ipoib_create_netdev_default() installs this table as dev->netdev_ops.
+ */
+static const struct net_device_ops ipoib_netdev_default_pf = {
+       .ndo_init                = ipoib_dev_init_default,
+       .ndo_uninit              = ipoib_dev_uninit_default,
+       .ndo_open                = ipoib_ib_dev_open_default,
+       .ndo_stop                = ipoib_ib_dev_stop_default,
+};
+
+static struct net_device
+*ipoib_create_netdev_default(struct ib_device *hca,
+                            const char *name,
+                            unsigned char name_assign_type,
+                            void (*setup)(struct net_device *))
 {
        struct net_device *dev;
+       struct rdma_netdev *rn;
 
-       dev = alloc_netdev((int)sizeof(struct ipoib_dev_priv), name,
-                          NET_NAME_UNKNOWN, ipoib_setup);
+       dev = alloc_netdev((int)sizeof(struct rdma_netdev),
+                          name,
+                          name_assign_type, setup);
        if (!dev)
                return NULL;
 
-       return netdev_priv(dev);
+       rn = netdev_priv(dev);
+
+       rn->send = ipoib_send;
+       rn->attach_mcast = ipoib_mcast_attach;
+       rn->detach_mcast = ipoib_mcast_detach;
+       rn->hca = hca;
+
+       dev->netdev_ops = &ipoib_netdev_default_pf;
+
+       return dev;
+}
+
+/*
+ * Obtain the net_device for an IPoIB interface.
+ *
+ * The HCA's alloc_rdma_netdev hook is preferred. The default IPoIB
+ * netdev is used when the hook is absent or reports -EOPNOTSUPP. Any
+ * other hook failure (NULL or a different error pointer) yields NULL.
+ */
+static struct net_device *ipoib_get_netdev(struct ib_device *hca, u8 port,
+                                          const char *name)
+{
+       struct net_device *ndev;
+
+       if (hca->alloc_rdma_netdev) {
+               ndev = hca->alloc_rdma_netdev(hca, port,
+                                             RDMA_NETDEV_IPOIB, name,
+                                             NET_NAME_UNKNOWN,
+                                             ipoib_setup_common);
+               /* the provider handed back a usable device */
+               if (!IS_ERR_OR_NULL(ndev))
+                       return ndev;
+
+               /* hard failure: do not fall back to the default netdev */
+               if (PTR_ERR(ndev) != -EOPNOTSUPP)
+                       return NULL;
+       }
+
+       return ipoib_create_netdev_default(hca, name, NET_NAME_UNKNOWN,
+                                          ipoib_setup_common);
+}
+
+/*
+ * ipoib_intf_alloc - allocate an IPoIB interface and its private data
+ * @hca:  IB device the interface is created on
+ * @port: port number on @hca
+ * @name: net_device name (or name format) passed to the netdev allocator
+ *
+ * The ipoib_dev_priv is allocated separately from the net_device and is
+ * linked to it through rdma_netdev->clnt_priv. The netdev_ops installed
+ * by the netdev allocator are saved in priv->rn_ops before being
+ * replaced by the IPoIB PF or VF ops.
+ *
+ * Returns the new private data, or NULL on failure.
+ */
+struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+                                       const char *name)
+{
+       struct net_device *dev;
+       struct ipoib_dev_priv *priv;
+       struct rdma_netdev *rn;
+
+       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return NULL;
+
+       dev = ipoib_get_netdev(hca, port, name);
+       if (!dev)
+               goto free_priv;
+
+       priv->rn_ops = dev->netdev_ops;
+
+       /*
+        * priv was zero-allocated just above, so priv->hca_caps is still 0
+        * at this point; test the device capability flags directly.
+        */
+       if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION)
+               dev->netdev_ops = &ipoib_netdev_ops_vf;
+       else
+               dev->netdev_ops = &ipoib_netdev_ops_pf;
+
+       rn = netdev_priv(dev);
+       rn->clnt_priv = priv;
+       ipoib_build_priv(dev);
+
+       return priv;
+free_priv:
+       kfree(priv);
+       return NULL;
+}
 
 static ssize_t show_pkey(struct device *dev,
                         struct device_attribute *attr, char *buf)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+       struct net_device *ndev = to_net_dev(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(ndev);
 
        return sprintf(buf, "0x%04x\n", priv->pkey);
 }
@@ -1847,14 +1964,15 @@ static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
 static ssize_t show_umcast(struct device *dev,
                           struct device_attribute *attr, char *buf)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+       struct net_device *ndev = to_net_dev(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(ndev);
 
        return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
 }
 
 void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(ndev);
+       struct ipoib_dev_priv *priv = ipoib_priv(ndev);
 
        if (umcast_val > 0) {
                set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
@@ -1927,7 +2045,7 @@ static int ipoib_check_lladdr(struct net_device *dev,
 
 static int ipoib_set_mac(struct net_device *dev, void *addr)
 {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct sockaddr_storage *ss = addr;
        int ret;
 
@@ -2000,7 +2118,7 @@ void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
        priv->hca_caps = hca->attrs.device_cap_flags;
 
        if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
-               priv->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
+               priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
 
                if (priv->hca_caps & IB_DEVICE_UD_TSO)
                        priv->dev->hw_features |= NETIF_F_TSO;
@@ -2016,7 +2134,7 @@ static struct net_device *ipoib_add_port(const char *format,
        struct ib_port_attr attr;
        int result = -ENOMEM;
 
-       priv = ipoib_intf_alloc(format);
+       priv = ipoib_intf_alloc(hca, port, format);
        if (!priv)
                goto alloc_mem_failed;
 
@@ -2090,8 +2208,6 @@ static struct net_device *ipoib_add_port(const char *format,
                goto register_failed;
        }
 
-       ipoib_create_debug_files(priv->dev);
-
        if (ipoib_cm_add_mode_attr(priv->dev))
                goto sysfs_failed;
        if (ipoib_add_pkey_attr(priv->dev))
@@ -2106,7 +2222,6 @@ static struct net_device *ipoib_add_port(const char *format,
        return priv->dev;
 
 sysfs_failed:
-       ipoib_delete_debug_files(priv->dev);
        unregister_netdev(priv->dev);
 
 register_failed:
@@ -2122,6 +2237,7 @@ event_failed:
 
 device_init_failed:
        free_netdev(priv->dev);
+       kfree(priv);
 
 alloc_mem_failed:
        return ERR_PTR(result);
@@ -2146,7 +2262,7 @@ static void ipoib_add_one(struct ib_device *device)
                        continue;
                dev = ipoib_add_port("ib%d", device, p);
                if (!IS_ERR(dev)) {
-                       priv = netdev_priv(dev);
+                       priv = ipoib_priv(dev);
                        list_add_tail(&priv->list, dev_list);
                        count++;
                }
@@ -2162,7 +2278,7 @@ static void ipoib_add_one(struct ib_device *device)
 
 static void ipoib_remove_one(struct ib_device *device, void *client_data)
 {
-       struct ipoib_dev_priv *priv, *tmp;
+       struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
        struct list_head *dev_list = client_data;
 
        if (!dev_list)
@@ -2185,12 +2301,26 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
                flush_workqueue(priv->wq);
 
                unregister_netdev(priv->dev);
-               free_netdev(priv->dev);
+               if (device->free_rdma_netdev)
+                       device->free_rdma_netdev(priv->dev);
+               else
+                       free_netdev(priv->dev);
+
+               list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list)
+                       kfree(cpriv);
+
+               kfree(priv);
        }
 
        kfree(dev_list);
 }
 
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+/* Notifier block that routes netdevice events to ipoib_netdev_event(). */
+static struct notifier_block ipoib_netdev_notifier = {
+       .notifier_call = ipoib_netdev_event,
+};
+#endif
+
 static int __init ipoib_init_module(void)
 {
        int ret;
@@ -2243,6 +2373,9 @@ static int __init ipoib_init_module(void)
        if (ret)
                goto err_client;
 
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+       register_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
        return 0;
 
 err_client:
@@ -2260,6 +2393,9 @@ err_fs:
 
 static void __exit ipoib_cleanup_module(void)
 {
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+       unregister_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
        ipoib_netlink_fini();
        ib_unregister_client(&ipoib_client);
        ib_sa_unregister_client(&ipoib_sa_client);