drivers/infiniband/hw/mlx5/main.c
1 /*
2  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <linux/highmem.h>
34 #include <linux/module.h>
35 #include <linux/init.h>
36 #include <linux/errno.h>
37 #include <linux/pci.h>
38 #include <linux/dma-mapping.h>
39 #include <linux/slab.h>
40 #include <linux/io-mapping.h>
41 #include <linux/sched.h>
42 #include <rdma/ib_user_verbs.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 #include <linux/mlx5/vport.h>
46 #include <rdma/ib_smi.h>
47 #include <rdma/ib_umem.h>
48 #include "user.h"
49 #include "mlx5_ib.h"
50
51 #define DRIVER_NAME "mlx5_ib"
52 #define DRIVER_VERSION "2.2-1"
53 #define DRIVER_RELDATE  "Feb 2014"
54
55 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
56 MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
57 MODULE_LICENSE("Dual BSD/GPL");
58 MODULE_VERSION(DRIVER_VERSION);
59
60 static int deprecated_prof_sel = 2;
61 module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
62 MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
63
64 static char mlx5_version[] =
65         DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
66         DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
67
68 static enum rdma_link_layer
69 mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
70 {
71         switch (port_type_cap) {
72         case MLX5_CAP_PORT_TYPE_IB:
73                 return IB_LINK_LAYER_INFINIBAND;
74         case MLX5_CAP_PORT_TYPE_ETH:
75                 return IB_LINK_LAYER_ETHERNET;
76         default:
77                 return IB_LINK_LAYER_UNSPECIFIED;
78         }
79 }
80
81 static enum rdma_link_layer
82 mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
83 {
84         struct mlx5_ib_dev *dev = to_mdev(device);
85         int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
86
87         return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
88 }
89
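/*
 * Netdevice notifier: on NETDEV_REGISTER/UNREGISTER, cache (or clear) the
 * net_device belonging to this HCA's PCI function under roce.netdev_lock,
 * so RoCE port queries can reach the matching Ethernet interface.
 */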
90 static int mlx5_netdev_event(struct notifier_block *this,
91                              unsigned long event, void *ptr)
92 {
93         struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
94         struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
95                                                  roce.nb);
96
97         if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER))
98                 return NOTIFY_DONE;
99
100         write_lock(&ibdev->roce.netdev_lock);
101         if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
102                 ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev;
103         write_unlock(&ibdev->roce.netdev_lock);
104
105         return NOTIFY_DONE;
106 }
107
108 static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
109                                              u8 port_num)
110 {
111         struct mlx5_ib_dev *ibdev = to_mdev(device);
112         struct net_device *ndev;
113
114         /* Ensure ndev does not disappear before we invoke dev_hold()
115          */
116         read_lock(&ibdev->roce.netdev_lock);
117         ndev = ibdev->roce.netdev;
118         if (ndev)
119                 dev_hold(ndev);
120         read_unlock(&ibdev->roce.netdev_lock);
121
122         return ndev;
123 }
124
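/*
 * Port attributes for an Ethernet (RoCE) link layer: state and MTU are
 * derived from the cached netdev; width and speed are hard-coded for now
 * (see the TODO markers below).
 */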
125 static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
126                                 struct ib_port_attr *props)
127 {
128         struct mlx5_ib_dev *dev = to_mdev(device);
129         struct net_device *ndev;
130         enum ib_mtu ndev_ib_mtu;
131
132         memset(props, 0, sizeof(*props));
133
134         props->port_cap_flags  |= IB_PORT_CM_SUP;
135         props->port_cap_flags  |= IB_PORT_IP_BASED_GIDS;
136
137         props->gid_tbl_len      = MLX5_CAP_ROCE(dev->mdev,
138                                                 roce_address_table_size);
139         props->max_mtu          = IB_MTU_4096;
140         props->max_msg_sz       = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
141         props->pkey_tbl_len     = 1;
142         props->state            = IB_PORT_DOWN;
143         props->phys_state       = 3;
144
145         mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev,
146                                             (u16 *)&props->qkey_viol_cntr);
147
148         ndev = mlx5_ib_get_netdev(device, port_num);
149         if (!ndev)
150                 return 0;
151
152         if (netif_running(ndev) && netif_carrier_ok(ndev)) {
153                 props->state      = IB_PORT_ACTIVE;
154                 props->phys_state = 5;
155         }
156
157         ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
158
159         dev_put(ndev);
160
161         props->active_mtu       = min(props->max_mtu, ndev_ib_mtu);
162
163         props->active_width     = IB_WIDTH_4X;  /* TODO */
164         props->active_speed     = IB_SPEED_QDR; /* TODO */
165
166         return 0;
167 }
168
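/*
 * Pack a GID and its attributes (source MAC, optional VLAN, RoCE version,
 * L3 type) into the roce_addr_layout consumed by SET_ROCE_ADDRESS.
 * A NULL gid leaves the entry zeroed, which is how mlx5_ib_del_gid()
 * clears an address slot.
 */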
169 static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
170                                      const struct ib_gid_attr *attr,
171                                      void *mlx5_addr)
172 {
173 #define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
174         char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
175                                                source_l3_address);
176         void *mlx5_addr_mac     = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
177                                                source_mac_47_32);
178
179         if (!gid)
180                 return;
181
182         ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
183
184         if (is_vlan_dev(attr->ndev)) {
185                 MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
186                 MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
187         }
188
189         switch (attr->gid_type) {
190         case IB_GID_TYPE_IB:
191                 MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
192                 break;
193         case IB_GID_TYPE_ROCE_UDP_ENCAP:
194                 MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
195                 break;
196
197         default:
198                 WARN_ON(true);
199         }
200
201         if (attr->gid_type != IB_GID_TYPE_IB) {
202                 if (ipv6_addr_v4mapped((void *)gid))
203                         MLX5_SET_RA(mlx5_addr, roce_l3_type,
204                                     MLX5_ROCE_L3_TYPE_IPV4);
205                 else
206                         MLX5_SET_RA(mlx5_addr, roce_l3_type,
207                                     MLX5_ROCE_L3_TYPE_IPV6);
208         }
209
210         if ((attr->gid_type == IB_GID_TYPE_IB) ||
211             !ipv6_addr_v4mapped((void *)gid))
212                 memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
213         else
214                 memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
215 }
216
217 static int set_roce_addr(struct ib_device *device, u8 port_num,
218                          unsigned int index,
219                          const union ib_gid *gid,
220                          const struct ib_gid_attr *attr)
221 {
222         struct mlx5_ib_dev *dev = to_mdev(device);
223         u32  in[MLX5_ST_SZ_DW(set_roce_address_in)];
224         u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
225         void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
226         enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
227
228         if (ll != IB_LINK_LAYER_ETHERNET)
229                 return -EINVAL;
230
231         memset(in, 0, sizeof(in));
232
233         ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
234
235         MLX5_SET(set_roce_address_in, in, roce_address_index, index);
236         MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
237
238         memset(out, 0, sizeof(out));
239         return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
240 }
241
242 static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
243                            unsigned int index, const union ib_gid *gid,
244                            const struct ib_gid_attr *attr,
245                            __always_unused void **context)
246 {
247         return set_roce_addr(device, port_num, index, gid, attr);
248 }
249
250 static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
251                            unsigned int index, __always_unused void **context)
252 {
253         return set_roce_addr(device, port_num, index, NULL, NULL);
254 }
255
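/*
 * For RoCE v2 (UDP-encapsulated) GID entries, return the minimum source UDP
 * port advertised by the device; returns 0 for other GID types or on error.
 */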
256 __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
257                                int index)
258 {
259         struct ib_gid_attr attr;
260         union ib_gid gid;
261
262         if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
263                 return 0;
264
265         if (!attr.ndev)
266                 return 0;
267
268         dev_put(attr.ndev);
269
270         if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
271                 return 0;
272
273         return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
274 }
275
276 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
277 {
278         return !dev->mdev->issi;
279 }
280
281 enum {
282         MLX5_VPORT_ACCESS_METHOD_MAD,
283         MLX5_VPORT_ACCESS_METHOD_HCA,
284         MLX5_VPORT_ACCESS_METHOD_NIC,
285 };
286
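/*
 * Select how vport attributes are queried: MAD commands when the firmware
 * issi is 0, NIC vport commands when port 1 is Ethernet, and HCA vport
 * commands otherwise.
 */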
287 static int mlx5_get_vport_access_method(struct ib_device *ibdev)
288 {
289         if (mlx5_use_mad_ifc(to_mdev(ibdev)))
290                 return MLX5_VPORT_ACCESS_METHOD_MAD;
291
292         if (mlx5_ib_port_link_layer(ibdev, 1) ==
293             IB_LINK_LAYER_ETHERNET)
294                 return MLX5_VPORT_ACCESS_METHOD_NIC;
295
296         return MLX5_VPORT_ACCESS_METHOD_HCA;
297 }
298
299 static int mlx5_query_system_image_guid(struct ib_device *ibdev,
300                                         __be64 *sys_image_guid)
301 {
302         struct mlx5_ib_dev *dev = to_mdev(ibdev);
303         struct mlx5_core_dev *mdev = dev->mdev;
304         u64 tmp;
305         int err;
306
307         switch (mlx5_get_vport_access_method(ibdev)) {
308         case MLX5_VPORT_ACCESS_METHOD_MAD:
309                 return mlx5_query_mad_ifc_system_image_guid(ibdev,
310                                                             sys_image_guid);
311
312         case MLX5_VPORT_ACCESS_METHOD_HCA:
313                 err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
314                 break;
315
316         case MLX5_VPORT_ACCESS_METHOD_NIC:
317                 err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
318                 break;
319
320         default:
321                 return -EINVAL;
322         }
323
324         if (!err)
325                 *sys_image_guid = cpu_to_be64(tmp);
326
327         return err;
328
329 }
330
331 static int mlx5_query_max_pkeys(struct ib_device *ibdev,
332                                 u16 *max_pkeys)
333 {
334         struct mlx5_ib_dev *dev = to_mdev(ibdev);
335         struct mlx5_core_dev *mdev = dev->mdev;
336
337         switch (mlx5_get_vport_access_method(ibdev)) {
338         case MLX5_VPORT_ACCESS_METHOD_MAD:
339                 return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys);
340
341         case MLX5_VPORT_ACCESS_METHOD_HCA:
342         case MLX5_VPORT_ACCESS_METHOD_NIC:
343                 *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
344                                                 pkey_table_size));
345                 return 0;
346
347         default:
348                 return -EINVAL;
349         }
350 }
351
352 static int mlx5_query_vendor_id(struct ib_device *ibdev,
353                                 u32 *vendor_id)
354 {
355         struct mlx5_ib_dev *dev = to_mdev(ibdev);
356
357         switch (mlx5_get_vport_access_method(ibdev)) {
358         case MLX5_VPORT_ACCESS_METHOD_MAD:
359                 return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id);
360
361         case MLX5_VPORT_ACCESS_METHOD_HCA:
362         case MLX5_VPORT_ACCESS_METHOD_NIC:
363                 return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
364
365         default:
366                 return -EINVAL;
367         }
368 }
369
370 static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
371                                 __be64 *node_guid)
372 {
373         u64 tmp;
374         int err;
375
376         switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
377         case MLX5_VPORT_ACCESS_METHOD_MAD:
378                 return mlx5_query_mad_ifc_node_guid(dev, node_guid);
379
380         case MLX5_VPORT_ACCESS_METHOD_HCA:
381                 err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
382                 break;
383
384         case MLX5_VPORT_ACCESS_METHOD_NIC:
385                 err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
386                 break;
387
388         default:
389                 return -EINVAL;
390         }
391
392         if (!err)
393                 *node_guid = cpu_to_be64(tmp);
394
395         return err;
396 }
397
398 struct mlx5_reg_node_desc {
399         u8      desc[64];
400 };
401
402 static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
403 {
404         struct mlx5_reg_node_desc in;
405
406         if (mlx5_use_mad_ifc(dev))
407                 return mlx5_query_mad_ifc_node_desc(dev, node_desc);
408
409         memset(&in, 0, sizeof(in));
410
411         return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
412                                     sizeof(struct mlx5_reg_node_desc),
413                                     MLX5_REG_NODE_DESC, 0, 0);
414 }
415
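/*
 * Fill struct ib_device_attr from the HCA capability pages. Most limits are
 * reported by firmware as log2 values and expanded here; optional features
 * (signature handover, ODP, cross-channel) are advertised through
 * device_cap_flags only when the matching capability bit is set.
 */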
416 static int mlx5_ib_query_device(struct ib_device *ibdev,
417                                 struct ib_device_attr *props,
418                                 struct ib_udata *uhw)
419 {
420         struct mlx5_ib_dev *dev = to_mdev(ibdev);
421         struct mlx5_core_dev *mdev = dev->mdev;
422         int err = -ENOMEM;
423         int max_rq_sg;
424         int max_sq_sg;
425         u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
426
427         if (uhw->inlen || uhw->outlen)
428                 return -EINVAL;
429
430         memset(props, 0, sizeof(*props));
431         err = mlx5_query_system_image_guid(ibdev,
432                                            &props->sys_image_guid);
433         if (err)
434                 return err;
435
436         err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
437         if (err)
438                 return err;
439
440         err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
441         if (err)
442                 return err;
443
444         props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
445                 (fw_rev_min(dev->mdev) << 16) |
446                 fw_rev_sub(dev->mdev);
447         props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
448                 IB_DEVICE_PORT_ACTIVE_EVENT             |
449                 IB_DEVICE_SYS_IMAGE_GUID                |
450                 IB_DEVICE_RC_RNR_NAK_GEN;
451
452         if (MLX5_CAP_GEN(mdev, pkv))
453                 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
454         if (MLX5_CAP_GEN(mdev, qkv))
455                 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
456         if (MLX5_CAP_GEN(mdev, apm))
457                 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
458         if (MLX5_CAP_GEN(mdev, xrc))
459                 props->device_cap_flags |= IB_DEVICE_XRC;
460         props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
461         if (MLX5_CAP_GEN(mdev, sho)) {
462                 props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
463                 /* At this stage no support for signature handover */
464                 props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
465                                       IB_PROT_T10DIF_TYPE_2 |
466                                       IB_PROT_T10DIF_TYPE_3;
467                 props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
468                                        IB_GUARD_T10DIF_CSUM;
469         }
470         if (MLX5_CAP_GEN(mdev, block_lb_mc))
471                 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
472
473         props->vendor_part_id      = mdev->pdev->device;
474         props->hw_ver              = mdev->pdev->revision;
475
476         props->max_mr_size         = ~0ull;
477         props->page_size_cap       = ~(min_page_size - 1);
478         props->max_qp              = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
479         props->max_qp_wr           = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
480         max_rq_sg =  MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
481                      sizeof(struct mlx5_wqe_data_seg);
482         max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) -
483                      sizeof(struct mlx5_wqe_ctrl_seg)) /
484                      sizeof(struct mlx5_wqe_data_seg);
485         props->max_sge = min(max_rq_sg, max_sq_sg);
486         props->max_sge_rd = props->max_sge;
487         props->max_cq              = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
488         props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1;
489         props->max_mr              = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
490         props->max_pd              = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
491         props->max_qp_rd_atom      = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
492         props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
493         props->max_srq             = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
494         props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
495         props->local_ca_ack_delay  = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
496         props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
497         props->max_srq_sge         = max_rq_sg - 1;
498         props->max_fast_reg_page_list_len = (unsigned int)-1;
499         props->atomic_cap          = IB_ATOMIC_NONE;
500         props->masked_atomic_cap   = IB_ATOMIC_NONE;
501         props->max_mcast_grp       = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
502         props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
503         props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
504                                            props->max_mcast_grp;
505         props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
506         props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
507         props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
508
509 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
510         if (MLX5_CAP_GEN(mdev, pg))
511                 props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
512         props->odp_caps = dev->odp_caps;
513 #endif
514
515         if (MLX5_CAP_GEN(mdev, cd))
516                 props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
517
518         return 0;
519 }
520
521 enum mlx5_ib_width {
522         MLX5_IB_WIDTH_1X        = 1 << 0,
523         MLX5_IB_WIDTH_2X        = 1 << 1,
524         MLX5_IB_WIDTH_4X        = 1 << 2,
525         MLX5_IB_WIDTH_8X        = 1 << 3,
526         MLX5_IB_WIDTH_12X       = 1 << 4
527 };
528
529 static int translate_active_width(struct ib_device *ibdev, u8 active_width,
530                                   u8 *ib_width)
531 {
532         struct mlx5_ib_dev *dev = to_mdev(ibdev);
533         int err = 0;
534
535         if (active_width & MLX5_IB_WIDTH_1X) {
536                 *ib_width = IB_WIDTH_1X;
537         } else if (active_width & MLX5_IB_WIDTH_2X) {
538                 mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n",
539                             (int)active_width);
540                 err = -EINVAL;
541         } else if (active_width & MLX5_IB_WIDTH_4X) {
542                 *ib_width = IB_WIDTH_4X;
543         } else if (active_width & MLX5_IB_WIDTH_8X) {
544                 *ib_width = IB_WIDTH_8X;
545         } else if (active_width & MLX5_IB_WIDTH_12X) {
546                 *ib_width = IB_WIDTH_12X;
547         } else {
548                 mlx5_ib_dbg(dev, "Invalid active_width %d\n",
549                             (int)active_width);
550                 err = -EINVAL;
551         }
552
553         return err;
554 }
555
556 static int mlx5_mtu_to_ib_mtu(int mtu)
557 {
558         switch (mtu) {
559         case 256: return 1;
560         case 512: return 2;
561         case 1024: return 3;
562         case 2048: return 4;
563         case 4096: return 5;
564         default:
565                 pr_warn("invalid mtu\n");
566                 return -1;
567         }
568 }
569
570 enum ib_max_vl_num {
571         __IB_MAX_VL_0           = 1,
572         __IB_MAX_VL_0_1         = 2,
573         __IB_MAX_VL_0_3         = 3,
574         __IB_MAX_VL_0_7         = 4,
575         __IB_MAX_VL_0_14        = 5,
576 };
577
578 enum mlx5_vl_hw_cap {
579         MLX5_VL_HW_0    = 1,
580         MLX5_VL_HW_0_1  = 2,
581         MLX5_VL_HW_0_2  = 3,
582         MLX5_VL_HW_0_3  = 4,
583         MLX5_VL_HW_0_4  = 5,
584         MLX5_VL_HW_0_5  = 6,
585         MLX5_VL_HW_0_6  = 7,
586         MLX5_VL_HW_0_7  = 8,
587         MLX5_VL_HW_0_14 = 15
588 };
589
590 static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
591                                 u8 *max_vl_num)
592 {
593         switch (vl_hw_cap) {
594         case MLX5_VL_HW_0:
595                 *max_vl_num = __IB_MAX_VL_0;
596                 break;
597         case MLX5_VL_HW_0_1:
598                 *max_vl_num = __IB_MAX_VL_0_1;
599                 break;
600         case MLX5_VL_HW_0_3:
601                 *max_vl_num = __IB_MAX_VL_0_3;
602                 break;
603         case MLX5_VL_HW_0_7:
604                 *max_vl_num = __IB_MAX_VL_0_7;
605                 break;
606         case MLX5_VL_HW_0_14:
607                 *max_vl_num = __IB_MAX_VL_0_14;
608                 break;
609
610         default:
611                 return -EINVAL;
612         }
613
614         return 0;
615 }
616
617 static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
618                                struct ib_port_attr *props)
619 {
620         struct mlx5_ib_dev *dev = to_mdev(ibdev);
621         struct mlx5_core_dev *mdev = dev->mdev;
622         struct mlx5_hca_vport_context *rep;
623         int max_mtu;
624         int oper_mtu;
625         int err;
626         u8 ib_link_width_oper;
627         u8 vl_hw_cap;
628
629         rep = kzalloc(sizeof(*rep), GFP_KERNEL);
630         if (!rep) {
631                 err = -ENOMEM;
632                 goto out;
633         }
634
635         memset(props, 0, sizeof(*props));
636
637         err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
638         if (err)
639                 goto out;
640
641         props->lid              = rep->lid;
642         props->lmc              = rep->lmc;
643         props->sm_lid           = rep->sm_lid;
644         props->sm_sl            = rep->sm_sl;
645         props->state            = rep->vport_state;
646         props->phys_state       = rep->port_physical_state;
647         props->port_cap_flags   = rep->cap_mask1;
648         props->gid_tbl_len      = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
649         props->max_msg_sz       = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
650         props->pkey_tbl_len     = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
651         props->bad_pkey_cntr    = rep->pkey_violation_counter;
652         props->qkey_viol_cntr   = rep->qkey_violation_counter;
653         props->subnet_timeout   = rep->subnet_timeout;
654         props->init_type_reply  = rep->init_type_reply;
655
656         err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
657         if (err)
658                 goto out;
659
660         err = translate_active_width(ibdev, ib_link_width_oper,
661                                      &props->active_width);
662         if (err)
663                 goto out;
664         err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB,
665                                          port);
666         if (err)
667                 goto out;
668
669         mlx5_query_port_max_mtu(mdev, &max_mtu, port);
670
671         props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu);
672
673         mlx5_query_port_oper_mtu(mdev, &oper_mtu, port);
674
675         props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu);
676
677         err = mlx5_query_port_vl_hw_cap(mdev, &vl_hw_cap, port);
678         if (err)
679                 goto out;
680
681         err = translate_max_vl_num(ibdev, vl_hw_cap,
682                                    &props->max_vl_num);
683 out:
684         kfree(rep);
685         return err;
686 }
687
688 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
689                        struct ib_port_attr *props)
690 {
691         switch (mlx5_get_vport_access_method(ibdev)) {
692         case MLX5_VPORT_ACCESS_METHOD_MAD:
693                 return mlx5_query_mad_ifc_port(ibdev, port, props);
694
695         case MLX5_VPORT_ACCESS_METHOD_HCA:
696                 return mlx5_query_hca_port(ibdev, port, props);
697
698         case MLX5_VPORT_ACCESS_METHOD_NIC:
699                 return mlx5_query_port_roce(ibdev, port, props);
700
701         default:
702                 return -EINVAL;
703         }
704 }
705
706 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
707                              union ib_gid *gid)
708 {
709         struct mlx5_ib_dev *dev = to_mdev(ibdev);
710         struct mlx5_core_dev *mdev = dev->mdev;
711
712         switch (mlx5_get_vport_access_method(ibdev)) {
713         case MLX5_VPORT_ACCESS_METHOD_MAD:
714                 return mlx5_query_mad_ifc_gids(ibdev, port, index, gid);
715
716         case MLX5_VPORT_ACCESS_METHOD_HCA:
717                 return mlx5_query_hca_vport_gid(mdev, 0, port, 0, index, gid);
718
719         default:
720                 return -EINVAL;
721         }
722
723 }
724
725 static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
726                               u16 *pkey)
727 {
728         struct mlx5_ib_dev *dev = to_mdev(ibdev);
729         struct mlx5_core_dev *mdev = dev->mdev;
730
731         switch (mlx5_get_vport_access_method(ibdev)) {
732         case MLX5_VPORT_ACCESS_METHOD_MAD:
733                 return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey);
734
735         case MLX5_VPORT_ACCESS_METHOD_HCA:
736         case MLX5_VPORT_ACCESS_METHOD_NIC:
737                 return mlx5_query_hca_vport_pkey(mdev, 0, port,  0, index,
738                                                  pkey);
739         default:
740                 return -EINVAL;
741         }
742 }
743
744 static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
745                                  struct ib_device_modify *props)
746 {
747         struct mlx5_ib_dev *dev = to_mdev(ibdev);
748         struct mlx5_reg_node_desc in;
749         struct mlx5_reg_node_desc out;
750         int err;
751
752         if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
753                 return -EOPNOTSUPP;
754
755         if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
756                 return 0;
757
758         /*
759          * If possible, pass node desc to FW, so it can generate
760          * a 144 trap.  If cmd fails, just ignore.
761          */
762         memcpy(&in, props->node_desc, 64);
763         err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
764                                    sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
765         if (err)
766                 return err;
767
768         memcpy(ibdev->node_desc, props->node_desc, 64);
769
770         return err;
771 }
772
773 static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
774                                struct ib_port_modify *props)
775 {
776         struct mlx5_ib_dev *dev = to_mdev(ibdev);
777         struct ib_port_attr attr;
778         u32 tmp;
779         int err;
780
781         mutex_lock(&dev->cap_mask_mutex);
782
783         err = mlx5_ib_query_port(ibdev, port, &attr);
784         if (err)
785                 goto out;
786
787         tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
788                 ~props->clr_port_cap_mask;
789
790         err = mlx5_set_port_caps(dev->mdev, port, tmp);
791
792 out:
793         mutex_unlock(&dev->cap_mask_mutex);
794         return err;
795 }
796
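/*
 * Allocate a user context: validate the v0/v2 request, round the requested
 * UUAR count up to whole UAR pages, allocate the UARs from firmware and
 * report the device limits userspace needs in the response.
 */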
797 static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
798                                                   struct ib_udata *udata)
799 {
800         struct mlx5_ib_dev *dev = to_mdev(ibdev);
801         struct mlx5_ib_alloc_ucontext_req_v2 req = {};
802         struct mlx5_ib_alloc_ucontext_resp resp = {};
803         struct mlx5_ib_ucontext *context;
804         struct mlx5_uuar_info *uuari;
805         struct mlx5_uar *uars;
806         int gross_uuars;
807         int num_uars;
808         int ver;
809         int uuarn;
810         int err;
811         int i;
812         size_t reqlen;
813
814         if (!dev->ib_active)
815                 return ERR_PTR(-EAGAIN);
816
817         reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
818         if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
819                 ver = 0;
820         else if (reqlen >= sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
821                 ver = 2;
822         else
823                 return ERR_PTR(-EINVAL);
824
825         err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
826         if (err)
827                 return ERR_PTR(err);
828
829         if (req.flags)
830                 return ERR_PTR(-EINVAL);
831
832         if (req.total_num_uuars > MLX5_MAX_UUARS)
833                 return ERR_PTR(-ENOMEM);
834
835         if (req.total_num_uuars == 0)
836                 return ERR_PTR(-EINVAL);
837
838         if (req.comp_mask)
839                 return ERR_PTR(-EOPNOTSUPP);
840
841         if (reqlen > sizeof(req) &&
842             !ib_is_udata_cleared(udata, sizeof(req),
843                                  udata->inlen - sizeof(req)))
844                 return ERR_PTR(-EOPNOTSUPP);
845
846         req.total_num_uuars = ALIGN(req.total_num_uuars,
847                                     MLX5_NON_FP_BF_REGS_PER_PAGE);
848         if (req.num_low_latency_uuars > req.total_num_uuars - 1)
849                 return ERR_PTR(-EINVAL);
850
851         num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
852         gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
853         resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
854         resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
855         resp.cache_line_size = L1_CACHE_BYTES;
856         resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
857         resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
858         resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
859         resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
860         resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
861         resp.response_length = min(offsetof(typeof(resp), response_length) +
862                                    sizeof(resp.response_length), udata->outlen);
863
864         context = kzalloc(sizeof(*context), GFP_KERNEL);
865         if (!context)
866                 return ERR_PTR(-ENOMEM);
867
868         uuari = &context->uuari;
869         mutex_init(&uuari->lock);
870         uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
871         if (!uars) {
872                 err = -ENOMEM;
873                 goto out_ctx;
874         }
875
876         uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
877                                 sizeof(*uuari->bitmap),
878                                 GFP_KERNEL);
879         if (!uuari->bitmap) {
880                 err = -ENOMEM;
881                 goto out_uar_ctx;
882         }
883         /*
884          * clear all fast path uuars
885          */
886         for (i = 0; i < gross_uuars; i++) {
887                 uuarn = i & 3;
888                 if (uuarn == 2 || uuarn == 3)
889                         set_bit(i, uuari->bitmap);
890         }
891
892         uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
893         if (!uuari->count) {
894                 err = -ENOMEM;
895                 goto out_bitmap;
896         }
897
898         for (i = 0; i < num_uars; i++) {
899                 err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
900                 if (err)
901                         goto out_count;
902         }
903
904 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
905         context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
906 #endif
907
908         INIT_LIST_HEAD(&context->db_page_list);
909         mutex_init(&context->db_page_mutex);
910
911         resp.tot_uuars = req.total_num_uuars;
912         resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
913
914         if (field_avail(typeof(resp), reserved2, udata->outlen))
915                 resp.response_length += sizeof(resp.reserved2);
916
917         if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
918                 resp.comp_mask |=
919                         MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
920                 resp.hca_core_clock_offset =
921                         offsetof(struct mlx5_init_seg, internal_timer_h) %
922                         PAGE_SIZE;
923                 resp.response_length += sizeof(resp.hca_core_clock_offset);
924         }
925
926         err = ib_copy_to_udata(udata, &resp, resp.response_length);
927         if (err)
928                 goto out_uars;
929
930         uuari->ver = ver;
931         uuari->num_low_latency_uuars = req.num_low_latency_uuars;
932         uuari->uars = uars;
933         uuari->num_uars = num_uars;
934         return &context->ibucontext;
935
936 out_uars:
937         for (i--; i >= 0; i--)
938                 mlx5_cmd_free_uar(dev->mdev, uars[i].index);
939 out_count:
940         kfree(uuari->count);
941
942 out_bitmap:
943         kfree(uuari->bitmap);
944
945 out_uar_ctx:
946         kfree(uars);
947
948 out_ctx:
949         kfree(context);
950         return ERR_PTR(err);
951 }
952
953 static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
954 {
955         struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
956         struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
957         struct mlx5_uuar_info *uuari = &context->uuari;
958         int i;
959
960         for (i = 0; i < uuari->num_uars; i++) {
961                 if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
962                         mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
963         }
964
965         kfree(uuari->count);
966         kfree(uuari->bitmap);
967         kfree(uuari->uars);
968         kfree(context);
969
970         return 0;
971 }
972
973 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
974 {
975         return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
976 }
977
978 static int get_command(unsigned long offset)
979 {
980         return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
981 }
982
983 static int get_arg(unsigned long offset)
984 {
985         return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
986 }
987
988 static int get_index(unsigned long offset)
989 {
990         return get_arg(offset);
991 }
992
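/*
 * mmap handler for user contexts. The page offset encodes a command plus an
 * argument: REGULAR_PAGE maps a single UAR page write-combined, CORE_CLOCK
 * maps the internal timer page non-cached and read-only.
 */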
993 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
994 {
995         struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
996         struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
997         struct mlx5_uuar_info *uuari = &context->uuari;
998         unsigned long command;
999         unsigned long idx;
1000         phys_addr_t pfn;
1001
1002         command = get_command(vma->vm_pgoff);
1003         switch (command) {
1004         case MLX5_IB_MMAP_REGULAR_PAGE:
1005                 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1006                         return -EINVAL;
1007
1008                 idx = get_index(vma->vm_pgoff);
1009                 if (idx >= uuari->num_uars)
1010                         return -EINVAL;
1011
1012                 pfn = uar_index2pfn(dev, uuari->uars[idx].index);
1013                 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
1014                             (unsigned long long)pfn);
1015
1016                 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
1017                 if (io_remap_pfn_range(vma, vma->vm_start, pfn,
1018                                        PAGE_SIZE, vma->vm_page_prot))
1019                         return -EAGAIN;
1020
1021                 mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n",
1022                             vma->vm_start,
1023                             (unsigned long long)pfn << PAGE_SHIFT);
1024                 break;
1025
1026         case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
1027                 return -ENOSYS;
1028
1029         case MLX5_IB_MMAP_CORE_CLOCK:
1030         {
1031                 phys_addr_t pfn;
1032
1033                 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1034                         return -EINVAL;
1035
1036                 if (vma->vm_flags & (VM_WRITE | VM_EXEC))
1037                         return -EPERM;
1038
1039                 /* Don't expose to user-space information it shouldn't have */
1040                 if (PAGE_SIZE > 4096)
1041                         return -EOPNOTSUPP;
1042
1043                 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1044                 pfn = (dev->mdev->iseg_base +
1045                        offsetof(struct mlx5_init_seg, internal_timer_h)) >>
1046                         PAGE_SHIFT;
1047                 if (io_remap_pfn_range(vma, vma->vm_start, pfn,
1048                                        PAGE_SIZE, vma->vm_page_prot))
1049                         return -EAGAIN;
1050
1051                 mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
1052                             vma->vm_start,
1053                             (unsigned long long)pfn << PAGE_SHIFT);
1054                 break;
1055         }
1056
1057         default:
1058                 return -EINVAL;
1059         }
1060
1061         return 0;
1062 }
1063
1064 static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
1065                                       struct ib_ucontext *context,
1066                                       struct ib_udata *udata)
1067 {
1068         struct mlx5_ib_alloc_pd_resp resp;
1069         struct mlx5_ib_pd *pd;
1070         int err;
1071
1072         pd = kmalloc(sizeof(*pd), GFP_KERNEL);
1073         if (!pd)
1074                 return ERR_PTR(-ENOMEM);
1075
1076         err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
1077         if (err) {
1078                 kfree(pd);
1079                 return ERR_PTR(err);
1080         }
1081
1082         if (context) {
1083                 resp.pdn = pd->pdn;
1084                 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
1085                         mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1086                         kfree(pd);
1087                         return ERR_PTR(-EFAULT);
1088                 }
1089         }
1090
1091         return &pd->ibpd;
1092 }
1093
1094 static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
1095 {
1096         struct mlx5_ib_dev *mdev = to_mdev(pd->device);
1097         struct mlx5_ib_pd *mpd = to_mpd(pd);
1098
1099         mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
1100         kfree(mpd);
1101
1102         return 0;
1103 }
1104
1105 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1106 {
1107         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1108         int err;
1109
1110         err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
1111         if (err)
1112                 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
1113                              ibqp->qp_num, gid->raw);
1114
1115         return err;
1116 }
1117
1118 static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1119 {
1120         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1121         int err;
1122
1123         err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
1124         if (err)
1125                 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
1126                              ibqp->qp_num, gid->raw);
1127
1128         return err;
1129 }
1130
1131 static int init_node_data(struct mlx5_ib_dev *dev)
1132 {
1133         int err;
1134
1135         err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
1136         if (err)
1137                 return err;
1138
1139         dev->mdev->rev_id = dev->mdev->pdev->revision;
1140
1141         return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
1142 }
1143
1144 static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
1145                              char *buf)
1146 {
1147         struct mlx5_ib_dev *dev =
1148                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1149
1150         return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
1151 }
1152
1153 static ssize_t show_reg_pages(struct device *device,
1154                               struct device_attribute *attr, char *buf)
1155 {
1156         struct mlx5_ib_dev *dev =
1157                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1158
1159         return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
1160 }
1161
1162 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
1163                         char *buf)
1164 {
1165         struct mlx5_ib_dev *dev =
1166                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1167         return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
1168 }
1169
1170 static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1171                            char *buf)
1172 {
1173         struct mlx5_ib_dev *dev =
1174                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1175         return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(dev->mdev),
1176                        fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
1177 }
1178
1179 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
1180                         char *buf)
1181 {
1182         struct mlx5_ib_dev *dev =
1183                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1184         return sprintf(buf, "%x\n", dev->mdev->rev_id);
1185 }
1186
1187 static ssize_t show_board(struct device *device, struct device_attribute *attr,
1188                           char *buf)
1189 {
1190         struct mlx5_ib_dev *dev =
1191                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1192         return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
1193                        dev->mdev->board_id);
1194 }
1195
1196 static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
1197 static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
1198 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
1199 static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
1200 static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
1201 static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
1202
1203 static struct device_attribute *mlx5_class_attributes[] = {
1204         &dev_attr_hw_rev,
1205         &dev_attr_fw_ver,
1206         &dev_attr_hca_type,
1207         &dev_attr_board_id,
1208         &dev_attr_fw_pages,
1209         &dev_attr_reg_pages,
1210 };
1211
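/*
 * Translate mlx5_core asynchronous events into IB events and dispatch them
 * to registered clients; for port events 'param' carries the port number.
 */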
1212 static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
1213                           enum mlx5_dev_event event, unsigned long param)
1214 {
1215         struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
1216         struct ib_event ibev;
1217
1218         u8 port = 0;
1219
1220         switch (event) {
1221         case MLX5_DEV_EVENT_SYS_ERROR:
1222                 ibdev->ib_active = false;
1223                 ibev.event = IB_EVENT_DEVICE_FATAL;
1224                 break;
1225
1226         case MLX5_DEV_EVENT_PORT_UP:
1227                 ibev.event = IB_EVENT_PORT_ACTIVE;
1228                 port = (u8)param;
1229                 break;
1230
1231         case MLX5_DEV_EVENT_PORT_DOWN:
1232                 ibev.event = IB_EVENT_PORT_ERR;
1233                 port = (u8)param;
1234                 break;
1235
1236         case MLX5_DEV_EVENT_PORT_INITIALIZED:
1237                 /* not used by ULPs */
1238                 return;
1239
1240         case MLX5_DEV_EVENT_LID_CHANGE:
1241                 ibev.event = IB_EVENT_LID_CHANGE;
1242                 port = (u8)param;
1243                 break;
1244
1245         case MLX5_DEV_EVENT_PKEY_CHANGE:
1246                 ibev.event = IB_EVENT_PKEY_CHANGE;
1247                 port = (u8)param;
1248                 break;
1249
1250         case MLX5_DEV_EVENT_GUID_CHANGE:
1251                 ibev.event = IB_EVENT_GID_CHANGE;
1252                 port = (u8)param;
1253                 break;
1254
1255         case MLX5_DEV_EVENT_CLIENT_REREG:
1256                 ibev.event = IB_EVENT_CLIENT_REREGISTER;
1257                 port = (u8)param;
1258                 break;
1259         }
1260
1261         ibev.device           = &ibdev->ib_dev;
1262         ibev.element.port_num = port;
1263
1264         if (port < 1 || port > ibdev->num_ports) {
1265                 mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
1266                 return;
1267         }
1268
1269         if (ibdev->ib_active)
1270                 ib_dispatch_event(&ibev);
1271 }
1272
1273 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
1274 {
1275         int port;
1276
1277         for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
1278                 mlx5_query_ext_port_caps(dev, port);
1279 }
1280
1281 static int get_port_caps(struct mlx5_ib_dev *dev)
1282 {
1283         struct ib_device_attr *dprops = NULL;
1284         struct ib_port_attr *pprops = NULL;
1285         int err = -ENOMEM;
1286         int port;
1287         struct ib_udata uhw = {.inlen = 0, .outlen = 0};
1288
1289         pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
1290         if (!pprops)
1291                 goto out;
1292
1293         dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
1294         if (!dprops)
1295                 goto out;
1296
1297         err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
1298         if (err) {
1299                 mlx5_ib_warn(dev, "query_device failed %d\n", err);
1300                 goto out;
1301         }
1302
1303         for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
1304                 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
1305                 if (err) {
1306                         mlx5_ib_warn(dev, "query_port %d failed %d\n",
1307                                      port, err);
1308                         break;
1309                 }
1310                 dev->mdev->port_caps[port - 1].pkey_table_len =
1311                                                 dprops->max_pkeys;
1312                 dev->mdev->port_caps[port - 1].gid_table_len =
1313                                                 pprops->gid_tbl_len;
1314                 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
1315                             dprops->max_pkeys, pprops->gid_tbl_len);
1316         }
1317
1318 out:
1319         kfree(pprops);
1320         kfree(dprops);
1321
1322         return err;
1323 }
1324
1325 static void destroy_umrc_res(struct mlx5_ib_dev *dev)
1326 {
1327         int err;
1328
1329         err = mlx5_mr_cache_cleanup(dev);
1330         if (err)
1331                 mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1332
1333         mlx5_ib_destroy_qp(dev->umrc.qp);
1334         ib_destroy_cq(dev->umrc.cq);
1335         ib_dealloc_pd(dev->umrc.pd);
1336 }
1337
1338 enum {
1339         MAX_UMR_WR = 128,
1340 };
1341
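/*
 * Set up the UMR (user memory registration) machinery: a private PD, CQ and
 * a REG_UMR QP driven manually through INIT/RTR/RTS, plus the MR cache used
 * by the registration path.
 */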
1342 static int create_umr_res(struct mlx5_ib_dev *dev)
1343 {
1344         struct ib_qp_init_attr *init_attr = NULL;
1345         struct ib_qp_attr *attr = NULL;
1346         struct ib_pd *pd;
1347         struct ib_cq *cq;
1348         struct ib_qp *qp;
1349         struct ib_cq_init_attr cq_attr = {};
1350         int ret;
1351
1352         attr = kzalloc(sizeof(*attr), GFP_KERNEL);
1353         init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
1354         if (!attr || !init_attr) {
1355                 ret = -ENOMEM;
1356                 goto error_0;
1357         }
1358
1359         pd = ib_alloc_pd(&dev->ib_dev);
1360         if (IS_ERR(pd)) {
1361                 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
1362                 ret = PTR_ERR(pd);
1363                 goto error_0;
1364         }
1365
1366         cq_attr.cqe = 128;
1367         cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL,
1368                           &cq_attr);
1369         if (IS_ERR(cq)) {
1370                 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
1371                 ret = PTR_ERR(cq);
1372                 goto error_2;
1373         }
1374         ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1375
1376         init_attr->send_cq = cq;
1377         init_attr->recv_cq = cq;
1378         init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
1379         init_attr->cap.max_send_wr = MAX_UMR_WR;
1380         init_attr->cap.max_send_sge = 1;
1381         init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
1382         init_attr->port_num = 1;
1383         qp = mlx5_ib_create_qp(pd, init_attr, NULL);
1384         if (IS_ERR(qp)) {
1385                 mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
1386                 ret = PTR_ERR(qp);
1387                 goto error_3;
1388         }
1389         qp->device     = &dev->ib_dev;
1390         qp->real_qp    = qp;
1391         qp->uobject    = NULL;
1392         qp->qp_type    = MLX5_IB_QPT_REG_UMR;
1393
1394         attr->qp_state = IB_QPS_INIT;
1395         attr->port_num = 1;
1396         ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
1397                                 IB_QP_PORT, NULL);
1398         if (ret) {
1399                 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
1400                 goto error_4;
1401         }
1402
1403         memset(attr, 0, sizeof(*attr));
1404         attr->qp_state = IB_QPS_RTR;
1405         attr->path_mtu = IB_MTU_256;
1406
1407         ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
1408         if (ret) {
1409                 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
1410                 goto error_4;
1411         }
1412
1413         memset(attr, 0, sizeof(*attr));
1414         attr->qp_state = IB_QPS_RTS;
1415         ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
1416         if (ret) {
1417                 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
1418                 goto error_4;
1419         }
1420
1421         dev->umrc.qp = qp;
1422         dev->umrc.cq = cq;
1423         dev->umrc.pd = pd;
1424
1425         sema_init(&dev->umrc.sem, MAX_UMR_WR);
1426         ret = mlx5_mr_cache_init(dev);
1427         if (ret) {
1428                 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
1429                 goto error_4;
1430         }
1431
1432         kfree(attr);
1433         kfree(init_attr);
1434
1435         return 0;
1436
1437 error_4:
1438         mlx5_ib_destroy_qp(qp);
1439
1440 error_3:
1441         ib_destroy_cq(cq);
1442
1443 error_2:
1444         ib_dealloc_pd(pd);
1445
1446 error_0:
1447         kfree(attr);
1448         kfree(init_attr);
1449         return ret;
1450 }
1451
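/*
 * Create the device-wide resources (PD p0, CQ c0, XRC domains x0/x1 and
 * SRQs s0/s1) that other parts of the driver reference internally.
 */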
1452 static int create_dev_resources(struct mlx5_ib_resources *devr)
1453 {
1454         struct ib_srq_init_attr attr;
1455         struct mlx5_ib_dev *dev;
1456         struct ib_cq_init_attr cq_attr = {.cqe = 1};
1457         int ret = 0;
1458
1459         dev = container_of(devr, struct mlx5_ib_dev, devr);
1460
1461         devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
1462         if (IS_ERR(devr->p0)) {
1463                 ret = PTR_ERR(devr->p0);
1464                 goto error0;
1465         }
1466         devr->p0->device  = &dev->ib_dev;
1467         devr->p0->uobject = NULL;
1468         atomic_set(&devr->p0->usecnt, 0);
1469
1470         devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
1471         if (IS_ERR(devr->c0)) {
1472                 ret = PTR_ERR(devr->c0);
1473                 goto error1;
1474         }
1475         devr->c0->device        = &dev->ib_dev;
1476         devr->c0->uobject       = NULL;
1477         devr->c0->comp_handler  = NULL;
1478         devr->c0->event_handler = NULL;
1479         devr->c0->cq_context    = NULL;
1480         atomic_set(&devr->c0->usecnt, 0);
1481
1482         devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1483         if (IS_ERR(devr->x0)) {
1484                 ret = PTR_ERR(devr->x0);
1485                 goto error2;
1486         }
1487         devr->x0->device = &dev->ib_dev;
1488         devr->x0->inode = NULL;
1489         atomic_set(&devr->x0->usecnt, 0);
1490         mutex_init(&devr->x0->tgt_qp_mutex);
1491         INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
1492
1493         devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1494         if (IS_ERR(devr->x1)) {
1495                 ret = PTR_ERR(devr->x1);
1496                 goto error3;
1497         }
1498         devr->x1->device = &dev->ib_dev;
1499         devr->x1->inode = NULL;
1500         atomic_set(&devr->x1->usecnt, 0);
1501         mutex_init(&devr->x1->tgt_qp_mutex);
1502         INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
1503
1504         memset(&attr, 0, sizeof(attr));
1505         attr.attr.max_sge = 1;
1506         attr.attr.max_wr = 1;
1507         attr.srq_type = IB_SRQT_XRC;
1508         attr.ext.xrc.cq = devr->c0;
1509         attr.ext.xrc.xrcd = devr->x0;
1510
1511         devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1512         if (IS_ERR(devr->s0)) {
1513                 ret = PTR_ERR(devr->s0);
1514                 goto error4;
1515         }
1516         devr->s0->device        = &dev->ib_dev;
1517         devr->s0->pd            = devr->p0;
1518         devr->s0->uobject       = NULL;
1519         devr->s0->event_handler = NULL;
1520         devr->s0->srq_context   = NULL;
1521         devr->s0->srq_type      = IB_SRQT_XRC;
1522         devr->s0->ext.xrc.xrcd  = devr->x0;
1523         devr->s0->ext.xrc.cq    = devr->c0;
1524         atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
1525         atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
1526         atomic_inc(&devr->p0->usecnt);
1527         atomic_set(&devr->s0->usecnt, 0);
1528
1529         memset(&attr, 0, sizeof(attr));
1530         attr.attr.max_sge = 1;
1531         attr.attr.max_wr = 1;
1532         attr.srq_type = IB_SRQT_BASIC;
1533         devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1534         if (IS_ERR(devr->s1)) {
1535                 ret = PTR_ERR(devr->s1);
1536                 goto error5;
1537         }
1538         devr->s1->device        = &dev->ib_dev;
1539         devr->s1->pd            = devr->p0;
1540         devr->s1->uobject       = NULL;
1541         devr->s1->event_handler = NULL;
1542         devr->s1->srq_context   = NULL;
1543         devr->s1->srq_type      = IB_SRQT_BASIC;
1544         devr->s1->ext.xrc.cq    = devr->c0;
1545         atomic_inc(&devr->p0->usecnt);
1546         atomic_set(&devr->s1->usecnt, 0);
1547
1548         return 0;
1549
1550 error5:
1551         mlx5_ib_destroy_srq(devr->s0);
1552 error4:
1553         mlx5_ib_dealloc_xrcd(devr->x1);
1554 error3:
1555         mlx5_ib_dealloc_xrcd(devr->x0);
1556 error2:
1557         mlx5_ib_destroy_cq(devr->c0);
1558 error1:
1559         mlx5_ib_dealloc_pd(devr->p0);
1560 error0:
1561         return ret;
1562 }
1563
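/* Tear down the objects created by create_dev_resources(). */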
1564 static void destroy_dev_resources(struct mlx5_ib_resources *devr)
1565 {
1566         mlx5_ib_destroy_srq(devr->s1);
1567         mlx5_ib_destroy_srq(devr->s0);
1568         mlx5_ib_dealloc_xrcd(devr->x0);
1569         mlx5_ib_dealloc_xrcd(devr->x1);
1570         mlx5_ib_destroy_cq(devr->c0);
1571         mlx5_ib_dealloc_pd(devr->p0);
1572 }
1573
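/*
 * Report the rdma core capability flags for port 1.  IB ports simply report
 * RDMA_CORE_PORT_IBA_IB; Ethernet ports report RoCE v1 and/or RoCE v2 (UDP
 * encap) according to the roce_version capability, and only when the device
 * supports both the IPv4 and IPv6 L3 types.
 */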
1574 static u32 get_core_cap_flags(struct ib_device *ibdev)
1575 {
1576         struct mlx5_ib_dev *dev = to_mdev(ibdev);
1577         enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
1578         u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
1579         u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
1580         u32 ret = 0;
1581
1582         if (ll == IB_LINK_LAYER_INFINIBAND)
1583                 return RDMA_CORE_PORT_IBA_IB;
1584
1585         if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
1586                 return 0;
1587
1588         if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
1589                 return 0;
1590
1591         if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
1592                 ret |= RDMA_CORE_PORT_IBA_ROCE;
1593
1594         if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
1595                 ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
1596
1597         return ret;
1598 }
1599
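/*
 * Fill in the immutable per-port data cached by the IB core at registration
 * time: pkey/gid table sizes taken from a port query, the core capability
 * flags computed above and the standard MAD size.
 */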
1600 static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
1601                                struct ib_port_immutable *immutable)
1602 {
1603         struct ib_port_attr attr;
1604         int err;
1605
1606         err = mlx5_ib_query_port(ibdev, port_num, &attr);
1607         if (err)
1608                 return err;
1609
1610         immutable->pkey_tbl_len = attr.pkey_tbl_len;
1611         immutable->gid_tbl_len = attr.gid_tbl_len;
1612         immutable->core_cap_flags = get_core_cap_flags(ibdev);
1613         immutable->max_mad_size = IB_MGMT_MAD_SIZE;
1614
1615         return 0;
1616 }
1617
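/*
 * RoCE setup: register the netdevice notifier (mlx5_netdev_event) used to
 * track the netdev backing the Ethernet port, then enable RoCE on the NIC
 * vport.  The notifier is unregistered again if enabling RoCE fails.
 */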
1618 static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
1619 {
1620         int err;
1621
1622         dev->roce.nb.notifier_call = mlx5_netdev_event;
1623         err = register_netdevice_notifier(&dev->roce.nb);
1624         if (err)
1625                 return err;
1626
1627         err = mlx5_nic_vport_enable_roce(dev->mdev);
1628         if (err)
1629                 goto err_unregister_netdevice_notifier;
1630
1631         return 0;
1632
1633 err_unregister_netdevice_notifier:
1634         unregister_netdevice_notifier(&dev->roce.nb);
1635         return err;
1636 }
1637
1638 static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
1639 {
1640         mlx5_nic_vport_disable_roce(dev->mdev);
1641         unregister_netdevice_notifier(&dev->roce.nb);
1642 }
1643
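/*
 * mlx5_ib_add() - probe callback invoked by mlx5_core for each device.  It
 * refuses Ethernet-only devices without RoCE support, allocates and fills the
 * ib_device (verbs callbacks, uverbs command masks, port and vector counts),
 * enables RoCE for the Ethernet link layer, creates the internal resources,
 * ODP state and UMR QP, registers the device with the IB core and finally
 * creates the sysfs attributes.  Each step is unwound in reverse order on
 * error.
 */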
1644 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
1645 {
1646         struct mlx5_ib_dev *dev;
1647         enum rdma_link_layer ll;
1648         int port_type_cap;
1649         int err;
1650         int i;
1651
1652         port_type_cap = MLX5_CAP_GEN(mdev, port_type);
1653         ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
1654
1655         if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
1656                 return NULL;
1657
1658         printk_once(KERN_INFO "%s", mlx5_version);
1659
1660         dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
1661         if (!dev)
1662                 return NULL;
1663
1664         dev->mdev = mdev;
1665
1666         rwlock_init(&dev->roce.netdev_lock);
1667         err = get_port_caps(dev);
1668         if (err)
1669                 goto err_dealloc;
1670
1671         if (mlx5_use_mad_ifc(dev))
1672                 get_ext_port_caps(dev);
1673
1674         MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
1675
1676         strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
1677         dev->ib_dev.owner               = THIS_MODULE;
1678         dev->ib_dev.node_type           = RDMA_NODE_IB_CA;
1679         dev->ib_dev.local_dma_lkey      = 0 /* not supported for now */;
1680         dev->num_ports          = MLX5_CAP_GEN(mdev, num_ports);
1681         dev->ib_dev.phys_port_cnt     = dev->num_ports;
1682         dev->ib_dev.num_comp_vectors    =
1683                 dev->mdev->priv.eq_table.num_comp_vectors;
1684         dev->ib_dev.dma_device  = &mdev->pdev->dev;
1685
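        /* Advertise the uverbs ABI and the commands userspace may invoke. */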
1686         dev->ib_dev.uverbs_abi_ver      = MLX5_IB_UVERBS_ABI_VERSION;
1687         dev->ib_dev.uverbs_cmd_mask     =
1688                 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
1689                 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
1690                 (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
1691                 (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
1692                 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
1693                 (1ull << IB_USER_VERBS_CMD_REG_MR)              |
1694                 (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
1695                 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
1696                 (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
1697                 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
1698                 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
1699                 (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
1700                 (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
1701                 (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
1702                 (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
1703                 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
1704                 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
1705                 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
1706                 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
1707                 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
1708                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
1709                 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
1710                 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
1711         dev->ib_dev.uverbs_ex_cmd_mask =
1712                 (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
1713
1714         dev->ib_dev.query_device        = mlx5_ib_query_device;
1715         dev->ib_dev.query_port          = mlx5_ib_query_port;
1716         dev->ib_dev.get_link_layer      = mlx5_ib_port_link_layer;
1717         if (ll == IB_LINK_LAYER_ETHERNET)
1718                 dev->ib_dev.get_netdev  = mlx5_ib_get_netdev;
1719         dev->ib_dev.query_gid           = mlx5_ib_query_gid;
1720         dev->ib_dev.add_gid             = mlx5_ib_add_gid;
1721         dev->ib_dev.del_gid             = mlx5_ib_del_gid;
1722         dev->ib_dev.query_pkey          = mlx5_ib_query_pkey;
1723         dev->ib_dev.modify_device       = mlx5_ib_modify_device;
1724         dev->ib_dev.modify_port         = mlx5_ib_modify_port;
1725         dev->ib_dev.alloc_ucontext      = mlx5_ib_alloc_ucontext;
1726         dev->ib_dev.dealloc_ucontext    = mlx5_ib_dealloc_ucontext;
1727         dev->ib_dev.mmap                = mlx5_ib_mmap;
1728         dev->ib_dev.alloc_pd            = mlx5_ib_alloc_pd;
1729         dev->ib_dev.dealloc_pd          = mlx5_ib_dealloc_pd;
1730         dev->ib_dev.create_ah           = mlx5_ib_create_ah;
1731         dev->ib_dev.query_ah            = mlx5_ib_query_ah;
1732         dev->ib_dev.destroy_ah          = mlx5_ib_destroy_ah;
1733         dev->ib_dev.create_srq          = mlx5_ib_create_srq;
1734         dev->ib_dev.modify_srq          = mlx5_ib_modify_srq;
1735         dev->ib_dev.query_srq           = mlx5_ib_query_srq;
1736         dev->ib_dev.destroy_srq         = mlx5_ib_destroy_srq;
1737         dev->ib_dev.post_srq_recv       = mlx5_ib_post_srq_recv;
1738         dev->ib_dev.create_qp           = mlx5_ib_create_qp;
1739         dev->ib_dev.modify_qp           = mlx5_ib_modify_qp;
1740         dev->ib_dev.query_qp            = mlx5_ib_query_qp;
1741         dev->ib_dev.destroy_qp          = mlx5_ib_destroy_qp;
1742         dev->ib_dev.post_send           = mlx5_ib_post_send;
1743         dev->ib_dev.post_recv           = mlx5_ib_post_recv;
1744         dev->ib_dev.create_cq           = mlx5_ib_create_cq;
1745         dev->ib_dev.modify_cq           = mlx5_ib_modify_cq;
1746         dev->ib_dev.resize_cq           = mlx5_ib_resize_cq;
1747         dev->ib_dev.destroy_cq          = mlx5_ib_destroy_cq;
1748         dev->ib_dev.poll_cq             = mlx5_ib_poll_cq;
1749         dev->ib_dev.req_notify_cq       = mlx5_ib_arm_cq;
1750         dev->ib_dev.get_dma_mr          = mlx5_ib_get_dma_mr;
1751         dev->ib_dev.reg_user_mr         = mlx5_ib_reg_user_mr;
1752         dev->ib_dev.dereg_mr            = mlx5_ib_dereg_mr;
1753         dev->ib_dev.attach_mcast        = mlx5_ib_mcg_attach;
1754         dev->ib_dev.detach_mcast        = mlx5_ib_mcg_detach;
1755         dev->ib_dev.process_mad         = mlx5_ib_process_mad;
1756         dev->ib_dev.alloc_mr            = mlx5_ib_alloc_mr;
1757         dev->ib_dev.map_mr_sg           = mlx5_ib_map_mr_sg;
1758         dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
1759         dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
1760
1761         mlx5_ib_internal_fill_odp_caps(dev);
1762
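        /* Expose the XRC verbs only if the device reports the xrc capability. */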
1763         if (MLX5_CAP_GEN(mdev, xrc)) {
1764                 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
1765                 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
1766                 dev->ib_dev.uverbs_cmd_mask |=
1767                         (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
1768                         (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
1769         }
1770
1771         err = init_node_data(dev);
1772         if (err)
1773                 goto err_dealloc;
1774
1775         mutex_init(&dev->cap_mask_mutex);
1776
1777         if (ll == IB_LINK_LAYER_ETHERNET) {
1778                 err = mlx5_enable_roce(dev);
1779                 if (err)
1780                         goto err_dealloc;
1781         }
1782
1783         err = create_dev_resources(&dev->devr);
1784         if (err)
1785                 goto err_disable_roce;
1786
1787         err = mlx5_ib_odp_init_one(dev);
1788         if (err)
1789                 goto err_rsrc;
1790
1791         err = ib_register_device(&dev->ib_dev, NULL);
1792         if (err)
1793                 goto err_odp;
1794
1795         err = create_umr_res(dev);
1796         if (err)
1797                 goto err_dev;
1798
1799         for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
1800                 err = device_create_file(&dev->ib_dev.dev,
1801                                          mlx5_class_attributes[i]);
1802                 if (err)
1803                         goto err_umrc;
1804         }
1805
1806         dev->ib_active = true;
1807
1808         return dev;
1809
1810 err_umrc:
1811         destroy_umrc_res(dev);
1812
1813 err_dev:
1814         ib_unregister_device(&dev->ib_dev);
1815
1816 err_odp:
1817         mlx5_ib_odp_remove_one(dev);
1818
1819 err_rsrc:
1820         destroy_dev_resources(&dev->devr);
1821
1822 err_disable_roce:
1823         if (ll == IB_LINK_LAYER_ETHERNET)
1824                 mlx5_disable_roce(dev);
1825
1826 err_dealloc:
1827         ib_dealloc_device((struct ib_device *)dev);
1828
1829         return NULL;
1830 }
1831
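/*
 * Device removal: undo mlx5_ib_add() in reverse order - unregister from the
 * IB core, destroy the UMR resources, ODP state and internal resources,
 * disable RoCE on Ethernet ports and free the ib_device.
 */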
1832 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
1833 {
1834         struct mlx5_ib_dev *dev = context;
1835         enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
1836
1837         ib_unregister_device(&dev->ib_dev);
1838         destroy_umrc_res(dev);
1839         mlx5_ib_odp_remove_one(dev);
1840         destroy_dev_resources(&dev->devr);
1841         if (ll == IB_LINK_LAYER_ETHERNET)
1842                 mlx5_disable_roce(dev);
1843         ib_dealloc_device(&dev->ib_dev);
1844 }
1845
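/* Interface registered with mlx5_core; add/remove are invoked per device. */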
1846 static struct mlx5_interface mlx5_ib_interface = {
1847         .add            = mlx5_ib_add,
1848         .remove         = mlx5_ib_remove,
1849         .event          = mlx5_ib_event,
1850         .protocol       = MLX5_INTERFACE_PROTOCOL_IB,
1851 };
1852
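/*
 * Module init: warn if the deprecated prof_sel parameter was set, initialize
 * the ODP machinery and register the interface with mlx5_core.
 */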
1853 static int __init mlx5_ib_init(void)
1854 {
1855         int err;
1856
1857         if (deprecated_prof_sel != 2)
1858                 pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
1859
1860         err = mlx5_ib_odp_init();
1861         if (err)
1862                 return err;
1863
1864         err = mlx5_register_interface(&mlx5_ib_interface);
1865         if (err)
1866                 goto clean_odp;
1867
1868         return err;
1869
1870 clean_odp:
1871         mlx5_ib_odp_cleanup();
1872         return err;
1873 }
1874
1875 static void __exit mlx5_ib_cleanup(void)
1876 {
1877         mlx5_unregister_interface(&mlx5_ib_interface);
1878         mlx5_ib_odp_cleanup();
1879 }
1880
1881 module_init(mlx5_ib_init);
1882 module_exit(mlx5_ib_cleanup);