IB/mlx5: Add flow steering support
drivers/infiniband/hw/mlx5/main.c
1 /*
2  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <linux/highmem.h>
34 #include <linux/module.h>
35 #include <linux/init.h>
36 #include <linux/errno.h>
37 #include <linux/pci.h>
38 #include <linux/dma-mapping.h>
39 #include <linux/slab.h>
40 #include <linux/io-mapping.h>
41 #include <linux/sched.h>
42 #include <rdma/ib_user_verbs.h>
43 #include <linux/mlx5/vport.h>
44 #include <rdma/ib_smi.h>
45 #include <rdma/ib_umem.h>
46 #include <linux/in.h>
47 #include <linux/etherdevice.h>
48 #include <linux/mlx5/fs.h>
49 #include "user.h"
50 #include "mlx5_ib.h"
51
52 #define DRIVER_NAME "mlx5_ib"
53 #define DRIVER_VERSION "2.2-1"
54 #define DRIVER_RELDATE  "Feb 2014"
55
56 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
57 MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
58 MODULE_LICENSE("Dual BSD/GPL");
59 MODULE_VERSION(DRIVER_VERSION);
60
61 static int deprecated_prof_sel = 2;
62 module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
63 MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
64
65 static char mlx5_version[] =
66         DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
67         DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
68
69 static enum rdma_link_layer
70 mlx5_ib_port_link_layer(struct ib_device *device)
71 {
72         struct mlx5_ib_dev *dev = to_mdev(device);
73
74         switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
75         case MLX5_CAP_PORT_TYPE_IB:
76                 return IB_LINK_LAYER_INFINIBAND;
77         case MLX5_CAP_PORT_TYPE_ETH:
78                 return IB_LINK_LAYER_ETHERNET;
79         default:
80                 return IB_LINK_LAYER_UNSPECIFIED;
81         }
82 }
83
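/*
 * Prefer the MAD interface for management queries when the device reports
 * ISSI 0 (the legacy command interface); newer firmware exposes the same
 * data through vport commands.
 */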
84 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
85 {
86         return !dev->mdev->issi;
87 }
88
89 enum {
90         MLX5_VPORT_ACCESS_METHOD_MAD,
91         MLX5_VPORT_ACCESS_METHOD_HCA,
92         MLX5_VPORT_ACCESS_METHOD_NIC,
93 };
94
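/*
 * Choose how port/vport attributes are retrieved: MAD ifc on legacy
 * firmware, NIC vport commands on Ethernet ports, HCA vport commands
 * otherwise.
 */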
95 static int mlx5_get_vport_access_method(struct ib_device *ibdev)
96 {
97         if (mlx5_use_mad_ifc(to_mdev(ibdev)))
98                 return MLX5_VPORT_ACCESS_METHOD_MAD;
99
100         if (mlx5_ib_port_link_layer(ibdev) ==
101             IB_LINK_LAYER_ETHERNET)
102                 return MLX5_VPORT_ACCESS_METHOD_NIC;
103
104         return MLX5_VPORT_ACCESS_METHOD_HCA;
105 }
106
107 static int mlx5_query_system_image_guid(struct ib_device *ibdev,
108                                         __be64 *sys_image_guid)
109 {
110         struct mlx5_ib_dev *dev = to_mdev(ibdev);
111         struct mlx5_core_dev *mdev = dev->mdev;
112         u64 tmp;
113         int err;
114
115         switch (mlx5_get_vport_access_method(ibdev)) {
116         case MLX5_VPORT_ACCESS_METHOD_MAD:
117                 return mlx5_query_mad_ifc_system_image_guid(ibdev,
118                                                             sys_image_guid);
119
120         case MLX5_VPORT_ACCESS_METHOD_HCA:
121                 err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
122                 if (!err)
123                         *sys_image_guid = cpu_to_be64(tmp);
124                 return err;
125
126         default:
127                 return -EINVAL;
128         }
129 }
130
131 static int mlx5_query_max_pkeys(struct ib_device *ibdev,
132                                 u16 *max_pkeys)
133 {
134         struct mlx5_ib_dev *dev = to_mdev(ibdev);
135         struct mlx5_core_dev *mdev = dev->mdev;
136
137         switch (mlx5_get_vport_access_method(ibdev)) {
138         case MLX5_VPORT_ACCESS_METHOD_MAD:
139                 return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys);
140
141         case MLX5_VPORT_ACCESS_METHOD_HCA:
142         case MLX5_VPORT_ACCESS_METHOD_NIC:
143                 *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
144                                                 pkey_table_size));
145                 return 0;
146
147         default:
148                 return -EINVAL;
149         }
150 }
151
152 static int mlx5_query_vendor_id(struct ib_device *ibdev,
153                                 u32 *vendor_id)
154 {
155         struct mlx5_ib_dev *dev = to_mdev(ibdev);
156
157         switch (mlx5_get_vport_access_method(ibdev)) {
158         case MLX5_VPORT_ACCESS_METHOD_MAD:
159                 return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id);
160
161         case MLX5_VPORT_ACCESS_METHOD_HCA:
162         case MLX5_VPORT_ACCESS_METHOD_NIC:
163                 return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
164
165         default:
166                 return -EINVAL;
167         }
168 }
169
170 static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
171                                 __be64 *node_guid)
172 {
173         u64 tmp;
174         int err;
175
176         switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
177         case MLX5_VPORT_ACCESS_METHOD_MAD:
178                 return mlx5_query_mad_ifc_node_guid(dev, node_guid);
179
180         case MLX5_VPORT_ACCESS_METHOD_HCA:
181                 err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
182                 if (!err)
183                         *node_guid = cpu_to_be64(tmp);
184                 return err;
185
186         default:
187                 return -EINVAL;
188         }
189 }
190
191 struct mlx5_reg_node_desc {
192         u8      desc[64];
193 };
194
195 static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
196 {
197         struct mlx5_reg_node_desc in;
198
199         if (mlx5_use_mad_ifc(dev))
200                 return mlx5_query_mad_ifc_node_desc(dev, node_desc);
201
202         memset(&in, 0, sizeof(in));
203
204         return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
205                                     sizeof(struct mlx5_reg_node_desc),
206                                     MLX5_REG_NODE_DESC, 0, 0);
207 }
208
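/*
 * Translate the HCA general capabilities (MLX5_CAP_GEN) and firmware
 * queries into struct ib_device_attr.  No vendor-specific user data is
 * consumed or produced here, so any uhw payload is rejected.
 */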
209 static int mlx5_ib_query_device(struct ib_device *ibdev,
210                                 struct ib_device_attr *props,
211                                 struct ib_udata *uhw)
212 {
213         struct mlx5_ib_dev *dev = to_mdev(ibdev);
214         struct mlx5_core_dev *mdev = dev->mdev;
215         int err = -ENOMEM;
216         int max_rq_sg;
217         int max_sq_sg;
218         u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
219
220         if (uhw->inlen || uhw->outlen)
221                 return -EINVAL;
222
223         memset(props, 0, sizeof(*props));
224         err = mlx5_query_system_image_guid(ibdev,
225                                            &props->sys_image_guid);
226         if (err)
227                 return err;
228
229         err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
230         if (err)
231                 return err;
232
233         err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
234         if (err)
235                 return err;
236
237         props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
238                 (fw_rev_min(dev->mdev) << 16) |
239                 fw_rev_sub(dev->mdev);
240         props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
241                 IB_DEVICE_PORT_ACTIVE_EVENT             |
242                 IB_DEVICE_SYS_IMAGE_GUID                |
243                 IB_DEVICE_RC_RNR_NAK_GEN;
244
245         if (MLX5_CAP_GEN(mdev, pkv))
246                 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
247         if (MLX5_CAP_GEN(mdev, qkv))
248                 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
249         if (MLX5_CAP_GEN(mdev, apm))
250                 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
251         if (MLX5_CAP_GEN(mdev, xrc))
252                 props->device_cap_flags |= IB_DEVICE_XRC;
253         props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
254         if (MLX5_CAP_GEN(mdev, sho)) {
255                 props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
256                 /* At this stage no support for signature handover */
257                 props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
258                                       IB_PROT_T10DIF_TYPE_2 |
259                                       IB_PROT_T10DIF_TYPE_3;
260                 props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
261                                        IB_GUARD_T10DIF_CSUM;
262         }
263         if (MLX5_CAP_GEN(mdev, block_lb_mc))
264                 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
265
266         props->vendor_part_id      = mdev->pdev->device;
267         props->hw_ver              = mdev->pdev->revision;
268
269         props->max_mr_size         = ~0ull;
270         props->page_size_cap       = ~(min_page_size - 1);
271         props->max_qp              = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
272         props->max_qp_wr           = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
273         max_rq_sg =  MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
274                      sizeof(struct mlx5_wqe_data_seg);
275         max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) -
276                      sizeof(struct mlx5_wqe_ctrl_seg)) /
277                      sizeof(struct mlx5_wqe_data_seg);
278         props->max_sge = min(max_rq_sg, max_sq_sg);
279         props->max_sge_rd = props->max_sge;
280         props->max_cq              = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
281         props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1;
282         props->max_mr              = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
283         props->max_pd              = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
284         props->max_qp_rd_atom      = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
285         props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
286         props->max_srq             = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
287         props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
288         props->local_ca_ack_delay  = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
289         props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
290         props->max_srq_sge         = max_rq_sg - 1;
291         props->max_fast_reg_page_list_len = (unsigned int)-1;
292         props->atomic_cap          = IB_ATOMIC_NONE;
293         props->masked_atomic_cap   = IB_ATOMIC_NONE;
294         props->max_mcast_grp       = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
295         props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
296         props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
297                                            props->max_mcast_grp;
298         props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
299
300 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
301         if (MLX5_CAP_GEN(mdev, pg))
302                 props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
303         props->odp_caps = dev->odp_caps;
304 #endif
305
306         return 0;
307 }
308
309 enum mlx5_ib_width {
310         MLX5_IB_WIDTH_1X        = 1 << 0,
311         MLX5_IB_WIDTH_2X        = 1 << 1,
312         MLX5_IB_WIDTH_4X        = 1 << 2,
313         MLX5_IB_WIDTH_8X        = 1 << 3,
314         MLX5_IB_WIDTH_12X       = 1 << 4
315 };
316
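/*
 * Convert the hardware active link-width bitmap to the IB verbs width
 * enum.  2X is a valid hardware width but has no IB spec encoding, so it
 * is rejected.
 */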
317 static int translate_active_width(struct ib_device *ibdev, u8 active_width,
318                                   u8 *ib_width)
319 {
320         struct mlx5_ib_dev *dev = to_mdev(ibdev);
321         int err = 0;
322
323         if (active_width & MLX5_IB_WIDTH_1X) {
324                 *ib_width = IB_WIDTH_1X;
325         } else if (active_width & MLX5_IB_WIDTH_2X) {
326                 mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n",
327                             (int)active_width);
328                 err = -EINVAL;
329         } else if (active_width & MLX5_IB_WIDTH_4X) {
330                 *ib_width = IB_WIDTH_4X;
331         } else if (active_width & MLX5_IB_WIDTH_8X) {
332                 *ib_width = IB_WIDTH_8X;
333         } else if (active_width & MLX5_IB_WIDTH_12X) {
334                 *ib_width = IB_WIDTH_12X;
335         } else {
336                 mlx5_ib_dbg(dev, "Invalid active_width %d\n",
337                             (int)active_width);
338                 err = -EINVAL;
339         }
340
341         return err;
342 }
343
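/* Map an MTU in bytes to the IB MTU encoding (1..5), or -1 if invalid. */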
344 static int mlx5_mtu_to_ib_mtu(int mtu)
345 {
346         switch (mtu) {
347         case 256: return 1;
348         case 512: return 2;
349         case 1024: return 3;
350         case 2048: return 4;
351         case 4096: return 5;
352         default:
353                 pr_warn("invalid mtu\n");
354                 return -1;
355         }
356 }
357
358 enum ib_max_vl_num {
359         __IB_MAX_VL_0           = 1,
360         __IB_MAX_VL_0_1         = 2,
361         __IB_MAX_VL_0_3         = 3,
362         __IB_MAX_VL_0_7         = 4,
363         __IB_MAX_VL_0_14        = 5,
364 };
365
366 enum mlx5_vl_hw_cap {
367         MLX5_VL_HW_0    = 1,
368         MLX5_VL_HW_0_1  = 2,
369         MLX5_VL_HW_0_2  = 3,
370         MLX5_VL_HW_0_3  = 4,
371         MLX5_VL_HW_0_4  = 5,
372         MLX5_VL_HW_0_5  = 6,
373         MLX5_VL_HW_0_6  = 7,
374         MLX5_VL_HW_0_7  = 8,
375         MLX5_VL_HW_0_14 = 15
376 };
377
378 static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
379                                 u8 *max_vl_num)
380 {
381         switch (vl_hw_cap) {
382         case MLX5_VL_HW_0:
383                 *max_vl_num = __IB_MAX_VL_0;
384                 break;
385         case MLX5_VL_HW_0_1:
386                 *max_vl_num = __IB_MAX_VL_0_1;
387                 break;
388         case MLX5_VL_HW_0_3:
389                 *max_vl_num = __IB_MAX_VL_0_3;
390                 break;
391         case MLX5_VL_HW_0_7:
392                 *max_vl_num = __IB_MAX_VL_0_7;
393                 break;
394         case MLX5_VL_HW_0_14:
395                 *max_vl_num = __IB_MAX_VL_0_14;
396                 break;
397
398         default:
399                 return -EINVAL;
400         }
401
402         return 0;
403 }
404
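/*
 * Build ib_port_attr from the HCA vport context together with the
 * operational link width/speed, MTU and VL capability queries.
 */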
405 static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
406                                struct ib_port_attr *props)
407 {
408         struct mlx5_ib_dev *dev = to_mdev(ibdev);
409         struct mlx5_core_dev *mdev = dev->mdev;
410         struct mlx5_hca_vport_context *rep;
411         int max_mtu;
412         int oper_mtu;
413         int err;
414         u8 ib_link_width_oper;
415         u8 vl_hw_cap;
416
417         rep = kzalloc(sizeof(*rep), GFP_KERNEL);
418         if (!rep) {
419                 err = -ENOMEM;
420                 goto out;
421         }
422
423         memset(props, 0, sizeof(*props));
424
425         err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
426         if (err)
427                 goto out;
428
429         props->lid              = rep->lid;
430         props->lmc              = rep->lmc;
431         props->sm_lid           = rep->sm_lid;
432         props->sm_sl            = rep->sm_sl;
433         props->state            = rep->vport_state;
434         props->phys_state       = rep->port_physical_state;
435         props->port_cap_flags   = rep->cap_mask1;
436         props->gid_tbl_len      = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
437         props->max_msg_sz       = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
438         props->pkey_tbl_len     = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
439         props->bad_pkey_cntr    = rep->pkey_violation_counter;
440         props->qkey_viol_cntr   = rep->qkey_violation_counter;
441         props->subnet_timeout   = rep->subnet_timeout;
442         props->init_type_reply  = rep->init_type_reply;
443
444         err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
445         if (err)
446                 goto out;
447
448         err = translate_active_width(ibdev, ib_link_width_oper,
449                                      &props->active_width);
450         if (err)
451                 goto out;
452         err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB,
453                                          port);
454         if (err)
455                 goto out;
456
457         mlx5_query_port_max_mtu(mdev, &max_mtu, port);
458
459         props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu);
460
461         mlx5_query_port_oper_mtu(mdev, &oper_mtu, port);
462
463         props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu);
464
465         err = mlx5_query_port_vl_hw_cap(mdev, &vl_hw_cap, port);
466         if (err)
467                 goto out;
468
469         err = translate_max_vl_num(ibdev, vl_hw_cap,
470                                    &props->max_vl_num);
471 out:
472         kfree(rep);
473         return err;
474 }
475
476 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
477                        struct ib_port_attr *props)
478 {
479         switch (mlx5_get_vport_access_method(ibdev)) {
480         case MLX5_VPORT_ACCESS_METHOD_MAD:
481                 return mlx5_query_mad_ifc_port(ibdev, port, props);
482
483         case MLX5_VPORT_ACCESS_METHOD_HCA:
484                 return mlx5_query_hca_port(ibdev, port, props);
485
486         default:
487                 return -EINVAL;
488         }
489 }
490
491 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
492                              union ib_gid *gid)
493 {
494         struct mlx5_ib_dev *dev = to_mdev(ibdev);
495         struct mlx5_core_dev *mdev = dev->mdev;
496
497         switch (mlx5_get_vport_access_method(ibdev)) {
498         case MLX5_VPORT_ACCESS_METHOD_MAD:
499                 return mlx5_query_mad_ifc_gids(ibdev, port, index, gid);
500
501         case MLX5_VPORT_ACCESS_METHOD_HCA:
502                 return mlx5_query_hca_vport_gid(mdev, 0, port, 0, index, gid);
503
504         default:
505                 return -EINVAL;
506         }
507
508 }
509
510 static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
511                               u16 *pkey)
512 {
513         struct mlx5_ib_dev *dev = to_mdev(ibdev);
514         struct mlx5_core_dev *mdev = dev->mdev;
515
516         switch (mlx5_get_vport_access_method(ibdev)) {
517         case MLX5_VPORT_ACCESS_METHOD_MAD:
518                 return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey);
519
520         case MLX5_VPORT_ACCESS_METHOD_HCA:
521         case MLX5_VPORT_ACCESS_METHOD_NIC:
522                 return mlx5_query_hca_vport_pkey(mdev, 0, port,  0, index,
523                                                  pkey);
524         default:
525                 return -EINVAL;
526         }
527 }
528
529 static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
530                                  struct ib_device_modify *props)
531 {
532         struct mlx5_ib_dev *dev = to_mdev(ibdev);
533         struct mlx5_reg_node_desc in;
534         struct mlx5_reg_node_desc out;
535         int err;
536
537         if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
538                 return -EOPNOTSUPP;
539
540         if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
541                 return 0;
542
543         /*
544          * If possible, pass node desc to FW, so it can generate
545          * a 144 trap.  If cmd fails, just ignore.
546          */
547         memcpy(&in, props->node_desc, 64);
548         err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
549                                    sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
550         if (err)
551                 return err;
552
553         memcpy(ibdev->node_desc, props->node_desc, 64);
554
555         return err;
556 }
557
558 static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
559                                struct ib_port_modify *props)
560 {
561         struct mlx5_ib_dev *dev = to_mdev(ibdev);
562         struct ib_port_attr attr;
563         u32 tmp;
564         int err;
565
566         mutex_lock(&dev->cap_mask_mutex);
567
568         err = mlx5_ib_query_port(ibdev, port, &attr);
569         if (err)
570                 goto out;
571
572         tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
573                 ~props->clr_port_cap_mask;
574
575         err = mlx5_set_port_caps(dev->mdev, port, tmp);
576
577 out:
578         mutex_unlock(&dev->cap_mask_mutex);
579         return err;
580 }
581
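/*
 * Allocate a user context: validate the (v0 or v2) request, allocate the
 * requested UAR pages, reserve the fast-path (blue flame) uuars in the
 * allocation bitmap, and report the device limits userspace needs in resp.
 */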
582 static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
583                                                   struct ib_udata *udata)
584 {
585         struct mlx5_ib_dev *dev = to_mdev(ibdev);
586         struct mlx5_ib_alloc_ucontext_req_v2 req;
587         struct mlx5_ib_alloc_ucontext_resp resp;
588         struct mlx5_ib_ucontext *context;
589         struct mlx5_uuar_info *uuari;
590         struct mlx5_uar *uars;
591         int gross_uuars;
592         int num_uars;
593         int ver;
594         int uuarn;
595         int err;
596         int i;
597         size_t reqlen;
598
599         if (!dev->ib_active)
600                 return ERR_PTR(-EAGAIN);
601
602         memset(&req, 0, sizeof(req));
603         reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
604         if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
605                 ver = 0;
606         else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
607                 ver = 2;
608         else
609                 return ERR_PTR(-EINVAL);
610
611         err = ib_copy_from_udata(&req, udata, reqlen);
612         if (err)
613                 return ERR_PTR(err);
614
615         if (req.flags || req.reserved)
616                 return ERR_PTR(-EINVAL);
617
618         if (req.total_num_uuars > MLX5_MAX_UUARS)
619                 return ERR_PTR(-ENOMEM);
620
621         if (req.total_num_uuars == 0)
622                 return ERR_PTR(-EINVAL);
623
624         req.total_num_uuars = ALIGN(req.total_num_uuars,
625                                     MLX5_NON_FP_BF_REGS_PER_PAGE);
626         if (req.num_low_latency_uuars > req.total_num_uuars - 1)
627                 return ERR_PTR(-EINVAL);
628
629         num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
630         gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
631         resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
632         resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
633         resp.cache_line_size = L1_CACHE_BYTES;
634         resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
635         resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
636         resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
637         resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
638         resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
639
640         context = kzalloc(sizeof(*context), GFP_KERNEL);
641         if (!context)
642                 return ERR_PTR(-ENOMEM);
643
644         uuari = &context->uuari;
645         mutex_init(&uuari->lock);
646         uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
647         if (!uars) {
648                 err = -ENOMEM;
649                 goto out_ctx;
650         }
651
652         uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
653                                 sizeof(*uuari->bitmap),
654                                 GFP_KERNEL);
655         if (!uuari->bitmap) {
656                 err = -ENOMEM;
657                 goto out_uar_ctx;
658         }
659         /*
660          * reserve all fast path uuars (mark them used in the bitmap)
661          */
662         for (i = 0; i < gross_uuars; i++) {
663                 uuarn = i & 3;
664                 if (uuarn == 2 || uuarn == 3)
665                         set_bit(i, uuari->bitmap);
666         }
667
668         uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
669         if (!uuari->count) {
670                 err = -ENOMEM;
671                 goto out_bitmap;
672         }
673
674         for (i = 0; i < num_uars; i++) {
675                 err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
676                 if (err)
677                         goto out_count;
678         }
679
680 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
681         context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
682 #endif
683
684         INIT_LIST_HEAD(&context->db_page_list);
685         mutex_init(&context->db_page_mutex);
686
687         resp.tot_uuars = req.total_num_uuars;
688         resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
689         err = ib_copy_to_udata(udata, &resp,
690                                sizeof(resp) - sizeof(resp.reserved));
691         if (err)
692                 goto out_uars;
693
694         uuari->ver = ver;
695         uuari->num_low_latency_uuars = req.num_low_latency_uuars;
696         uuari->uars = uars;
697         uuari->num_uars = num_uars;
698         return &context->ibucontext;
699
700 out_uars:
701         for (i--; i >= 0; i--)
702                 mlx5_cmd_free_uar(dev->mdev, uars[i].index);
703 out_count:
704         kfree(uuari->count);
705
706 out_bitmap:
707         kfree(uuari->bitmap);
708
709 out_uar_ctx:
710         kfree(uars);
711
712 out_ctx:
713         kfree(context);
714         return ERR_PTR(err);
715 }
716
717 static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
718 {
719         struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
720         struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
721         struct mlx5_uuar_info *uuari = &context->uuari;
722         int i;
723
724         for (i = 0; i < uuari->num_uars; i++) {
725                 if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
726                         mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
727         }
728
729         kfree(uuari->count);
730         kfree(uuari->bitmap);
731         kfree(uuari->uars);
732         kfree(context);
733
734         return 0;
735 }
736
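/*
 * mmap offsets encode a command in the high bits of vm_pgoff
 * (MLX5_IB_MMAP_CMD_SHIFT) and an argument, e.g. a UAR index, in the low
 * bits; uar_index2pfn() turns a UAR index into a PFN within BAR 0.
 */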
737 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
738 {
739         return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
740 }
741
742 static int get_command(unsigned long offset)
743 {
744         return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
745 }
746
747 static int get_arg(unsigned long offset)
748 {
749         return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
750 }
751
752 static int get_index(unsigned long offset)
753 {
754         return get_arg(offset);
755 }
756
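/*
 * Map a single UAR page into the caller's address space with
 * write-combining; contiguous-page mappings are not supported here.
 */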
757 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
758 {
759         struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
760         struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
761         struct mlx5_uuar_info *uuari = &context->uuari;
762         unsigned long command;
763         unsigned long idx;
764         phys_addr_t pfn;
765
766         command = get_command(vma->vm_pgoff);
767         switch (command) {
768         case MLX5_IB_MMAP_REGULAR_PAGE:
769                 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
770                         return -EINVAL;
771
772                 idx = get_index(vma->vm_pgoff);
773                 if (idx >= uuari->num_uars)
774                         return -EINVAL;
775
776                 pfn = uar_index2pfn(dev, uuari->uars[idx].index);
777                 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
778                             (unsigned long long)pfn);
779
780                 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
781                 if (io_remap_pfn_range(vma, vma->vm_start, pfn,
782                                        PAGE_SIZE, vma->vm_page_prot))
783                         return -EAGAIN;
784
785                 mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n",
786                             vma->vm_start,
787                             (unsigned long long)pfn << PAGE_SHIFT);
788                 break;
789
790         case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
791                 return -ENOSYS;
792
793         default:
794                 return -EINVAL;
795         }
796
797         return 0;
798 }
799
800 static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
801                                       struct ib_ucontext *context,
802                                       struct ib_udata *udata)
803 {
804         struct mlx5_ib_alloc_pd_resp resp;
805         struct mlx5_ib_pd *pd;
806         int err;
807
808         pd = kmalloc(sizeof(*pd), GFP_KERNEL);
809         if (!pd)
810                 return ERR_PTR(-ENOMEM);
811
812         err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
813         if (err) {
814                 kfree(pd);
815                 return ERR_PTR(err);
816         }
817
818         if (context) {
819                 resp.pdn = pd->pdn;
820                 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
821                         mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
822                         kfree(pd);
823                         return ERR_PTR(-EFAULT);
824                 }
825         }
826
827         return &pd->ibpd;
828 }
829
830 static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
831 {
832         struct mlx5_ib_dev *mdev = to_mdev(pd->device);
833         struct mlx5_ib_pd *mpd = to_mpd(pd);
834
835         mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
836         kfree(mpd);
837
838         return 0;
839 }
840
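/*
 * Return true if the outer_headers block of the match criteria is all
 * zeroes, i.e. the rule does not match on any outer header field.
 */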
841 static bool outer_header_zero(u32 *match_criteria)
842 {
843         int size = MLX5_ST_SZ_BYTES(fte_match_param);
844         char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
845                                              outer_headers);
846
847         return outer_headers_c[0] == 0 && !memcmp(outer_headers_c,
848                                                   outer_headers_c + 1,
849                                                   size - 1);
850 }
851
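/*
 * Translate one ib_flow_spec (ETH, IPV4, TCP or UDP) into the
 * corresponding outer_headers fields of the mlx5 match criteria (match_c)
 * and match value (match_v) buffers.
 */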
852 static int parse_flow_attr(u32 *match_c, u32 *match_v,
853                            union ib_flow_spec *ib_spec)
854 {
855         void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
856                                              outer_headers);
857         void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
858                                              outer_headers);
859         switch (ib_spec->type) {
860         case IB_FLOW_SPEC_ETH:
861                 if (ib_spec->size != sizeof(ib_spec->eth))
862                         return -EINVAL;
863
864                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
865                                              dmac_47_16),
866                                 ib_spec->eth.mask.dst_mac);
867                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
868                                              dmac_47_16),
869                                 ib_spec->eth.val.dst_mac);
870
871                 if (ib_spec->eth.mask.vlan_tag) {
872                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
873                                  vlan_tag, 1);
874                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
875                                  vlan_tag, 1);
876
877                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
878                                  first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
879                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
880                                  first_vid, ntohs(ib_spec->eth.val.vlan_tag));
881
882                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
883                                  first_cfi,
884                                  ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
885                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
886                                  first_cfi,
887                                  ntohs(ib_spec->eth.val.vlan_tag) >> 12);
888
889                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
890                                  first_prio,
891                                  ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
892                         MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
893                                  first_prio,
894                                  ntohs(ib_spec->eth.val.vlan_tag) >> 13);
895                 }
896                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
897                          ethertype, ntohs(ib_spec->eth.mask.ether_type));
898                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
899                          ethertype, ntohs(ib_spec->eth.val.ether_type));
900                 break;
901         case IB_FLOW_SPEC_IPV4:
902                 if (ib_spec->size != sizeof(ib_spec->ipv4))
903                         return -EINVAL;
904
905                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
906                          ethertype, 0xffff);
907                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
908                          ethertype, ETH_P_IP);
909
910                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
911                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
912                        &ib_spec->ipv4.mask.src_ip,
913                        sizeof(ib_spec->ipv4.mask.src_ip));
914                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
915                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
916                        &ib_spec->ipv4.val.src_ip,
917                        sizeof(ib_spec->ipv4.val.src_ip));
918                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
919                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
920                        &ib_spec->ipv4.mask.dst_ip,
921                        sizeof(ib_spec->ipv4.mask.dst_ip));
922                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
923                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
924                        &ib_spec->ipv4.val.dst_ip,
925                        sizeof(ib_spec->ipv4.val.dst_ip));
926                 break;
927         case IB_FLOW_SPEC_TCP:
928                 if (ib_spec->size != sizeof(ib_spec->tcp_udp))
929                         return -EINVAL;
930
931                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
932                          0xff);
933                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
934                          IPPROTO_TCP);
935
936                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport,
937                          ntohs(ib_spec->tcp_udp.mask.src_port));
938                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport,
939                          ntohs(ib_spec->tcp_udp.val.src_port));
940
941                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport,
942                          ntohs(ib_spec->tcp_udp.mask.dst_port));
943                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport,
944                          ntohs(ib_spec->tcp_udp.val.dst_port));
945                 break;
946         case IB_FLOW_SPEC_UDP:
947                 if (ib_spec->size != sizeof(ib_spec->tcp_udp))
948                         return -EINVAL;
949
950                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
951                          0xff);
952                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
953                          IPPROTO_UDP);
954
955                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport,
956                          ntohs(ib_spec->tcp_udp.mask.src_port));
957                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport,
958                          ntohs(ib_spec->tcp_udp.val.src_port));
959
960                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport,
961                          ntohs(ib_spec->tcp_udp.mask.dst_port));
962                 MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
963                          ntohs(ib_spec->tcp_udp.val.dst_port));
964                 break;
965         default:
966                 return -EINVAL;
967         }
968
969         return 0;
970 }
971
972 /* If a flow could catch both multicast and unicast packets,
973  * it won't fall into the multicast flow steering table and this rule
974  * could steal other multicast packets.
975  */
976 static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
977 {
978         struct ib_flow_spec_eth *eth_spec;
979
980         if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
981             ib_attr->size < sizeof(struct ib_flow_attr) +
982             sizeof(struct ib_flow_spec_eth) ||
983             ib_attr->num_of_specs < 1)
984                 return false;
985
986         eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1);
987         if (eth_spec->type != IB_FLOW_SPEC_ETH ||
988             eth_spec->size != sizeof(*eth_spec))
989                 return false;
990
991         return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
992                is_multicast_ether_addr(eth_spec->val.dst_mac);
993 }
994
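/*
 * A flow that carries an IPv4 spec must either leave the ethertype
 * unmasked or match it exactly against ETH_P_IP.
 */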
995 static bool is_valid_attr(struct ib_flow_attr *flow_attr)
996 {
997         union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
998         bool has_ipv4_spec = false;
999         bool eth_type_ipv4 = true;
1000         unsigned int spec_index;
1001
1002         /* Validate that ethertype is correct */
1003         for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
1004                 if (ib_spec->type == IB_FLOW_SPEC_ETH &&
1005                     ib_spec->eth.mask.ether_type) {
1006                         if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) &&
1007                               ib_spec->eth.val.ether_type == htons(ETH_P_IP)))
1008                                 eth_type_ipv4 = false;
1009                 } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) {
1010                         has_ipv4_spec = true;
1011                 }
1012                 ib_spec = (void *)ib_spec + ib_spec->size;
1013         }
1014         return !has_ipv4_spec || eth_type_ipv4;
1015 }
1016
1017 static void put_flow_table(struct mlx5_ib_dev *dev,
1018                            struct mlx5_ib_flow_prio *prio, bool ft_added)
1019 {
1020         prio->refcount -= !!ft_added;
1021         if (!prio->refcount) {
1022                 mlx5_destroy_flow_table(prio->flow_table);
1023                 prio->flow_table = NULL;
1024         }
1025 }
1026
1027 static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
1028 {
1029         struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
1030         struct mlx5_ib_flow_handler *handler = container_of(flow_id,
1031                                                           struct mlx5_ib_flow_handler,
1032                                                           ibflow);
1033         struct mlx5_ib_flow_handler *iter, *tmp;
1034
1035         mutex_lock(&dev->flow_db.lock);
1036
1037         list_for_each_entry_safe(iter, tmp, &handler->list, list) {
1038                 mlx5_del_flow_rule(iter->rule);
1039                 list_del(&iter->list);
1040                 kfree(iter);
1041         }
1042
1043         mlx5_del_flow_rule(handler->rule);
1044         put_flow_table(dev, &dev->flow_db.prios[handler->prio], true);
1045         mutex_unlock(&dev->flow_db.lock);
1046
1047         kfree(handler);
1048
1049         return 0;
1050 }
1051
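/*
 * Pick the flow steering namespace and priority for a flow attribute and
 * lazily create the auto-grouped flow table for that priority.
 */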
1052 #define MLX5_FS_MAX_TYPES        10
1053 #define MLX5_FS_MAX_ENTRIES      32000UL
1054 static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
1055                                                 struct ib_flow_attr *flow_attr)
1056 {
1057         struct mlx5_flow_namespace *ns = NULL;
1058         struct mlx5_ib_flow_prio *prio;
1059         struct mlx5_flow_table *ft;
1060         int num_entries;
1061         int num_groups;
1062         int priority;
1063         int err = 0;
1064
1065         if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
1066                 if (flow_is_multicast_only(flow_attr))
1067                         priority = MLX5_IB_FLOW_MCAST_PRIO;
1068                 else
1069                         priority = flow_attr->priority;
1070                 ns = mlx5_get_flow_namespace(dev->mdev,
1071                                              MLX5_FLOW_NAMESPACE_BYPASS);
1072                 num_entries = MLX5_FS_MAX_ENTRIES;
1073                 num_groups = MLX5_FS_MAX_TYPES;
1074                 prio = &dev->flow_db.prios[priority];
1075         } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1076                    flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
1077                 ns = mlx5_get_flow_namespace(dev->mdev,
1078                                              MLX5_FLOW_NAMESPACE_LEFTOVERS);
1079                 build_leftovers_ft_param(&priority,
1080                                          &num_entries,
1081                                          &num_groups);
1082                 prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
1083         }
1084
1085         if (!ns)
1086                 return ERR_PTR(-ENOTSUPP);
1087
1088         ft = prio->flow_table;
1089         if (!ft) {
1090                 ft = mlx5_create_auto_grouped_flow_table(ns, priority,
1091                                                          num_entries,
1092                                                          num_groups);
1093
1094                 if (!IS_ERR(ft)) {
1095                         prio->refcount = 0;
1096                         prio->flow_table = ft;
1097                 } else {
1098                         err = PTR_ERR(ft);
1099                 }
1100         }
1101
1102         return err ? ERR_PTR(err) : prio;
1103 }
1104
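/*
 * Build the match criteria/value buffers from the user's flow specs and
 * add a forwarding rule to the priority's flow table, steering matching
 * packets to *dst.
 */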
1105 static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
1106                                                      struct mlx5_ib_flow_prio *ft_prio,
1107                                                      struct ib_flow_attr *flow_attr,
1108                                                      struct mlx5_flow_destination *dst)
1109 {
1110         struct mlx5_flow_table  *ft = ft_prio->flow_table;
1111         struct mlx5_ib_flow_handler *handler;
1112         void *ib_flow = flow_attr + 1;
1113         u8 match_criteria_enable = 0;
1114         unsigned int spec_index;
1115         u32 *match_c;
1116         u32 *match_v;
1117         int err = 0;
1118
1119         if (!is_valid_attr(flow_attr))
1120                 return ERR_PTR(-EINVAL);
1121
1122         match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
1123         match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
1124         handler = kzalloc(sizeof(*handler), GFP_KERNEL);
1125         if (!handler || !match_c || !match_v) {
1126                 err = -ENOMEM;
1127                 goto free;
1128         }
1129
1130         INIT_LIST_HEAD(&handler->list);
1131
1132         for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
1133                 err = parse_flow_attr(match_c, match_v, ib_flow);
1134                 if (err < 0)
1135                         goto free;
1136
1137                 ib_flow += ((union ib_flow_spec *)ib_flow)->size;
1138         }
1139
1140         /* Outer header support only */
1141         match_criteria_enable = (!outer_header_zero(match_c)) << 0;
1142         handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable,
1143                                            match_c, match_v,
1144                                            MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
1145                                            MLX5_FS_DEFAULT_FLOW_TAG,
1146                                            dst);
1147
1148         if (IS_ERR(handler->rule)) {
1149                 err = PTR_ERR(handler->rule);
1150                 goto free;
1151         }
1152
1153         handler->prio = ft_prio - dev->flow_db.prios;
1154
1155         ft_prio->flow_table = ft;
1156 free:
1157         if (err)
1158                 kfree(handler);
1159         kfree(match_c);
1160         kfree(match_v);
1161         return err ? ERR_PTR(err) : handler;
1162 }
1163
1164 enum {
1165         LEFTOVERS_MC,
1166         LEFTOVERS_UC,
1167 };
1168
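/*
 * Install catch-all rules in the leftovers table: one for multicast
 * destination MACs and, for IB_FLOW_ATTR_ALL_DEFAULT, a second one for
 * unicast.  The unicast handler is chained to the multicast handler so
 * both are torn down together on destroy.
 */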
1169 static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
1170                                                           struct mlx5_ib_flow_prio *ft_prio,
1171                                                           struct ib_flow_attr *flow_attr,
1172                                                           struct mlx5_flow_destination *dst)
1173 {
1174         struct mlx5_ib_flow_handler *handler_ucast = NULL;
1175         struct mlx5_ib_flow_handler *handler = NULL;
1176
1177         static struct {
1178                 struct ib_flow_attr     flow_attr;
1179                 struct ib_flow_spec_eth eth_flow;
1180         } leftovers_specs[] = {
1181                 [LEFTOVERS_MC] = {
1182                         .flow_attr = {
1183                                 .num_of_specs = 1,
1184                                 .size = sizeof(leftovers_specs[0])
1185                         },
1186                         .eth_flow = {
1187                                 .type = IB_FLOW_SPEC_ETH,
1188                                 .size = sizeof(struct ib_flow_spec_eth),
1189                                 .mask = {.dst_mac = {0x1} },
1190                                 .val =  {.dst_mac = {0x1} }
1191                         }
1192                 },
1193                 [LEFTOVERS_UC] = {
1194                         .flow_attr = {
1195                                 .num_of_specs = 1,
1196                                 .size = sizeof(leftovers_specs[0])
1197                         },
1198                         .eth_flow = {
1199                                 .type = IB_FLOW_SPEC_ETH,
1200                                 .size = sizeof(struct ib_flow_spec_eth),
1201                                 .mask = {.dst_mac = {0x1} },
1202                                 .val = {.dst_mac = {} }
1203                         }
1204                 }
1205         };
1206
1207         handler = create_flow_rule(dev, ft_prio,
1208                                    &leftovers_specs[LEFTOVERS_MC].flow_attr,
1209                                    dst);
1210         if (!IS_ERR(handler) &&
1211             flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
1212                 handler_ucast = create_flow_rule(dev, ft_prio,
1213                                                  &leftovers_specs[LEFTOVERS_UC].flow_attr,
1214                                                  dst);
1215                 if (IS_ERR(handler_ucast)) {
1216                         kfree(handler);
1217                         handler = handler_ucast;
1218                 } else {
1219                         list_add(&handler_ucast->list, &handler->list);
1220                 }
1221         }
1222
1223         return handler;
1224 }
1225
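/*
 * ib_create_flow() entry point: validate the request, look up or create
 * the flow table for the requested priority, and install rules that
 * forward matching packets to the QP's receive TIR.
 */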
1226 static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
1227                                            struct ib_flow_attr *flow_attr,
1228                                            int domain)
1229 {
1230         struct mlx5_ib_dev *dev = to_mdev(qp->device);
1231         struct mlx5_ib_flow_handler *handler = NULL;
1232         struct mlx5_flow_destination *dst = NULL;
1233         struct mlx5_ib_flow_prio *ft_prio;
1234         int err;
1235
1236         if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
1237                 return ERR_PTR(-ENOSPC);
1238
1239         if (domain != IB_FLOW_DOMAIN_USER ||
1240             flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
1241             flow_attr->flags)
1242                 return ERR_PTR(-EINVAL);
1243
1244         dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1245         if (!dst)
1246                 return ERR_PTR(-ENOMEM);
1247
1248         mutex_lock(&dev->flow_db.lock);
1249
1250         ft_prio = get_flow_table(dev, flow_attr);
1251         if (IS_ERR(ft_prio)) {
1252                 err = PTR_ERR(ft_prio);
1253                 goto unlock;
1254         }
1255
1256         dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1257         dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
1258
1259         if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
1260                 handler = create_flow_rule(dev, ft_prio, flow_attr,
1261                                            dst);
1262         } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1263                    flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
1264                 handler = create_leftovers_rule(dev, ft_prio, flow_attr,
1265                                                 dst);
1266         } else {
1267                 err = -EINVAL;
1268                 goto destroy_ft;
1269         }
1270
1271         if (IS_ERR(handler)) {
1272                 err = PTR_ERR(handler);
1273                 handler = NULL;
1274                 goto destroy_ft;
1275         }
1276
1277         ft_prio->refcount++;
1278         mutex_unlock(&dev->flow_db.lock);
1279         kfree(dst);
1280
1281         return &handler->ibflow;
1282
1283 destroy_ft:
1284         put_flow_table(dev, ft_prio, false);
1285 unlock:
1286         mutex_unlock(&dev->flow_db.lock);
1287         kfree(dst);
1288         kfree(handler);
1289         return ERR_PTR(err);
1290 }
1291
1292 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1293 {
1294         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1295         int err;
1296
1297         err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
1298         if (err)
1299                 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
1300                              ibqp->qp_num, gid->raw);
1301
1302         return err;
1303 }
1304
1305 static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1306 {
1307         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1308         int err;
1309
1310         err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
1311         if (err)
1312                 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
1313                              ibqp->qp_num, gid->raw);
1314
1315         return err;
1316 }
1317
1318 static int init_node_data(struct mlx5_ib_dev *dev)
1319 {
1320         int err;
1321
1322         err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
1323         if (err)
1324                 return err;
1325
1326         dev->mdev->rev_id = dev->mdev->pdev->revision;
1327
1328         return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
1329 }
1330
1331 static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
1332                              char *buf)
1333 {
1334         struct mlx5_ib_dev *dev =
1335                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1336
1337         return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
1338 }
1339
1340 static ssize_t show_reg_pages(struct device *device,
1341                               struct device_attribute *attr, char *buf)
1342 {
1343         struct mlx5_ib_dev *dev =
1344                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1345
1346         return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
1347 }
1348
1349 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
1350                         char *buf)
1351 {
1352         struct mlx5_ib_dev *dev =
1353                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1354         return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
1355 }
1356
1357 static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1358                            char *buf)
1359 {
1360         struct mlx5_ib_dev *dev =
1361                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1362         return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(dev->mdev),
1363                        fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
1364 }
1365
1366 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
1367                         char *buf)
1368 {
1369         struct mlx5_ib_dev *dev =
1370                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1371         return sprintf(buf, "%x\n", dev->mdev->rev_id);
1372 }
1373
1374 static ssize_t show_board(struct device *device, struct device_attribute *attr,
1375                           char *buf)
1376 {
1377         struct mlx5_ib_dev *dev =
1378                 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1379         return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
1380                        dev->mdev->board_id);
1381 }
1382
1383 static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
1384 static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
1385 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
1386 static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
1387 static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
1388 static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
1389
1390 static struct device_attribute *mlx5_class_attributes[] = {
1391         &dev_attr_hw_rev,
1392         &dev_attr_fw_ver,
1393         &dev_attr_hca_type,
1394         &dev_attr_board_id,
1395         &dev_attr_fw_pages,
1396         &dev_attr_reg_pages,
1397 };
1398
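/*
 * Translate mlx5_core asynchronous events into IB events and dispatch
 * them to registered consumers while the IB device is active.
 */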
1399 static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
1400                           enum mlx5_dev_event event, unsigned long param)
1401 {
1402         struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
1403         struct ib_event ibev;
1404
1405         u8 port = 0;
1406
1407         switch (event) {
1408         case MLX5_DEV_EVENT_SYS_ERROR:
1409                 ibdev->ib_active = false;
1410                 ibev.event = IB_EVENT_DEVICE_FATAL;
1411                 break;
1412
1413         case MLX5_DEV_EVENT_PORT_UP:
1414                 ibev.event = IB_EVENT_PORT_ACTIVE;
1415                 port = (u8)param;
1416                 break;
1417
1418         case MLX5_DEV_EVENT_PORT_DOWN:
1419                 ibev.event = IB_EVENT_PORT_ERR;
1420                 port = (u8)param;
1421                 break;
1422
1423         case MLX5_DEV_EVENT_PORT_INITIALIZED:
1424                 /* not used by ULPs */
1425                 return;
1426
1427         case MLX5_DEV_EVENT_LID_CHANGE:
1428                 ibev.event = IB_EVENT_LID_CHANGE;
1429                 port = (u8)param;
1430                 break;
1431
1432         case MLX5_DEV_EVENT_PKEY_CHANGE:
1433                 ibev.event = IB_EVENT_PKEY_CHANGE;
1434                 port = (u8)param;
1435                 break;
1436
1437         case MLX5_DEV_EVENT_GUID_CHANGE:
1438                 ibev.event = IB_EVENT_GID_CHANGE;
1439                 port = (u8)param;
1440                 break;
1441
1442         case MLX5_DEV_EVENT_CLIENT_REREG:
1443                 ibev.event = IB_EVENT_CLIENT_REREGISTER;
1444                 port = (u8)param;
1445                 break;
1446         }
1447
1448         ibev.device           = &ibdev->ib_dev;
1449         ibev.element.port_num = port;
1450
1451         if (port < 1 || port > ibdev->num_ports) {
1452                 mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
1453                 return;
1454         }
1455
1456         if (ibdev->ib_active)
1457                 ib_dispatch_event(&ibev);
1458 }
1459
1460 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
1461 {
1462         int port;
1463
1464         for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
1465                 mlx5_query_ext_port_caps(dev, port);
1466 }
1467
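     /*
      * Query device and per-port attributes and cache the pkey and GID
      * table sizes in the core device's port_caps array.
      */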
1468 static int get_port_caps(struct mlx5_ib_dev *dev)
1469 {
1470         struct ib_device_attr *dprops = NULL;
1471         struct ib_port_attr *pprops = NULL;
1472         int err = -ENOMEM;
1473         int port;
1474         struct ib_udata uhw = {.inlen = 0, .outlen = 0};
1475
1476         pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
1477         if (!pprops)
1478                 goto out;
1479
1480         dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
1481         if (!dprops)
1482                 goto out;
1483
1484         err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
1485         if (err) {
1486                 mlx5_ib_warn(dev, "query_device failed %d\n", err);
1487                 goto out;
1488         }
1489
1490         for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
1491                 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
1492                 if (err) {
1493                         mlx5_ib_warn(dev, "query_port %d failed %d\n",
1494                                      port, err);
1495                         break;
1496                 }
1497                 dev->mdev->port_caps[port - 1].pkey_table_len =
1498                                                 dprops->max_pkeys;
1499                 dev->mdev->port_caps[port - 1].gid_table_len =
1500                                                 pprops->gid_tbl_len;
1501                 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
1502                             dprops->max_pkeys, pprops->gid_tbl_len);
1503         }
1504
1505 out:
1506         kfree(pprops);
1507         kfree(dprops);
1508
1509         return err;
1510 }
1511
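     /* Tear down the MR cache and the PD/CQ/QP used for UMR work requests. */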
1512 static void destroy_umrc_res(struct mlx5_ib_dev *dev)
1513 {
1514         int err;
1515
1516         err = mlx5_mr_cache_cleanup(dev);
1517         if (err)
1518                 mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1519
1520         mlx5_ib_destroy_qp(dev->umrc.qp);
1521         ib_destroy_cq(dev->umrc.cq);
1522         ib_dealloc_pd(dev->umrc.pd);
1523 }
1524
1525 enum {
1526         MAX_UMR_WR = 128,
1527 };
1528
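     /*
      * Create the resources needed to post UMR work requests for fast
      * registration and invalidation of memory regions: a PD, a CQ and a
      * driver-internal QP of type MLX5_IB_QPT_REG_UMR, moved manually through
      * the INIT/RTR/RTS states, plus the MR cache.  The semaphore bounds the
      * number of outstanding UMR work requests.
      */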
1529 static int create_umr_res(struct mlx5_ib_dev *dev)
1530 {
1531         struct ib_qp_init_attr *init_attr = NULL;
1532         struct ib_qp_attr *attr = NULL;
1533         struct ib_pd *pd;
1534         struct ib_cq *cq;
1535         struct ib_qp *qp;
1536         struct ib_cq_init_attr cq_attr = {};
1537         int ret;
1538
1539         attr = kzalloc(sizeof(*attr), GFP_KERNEL);
1540         init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
1541         if (!attr || !init_attr) {
1542                 ret = -ENOMEM;
1543                 goto error_0;
1544         }
1545
1546         pd = ib_alloc_pd(&dev->ib_dev);
1547         if (IS_ERR(pd)) {
1548                 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
1549                 ret = PTR_ERR(pd);
1550                 goto error_0;
1551         }
1552
1553         cq_attr.cqe = 128;
1554         cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL,
1555                           &cq_attr);
1556         if (IS_ERR(cq)) {
1557                 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
1558                 ret = PTR_ERR(cq);
1559                 goto error_2;
1560         }
1561         ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1562
1563         init_attr->send_cq = cq;
1564         init_attr->recv_cq = cq;
1565         init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
1566         init_attr->cap.max_send_wr = MAX_UMR_WR;
1567         init_attr->cap.max_send_sge = 1;
1568         init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
1569         init_attr->port_num = 1;
1570         qp = mlx5_ib_create_qp(pd, init_attr, NULL);
1571         if (IS_ERR(qp)) {
1572                 mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
1573                 ret = PTR_ERR(qp);
1574                 goto error_3;
1575         }
1576         qp->device     = &dev->ib_dev;
1577         qp->real_qp    = qp;
1578         qp->uobject    = NULL;
1579         qp->qp_type    = MLX5_IB_QPT_REG_UMR;
1580
1581         attr->qp_state = IB_QPS_INIT;
1582         attr->port_num = 1;
1583         ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
1584                                 IB_QP_PORT, NULL);
1585         if (ret) {
1586                 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
1587                 goto error_4;
1588         }
1589
1590         memset(attr, 0, sizeof(*attr));
1591         attr->qp_state = IB_QPS_RTR;
1592         attr->path_mtu = IB_MTU_256;
1593
1594         ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
1595         if (ret) {
1596                 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
1597                 goto error_4;
1598         }
1599
1600         memset(attr, 0, sizeof(*attr));
1601         attr->qp_state = IB_QPS_RTS;
1602         ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
1603         if (ret) {
1604                 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
1605                 goto error_4;
1606         }
1607
1608         dev->umrc.qp = qp;
1609         dev->umrc.cq = cq;
1610         dev->umrc.pd = pd;
1611
1612         sema_init(&dev->umrc.sem, MAX_UMR_WR);
1613         ret = mlx5_mr_cache_init(dev);
1614         if (ret) {
1615                 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
1616                 goto error_4;
1617         }
1618
1619         kfree(attr);
1620         kfree(init_attr);
1621
1622         return 0;
1623
1624 error_4:
1625         mlx5_ib_destroy_qp(qp);
1626
1627 error_3:
1628         ib_destroy_cq(cq);
1629
1630 error_2:
1631         ib_dealloc_pd(pd);
1632
1633 error_0:
1634         kfree(attr);
1635         kfree(init_attr);
1636         return ret;
1637 }
1638
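     /*
      * Allocate the resources the driver keeps for internal (kernel) use:
      * a PD (p0), a CQ (c0), two XRC domains (x0, x1), an XRC SRQ (s0) and
      * a basic SRQ (s1).  The objects are filled in by hand since they are
      * created without a user context.
      */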
1639 static int create_dev_resources(struct mlx5_ib_resources *devr)
1640 {
1641         struct ib_srq_init_attr attr;
1642         struct mlx5_ib_dev *dev;
1643         struct ib_cq_init_attr cq_attr = {.cqe = 1};
1644         int ret = 0;
1645
1646         dev = container_of(devr, struct mlx5_ib_dev, devr);
1647
1648         devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
1649         if (IS_ERR(devr->p0)) {
1650                 ret = PTR_ERR(devr->p0);
1651                 goto error0;
1652         }
1653         devr->p0->device  = &dev->ib_dev;
1654         devr->p0->uobject = NULL;
1655         atomic_set(&devr->p0->usecnt, 0);
1656
1657         devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
1658         if (IS_ERR(devr->c0)) {
1659                 ret = PTR_ERR(devr->c0);
1660                 goto error1;
1661         }
1662         devr->c0->device        = &dev->ib_dev;
1663         devr->c0->uobject       = NULL;
1664         devr->c0->comp_handler  = NULL;
1665         devr->c0->event_handler = NULL;
1666         devr->c0->cq_context    = NULL;
1667         atomic_set(&devr->c0->usecnt, 0);
1668
1669         devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1670         if (IS_ERR(devr->x0)) {
1671                 ret = PTR_ERR(devr->x0);
1672                 goto error2;
1673         }
1674         devr->x0->device = &dev->ib_dev;
1675         devr->x0->inode = NULL;
1676         atomic_set(&devr->x0->usecnt, 0);
1677         mutex_init(&devr->x0->tgt_qp_mutex);
1678         INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
1679
1680         devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1681         if (IS_ERR(devr->x1)) {
1682                 ret = PTR_ERR(devr->x1);
1683                 goto error3;
1684         }
1685         devr->x1->device = &dev->ib_dev;
1686         devr->x1->inode = NULL;
1687         atomic_set(&devr->x1->usecnt, 0);
1688         mutex_init(&devr->x1->tgt_qp_mutex);
1689         INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
1690
1691         memset(&attr, 0, sizeof(attr));
1692         attr.attr.max_sge = 1;
1693         attr.attr.max_wr = 1;
1694         attr.srq_type = IB_SRQT_XRC;
1695         attr.ext.xrc.cq = devr->c0;
1696         attr.ext.xrc.xrcd = devr->x0;
1697
1698         devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1699         if (IS_ERR(devr->s0)) {
1700                 ret = PTR_ERR(devr->s0);
1701                 goto error4;
1702         }
1703         devr->s0->device        = &dev->ib_dev;
1704         devr->s0->pd            = devr->p0;
1705         devr->s0->uobject       = NULL;
1706         devr->s0->event_handler = NULL;
1707         devr->s0->srq_context   = NULL;
1708         devr->s0->srq_type      = IB_SRQT_XRC;
1709         devr->s0->ext.xrc.xrcd  = devr->x0;
1710         devr->s0->ext.xrc.cq    = devr->c0;
1711         atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
1712         atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
1713         atomic_inc(&devr->p0->usecnt);
1714         atomic_set(&devr->s0->usecnt, 0);
1715
1716         memset(&attr, 0, sizeof(attr));
1717         attr.attr.max_sge = 1;
1718         attr.attr.max_wr = 1;
1719         attr.srq_type = IB_SRQT_BASIC;
1720         devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1721         if (IS_ERR(devr->s1)) {
1722                 ret = PTR_ERR(devr->s1);
1723                 goto error5;
1724         }
1725         devr->s1->device        = &dev->ib_dev;
1726         devr->s1->pd            = devr->p0;
1727         devr->s1->uobject       = NULL;
1728         devr->s1->event_handler = NULL;
1729         devr->s1->srq_context   = NULL;
1730         devr->s1->srq_type      = IB_SRQT_BASIC;
1731         devr->s1->ext.xrc.cq    = devr->c0;
1732         atomic_inc(&devr->p0->usecnt);
1733         atomic_set(&devr->s1->usecnt, 0);
1734
1735         return 0;
1736
1737 error5:
1738         mlx5_ib_destroy_srq(devr->s0);
1739 error4:
1740         mlx5_ib_dealloc_xrcd(devr->x1);
1741 error3:
1742         mlx5_ib_dealloc_xrcd(devr->x0);
1743 error2:
1744         mlx5_ib_destroy_cq(devr->c0);
1745 error1:
1746         mlx5_ib_dealloc_pd(devr->p0);
1747 error0:
1748         return ret;
1749 }
1750
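     /* Release the objects allocated by create_dev_resources(). */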
1751 static void destroy_dev_resources(struct mlx5_ib_resources *devr)
1752 {
1753         mlx5_ib_destroy_srq(devr->s1);
1754         mlx5_ib_destroy_srq(devr->s0);
1755         mlx5_ib_dealloc_xrcd(devr->x0);
1756         mlx5_ib_dealloc_xrcd(devr->x1);
1757         mlx5_ib_destroy_cq(devr->c0);
1758         mlx5_ib_dealloc_pd(devr->p0);
1759 }
1760
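     /*
      * Fill in the immutable port attributes (pkey/GID table sizes, core
      * capability flags, maximum MAD size) cached by the IB core.
      */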
1761 static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
1762                                struct ib_port_immutable *immutable)
1763 {
1764         struct ib_port_attr attr;
1765         int err;
1766
1767         err = mlx5_ib_query_port(ibdev, port_num, &attr);
1768         if (err)
1769                 return err;
1770
1771         immutable->pkey_tbl_len = attr.pkey_tbl_len;
1772         immutable->gid_tbl_len = attr.gid_tbl_len;
1773         immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
1774         immutable->max_mad_size = IB_MGMT_MAD_SIZE;
1775
1776         return 0;
1777 }
1778
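     /*
      * Probe callback invoked by mlx5_core for every new device.  Allocates
      * the ib_device, fills in the verbs ops and uverbs command masks,
      * creates the internal resources and the UMR QP, registers with the IB
      * core and creates the sysfs attributes.  The returned pointer is the
      * context passed back to the remove and event callbacks.
      */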
1779 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
1780 {
1781         struct mlx5_ib_dev *dev;
1782         int err;
1783         int i;
1784
1785         /* don't create IB instance over Eth ports, no RoCE yet! */
1786         if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
1787                 return NULL;
1788
1789         printk_once(KERN_INFO "%s", mlx5_version);
1790
1791         dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
1792         if (!dev)
1793                 return NULL;
1794
1795         dev->mdev = mdev;
1796
1797         err = get_port_caps(dev);
1798         if (err)
1799                 goto err_dealloc;
1800
1801         if (mlx5_use_mad_ifc(dev))
1802                 get_ext_port_caps(dev);
1803
1804         MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
1805
1806         strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
1807         dev->ib_dev.owner               = THIS_MODULE;
1808         dev->ib_dev.node_type           = RDMA_NODE_IB_CA;
1809         dev->ib_dev.local_dma_lkey      = 0 /* not supported for now */;
1810         dev->num_ports          = MLX5_CAP_GEN(mdev, num_ports);
1811         dev->ib_dev.phys_port_cnt     = dev->num_ports;
1812         dev->ib_dev.num_comp_vectors    =
1813                 dev->mdev->priv.eq_table.num_comp_vectors;
1814         dev->ib_dev.dma_device  = &mdev->pdev->dev;
1815
1816         dev->ib_dev.uverbs_abi_ver      = MLX5_IB_UVERBS_ABI_VERSION;
1817         dev->ib_dev.uverbs_cmd_mask     =
1818                 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
1819                 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
1820                 (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
1821                 (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
1822                 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
1823                 (1ull << IB_USER_VERBS_CMD_REG_MR)              |
1824                 (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
1825                 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
1826                 (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
1827                 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
1828                 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
1829                 (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
1830                 (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
1831                 (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
1832                 (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
1833                 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
1834                 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
1835                 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
1836                 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
1837                 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
1838                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
1839                 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
1840                 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
1841         dev->ib_dev.uverbs_ex_cmd_mask =
1842                 (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
1843
1844         dev->ib_dev.query_device        = mlx5_ib_query_device;
1845         dev->ib_dev.query_port          = mlx5_ib_query_port;
1846         dev->ib_dev.query_gid           = mlx5_ib_query_gid;
1847         dev->ib_dev.query_pkey          = mlx5_ib_query_pkey;
1848         dev->ib_dev.modify_device       = mlx5_ib_modify_device;
1849         dev->ib_dev.modify_port         = mlx5_ib_modify_port;
1850         dev->ib_dev.alloc_ucontext      = mlx5_ib_alloc_ucontext;
1851         dev->ib_dev.dealloc_ucontext    = mlx5_ib_dealloc_ucontext;
1852         dev->ib_dev.mmap                = mlx5_ib_mmap;
1853         dev->ib_dev.alloc_pd            = mlx5_ib_alloc_pd;
1854         dev->ib_dev.dealloc_pd          = mlx5_ib_dealloc_pd;
1855         dev->ib_dev.create_ah           = mlx5_ib_create_ah;
1856         dev->ib_dev.query_ah            = mlx5_ib_query_ah;
1857         dev->ib_dev.destroy_ah          = mlx5_ib_destroy_ah;
1858         dev->ib_dev.create_srq          = mlx5_ib_create_srq;
1859         dev->ib_dev.modify_srq          = mlx5_ib_modify_srq;
1860         dev->ib_dev.query_srq           = mlx5_ib_query_srq;
1861         dev->ib_dev.destroy_srq         = mlx5_ib_destroy_srq;
1862         dev->ib_dev.post_srq_recv       = mlx5_ib_post_srq_recv;
1863         dev->ib_dev.create_qp           = mlx5_ib_create_qp;
1864         dev->ib_dev.modify_qp           = mlx5_ib_modify_qp;
1865         dev->ib_dev.query_qp            = mlx5_ib_query_qp;
1866         dev->ib_dev.destroy_qp          = mlx5_ib_destroy_qp;
1867         dev->ib_dev.post_send           = mlx5_ib_post_send;
1868         dev->ib_dev.post_recv           = mlx5_ib_post_recv;
1869         dev->ib_dev.create_cq           = mlx5_ib_create_cq;
1870         dev->ib_dev.modify_cq           = mlx5_ib_modify_cq;
1871         dev->ib_dev.resize_cq           = mlx5_ib_resize_cq;
1872         dev->ib_dev.destroy_cq          = mlx5_ib_destroy_cq;
1873         dev->ib_dev.poll_cq             = mlx5_ib_poll_cq;
1874         dev->ib_dev.req_notify_cq       = mlx5_ib_arm_cq;
1875         dev->ib_dev.get_dma_mr          = mlx5_ib_get_dma_mr;
1876         dev->ib_dev.reg_user_mr         = mlx5_ib_reg_user_mr;
1877         dev->ib_dev.dereg_mr            = mlx5_ib_dereg_mr;
1878         dev->ib_dev.attach_mcast        = mlx5_ib_mcg_attach;
1879         dev->ib_dev.detach_mcast        = mlx5_ib_mcg_detach;
1880         dev->ib_dev.process_mad         = mlx5_ib_process_mad;
1881         dev->ib_dev.alloc_mr            = mlx5_ib_alloc_mr;
1882         dev->ib_dev.map_mr_sg           = mlx5_ib_map_mr_sg;
1883         dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
1884         dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
1885
1886         mlx5_ib_internal_fill_odp_caps(dev);
1887
1888         if (MLX5_CAP_GEN(mdev, xrc)) {
1889                 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
1890                 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
1891                 dev->ib_dev.uverbs_cmd_mask |=
1892                         (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
1893                         (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
1894         }
1895
1896         if (mlx5_ib_port_link_layer(&dev->ib_dev) ==
1897             IB_LINK_LAYER_ETHERNET) {
1898                 dev->ib_dev.create_flow = mlx5_ib_create_flow;
1899                 dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
1900                 dev->ib_dev.uverbs_ex_cmd_mask |=
1901                         (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
1902                         (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
1903         }
1904         err = init_node_data(dev);
1905         if (err)
1906                 goto err_dealloc;
1907
1908         mutex_init(&dev->flow_db.lock);
1909         mutex_init(&dev->cap_mask_mutex);
1910
1911         err = create_dev_resources(&dev->devr);
1912         if (err)
1913                 goto err_dealloc;
1914
1915         err = mlx5_ib_odp_init_one(dev);
1916         if (err)
1917                 goto err_rsrc;
1918
1919         err = ib_register_device(&dev->ib_dev, NULL);
1920         if (err)
1921                 goto err_odp;
1922
1923         err = create_umr_res(dev);
1924         if (err)
1925                 goto err_dev;
1926
1927         for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
1928                 err = device_create_file(&dev->ib_dev.dev,
1929                                          mlx5_class_attributes[i]);
1930                 if (err)
1931                         goto err_umrc;
1932         }
1933
1934         dev->ib_active = true;
1935
1936         return dev;
1937
1938 err_umrc:
1939         destroy_umrc_res(dev);
1940
1941 err_dev:
1942         ib_unregister_device(&dev->ib_dev);
1943
1944 err_odp:
1945         mlx5_ib_odp_remove_one(dev);
1946
1947 err_rsrc:
1948         destroy_dev_resources(&dev->devr);
1949
1950 err_dealloc:
1951         ib_dealloc_device(&dev->ib_dev);
1952
1953         return NULL;
1954 }
1955
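     /* Remove callback: unregister the IB device and free its resources. */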
1956 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
1957 {
1958         struct mlx5_ib_dev *dev = context;
1959
1960         ib_unregister_device(&dev->ib_dev);
1961         destroy_umrc_res(dev);
1962         mlx5_ib_odp_remove_one(dev);
1963         destroy_dev_resources(&dev->devr);
1964         ib_dealloc_device(&dev->ib_dev);
1965 }
1966
1967 static struct mlx5_interface mlx5_ib_interface = {
1968         .add            = mlx5_ib_add,
1969         .remove         = mlx5_ib_remove,
1970         .event          = mlx5_ib_event,
1971         .protocol       = MLX5_INTERFACE_PROTOCOL_IB,
1972 };
1973
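     /* Set up on-demand paging (ODP) support, then register with mlx5_core. */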
1974 static int __init mlx5_ib_init(void)
1975 {
1976         int err;
1977
1978         if (deprecated_prof_sel != 2)
1979                 pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
1980
1981         err = mlx5_ib_odp_init();
1982         if (err)
1983                 return err;
1984
1985         err = mlx5_register_interface(&mlx5_ib_interface);
1986         if (err)
1987                 goto clean_odp;
1988
1989         return err;
1990
1991 clean_odp:
1992         mlx5_ib_odp_cleanup();
1993         return err;
1994 }
1995
1996 static void __exit mlx5_ib_cleanup(void)
1997 {
1998         mlx5_unregister_interface(&mlx5_ib_interface);
1999         mlx5_ib_odp_cleanup();
2000 }
2001
2002 module_init(mlx5_ib_init);
2003 module_exit(mlx5_ib_cleanup);