/*
 * Copyright(c) 2015-2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/net.h>
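/*
 * Number of OPA partition-table blocks (of OPA_PARTITION_TABLE_BLK_SIZE
 * 16-bit entries each) that fit in the data payload of one directed-route
 * SMP.
 */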
#define OPA_NUM_PKEY_BLOCKS_PER_SMP (OPA_SMP_DR_DATA_SIZE \
			/ (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16)))

#include "hfi.h"
#include "mad.h"
#include "trace.h"
#include "qp.h"
#include "vnic.h"

/* the reset value from the FM is supposed to be 0xffff, handle both */
#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
#define OPA_LINK_WIDTH_RESET 0xffff

static int reply(struct ib_mad_hdr *smp)
{
	/*
	 * The verbs framework will handle the directed/LID route
	 * packet changes.
	 */
	smp->method = IB_MGMT_METHOD_GET_RESP;
	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
		smp->status |= IB_SMP_DIRECTION;
	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}

static inline void clear_opa_smp_data(struct opa_smp *smp)
{
	void *data = opa_get_smp_data(smp);
	size_t size = opa_get_smp_data_size(smp);

	memset(data, 0, size);
}

void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
{
	struct ib_event event;

	event.event = IB_EVENT_PKEY_CHANGE;
	event.device = &dd->verbs_dev.rdi.ibdev;
	event.element.port_num = port;
	ib_dispatch_event(&event);
}

static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
{
	struct ib_mad_send_buf *send_buf;
	struct ib_mad_agent *agent;
	struct opa_smp *smp;
	int ret;
	unsigned long flags;
	unsigned long timeout;
	int pkey_idx;
	u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;

	agent = ibp->rvp.send_agent;
	if (!agent)
		return;

	/* o14-3.2.1: only send traps while the port is ACTIVE */
	if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE)
		return;

	/* o14-2: rate-limit repeated traps */
	if (ibp->rvp.trap_timeout && time_before(jiffies,
						 ibp->rvp.trap_timeout))
		return;

	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
	if (pkey_idx < 0) {
		pr_warn("%s: failed to find limited mgmt pkey, defaulting 0x%x\n",
			__func__, hfi1_get_pkey(ibp, 1));
		pkey_idx = 1;
	}

	send_buf = ib_create_send_mad(agent, qpn, pkey_idx, 0,
				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
				      GFP_ATOMIC, IB_MGMT_BASE_VERSION);
	if (IS_ERR(send_buf))
		return;

	smp = send_buf->mad;
	smp->base_version = OPA_MGMT_BASE_VERSION;
	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
	smp->class_version = OPA_SM_CLASS_VERSION;
	smp->method = IB_MGMT_METHOD_TRAP;
	ibp->rvp.tid++;
	smp->tid = cpu_to_be64(ibp->rvp.tid);
	smp->attr_id = IB_SMP_ATTR_NOTICE;
	/* o14-1: smp->mkey = 0; */
	memcpy(smp->route.lid.data, data, len);

	spin_lock_irqsave(&ibp->rvp.lock, flags);
	if (!ibp->rvp.sm_ah) {
		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
			struct ib_ah *ah;

			ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
			if (IS_ERR(ah)) {
				ret = PTR_ERR(ah);
			} else {
				send_buf->ah = ah;
				ibp->rvp.sm_ah = ibah_to_rvtah(ah);
				ret = 0;
			}
		} else {
			ret = -EINVAL;
		}
	} else {
		send_buf->ah = &ibp->rvp.sm_ah->ibah;
		ret = 0;
	}
	spin_unlock_irqrestore(&ibp->rvp.lock, flags);

	if (!ret)
		ret = ib_post_send_mad(send_buf, NULL);
	if (!ret) {
		/* wait 4.096 usec * 2**subnet_timeout before the next trap */
		timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
		ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
	} else {
		ib_free_send_mad(send_buf);
		ibp->rvp.trap_timeout = 0;
	}
}

/*
 * Send a bad [PQ]_Key trap (ch. 14.3.8).
 */
void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
		    u32 qp1, u32 qp2, u16 lid1, u16 lid2)
{
	struct opa_mad_notice_attr data;
	u32 lid = ppd_from_ibp(ibp)->lid;
	u32 _lid1 = lid1;
	u32 _lid2 = lid2;

	memset(&data, 0, sizeof(data));

	if (trap_num == OPA_TRAP_BAD_P_KEY)
		ibp->rvp.pkey_violations++;
	else
		ibp->rvp.qkey_violations++;
	ibp->rvp.n_pkt_drops++;

	/* Send violation trap */
	data.generic_type = IB_NOTICE_TYPE_SECURITY;
	data.prod_type_lsb = IB_NOTICE_PROD_CA;
	data.trap_num = trap_num;
	data.issuer_lid = cpu_to_be32(lid);
	data.ntc_257_258.lid1 = cpu_to_be32(_lid1);
	data.ntc_257_258.lid2 = cpu_to_be32(_lid2);
	data.ntc_257_258.key = cpu_to_be32(key);
	data.ntc_257_258.sl = sl << 3;
	data.ntc_257_258.qp1 = cpu_to_be32(qp1);
	data.ntc_257_258.qp2 = cpu_to_be32(qp2);

	send_trap(ibp, &data, sizeof(data));
}

/*
 * Send a bad M_Key trap (ch. 14.3.9).
 */
static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
		     __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
{
	struct opa_mad_notice_attr data;
	u32 lid = ppd_from_ibp(ibp)->lid;

	memset(&data, 0, sizeof(data));
	/* Send violation trap */
	data.generic_type = IB_NOTICE_TYPE_SECURITY;
	data.prod_type_lsb = IB_NOTICE_PROD_CA;
	data.trap_num = OPA_TRAP_BAD_M_KEY;
	data.issuer_lid = cpu_to_be32(lid);
	data.ntc_256.lid = data.issuer_lid;
	data.ntc_256.method = mad->method;
	data.ntc_256.attr_id = mad->attr_id;
	data.ntc_256.attr_mod = mad->attr_mod;
	data.ntc_256.mkey = mkey;
	if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
		data.ntc_256.dr_slid = dr_slid;
		data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
		if (hop_cnt > ARRAY_SIZE(data.ntc_256.dr_rtn_path)) {
			data.ntc_256.dr_trunc_hop |=
				IB_NOTICE_TRAP_DR_TRUNC;
			hop_cnt = ARRAY_SIZE(data.ntc_256.dr_rtn_path);
		}
		data.ntc_256.dr_trunc_hop |= hop_cnt;
		memcpy(data.ntc_256.dr_rtn_path, return_path,
		       hop_cnt);
	}

	send_trap(ibp, &data, sizeof(data));
}

/*
 * Send a Port Capability Mask Changed trap (ch. 14.3.11).
 */
void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
{
	struct opa_mad_notice_attr data;
	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
	struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
	u32 lid = ppd_from_ibp(ibp)->lid;

	memset(&data, 0, sizeof(data));

	data.generic_type = IB_NOTICE_TYPE_INFO;
	data.prod_type_lsb = IB_NOTICE_PROD_CA;
	data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
	data.issuer_lid = cpu_to_be32(lid);
	data.ntc_144.lid = data.issuer_lid;
	data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);

	send_trap(ibp, &data, sizeof(data));
}

/*
 * Send a System Image GUID Changed trap (ch. 14.3.12).
 */
void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
{
	struct opa_mad_notice_attr data;
	u32 lid = ppd_from_ibp(ibp)->lid;

	memset(&data, 0, sizeof(data));

	data.generic_type = IB_NOTICE_TYPE_INFO;
	data.prod_type_lsb = IB_NOTICE_PROD_CA;
	data.trap_num = OPA_TRAP_CHANGE_SYSGUID;
	data.issuer_lid = cpu_to_be32(lid);
	data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
	data.ntc_145.lid = data.issuer_lid;

	send_trap(ibp, &data, sizeof(data));
}

/*
 * Send a Node Description Changed trap (ch. 14.3.13).
 */
void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
{
	struct opa_mad_notice_attr data;
	u32 lid = ppd_from_ibp(ibp)->lid;

	memset(&data, 0, sizeof(data));

	data.generic_type = IB_NOTICE_TYPE_INFO;
	data.prod_type_lsb = IB_NOTICE_PROD_CA;
	data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
	data.issuer_lid = cpu_to_be32(lid);
	data.ntc_144.lid = data.issuer_lid;
	data.ntc_144.change_flags =
		cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);

	send_trap(ibp, &data, sizeof(data));
}

static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
				   u8 *data, struct ib_device *ibdev,
				   u8 port, u32 *resp_len)
{
	struct opa_node_description *nd;

	if (am) {
		smp->status |= IB_SMP_INVALID_FIELD;
		return reply((struct ib_mad_hdr *)smp);
	}

	nd = (struct opa_node_description *)data;

	memcpy(nd->data, ibdev->node_desc, sizeof(nd->data));

	if (resp_len)
		*resp_len += sizeof(*nd);

	return reply((struct ib_mad_hdr *)smp);
}

static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
				   struct ib_device *ibdev, u8 port,
				   u32 *resp_len)
{
	struct opa_node_info *ni;
	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
	unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */

	ni = (struct opa_node_info *)data;

	/* GUID 0 is illegal */
	if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
		smp->status |= IB_SMP_INVALID_FIELD;
		return reply((struct ib_mad_hdr *)smp);
	}

	ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
	ni->base_version = OPA_MGMT_BASE_VERSION;
	ni->class_version = OPA_SM_CLASS_VERSION;
	ni->node_type = 1;     /* channel adapter */
	ni->num_ports = ibdev->phys_port_cnt;
	/* This is already in network order */
	ni->system_image_guid = ib_hfi1_sys_image_guid;
	ni->node_guid = ibdev->node_guid;
	ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
	ni->device_id = cpu_to_be16(dd->pcidev->device);
	ni->revision = cpu_to_be32(dd->minrev);
	ni->local_port_num = port;
	ni->vendor_id[0] = dd->oui1;
	ni->vendor_id[1] = dd->oui2;
	ni->vendor_id[2] = dd->oui3;

	if (resp_len)
		*resp_len += sizeof(*ni);

	return reply((struct ib_mad_hdr *)smp);
}

static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
			     u8 port)
{
	struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
	unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */

	/* GUID 0 is illegal */
	if (smp->attr_mod || pidx >= dd->num_pports ||
	    ibdev->node_guid == 0 ||
	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
		smp->status |= IB_SMP_INVALID_FIELD;
		return reply((struct ib_mad_hdr *)smp);
	}

	nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
	nip->base_version = OPA_MGMT_BASE_VERSION;
	nip->class_version = OPA_SM_CLASS_VERSION;
	nip->node_type = 1;     /* channel adapter */
	nip->num_ports = ibdev->phys_port_cnt;
	/* This is already in network order */
	nip->sys_guid = ib_hfi1_sys_image_guid;
	nip->node_guid = ibdev->node_guid;
	nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
	nip->device_id = cpu_to_be16(dd->pcidev->device);
	nip->revision = cpu_to_be32(dd->minrev);
	nip->local_port_num = port;
	nip->vendor_id[0] = dd->oui1;
	nip->vendor_id[1] = dd->oui2;
	nip->vendor_id[2] = dd->oui3;

	return reply((struct ib_mad_hdr *)smp);
}

static void set_link_width_enabled(struct hfi1_pportdata *ppd, u32 w)
{
	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_ENB, w);
}

static void set_link_width_downgrade_enabled(struct hfi1_pportdata *ppd, u32 w)
{
	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_DG_ENB, w);
}

static void set_link_speed_enabled(struct hfi1_pportdata *ppd, u32 s)
{
	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_SPD_ENB, s);
}

static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
		      int mad_flags, __be64 mkey, __be32 dr_slid,
		      u8 return_path[], u8 hop_cnt)
{
	int valid_mkey = 0;
	int ret = 0;

	/* Is the mkey in the process of expiring? */
	if (ibp->rvp.mkey_lease_timeout &&
	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
		/* Clear timeout and mkey protection field. */
		ibp->rvp.mkey_lease_timeout = 0;
		ibp->rvp.mkeyprot = 0;
	}

	if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->rvp.mkey == 0 ||
	    ibp->rvp.mkey == mkey)
		valid_mkey = 1;

	/* Unset lease timeout on any valid Get/Set/TrapRepress */
	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
	    (mad->method == IB_MGMT_METHOD_GET ||
	     mad->method == IB_MGMT_METHOD_SET ||
	     mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
		ibp->rvp.mkey_lease_timeout = 0;

	if (!valid_mkey) {
		switch (mad->method) {
		case IB_MGMT_METHOD_GET:
			/* Bad mkey not a violation below level 2 */
			if (ibp->rvp.mkeyprot < 2)
				break;
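			/*
			 * At mkeyprot >= 2 a bad mkey on a Get is also a
			 * violation, so fall through to the accounting and
			 * trap generation below.
			 */
			/* fall through */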
		case IB_MGMT_METHOD_SET:
		case IB_MGMT_METHOD_TRAP_REPRESS:
			if (ibp->rvp.mkey_violations != 0xFFFF)
				++ibp->rvp.mkey_violations;
			if (!ibp->rvp.mkey_lease_timeout &&
			    ibp->rvp.mkey_lease_period)
				ibp->rvp.mkey_lease_timeout = jiffies +
					ibp->rvp.mkey_lease_period * HZ;
			/* Generate a trap notice. */
			bad_mkey(ibp, mad, mkey, dr_slid, return_path,
				 hop_cnt);
			ret = 1;
		}
	}

	return ret;
}

/*
 * The SMA caches reads from LCB registers in case the LCB is unavailable.
 * (The LCB is unavailable in certain link states, for example.)
 */
struct lcb_datum {
	u32 off;
	u64 val;
};

static struct lcb_datum lcb_cache[] = {
	{ DC_LCB_STS_ROUND_TRIP_LTP_CNT, 0 },
};
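/*
 * Only the round-trip LTP count is cached today; add entries to
 * lcb_cache[] above to cache further LCB registers.
 */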

static int write_lcb_cache(u32 off, u64 val)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
		if (lcb_cache[i].off == off) {
			lcb_cache[i].val = val;
			return 0;
		}
	}

	pr_warn("%s bad offset 0x%x\n", __func__, off);
	return -1;
}

static int read_lcb_cache(u32 off, u64 *val)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
		if (lcb_cache[i].off == off) {
			*val = lcb_cache[i].val;
			return 0;
		}
	}

	pr_warn("%s bad offset 0x%x\n", __func__, off);
	return -1;
}

void read_ltp_rtt(struct hfi1_devdata *dd)
{
	u64 reg;

	if (read_lcb_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT, &reg))
		dd_dev_err(dd, "%s: unable to read LTP RTT\n", __func__);
	else
		write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
}

static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
				   struct ib_device *ibdev, u8 port,
				   u32 *resp_len)
{
	int i;
	struct hfi1_devdata *dd;
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;
	struct opa_port_info *pi = (struct opa_port_info *)data;
	u8 mtu;
	u8 credit_rate;
	u8 is_beaconing_active;
	u32 state;
	u32 num_ports = OPA_AM_NPORT(am);
	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
	u32 buffer_units;
	u64 tmp = 0;

	if (num_ports != 1) {
		smp->status |= IB_SMP_INVALID_FIELD;
		return reply((struct ib_mad_hdr *)smp);
	}

	dd = dd_from_ibdev(ibdev);
	/* IB numbers ports from 1, hw from 0 */
	ppd = dd->pport + (port - 1);
	ibp = &ppd->ibport_data;

	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
		smp->status |= IB_SMP_INVALID_FIELD;
		return reply((struct ib_mad_hdr *)smp);
	}

	pi->lid = cpu_to_be32(ppd->lid);

	/* Only return the mkey if the protection field allows it. */
	if (!(smp->method == IB_MGMT_METHOD_GET &&
	      ibp->rvp.mkey != smp->mkey &&
	      ibp->rvp.mkeyprot == 1))
		pi->mkey = ibp->rvp.mkey;

	pi->subnet_prefix = ibp->rvp.gid_prefix;
	pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
	pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
	pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
	pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
	pi->sa_qp = cpu_to_be32(ppd->sa_qp);

	pi->link_width.enabled = cpu_to_be16(ppd->link_width_enabled);
	pi->link_width.supported = cpu_to_be16(ppd->link_width_supported);
	pi->link_width.active = cpu_to_be16(ppd->link_width_active);

	pi->link_width_downgrade.supported =
			cpu_to_be16(ppd->link_width_downgrade_supported);
	pi->link_width_downgrade.enabled =
			cpu_to_be16(ppd->link_width_downgrade_enabled);
	pi->link_width_downgrade.tx_active =
			cpu_to_be16(ppd->link_width_downgrade_tx_active);
	pi->link_width_downgrade.rx_active =
			cpu_to_be16(ppd->link_width_downgrade_rx_active);

	pi->link_speed.supported = cpu_to_be16(ppd->link_speed_supported);
	pi->link_speed.active = cpu_to_be16(ppd->link_speed_active);
	pi->link_speed.enabled = cpu_to_be16(ppd->link_speed_enabled);

	state = driver_lstate(ppd);

	if (start_of_sm_config && (state == IB_PORT_INIT))
		ppd->is_sm_config_started = 1;

	pi->port_phys_conf = (ppd->port_type & 0xf);

	pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
	pi->port_states.ledenable_offlinereason |=
		ppd->is_sm_config_started << 5;
	/*
	 * This pairs with the memory barrier in hfi1_start_led_override to
	 * ensure that we read the correct state of LED beaconing represented
	 * by led_override_timer_active
	 */
	smp_rmb();
	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
	pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
	pi->port_states.ledenable_offlinereason |=
		ppd->offline_disabled_reason;

	pi->port_states.portphysstate_portstate =
		(hfi1_ibphys_portstate(ppd) << 4) | state;

	pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;

	memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
	for (i = 0; i < ppd->vls_supported; i++) {
		mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
		if ((i % 2) == 0)
			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
		else
			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
	}
	/* don't forget VL 15 */
	mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
	pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
	pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
	pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
	pi->partenforce_filterraw |=
		(ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
	if (ppd->part_enforce & HFI1_PART_ENFORCE_IN)
		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
	if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
	pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
	/* P_KeyViolations are counted by hardware. */
	pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
	pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);

	pi->vl.cap = ppd->vls_supported;
	pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
	pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
	pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);

	pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;

	pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
					  OPA_PORT_LINK_MODE_OPA << 5 |
					  OPA_PORT_LINK_MODE_OPA);

	pi->port_ltp_crc_mode = cpu_to_be16(ppd->port_ltp_crc_mode);

	pi->port_mode = cpu_to_be16(
				ppd->is_active_optimize_enabled ?
					OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);

	pi->port_packet_format.supported =
		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
			    OPA_PORT_PACKET_FORMAT_16B);
	pi->port_packet_format.enabled =
		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
			    OPA_PORT_PACKET_FORMAT_16B);
	/* flit_control.interleave is (OPA V1, version .76):
	 * bits		use
	 * ----		---
	 * 2		res
	 * 2		DistanceSupported
	 * 2		DistanceEnabled
	 * 5		MaxNextLevelTxEnabled
	 * 5		MaxNextLevelRxSupported
	 *
	 * HFI supports only "distance mode 1" (see OPA V1, version .76,
	 * section 9.6.2), so set DistanceSupported, DistanceEnabled
	 * to 0x1; 0x1400 encodes exactly those two fields, with all
	 * other fields 0.
	 */
	pi->flit_control.interleave = cpu_to_be16(0x1400);

	pi->link_down_reason = ppd->local_link_down_reason.sma;
	pi->neigh_link_down_reason = ppd->neigh_link_down_reason.sma;
	pi->port_error_action = cpu_to_be32(ppd->port_error_action);
	pi->mtucap = mtu_to_enum(hfi1_max_mtu, IB_MTU_4096);

	/* 32.768 usec. response time (guessing) */
	pi->resptimevalue = 3;

	pi->local_port_num = port;

	/* buffer info for FM */
	pi->overall_buffer_space = cpu_to_be16(dd->link_credits);

	pi->neigh_node_guid = cpu_to_be64(ppd->neighbor_guid);
	pi->neigh_port_num = ppd->neighbor_port_number;
	pi->port_neigh_mode =
		(ppd->neighbor_type & OPA_PI_MASK_NEIGH_NODE_TYPE) |
		(ppd->mgmt_allowed ? OPA_PI_MASK_NEIGH_MGMT_ALLOWED : 0) |
		(ppd->neighbor_fm_security ?
			OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS : 0);

	/* HFIs shall always return VL15 credits to their
	 * neighbor in a timely manner, without any credit return pacing.
	 */
	credit_rate = 0;
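	/*
	 * Pack the buffer-unit fields: vAU in the low bits, vCU at
	 * shift 3, the VL15 credit rate at shift 6 (0 means no pacing),
	 * and the VL15 init value at shift 11, per the
	 * OPA_PI_MASK_BUF_UNIT_* masks.
	 */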
	buffer_units  = (dd->vau) & OPA_PI_MASK_BUF_UNIT_BUF_ALLOC;
	buffer_units |= (dd->vcu << 3) & OPA_PI_MASK_BUF_UNIT_CREDIT_ACK;
	buffer_units |= (credit_rate << 6) &
				OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE;
	buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
	pi->buffer_units = cpu_to_be32(buffer_units);

	pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported |
				       OPA_CAP_MASK3_IsEthOnFabricSupported);
	/* Driver does not support mcast/collective configuration */
	pi->opa_cap_mask &=
		cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported);
	pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7)
					    << 3 | (HFI1_MCAST_NR & 0x7));

	/* HFI supports a replay buffer 128 LTPs in size */
	pi->replay_depth.buffer = 0x80;
	/* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
	read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);

	/*
	 * this counter is 16 bits wide, but the replay_depth.wire
	 * variable is only 8 bits
	 */
	if (tmp > 0xff)
		tmp = 0xff;
	pi->replay_depth.wire = tmp;

	if (resp_len)
		*resp_len += sizeof(struct opa_port_info);

	return reply((struct ib_mad_hdr *)smp);
}

/**
 * get_pkeys - return the PKEY table
 * @dd: the hfi1_ib device
 * @port: the IB port number
 * @pkeys: the pkey table is placed here
 */
static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
{
	struct hfi1_pportdata *ppd = dd->pport + port - 1;

	memcpy(pkeys, ppd->pkeys, sizeof(ppd->pkeys));

	return 0;
}

static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
				    struct ib_device *ibdev, u8 port,
				    u32 *resp_len)
{
	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
	u32 n_blocks_req = OPA_AM_NBLK(am);
	u32 start_block = am & 0x7ff;
	__be16 *p;
	u16 *q;
	int i;
	u16 n_blocks_avail;
	unsigned npkeys = hfi1_get_npkeys(dd);
	size_t size;

	if (n_blocks_req == 0) {
		pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
			port, start_block, n_blocks_req);
		smp->status |= IB_SMP_INVALID_FIELD;
		return reply((struct ib_mad_hdr *)smp);
	}

	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;

	size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);

	if (start_block + n_blocks_req > n_blocks_avail ||
	    n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
		pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
			start_block, n_blocks_req, n_blocks_avail,
			OPA_NUM_PKEY_BLOCKS_PER_SMP);
		smp->status |= IB_SMP_INVALID_FIELD;
		return reply((struct ib_mad_hdr *)smp);
	}

	p = (__be16 *)data;
	q = (u16 *)data;
	/* get the real pkeys if we are requesting the first block */
	if (start_block == 0) {
		get_pkeys(dd, port, q);
		for (i = 0; i < npkeys; i++)
			p[i] = cpu_to_be16(q[i]);
		if (resp_len)
			*resp_len += size;
	} else {
		smp->status |= IB_SMP_INVALID_FIELD;
	}
	return reply((struct ib_mad_hdr *)smp);
}

enum {
	HFI_TRANSITION_DISALLOWED,
	HFI_TRANSITION_IGNORED,
	HFI_TRANSITION_ALLOWED,
	HFI_TRANSITION_UNDEFINED,
};

/*
 * Use shortened names to improve readability of
 * {logical,physical}_state_transitions
 */
enum {
	__D = HFI_TRANSITION_DISALLOWED,
	__I = HFI_TRANSITION_IGNORED,
	__A = HFI_TRANSITION_ALLOWED,
	__U = HFI_TRANSITION_UNDEFINED,
};

/*
 * IB_PORTPHYSSTATE_POLLING (2) through OPA_PORTPHYSSTATE_MAX (11) are
 * represented in physical_state_transitions.
 */
#define __N_PHYSTATES (OPA_PORTPHYSSTATE_MAX - IB_PORTPHYSSTATE_POLLING + 1)

/*
 * Within physical_state_transitions, rows represent "old" states,
 * columns "new" states, and physical_state_transitions.allowed[old][new]
 * indicates if the transition from old state to new state is legal (see
 * OPAg1v1, Table 6-4).
 */
static const struct {
	u8 allowed[__N_PHYSTATES][__N_PHYSTATES];
} physical_state_transitions = {
	{
		/* 2    3    4    5    6    7    8    9   10   11 */
	/* 2 */	{ __A, __A, __D, __D, __D, __D, __D, __D, __D, __D },
	/* 3 */	{ __A, __I, __D, __D, __D, __D, __D, __D, __D, __A },
	/* 4 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
	/* 5 */	{ __A, __A, __D, __I, __D, __D, __D, __D, __D, __D },
	/* 6 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
	/* 7 */	{ __D, __A, __D, __D, __D, __I, __D, __D, __D, __D },
	/* 8 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
	/* 9 */	{ __I, __A, __D, __D, __D, __D, __D, __I, __D, __D },
	/*10 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
	/*11 */	{ __D, __A, __D, __D, __D, __D, __D, __D, __D, __I },
	}
};
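/*
 * Example: a request to move from Offline (9) to Polling (2) reads
 * allowed[9 - 2][2 - 2] == __I, i.e. the transition is ignored.
 */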

/*
 * IB_PORT_DOWN (1) through IB_PORT_ACTIVE_DEFER (5) are represented
 * in logical_state_transitions.
 */

#define __N_LOGICAL_STATES (IB_PORT_ACTIVE_DEFER - IB_PORT_DOWN + 1)

/*
 * Within logical_state_transitions rows represent "old" states,
 * columns "new" states, and logical_state_transitions.allowed[old][new]
 * indicates if the transition from old state to new state is legal (see
 * OPAg1v1, Table 9-12).
 */
static const struct {
	u8 allowed[__N_LOGICAL_STATES][__N_LOGICAL_STATES];
} logical_state_transitions = {
	{
		/* 1    2    3    4    5 */
	/* 1 */	{ __I, __D, __D, __D, __U},
	/* 2 */	{ __D, __I, __A, __D, __U},
	/* 3 */	{ __D, __D, __I, __A, __U},
	/* 4 */	{ __D, __D, __I, __I, __U},
	/* 5 */	{ __U, __U, __U, __U, __U},
	}
};
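/*
 * Example: Init (2) to Armed (3) reads allowed[2 - 1][3 - 1] == __A,
 * so the transition is allowed.
 */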

static int logical_transition_allowed(int old, int new)
{
	if (old < IB_PORT_NOP || old > IB_PORT_ACTIVE_DEFER ||
	    new < IB_PORT_NOP || new > IB_PORT_ACTIVE_DEFER) {
		pr_warn("invalid logical state(s) (old %d new %d)\n",
			old, new);
		return HFI_TRANSITION_UNDEFINED;
	}

	if (new == IB_PORT_NOP)
		return HFI_TRANSITION_ALLOWED; /* always allowed */

	/* adjust states for indexing into logical_state_transitions */
	old -= IB_PORT_DOWN;
	new -= IB_PORT_DOWN;

	if (old < 0 || new < 0)
		return HFI_TRANSITION_UNDEFINED;
	return logical_state_transitions.allowed[old][new];
}

static int physical_transition_allowed(int old, int new)
{
	if (old < IB_PORTPHYSSTATE_NOP || old > OPA_PORTPHYSSTATE_MAX ||
	    new < IB_PORTPHYSSTATE_NOP || new > OPA_PORTPHYSSTATE_MAX) {
		pr_warn("invalid physical state(s) (old %d new %d)\n",
			old, new);
		return HFI_TRANSITION_UNDEFINED;
	}

	if (new == IB_PORTPHYSSTATE_NOP)
		return HFI_TRANSITION_ALLOWED; /* always allowed */

	/* adjust states for indexing into physical_state_transitions */
	old -= IB_PORTPHYSSTATE_POLLING;
	new -= IB_PORTPHYSSTATE_POLLING;

	if (old < 0 || new < 0)
		return HFI_TRANSITION_UNDEFINED;
	return physical_state_transitions.allowed[old][new];
}

static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
					  u32 logical_new, u32 physical_new)
{
	u32 physical_old = driver_physical_state(ppd);
	u32 logical_old = driver_logical_state(ppd);
	int ret, logical_allowed, physical_allowed;

	ret = logical_transition_allowed(logical_old, logical_new);
	logical_allowed = ret;

	if (ret == HFI_TRANSITION_DISALLOWED ||
	    ret == HFI_TRANSITION_UNDEFINED) {
		pr_warn("invalid logical state transition %s -> %s\n",
			opa_lstate_name(logical_old),
			opa_lstate_name(logical_new));
		return ret;
	}

	ret = physical_transition_allowed(physical_old, physical_new);
	physical_allowed = ret;

	if (ret == HFI_TRANSITION_DISALLOWED ||
	    ret == HFI_TRANSITION_UNDEFINED) {
		pr_warn("invalid physical state transition %s -> %s\n",
			opa_pstate_name(physical_old),
			opa_pstate_name(physical_new));
		return ret;
	}

	if (logical_allowed == HFI_TRANSITION_IGNORED &&
	    physical_allowed == HFI_TRANSITION_IGNORED)
		return HFI_TRANSITION_IGNORED;

	/*
	 * A change request of Physical Port State from
	 * 'Offline' to 'Polling' should be ignored.
	 */
	if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
	    (physical_new == IB_PORTPHYSSTATE_POLLING))
		return HFI_TRANSITION_IGNORED;

	/*
	 * Either physical_allowed or logical_allowed is
	 * HFI_TRANSITION_ALLOWED.
	 */
	return HFI_TRANSITION_ALLOWED;
}

static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
			   u32 logical_state, u32 phys_state,
			   int suppress_idle_sma)
{
	struct hfi1_devdata *dd = ppd->dd;
	u32 link_state;
	int ret;

	ret = port_states_transition_allowed(ppd, logical_state, phys_state);
	if (ret == HFI_TRANSITION_DISALLOWED ||
	    ret == HFI_TRANSITION_UNDEFINED) {
		/* error message emitted above */
		smp->status |= IB_SMP_INVALID_FIELD;
		return 0;
	}

	if (ret == HFI_TRANSITION_IGNORED)
		return 0;

	if ((phys_state != IB_PORTPHYSSTATE_NOP) &&
	    !(logical_state == IB_PORT_DOWN ||
	      logical_state == IB_PORT_NOP)) {
		pr_warn("SubnSet(OPA_PortInfo) port state invalid: logical_state 0x%x physical_state 0x%x\n",
			logical_state, phys_state);
		smp->status |= IB_SMP_INVALID_FIELD;
	}

	/*
	 * Logical state changes are summarized in OPAv1g1 spec.,
	 * Table 9-12; physical state changes are summarized in
	 * OPAv1g1 spec., Table 6.4.
	 */
	switch (logical_state) {
	case IB_PORT_NOP:
		if (phys_state == IB_PORTPHYSSTATE_NOP)
			break;
		/* FALLTHROUGH */
	case IB_PORT_DOWN:
		if (phys_state == IB_PORTPHYSSTATE_NOP) {
			link_state = HLS_DN_DOWNDEF;
		} else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
			link_state = HLS_DN_POLL;
			set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
					     0, OPA_LINKDOWN_REASON_FM_BOUNCE);
		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
			link_state = HLS_DN_DISABLE;
		} else {
			pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
				phys_state);
			smp->status |= IB_SMP_INVALID_FIELD;
			break;
		}

		if ((link_state == HLS_DN_POLL ||
		     link_state == HLS_DN_DOWNDEF)) {
			/*
			 * Going to poll.  No matter what the current state,
			 * always move offline first, then tune and start the
			 * link.  This correctly handles a FM link bounce and
			 * a link enable.  Going offline is a no-op if already
			 * offline.
			 */
			set_link_state(ppd, HLS_DN_OFFLINE);
			start_link(ppd);
		} else {
			set_link_state(ppd, link_state);
		}
		if (link_state == HLS_DN_DISABLE &&
		    (ppd->offline_disabled_reason >
		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
		     ppd->offline_disabled_reason ==
		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
			ppd->offline_disabled_reason =
			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
		/*
		 * Don't send a reply if the response would be sent
		 * through the disabled port.
		 */
		if (link_state == HLS_DN_DISABLE && smp->hop_cnt)
			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
		break;
	case IB_PORT_ARMED:
		ret = set_link_state(ppd, HLS_UP_ARMED);
		if ((ret == 0) && (suppress_idle_sma == 0))
			send_idle_sma(dd, SMA_IDLE_ARM);
		break;
	case IB_PORT_ACTIVE:
		if (ppd->neighbor_normal) {
			ret = set_link_state(ppd, HLS_UP_ACTIVE);
			if (ret == 0)
				send_idle_sma(dd, SMA_IDLE_ACTIVE);
		} else {
			pr_warn("SubnSet(OPA_PortInfo) Cannot move to Active with NeighborNormal 0\n");
			smp->status |= IB_SMP_INVALID_FIELD;
		}
		break;
	default:
		pr_warn("SubnSet(OPA_PortInfo) invalid logical state 0x%x\n",
			logical_state);
		smp->status |= IB_SMP_INVALID_FIELD;
	}

	return 0;
}

/**
 * __subn_set_opa_portinfo - set port information
 * @smp: the incoming SM packet
 * @am: the attribute modifier
 * @data: the attribute payload (an opa_port_info)
 * @ibdev: the infiniband device
 * @port: the port on the device
 * @resp_len: response length to update, if non-NULL
 */
static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
				   struct ib_device *ibdev, u8 port,
				   u32 *resp_len)
{
	struct opa_port_info *pi = (struct opa_port_info *)data;
	struct ib_event event;
	struct hfi1_devdata *dd;
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;
	u8 clientrereg;
	unsigned long flags;
	u32 smlid, opa_lid; /* tmp vars to hold LID values */
	u16 lid;
	u8 ls_old, ls_new, ps_new;
	u8 vls;
	u8 msl;
	u8 crc_enabled;
	u16 lse, lwe, mtu;
	u32 num_ports = OPA_AM_NPORT(am);
	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
	int ret, i, invalid = 0, call_set_mtu = 0;
	int call_link_downgrade_policy = 0;

	if (num_ports != 1) {
		smp->status |= IB_SMP_INVALID_FIELD;
		return reply((struct ib_mad_hdr *)smp);
	}

	opa_lid = be32_to_cpu(pi->lid);
	if (opa_lid & 0xFFFF0000) {
		pr_warn("OPA_PortInfo lid out of range: %X\n", opa_lid);
		smp->status |= IB_SMP_INVALID_FIELD;
		goto get_only;
	}

	lid = (u16)(opa_lid & 0x0000FFFF);

	smlid = be32_to_cpu(pi->sm_lid);
	if (smlid & 0xFFFF0000) {
		pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid);
		smp->status |= IB_SMP_INVALID_FIELD;
		goto get_only;
	}
	smlid &= 0x0000FFFF;

	clientrereg = (pi->clientrereg_subnettimeout &
			OPA_PI_MASK_CLIENT_REREGISTER);

	dd = dd_from_ibdev(ibdev);
	/* IB numbers ports from 1, hw from 0 */
	ppd = dd->pport + (port - 1);
	ibp = &ppd->ibport_data;
	event.device = ibdev;
	event.element.port_num = port;

	ls_old = driver_lstate(ppd);

	ibp->rvp.mkey = pi->mkey;
	ibp->rvp.gid_prefix = pi->subnet_prefix;
	ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);

	/* Must be a valid unicast LID address. */
	if ((lid == 0 && ls_old > IB_PORT_INIT) ||
	    lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
		smp->status |= IB_SMP_INVALID_FIELD;
		pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
			lid);
	} else if (ppd->lid != lid ||
		   ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC)) {
		if (ppd->lid != lid)
			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LID_CHANGE_BIT);
		if (ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC))
			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LMC_CHANGE_BIT);
		hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC);
		event.event = IB_EVENT_LID_CHANGE;
		ib_dispatch_event(&event);
	}

	msl = pi->smsl & OPA_PI_MASK_SMSL;
	if (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON)
		ppd->linkinit_reason =
			(pi->partenforce_filterraw &
			 OPA_PI_MASK_LINKINIT_REASON);

	/* Must be a valid unicast LID address. */
	if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
	    smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
		smp->status |= IB_SMP_INVALID_FIELD;
		pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
	} else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
		pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
		spin_lock_irqsave(&ibp->rvp.lock, flags);
		if (ibp->rvp.sm_ah) {
			if (smlid != ibp->rvp.sm_lid)
				rdma_ah_set_dlid(&ibp->rvp.sm_ah->attr, smlid);
			if (msl != ibp->rvp.sm_sl)
				rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl);
		}
		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
		if (smlid != ibp->rvp.sm_lid)
			ibp->rvp.sm_lid = smlid;
		if (msl != ibp->rvp.sm_sl)
			ibp->rvp.sm_sl = msl;
		event.event = IB_EVENT_SM_CHANGE;
		ib_dispatch_event(&event);
	}

	if (pi->link_down_reason == 0) {
		ppd->local_link_down_reason.sma = 0;
		ppd->local_link_down_reason.latest = 0;
	}

	if (pi->neigh_link_down_reason == 0) {
		ppd->neigh_link_down_reason.sma = 0;
		ppd->neigh_link_down_reason.latest = 0;
	}

	ppd->sm_trap_qp = be32_to_cpu(pi->sm_trap_qp);
	ppd->sa_qp = be32_to_cpu(pi->sa_qp);

	ppd->port_error_action = be32_to_cpu(pi->port_error_action);
	lwe = be16_to_cpu(pi->link_width.enabled);
	if (lwe) {
		if (lwe == OPA_LINK_WIDTH_RESET ||
		    lwe == OPA_LINK_WIDTH_RESET_OLD)
			set_link_width_enabled(ppd, ppd->link_width_supported);
		else if ((lwe & ~ppd->link_width_supported) == 0)
			set_link_width_enabled(ppd, lwe);
		else
			smp->status |= IB_SMP_INVALID_FIELD;
	}
	lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
	/* LWD.E is always applied - 0 means "disabled" */
	if (lwe == OPA_LINK_WIDTH_RESET ||
	    lwe == OPA_LINK_WIDTH_RESET_OLD) {
		set_link_width_downgrade_enabled(ppd,
				ppd->link_width_downgrade_supported);
	} else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
		/* only set and apply if something changed */
		if (lwe != ppd->link_width_downgrade_enabled) {
			set_link_width_downgrade_enabled(ppd, lwe);
			call_link_downgrade_policy = 1;
		}
	} else {
		smp->status |= IB_SMP_INVALID_FIELD;
	}
	lse = be16_to_cpu(pi->link_speed.enabled);
	if (lse) {
		if (lse & be16_to_cpu(pi->link_speed.supported))
			set_link_speed_enabled(ppd, lse);
		else
			smp->status |= IB_SMP_INVALID_FIELD;
	}

	ibp->rvp.mkeyprot =
		(pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
	ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
			      ibp->rvp.vl_high_limit);

	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
		smp->status |= IB_SMP_INVALID_FIELD;
		return reply((struct ib_mad_hdr *)smp);
	}
	for (i = 0; i < ppd->vls_supported; i++) {
		if ((i % 2) == 0)
			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
					   4) & 0xF);
		else
			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
					  0xF);
		if (mtu == 0xffff) {
			pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
				mtu,
				(pi->neigh_mtu.pvlx_to_mtu[0] >> 4) & 0xF);
			smp->status |= IB_SMP_INVALID_FIELD;
			mtu = hfi1_max_mtu; /* use a valid MTU */
		}
		if (dd->vld[i].mtu != mtu) {
			dd_dev_info(dd,
				    "MTU change on vl %d from %d to %d\n",
				    i, dd->vld[i].mtu, mtu);
			dd->vld[i].mtu = mtu;
			call_set_mtu++;
		}
	}
	/* As per the OPAv1 spec: VL15 must be supported and configured
	 * to operate with an MTU of 2048 or larger.
	 */
	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
	if (mtu < 2048 || mtu == 0xffff)
		mtu = 2048;
	if (dd->vld[15].mtu != mtu) {
		dd_dev_info(dd,
			    "MTU change on vl 15 from %d to %d\n",
			    dd->vld[15].mtu, mtu);
		dd->vld[15].mtu = mtu;
		call_set_mtu++;
	}
	if (call_set_mtu)
		set_mtu(ppd);

	/* Set operational VLs */
	vls = pi->operational_vls & OPA_PI_MASK_OPERATIONAL_VL;
	if (vls) {
		if (vls > ppd->vls_supported) {
			pr_warn("SubnSet(OPA_PortInfo) VL's supported invalid %d\n",
				pi->operational_vls);
			smp->status |= IB_SMP_INVALID_FIELD;
		} else {
			if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
					    vls) == -EINVAL)
				smp->status |= IB_SMP_INVALID_FIELD;
		}
	}

	if (pi->mkey_violations == 0)
		ibp->rvp.mkey_violations = 0;

	if (pi->pkey_violations == 0)
		ibp->rvp.pkey_violations = 0;

	if (pi->qkey_violations == 0)
		ibp->rvp.qkey_violations = 0;

	ibp->rvp.subnet_timeout =
		pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;

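	/* The LTP CRC mode to enable sits in bits [7:4] of port_ltp_crc_mode */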
	crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
	crc_enabled >>= 4;
	crc_enabled &= 0xf;

	if (crc_enabled != 0)
		ppd->port_crc_mode_enabled = port_ltp_to_cap(crc_enabled);

	ppd->is_active_optimize_enabled =
			!!(be16_to_cpu(pi->port_mode)
					& OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE);

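	/*
	 * portphysstate_portstate packs the logical port state in the
	 * low nibble and the physical state in the high nibble.
	 */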
1317         ls_new = pi->port_states.portphysstate_portstate &
1318                         OPA_PI_MASK_PORT_STATE;
1319         ps_new = (pi->port_states.portphysstate_portstate &
1320                         OPA_PI_MASK_PORT_PHYSICAL_STATE) >> 4;
1321
1322         if (ls_old == IB_PORT_INIT) {
1323                 if (start_of_sm_config) {
1324                         if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1325                                 ppd->is_sm_config_started = 1;
1326                 } else if (ls_new == IB_PORT_ARMED) {
1327                         if (ppd->is_sm_config_started == 0)
1328                                 invalid = 1;
1329                 }
1330         }
1331
1332         /* Handle CLIENT_REREGISTER event b/c SM asked us for it */
1333         if (clientrereg) {
1334                 event.event = IB_EVENT_CLIENT_REREGISTER;
1335                 ib_dispatch_event(&event);
1336         }
1337
1338         /*
1339          * Do the port state change now that the other link parameters
1340          * have been set.
1341          * Changing the port physical state only makes sense if the link
1342          * is down or is being set to down.
1343          */
1344
1345         ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1346         if (ret)
1347                 return ret;
1348
1349         ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
1350
1351         /* restore re-reg bit per o14-12.2.1 */
1352         pi->clientrereg_subnettimeout |= clientrereg;
1353
1354         /*
1355          * Apply the new link downgrade policy.  This may result in a link
1356          * bounce.  Do this after everything else so things are settled.
1357          * Possible problem: if setting the port state above fails, then
1358          * the policy change is not applied.
1359          */
1360         if (call_link_downgrade_policy)
1361                 apply_link_downgrade_policy(ppd, 0);
1362
1363         return ret;
1364
1365 get_only:
1366         return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
1367 }
1368
1369 /**
1370  * set_pkeys - set the PKEY table for ctxt 0
1371  * @dd: the hfi1 device data
1372  * @port: the IB port number
1373  * @pkeys: the PKEY table
1374  */
1375 static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
1376 {
1377         struct hfi1_pportdata *ppd;
1378         int i;
1379         int changed = 0;
1380         int update_includes_mgmt_partition = 0;
1381
1382         /*
1383          * IB ports one/two always map to contexts zero/one; these are
1384          * always kernel contexts, so no locking is needed.
1385          * If we get here with ppd set up, there is no need to check
1386          * that rcd is valid.
1387          */
1388         ppd = dd->pport + (port - 1);
1389         /*
1390          * If the update does not include the management pkey, don't do it.
1391          */
1392         for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1393                 if (pkeys[i] == LIM_MGMT_P_KEY) {
1394                         update_includes_mgmt_partition = 1;
1395                         break;
1396                 }
1397         }
1398
1399         if (!update_includes_mgmt_partition)
1400                 return 1;
1401
1402         for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1403                 u16 key = pkeys[i];
1404                 u16 okey = ppd->pkeys[i];
1405
1406                 if (key == okey)
1407                         continue;
1408                 /*
1409                  * The SM gives us the complete PKey table. We have
1410                  * to ensure that we put the PKeys in the matching
1411                  * slots.
1412                  */
1413                 ppd->pkeys[i] = key;
1414                 changed = 1;
1415         }
1416
1417         if (changed) {
1418                 (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
1419                 hfi1_event_pkey_change(dd, port);
1420         }
1421
1422         return 0;
1423 }
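
/*
 * Editorial sketch (illustrative only, not compiled): set_pkeys() rejects any
 * table that omits the limited management pkey, returning 1; on success it
 * returns 0. A hypothetical caller pushing a full table would therefore keep
 * LIM_MGMT_P_KEY in some slot. The 16-entry table below is an assumption for
 * illustration; the real table length is ARRAY_SIZE(ppd->pkeys).
 */
#if 0
static int example_push_pkey_table(struct hfi1_devdata *dd, u8 port)
{
        /* slot 0 carries the management partition */
        u16 pkeys[16] = { LIM_MGMT_P_KEY, 0x8001 };

        return set_pkeys(dd, port, pkeys);
}
#endif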
1424
1425 static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
1426                                     struct ib_device *ibdev, u8 port,
1427                                     u32 *resp_len)
1428 {
1429         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1430         u32 n_blocks_sent = OPA_AM_NBLK(am);
1431         u32 start_block = am & 0x7ff;
1432         u16 *p = (u16 *)data;
1433         __be16 *q = (__be16 *)data;
1434         int i;
1435         u16 n_blocks_avail;
1436         unsigned npkeys = hfi1_get_npkeys(dd);
1437
1438         if (n_blocks_sent == 0) {
1439                 pr_warn("OPA Set PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
1440                         port, start_block, n_blocks_sent);
1441                 smp->status |= IB_SMP_INVALID_FIELD;
1442                 return reply((struct ib_mad_hdr *)smp);
1443         }
1444
1445         n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1446
1447         if (start_block + n_blocks_sent > n_blocks_avail ||
1448             n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1449                 pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
1450                         start_block, n_blocks_sent, n_blocks_avail,
1451                         OPA_NUM_PKEY_BLOCKS_PER_SMP);
1452                 smp->status |= IB_SMP_INVALID_FIELD;
1453                 return reply((struct ib_mad_hdr *)smp);
1454         }
1455
1456         for (i = 0; i < n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE; i++)
1457                 p[i] = be16_to_cpu(q[i]);
1458
1459         if (start_block == 0 && set_pkeys(dd, port, p) != 0) {
1460                 smp->status |= IB_SMP_INVALID_FIELD;
1461                 return reply((struct ib_mad_hdr *)smp);
1462         }
1463
1464         return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
1465 }
1466
1467 #define ILLEGAL_VL 12
1468 /*
1469  * filter_sc2vlt remaps any mapping to VL15 to ILLEGAL_VL (except
1470  * for SC15, which must map to VL15). If we don't remap things this
1471  * way, it is possible for VL15 counters to increment when we try to
1472  * send on an SC which is mapped to an invalid VL.
1473  * When getting the table, convert ILLEGAL_VL back to VL15.
1474  */
1475 static void filter_sc2vlt(void *data, bool set)
1476 {
1477         int i;
1478         u8 *pd = data;
1479
1480         for (i = 0; i < OPA_MAX_SCS; i++) {
1481                 if (i == 15)
1482                         continue;
1483
1484                 if (set) {
1485                         if ((pd[i] & 0x1f) == 0xf)
1486                                 pd[i] = ILLEGAL_VL;
1487                 } else {
1488                         if ((pd[i] & 0x1f) == ILLEGAL_VL)
1489                                 pd[i] = 0xf;
1490                 }
1491         }
1492 }
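
/*
 * Editorial sketch (illustrative only, not compiled): round-tripping a table
 * through filter_sc2vlt(). Any non-SC15 entry naming VL15 (0xf) is parked at
 * ILLEGAL_VL on set and restored to 0xf on get.
 */
#if 0
static void example_sc2vlt_roundtrip(void)
{
        u8 sc2vl[OPA_MAX_SCS] = { 0 };

        sc2vl[3] = 0xf;                 /* SC3 wrongly pointed at VL15 */
        sc2vl[15] = 0xf;                /* SC15 legitimately maps to VL15 */

        filter_sc2vlt(sc2vl, true);     /* set: sc2vl[3] -> ILLEGAL_VL (12) */
        filter_sc2vlt(sc2vl, false);    /* get: sc2vl[3] -> 0xf again */
}
#endif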
1493
1494 static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1495 {
1496         u64 *val = data;
1497
1498         filter_sc2vlt(data, true);
1499
1500         write_csr(dd, SEND_SC2VLT0, *val++);
1501         write_csr(dd, SEND_SC2VLT1, *val++);
1502         write_csr(dd, SEND_SC2VLT2, *val++);
1503         write_csr(dd, SEND_SC2VLT3, *val++);
1504         write_seqlock_irq(&dd->sc2vl_lock);
1505         memcpy(dd->sc2vl, data, sizeof(dd->sc2vl));
1506         write_sequnlock_irq(&dd->sc2vl_lock);
1507         return 0;
1508 }
1509
1510 static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1511 {
1512         u64 *val = (u64 *)data;
1513
1514         *val++ = read_csr(dd, SEND_SC2VLT0);
1515         *val++ = read_csr(dd, SEND_SC2VLT1);
1516         *val++ = read_csr(dd, SEND_SC2VLT2);
1517         *val++ = read_csr(dd, SEND_SC2VLT3);
1518
1519         filter_sc2vlt((u64 *)data, false);
1520         return 0;
1521 }
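
/*
 * Editorial sketch (illustrative only, not compiled): set_sc2vlt_tables()
 * publishes the cached copy under dd->sc2vl_lock, a seqlock, so a lock-free
 * reader would use the usual retry loop. This assumes dd->sc2vl caches the
 * four 64-bit CSR images, as the memcpy() above suggests.
 */
#if 0
static void example_read_sc2vl(struct hfi1_devdata *dd, u64 copy[4])
{
        unsigned int seq;

        do {
                seq = read_seqbegin(&dd->sc2vl_lock);
                memcpy(copy, dd->sc2vl, sizeof(u64) * 4);
        } while (read_seqretry(&dd->sc2vl_lock, seq));
}
#endif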
1522
1523 static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1524                                    struct ib_device *ibdev, u8 port,
1525                                    u32 *resp_len)
1526 {
1527         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1528         u8 *p = data;
1529         size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
1530         unsigned i;
1531
1532         if (am) {
1533                 smp->status |= IB_SMP_INVALID_FIELD;
1534                 return reply((struct ib_mad_hdr *)smp);
1535         }
1536
1537         for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++)
1538                 *p++ = ibp->sl_to_sc[i];
1539
1540         if (resp_len)
1541                 *resp_len += size;
1542
1543         return reply((struct ib_mad_hdr *)smp);
1544 }
1545
1546 static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1547                                    struct ib_device *ibdev, u8 port,
1548                                    u32 *resp_len)
1549 {
1550         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1551         u8 *p = data;
1552         int i;
1553         u8 sc;
1554
1555         if (am) {
1556                 smp->status |= IB_SMP_INVALID_FIELD;
1557                 return reply((struct ib_mad_hdr *)smp);
1558         }
1559
1560         for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++) {
1561                 sc = *p++;
1562                 if (ibp->sl_to_sc[i] != sc) {
1563                         ibp->sl_to_sc[i] = sc;
1564
1565                         /* Put all stale qps into error state */
1566                         hfi1_error_port_qps(ibp, i);
1567                 }
1568         }
1569
1570         return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
1571 }
1572
1573 static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1574                                    struct ib_device *ibdev, u8 port,
1575                                    u32 *resp_len)
1576 {
1577         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1578         u8 *p = data;
1579         size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
1580         unsigned i;
1581
1582         if (am) {
1583                 smp->status |= IB_SMP_INVALID_FIELD;
1584                 return reply((struct ib_mad_hdr *)smp);
1585         }
1586
1587         for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1588                 *p++ = ibp->sc_to_sl[i];
1589
1590         if (resp_len)
1591                 *resp_len += size;
1592
1593         return reply((struct ib_mad_hdr *)smp);
1594 }
1595
1596 static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1597                                    struct ib_device *ibdev, u8 port,
1598                                    u32 *resp_len)
1599 {
1600         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1601         u8 *p = data;
1602         int i;
1603
1604         if (am) {
1605                 smp->status |= IB_SMP_INVALID_FIELD;
1606                 return reply((struct ib_mad_hdr *)smp);
1607         }
1608
1609         for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1610                 ibp->sc_to_sl[i] = *p++;
1611
1612         return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len);
1613 }
1614
1615 static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1616                                     struct ib_device *ibdev, u8 port,
1617                                     u32 *resp_len)
1618 {
1619         u32 n_blocks = OPA_AM_NBLK(am);
1620         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1621         void *vp = (void *)data;
1622         size_t size = 4 * sizeof(u64);
1623
1624         if (n_blocks != 1) {
1625                 smp->status |= IB_SMP_INVALID_FIELD;
1626                 return reply((struct ib_mad_hdr *)smp);
1627         }
1628
1629         get_sc2vlt_tables(dd, vp);
1630
1631         if (resp_len)
1632                 *resp_len += size;
1633
1634         return reply((struct ib_mad_hdr *)smp);
1635 }
1636
1637 static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1638                                     struct ib_device *ibdev, u8 port,
1639                                     u32 *resp_len)
1640 {
1641         u32 n_blocks = OPA_AM_NBLK(am);
1642         int async_update = OPA_AM_ASYNC(am);
1643         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1644         void *vp = (void *)data;
1645         struct hfi1_pportdata *ppd;
1646         int lstate;
1647
1648         if (n_blocks != 1 || async_update) {
1649                 smp->status |= IB_SMP_INVALID_FIELD;
1650                 return reply((struct ib_mad_hdr *)smp);
1651         }
1652
1653         /* IB numbers ports from 1, hw from 0 */
1654         ppd = dd->pport + (port - 1);
1655         lstate = driver_lstate(ppd);
1656         /*
1657          * it's known that async_update is 0 by this point, but include
1658          * the explicit check for clarity
1659          */
1660         if (!async_update &&
1661             (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
1662                 smp->status |= IB_SMP_INVALID_FIELD;
1663                 return reply((struct ib_mad_hdr *)smp);
1664         }
1665
1666         set_sc2vlt_tables(dd, vp);
1667
1668         return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len);
1669 }
1670
1671 static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1672                                      struct ib_device *ibdev, u8 port,
1673                                      u32 *resp_len)
1674 {
1675         u32 n_blocks = OPA_AM_NPORT(am);
1676         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1677         struct hfi1_pportdata *ppd;
1678         void *vp = (void *)data;
1679         int size;
1680
1681         if (n_blocks != 1) {
1682                 smp->status |= IB_SMP_INVALID_FIELD;
1683                 return reply((struct ib_mad_hdr *)smp);
1684         }
1685
1686         ppd = dd->pport + (port - 1);
1687
1688         size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
1689
1690         if (resp_len)
1691                 *resp_len += size;
1692
1693         return reply((struct ib_mad_hdr *)smp);
1694 }
1695
1696 static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1697                                      struct ib_device *ibdev, u8 port,
1698                                      u32 *resp_len)
1699 {
1700         u32 n_blocks = OPA_AM_NPORT(am);
1701         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1702         struct hfi1_pportdata *ppd;
1703         void *vp = (void *)data;
1704         int lstate;
1705
1706         if (n_blocks != 1) {
1707                 smp->status |= IB_SMP_INVALID_FIELD;
1708                 return reply((struct ib_mad_hdr *)smp);
1709         }
1710
1711         /* IB numbers ports from 1, hw from 0 */
1712         ppd = dd->pport + (port - 1);
1713         lstate = driver_lstate(ppd);
1714         if (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE) {
1715                 smp->status |= IB_SMP_INVALID_FIELD;
1716                 return reply((struct ib_mad_hdr *)smp);
1717         }
1718
1721         fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
1722
1723         return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
1724                                          resp_len);
1725 }
1726
1727 static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
1728                               struct ib_device *ibdev, u8 port,
1729                               u32 *resp_len)
1730 {
1731         u32 nports = OPA_AM_NPORT(am);
1732         u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1733         u32 lstate;
1734         struct hfi1_ibport *ibp;
1735         struct hfi1_pportdata *ppd;
1736         struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
1737
1738         if (nports != 1) {
1739                 smp->status |= IB_SMP_INVALID_FIELD;
1740                 return reply((struct ib_mad_hdr *)smp);
1741         }
1742
1743         ibp = to_iport(ibdev, port);
1744         ppd = ppd_from_ibp(ibp);
1745
1746         lstate = driver_lstate(ppd);
1747
1748         if (start_of_sm_config && (lstate == IB_PORT_INIT))
1749                 ppd->is_sm_config_started = 1;
1750
1751         psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
1752         psi->port_states.ledenable_offlinereason |=
1753                 ppd->is_sm_config_started << 5;
1754         psi->port_states.ledenable_offlinereason |=
1755                 ppd->offline_disabled_reason;
1756
1757         psi->port_states.portphysstate_portstate =
1758                 (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf);
1759         psi->link_width_downgrade_tx_active =
1760                 cpu_to_be16(ppd->link_width_downgrade_tx_active);
1761         psi->link_width_downgrade_rx_active =
1762                 cpu_to_be16(ppd->link_width_downgrade_rx_active);
1763         if (resp_len)
1764                 *resp_len += sizeof(struct opa_port_state_info);
1765
1766         return reply((struct ib_mad_hdr *)smp);
1767 }
1768
1769 static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
1770                               struct ib_device *ibdev, u8 port,
1771                               u32 *resp_len)
1772 {
1773         u32 nports = OPA_AM_NPORT(am);
1774         u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
1775         u32 ls_old;
1776         u8 ls_new, ps_new;
1777         struct hfi1_ibport *ibp;
1778         struct hfi1_pportdata *ppd;
1779         struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
1780         int ret, invalid = 0;
1781
1782         if (nports != 1) {
1783                 smp->status |= IB_SMP_INVALID_FIELD;
1784                 return reply((struct ib_mad_hdr *)smp);
1785         }
1786
1787         ibp = to_iport(ibdev, port);
1788         ppd = ppd_from_ibp(ibp);
1789
1790         ls_old = driver_lstate(ppd);
1791
1792         ls_new = port_states_to_logical_state(&psi->port_states);
1793         ps_new = port_states_to_phys_state(&psi->port_states);
1794
1795         if (ls_old == IB_PORT_INIT) {
1796                 if (start_of_sm_config) {
1797                         if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1798                                 ppd->is_sm_config_started = 1;
1799                 } else if (ls_new == IB_PORT_ARMED) {
1800                         if (ppd->is_sm_config_started == 0)
1801                                 invalid = 1;
1802                 }
1803         }
1804
1805         ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1806         if (ret)
1807                 return ret;
1808
1809         if (invalid)
1810                 smp->status |= IB_SMP_INVALID_FIELD;
1811
1812         return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len);
1813 }
1814
1815 static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
1816                                      struct ib_device *ibdev, u8 port,
1817                                      u32 *resp_len)
1818 {
1819         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1820         u32 addr = OPA_AM_CI_ADDR(am);
1821         u32 len = OPA_AM_CI_LEN(am) + 1;
1822         int ret;
1823
1824         if (dd->pport->port_type != PORT_TYPE_QSFP) {
1825                 smp->status |= IB_SMP_INVALID_FIELD;
1826                 return reply((struct ib_mad_hdr *)smp);
1827         }
1828
1829 #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
1830 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
1831 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
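
        /*
         * Worked example (editorial note): with __CI_PAGE_SIZE == 128, a
         * request at addr 120 with len 16 covers bytes 120..135, and
         * __CI_PAGE_NUM(120) == 0 while __CI_PAGE_NUM(135) == 128, so the
         * check below rejects it for crossing a page boundary.
         */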
1832
1833         /*
1834          * check that addr is within spec, and
1835          * addr and (addr + len - 1) are on the same "page"
1836          */
1837         if (addr >= 4096 ||
1838             (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
1839                 smp->status |= IB_SMP_INVALID_FIELD;
1840                 return reply((struct ib_mad_hdr *)smp);
1841         }
1842
1843         ret = get_cable_info(dd, port, addr, len, data);
1844
1845         if (ret == -ENODEV) {
1846                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
1847                 return reply((struct ib_mad_hdr *)smp);
1848         }
1849
1850         /* The address range for the CableInfo SMA query is wider than the
1851          * memory available on the QSFP cable. We want to return a valid
1852          * response, albeit zeroed out, for address ranges that are beyond
1853          * the available memory but still within the CableInfo query spec.
1854          */
1855         if (ret < 0 && ret != -ERANGE) {
1856                 smp->status |= IB_SMP_INVALID_FIELD;
1857                 return reply((struct ib_mad_hdr *)smp);
1858         }
1859
1860         if (resp_len)
1861                 *resp_len += len;
1862
1863         return reply((struct ib_mad_hdr *)smp);
1864 }
1865
1866 static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
1867                               struct ib_device *ibdev, u8 port, u32 *resp_len)
1868 {
1869         u32 num_ports = OPA_AM_NPORT(am);
1870         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1871         struct hfi1_pportdata *ppd;
1872         struct buffer_control *p = (struct buffer_control *)data;
1873         int size;
1874
1875         if (num_ports != 1) {
1876                 smp->status |= IB_SMP_INVALID_FIELD;
1877                 return reply((struct ib_mad_hdr *)smp);
1878         }
1879
1880         ppd = dd->pport + (port - 1);
1881         size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
1882         trace_bct_get(dd, p);
1883         if (resp_len)
1884                 *resp_len += size;
1885
1886         return reply((struct ib_mad_hdr *)smp);
1887 }
1888
1889 static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
1890                               struct ib_device *ibdev, u8 port, u32 *resp_len)
1891 {
1892         u32 num_ports = OPA_AM_NPORT(am);
1893         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1894         struct hfi1_pportdata *ppd;
1895         struct buffer_control *p = (struct buffer_control *)data;
1896
1897         if (num_ports != 1) {
1898                 smp->status |= IB_SMP_INVALID_FIELD;
1899                 return reply((struct ib_mad_hdr *)smp);
1900         }
1901         ppd = dd->pport + (port - 1);
1902         trace_bct_set(dd, p);
1903         if (fm_set_table(ppd, FM_TBL_BUFFER_CONTROL, p) < 0) {
1904                 smp->status |= IB_SMP_INVALID_FIELD;
1905                 return reply((struct ib_mad_hdr *)smp);
1906         }
1907
1908         return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len);
1909 }
1910
1911 static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
1912                                  struct ib_device *ibdev, u8 port,
1913                                  u32 *resp_len)
1914 {
1915         struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
1916         u32 num_ports = OPA_AM_NPORT(am);
1917         u8 section = (am & 0x00ff0000) >> 16;
1918         u8 *p = data;
1919         int size = 0;
1920
1921         if (num_ports != 1) {
1922                 smp->status |= IB_SMP_INVALID_FIELD;
1923                 return reply((struct ib_mad_hdr *)smp);
1924         }
1925
1926         switch (section) {
1927         case OPA_VLARB_LOW_ELEMENTS:
1928                 size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
1929                 break;
1930         case OPA_VLARB_HIGH_ELEMENTS:
1931                 size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
1932                 break;
1933         case OPA_VLARB_PREEMPT_ELEMENTS:
1934                 size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
1935                 break;
1936         case OPA_VLARB_PREEMPT_MATRIX:
1937                 size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
1938                 break;
1939         default:
1940                 pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
1941                         be32_to_cpu(smp->attr_mod));
1942                 smp->status |= IB_SMP_INVALID_FIELD;
1943                 break;
1944         }
1945
1946         if (size > 0 && resp_len)
1947                 *resp_len += size;
1948
1949         return reply((struct ib_mad_hdr *)smp);
1950 }
1951
1952 static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
1953                                  struct ib_device *ibdev, u8 port,
1954                                  u32 *resp_len)
1955 {
1956         struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
1957         u32 num_ports = OPA_AM_NPORT(am);
1958         u8 section = (am & 0x00ff0000) >> 16;
1959         u8 *p = data;
1960
1961         if (num_ports != 1) {
1962                 smp->status |= IB_SMP_INVALID_FIELD;
1963                 return reply((struct ib_mad_hdr *)smp);
1964         }
1965
1966         switch (section) {
1967         case OPA_VLARB_LOW_ELEMENTS:
1968                 (void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
1969                 break;
1970         case OPA_VLARB_HIGH_ELEMENTS:
1971                 (void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
1972                 break;
1973         /*
1974          * Neither OPA_VLARB_PREEMPT_ELEMENTS nor OPA_VLARB_PREEMPT_MATRIX
1975          * can be changed from the default values.
1976          */
1977         case OPA_VLARB_PREEMPT_ELEMENTS:
1978                 /* FALLTHROUGH */
1979         case OPA_VLARB_PREEMPT_MATRIX:
1980                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
1981                 break;
1982         default:
1983                 pr_warn("OPA SubnSet(VL Arb) AM Invalid : 0x%x\n",
1984                         be32_to_cpu(smp->attr_mod));
1985                 smp->status |= IB_SMP_INVALID_FIELD;
1986                 break;
1987         }
1988
1989         return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len);
1990 }
1991
1992 struct opa_pma_mad {
1993         struct ib_mad_hdr mad_hdr;
1994         u8 data[2024];
1995 } __packed;
1996
1997 struct opa_port_status_req {
1998         __u8 port_num;
1999         __u8 reserved[3];
2000         __be32 vl_select_mask;
2001 };
2002
2003 #define VL_MASK_ALL             0x000080ff
2004
2005 struct opa_port_status_rsp {
2006         __u8 port_num;
2007         __u8 reserved[3];
2008         __be32  vl_select_mask;
2009
2010         /* Data counters */
2011         __be64 port_xmit_data;
2012         __be64 port_rcv_data;
2013         __be64 port_xmit_pkts;
2014         __be64 port_rcv_pkts;
2015         __be64 port_multicast_xmit_pkts;
2016         __be64 port_multicast_rcv_pkts;
2017         __be64 port_xmit_wait;
2018         __be64 sw_port_congestion;
2019         __be64 port_rcv_fecn;
2020         __be64 port_rcv_becn;
2021         __be64 port_xmit_time_cong;
2022         __be64 port_xmit_wasted_bw;
2023         __be64 port_xmit_wait_data;
2024         __be64 port_rcv_bubble;
2025         __be64 port_mark_fecn;
2026         /* Error counters */
2027         __be64 port_rcv_constraint_errors;
2028         __be64 port_rcv_switch_relay_errors;
2029         __be64 port_xmit_discards;
2030         __be64 port_xmit_constraint_errors;
2031         __be64 port_rcv_remote_physical_errors;
2032         __be64 local_link_integrity_errors;
2033         __be64 port_rcv_errors;
2034         __be64 excessive_buffer_overruns;
2035         __be64 fm_config_errors;
2036         __be32 link_error_recovery;
2037         __be32 link_downed;
2038         u8 uncorrectable_errors;
2039
2040         u8 link_quality_indicator; /* 5res, 3bit */
2041         u8 res2[6];
2042         struct _vls_pctrs {
2043                 /* per-VL Data counters */
2044                 __be64 port_vl_xmit_data;
2045                 __be64 port_vl_rcv_data;
2046                 __be64 port_vl_xmit_pkts;
2047                 __be64 port_vl_rcv_pkts;
2048                 __be64 port_vl_xmit_wait;
2049                 __be64 sw_port_vl_congestion;
2050                 __be64 port_vl_rcv_fecn;
2051                 __be64 port_vl_rcv_becn;
2052                 __be64 port_xmit_time_cong;
2053                 __be64 port_vl_xmit_wasted_bw;
2054                 __be64 port_vl_xmit_wait_data;
2055                 __be64 port_vl_rcv_bubble;
2056                 __be64 port_vl_mark_fecn;
2057                 __be64 port_vl_xmit_discards;
2058         } vls[0]; /* real array size defined by # bits set in vl_select_mask */
2059 };
2060
2061 enum counter_selects {
2062         CS_PORT_XMIT_DATA                       = (1 << 31),
2063         CS_PORT_RCV_DATA                        = (1 << 30),
2064         CS_PORT_XMIT_PKTS                       = (1 << 29),
2065         CS_PORT_RCV_PKTS                        = (1 << 28),
2066         CS_PORT_MCAST_XMIT_PKTS                 = (1 << 27),
2067         CS_PORT_MCAST_RCV_PKTS                  = (1 << 26),
2068         CS_PORT_XMIT_WAIT                       = (1 << 25),
2069         CS_SW_PORT_CONGESTION                   = (1 << 24),
2070         CS_PORT_RCV_FECN                        = (1 << 23),
2071         CS_PORT_RCV_BECN                        = (1 << 22),
2072         CS_PORT_XMIT_TIME_CONG                  = (1 << 21),
2073         CS_PORT_XMIT_WASTED_BW                  = (1 << 20),
2074         CS_PORT_XMIT_WAIT_DATA                  = (1 << 19),
2075         CS_PORT_RCV_BUBBLE                      = (1 << 18),
2076         CS_PORT_MARK_FECN                       = (1 << 17),
2077         CS_PORT_RCV_CONSTRAINT_ERRORS           = (1 << 16),
2078         CS_PORT_RCV_SWITCH_RELAY_ERRORS         = (1 << 15),
2079         CS_PORT_XMIT_DISCARDS                   = (1 << 14),
2080         CS_PORT_XMIT_CONSTRAINT_ERRORS          = (1 << 13),
2081         CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS      = (1 << 12),
2082         CS_LOCAL_LINK_INTEGRITY_ERRORS          = (1 << 11),
2083         CS_PORT_RCV_ERRORS                      = (1 << 10),
2084         CS_EXCESSIVE_BUFFER_OVERRUNS            = (1 << 9),
2085         CS_FM_CONFIG_ERRORS                     = (1 << 8),
2086         CS_LINK_ERROR_RECOVERY                  = (1 << 7),
2087         CS_LINK_DOWNED                          = (1 << 6),
2088         CS_UNCORRECTABLE_ERRORS                 = (1 << 5),
2089 };
2090
2091 struct opa_clear_port_status {
2092         __be64 port_select_mask[4];
2093         __be32 counter_select_mask;
2094 };
2095
2096 struct opa_aggregate {
2097         __be16 attr_id;
2098         __be16 err_reqlength;   /* 1 bit, 8 res, 7 bit */
2099         __be32 attr_mod;
2100         u8 data[0];
2101 };
2102
2103 #define MSK_LLI 0x000000f0
2104 #define MSK_LLI_SFT 4
2105 #define MSK_LER 0x0000000f
2106 #define MSK_LER_SFT 0
2107 #define ADD_LLI 8
2108 #define ADD_LER 2
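
/*
 * Editorial sketch (illustrative only, not compiled): unpacking the
 * resolution word, mirroring pma_get_opa_datacounters() below. A raw value
 * of 0x32 yields res_lli = 3 + ADD_LLI = 11 and res_ler = 2 + ADD_LER = 4;
 * a zero field means no resolution shift is applied.
 */
#if 0
static void example_unpack_resolution(u32 resolution)
{
        u8 res_lli = (resolution & MSK_LLI) >> MSK_LLI_SFT;
        u8 res_ler = (resolution & MSK_LER) >> MSK_LER_SFT;

        res_lli = res_lli ? res_lli + ADD_LLI : 0;
        res_ler = res_ler ? res_ler + ADD_LER : 0;
        /* counters are then right-shifted by res_lli / res_ler */
}
#endif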
2109
2110 /* Request contains first three fields, response contains those plus the rest */
2111 struct opa_port_data_counters_msg {
2112         __be64 port_select_mask[4];
2113         __be32 vl_select_mask;
2114         __be32 resolution;
2115
2116         /* Response fields follow */
2117         struct _port_dctrs {
2118                 u8 port_number;
2119                 u8 reserved2[3];
2120                 __be32 link_quality_indicator; /* 29res, 3bit */
2121
2122                 /* Data counters */
2123                 __be64 port_xmit_data;
2124                 __be64 port_rcv_data;
2125                 __be64 port_xmit_pkts;
2126                 __be64 port_rcv_pkts;
2127                 __be64 port_multicast_xmit_pkts;
2128                 __be64 port_multicast_rcv_pkts;
2129                 __be64 port_xmit_wait;
2130                 __be64 sw_port_congestion;
2131                 __be64 port_rcv_fecn;
2132                 __be64 port_rcv_becn;
2133                 __be64 port_xmit_time_cong;
2134                 __be64 port_xmit_wasted_bw;
2135                 __be64 port_xmit_wait_data;
2136                 __be64 port_rcv_bubble;
2137                 __be64 port_mark_fecn;
2138
2139                 __be64 port_error_counter_summary;
2140                 /* Sum of error counts/port */
2141
2142                 struct _vls_dctrs {
2143                         /* per-VL Data counters */
2144                         __be64 port_vl_xmit_data;
2145                         __be64 port_vl_rcv_data;
2146                         __be64 port_vl_xmit_pkts;
2147                         __be64 port_vl_rcv_pkts;
2148                         __be64 port_vl_xmit_wait;
2149                         __be64 sw_port_vl_congestion;
2150                         __be64 port_vl_rcv_fecn;
2151                         __be64 port_vl_rcv_becn;
2152                         __be64 port_xmit_time_cong;
2153                         __be64 port_vl_xmit_wasted_bw;
2154                         __be64 port_vl_xmit_wait_data;
2155                         __be64 port_vl_rcv_bubble;
2156                         __be64 port_vl_mark_fecn;
2157                 } vls[0];
2158                 /* array size defined by #bits set in vl_select_mask */
2159         } port[1]; /* array size defined by #ports in attribute modifier */
2160 };
2161
2162 struct opa_port_error_counters64_msg {
2163         /*
2164          * Request contains the first two fields, response contains the
2165          * entire structure.
2166          */
2167         __be64 port_select_mask[4];
2168         __be32 vl_select_mask;
2169
2170         /* Response-only fields follow */
2171         __be32 reserved1;
2172         struct _port_ectrs {
2173                 u8 port_number;
2174                 u8 reserved2[7];
2175                 __be64 port_rcv_constraint_errors;
2176                 __be64 port_rcv_switch_relay_errors;
2177                 __be64 port_xmit_discards;
2178                 __be64 port_xmit_constraint_errors;
2179                 __be64 port_rcv_remote_physical_errors;
2180                 __be64 local_link_integrity_errors;
2181                 __be64 port_rcv_errors;
2182                 __be64 excessive_buffer_overruns;
2183                 __be64 fm_config_errors;
2184                 __be32 link_error_recovery;
2185                 __be32 link_downed;
2186                 u8 uncorrectable_errors;
2187                 u8 reserved3[7];
2188                 struct _vls_ectrs {
2189                         __be64 port_vl_xmit_discards;
2190                 } vls[0];
2191                 /* array size defined by #bits set in vl_select_mask */
2192         } port[1]; /* array size defined by #ports in attribute modifier */
2193 };
2194
2195 struct opa_port_error_info_msg {
2196         __be64 port_select_mask[4];
2197         __be32 error_info_select_mask;
2198         __be32 reserved1;
2199         struct _port_ei {
2200                 u8 port_number;
2201                 u8 reserved2[7];
2202
2203                 /* PortRcvErrorInfo */
2204                 struct {
2205                         u8 status_and_code;
2206                         union {
2207                                 u8 raw[17];
2208                                 struct {
2209                                         /* EI1to12 format */
2210                                         u8 packet_flit1[8];
2211                                         u8 packet_flit2[8];
2212                                         u8 remaining_flit_bits12;
2213                                 } ei1to12;
2214                                 struct {
2215                                         u8 packet_bytes[8];
2216                                         u8 remaining_flit_bits;
2217                                 } ei13;
2218                         } ei;
2219                         u8 reserved3[6];
2220                 } __packed port_rcv_ei;
2221
2222                 /* ExcessiveBufferOverrunInfo */
2223                 struct {
2224                         u8 status_and_sc;
2225                         u8 reserved4[7];
2226                 } __packed excessive_buffer_overrun_ei;
2227
2228                 /* PortXmitConstraintErrorInfo */
2229                 struct {
2230                         u8 status;
2231                         u8 reserved5;
2232                         __be16 pkey;
2233                         __be32 slid;
2234                 } __packed port_xmit_constraint_ei;
2235
2236                 /* PortRcvConstraintErrorInfo */
2237                 struct {
2238                         u8 status;
2239                         u8 reserved6;
2240                         __be16 pkey;
2241                         __be32 slid;
2242                 } __packed port_rcv_constraint_ei;
2243
2244                 /* PortRcvSwitchRelayErrorInfo */
2245                 struct {
2246                         u8 status_and_code;
2247                         u8 reserved7[3];
2248                         __u32 error_info;
2249                 } __packed port_rcv_switch_relay_ei;
2250
2251                 /* UncorrectableErrorInfo */
2252                 struct {
2253                         u8 status_and_code;
2254                         u8 reserved8;
2255                 } __packed uncorrectable_ei;
2256
2257                 /* FMConfigErrorInfo */
2258                 struct {
2259                         u8 status_and_code;
2260                         u8 error_info;
2261                 } __packed fm_config_ei;
2262                 __u32 reserved9;
2263         } port[1]; /* actual array size defined by #ports in attr modifier */
2264 };
2265
2266 /* opa_port_error_info_msg error_info_select_mask bit definitions */
2267 enum error_info_selects {
2268         ES_PORT_RCV_ERROR_INFO                  = (1 << 31),
2269         ES_EXCESSIVE_BUFFER_OVERRUN_INFO        = (1 << 30),
2270         ES_PORT_XMIT_CONSTRAINT_ERROR_INFO      = (1 << 29),
2271         ES_PORT_RCV_CONSTRAINT_ERROR_INFO       = (1 << 28),
2272         ES_PORT_RCV_SWITCH_RELAY_ERROR_INFO     = (1 << 27),
2273         ES_UNCORRECTABLE_ERROR_INFO             = (1 << 26),
2274         ES_FM_CONFIG_ERROR_INFO                 = (1 << 25)
2275 };
2276
2277 static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
2278                                      struct ib_device *ibdev, u32 *resp_len)
2279 {
2280         struct opa_class_port_info *p =
2281                 (struct opa_class_port_info *)pmp->data;
2282
2283         memset(pmp->data, 0, sizeof(pmp->data));
2284
2285         if (pmp->mad_hdr.attr_mod != 0)
2286                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2287
2288         p->base_version = OPA_MGMT_BASE_VERSION;
2289         p->class_version = OPA_SM_CLASS_VERSION;
2290         /*
2291          * Expected response time is 4.096 usec * 2^18 == 1.073741824 sec.
2292          */
2293         p->cap_mask2_resp_time = cpu_to_be32(18);
2294
2295         if (resp_len)
2296                 *resp_len += sizeof(*p);
2297
2298         return reply((struct ib_mad_hdr *)pmp);
2299 }
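
/*
 * Editorial note: the value 18 above encodes the ClassPortInfo response time
 * as 4.096 usec * 2^N. A sketch of the expansion (illustrative only):
 */
#if 0
static u64 example_resp_time_ns(u8 n)
{
        /* 4.096 usec == 4096 ns, so N == 18 gives 1073741824 ns (~1.07 s) */
        return 4096ULL << n;
}
#endif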
2300
2301 static void a0_portstatus(struct hfi1_pportdata *ppd,
2302                           struct opa_port_status_rsp *rsp, u32 vl_select_mask)
2303 {
2304         if (!is_bx(ppd->dd)) {
2305                 unsigned long vl;
2306                 u64 sum_vl_xmit_wait = 0;
2307                 unsigned long vl_all_mask = VL_MASK_ALL;
2308
2309                 for_each_set_bit(vl, &vl_all_mask,
2310                                  8 * sizeof(vl_all_mask)) {
2311                         u64 tmp = sum_vl_xmit_wait +
2312                                   read_port_cntr(ppd, C_TX_WAIT_VL,
2313                                                  idx_from_vl(vl));
2314                         if (tmp < sum_vl_xmit_wait) {
2315                                 /* we wrapped */
2316                                 sum_vl_xmit_wait = (u64)~0;
2317                                 break;
2318                         }
2319                         sum_vl_xmit_wait = tmp;
2320                 }
2321                 if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2322                         rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2323         }
2324 }
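
/*
 * Editorial sketch (illustrative only, not compiled): the summation above
 * saturates rather than wraps; once the running total would overflow, it
 * pegs at ~0. The same pattern as a stand-alone helper:
 */
#if 0
static u64 example_sat_add_u64(u64 sum, u64 add)
{
        u64 tmp = sum + add;

        return tmp < sum ? (u64)~0 : tmp;       /* clamp on wraparound */
}
#endif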
2325
2326 static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
2327                                   struct ib_device *ibdev,
2328                                   u8 port, u32 *resp_len)
2329 {
2330         struct opa_port_status_req *req =
2331                 (struct opa_port_status_req *)pmp->data;
2332         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2333         struct opa_port_status_rsp *rsp;
2334         unsigned long vl_select_mask = be32_to_cpu(req->vl_select_mask);
2335         unsigned long vl;
2336         size_t response_data_size;
2337         u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2338         u8 port_num = req->port_num;
2339         u8 num_vls = hweight32(vl_select_mask);
2340         struct _vls_pctrs *vlinfo;
2341         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2342         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2343         int vfi;
2344         u64 tmp, tmp2;
2345
2346         response_data_size = sizeof(struct opa_port_status_rsp) +
2347                                 num_vls * sizeof(struct _vls_pctrs);
2348         if (response_data_size > sizeof(pmp->data)) {
2349                 pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
2350                 return reply((struct ib_mad_hdr *)pmp);
2351         }
2352
2353         if (nports != 1 || (port_num && port_num != port) ||
2354             num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
2355                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2356                 return reply((struct ib_mad_hdr *)pmp);
2357         }
2358
2359         memset(pmp->data, 0, sizeof(pmp->data));
2360
2361         rsp = (struct opa_port_status_rsp *)pmp->data;
2362         if (port_num)
2363                 rsp->port_num = port_num;
2364         else
2365                 rsp->port_num = port;
2366
2367         rsp->port_rcv_constraint_errors =
2368                 cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2369                                            CNTR_INVALID_VL));
2370
2371         hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
2372
2373         rsp->vl_select_mask = cpu_to_be32(vl_select_mask);
2374         rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2375                                           CNTR_INVALID_VL));
2376         rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2377                                          CNTR_INVALID_VL));
2378         rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2379                                           CNTR_INVALID_VL));
2380         rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2381                                          CNTR_INVALID_VL));
2382         rsp->port_multicast_xmit_pkts =
2383                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2384                                           CNTR_INVALID_VL));
2385         rsp->port_multicast_rcv_pkts =
2386                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2387                                           CNTR_INVALID_VL));
2388         rsp->port_xmit_wait =
2389                 cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2390         rsp->port_rcv_fecn =
2391                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2392         rsp->port_rcv_becn =
2393                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2394         rsp->port_xmit_discards =
2395                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2396                                            CNTR_INVALID_VL));
2397         rsp->port_xmit_constraint_errors =
2398                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2399                                            CNTR_INVALID_VL));
2400         rsp->port_rcv_remote_physical_errors =
2401                 cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2402                                           CNTR_INVALID_VL));
2403         rsp->local_link_integrity_errors =
2404                 cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
2405                                           CNTR_INVALID_VL));
2406         tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2407         tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2408                                    CNTR_INVALID_VL);
2409         if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2410                 /* overflow/wrapped */
2411                 rsp->link_error_recovery = cpu_to_be32(~0);
2412         } else {
2413                 rsp->link_error_recovery = cpu_to_be32(tmp2);
2414         }
2415         rsp->port_rcv_errors =
2416                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2417         rsp->excessive_buffer_overruns =
2418                 cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2419         rsp->fm_config_errors =
2420                 cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2421                                           CNTR_INVALID_VL));
2422         rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2423                                                       CNTR_INVALID_VL));
2424
2425         /* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
2426         tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2427         rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2428
2429         vlinfo = &rsp->vls[0];
2430         vfi = 0;
2431         /* The vl_select_mask has been checked above, and we know
2432          * that it contains only entries which represent valid VLs.
2433          * So in the for_each_set_bit() loop below, we don't need
2434          * any additional checks for vl.
2435          */
2436         for_each_set_bit(vl, &vl_select_mask,
2437                          8 * sizeof(vl_select_mask)) {
2438                 memset(vlinfo, 0, sizeof(*vlinfo));
2439
2440                 tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
2441                 rsp->vls[vfi].port_vl_rcv_data = cpu_to_be64(tmp);
2442
2443                 rsp->vls[vfi].port_vl_rcv_pkts =
2444                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2445                                                   idx_from_vl(vl)));
2446
2447                 rsp->vls[vfi].port_vl_xmit_data =
2448                         cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2449                                                    idx_from_vl(vl)));
2450
2451                 rsp->vls[vfi].port_vl_xmit_pkts =
2452                         cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2453                                                    idx_from_vl(vl)));
2454
2455                 rsp->vls[vfi].port_vl_xmit_wait =
2456                         cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2457                                                    idx_from_vl(vl)));
2458
2459                 rsp->vls[vfi].port_vl_rcv_fecn =
2460                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2461                                                   idx_from_vl(vl)));
2462
2463                 rsp->vls[vfi].port_vl_rcv_becn =
2464                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2465                                                   idx_from_vl(vl)));
2466
2467                 rsp->vls[vfi].port_vl_xmit_discards =
2468                         cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
2469                                                    idx_from_vl(vl)));
2470                 vlinfo++;
2471                 vfi++;
2472         }
2473
2474         a0_portstatus(ppd, rsp, vl_select_mask);
2475
2476         if (resp_len)
2477                 *resp_len += response_data_size;
2478
2479         return reply((struct ib_mad_hdr *)pmp);
2480 }
2481
2482 static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
2483                                      u8 res_lli, u8 res_ler)
2484 {
2485         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2486         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2487         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2488         u64 error_counter_summary = 0, tmp;
2489
2490         error_counter_summary += read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2491                                                 CNTR_INVALID_VL);
2492         /* port_rcv_switch_relay_errors is 0 for HFIs */
2493         error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_DSCD,
2494                                                 CNTR_INVALID_VL);
2495         error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2496                                                 CNTR_INVALID_VL);
2497         error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2498                                                CNTR_INVALID_VL);
2499         /* local link integrity must be right-shifted by the lli resolution */
2500         error_counter_summary += (read_dev_cntr(dd, C_DC_RX_REPLAY,
2501                                                 CNTR_INVALID_VL) >> res_lli);
2502         /* link error recovery must be right-shifted by the ler resolution */
2503         tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2504         tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
2505         error_counter_summary += (tmp >> res_ler);
2506         error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
2507                                                CNTR_INVALID_VL);
2508         error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2509         error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2510                                                CNTR_INVALID_VL);
2511         /* ppd->link_downed is a 32-bit value */
2512         error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
2513                                                 CNTR_INVALID_VL);
2514         tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2515         /* this is an 8-bit quantity */
2516         error_counter_summary += tmp < 0x100 ? (tmp & 0xff) : 0xff;
2517
2518         return error_counter_summary;
2519 }
2520
2521 static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
2522                             u32 vl_select_mask)
2523 {
2524         if (!is_bx(ppd->dd)) {
2525                 unsigned long vl;
2526                 u64 sum_vl_xmit_wait = 0;
2527                 unsigned long vl_all_mask = VL_MASK_ALL;
2528
2529                 for_each_set_bit(vl, &vl_all_mask,
2530                                  8 * sizeof(vl_all_mask)) {
2531                         u64 tmp = sum_vl_xmit_wait +
2532                                   read_port_cntr(ppd, C_TX_WAIT_VL,
2533                                                  idx_from_vl(vl));
2534                         if (tmp < sum_vl_xmit_wait) {
2535                                 /* we wrapped */
2536                                 sum_vl_xmit_wait = (u64)~0;
2537                                 break;
2538                         }
2539                         sum_vl_xmit_wait = tmp;
2540                 }
2541                 if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2542                         rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2543         }
2544 }
2545
2546 static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
2547                                    struct _port_dctrs *rsp)
2548 {
2549         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2550
2551         rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2552                                                 CNTR_INVALID_VL));
2553         rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2554                                                 CNTR_INVALID_VL));
2555         rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2556                                                 CNTR_INVALID_VL));
2557         rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2558                                                 CNTR_INVALID_VL));
2559         rsp->port_multicast_xmit_pkts =
2560                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2561                                           CNTR_INVALID_VL));
2562         rsp->port_multicast_rcv_pkts =
2563                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2564                                           CNTR_INVALID_VL));
2565 }
2566
2567 static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
2568                                     struct ib_device *ibdev,
2569                                     u8 port, u32 *resp_len)
2570 {
2571         struct opa_port_data_counters_msg *req =
2572                 (struct opa_port_data_counters_msg *)pmp->data;
2573         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2574         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2575         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2576         struct _port_dctrs *rsp;
2577         struct _vls_dctrs *vlinfo;
2578         size_t response_data_size;
2579         u32 num_ports;
2580         u8 num_pslm;
2581         u8 lq, num_vls;
2582         u8 res_lli, res_ler;
2583         u64 port_mask;
2584         u8 port_num;
2585         unsigned long vl;
2586         unsigned long vl_select_mask;
2587         int vfi;
2588
2589         num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2590         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2591         num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2592         vl_select_mask = be32_to_cpu(req->vl_select_mask);
2593         res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
2594         res_lli = res_lli ? res_lli + ADD_LLI : 0;
2595         res_ler = (u8)(be32_to_cpu(req->resolution) & MSK_LER) >> MSK_LER_SFT;
2596         res_ler = res_ler ? res_ler + ADD_LER : 0;
2597
2598         if (num_ports != 1 || (vl_select_mask & ~VL_MASK_ALL)) {
2599                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2600                 return reply((struct ib_mad_hdr *)pmp);
2601         }
2602
2603         /* Sanity check */
2604         response_data_size = sizeof(struct opa_port_data_counters_msg) +
2605                                 num_vls * sizeof(struct _vls_dctrs);
2606
2607         if (response_data_size > sizeof(pmp->data)) {
2608                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2609                 return reply((struct ib_mad_hdr *)pmp);
2610         }
2611
2612         /*
2613          * The bit set in the mask needs to be consistent with the
2614          * port the request came in on.
2615          */
2616         port_mask = be64_to_cpu(req->port_select_mask[3]);
2617         port_num = find_first_bit((unsigned long *)&port_mask,
2618                                   sizeof(port_mask) * 8);
2619
2620         if (port_num != port) {
2621                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2622                 return reply((struct ib_mad_hdr *)pmp);
2623         }
2624
2625         rsp = &req->port[0];
2626         memset(rsp, 0, sizeof(*rsp));
2627
2628         rsp->port_number = port;
2629         /*
2630          * Note that link_quality_indicator is a 32 bit quantity in
2631          * 'datacounters' queries (as opposed to 'portinfo' queries,
2632          * where it's a byte).
2633          */
2634         hfi1_read_link_quality(dd, &lq);
2635         rsp->link_quality_indicator = cpu_to_be32((u32)lq);
2636         pma_get_opa_port_dctrs(ibdev, rsp);
2637
2638         rsp->port_xmit_wait =
2639                 cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2640         rsp->port_rcv_fecn =
2641                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2642         rsp->port_rcv_becn =
2643                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2644         rsp->port_error_counter_summary =
2645                 cpu_to_be64(get_error_counter_summary(ibdev, port,
2646                                                       res_lli, res_ler));
2647
2648         vlinfo = &rsp->vls[0];
2649         vfi = 0;
2650         /* The vl_select_mask has been checked above, and we know
2651          * that it contains only entries which represent valid VLs.
2652          * So in the for_each_set_bit() loop below, we don't need
2653          * any additional checks for vl.
2654          */
2655         for_each_set_bit(vl, &vl_select_mask,
2656                          8 * sizeof(req->vl_select_mask)) {
2657                 memset(vlinfo, 0, sizeof(*vlinfo));
2658
2659                 rsp->vls[vfi].port_vl_xmit_data =
2660                         cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2661                                                    idx_from_vl(vl)));
2662
2663                 rsp->vls[vfi].port_vl_rcv_data =
2664                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
2665                                                   idx_from_vl(vl)));
2666
2667                 rsp->vls[vfi].port_vl_xmit_pkts =
2668                         cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2669                                                    idx_from_vl(vl)));
2670
2671                 rsp->vls[vfi].port_vl_rcv_pkts =
2672                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2673                                                   idx_from_vl(vl)));
2674
2675                 rsp->vls[vfi].port_vl_xmit_wait =
2676                         cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2677                                                    idx_from_vl(vl)));
2678
2679                 rsp->vls[vfi].port_vl_rcv_fecn =
2680                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2681                                                   idx_from_vl(vl)));
2682                 rsp->vls[vfi].port_vl_rcv_becn =
2683                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2684                                                   idx_from_vl(vl)));
2685
2686                 /* rsp->port_vl_xmit_time_cong is 0 for HFIs */
2687                 /* rsp->port_vl_xmit_wasted_bw ??? */
2688         /* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
2689          * does this differ from rsp->vls[vfi].port_vl_xmit_wait?
2690          */
2691                 /*rsp->vls[vfi].port_vl_mark_fecn =
2692                  *      cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
2693                  *              + offset));
2694                  */
2695                 vlinfo++;
2696                 vfi++;
2697         }
2698
2699         a0_datacounters(ppd, rsp, vl_select_mask);
2700
2701         if (resp_len)
2702                 *resp_len += response_data_size;
2703
2704         return reply((struct ib_mad_hdr *)pmp);
2705 }
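
/*
 * Illustrative sketch (not part of the driver): the per-VL loops above
 * copy the 32-bit vl_select_mask into an unsigned long so that
 * for_each_set_bit() can walk it. Reduced to a self-contained form
 * (vl_handler() is hypothetical), the pattern is:
 */
#if 0
static void walk_vl_mask(u32 vl_select_mask,
                         void (*vl_handler)(unsigned long vl))
{
        unsigned long mask = vl_select_mask;    /* widen for bitmap ops */
        unsigned long vl;

        for_each_set_bit(vl, &mask, 8 * sizeof(vl_select_mask))
                vl_handler(vl);
}
#endif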
2706
2707 static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
2708                                        struct ib_device *ibdev, u8 port)
2709 {
2710         struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
2711                                                 pmp->data;
2712         struct _port_dctrs rsp;
2713
2714         if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
2715                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2716                 goto bail;
2717         }
2718
2719         memset(&rsp, 0, sizeof(rsp));
2720         pma_get_opa_port_dctrs(ibdev, &rsp);
2721
2722         p->port_xmit_data = rsp.port_xmit_data;
2723         p->port_rcv_data = rsp.port_rcv_data;
2724         p->port_xmit_packets = rsp.port_xmit_pkts;
2725         p->port_rcv_packets = rsp.port_rcv_pkts;
2726         p->port_unicast_xmit_packets = 0;
2727         p->port_unicast_rcv_packets = 0;
2728         p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
2729         p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
2730
2731 bail:
2732         return reply((struct ib_mad_hdr *)pmp);
2733 }
2734
2735 static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
2736                                    struct _port_ectrs *rsp, u8 port)
2737 {
2738         u64 tmp, tmp2;
2739         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2740         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2741         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2742
2743         tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2744         tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2745                                         CNTR_INVALID_VL);
2746         if (tmp2 > UINT_MAX || tmp2 < tmp) {
2747                 /* overflow/wrapped */
2748                 rsp->link_error_recovery = cpu_to_be32(~0);
2749         } else {
2750                 rsp->link_error_recovery = cpu_to_be32(tmp2);
2751         }
2752
2753         rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2754                                                 CNTR_INVALID_VL));
2755         rsp->port_rcv_errors =
2756                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2757         rsp->port_rcv_remote_physical_errors =
2758                 cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2759                                           CNTR_INVALID_VL));
2760         rsp->port_rcv_switch_relay_errors = 0;
2761         rsp->port_xmit_discards =
2762                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2763                                            CNTR_INVALID_VL));
2764         rsp->port_xmit_constraint_errors =
2765                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2766                                            CNTR_INVALID_VL));
2767         rsp->port_rcv_constraint_errors =
2768                 cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2769                                            CNTR_INVALID_VL));
2770         rsp->local_link_integrity_errors =
2771                 cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
2772                                           CNTR_INVALID_VL));
2773         rsp->excessive_buffer_overruns =
2774                 cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2775 }
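
/*
 * Illustrative sketch (not part of the driver): link_error_recovery above
 * saturates the 64-bit sum of two counters to 32 bits (the extra
 * "tmp2 < tmp" test also guards against 64-bit wrap). Factored out:
 */
#if 0
static u32 sat_add_u32(u64 a, u64 b)
{
        u64 sum = a + b;

        return (sum > UINT_MAX || sum < a) ? ~0 : (u32)sum;
}
#endif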
2776
2777 static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
2778                                   struct ib_device *ibdev,
2779                                   u8 port, u32 *resp_len)
2780 {
2781         size_t response_data_size;
2782         struct _port_ectrs *rsp;
2783         u8 port_num;
2784         struct opa_port_error_counters64_msg *req;
2785         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2786         u32 num_ports;
2787         u8 num_pslm;
2788         u8 num_vls;
2789         struct hfi1_ibport *ibp;
2790         struct hfi1_pportdata *ppd;
2791         struct _vls_ectrs *vlinfo;
2792         unsigned long vl;
2793         u64 port_mask, tmp;
2794         u32 vl_select_mask;
2795         int vfi;
2796
2797         req = (struct opa_port_error_counters64_msg *)pmp->data;
2798
2799         num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2800
2801         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2802         num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2803
2804         if (num_ports != 1 || num_ports != num_pslm) {
2805                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2806                 return reply((struct ib_mad_hdr *)pmp);
2807         }
2808
2809         response_data_size = sizeof(struct opa_port_error_counters64_msg) +
2810                                 num_vls * sizeof(struct _vls_ectrs);
2811
2812         if (response_data_size > sizeof(pmp->data)) {
2813                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2814                 return reply((struct ib_mad_hdr *)pmp);
2815         }
2816         /*
2817          * The bit set in the mask needs to be consistent with the
2818          * port the request came in on.
2819          */
2820         port_mask = be64_to_cpu(req->port_select_mask[3]);
2821         port_num = find_first_bit((unsigned long *)&port_mask,
2822                                   sizeof(port_mask) * 8);
2823
2824         if (port_num != port) {
2825                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2826                 return reply((struct ib_mad_hdr *)pmp);
2827         }
2828
2829         rsp = &req->port[0];
2830
2831         ibp = to_iport(ibdev, port_num);
2832         ppd = ppd_from_ibp(ibp);
2833
2834         memset(rsp, 0, sizeof(*rsp));
2835         rsp->port_number = port_num;
2836
2837         pma_get_opa_port_ectrs(ibdev, rsp, port_num);
2838
2839         rsp->port_rcv_remote_physical_errors =
2840                 cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2841                                           CNTR_INVALID_VL));
2842         rsp->fm_config_errors =
2843                 cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2844                                           CNTR_INVALID_VL));
2845         tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2846
2847         rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2848         rsp->port_rcv_errors =
2849                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2850         vlinfo = &rsp->vls[0];
2851         vfi = 0;
2852         vl_select_mask = be32_to_cpu(req->vl_select_mask);
2853         for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
2854                          8 * sizeof(req->vl_select_mask)) {
2855                 memset(vlinfo, 0, sizeof(*vlinfo));
2856                 rsp->vls[vfi].port_vl_xmit_discards =
2857                         cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
2858                                                    idx_from_vl(vl)));
2859                 vlinfo += 1;
2860                 vfi++;
2861         }
2862
2863         if (resp_len)
2864                 *resp_len += response_data_size;
2865
2866         return reply((struct ib_mad_hdr *)pmp);
2867 }
2868
2869 static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
2870                                    struct ib_device *ibdev, u8 port)
2871 {
2872         struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
2873                 pmp->data;
2874         struct _port_ectrs rsp;
2875         u64 temp_link_overrun_errors;
2876         u64 temp_64;
2877         u32 temp_32;
2878
2879         memset(&rsp, 0, sizeof(rsp));
2880         pma_get_opa_port_ectrs(ibdev, &rsp, port);
2881
2882         if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
2883                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2884                 goto bail;
2885         }
2886
2887         p->symbol_error_counter = 0; /* N/A for OPA */
2888
2889         temp_32 = be32_to_cpu(rsp.link_error_recovery);
2890         if (temp_32 > 0xFFUL)
2891                 p->link_error_recovery_counter = 0xFF;
2892         else
2893                 p->link_error_recovery_counter = (u8)temp_32;
2894
2895         temp_32 = be32_to_cpu(rsp.link_downed);
2896         if (temp_32 > 0xFFUL)
2897                 p->link_downed_counter = 0xFF;
2898         else
2899                 p->link_downed_counter = (u8)temp_32;
2900
2901         temp_64 = be64_to_cpu(rsp.port_rcv_errors);
2902         if (temp_64 > 0xFFFFUL)
2903                 p->port_rcv_errors = cpu_to_be16(0xFFFF);
2904         else
2905                 p->port_rcv_errors = cpu_to_be16((u16)temp_64);
2906
2907         temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
2908         if (temp_64 > 0xFFFFUL)
2909                 p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
2910         else
2911                 p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
2912
2913         temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
2914         p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
2915
2916         temp_64 = be64_to_cpu(rsp.port_xmit_discards);
2917         if (temp_64 > 0xFFFFUL)
2918                 p->port_xmit_discards = cpu_to_be16(0xFFFF);
2919         else
2920                 p->port_xmit_discards = cpu_to_be16((u16)temp_64);
2921
2922         temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
2923         if (temp_64 > 0xFFUL)
2924                 p->port_xmit_constraint_errors = 0xFF;
2925         else
2926                 p->port_xmit_constraint_errors = (u8)temp_64;
2927
2928         temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
2929         if (temp_64 > 0xFFUL)
2930                 p->port_rcv_constraint_errors = 0xFF;
2931         else
2932                 p->port_rcv_constraint_errors = (u8)temp_64;
2933
2934         /* LocalLink: 7:4, BufferOverrun: 3:0 */
2935         temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
2936         if (temp_64 > 0xFUL)
2937                 temp_64 = 0xFUL;
2938
2939         temp_link_overrun_errors = temp_64 << 4;
2940
2941         temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
2942         if (temp_64 > 0xFUL)
2943                 temp_64 = 0xFUL;
2944         temp_link_overrun_errors |= temp_64;
2945
2946         p->link_overrun_errors = (u8)temp_link_overrun_errors;
2947
2948         p->vl15_dropped = 0; /* N/A for OPA */
2949
2950 bail:
2951         return reply((struct ib_mad_hdr *)pmp);
2952 }
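
/*
 * Illustrative sketch (not part of the driver): the repeated
 * "saturate a wide OPA counter into a narrow IB field" tests above
 * could be written with small helpers such as these (hypothetical):
 */
#if 0
static u8 clamp_counter_u8(u64 v)
{
        return v > 0xFFUL ? 0xFF : (u8)v;
}

static __be16 clamp_counter_be16(u64 v)
{
        return cpu_to_be16(v > 0xFFFFUL ? 0xFFFF : (u16)v);
}
#endif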
2953
2954 static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
2955                                  struct ib_device *ibdev,
2956                                  u8 port, u32 *resp_len)
2957 {
2958         size_t response_data_size;
2959         struct _port_ei *rsp;
2960         struct opa_port_error_info_msg *req;
2961         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2962         u64 port_mask;
2963         u32 num_ports;
2964         u8 port_num;
2965         u8 num_pslm;
2966         u64 reg;
2967
2968         req = (struct opa_port_error_info_msg *)pmp->data;
2969         rsp = &req->port[0];
2970
2971         num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
2972         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2973
2974         memset(rsp, 0, sizeof(*rsp));
2975
2976         if (num_ports != 1 || num_ports != num_pslm) {
2977                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2978                 return reply((struct ib_mad_hdr *)pmp);
2979         }
2980
2981         /* Sanity check */
2982         response_data_size = sizeof(struct opa_port_error_info_msg);
2983
2984         if (response_data_size > sizeof(pmp->data)) {
2985                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2986                 return reply((struct ib_mad_hdr *)pmp);
2987         }
2988
2989         /*
2990          * The bit set in the mask needs to be consistent with the port
2991          * the request came in on.
2992          */
2993         port_mask = be64_to_cpu(req->port_select_mask[3]);
2994         port_num = find_first_bit((unsigned long *)&port_mask,
2995                                   sizeof(port_mask) * 8);
2996
2997         if (port_num != port) {
2998                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2999                 return reply((struct ib_mad_hdr *)pmp);
3000         }
3001
3002         /* PortRcvErrorInfo */
3003         rsp->port_rcv_ei.status_and_code =
3004                 dd->err_info_rcvport.status_and_code;
3005         memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
3006                &dd->err_info_rcvport.packet_flit1, sizeof(u64));
3007         memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
3008                &dd->err_info_rcvport.packet_flit2, sizeof(u64));
3009
3010         /* ExcessiveBufferOverrunInfo */
3011         reg = read_csr(dd, RCV_ERR_INFO);
3012         if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
3013                 /*
3014                  * if the RcvExcessBufferOverrun bit is set, save SC of
3015                  * first pkt that encountered an excess buffer overrun
3016                  */
3017                 u8 tmp = (u8)reg;
3018
3019                 tmp &= RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
3020                 tmp <<= 2;
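                /* SC now occupies bits 6:2; bit 7 (set below) is the status flag */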
3021                 rsp->excessive_buffer_overrun_ei.status_and_sc = tmp;
3022                 /* set the status bit */
3023                 rsp->excessive_buffer_overrun_ei.status_and_sc |= 0x80;
3024         }
3025
3026         rsp->port_xmit_constraint_ei.status =
3027                 dd->err_info_xmit_constraint.status;
3028         rsp->port_xmit_constraint_ei.pkey =
3029                 cpu_to_be16(dd->err_info_xmit_constraint.pkey);
3030         rsp->port_xmit_constraint_ei.slid =
3031                 cpu_to_be32(dd->err_info_xmit_constraint.slid);
3032
3033         rsp->port_rcv_constraint_ei.status =
3034                 dd->err_info_rcv_constraint.status;
3035         rsp->port_rcv_constraint_ei.pkey =
3036                 cpu_to_be16(dd->err_info_rcv_constraint.pkey);
3037         rsp->port_rcv_constraint_ei.slid =
3038                 cpu_to_be32(dd->err_info_rcv_constraint.slid);
3039
3040         /* UncorrectableErrorInfo */
3041         rsp->uncorrectable_ei.status_and_code = dd->err_info_uncorrectable;
3042
3043         /* FMConfigErrorInfo */
3044         rsp->fm_config_ei.status_and_code = dd->err_info_fmconfig;
3045
3046         if (resp_len)
3047                 *resp_len += response_data_size;
3048
3049         return reply((struct ib_mad_hdr *)pmp);
3050 }
3051
3052 static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
3053                                   struct ib_device *ibdev,
3054                                   u8 port, u32 *resp_len)
3055 {
3056         struct opa_clear_port_status *req =
3057                 (struct opa_clear_port_status *)pmp->data;
3058         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3059         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3060         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3061         u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3062         u64 portn = be64_to_cpu(req->port_select_mask[3]);
3063         u32 counter_select = be32_to_cpu(req->counter_select_mask);
3064         u32 vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
3065         unsigned long vl;
3066
3067         if ((nports != 1) || (portn != 1 << port)) {
3068                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3069                 return reply((struct ib_mad_hdr *)pmp);
3070         }
3071         /*
3072          * only counters returned by pma_get_opa_portstatus() are
3073          * handled, so when pma_get_opa_portstatus() gets a fix,
3074          * the corresponding change should be made here as well.
3075          */
3076
3077         if (counter_select & CS_PORT_XMIT_DATA)
3078                 write_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL, 0);
3079
3080         if (counter_select & CS_PORT_RCV_DATA)
3081                 write_dev_cntr(dd, C_DC_RCV_FLITS, CNTR_INVALID_VL, 0);
3082
3083         if (counter_select & CS_PORT_XMIT_PKTS)
3084                 write_dev_cntr(dd, C_DC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3085
3086         if (counter_select & CS_PORT_RCV_PKTS)
3087                 write_dev_cntr(dd, C_DC_RCV_PKTS, CNTR_INVALID_VL, 0);
3088
3089         if (counter_select & CS_PORT_MCAST_XMIT_PKTS)
3090                 write_dev_cntr(dd, C_DC_MC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3091
3092         if (counter_select & CS_PORT_MCAST_RCV_PKTS)
3093                 write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0);
3094
3095         if (counter_select & CS_PORT_XMIT_WAIT)
3096                 write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0);
3097
3098         /* ignore cs_sw_portCongestion for HFIs */
3099
3100         if (counter_select & CS_PORT_RCV_FECN)
3101                 write_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL, 0);
3102
3103         if (counter_select & CS_PORT_RCV_BECN)
3104                 write_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL, 0);
3105
3106         /* ignore cs_port_xmit_time_cong for HFIs */
3107         /* ignore cs_port_xmit_wasted_bw for now */
3108         /* ignore cs_port_xmit_wait_data for now */
3109         if (counter_select & CS_PORT_RCV_BUBBLE)
3110                 write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
3111
3112         /* Only applicable for switch */
3113         /* if (counter_select & CS_PORT_MARK_FECN)
3114          *      write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
3115          */
3116
3117         if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
3118                 write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
3119
3120         /* ignore cs_port_rcv_switch_relay_errors for HFIs */
3121         if (counter_select & CS_PORT_XMIT_DISCARDS)
3122                 write_port_cntr(ppd, C_SW_XMIT_DSCD, CNTR_INVALID_VL, 0);
3123
3124         if (counter_select & CS_PORT_XMIT_CONSTRAINT_ERRORS)
3125                 write_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, CNTR_INVALID_VL, 0);
3126
3127         if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
3128                 write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
3129
3130         if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS)
3131                 write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3132
3133         if (counter_select & CS_LINK_ERROR_RECOVERY) {
3134                 write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3135                 write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3136                                CNTR_INVALID_VL, 0);
3137         }
3138
3139         if (counter_select & CS_PORT_RCV_ERRORS)
3140                 write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3141
3142         if (counter_select & CS_EXCESSIVE_BUFFER_OVERRUNS) {
3143                 write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3144                 dd->rcv_ovfl_cnt = 0;
3145         }
3146
3147         if (counter_select & CS_FM_CONFIG_ERRORS)
3148                 write_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL, 0);
3149
3150         if (counter_select & CS_LINK_DOWNED)
3151                 write_port_cntr(ppd, C_SW_LINK_DOWN, CNTR_INVALID_VL, 0);
3152
3153         if (counter_select & CS_UNCORRECTABLE_ERRORS)
3154                 write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
3155
3156         for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
3157                          8 * sizeof(vl_select_mask)) {
3158                 if (counter_select & CS_PORT_XMIT_DATA)
3159                         write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
3160
3161                 if (counter_select & CS_PORT_RCV_DATA)
3162                         write_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl), 0);
3163
3164                 if (counter_select & CS_PORT_XMIT_PKTS)
3165                         write_port_cntr(ppd, C_TX_PKT_VL, idx_from_vl(vl), 0);
3166
3167                 if (counter_select & CS_PORT_RCV_PKTS)
3168                         write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0);
3169
3170                 if (counter_select & CS_PORT_XMIT_WAIT)
3171                         write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0);
3172
3173                 /* sw_port_vl_congestion is 0 for HFIs */
3174                 if (counter_select & CS_PORT_RCV_FECN)
3175                         write_dev_cntr(dd, C_DC_RCV_FCN_VL, idx_from_vl(vl), 0);
3176
3177                 if (counter_select & CS_PORT_RCV_BECN)
3178                         write_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl), 0);
3179
3180                 /* port_vl_xmit_time_cong is 0 for HFIs */
3181                 /* port_vl_xmit_wasted_bw ??? */
3182                 /* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ??? */
3183                 if (counter_select & CS_PORT_RCV_BUBBLE)
3184                         write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
3185
3186                 /* if (counter_select & CS_PORT_MARK_FECN)
3187                  *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
3188                  */
3189                 if (counter_select & CS_PORT_XMIT_DISCARDS)
3190                         write_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
3191                                         idx_from_vl(vl), 0);
3192         }
3193
3194         if (resp_len)
3195                 *resp_len += sizeof(*req);
3196
3197         return reply((struct ib_mad_hdr *)pmp);
3198 }
3199
3200 static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
3201                                  struct ib_device *ibdev,
3202                                  u8 port, u32 *resp_len)
3203 {
3204         struct _port_ei *rsp;
3205         struct opa_port_error_info_msg *req;
3206         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3207         u64 port_mask;
3208         u32 num_ports;
3209         u8 port_num;
3210         u8 num_pslm;
3211         u32 error_info_select;
3212
3213         req = (struct opa_port_error_info_msg *)pmp->data;
3214         rsp = &req->port[0];
3215
3216         num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3217         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3218
3219         memset(rsp, 0, sizeof(*rsp));
3220
3221         if (num_ports != 1 || num_ports != num_pslm) {
3222                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3223                 return reply((struct ib_mad_hdr *)pmp);
3224         }
3225
3226         /*
3227          * The bit set in the mask needs to be consistent with the port
3228          * the request came in on.
3229          */
3230         port_mask = be64_to_cpu(req->port_select_mask[3]);
3231         port_num = find_first_bit((unsigned long *)&port_mask,
3232                                   sizeof(port_mask) * 8);
3233
3234         if (port_num != port) {
3235                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3236                 return reply((struct ib_mad_hdr *)pmp);
3237         }
3238
3239         error_info_select = be32_to_cpu(req->error_info_select_mask);
3240
3241         /* PortRcvErrorInfo */
3242         if (error_info_select & ES_PORT_RCV_ERROR_INFO)
3243                 /* turn off status bit */
3244                 dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3245
3246         /* ExcessiveBufferOverrunInfo */
3247         if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
3248                 /*
3249                  * status bit is essentially kept in the h/w - bit 5 of
3250                  * RCV_ERR_INFO
3251                  */
3252                 write_csr(dd, RCV_ERR_INFO,
3253                           RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
3254
3255         if (error_info_select & ES_PORT_XMIT_CONSTRAINT_ERROR_INFO)
3256                 dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3257
3258         if (error_info_select & ES_PORT_RCV_CONSTRAINT_ERROR_INFO)
3259                 dd->err_info_rcv_constraint.status &= ~OPA_EI_STATUS_SMASK;
3260
3261         /* UncorrectableErrorInfo */
3262         if (error_info_select & ES_UNCORRECTABLE_ERROR_INFO)
3263                 /* turn off status bit */
3264                 dd->err_info_uncorrectable &= ~OPA_EI_STATUS_SMASK;
3265
3266         /* FMConfigErrorInfo */
3267         if (error_info_select & ES_FM_CONFIG_ERROR_INFO)
3268                 /* turn off status bit */
3269                 dd->err_info_fmconfig &= ~OPA_EI_STATUS_SMASK;
3270
3271         if (resp_len)
3272                 *resp_len += sizeof(*req);
3273
3274         return reply((struct ib_mad_hdr *)pmp);
3275 }
3276
3277 struct opa_congestion_info_attr {
3278         __be16 congestion_info;
3279         u8 control_table_cap;   /* CCT capacity, in units of 64-entry blocks */
3280         u8 congestion_log_length;
3281 } __packed;
3282
3283 static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
3284                                     struct ib_device *ibdev, u8 port,
3285                                     u32 *resp_len)
3286 {
3287         struct opa_congestion_info_attr *p =
3288                 (struct opa_congestion_info_attr *)data;
3289         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3290         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3291
3292         p->congestion_info = 0;
3293         p->control_table_cap = ppd->cc_max_table_entries;
3294         p->congestion_log_length = OPA_CONG_LOG_ELEMS;
3295
3296         if (resp_len)
3297                 *resp_len += sizeof(*p);
3298
3299         return reply((struct ib_mad_hdr *)smp);
3300 }
3301
3302 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
3303                                        u8 *data, struct ib_device *ibdev,
3304                                        u8 port, u32 *resp_len)
3305 {
3306         int i;
3307         struct opa_congestion_setting_attr *p =
3308                 (struct opa_congestion_setting_attr *)data;
3309         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3310         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3311         struct opa_congestion_setting_entry_shadow *entries;
3312         struct cc_state *cc_state;
3313
3314         rcu_read_lock();
3315
3316         cc_state = get_cc_state(ppd);
3317
3318         if (!cc_state) {
3319                 rcu_read_unlock();
3320                 return reply((struct ib_mad_hdr *)smp);
3321         }
3322
3323         entries = cc_state->cong_setting.entries;
3324         p->port_control = cpu_to_be16(cc_state->cong_setting.port_control);
3325         p->control_map = cpu_to_be32(cc_state->cong_setting.control_map);
3326         for (i = 0; i < OPA_MAX_SLS; i++) {
3327                 p->entries[i].ccti_increase = entries[i].ccti_increase;
3328                 p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
3329                 p->entries[i].trigger_threshold =
3330                         entries[i].trigger_threshold;
3331                 p->entries[i].ccti_min = entries[i].ccti_min;
3332         }
3333
3334         rcu_read_unlock();
3335
3336         if (resp_len)
3337                 *resp_len += sizeof(*p);
3338
3339         return reply((struct ib_mad_hdr *)smp);
3340 }
3341
3342 /*
3343  * Apply congestion control information stored in the ppd to the
3344  * active structure.
3345  */
3346 static void apply_cc_state(struct hfi1_pportdata *ppd)
3347 {
3348         struct cc_state *old_cc_state, *new_cc_state;
3349
3350         new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
3351         if (!new_cc_state)
3352                 return;
3353
3354         /*
3355          * Hold the lock for updating *and* to prevent ppd information
3356          * from changing during the update.
3357          */
3358         spin_lock(&ppd->cc_state_lock);
3359
3360         old_cc_state = get_cc_state_protected(ppd);
3361         if (!old_cc_state) {
3362                 /* never active, or shutting down */
3363                 spin_unlock(&ppd->cc_state_lock);
3364                 kfree(new_cc_state);
3365                 return;
3366         }
3367
3368         *new_cc_state = *old_cc_state;
3369
3370         new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
3371         memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
3372                ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
3373
3374         new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
3375         new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
3376         memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
3377                OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
3378
3379         rcu_assign_pointer(ppd->cc_state, new_cc_state);
3380
3381         spin_unlock(&ppd->cc_state_lock);
3382
3383         kfree_rcu(old_cc_state, rcu);
3384 }
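
/*
 * Illustrative sketch (not part of the driver): apply_cc_state() above is
 * the classic RCU copy/update/publish sequence. Stripped to a skeleton
 * (names hypothetical):
 */
#if 0
        new = kzalloc(sizeof(*new), GFP_KERNEL);
        spin_lock(&lock);                /* serialize updaters */
        *new = *old;                     /* copy ... */
        new->field = updated_value;      /* ... and modify */
        rcu_assign_pointer(shared, new); /* publish to readers */
        spin_unlock(&lock);
        kfree_rcu(old, rcu);             /* reclaim after a grace period */
#endif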
3385
3386 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3387                                        struct ib_device *ibdev, u8 port,
3388                                        u32 *resp_len)
3389 {
3390         struct opa_congestion_setting_attr *p =
3391                 (struct opa_congestion_setting_attr *)data;
3392         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3393         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3394         struct opa_congestion_setting_entry_shadow *entries;
3395         int i;
3396
3397         /*
3398          * Save details from packet into the ppd.  Hold the cc_state_lock so
3399          * our information is consistent with anyone trying to apply the state.
3400          */
3401         spin_lock(&ppd->cc_state_lock);
3402         ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
3403
3404         entries = ppd->congestion_entries;
3405         for (i = 0; i < OPA_MAX_SLS; i++) {
3406                 entries[i].ccti_increase = p->entries[i].ccti_increase;
3407                 entries[i].ccti_timer = be16_to_cpu(p->entries[i].ccti_timer);
3408                 entries[i].trigger_threshold =
3409                         p->entries[i].trigger_threshold;
3410                 entries[i].ccti_min = p->entries[i].ccti_min;
3411         }
3412         spin_unlock(&ppd->cc_state_lock);
3413
3414         /* now apply the information */
3415         apply_cc_state(ppd);
3416
3417         return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
3418                                            resp_len);
3419 }
3420
3421 static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
3422                                         u8 *data, struct ib_device *ibdev,
3423                                         u8 port, u32 *resp_len)
3424 {
3425         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3426         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3427         struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data;
3428         s64 ts;
3429         int i;
3430
3431         if (am != 0) {
3432                 smp->status |= IB_SMP_INVALID_FIELD;
3433                 return reply((struct ib_mad_hdr *)smp);
3434         }
3435
3436         spin_lock_irq(&ppd->cc_log_lock);
3437
3438         cong_log->log_type = OPA_CC_LOG_TYPE_HFI;
3439         cong_log->congestion_flags = 0;
3440         cong_log->threshold_event_counter =
3441                 cpu_to_be16(ppd->threshold_event_counter);
3442         memcpy(cong_log->threshold_cong_event_map,
3443                ppd->threshold_cong_event_map,
3444                sizeof(cong_log->threshold_cong_event_map));
3445         /* keep timestamp in units of 1.024 usec */
3446         ts = ktime_to_ns(ktime_get()) / 1024;
3447         cong_log->current_time_stamp = cpu_to_be32(ts);
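        /* walk the cc_events circular buffer exactly once, starting at cc_mad_idx */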
3448         for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) {
3449                 struct opa_hfi1_cong_log_event_internal *cce =
3450                         &ppd->cc_events[ppd->cc_mad_idx++];
3451                 if (ppd->cc_mad_idx == OPA_CONG_LOG_ELEMS)
3452                         ppd->cc_mad_idx = 0;
3453                 /*
3454                  * Entries which are older than twice the time
3455                  * required to wrap the counter are supposed to
3456                  * be zeroed (CA10-49 IBTA, release 1.2.1, V1).
3457                  */
3458                 if ((u64)(ts - cce->timestamp) > (2 * UINT_MAX))
3459                         continue;
3460                 memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
3461                 memcpy(cong_log->events[i].remote_qp_number_cn_entry,
3462                        &cce->rqpn, 3);
3463                 cong_log->events[i].sl_svc_type_cn_entry =
3464                         ((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
3465                 cong_log->events[i].remote_lid_cn_entry =
3466                         cpu_to_be32(cce->rlid);
3467                 cong_log->events[i].timestamp_cn_entry =
3468                         cpu_to_be32(cce->timestamp);
3469         }
3470
3471         /*
3472          * Reset threshold_cong_event_map, and threshold_event_counter
3473          * to 0 when log is read.
3474          */
3475         memset(ppd->threshold_cong_event_map, 0x0,
3476                sizeof(ppd->threshold_cong_event_map));
3477         ppd->threshold_event_counter = 0;
3478
3479         spin_unlock_irq(&ppd->cc_log_lock);
3480
3481         if (resp_len)
3482                 *resp_len += sizeof(struct opa_hfi1_cong_log);
3483
3484         return reply((struct ib_mad_hdr *)smp);
3485 }
3486
3487 static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3488                                    struct ib_device *ibdev, u8 port,
3489                                    u32 *resp_len)
3490 {
3491         struct ib_cc_table_attr *cc_table_attr =
3492                 (struct ib_cc_table_attr *)data;
3493         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3494         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3495         u32 start_block = OPA_AM_START_BLK(am);
3496         u32 n_blocks = OPA_AM_NBLK(am);
3497         struct ib_cc_table_entry_shadow *entries;
3498         int i, j;
3499         u32 sentry, eentry;
3500         struct cc_state *cc_state;
3501
3502         /* sanity check n_blocks, start_block */
3503         if (n_blocks == 0 ||
3504             start_block + n_blocks > ppd->cc_max_table_entries) {
3505                 smp->status |= IB_SMP_INVALID_FIELD;
3506                 return reply((struct ib_mad_hdr *)smp);
3507         }
3508
3509         rcu_read_lock();
3510
3511         cc_state = get_cc_state(ppd);
3512
3513         if (!cc_state) {
3514                 rcu_read_unlock();
3515                 return reply((struct ib_mad_hdr *)smp);
3516         }
3517
3518         sentry = start_block * IB_CCT_ENTRIES;
3519         eentry = sentry + (IB_CCT_ENTRIES * n_blocks);
3520
3521         cc_table_attr->ccti_limit = cpu_to_be16(cc_state->cct.ccti_limit);
3522
3523         entries = cc_state->cct.entries;
3524
3525         /* return n_blocks, though the last block may not be full */
3526         for (j = 0, i = sentry; i < eentry; j++, i++)
3527                 cc_table_attr->ccti_entries[j].entry =
3528                         cpu_to_be16(entries[i].entry);
3529
3530         rcu_read_unlock();
3531
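        /* the +1 accounts for the 16-bit ccti_limit that precedes the entries */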
3532         if (resp_len)
3533                 *resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3534
3535         return reply((struct ib_mad_hdr *)smp);
3536 }
3537
3538 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3539                                    struct ib_device *ibdev, u8 port,
3540                                    u32 *resp_len)
3541 {
3542         struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
3543         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3544         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3545         u32 start_block = OPA_AM_START_BLK(am);
3546         u32 n_blocks = OPA_AM_NBLK(am);
3547         struct ib_cc_table_entry_shadow *entries;
3548         int i, j;
3549         u32 sentry, eentry;
3550         u16 ccti_limit;
3551
3552         /* sanity check n_blocks, start_block */
3553         if (n_blocks == 0 ||
3554             start_block + n_blocks > ppd->cc_max_table_entries) {
3555                 smp->status |= IB_SMP_INVALID_FIELD;
3556                 return reply((struct ib_mad_hdr *)smp);
3557         }
3558
3559         sentry = start_block * IB_CCT_ENTRIES;
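        /*
         * The last block may be partial: past the (n_blocks - 1) full
         * blocks, only (ccti_limit % IB_CCT_ENTRIES) + 1 entries of the
         * final block are covered.
         */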
3560         eentry = sentry + ((n_blocks - 1) * IB_CCT_ENTRIES) +
3561                  (be16_to_cpu(p->ccti_limit)) % IB_CCT_ENTRIES + 1;
3562
3563         /* sanity check ccti_limit */
3564         ccti_limit = be16_to_cpu(p->ccti_limit);
3565         if (ccti_limit + 1 > eentry) {
3566                 smp->status |= IB_SMP_INVALID_FIELD;
3567                 return reply((struct ib_mad_hdr *)smp);
3568         }
3569
3570         /*
3571          * Save details from packet into the ppd.  Hold the cc_state_lock so
3572          * our information is consistent with anyone trying to apply the state.
3573          */
3574         spin_lock(&ppd->cc_state_lock);
3575         ppd->total_cct_entry = ccti_limit + 1;
3576         entries = ppd->ccti_entries;
3577         for (j = 0, i = sentry; i < eentry; j++, i++)
3578                 entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
3579         spin_unlock(&ppd->cc_state_lock);
3580
3581         /* now apply the information */
3582         apply_cc_state(ppd);
3583
3584         return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len);
3585 }
3586
3587 struct opa_led_info {
3588         __be32 rsvd_led_mask;
3589         __be32 rsvd;
3590 };
3591
3592 #define OPA_LED_SHIFT   31
3593 #define OPA_LED_MASK    BIT(OPA_LED_SHIFT)
3594
3595 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3596                                    struct ib_device *ibdev, u8 port,
3597                                    u32 *resp_len)
3598 {
3599         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3600         struct hfi1_pportdata *ppd = dd->pport;
3601         struct opa_led_info *p = (struct opa_led_info *)data;
3602         u32 nport = OPA_AM_NPORT(am);
3603         u32 is_beaconing_active;
3604
3605         if (nport != 1) {
3606                 smp->status |= IB_SMP_INVALID_FIELD;
3607                 return reply((struct ib_mad_hdr *)smp);
3608         }
3609
3610         /*
3611          * This pairs with the memory barrier in hfi1_start_led_override to
3612          * ensure that we read the correct state of LED beaconing represented
3613          * by led_override_timer_active
3614          */
3615         smp_rmb();
3616         is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
3617         p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
3618
3619         if (resp_len)
3620                 *resp_len += sizeof(struct opa_led_info);
3621
3622         return reply((struct ib_mad_hdr *)smp);
3623 }
3624
3625 static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3626                                    struct ib_device *ibdev, u8 port,
3627                                    u32 *resp_len)
3628 {
3629         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3630         struct opa_led_info *p = (struct opa_led_info *)data;
3631         u32 nport = OPA_AM_NPORT(am);
3632         int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
3633
3634         if (nport != 1) {
3635                 smp->status |= IB_SMP_INVALID_FIELD;
3636                 return reply((struct ib_mad_hdr *)smp);
3637         }
3638
3639         if (on)
3640                 hfi1_start_led_override(dd->pport, 2000, 1500);
3641         else
3642                 shutdown_led_override(dd->pport);
3643
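        /* reply with the updated state, as a Get of the same attribute would */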
3644         return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
3645 }
3646
3647 static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3648                             u8 *data, struct ib_device *ibdev, u8 port,
3649                             u32 *resp_len)
3650 {
3651         int ret;
3652         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3653
3654         switch (attr_id) {
3655         case IB_SMP_ATTR_NODE_DESC:
3656                 ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
3657                                               resp_len);
3658                 break;
3659         case IB_SMP_ATTR_NODE_INFO:
3660                 ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
3661                                               resp_len);
3662                 break;
3663         case IB_SMP_ATTR_PORT_INFO:
3664                 ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
3665                                               resp_len);
3666                 break;
3667         case IB_SMP_ATTR_PKEY_TABLE:
3668                 ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
3669                                                resp_len);
3670                 break;
3671         case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3672                 ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
3673                                               resp_len);
3674                 break;
3675         case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3676                 ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
3677                                               resp_len);
3678                 break;
3679         case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
3680                 ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
3681                                                resp_len);
3682                 break;
3683         case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
3684                 ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
3685                                                 resp_len);
3686                 break;
3687         case OPA_ATTRIB_ID_PORT_STATE_INFO:
3688                 ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
3689                                          resp_len);
3690                 break;
3691         case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
3692                 ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
3693                                          resp_len);
3694                 break;
3695         case OPA_ATTRIB_ID_CABLE_INFO:
3696                 ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
3697                                                 resp_len);
3698                 break;
3699         case IB_SMP_ATTR_VL_ARB_TABLE:
3700                 ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
3701                                             resp_len);
3702                 break;
3703         case OPA_ATTRIB_ID_CONGESTION_INFO:
3704                 ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
3705                                                resp_len);
3706                 break;
3707         case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
3708                 ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
3709                                                   port, resp_len);
3710                 break;
3711         case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
3712                 ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
3713                                                    port, resp_len);
3714                 break;
3715         case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
3716                 ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
3717                                               resp_len);
3718                 break;
3719         case IB_SMP_ATTR_LED_INFO:
3720                 ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
3721                                               resp_len);
3722                 break;
3723         case IB_SMP_ATTR_SM_INFO:
3724                 if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
3725                         return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
3726                 if (ibp->rvp.port_cap_flags & IB_PORT_SM)
3727                         return IB_MAD_RESULT_SUCCESS;
3728                 /* FALLTHROUGH */
3729         default:
3730                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
3731                 ret = reply((struct ib_mad_hdr *)smp);
3732                 break;
3733         }
3734         return ret;
3735 }
3736
3737 static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3738                             u8 *data, struct ib_device *ibdev, u8 port,
3739                             u32 *resp_len)
3740 {
3741         int ret;
3742         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3743
3744         switch (attr_id) {
3745         case IB_SMP_ATTR_PORT_INFO:
3746                 ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
3747                                               resp_len);
3748                 break;
3749         case IB_SMP_ATTR_PKEY_TABLE:
3750                 ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
3751                                                resp_len);
3752                 break;
3753         case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3754                 ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
3755                                               resp_len);
3756                 break;
3757         case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3758                 ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
3759                                               resp_len);
3760                 break;
3761         case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
3762                 ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
3763                                                resp_len);
3764                 break;
3765         case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
3766                 ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
3767                                                 resp_len);
3768                 break;
3769         case OPA_ATTRIB_ID_PORT_STATE_INFO:
3770                 ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
3771                                          resp_len);
3772                 break;
3773         case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
3774                 ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
3775                                          resp_len);
3776                 break;
3777         case IB_SMP_ATTR_VL_ARB_TABLE:
3778                 ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
3779                                             resp_len);
3780                 break;
3781         case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
3782                 ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
3783                                                   port, resp_len);
3784                 break;
3785         case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
3786                 ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
3787                                               resp_len);
3788                 break;
3789         case IB_SMP_ATTR_LED_INFO:
3790                 ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
3791                                               resp_len);
3792                 break;
3793         case IB_SMP_ATTR_SM_INFO:
3794                 if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
3795                         return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
3796                 if (ibp->rvp.port_cap_flags & IB_PORT_SM)
3797                         return IB_MAD_RESULT_SUCCESS;
3798                 /* FALLTHROUGH */
3799         default:
3800                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
3801                 ret = reply((struct ib_mad_hdr *)smp);
3802                 break;
3803         }
3804         return ret;
3805 }
3806
3807 static inline void set_aggr_error(struct opa_aggregate *ag)
3808 {
3809         ag->err_reqlength |= cpu_to_be16(0x8000);
3810 }
3811
3812 static int subn_get_opa_aggregate(struct opa_smp *smp,
3813                                   struct ib_device *ibdev, u8 port,
3814                                   u32 *resp_len)
3815 {
3816         int i;
3817         u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
3818         u8 *next_smp = opa_get_smp_data(smp);
3819
3820         if (num_attr < 1 || num_attr > 117) {
3821                 smp->status |= IB_SMP_INVALID_FIELD;
3822                 return reply((struct ib_mad_hdr *)smp);
3823         }
3824
3825         for (i = 0; i < num_attr; i++) {
3826                 struct opa_aggregate *agg;
3827                 size_t agg_data_len;
3828                 size_t agg_size;
3829                 u32 am;
3830
3831                 agg = (struct opa_aggregate *)next_smp;
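                /*
                 * err_reqlength: bit 15 is the error flag (see
                 * set_aggr_error()); the low 7 bits give the payload
                 * length in 8-byte units.
                 */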
3832                 agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
3833                 agg_size = sizeof(*agg) + agg_data_len;
3834                 am = be32_to_cpu(agg->attr_mod);
3835
3836                 *resp_len += agg_size;
3837
3838                 if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
3839                         smp->status |= IB_SMP_INVALID_FIELD;
3840                         return reply((struct ib_mad_hdr *)smp);
3841                 }
3842
3843                 /* zero the payload for this segment */
3844                 memset(next_smp + sizeof(*agg), 0, agg_data_len);
3845
3846                 (void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
3847                                         ibdev, port, NULL);
3848                 if (smp->status & ~IB_SMP_DIRECTION) {
3849                         set_aggr_error(agg);
3850                         return reply((struct ib_mad_hdr *)smp);
3851                 }
3852                 next_smp += agg_size;
3853         }
3854
3855         return reply((struct ib_mad_hdr *)smp);
3856 }
3857
3858 static int subn_set_opa_aggregate(struct opa_smp *smp,
3859                                   struct ib_device *ibdev, u8 port,
3860                                   u32 *resp_len)
3861 {
3862         int i;
3863         u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
3864         u8 *next_smp = opa_get_smp_data(smp);
3865
3866         if (num_attr < 1 || num_attr > 117) {
3867                 smp->status |= IB_SMP_INVALID_FIELD;
3868                 return reply((struct ib_mad_hdr *)smp);
3869         }
3870
3871         for (i = 0; i < num_attr; i++) {
3872                 struct opa_aggregate *agg;
3873                 size_t agg_data_len;
3874                 size_t agg_size;
3875                 u32 am;
3876
3877                 agg = (struct opa_aggregate *)next_smp;
3878                 agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
3879                 agg_size = sizeof(*agg) + agg_data_len;
3880                 am = be32_to_cpu(agg->attr_mod);
3881
3882                 *resp_len += agg_size;
3883
3884                 if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
3885                         smp->status |= IB_SMP_INVALID_FIELD;
3886                         return reply((struct ib_mad_hdr *)smp);
3887                 }
3888
3889                 (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
3890                                         ibdev, port, NULL);
3891                 if (smp->status & ~IB_SMP_DIRECTION) {
3892                         set_aggr_error(agg);
3893                         return reply((struct ib_mad_hdr *)smp);
3894                 }
3895                 next_smp += agg_size;
3896         }
3897
3898         return reply((struct ib_mad_hdr *)smp);
3899 }
3900
3901 /*
3902  * OPAv1 specifies that, on the transition to link up, these counters
3903  * are cleared:
3904  *   PortRcvErrors [*]
3905  *   LinkErrorRecovery
3906  *   LocalLinkIntegrityErrors
3907  *   ExcessiveBufferOverruns [*]
3908  *
3909  * [*] Error info associated with these counters is retained, but the
3910  * error info status is reset to 0.
3911  */
3912 void clear_linkup_counters(struct hfi1_devdata *dd)
3913 {
3914         /* PortRcvErrors */
3915         write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3916         dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3917         /* LinkErrorRecovery */
3918         write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3919         write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
3920         /* LocalLinkIntegrityErrors */
3921         write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3922         /* ExcessiveBufferOverruns */
3923         write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3924         dd->rcv_ovfl_cnt = 0;
3925         dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3926 }
3927
3928 /*
3929  * is_local_mad() returns 1 if 'mad' was sent from, and is destined to,
3930  * the local node; 0 otherwise.
3931  */
3932 static int is_local_mad(struct hfi1_ibport *ibp, const struct opa_mad *mad,
3933                         const struct ib_wc *in_wc)
3934 {
3935         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3936         const struct opa_smp *smp = (const struct opa_smp *)mad;
3937
3938         if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
3939                 return (smp->hop_cnt == 0 &&
3940                         smp->route.dr.dr_slid == OPA_LID_PERMISSIVE &&
3941                         smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE);
3942         }
3943
3944         return (in_wc->slid == ppd->lid);
3945 }
3946
3947 /*
3948  * opa_local_smp_check() should only be called on MADs for which
3949  * is_local_mad() returns true. It applies the SMP checks that are
3950  * specific to SMPs which are sent from, and destined to this node.
3951  * opa_local_smp_check() returns 0 if the SMP passes its checks, 1
3952  * otherwise.
3953  *
3954  * SMPs which arrive from other nodes are instead checked by
3955  * opa_smp_check().
3956  */
3957 static int opa_local_smp_check(struct hfi1_ibport *ibp,
3958                                const struct ib_wc *in_wc)
3959 {
3960         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3961         u16 slid = in_wc->slid;
3962         u16 pkey;
3963
3964         if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
3965                 return 1;
3966
3967         pkey = ppd->pkeys[in_wc->pkey_index];
        /*
         * We need to do the "node-local" checks specified in OPAv1,
         * rev 0.90, section 9.10.26, which are:
         *   - pkey is 0x7fff or 0xffff
         *   - Source QPN == 0 or Destination QPN == 0
         *   - the MAD header's management class is either
         *     IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE or
         *     IB_MGMT_CLASS_SUBN_LID_ROUTED
         *   - SLID != 0
         *
         * However, we know (and so don't need to check again) that,
         * for local SMPs, the MAD stack passes MADs with:
         *   - a Source QPN of 0
         *   - a mgmt_class of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
         *   - an SLID that is either OPA_LID_PERMISSIVE (0xFFFFFFFF)
         *     or our own port's LID
         *
         * so only the pkey check below remains to be done here.
         */
3986         if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
3987                 return 0;
3988         ingress_pkey_table_fail(ppd, pkey, slid);
3989         return 1;
3990 }
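
/*
 * For reference, the two management pkeys accepted above (values as
 * defined in this driver's headers):
 *
 *	LIM_MGMT_P_KEY	0x7FFF	limited-member management pkey
 *	FULL_MGMT_P_KEY	0xFFFF	full-member management pkey
 *
 * Any other pkey on a node-local SMP is reported through
 * ingress_pkey_table_fail() and the MAD is rejected.
 */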
3991
3992 static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
3993                             u8 port, const struct opa_mad *in_mad,
3994                             struct opa_mad *out_mad,
3995                             u32 *resp_len)
3996 {
3997         struct opa_smp *smp = (struct opa_smp *)out_mad;
3998         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3999         u8 *data;
4000         u32 am;
4001         __be16 attr_id;
4002         int ret;
4003
4004         *out_mad = *in_mad;
4005         data = opa_get_smp_data(smp);
4006
4007         am = be32_to_cpu(smp->attr_mod);
4008         attr_id = smp->attr_id;
4009         if (smp->class_version != OPA_SM_CLASS_VERSION) {
4010                 smp->status |= IB_SMP_UNSUP_VERSION;
4011                 ret = reply((struct ib_mad_hdr *)smp);
4012                 return ret;
4013         }
4014         ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
4015                          smp->route.dr.dr_slid, smp->route.dr.return_path,
4016                          smp->hop_cnt);
4017         if (ret) {
                u32 port_num = am;	/* attr_mod already byte-swapped above */
4019
                /*
                 * The M_Key check on the receiving port has already
                 * failed. If this is a Get/Set of PortInfo aimed at
                 * another port, run the M_Key check on that port as
                 * well, so that the error counters are incremented
                 * when the M_Key fails to match on *both* ports.
                 */
4027                 if (attr_id == IB_SMP_ATTR_PORT_INFO &&
4028                     (smp->method == IB_MGMT_METHOD_GET ||
4029                      smp->method == IB_MGMT_METHOD_SET) &&
4030                     port_num && port_num <= ibdev->phys_port_cnt &&
4031                     port != port_num)
4032                         (void)check_mkey(to_iport(ibdev, port_num),
4033                                           (struct ib_mad_hdr *)smp, 0,
4034                                           smp->mkey, smp->route.dr.dr_slid,
4035                                           smp->route.dr.return_path,
4036                                           smp->hop_cnt);
4037                 ret = IB_MAD_RESULT_FAILURE;
4038                 return ret;
4039         }
4040
4041         *resp_len = opa_get_smp_header_size(smp);
4042
4043         switch (smp->method) {
4044         case IB_MGMT_METHOD_GET:
4045                 switch (attr_id) {
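                /*
                 * Note the ordering: the default arm comes first, as
                 * every individual attribute is dispatched through
                 * subn_get_opa_sma(); only the aggregate attribute
                 * needs special handling.
                 */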
4046                 default:
4047                         clear_opa_smp_data(smp);
4048                         ret = subn_get_opa_sma(attr_id, smp, am, data,
4049                                                ibdev, port, resp_len);
4050                         break;
4051                 case OPA_ATTRIB_ID_AGGREGATE:
4052                         ret = subn_get_opa_aggregate(smp, ibdev, port,
4053                                                      resp_len);
4054                         break;
4055                 }
4056                 break;
4057         case IB_MGMT_METHOD_SET:
4058                 switch (attr_id) {
4059                 default:
4060                         ret = subn_set_opa_sma(attr_id, smp, am, data,
4061                                                ibdev, port, resp_len);
4062                         break;
4063                 case OPA_ATTRIB_ID_AGGREGATE:
4064                         ret = subn_set_opa_aggregate(smp, ibdev, port,
4065                                                      resp_len);
4066                         break;
4067                 }
4068                 break;
4069         case IB_MGMT_METHOD_TRAP:
4070         case IB_MGMT_METHOD_REPORT:
4071         case IB_MGMT_METHOD_REPORT_RESP:
4072         case IB_MGMT_METHOD_GET_RESP:
4073                 /*
4074                  * The ib_mad module will call us to process responses
4075                  * before checking for other consumers.
4076                  * Just tell the caller to process it normally.
4077                  */
4078                 ret = IB_MAD_RESULT_SUCCESS;
4079                 break;
4080         default:
4081                 smp->status |= IB_SMP_UNSUP_METHOD;
4082                 ret = reply((struct ib_mad_hdr *)smp);
4083                 break;
4084         }
4085
4086         return ret;
4087 }
4088
4089 static int process_subn(struct ib_device *ibdev, int mad_flags,
4090                         u8 port, const struct ib_mad *in_mad,
4091                         struct ib_mad *out_mad)
4092 {
4093         struct ib_smp *smp = (struct ib_smp *)out_mad;
4094         struct hfi1_ibport *ibp = to_iport(ibdev, port);
4095         int ret;
4096
4097         *out_mad = *in_mad;
4098         if (smp->class_version != 1) {
4099                 smp->status |= IB_SMP_UNSUP_VERSION;
4100                 ret = reply((struct ib_mad_hdr *)smp);
4101                 return ret;
4102         }
4103
4104         ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
4105                          smp->mkey, (__force __be32)smp->dr_slid,
4106                          smp->return_path, smp->hop_cnt);
4107         if (ret) {
4108                 u32 port_num = be32_to_cpu(smp->attr_mod);
4109
                /*
                 * The M_Key check on the receiving port has already
                 * failed. If this is a Get/Set of PortInfo aimed at
                 * another port, run the M_Key check on that port as
                 * well, so that the error counters are incremented
                 * when the M_Key fails to match on *both* ports.
                 */
4117                 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
4118                     (smp->method == IB_MGMT_METHOD_GET ||
4119                      smp->method == IB_MGMT_METHOD_SET) &&
4120                     port_num && port_num <= ibdev->phys_port_cnt &&
4121                     port != port_num)
4122                         (void)check_mkey(to_iport(ibdev, port_num),
4123                                          (struct ib_mad_hdr *)smp, 0,
4124                                          smp->mkey,
4125                                          (__force __be32)smp->dr_slid,
4126                                          smp->return_path, smp->hop_cnt);
4127                 ret = IB_MAD_RESULT_FAILURE;
4128                 return ret;
4129         }
4130
4131         switch (smp->method) {
4132         case IB_MGMT_METHOD_GET:
4133                 switch (smp->attr_id) {
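                /*
                 * Only NodeInfo is served for plain (non-OPA) IB SMPs;
                 * all other attributes are rejected as unsupported.
                 */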
4134                 case IB_SMP_ATTR_NODE_INFO:
4135                         ret = subn_get_nodeinfo(smp, ibdev, port);
4136                         break;
4137                 default:
4138                         smp->status |= IB_SMP_UNSUP_METH_ATTR;
4139                         ret = reply((struct ib_mad_hdr *)smp);
4140                         break;
4141                 }
                break;
        default:
                /* avoid returning an uninitialized result for non-Get methods */
                smp->status |= IB_SMP_UNSUP_METHOD;
                ret = reply((struct ib_mad_hdr *)smp);
                break;
        }
4144
4145         return ret;
4146 }
4147
4148 static int process_perf(struct ib_device *ibdev, u8 port,
4149                         const struct ib_mad *in_mad,
4150                         struct ib_mad *out_mad)
4151 {
4152         struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
4153         struct ib_class_port_info *cpi = (struct ib_class_port_info *)
4154                                                 &pmp->data;
4155         int ret = IB_MAD_RESULT_FAILURE;
4156
4157         *out_mad = *in_mad;
4158         if (pmp->mad_hdr.class_version != 1) {
4159                 pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4160                 ret = reply((struct ib_mad_hdr *)pmp);
4161                 return ret;
4162         }
4163
4164         switch (pmp->mad_hdr.method) {
4165         case IB_MGMT_METHOD_GET:
4166                 switch (pmp->mad_hdr.attr_id) {
4167                 case IB_PMA_PORT_COUNTERS:
4168                         ret = pma_get_ib_portcounters(pmp, ibdev, port);
4169                         break;
4170                 case IB_PMA_PORT_COUNTERS_EXT:
4171                         ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
4172                         break;
4173                 case IB_PMA_CLASS_PORT_INFO:
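                        /*
                         * Advertise the 64-bit extended counter
                         * attributes (PortCountersExt) in the
                         * ClassPortInfo capability mask.
                         */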
4174                         cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
4175                         ret = reply((struct ib_mad_hdr *)pmp);
4176                         break;
4177                 default:
4178                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4179                         ret = reply((struct ib_mad_hdr *)pmp);
4180                         break;
4181                 }
4182                 break;
4183
4184         case IB_MGMT_METHOD_SET:
4185                 if (pmp->mad_hdr.attr_id) {
4186                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4187                         ret = reply((struct ib_mad_hdr *)pmp);
4188                 }
4189                 break;
4190
4191         case IB_MGMT_METHOD_TRAP:
4192         case IB_MGMT_METHOD_GET_RESP:
4193                 /*
4194                  * The ib_mad module will call us to process responses
4195                  * before checking for other consumers.
4196                  * Just tell the caller to process it normally.
4197                  */
4198                 ret = IB_MAD_RESULT_SUCCESS;
4199                 break;
4200
4201         default:
4202                 pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4203                 ret = reply((struct ib_mad_hdr *)pmp);
4204                 break;
4205         }
4206
4207         return ret;
4208 }
4209
4210 static int process_perf_opa(struct ib_device *ibdev, u8 port,
4211                             const struct opa_mad *in_mad,
4212                             struct opa_mad *out_mad, u32 *resp_len)
4213 {
4214         struct opa_pma_mad *pmp = (struct opa_pma_mad *)out_mad;
4215         int ret;
4216
4217         *out_mad = *in_mad;
4218
4219         if (pmp->mad_hdr.class_version != OPA_SM_CLASS_VERSION) {
4220                 pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4221                 return reply((struct ib_mad_hdr *)pmp);
4222         }
4223
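        /*
         * Start with just the MAD header; each attribute handler below
         * grows *resp_len as it fills in its payload.
         */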
4224         *resp_len = sizeof(pmp->mad_hdr);
4225
4226         switch (pmp->mad_hdr.method) {
4227         case IB_MGMT_METHOD_GET:
4228                 switch (pmp->mad_hdr.attr_id) {
4229                 case IB_PMA_CLASS_PORT_INFO:
4230                         ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
4231                         break;
4232                 case OPA_PM_ATTRIB_ID_PORT_STATUS:
4233                         ret = pma_get_opa_portstatus(pmp, ibdev, port,
4234                                                      resp_len);
4235                         break;
4236                 case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
4237                         ret = pma_get_opa_datacounters(pmp, ibdev, port,
4238                                                        resp_len);
4239                         break;
4240                 case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
4241                         ret = pma_get_opa_porterrors(pmp, ibdev, port,
4242                                                      resp_len);
4243                         break;
4244                 case OPA_PM_ATTRIB_ID_ERROR_INFO:
4245                         ret = pma_get_opa_errorinfo(pmp, ibdev, port,
4246                                                     resp_len);
4247                         break;
4248                 default:
4249                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4250                         ret = reply((struct ib_mad_hdr *)pmp);
4251                         break;
4252                 }
4253                 break;
4254
4255         case IB_MGMT_METHOD_SET:
4256                 switch (pmp->mad_hdr.attr_id) {
4257                 case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
4258                         ret = pma_set_opa_portstatus(pmp, ibdev, port,
4259                                                      resp_len);
4260                         break;
4261                 case OPA_PM_ATTRIB_ID_ERROR_INFO:
4262                         ret = pma_set_opa_errorinfo(pmp, ibdev, port,
4263                                                     resp_len);
4264                         break;
4265                 default:
4266                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4267                         ret = reply((struct ib_mad_hdr *)pmp);
4268                         break;
4269                 }
4270                 break;
4271
4272         case IB_MGMT_METHOD_TRAP:
4273         case IB_MGMT_METHOD_GET_RESP:
4274                 /*
4275                  * The ib_mad module will call us to process responses
4276                  * before checking for other consumers.
4277                  * Just tell the caller to process it normally.
4278                  */
4279                 ret = IB_MAD_RESULT_SUCCESS;
4280                 break;
4281
4282         default:
4283                 pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4284                 ret = reply((struct ib_mad_hdr *)pmp);
4285                 break;
4286         }
4287
4288         return ret;
4289 }
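
/*
 * Summary of the OPA PerfMgt attributes handled above:
 *
 *	Get: ClassPortInfo, PortStatus, DataPortCounters,
 *	     ErrorPortCounters, ErrorInfo
 *	Set: ClearPortStatus, ErrorInfo
 */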
4290
4291 static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
4292                                 u8 port, const struct ib_wc *in_wc,
4293                                 const struct ib_grh *in_grh,
4294                                 const struct opa_mad *in_mad,
4295                                 struct opa_mad *out_mad, size_t *out_mad_size,
4296                                 u16 *out_mad_pkey_index)
4297 {
4298         int ret;
4299         int pkey_idx;
4300         u32 resp_len = 0;
4301         struct hfi1_ibport *ibp = to_iport(ibdev, port);
4302
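        /*
         * Replies are sent with the limited management pkey by default;
         * fall back to pkey table index 1 if its index cannot be found.
         */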
4303         pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
4304         if (pkey_idx < 0) {
4305                 pr_warn("failed to find limited mgmt pkey, defaulting 0x%x\n",
4306                         hfi1_get_pkey(ibp, 1));
4307                 pkey_idx = 1;
4308         }
4309         *out_mad_pkey_index = (u16)pkey_idx;
4310
4311         switch (in_mad->mad_hdr.mgmt_class) {
4312         case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4313         case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4314                 if (is_local_mad(ibp, in_mad, in_wc)) {
4315                         ret = opa_local_smp_check(ibp, in_wc);
4316                         if (ret)
4317                                 return IB_MAD_RESULT_FAILURE;
4318                 }
4319                 ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
4320                                        out_mad, &resp_len);
4321                 goto bail;
4322         case IB_MGMT_CLASS_PERF_MGMT:
4323                 ret = process_perf_opa(ibdev, port, in_mad, out_mad,
4324                                        &resp_len);
4325                 goto bail;
4326
4327         default:
4328                 ret = IB_MAD_RESULT_SUCCESS;
4329         }
4330
4331 bail:
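        /*
         * Replies report the (8-byte aligned) length built up by the
         * handlers; MADs merely passed through keep their received
         * length, minus the GRH.
         */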
4332         if (ret & IB_MAD_RESULT_REPLY)
4333                 *out_mad_size = round_up(resp_len, 8);
4334         else if (ret & IB_MAD_RESULT_SUCCESS)
4335                 *out_mad_size = in_wc->byte_len - sizeof(struct ib_grh);
4336
4337         return ret;
4338 }
4339
4340 static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4341                                const struct ib_wc *in_wc,
4342                                const struct ib_grh *in_grh,
4343                                const struct ib_mad *in_mad,
4344                                struct ib_mad *out_mad)
4345 {
4346         int ret;
4347
4348         switch (in_mad->mad_hdr.mgmt_class) {
4349         case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4350         case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4351                 ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
4352                 break;
4353         case IB_MGMT_CLASS_PERF_MGMT:
4354                 ret = process_perf(ibdev, port, in_mad, out_mad);
4355                 break;
4356         default:
4357                 ret = IB_MAD_RESULT_SUCCESS;
4358                 break;
4359         }
4360
4361         return ret;
4362 }
4363
4364 /**
4365  * hfi1_process_mad - process an incoming MAD packet
4366  * @ibdev: the infiniband device this packet came in on
4367  * @mad_flags: MAD flags
4368  * @port: the port number this packet came in on
4369  * @in_wc: the work completion entry for this packet
4370  * @in_grh: the global route header for this packet
 * @in_mad: the incoming MAD
 * @in_mad_size: the size of the incoming MAD
 * @out_mad: any outgoing MAD reply
 * @out_mad_size: the size of the outgoing MAD reply
 * @out_mad_pkey_index: the pkey index to use for the MAD reply
4373  *
 * Returns IB_MAD_RESULT_SUCCESS (without IB_MAD_RESULT_REPLY) for MADs
 * we are not interested in processing, so the caller handles them
 * normally.
4376  *
4377  * Note that the verbs framework has already done the MAD sanity checks,
4378  * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4379  * MADs.
4380  *
4381  * This is called by the ib_mad module.
4382  */
4383 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4384                      const struct ib_wc *in_wc, const struct ib_grh *in_grh,
4385                      const struct ib_mad_hdr *in_mad, size_t in_mad_size,
4386                      struct ib_mad_hdr *out_mad, size_t *out_mad_size,
4387                      u16 *out_mad_pkey_index)
4388 {
4389         switch (in_mad->base_version) {
4390         case OPA_MGMT_BASE_VERSION:
4391                 if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
4392                         dev_err(ibdev->dev.parent, "invalid in_mad_size\n");
4393                         return IB_MAD_RESULT_FAILURE;
4394                 }
4395                 return hfi1_process_opa_mad(ibdev, mad_flags, port,
4396                                             in_wc, in_grh,
4397                                             (struct opa_mad *)in_mad,
4398                                             (struct opa_mad *)out_mad,
4399                                             out_mad_size,
4400                                             out_mad_pkey_index);
4401         case IB_MGMT_BASE_VERSION:
4402                 return hfi1_process_ib_mad(ibdev, mad_flags, port,
4403                                           in_wc, in_grh,
4404                                           (const struct ib_mad *)in_mad,
4405                                           (struct ib_mad *)out_mad);
4406         default:
4407                 break;
4408         }
4409
4410         return IB_MAD_RESULT_FAILURE;
4411 }
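
/*
 * Illustrative sketch only: in this kernel generation the verbs core
 * reaches hfi1_process_mad() through the device's process_mad method.
 * The wrapper below is hypothetical; only the assignment is meant to
 * mirror what the driver's verbs registration path does.
 */
#if 0	/* example, not compiled */
static void example_register_mad_handler(struct ib_device *ibdev)
{
        /* hand MAD processing for this device to hfi1 */
        ibdev->process_mad = hfi1_process_mad;
}
#endif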