drivers/infiniband/hw/hfi1/driver.c

   1 /*
   2  * Copyright(c) 2015-2017 Intel Corporation.
   3  *
   4  * This file is provided under a dual BSD/GPLv2 license.  When using or
   5  * redistributing this file, you may do so under either license.
   6  *
   7  * GPL LICENSE SUMMARY
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of version 2 of the GNU General Public License as
  11  * published by the Free Software Foundation.
  12  *
  13  * This program is distributed in the hope that it will be useful, but
  14  * WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * General Public License for more details.
  17  *
  18  * BSD LICENSE
  19  *
  20  * Redistribution and use in source and binary forms, with or without
  21  * modification, are permitted provided that the following conditions
  22  * are met:
  23  *
  24  *  - Redistributions of source code must retain the above copyright
  25  *    notice, this list of conditions and the following disclaimer.
  26  *  - Redistributions in binary form must reproduce the above copyright
  27  *    notice, this list of conditions and the following disclaimer in
  28  *    the documentation and/or other materials provided with the
  29  *    distribution.
  30  *  - Neither the name of Intel Corporation nor the names of its
  31  *    contributors may be used to endorse or promote products derived
  32  *    from this software without specific prior written permission.
  33  *
  34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45  *
  46  */
  47
  48 #include <linux/spinlock.h>
  49 #include <linux/pci.h>
  50 #include <linux/io.h>
  51 #include <linux/delay.h>
  52 #include <linux/netdevice.h>
  53 #include <linux/vmalloc.h>
  54 #include <linux/module.h>
  55 #include <linux/prefetch.h>
  56 #include <rdma/ib_verbs.h>
  57
  58 #include "hfi.h"
  59 #include "trace.h"
  60 #include "qp.h"
  61 #include "sdma.h"
  62 #include "debugfs.h"
  63 #include "vnic.h"
  64
  65 #undef pr_fmt
  66 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
  67
  68 /*
  69  * The size has to be longer than this string, so we can append
  70  * board/chip information to it in the initialization code.
  71  */
  72 const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n";
  73
  74 DEFINE_SPINLOCK(hfi1_devs_lock);
  75 LIST_HEAD(hfi1_dev_list);
  76 DEFINE_MUTEX(hfi1_mutex);       /* general driver use */
  77
  78 unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
  79 module_param_named(max_mtu, hfi1_max_mtu, uint, S_IRUGO);
  80 MODULE_PARM_DESC(max_mtu, "Set max MTU bytes, default is " __stringify(
  81                  HFI1_DEFAULT_MAX_MTU));
  82
  83 unsigned int hfi1_cu = 1;
  84 module_param_named(cu, hfi1_cu, uint, S_IRUGO);
  85 MODULE_PARM_DESC(cu, "Credit return units");
  86
  87 unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT;
  88 static int hfi1_caps_set(const char *, const struct kernel_param *);
  89 static int hfi1_caps_get(char *, const struct kernel_param *);
  90 static const struct kernel_param_ops cap_ops = {
  91         .set = hfi1_caps_set,
  92         .get = hfi1_caps_get
  93 };
  94 module_param_cb(cap_mask, &cap_ops, &hfi1_cap_mask, S_IWUSR | S_IRUGO);
  95 MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features");
  96
  97 MODULE_LICENSE("Dual BSD/GPL");
  98 MODULE_DESCRIPTION("Intel Omni-Path Architecture driver");
  99 MODULE_VERSION(HFI1_DRIVER_VERSION);
 100
 101 /*
 102  * MAX_PKT_RCV is the max # if packets processed per receive interrupt.
 103  */
 104 #define MAX_PKT_RECV 64
 105 /*
 106  * MAX_PKT_THREAD_RCV is the max # of packets processed before
 107  * the qp_wait_list queue is flushed.
 108  */
 109 #define MAX_PKT_RECV_THREAD (MAX_PKT_RECV * 4)
 110 #define EGR_HEAD_UPDATE_THRESHOLD 16
 111
 112 struct hfi1_ib_stats hfi1_stats;
 113
 114 static int hfi1_caps_set(const char *val, const struct kernel_param *kp)
 115 {
 116         int ret = 0;
 117         unsigned long *cap_mask_ptr = (unsigned long *)kp->arg,
 118                 cap_mask = *cap_mask_ptr, value, diff,
 119                 write_mask = ((HFI1_CAP_WRITABLE_MASK << HFI1_CAP_USER_SHIFT) |
 120                               HFI1_CAP_WRITABLE_MASK);
 121
 122         ret = kstrtoul(val, 0, &value);
 123         if (ret) {
 124                 pr_warn("Invalid module parameter value for 'cap_mask'\n");
 125                 goto done;
 126         }
 127         /* Get the changed bits (except the locked bit) */
 128         diff = value ^ (cap_mask & ~HFI1_CAP_LOCKED_SMASK);
 129
 130         /* Remove any bits that are not allowed to change after driver load */
 131         if (HFI1_CAP_LOCKED() && (diff & ~write_mask)) {
 132                 pr_warn("Ignoring non-writable capability bits %#lx\n",
 133                         diff & ~write_mask);
 134                 diff &= write_mask;
 135         }
 136
 137         /* Mask off any reserved bits */
 138         diff &= ~HFI1_CAP_RESERVED_MASK;
 139         /* Clear any previously set and changing bits */
 140         cap_mask &= ~diff;
 141         /* Update the bits with the new capability */
 142         cap_mask |= (value & diff);
 143         /* Check for any kernel/user restrictions */
 144         diff = (cap_mask & (HFI1_CAP_MUST_HAVE_KERN << HFI1_CAP_USER_SHIFT)) ^
 145                 ((cap_mask & HFI1_CAP_MUST_HAVE_KERN) << HFI1_CAP_USER_SHIFT);
 146         cap_mask &= ~diff;
 147         /* Set the bitmask to the final set */
 148         *cap_mask_ptr = cap_mask;
 149 done:
 150         return ret;
 151 }
 152
 153 static int hfi1_caps_get(char *buffer, const struct kernel_param *kp)
 154 {
 155         unsigned long cap_mask = *(unsigned long *)kp->arg;
 156
 157         cap_mask &= ~HFI1_CAP_LOCKED_SMASK;
 158         cap_mask |= ((cap_mask & HFI1_CAP_K2U) << HFI1_CAP_USER_SHIFT);
 159
 160         return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask);
 161 }
 162
 163 const char *get_unit_name(int unit)
 164 {
 165         static char iname[16];
 166
 167         snprintf(iname, sizeof(iname), DRIVER_NAME "_%u", unit);
 168         return iname;
 169 }
 170
 171 const char *get_card_name(struct rvt_dev_info *rdi)
 172 {
 173         struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
 174         struct hfi1_devdata *dd = container_of(ibdev,
 175                                                struct hfi1_devdata, verbs_dev);
 176         return get_unit_name(dd->unit);
 177 }
 178
 179 struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
 180 {
 181         struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
 182         struct hfi1_devdata *dd = container_of(ibdev,
 183                                                struct hfi1_devdata, verbs_dev);
 184         return dd->pcidev;
 185 }
 186
 187 /*
 188  * Return count of units with at least one port ACTIVE.
 189  */
 190 int hfi1_count_active_units(void)
 191 {
 192         struct hfi1_devdata *dd;
 193         struct hfi1_pportdata *ppd;
 194         unsigned long flags;
 195         int pidx, nunits_active = 0;
 196
 197         spin_lock_irqsave(&hfi1_devs_lock, flags);
 198         list_for_each_entry(dd, &hfi1_dev_list, list) {
 199                 if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase)
 200                         continue;
 201                 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
 202                         ppd = dd->pport + pidx;
 203                         if (ppd->lid && ppd->linkup) {
 204                                 nunits_active++;
 205                                 break;
 206                         }
 207                 }
 208         }
 209         spin_unlock_irqrestore(&hfi1_devs_lock, flags);
 210         return nunits_active;
 211 }
 212
 213 /*
 214  * Return count of all units, optionally return in arguments
 215  * the number of usable (present) units, and the number of
 216  * ports that are up.
 217  */
 218 int hfi1_count_units(int *npresentp, int *nupp)
 219 {
 220         int nunits = 0, npresent = 0, nup = 0;
 221         struct hfi1_devdata *dd;
 222         unsigned long flags;
 223         int pidx;
 224         struct hfi1_pportdata *ppd;
 225
 226         spin_lock_irqsave(&hfi1_devs_lock, flags);
 227
 228         list_for_each_entry(dd, &hfi1_dev_list, list) {
 229                 nunits++;
 230                 if ((dd->flags & HFI1_PRESENT) && dd->kregbase)
 231                         npresent++;
 232                 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
 233                         ppd = dd->pport + pidx;
 234                         if (ppd->lid && ppd->linkup)
 235                                 nup++;
 236                 }
 237         }
 238
 239         spin_unlock_irqrestore(&hfi1_devs_lock, flags);
 240
 241         if (npresentp)
 242                 *npresentp = npresent;
 243         if (nupp)
 244                 *nupp = nup;
 245
 246         return nunits;
 247 }
 248
 249 /*
 250  * Get address of eager buffer from it's index (allocated in chunks, not
 251  * contiguous).
 252  */
 253 static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
 254                                u8 *update)
 255 {
 256         u32 idx = rhf_egr_index(rhf), offset = rhf_egr_buf_offset(rhf);
 257
 258         *update |= !(idx & (rcd->egrbufs.threshold - 1)) && !offset;
 259         return (void *)(((u64)(rcd->egrbufs.rcvtids[idx].addr)) +
 260                         (offset * RCV_BUF_BLOCK_SIZE));
 261 }
 262
 263 /*
 264  * Validate and encode the a given RcvArray Buffer size.
 265  * The function will check whether the given size falls within
 266  * allowed size ranges for the respective type and, optionally,
 267  * return the proper encoding.
 268  */
 269 int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
 270 {
 271         if (unlikely(!PAGE_ALIGNED(size)))
 272                 return 0;
 273         if (unlikely(size < MIN_EAGER_BUFFER))
 274                 return 0;
 275         if (size >
 276             (type == PT_EAGER ? MAX_EAGER_BUFFER : MAX_EXPECTED_BUFFER))
 277                 return 0;
 278         if (encoded)
 279                 *encoded = ilog2(size / PAGE_SIZE) + 1;
 280         return 1;
 281 }
 282
 283 static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 284                        struct hfi1_packet *packet)
 285 {
 286         struct ib_header *rhdr = packet->hdr;
 287         u32 rte = rhf_rcv_type_err(packet->rhf);
 288         int lnh = ib_get_lnh(rhdr);
 289         struct hfi1_ibport *ibp = rcd_to_iport(rcd);
 290         struct hfi1_devdata *dd = ppd->dd;
 291         struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 292
 293         if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
 294                 return;
 295
 296         if (packet->rhf & RHF_TID_ERR) {
 297                 /* For TIDERR and RC QPs preemptively schedule a NAK */
 298                 struct ib_other_headers *ohdr = NULL;
 299                 u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
 300                 u16 lid  = ib_get_dlid(rhdr);
 301                 u32 qp_num;
 302                 u32 rcv_flags = 0;
 303
 304                 /* Sanity check packet */
 305                 if (tlen < 24)
 306                         goto drop;
 307
 308                 /* Check for GRH */
 309                 if (lnh == HFI1_LRH_BTH) {
 310                         ohdr = &rhdr->u.oth;
 311                 } else if (lnh == HFI1_LRH_GRH) {
 312                         u32 vtf;
 313
 314                         ohdr = &rhdr->u.l.oth;
 315                         if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
 316                                 goto drop;
 317                         vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow);
 318                         if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
 319                                 goto drop;
 320                         rcv_flags |= HFI1_HAS_GRH;
 321                 } else {
 322                         goto drop;
 323                 }
 324                 /* Get the destination QP number. */
 325                 qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 326                 if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 327                         struct rvt_qp *qp;
 328                         unsigned long flags;
 329
 330                         rcu_read_lock();
 331                         qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
 332                         if (!qp) {
 333                                 rcu_read_unlock();
 334                                 goto drop;
 335                         }
 336
 337                         /*
 338                          * Handle only RC QPs - for other QP types drop error
 339                          * packet.
 340                          */
 341                         spin_lock_irqsave(&qp->r_lock, flags);
 342
 343                         /* Check for valid receive state. */
 344                         if (!(ib_rvt_state_ops[qp->state] &
 345                               RVT_PROCESS_RECV_OK)) {
 346                                 ibp->rvp.n_pkt_drops++;
 347                         }
 348
 349                         switch (qp->ibqp.qp_type) {
 350                         case IB_QPT_RC:
 351                                 hfi1_rc_hdrerr(
 352                                         rcd,
 353                                         rhdr,
 354                                         rcv_flags,
 355                                         qp);
 356                                 break;
 357                         default:
 358                                 /* For now don't handle any other QP types */
 359                                 break;
 360                         }
 361
 362                         spin_unlock_irqrestore(&qp->r_lock, flags);
 363                         rcu_read_unlock();
 364                 } /* Unicast QP */
 365         } /* Valid packet with TIDErr */
 366
 367         /* handle "RcvTypeErr" flags */
 368         switch (rte) {
 369         case RHF_RTE_ERROR_OP_CODE_ERR:
 370         {
 371                 u32 opcode;
 372                 void *ebuf = NULL;
 373                 __be32 *bth = NULL;
 374
 375                 if (rhf_use_egr_bfr(packet->rhf))
 376                         ebuf = packet->ebuf;
 377
 378                 if (!ebuf)
 379                         goto drop; /* this should never happen */
 380
 381                 if (lnh == HFI1_LRH_BTH)
 382                         bth = (__be32 *)ebuf;
 383                 else if (lnh == HFI1_LRH_GRH)
 384                         bth = (__be32 *)((char *)ebuf + sizeof(struct ib_grh));
 385                 else
 386                         goto drop;
 387
 388                 opcode = be32_to_cpu(bth[0]) >> 24;
 389                 opcode &= 0xff;
 390
 391                 if (opcode == IB_OPCODE_CNP) {
 392                         /*
 393                          * Only in pre-B0 h/w is the CNP_OPCODE handled
 394                          * via this code path.
 395                          */
 396                         struct rvt_qp *qp = NULL;
 397                         u32 lqpn, rqpn;
 398                         u16 rlid;
 399                         u8 svc_type, sl, sc5;
 400
 401                         sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf);
 402                         sl = ibp->sc_to_sl[sc5];
 403
 404                         lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
 405                         rcu_read_lock();
 406                         qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
 407                         if (!qp) {
 408                                 rcu_read_unlock();
 409                                 goto drop;
 410                         }
 411
 412                         switch (qp->ibqp.qp_type) {
 413                         case IB_QPT_UD:
 414                                 rlid = 0;
 415                                 rqpn = 0;
 416                                 svc_type = IB_CC_SVCTYPE_UD;
 417                                 break;
 418                         case IB_QPT_UC:
 419                                 rlid = ib_get_slid(rhdr);
 420                                 rqpn = qp->remote_qpn;
 421                                 svc_type = IB_CC_SVCTYPE_UC;
 422                                 break;
 423                         default:
 424                                 goto drop;
 425                         }
 426
 427                         process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
 428                         rcu_read_unlock();
 429                 }
 430
 431                 packet->rhf &= ~RHF_RCV_TYPE_ERR_SMASK;
 432                 break;
 433         }
 434         default:
 435                 break;
 436         }
 437
 438 drop:
 439         return;
 440 }
 441
 442 static inline void init_packet(struct hfi1_ctxtdata *rcd,
 443                                struct hfi1_packet *packet)
 444 {
 445         packet->rsize = rcd->rcvhdrqentsize; /* words */
 446         packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
 447         packet->rcd = rcd;
 448         packet->updegr = 0;
 449         packet->etail = -1;
 450         packet->rhf_addr = get_rhf_addr(rcd);
 451         packet->rhf = rhf_to_cpu(packet->rhf_addr);
 452         packet->rhqoff = rcd->head;
 453         packet->numpkt = 0;
 454         packet->rcv_flags = 0;
 455 }
 456
 457 void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
 458                                bool do_cnp)
 459 {
 460         struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 461         struct ib_header *hdr = pkt->hdr;
 462         struct ib_other_headers *ohdr = pkt->ohdr;
 463         struct ib_grh *grh = NULL;
 464         u32 rqpn = 0, bth1;
 465         u16 rlid, dlid = ib_get_dlid(hdr);
 466         u8 sc, svc_type;
 467         bool is_mcast = false;
 468
 469         if (pkt->rcv_flags & HFI1_HAS_GRH)
 470                 grh = &hdr->u.l.grh;
 471
 472         switch (qp->ibqp.qp_type) {
 473         case IB_QPT_SMI:
 474         case IB_QPT_GSI:
 475         case IB_QPT_UD:
 476                 rlid = ib_get_slid(hdr);
 477                 rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
 478                 svc_type = IB_CC_SVCTYPE_UD;
 479                 is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
 480                         (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
 481                 break;
 482         case IB_QPT_UC:
 483                 rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
 484                 rqpn = qp->remote_qpn;
 485                 svc_type = IB_CC_SVCTYPE_UC;
 486                 break;
 487         case IB_QPT_RC:
 488                 rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
 489                 rqpn = qp->remote_qpn;
 490                 svc_type = IB_CC_SVCTYPE_RC;
 491                 break;
 492         default:
 493                 return;
 494         }
 495
 496         sc = hfi1_9B_get_sc5(hdr, pkt->rhf);
 497
 498         bth1 = be32_to_cpu(ohdr->bth[1]);
 499         if (do_cnp && (bth1 & IB_FECN_SMASK)) {
 500                 u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
 501
 502                 return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh);
 503         }
 504
 505         if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
 506                 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 507                 u32 lqpn = bth1 & RVT_QPN_MASK;
 508                 u8 sl = ibp->sc_to_sl[sc];
 509
 510                 process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
 511         }
 512
 513 }
 514
 515 struct ps_mdata {
 516         struct hfi1_ctxtdata *rcd;
 517         u32 rsize;
 518         u32 maxcnt;
 519         u32 ps_head;
 520         u32 ps_tail;
 521         u32 ps_seq;
 522 };
 523
 524 static inline void init_ps_mdata(struct ps_mdata *mdata,
 525                                  struct hfi1_packet *packet)
 526 {
 527         struct hfi1_ctxtdata *rcd = packet->rcd;
 528
 529         mdata->rcd = rcd;
 530         mdata->rsize = packet->rsize;
 531         mdata->maxcnt = packet->maxcnt;
 532         mdata->ps_head = packet->rhqoff;
 533
 534         if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
 535                 mdata->ps_tail = get_rcvhdrtail(rcd);
 536                 if (rcd->ctxt == HFI1_CTRL_CTXT)
 537                         mdata->ps_seq = rcd->seq_cnt;
 538                 else
 539                         mdata->ps_seq = 0; /* not used with DMA_RTAIL */
 540         } else {
 541                 mdata->ps_tail = 0; /* used only with DMA_RTAIL*/
 542                 mdata->ps_seq = rcd->seq_cnt;
 543         }
 544 }
 545
 546 static inline int ps_done(struct ps_mdata *mdata, u64 rhf,
 547                           struct hfi1_ctxtdata *rcd)
 548 {
 549         if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
 550                 return mdata->ps_head == mdata->ps_tail;
 551         return mdata->ps_seq != rhf_rcv_seq(rhf);
 552 }
 553
 554 static inline int ps_skip(struct ps_mdata *mdata, u64 rhf,
 555                           struct hfi1_ctxtdata *rcd)
 556 {
 557         /*
 558          * Control context can potentially receive an invalid rhf.
 559          * Drop such packets.
 560          */
 561         if ((rcd->ctxt == HFI1_CTRL_CTXT) && (mdata->ps_head != mdata->ps_tail))
 562                 return mdata->ps_seq != rhf_rcv_seq(rhf);
 563
 564         return 0;
 565 }
 566
 567 static inline void update_ps_mdata(struct ps_mdata *mdata,
 568                                    struct hfi1_ctxtdata *rcd)
 569 {
 570         mdata->ps_head += mdata->rsize;
 571         if (mdata->ps_head >= mdata->maxcnt)
 572                 mdata->ps_head = 0;
 573
 574         /* Control context must do seq counting */
 575         if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
 576             (rcd->ctxt == HFI1_CTRL_CTXT)) {
 577                 if (++mdata->ps_seq > 13)
 578                         mdata->ps_seq = 1;
 579         }
 580 }
 581
 582 /*
 583  * prescan_rxq - search through the receive queue looking for packets
 584  * containing Excplicit Congestion Notifications (FECNs, or BECNs).
 585  * When an ECN is found, process the Congestion Notification, and toggle
 586  * it off.
 587  * This is declared as a macro to allow quick checking of the port to avoid
 588  * the overhead of a function call if not enabled.
 589  */
 590 #define prescan_rxq(rcd, packet) \
 591         do { \
 592                 if (rcd->ppd->cc_prescan) \
 593                         __prescan_rxq(packet); \
 594         } while (0)
 595 static void __prescan_rxq(struct hfi1_packet *packet)
 596 {
 597         struct hfi1_ctxtdata *rcd = packet->rcd;
 598         struct ps_mdata mdata;
 599
 600         init_ps_mdata(&mdata, packet);
 601
 602         while (1) {
 603                 struct hfi1_devdata *dd = rcd->dd;
 604                 struct hfi1_ibport *ibp = rcd_to_iport(rcd);
 605                 __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
 606                                          dd->rhf_offset;
 607                 struct rvt_qp *qp;
 608                 struct ib_header *hdr;
 609                 struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 610                 u64 rhf = rhf_to_cpu(rhf_addr);
 611                 u32 etype = rhf_rcv_type(rhf), qpn, bth1;
 612                 int is_ecn = 0;
 613                 u8 lnh;
 614
 615                 if (ps_done(&mdata, rhf, rcd))
 616                         break;
 617
 618                 if (ps_skip(&mdata, rhf, rcd))
 619                         goto next;
 620
 621                 if (etype != RHF_RCV_TYPE_IB)
 622                         goto next;
 623
 624                 packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
 625                 hdr = packet->hdr;
 626                 lnh = ib_get_lnh(hdr);
 627
 628                 if (lnh == HFI1_LRH_BTH) {
 629                         packet->ohdr = &hdr->u.oth;
 630                 } else if (lnh == HFI1_LRH_GRH) {
 631                         packet->ohdr = &hdr->u.l.oth;
 632                         packet->rcv_flags |= HFI1_HAS_GRH;
 633                 } else {
 634                         goto next; /* just in case */
 635                 }
 636
 637                 bth1 = be32_to_cpu(packet->ohdr->bth[1]);
 638                 is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK));
 639
 640                 if (!is_ecn)
 641                         goto next;
 642
 643                 qpn = bth1 & RVT_QPN_MASK;
 644                 rcu_read_lock();
 645                 qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);
 646
 647                 if (!qp) {
 648                         rcu_read_unlock();
 649                         goto next;
 650                 }
 651
 652                 process_ecn(qp, packet, true);
 653                 rcu_read_unlock();
 654
 655                 /* turn off BECN, FECN */
 656                 bth1 &= ~(IB_FECN_SMASK | IB_BECN_SMASK);
 657                 packet->ohdr->bth[1] = cpu_to_be32(bth1);
 658 next:
 659                 update_ps_mdata(&mdata, rcd);
 660         }
 661 }
 662
 663 static void process_rcv_qp_work(struct hfi1_ctxtdata *rcd)
 664 {
 665         struct rvt_qp *qp, *nqp;
 666
 667         /*
 668          * Iterate over all QPs waiting to respond.
 669          * The list won't change since the IRQ is only run on one CPU.
 670          */
 671         list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
 672                 list_del_init(&qp->rspwait);
 673                 if (qp->r_flags & RVT_R_RSP_NAK) {
 674                         qp->r_flags &= ~RVT_R_RSP_NAK;
 675                         hfi1_send_rc_ack(rcd, qp, 0);
 676                 }
 677                 if (qp->r_flags & RVT_R_RSP_SEND) {
 678                         unsigned long flags;
 679
 680                         qp->r_flags &= ~RVT_R_RSP_SEND;
 681                         spin_lock_irqsave(&qp->s_lock, flags);
 682                         if (ib_rvt_state_ops[qp->state] &
 683                                         RVT_PROCESS_OR_FLUSH_SEND)
 684                                 hfi1_schedule_send(qp);
 685                         spin_unlock_irqrestore(&qp->s_lock, flags);
 686                 }
 687                 rvt_put_qp(qp);
 688         }
 689 }
 690
 691 static noinline int max_packet_exceeded(struct hfi1_packet *packet, int thread)
 692 {
 693         if (thread) {
 694                 if ((packet->numpkt & (MAX_PKT_RECV_THREAD - 1)) == 0)
 695                         /* allow defered processing */
 696                         process_rcv_qp_work(packet->rcd);
 697                 cond_resched();
 698                 return RCV_PKT_OK;
 699         } else {
 700                 this_cpu_inc(*packet->rcd->dd->rcv_limit);
 701                 return RCV_PKT_LIMIT;
 702         }
 703 }
 704
 705 static inline int check_max_packet(struct hfi1_packet *packet, int thread)
 706 {
 707         int ret = RCV_PKT_OK;
 708
 709         if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0))
 710                 ret = max_packet_exceeded(packet, thread);
 711         return ret;
 712 }
 713
 714 static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
 715 {
 716         int ret;
 717
 718         /* Set up for the next packet */
 719         packet->rhqoff += packet->rsize;
 720         if (packet->rhqoff >= packet->maxcnt)
 721                 packet->rhqoff = 0;
 722
 723         packet->numpkt++;
 724         ret = check_max_packet(packet, thread);
 725
 726         packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
 727                                      packet->rcd->dd->rhf_offset;
 728         packet->rhf = rhf_to_cpu(packet->rhf_addr);
 729
 730         return ret;
 731 }
 732
 733 static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
 734 {
 735         int ret;
 736
 737         packet->hdr = hfi1_get_msgheader(packet->rcd->dd,
 738                                          packet->rhf_addr);
 739         packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
 740         packet->etype = rhf_rcv_type(packet->rhf);
 741         /* total length */
 742         packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
 743         /* retrieve eager buffer details */
 744         packet->ebuf = NULL;
 745         if (rhf_use_egr_bfr(packet->rhf)) {
 746                 packet->etail = rhf_egr_index(packet->rhf);
 747                 packet->ebuf = get_egrbuf(packet->rcd, packet->rhf,
 748                                  &packet->updegr);
 749                 /*
 750                  * Prefetch the contents of the eager buffer.  It is
 751                  * OK to send a negative length to prefetch_range().
 752                  * The +2 is the size of the RHF.
 753                  */
 754                 prefetch_range(packet->ebuf,
 755                                packet->tlen - ((packet->rcd->rcvhdrqentsize -
 756                                                (rhf_hdrq_offset(packet->rhf)
 757                                                 + 2)) * 4));
 758         }
 759
 760         /*
 761          * Call a type specific handler for the packet. We
 762          * should be able to trust that etype won't be beyond
 763          * the range of valid indexes. If so something is really
 764          * wrong and we can probably just let things come
 765          * crashing down. There is no need to eat another
 766          * comparison in this performance critical code.
 767          */
 768         packet->rcd->dd->rhf_rcv_function_map[packet->etype](packet);
 769         packet->numpkt++;
 770
 771         /* Set up for the next packet */
 772         packet->rhqoff += packet->rsize;
 773         if (packet->rhqoff >= packet->maxcnt)
 774                 packet->rhqoff = 0;
 775
 776         ret = check_max_packet(packet, thread);
 777
 778         packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
 779                                       packet->rcd->dd->rhf_offset;
 780         packet->rhf = rhf_to_cpu(packet->rhf_addr);
 781
 782         return ret;
 783 }
 784
 785 static inline void process_rcv_update(int last, struct hfi1_packet *packet)
 786 {
 787         /*
 788          * Update head regs etc., every 16 packets, if not last pkt,
 789          * to help prevent rcvhdrq overflows, when many packets
 790          * are processed and queue is nearly full.
 791          * Don't request an interrupt for intermediate updates.
 792          */
 793         if (!last && !(packet->numpkt & 0xf)) {
 794                 update_usrhead(packet->rcd, packet->rhqoff, packet->updegr,
 795                                packet->etail, 0, 0);
 796                 packet->updegr = 0;
 797         }
 798         packet->rcv_flags = 0;
 799 }
 800
 801 static inline void finish_packet(struct hfi1_packet *packet)
 802 {
 803         /*
 804          * Nothing we need to free for the packet.
 805          *
 806          * The only thing we need to do is a final update and call for an
 807          * interrupt
 808          */
 809         update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
 810                        packet->etail, rcv_intr_dynamic, packet->numpkt);
 811 }
 812
 813 /*
 814  * Handle receive interrupts when using the no dma rtail option.
 815  */
 816 int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
 817 {
 818         u32 seq;
 819         int last = RCV_PKT_OK;
 820         struct hfi1_packet packet;
 821
 822         init_packet(rcd, &packet);
 823         seq = rhf_rcv_seq(packet.rhf);
 824         if (seq != rcd->seq_cnt) {
 825                 last = RCV_PKT_DONE;
 826                 goto bail;
 827         }
 828
 829         prescan_rxq(rcd, &packet);
 830
 831         while (last == RCV_PKT_OK) {
 832                 last = process_rcv_packet(&packet, thread);
 833                 seq = rhf_rcv_seq(packet.rhf);
 834                 if (++rcd->seq_cnt > 13)
 835                         rcd->seq_cnt = 1;
 836                 if (seq != rcd->seq_cnt)
 837                         last = RCV_PKT_DONE;
 838                 process_rcv_update(last, &packet);
 839         }
 840         process_rcv_qp_work(rcd);
 841         rcd->head = packet.rhqoff;
 842 bail:
 843         finish_packet(&packet);
 844         return last;
 845 }
 846
 847 int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
 848 {
 849         u32 hdrqtail;
 850         int last = RCV_PKT_OK;
 851         struct hfi1_packet packet;
 852
 853         init_packet(rcd, &packet);
 854         hdrqtail = get_rcvhdrtail(rcd);
 855         if (packet.rhqoff == hdrqtail) {
 856                 last = RCV_PKT_DONE;
 857                 goto bail;
 858         }
 859         smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
 860
 861         prescan_rxq(rcd, &packet);
 862
 863         while (last == RCV_PKT_OK) {
 864                 last = process_rcv_packet(&packet, thread);
 865                 if (packet.rhqoff == hdrqtail)
 866                         last = RCV_PKT_DONE;
 867                 process_rcv_update(last, &packet);
 868         }
 869         process_rcv_qp_work(rcd);
 870         rcd->head = packet.rhqoff;
 871 bail:
 872         finish_packet(&packet);
 873         return last;
 874 }
 875
 876 static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt)
 877 {
 878         int i;
 879
 880         /*
 881          * For dynamically allocated kernel contexts (like vnic) switch
 882          * interrupt handler only for that context. Otherwise, switch
 883          * interrupt handler for all statically allocated kernel contexts.
 884          */
 885         if (ctxt >= dd->first_dyn_alloc_ctxt) {
 886                 dd->rcd[ctxt]->do_interrupt =
 887                         &handle_receive_interrupt_nodma_rtail;
 888                 return;
 889         }
 890
 891         for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++)
 892                 dd->rcd[i]->do_interrupt =
 893                         &handle_receive_interrupt_nodma_rtail;
 894 }
 895
 896 static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt)
 897 {
 898         int i;
 899
 900         /*
 901          * For dynamically allocated kernel contexts (like vnic) switch
 902          * interrupt handler only for that context. Otherwise, switch
 903          * interrupt handler for all statically allocated kernel contexts.
 904          */
 905         if (ctxt >= dd->first_dyn_alloc_ctxt) {
 906                 dd->rcd[ctxt]->do_interrupt =
 907                         &handle_receive_interrupt_dma_rtail;
 908                 return;
 909         }
 910
 911         for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++)
 912                 dd->rcd[i]->do_interrupt =
 913                         &handle_receive_interrupt_dma_rtail;
 914 }
 915
 916 void set_all_slowpath(struct hfi1_devdata *dd)
 917 {
 918         int i;
 919
 920         /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
 921         for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
 922                 struct hfi1_ctxtdata *rcd = dd->rcd[i];
 923
 924                 if ((i < dd->first_dyn_alloc_ctxt) ||
 925                     (rcd && rcd->sc && (rcd->sc->type == SC_KERNEL)))
 926                         rcd->do_interrupt = &handle_receive_interrupt;
 927         }
 928 }
 929
 930 static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
 931                                       struct hfi1_packet *packet,
 932                                       struct hfi1_devdata *dd)
 933 {
 934         struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
 935         struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
 936                                                    packet->rhf_addr);
 937         u8 etype = rhf_rcv_type(packet->rhf);
 938
 939         if (etype == RHF_RCV_TYPE_IB &&
 940             hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) {
 941                 int hwstate = read_logical_state(dd);
 942
 943                 if (hwstate != LSTATE_ACTIVE) {
 944                         dd_dev_info(dd, "Unexpected link state %d\n", hwstate);
 945                         return 0;
 946                 }
 947
 948                 queue_work(rcd->ppd->hfi1_wq, lsaw);
 949                 return 1;
 950         }
 951         return 0;
 952 }
 953
 954 /*
 955  * handle_receive_interrupt - receive a packet
 956  * @rcd: the context
 957  *
 958  * Called from interrupt handler for errors or receive interrupt.
 959  * This is the slow path interrupt handler.
 960  */
 961 int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 962 {
 963         struct hfi1_devdata *dd = rcd->dd;
 964         u32 hdrqtail;
 965         int needset, last = RCV_PKT_OK;
 966         struct hfi1_packet packet;
 967         int skip_pkt = 0;
 968
 969         /* Control context will always use the slow path interrupt handler */
 970         needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1;
 971
 972         init_packet(rcd, &packet);
 973
 974         if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
 975                 u32 seq = rhf_rcv_seq(packet.rhf);
 976
 977                 if (seq != rcd->seq_cnt) {
 978                         last = RCV_PKT_DONE;
 979                         goto bail;
 980                 }
 981                 hdrqtail = 0;
 982         } else {
 983                 hdrqtail = get_rcvhdrtail(rcd);
 984                 if (packet.rhqoff == hdrqtail) {
 985                         last = RCV_PKT_DONE;
 986                         goto bail;
 987                 }
 988                 smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
 989
 990                 /*
 991                  * Control context can potentially receive an invalid
 992                  * rhf. Drop such packets.
 993                  */
 994                 if (rcd->ctxt == HFI1_CTRL_CTXT) {
 995                         u32 seq = rhf_rcv_seq(packet.rhf);
 996
 997                         if (seq != rcd->seq_cnt)
 998                                 skip_pkt = 1;
 999                 }
1000         }
1001
1002         prescan_rxq(rcd, &packet);
1003
1004         while (last == RCV_PKT_OK) {
1005                 if (unlikely(dd->do_drop &&
1006                              atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
1007                              DROP_PACKET_ON)) {
1008                         dd->do_drop = 0;
1009
1010                         /* On to the next packet */
1011                         packet.rhqoff += packet.rsize;
1012                         packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
1013                                           packet.rhqoff +
1014                                           dd->rhf_offset;
1015                         packet.rhf = rhf_to_cpu(packet.rhf_addr);
1016
1017                 } else if (skip_pkt) {
1018                         last = skip_rcv_packet(&packet, thread);
1019                         skip_pkt = 0;
1020                 } else {
1021                         /* Auto activate link on non-SC15 packet receive */
1022                         if (unlikely(rcd->ppd->host_link_state ==
1023                                      HLS_UP_ARMED) &&
1024                             set_armed_to_active(rcd, &packet, dd))
1025                                 goto bail;
1026                         last = process_rcv_packet(&packet, thread);
1027                 }
1028
1029                 if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
1030                         u32 seq = rhf_rcv_seq(packet.rhf);
1031
1032                         if (++rcd->seq_cnt > 13)
1033                                 rcd->seq_cnt = 1;
1034                         if (seq != rcd->seq_cnt)
1035                                 last = RCV_PKT_DONE;
1036                         if (needset) {
1037                                 dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
1038                                 set_nodma_rtail(dd, rcd->ctxt);
1039                                 needset = 0;
1040                         }
1041                 } else {
1042                         if (packet.rhqoff == hdrqtail)
1043                                 last = RCV_PKT_DONE;
1044                         /*
1045                          * Control context can potentially receive an invalid
1046                          * rhf. Drop such packets.
1047                          */
1048                         if (rcd->ctxt == HFI1_CTRL_CTXT) {
1049                                 u32 seq = rhf_rcv_seq(packet.rhf);
1050
1051                                 if (++rcd->seq_cnt > 13)
1052                                         rcd->seq_cnt = 1;
1053                                 if (!last && (seq != rcd->seq_cnt))
1054                                         skip_pkt = 1;
1055                         }
1056
1057                         if (needset) {
1058                                 dd_dev_info(dd,
1059                                             "Switching to DMA_RTAIL\n");
1060                                 set_dma_rtail(dd, rcd->ctxt);
1061                                 needset = 0;
1062                         }
1063                 }
1064
1065                 process_rcv_update(last, &packet);
1066         }
1067
1068         process_rcv_qp_work(rcd);
1069         rcd->head = packet.rhqoff;
1070
1071 bail:
1072         /*
1073          * Always write head at end, and setup rcv interrupt, even
1074          * if no packets were processed.
1075          */
1076         finish_packet(&packet);
1077         return last;
1078 }
1079
1080 /*
1081  * We may discover in the interrupt that the hardware link state has
1082  * changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
1083  * and we need to update the driver's notion of the link state.  We cannot
1084  * run set_link_state from interrupt context, so we queue this function on
1085  * a workqueue.
1086  *
1087  * We delay the regular interrupt processing until after the state changes
1088  * so that the link will be in the correct state by the time any application
1089  * we wake up attempts to send a reply to any message it received.
1090  * (Subsequent receive interrupts may possibly force the wakeup before we
1091  * update the link state.)
1092  *
1093  * The rcd is freed in hfi1_free_ctxtdata after hfi1_postinit_cleanup invokes
1094  * dd->f_cleanup(dd) to disable the interrupt handler and flush workqueues,
1095  * so we're safe from use-after-free of the rcd.
1096  */
1097 void receive_interrupt_work(struct work_struct *work)
1098 {
1099         struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
1100                                                   linkstate_active_work);
1101         struct hfi1_devdata *dd = ppd->dd;
1102         int i;
1103
1104         /* Received non-SC15 packet implies neighbor_normal */
1105         ppd->neighbor_normal = 1;
1106         set_link_state(ppd, HLS_UP_ACTIVE);
1107
1108         /*
1109          * Interrupt all statically allocated kernel contexts that could
1110          * have had an interrupt during auto activation.
1111          */
1112         for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++)
1113                 force_recv_intr(dd->rcd[i]);
1114 }
1115
1116 /*
1117  * Convert a given MTU size to the on-wire MAD packet enumeration.
1118  * Return -1 if the size is invalid.
1119  */
1120 int mtu_to_enum(u32 mtu, int default_if_bad)
1121 {
1122         switch (mtu) {
1123         case     0: return OPA_MTU_0;
1124         case   256: return OPA_MTU_256;
1125         case   512: return OPA_MTU_512;
1126         case  1024: return OPA_MTU_1024;
1127         case  2048: return OPA_MTU_2048;
1128         case  4096: return OPA_MTU_4096;
1129         case  8192: return OPA_MTU_8192;
1130         case 10240: return OPA_MTU_10240;
1131         }
1132         return default_if_bad;
1133 }
1134
1135 u16 enum_to_mtu(int mtu)
1136 {
1137         switch (mtu) {
1138         case OPA_MTU_0:     return 0;
1139         case OPA_MTU_256:   return 256;
1140         case OPA_MTU_512:   return 512;
1141         case OPA_MTU_1024:  return 1024;
1142         case OPA_MTU_2048:  return 2048;
1143         case OPA_MTU_4096:  return 4096;
1144         case OPA_MTU_8192:  return 8192;
1145         case OPA_MTU_10240: return 10240;
1146         default: return 0xffff;
1147         }
1148 }
1149
1150 /*
1151  * set_mtu - set the MTU
1152  * @ppd: the per port data
1153  *
1154  * We can handle "any" incoming size, the issue here is whether we
1155  * need to restrict our outgoing size.  We do not deal with what happens
1156  * to programs that are already running when the size changes.
1157  */
1158 int set_mtu(struct hfi1_pportdata *ppd)
1159 {
1160         struct hfi1_devdata *dd = ppd->dd;
1161         int i, drain, ret = 0, is_up = 0;
1162
1163         ppd->ibmtu = 0;
1164         for (i = 0; i < ppd->vls_supported; i++)
1165                 if (ppd->ibmtu < dd->vld[i].mtu)
1166                         ppd->ibmtu = dd->vld[i].mtu;
1167         ppd->ibmaxlen = ppd->ibmtu + lrh_max_header_bytes(ppd->dd);
1168
1169         mutex_lock(&ppd->hls_lock);
1170         if (ppd->host_link_state == HLS_UP_INIT ||
1171             ppd->host_link_state == HLS_UP_ARMED ||
1172             ppd->host_link_state == HLS_UP_ACTIVE)
1173                 is_up = 1;
1174
1175         drain = !is_ax(dd) && is_up;
1176
1177         if (drain)
1178                 /*
1179                  * MTU is specified per-VL. To ensure that no packet gets
1180                  * stuck (due, e.g., to the MTU for the packet's VL being
1181                  * reduced), empty the per-VL FIFOs before adjusting MTU.
1182                  */
1183                 ret = stop_drain_data_vls(dd);
1184
1185         if (ret) {
1186                 dd_dev_err(dd, "%s: cannot stop/drain VLs - refusing to change per-VL MTUs\n",
1187                            __func__);
1188                 goto err;
1189         }
1190
1191         hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_MTU, 0);
1192
1193         if (drain)
1194                 open_fill_data_vls(dd); /* reopen all VLs */
1195
1196 err:
1197         mutex_unlock(&ppd->hls_lock);
1198
1199         return ret;
1200 }
1201
1202 int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc)
1203 {
1204         struct hfi1_devdata *dd = ppd->dd;
1205
1206         ppd->lid = lid;
1207         ppd->lmc = lmc;
1208         hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LIDLMC, 0);
1209
1210         dd_dev_info(dd, "port %u: got a lid: 0x%x\n", ppd->port, lid);
1211
1212         return 0;
1213 }
1214
1215 void shutdown_led_override(struct hfi1_pportdata *ppd)
1216 {
1217         struct hfi1_devdata *dd = ppd->dd;
1218
1219         /*
1220          * This pairs with the memory barrier in hfi1_start_led_override to
1221          * ensure that we read the correct state of LED beaconing represented
1222          * by led_override_timer_active
1223          */
1224         smp_rmb();
1225         if (atomic_read(&ppd->led_override_timer_active)) {
1226                 del_timer_sync(&ppd->led_override_timer);
1227                 atomic_set(&ppd->led_override_timer_active, 0);
1228                 /* Ensure the atomic_set is visible to all CPUs */
1229                 smp_wmb();
1230         }
1231
1232         /* Hand control of the LED to the DC for normal operation */
1233         write_csr(dd, DCC_CFG_LED_CNTRL, 0);
1234 }
1235
1236 static void run_led_override(unsigned long opaque)
1237 {
1238         struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)opaque;
1239         struct hfi1_devdata *dd = ppd->dd;
1240         unsigned long timeout;
1241         int phase_idx;
1242
1243         if (!(dd->flags & HFI1_INITTED))
1244                 return;
1245
1246         phase_idx = ppd->led_override_phase & 1;
1247
1248         setextled(dd, phase_idx);
1249
1250         timeout = ppd->led_override_vals[phase_idx];
1251
1252         /* Set up for next phase */
1253         ppd->led_override_phase = !ppd->led_override_phase;
1254
1255         mod_timer(&ppd->led_override_timer, jiffies + timeout);
1256 }
1257
1258 /*
1259  * To have the LED blink in a particular pattern, provide timeon and timeoff
1260  * in milliseconds.
1261  * To turn off custom blinking and return to normal operation, use
1262  * shutdown_led_override()
1263  */
1264 void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
1265                              unsigned int timeoff)
1266 {
1267         if (!(ppd->dd->flags & HFI1_INITTED))
1268                 return;
1269
1270         /* Convert to jiffies for direct use in timer */
1271         ppd->led_override_vals[0] = msecs_to_jiffies(timeoff);
1272         ppd->led_override_vals[1] = msecs_to_jiffies(timeon);
1273
1274         /* Arbitrarily start from LED on phase */
1275         ppd->led_override_phase = 1;
1276
1277         /*
1278          * If the timer has not already been started, do so. Use a "quick"
1279          * timeout so the handler will be called soon to look at our request.
1280          */
1281         if (!timer_pending(&ppd->led_override_timer)) {
1282                 setup_timer(&ppd->led_override_timer, run_led_override,
1283                             (unsigned long)ppd);
1284                 ppd->led_override_timer.expires = jiffies + 1;
1285                 add_timer(&ppd->led_override_timer);
1286                 atomic_set(&ppd->led_override_timer_active, 1);
1287                 /* Ensure the atomic_set is visible to all CPUs */
1288                 smp_wmb();
1289         }
1290 }
1291
1292 /**
1293  * hfi1_reset_device - reset the chip if possible
1294  * @unit: the device to reset
1295  *
1296  * Whether or not reset is successful, we attempt to re-initialize the chip
1297  * (that is, much like a driver unload/reload).  We clear the INITTED flag
1298  * so that the various entry points will fail until we reinitialize.  For
1299  * now, we only allow this if no user contexts are open that use chip resources
1300  */
1301 int hfi1_reset_device(int unit)
1302 {
1303         int ret, i;
1304         struct hfi1_devdata *dd = hfi1_lookup(unit);
1305         struct hfi1_pportdata *ppd;
1306         unsigned long flags;
1307         int pidx;
1308
1309         if (!dd) {
1310                 ret = -ENODEV;
1311                 goto bail;
1312         }
1313
1314         dd_dev_info(dd, "Reset on unit %u requested\n", unit);
1315
1316         if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) {
1317                 dd_dev_info(dd,
1318                             "Invalid unit number %u or not initialized or not present\n",
1319                             unit);
1320                 ret = -ENXIO;
1321                 goto bail;
1322         }
1323
1324         spin_lock_irqsave(&dd->uctxt_lock, flags);
1325         if (dd->rcd)
1326                 for (i = dd->first_dyn_alloc_ctxt;
1327                      i < dd->num_rcv_contexts; i++) {
1328                         if (!dd->rcd[i] || !dd->rcd[i]->cnt)
1329                                 continue;
1330                         spin_unlock_irqrestore(&dd->uctxt_lock, flags);
1331                         ret = -EBUSY;
1332                         goto bail;
1333                 }
1334         spin_unlock_irqrestore(&dd->uctxt_lock, flags);
1335
1336         for (pidx = 0; pidx < dd->num_pports; ++pidx) {
1337                 ppd = dd->pport + pidx;
1338
1339                 shutdown_led_override(ppd);
1340         }
1341         if (dd->flags & HFI1_HAS_SEND_DMA)
1342                 sdma_exit(dd);
1343
1344         hfi1_reset_cpu_counters(dd);
1345
1346         ret = hfi1_init(dd, 1);
1347
1348         if (ret)
1349                 dd_dev_err(dd,
1350                            "Reinitialize unit %u after reset failed with %d\n",
1351                            unit, ret);
1352         else
1353                 dd_dev_info(dd, "Reinitialized unit %u after resetting\n",
1354                             unit);
1355
1356 bail:
1357         return ret;
1358 }
1359
1360 void handle_eflags(struct hfi1_packet *packet)
1361 {
1362         struct hfi1_ctxtdata *rcd = packet->rcd;
1363         u32 rte = rhf_rcv_type_err(packet->rhf);
1364
1365         rcv_hdrerr(rcd, rcd->ppd, packet);
1366         if (rhf_err_flags(packet->rhf))
1367                 dd_dev_err(rcd->dd,
1368                            "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n",
1369                            rcd->ctxt, packet->rhf,
1370                            packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
1371                            packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
1372                            packet->rhf & RHF_DC_ERR ? "dc " : "",
1373                            packet->rhf & RHF_TID_ERR ? "tid " : "",
1374                            packet->rhf & RHF_LEN_ERR ? "len " : "",
1375                            packet->rhf & RHF_ECC_ERR ? "ecc " : "",
1376                            packet->rhf & RHF_VCRC_ERR ? "vcrc " : "",
1377                            packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
1378                            rte);
1379 }
1380
1381 /*
1382  * The following functions are called by the interrupt handler. They are type
1383  * specific handlers for each packet type.
1384  */
1385 int process_receive_ib(struct hfi1_packet *packet)
1386 {
1387         if (unlikely(hfi1_dbg_fault_packet(packet)))
1388                 return RHF_RCV_CONTINUE;
1389
1390         trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
1391                           packet->rcd->ctxt,
1392                           rhf_err_flags(packet->rhf),
1393                           RHF_RCV_TYPE_IB,
1394                           packet->hlen,
1395                           packet->tlen,
1396                           packet->updegr,
1397                           rhf_egr_index(packet->rhf));
1398
1399         if (unlikely(
1400                  (hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
1401                  (packet->rhf & RHF_DC_ERR))))
1402                 return RHF_RCV_CONTINUE;
1403
1404         if (unlikely(rhf_err_flags(packet->rhf))) {
1405                 handle_eflags(packet);
1406                 return RHF_RCV_CONTINUE;
1407         }
1408
1409         hfi1_ib_rcv(packet);
1410         return RHF_RCV_CONTINUE;
1411 }
1412
1413 static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
1414 {
1415         /* Packet received in VNIC context via RSM */
1416         if (packet->rcd->is_vnic)
1417                 return true;
1418
1419         if ((HFI1_GET_L2_TYPE(packet->ebuf) == OPA_VNIC_L2_TYPE) &&
1420             (HFI1_GET_L4_TYPE(packet->ebuf) == OPA_VNIC_L4_ETHR))
1421                 return true;
1422
1423         return false;
1424 }
1425
1426 int process_receive_bypass(struct hfi1_packet *packet)
1427 {
1428         struct hfi1_devdata *dd = packet->rcd->dd;
1429
1430         if (unlikely(rhf_err_flags(packet->rhf))) {
1431                 handle_eflags(packet);
1432         } else if (hfi1_is_vnic_packet(packet)) {
1433                 hfi1_vnic_bypass_rcv(packet);
1434                 return RHF_RCV_CONTINUE;
1435         }
1436
1437         dd_dev_err(dd, "Unsupported bypass packet. Dropping\n");
1438         incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
1439         if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
1440                 u64 *flits = packet->ebuf;
1441
1442                 if (flits && !(packet->rhf & RHF_LEN_ERR)) {
1443                         dd->err_info_rcvport.packet_flit1 = flits[0];
1444                         dd->err_info_rcvport.packet_flit2 =
1445                                 packet->tlen > sizeof(flits[0]) ? flits[1] : 0;
1446                 }
1447                 dd->err_info_rcvport.status_and_code |=
1448                         (OPA_EI_STATUS_SMASK | BAD_L2_ERR);
1449         }
1450         return RHF_RCV_CONTINUE;
1451 }
1452
1453 int process_receive_error(struct hfi1_packet *packet)
1454 {
1455         /* KHdrHCRCErr -- KDETH packet with a bad HCRC */
1456         if (unlikely(
1457                  hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
1458                  rhf_rcv_type_err(packet->rhf) == 3))
1459                 return RHF_RCV_CONTINUE;
1460
1461         handle_eflags(packet);
1462
1463         if (unlikely(rhf_err_flags(packet->rhf)))
1464                 dd_dev_err(packet->rcd->dd,
1465                            "Unhandled error packet received. Dropping.\n");
1466
1467         return RHF_RCV_CONTINUE;
1468 }
1469
1470 int kdeth_process_expected(struct hfi1_packet *packet)
1471 {
1472         if (unlikely(hfi1_dbg_fault_packet(packet)))
1473                 return RHF_RCV_CONTINUE;
1474         if (unlikely(rhf_err_flags(packet->rhf)))
1475                 handle_eflags(packet);
1476
1477         dd_dev_err(packet->rcd->dd,
1478                    "Unhandled expected packet received. Dropping.\n");
1479         return RHF_RCV_CONTINUE;
1480 }
1481
1482 int kdeth_process_eager(struct hfi1_packet *packet)
1483 {
1484         if (unlikely(rhf_err_flags(packet->rhf)))
1485                 handle_eflags(packet);
1486         if (unlikely(hfi1_dbg_fault_packet(packet)))
1487                 return RHF_RCV_CONTINUE;
1488
1489         dd_dev_err(packet->rcd->dd,
1490                    "Unhandled eager packet received. Dropping.\n");
1491         return RHF_RCV_CONTINUE;
1492 }
1493
1494 int process_receive_invalid(struct hfi1_packet *packet)
1495 {
1496         dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
1497                    rhf_rcv_type(packet->rhf));
1498         return RHF_RCV_CONTINUE;
1499 }