2 * This file is part of the Chelsio T4 Ethernet driver for Linux.
4 * Copyright (c) 2003-2010 Chelsio Communications, Inc. All rights reserved.
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
37 #include <linux/bitmap.h>
38 #include <linux/crc32.h>
39 #include <linux/ctype.h>
40 #include <linux/debugfs.h>
41 #include <linux/err.h>
42 #include <linux/etherdevice.h>
43 #include <linux/firmware.h>
44 #include <linux/if_vlan.h>
45 #include <linux/init.h>
46 #include <linux/log2.h>
47 #include <linux/mdio.h>
48 #include <linux/module.h>
49 #include <linux/moduleparam.h>
50 #include <linux/mutex.h>
51 #include <linux/netdevice.h>
52 #include <linux/pci.h>
53 #include <linux/aer.h>
54 #include <linux/rtnetlink.h>
55 #include <linux/sched.h>
56 #include <linux/seq_file.h>
57 #include <linux/sockios.h>
58 #include <linux/vmalloc.h>
59 #include <linux/workqueue.h>
60 #include <net/neighbour.h>
61 #include <net/netevent.h>
62 #include <asm/uaccess.h>
70 #define DRV_VERSION "1.3.0-ko"
71 #define DRV_DESC "Chelsio T4 Network Driver"
74 * Max interrupt hold-off timer value in us. Queues fall back to this value
75 * under extreme memory pressure so it's largish to give the system time to
78 #define MAX_SGE_TIMERVAL 200U
82 * Virtual Function provisioning constants. We need two extra Ingress Queues
83 * with Interrupt capability to serve as the VF's Firmware Event Queue and
84 * Forwarded Interrupt Queue (when using MSI mode) -- neither will have Free
85 * Lists associated with them. For each Ethernet/Control Egress Queue and
86 * for each Free List, we need an Egress Context.
/*
 * Per-VF resource provisioning values.  The derived counts are expressed in
 * terms of VFRES_NPORTS and VFRES_NQSETS: ingress queues with free list /
 * interrupt are NQSETS + 2 and egress queues are NQSETS * 2.
 * NOTE(review): the enclosing "enum {" line is not visible in this excerpt.
 */
89 VFRES_NPORTS = 1, /* # of "ports" per VF */
90 VFRES_NQSETS = 2, /* # of "Queue Sets" per VF */
92 VFRES_NVI = VFRES_NPORTS, /* # of Virtual Interfaces */
93 VFRES_NETHCTRL = VFRES_NQSETS, /* # of EQs used for ETH or CTRL Qs */
94 VFRES_NIQFLINT = VFRES_NQSETS+2,/* # of ingress Qs/w Free List(s)/intr */
95 VFRES_NIQ = 0, /* # of non-fl/int ingress queues */
96 VFRES_NEQ = VFRES_NQSETS*2, /* # of egress queues */
97 VFRES_TC = 0, /* PCI-E traffic class */
98 VFRES_NEXACTF = 16, /* # of exact MPS filters */
/* Capability masks: the read set additionally includes FW_CMD_CAP_PORT. */
100 VFRES_R_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF|FW_CMD_CAP_PORT,
101 VFRES_WX_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF,
105 * Provide a Port Access Rights Mask for the specified PF/VF. This is very
106 * static and likely not to be useful in the long run. We really need to
107 * implement some form of persistent configuration which the firmware
/*
 * pfvfres_pmask - compute the port access mask for a PF/VF pair.
 * @adapter: adapter whose params.nports / params.portvec are consulted
 * @pf: physical function number (used modulo the number of ports)
 * @vf: virtual function number
 *
 * NOTE(review): several lines of this function (conditions, loop header and
 * the final returns) are not visible in this excerpt.
 */
110 static unsigned int pfvfres_pmask(struct adapter *adapter,
111 unsigned int pf, unsigned int vf)
113 unsigned int portn, portvec;
116 * Give PF's access to all of the ports.
119 return FW_PFVF_CMD_PMASK_MASK;
122 * For VFs, we'll assign them access to the ports based purely on the
123 * PF. We assign active ports in order, wrapping around if there are
124 * fewer active ports than PFs: e.g. active port[pf % nports].
125 * Unfortunately the adapter's port_info structs haven't been
126 * initialized yet so we have to compute this.
/* Guard against a modulo by zero in the port-number computation below. */
128 if (adapter->params.nports == 0)
131 portn = pf % adapter->params.nports;
132 portvec = adapter->params.portvec;
135 * Isolate the lowest set bit in the port vector. If we're at
136 * the port number that we want, return that as the pmask.
137 * otherwise mask that bit out of the port vector and
138 * decrement our port number ...
/*
 * portvec & (portvec-1) clears the lowest set bit, so XOR-ing it with
 * portvec leaves only that lowest set bit.
 */
140 unsigned int pmask = portvec ^ (portvec & (portvec-1));
/*
 * Aperture and base constants for three memory windows (MEMWIN0..MEMWIN2).
 * NOTE(review): the enclosing enum line and the code that uses these values
 * are not visible in this excerpt.
 */
151 MEMWIN0_APERTURE = 65536,
152 MEMWIN0_BASE = 0x30000,
153 MEMWIN1_APERTURE = 32768,
154 MEMWIN1_BASE = 0x28000,
155 MEMWIN2_APERTURE = 2048,
156 MEMWIN2_BASE = 0x1b800,
/*
 * Upper and lower bounds on queue sizes.  set_sge_param() checks requested
 * ring sizes against MAX_RX_BUFFERS, MAX_TXQ_ENTRIES, MAX_RSPQ_ENTRIES,
 * MIN_RSPQ_ENTRIES and MIN_TXQ_ENTRIES, and get_sge_param() reports the MAX
 * values as the ring limits.
 */
160 MAX_TXQ_ENTRIES = 16384,
161 MAX_CTRL_TXQ_ENTRIES = 1024,
162 MAX_RSPQ_ENTRIES = 16384,
163 MAX_RX_BUFFERS = 16384,
164 MIN_TXQ_ENTRIES = 32,
165 MIN_CTRL_TXQ_ENTRIES = 32,
166 MIN_RSPQ_ENTRIES = 128,
/*
 * Default NETIF_MSG_* bitmap; it is the initial value of the
 * dflt_msg_enable module parameter.
 */
170 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
171 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
172 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
/*
 * CH_DEVICE builds one PCI ID table entry for a Chelsio device ID, with
 * @data as the value following the PCI_VDEVICE() fields.
 */
174 #define CH_DEVICE(devid, data) { PCI_VDEVICE(CHELSIO, devid), (data) }
/*
 * PCI IDs handled by this driver.  The 0x40xx entries carry data -1 and the
 * 0x44xx entries carry data 4.
 * NOTE(review): how the data value is interpreted is not visible here.
 */
176 static DEFINE_PCI_DEVICE_TABLE(cxgb4_pci_tbl) = {
177 CH_DEVICE(0xa000, 0), /* PE10K */
178 CH_DEVICE(0x4001, -1),
179 CH_DEVICE(0x4002, -1),
180 CH_DEVICE(0x4003, -1),
181 CH_DEVICE(0x4004, -1),
182 CH_DEVICE(0x4005, -1),
183 CH_DEVICE(0x4006, -1),
184 CH_DEVICE(0x4007, -1),
185 CH_DEVICE(0x4008, -1),
186 CH_DEVICE(0x4009, -1),
187 CH_DEVICE(0x400a, -1),
188 CH_DEVICE(0x4401, 4),
189 CH_DEVICE(0x4402, 4),
190 CH_DEVICE(0x4403, 4),
191 CH_DEVICE(0x4404, 4),
192 CH_DEVICE(0x4405, 4),
193 CH_DEVICE(0x4406, 4),
194 CH_DEVICE(0x4407, 4),
195 CH_DEVICE(0x4408, 4),
196 CH_DEVICE(0x4409, 4),
197 CH_DEVICE(0x440a, 4),
/* Firmware image name requested by upgrade_fw() and declared to modinfo. */
201 #define FW_FNAME "cxgb4/t4fw.bin"
/* Module metadata. */
203 MODULE_DESCRIPTION(DRV_DESC);
204 MODULE_AUTHOR("Chelsio Communications");
205 MODULE_LICENSE("Dual BSD/GPL");
206 MODULE_VERSION(DRV_VERSION);
207 MODULE_DEVICE_TABLE(pci, cxgb4_pci_tbl);
208 MODULE_FIRMWARE(FW_FNAME);
/* Default message-enable bitmap, exposed as a module parameter (mode 0644). */
210 static int dflt_msg_enable = DFLT_MSG_ENABLE;
212 module_param(dflt_msg_enable, int, 0644);
213 MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T4 default message enable bitmap");
216 * The driver uses the best interrupt scheme available on a platform in the
217 * order MSI-X, MSI, legacy INTx interrupts. This parameter determines which
218 * of these schemes the driver may consider as follows:
220 * msi = 2: choose from among all three options
221 * msi = 1: only consider MSI and INTx interrupts
222 * msi = 0: force INTx interrupts
/* NOTE(review): the declaration of "msi" is not visible in this excerpt. */
226 module_param(msi, int, 0644);
227 MODULE_PARM_DESC(msi, "whether to use INTx (0), MSI (1) or MSI-X (2)");
230 * Queue interrupt hold-off timer values. Queues default to the first of these
/* Five hold-off timer values in microseconds (array size SGE_NTIMERS - 1). */
233 static unsigned int intr_holdoff[SGE_NTIMERS - 1] = { 5, 10, 20, 50, 100 };
235 module_param_array(intr_holdoff, uint, NULL, 0644);
236 MODULE_PARM_DESC(intr_holdoff, "values for queue interrupt hold-off timers "
237 "0..4 in microseconds");
/* Three packet-count thresholds (array size SGE_NCOUNTERS - 1). */
239 static unsigned int intr_cnt[SGE_NCOUNTERS - 1] = { 4, 8, 16 };
241 module_param_array(intr_cnt, uint, NULL, 0644);
242 MODULE_PARM_DESC(intr_cnt,
243 "thresholds 1..3 for queue interrupt packet counters");
/*
 * SR-IOV related parameters.
 * NOTE(review): the declaration of "vf_acls" and the matching #endif are
 * not visible in this excerpt.
 */
247 #ifdef CONFIG_PCI_IOV
248 module_param(vf_acls, bool, 0644);
249 MODULE_PARM_DESC(vf_acls, "if set enable virtualization L2 ACL enforcement");
251 static unsigned int num_vf[4];
253 module_param_array(num_vf, uint, NULL, 0644);
254 MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3");
/* File-scope state: debugfs root, adapter list, and ULD registration data. */
257 static struct dentry *cxgb4_debugfs_root;
259 static LIST_HEAD(adapter_list);
260 static DEFINE_MUTEX(uld_mutex);
261 static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
262 static const char *uld_str[] = { "RDMA", "iSCSI" };
/*
 * link_report - log the link state of a port.
 * @dev: the net device to report on
 *
 * Logs "link down" when the carrier is off; otherwise logs a "link up" line
 * containing a speed string and the PAUSE setting.
 * NOTE(review): the switch cases selecting the speed string and the
 * remaining arguments of the final netdev_info() call are not visible in
 * this excerpt.
 */
264 static void link_report(struct net_device *dev)
266 if (!netif_carrier_ok(dev))
267 netdev_info(dev, "link down\n");
/* Flow-control description strings for the "%s PAUSE" part of the message. */
269 static const char *fc[] = { "no", "Rx", "Tx", "Tx/Rx" };
/* Speed string defaults to "10Mbps" before the switch on link speed. */
271 const char *s = "10Mbps";
272 const struct port_info *p = netdev_priv(dev);
274 switch (p->link_cfg.speed) {
286 netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s,
/*
 * t4_os_link_changed - propagate a link state change to the net device.
 * @adapter: the adapter
 * @port_id: index into adapter->port[]
 * @link_stat: new link state, compared against netif_carrier_ok()
 *
 * Acts only when the device is running and the new state differs from the
 * current carrier state; the carrier is then switched on or off.
 * NOTE(review): the condition choosing between carrier on/off and any
 * follow-up call are not visible in this excerpt.
 */
291 void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat)
293 struct net_device *dev = adapter->port[port_id];
295 /* Skip changes from disabled ports. */
296 if (netif_running(dev) && link_stat != netif_carrier_ok(dev)) {
298 netif_carrier_on(dev);
300 netif_carrier_off(dev);
/*
 * t4_os_portmod_changed - log a port module change.
 * @adap: the adapter
 * @port_id: index into adap->port[]
 *
 * Logs "port module unplugged" when the module type is
 * FW_PORT_MOD_TYPE_NONE, otherwise logs the module name from mod_str[] when
 * the module type is a valid index into that table.
 */
306 void t4_os_portmod_changed(const struct adapter *adap, int port_id)
/* Module names indexed by pi->mod_type; index 0 has no name. */
308 static const char *mod_str[] = {
309 NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
312 const struct net_device *dev = adap->port[port_id];
313 const struct port_info *pi = netdev_priv(dev);
315 if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
316 netdev_info(dev, "port module unplugged\n");
/* Bounds check keeps the mod_str[] lookup inside the table. */
317 else if (pi->mod_type < ARRAY_SIZE(mod_str))
318 netdev_info(dev, "%s module inserted\n", mod_str[pi->mod_type]);
322 * Configure the exact and hash address filters to handle a port's multicast
323 * and secondary unicast MAC addresses.
/*
 * set_addr_filters - program a port's secondary unicast and multicast
 * addresses.
 * @dev: the net device
 * @sleep: passed through to the t4_* helpers
 *
 * Addresses are collected into addr[] and handed to t4_alloc_mac_filt() in
 * batches; a batch is flushed when the list is exhausted or addr[] is full.
 * The hash values produced for unicast (uhash) and multicast (mhash) are
 * finally passed to t4_set_addr_hash().
 * NOTE(review): the local declarations and the error handling after each
 * t4_alloc_mac_filt() call are not visible in this excerpt.
 */
325 static int set_addr_filters(const struct net_device *dev, bool sleep)
333 const struct netdev_hw_addr *ha;
334 int uc_cnt = netdev_uc_count(dev);
335 int mc_cnt = netdev_mc_count(dev);
336 const struct port_info *pi = netdev_priv(dev);
337 unsigned int mb = pi->adapter->fn;
339 /* first do the secondary unicast addresses */
340 netdev_for_each_uc_addr(ha, dev) {
341 addr[naddr++] = ha->addr;
/* Flush on the last unicast address or when the batch array is full. */
342 if (--uc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
343 ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
344 naddr, addr, filt_idx, &uhash, sleep);
353 /* next set up the multicast addresses */
354 netdev_for_each_mc_addr(ha, dev) {
355 addr[naddr++] = ha->addr;
/* Same batching rule as for the unicast addresses. */
356 if (--mc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
357 ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
358 naddr, addr, filt_idx, &mhash, sleep);
/* Pass the combined hash, plus a flag telling whether any unicast hash bits are set. */
367 return t4_set_addr_hash(pi->adapter, mb, pi->viid, uhash != 0,
368 uhash | mhash, sleep);
372 * Set Rx properties of a port, such as promiscuity, address filters, and MTU.
373 * If @mtu is -1 it is left unchanged.
/*
 * set_rxmode - apply address filters and Rx mode flags for a port.
 * @dev: the net device
 * @mtu: MTU value passed to t4_set_rxmode()
 * @sleep_ok: passed to set_addr_filters()
 *
 * Calls set_addr_filters() and t4_set_rxmode(); the promiscuous and
 * all-multicast arguments are derived from dev->flags.
 * NOTE(review): the condition linking the two calls, the last argument of
 * t4_set_rxmode() and the return statement are not visible in this excerpt.
 */
375 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
378 struct port_info *pi = netdev_priv(dev);
380 ret = set_addr_filters(dev, sleep_ok);
382 ret = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, mtu,
383 (dev->flags & IFF_PROMISC) ? 1 : 0,
384 (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, -1,
390 * link_start - enable a port
391 * @dev: the port to enable
393 * Performs the MAC and PHY actions needed to enable a port.
/*
 * Bring-up sequence visible here: t4_set_rxmode() (MTU and VLAN-Rx setting),
 * t4_change_mac() (the result is stored as the new exact-match filter
 * index), t4_link_start(), then t4_enable_vi().
 * NOTE(review): the error checks between these steps and the return
 * statement are not visible in this excerpt.
 */
395 static int link_start(struct net_device *dev)
398 struct port_info *pi = netdev_priv(dev);
399 unsigned int mb = pi->adapter->fn;
402 * We do not set address filters and promiscuity here, the stack does
403 * that step explicitly.
405 ret = t4_set_rxmode(pi->adapter, mb, pi->viid, dev->mtu, -1, -1, -1,
406 !!(dev->features & NETIF_F_HW_VLAN_RX), true);
408 ret = t4_change_mac(pi->adapter, mb, pi->viid,
409 pi->xact_addr_filt, dev->dev_addr, true,
/* Remember the value returned by t4_change_mac() as the filter index. */
412 pi->xact_addr_filt = ret;
417 ret = t4_link_start(pi->adapter, mb, pi->tx_chan,
420 ret = t4_enable_vi(pi->adapter, mb, pi->viid, true, true);
425 * Response queue handler for the FW event queue.
/*
 * fwevtq_handler - dispatch a message from the firmware event queue.
 * @q: the response queue
 * @rsp: the response descriptor; begins with an RSS header
 * @gl: gather list (not referenced in the visible lines)
 *
 * Dispatches on the opcode in the RSS header:
 *  - CPL_SGE_EGR_UPDATE: look up the Tx queue for the reported egress qid
 *    and either wake the Ethernet Tx queue or schedule the offload queue's
 *    resume tasklet;
 *  - CPL_FW6_MSG / CPL_FW4_MSG: hand the payload to t4_handle_fw_rpl();
 *  - CPL_L2T_WRITE_RPL: hand the reply to do_l2t_write_rpl();
 *  - anything else: log an error.
 * NOTE(review): some lines (declarations, else keywords, return) are not
 * visible in this excerpt.
 */
427 static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
428 const struct pkt_gl *gl)
430 u8 opcode = ((const struct rss_header *)rsp)->opcode;
432 rsp++; /* skip RSS header */
433 if (likely(opcode == CPL_SGE_EGR_UPDATE)) {
434 const struct cpl_sge_egr_update *p = (void *)rsp;
435 unsigned int qid = EGR_QID(ntohl(p->opcode_qid));
/* egr_map is indexed relative to egr_start. */
438 txq = q->adap->sge.egr_map[qid - q->adap->sge.egr_start];
/*
 * Queues whose address lies below sge.ofldtxq are treated as Ethernet Tx
 * queues.  NOTE(review): presumably relies on the Ethernet Tx queue array
 * preceding ofldtxq in struct sge -- confirm against the struct layout.
 */
440 if ((u8 *)txq < (u8 *)q->adap->sge.ofldtxq) {
441 struct sge_eth_txq *eq;
443 eq = container_of(txq, struct sge_eth_txq, q);
444 netif_tx_wake_queue(eq->txq);
446 struct sge_ofld_txq *oq;
448 oq = container_of(txq, struct sge_ofld_txq, q);
449 tasklet_schedule(&oq->qresume_tsk);
451 } else if (opcode == CPL_FW6_MSG || opcode == CPL_FW4_MSG) {
452 const struct cpl_fw6_msg *p = (void *)rsp;
455 t4_handle_fw_rpl(q->adap, p->data);
456 } else if (opcode == CPL_L2T_WRITE_RPL) {
457 const struct cpl_l2t_write_rpl *p = (void *)rsp;
459 do_l2t_write_rpl(q->adap, p);
461 dev_err(q->adap->pdev_dev,
462 "unexpected CPL %#x on FW event queue\n", opcode);
467 * uldrx_handler - response queue handler for ULD queues
468 * @q: the response queue that received the packet
469 * @rsp: the response queue descriptor holding the offload message
470 * @gl: the gather list of packet fragments
472 * Deliver an ingress offload packet to a ULD. All processing is done by
473 * the ULD, we just maintain statistics.
/*
 * Passes the message to the rx_handler registered for the queue's ULD
 * (ulds[q->uld]), using that ULD's per-adapter handle.
 * NOTE(review): the statistics updates and return statements that follow
 * the visible conditions are not visible in this excerpt.
 */
475 static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
476 const struct pkt_gl *gl)
/* Recover the enclosing offload Rx queue from its embedded response queue. */
478 struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);
480 if (ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], rsp, gl)) {
486 else if (gl == CXGB4_MSG_AN)
/*
 * disable_msi - turn off MSI-X or MSI on the adapter's PCI device.
 * @adapter: the adapter
 *
 * Disables whichever of MSI-X / MSI the adapter flags say is in use and
 * clears the corresponding flag; MSI-X is checked first.
 */
493 static void disable_msi(struct adapter *adapter)
495 if (adapter->flags & USING_MSIX) {
496 pci_disable_msix(adapter->pdev);
497 adapter->flags &= ~USING_MSIX;
498 } else if (adapter->flags & USING_MSI) {
499 pci_disable_msi(adapter->pdev);
500 adapter->flags &= ~USING_MSI;
505 * Interrupt handler for non-data events used with MSI-X.
/*
 * t4_nondata_intr - IRQ handler taking the adapter as its cookie.
 * @irq: interrupt number
 * @cookie: the struct adapter
 *
 * Reads this PF's PL_PF_INT_CAUSE register, writes the value back, and
 * calls t4_slow_intr_handler().
 * NOTE(review): the write-back presumably clears the cause bits -- confirm;
 * the condition guarding it and the return statement are not visible here.
 */
507 static irqreturn_t t4_nondata_intr(int irq, void *cookie)
509 struct adapter *adap = cookie;
511 u32 v = t4_read_reg(adap, MYPF_REG(PL_PF_INT_CAUSE));
514 t4_write_reg(adap, MYPF_REG(PL_PF_INT_CAUSE), v);
516 t4_slow_intr_handler(adap);
521 * Name the MSI-X interrupts.
/*
 * name_msix_vecs - fill in the description strings of the MSI-X vectors.
 * @adap: the adapter
 *
 * Vector 0 is named after port 0, vector 1 is "<port0>-FWeventq", and the
 * following vectors (starting at index 2) are named for the Ethernet Rx
 * queues of each port, then the offload queues, then the RDMA queues.
 * NOTE(review): the arguments of the "%s-Rx%d" snprintf() are not visible
 * in this excerpt.
 */
523 static void name_msix_vecs(struct adapter *adap)
/* n is the size of one desc buffer and bounds every snprintf() below. */
525 int i, j, msi_idx = 2, n = sizeof(adap->msix_info[0].desc);
527 /* non-data interrupts */
528 snprintf(adap->msix_info[0].desc, n, "%s", adap->port[0]->name);
531 snprintf(adap->msix_info[1].desc, n, "%s-FWeventq",
532 adap->port[0]->name);
534 /* Ethernet queues */
535 for_each_port(adap, j) {
536 struct net_device *d = adap->port[j];
537 const struct port_info *pi = netdev_priv(d);
539 for (i = 0; i < pi->nqsets; i++, msi_idx++)
540 snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d",
/* Offload and RDMA queues are named after port 0 plus the queue index. */
545 for_each_ofldrxq(&adap->sge, i)
546 snprintf(adap->msix_info[msi_idx++].desc, n, "%s-ofld%d",
547 adap->port[0]->name, i);
549 for_each_rdmarxq(&adap->sge, i)
550 snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d",
551 adap->port[0]->name, i);
/*
 * request_msix_queue_irqs - request the per-queue MSI-X interrupts.
 * @adap: the adapter
 *
 * Requests vector 1 for the firmware event queue, then vectors from index 2
 * for the Ethernet, offload and RDMA Rx queues, all with t4_sge_intr_msix
 * as the handler and the queue's rspq as the cookie.  The trailing lines
 * release IRQs in reverse order (RDMA, offload, Ethernet, then the firmware
 * event queue).
 * NOTE(review): the error checks, the increments of "msi", the unwind label
 * and the return statements are not visible in this excerpt.
 */
554 static int request_msix_queue_irqs(struct adapter *adap)
556 struct sge *s = &adap->sge;
557 int err, ethqidx, ofldqidx = 0, rdmaqidx = 0, msi = 2;
559 err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
560 adap->msix_info[1].desc, &s->fw_evtq);
564 for_each_ethrxq(s, ethqidx) {
565 err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
566 adap->msix_info[msi].desc,
567 &s->ethrxq[ethqidx].rspq);
572 for_each_ofldrxq(s, ofldqidx) {
573 err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
574 adap->msix_info[msi].desc,
575 &s->ofldrxq[ofldqidx].rspq);
580 for_each_rdmarxq(s, rdmaqidx) {
581 err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
582 adap->msix_info[msi].desc,
583 &s->rdmarxq[rdmaqidx].rspq);
/*
 * Unwind: each loop pre-decrements its queue index and "msi", walking the
 * vectors backwards.
 */
591 while (--rdmaqidx >= 0)
592 free_irq(adap->msix_info[--msi].vec,
593 &s->rdmarxq[rdmaqidx].rspq);
594 while (--ofldqidx >= 0)
595 free_irq(adap->msix_info[--msi].vec,
596 &s->ofldrxq[ofldqidx].rspq);
597 while (--ethqidx >= 0)
598 free_irq(adap->msix_info[--msi].vec, &s->ethrxq[ethqidx].rspq);
599 free_irq(adap->msix_info[1].vec, &s->fw_evtq);
/*
 * free_msix_queue_irqs - release the per-queue MSI-X interrupts.
 * @adap: the adapter
 *
 * Frees vector 1 (firmware event queue) and then one vector per Ethernet,
 * offload and RDMA Rx queue, in that order, advancing "msi" after each.
 * NOTE(review): the declaration and initial value of "i" and "msi" are not
 * visible in this excerpt.
 */
603 static void free_msix_queue_irqs(struct adapter *adap)
606 struct sge *s = &adap->sge;
608 free_irq(adap->msix_info[1].vec, &s->fw_evtq);
609 for_each_ethrxq(s, i)
610 free_irq(adap->msix_info[msi++].vec, &s->ethrxq[i].rspq);
611 for_each_ofldrxq(s, i)
612 free_irq(adap->msix_info[msi++].vec, &s->ofldrxq[i].rspq);
613 for_each_rdmarxq(s, i)
614 free_irq(adap->msix_info[msi++].vec, &s->rdmarxq[i].rspq);
618 * write_rss - write the RSS table for a given port
620 * @queues: array of queue indices for RSS
622 * Sets up the portion of the HW RSS table for the port's VI to distribute
623 * packets to the Rx queues in @queues.
/*
 * Builds a temporary array of pi->rss_size u16 entries by translating each
 * queue index in @queues (relative to the port's first queue set) into that
 * Rx queue's rspq.abs_id, then hands the array to t4_config_rss_range().
 * NOTE(review): the allocation-failure check, the release of the temporary
 * array and the return statement are not visible in this excerpt.
 */
625 static int write_rss(const struct port_info *pi, const u16 *queues)
/* q points at the first Ethernet Rx queue belonging to this port. */
629 const struct sge_eth_rxq *q = &pi->adapter->sge.ethrxq[pi->first_qset];
631 rss = kmalloc(pi->rss_size * sizeof(u16), GFP_KERNEL);
635 /* map the queue indices to queue ids */
636 for (i = 0; i < pi->rss_size; i++, queues++)
637 rss[i] = q[*queues].rspq.abs_id;
639 err = t4_config_rss_range(pi->adapter, pi->adapter->fn, pi->viid, 0,
640 pi->rss_size, rss, pi->rss_size);
646 * setup_rss - configure RSS
649 * Sets up RSS for each port.
/*
 * Calls write_rss() for every port with that port's own pi->rss table.
 * NOTE(review): the error check and return statements are not visible in
 * this excerpt.
 */
651 static int setup_rss(struct adapter *adap)
655 for_each_port(adap, i) {
656 const struct port_info *pi = adap2pinfo(adap, i);
658 err = write_rss(pi, pi->rss);
666 * Return the channel of the ingress queue with the given qid.
/*
 * @p: the SGE state holding ingr_map
 * @qid: ingress queue id; converted to an ingr_map index by subtracting
 *       p->ingr_start
 *
 * Returns the tx_chan of the port owning the net device attached to that
 * ingress queue.
 */
668 static unsigned int rxq_to_chan(const struct sge *p, unsigned int qid)
670 qid -= p->ingr_start;
671 return netdev2pinfo(p->ingr_map[qid]->netdev)->tx_chan;
675 * Wait until all NAPI handlers are descheduled.
/*
 * Walks every slot of sge.ingr_map and calls napi_disable() on the queue's
 * NAPI context.
 * NOTE(review): the condition guarding napi_disable() is not visible in
 * this excerpt.
 */
677 static void quiesce_rx(struct adapter *adap)
681 for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
682 struct sge_rspq *q = adap->sge.ingr_map[i];
685 napi_disable(&q->napi);
690 * Enable NAPI scheduling and interrupt generation for all Rx queues.
/*
 * Walks every slot of sge.ingr_map, enables the queue's NAPI context and
 * writes the queue's interrupt parameters and context id to this PF's
 * SGE_PF_GTS register.
 * NOTE(review): the conditions guarding these steps are not visible in this
 * excerpt.
 */
692 static void enable_rx(struct adapter *adap)
696 for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
697 struct sge_rspq *q = adap->sge.ingr_map[i];
702 napi_enable(&q->napi);
703 /* 0-increment GTS to start the timer and enable interrupts */
704 t4_write_reg(adap, MYPF_REG(SGE_PF_GTS),
705 SEINTARM(q->intr_params) |
706 INGRESSQID(q->cntxt_id));
711 * setup_sge_queues - configure SGE Tx/Rx/response queues
714 * Determines how many sets of SGE queues to use and initializes them.
715 * We support multiple queue sets per port if we have MSI-X, otherwise
716 * just one queue set per port.
/*
 * Allocation order visible here:
 *  1. an interrupt queue (s->intrq) and the firmware event queue;
 *  2. per port: Ethernet Rx queues, then Ethernet Tx queues;
 *  3. offload Rx/Tx queue pairs, spread across the ports;
 *  4. RDMA Rx queues;
 *  5. one control Tx queue per port.
 * Finally MPS_TRC_RSS_CONTROL is written with port 0's tx_chan and the
 * abs_id of the first Ethernet Rx queue.
 * NOTE(review): the error checks, several call arguments, the msi_idx
 * updates inside the loops and the return statements are not visible in
 * this excerpt.
 */
718 static int setup_sge_queues(struct adapter *adap)
720 int err, msi_idx, i, j;
721 struct sge *s = &adap->sge;
/* Start with empty starving free-list and Tx mapping-error bitmaps. */
723 bitmap_zero(s->starving_fl, MAX_EGRQ);
724 bitmap_zero(s->txq_maperr, MAX_EGRQ);
726 if (adap->flags & USING_MSIX)
727 msi_idx = 1; /* vector 0 is for non-queue interrupts */
/*
 * NOTE(review): presumably the non-MSI-X branch -- s->intrq is allocated and
 * msi_idx becomes a negative value derived from its abs_id.
 */
729 err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
733 msi_idx = -((int)s->intrq.abs_id + 1);
736 err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
737 msi_idx, NULL, fwevtq_handler);
/* Common failure exit: release all SGE resources. */
739 freeout: t4_free_sge_resources(adap);
743 for_each_port(adap, i) {
744 struct net_device *dev = adap->port[i];
745 struct port_info *pi = netdev_priv(dev);
/* q and t walk this port's slice of the Ethernet Rx and Tx queue arrays. */
746 struct sge_eth_rxq *q = &s->ethrxq[pi->first_qset];
747 struct sge_eth_txq *t = &s->ethtxq[pi->first_qset];
749 for (j = 0; j < pi->nqsets; j++, q++) {
752 err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
758 memset(&q->stats, 0, sizeof(q->stats));
/* Tx queues send their egress updates to the firmware event queue. */
760 for (j = 0; j < pi->nqsets; j++, t++) {
761 err = t4_sge_alloc_eth_txq(adap, t, dev,
762 netdev_get_tx_queue(dev, j),
763 s->fw_evtq.cntxt_id);
769 j = s->ofldqsets / adap->params.nports; /* ofld queues per channel */
770 for_each_ofldrxq(s, i) {
771 struct sge_ofld_rxq *q = &s->ofldrxq[i];
/* Offload queue i is attached to port i / j. */
772 struct net_device *dev = adap->port[i / j];
776 err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, msi_idx,
777 &q->fl, uldrx_handler);
780 memset(&q->stats, 0, sizeof(q->stats));
/* Record the absolute queue id in the ofld_rxq[] table. */
781 s->ofld_rxq[i] = q->rspq.abs_id;
782 err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i], dev,
783 s->fw_evtq.cntxt_id);
788 for_each_rdmarxq(s, i) {
789 struct sge_ofld_rxq *q = &s->rdmarxq[i];
793 err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i],
794 msi_idx, &q->fl, uldrx_handler);
797 memset(&q->stats, 0, sizeof(q->stats));
798 s->rdma_rxq[i] = q->rspq.abs_id;
801 for_each_port(adap, i) {
803 * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't
804 * have RDMA queues, and that's the right value.
806 err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i],
808 s->rdmarxq[i].rspq.cntxt_id);
813 t4_write_reg(adap, MPS_TRC_RSS_CONTROL,
814 RSSCONTROL(netdev2pinfo(adap->port[0])->tx_chan) |
815 QUEUENUMBER(s->ethrxq[0].rspq.abs_id));
820 * Returns 0 if new FW was successfully loaded, a positive errno if a load was
821 * started but failed, and a negative errno if flash load couldn't start.
/*
 * upgrade_fw - load the FW_FNAME firmware image onto the adapter if needed.
 * @adap: the adapter
 *
 * Requests the firmware file, rejects it when its major version differs
 * from FW_VERSION_MAJOR, and loads it when the adapter's current firmware
 * has the wrong major version or is older than the file.
 * NOTE(review): some lines (declarations, error checks, gotos, the final
 * return) are not visible in this excerpt.
 */
823 static int upgrade_fw(struct adapter *adap)
827 const struct fw_hdr *hdr;
828 const struct firmware *fw;
829 struct device *dev = adap->pdev_dev;
831 ret = request_firmware(&fw, FW_FNAME, dev);
833 dev_err(dev, "unable to load firmware image " FW_FNAME
834 ", error %d\n", ret);
/* The image starts with a struct fw_hdr; fw_ver is stored big-endian. */
838 hdr = (const struct fw_hdr *)fw->data;
839 vers = ntohl(hdr->fw_ver);
840 if (FW_HDR_FW_VER_MAJOR_GET(vers) != FW_VERSION_MAJOR) {
841 ret = -EINVAL; /* wrong major version, won't do */
846 * If the flash FW is unusable or we found something newer, load it.
848 if (FW_HDR_FW_VER_MAJOR_GET(adap->params.fw_vers) != FW_VERSION_MAJOR ||
849 vers > adap->params.fw_vers) {
/* The result of t4_load_fw() is negated before being stored in ret. */
850 ret = -t4_load_fw(adap, fw->data, fw->size);
852 dev_info(dev, "firmware upgraded to version %pI4 from "
853 FW_FNAME "\n", &hdr->fw_ver);
/* Single exit path: always release the firmware image. */
855 out: release_firmware(fw);
860 * Allocate a chunk of memory using kmalloc or, if that fails, vmalloc.
861 * The allocated memory is cleared.
/*
 * @size: number of bytes to allocate
 *
 * The visible line attempts a zeroed allocation with
 * kzalloc(size, GFP_KERNEL).
 * NOTE(review): the fallback path and the return statement are not visible
 * in this excerpt.
 */
863 void *t4_alloc_mem(size_t size)
865 void *p = kzalloc(size, GFP_KERNEL);
873 * Free memory allocated through t4_alloc_mem().
/*
 * @addr: address to release
 *
 * Uses is_vmalloc_addr() to decide how to release @addr.
 * NOTE(review): the two release calls are not visible in this excerpt.
 */
875 static void t4_free_mem(void *addr)
877 if (is_vmalloc_addr(addr))
/* Return the adapter's params.offload value. */
883 static inline int is_offload(const struct adapter *adap)
885 return adap->params.offload;
889 * Implementation of ethtool operations.
/* Return the msg_enable bitmap of the adapter that owns @dev. */
892 static u32 get_msglevel(struct net_device *dev)
894 return netdev2adap(dev)->msg_enable;
/*
 * Store @val as the msg_enable bitmap of the adapter that owns @dev; the
 * setting lives on the adapter, not on the individual net device.
 */
897 static void set_msglevel(struct net_device *dev, u32 val)
899 netdev2adap(dev)->msg_enable = val;
/*
 * Names of the statistics; get_strings() copies this table out and
 * get_sset_count() reports its number of entries.
 * NOTE(review): many entries of this table are not visible in this excerpt.
 */
902 static char stats_strings[][ETH_GSTRING_LEN] = {
905 "TxBroadcastFrames ",
906 "TxMulticastFrames ",
914 "TxFrames512To1023 ",
915 "TxFrames1024To1518 ",
916 "TxFrames1519ToMax ",
931 "RxBroadcastFrames ",
932 "RxMulticastFrames ",
946 "RxFrames512To1023 ",
947 "RxFrames1024To1518 ",
948 "RxFrames1519ToMax ",
960 "RxBG0FramesDropped ",
961 "RxBG1FramesDropped ",
962 "RxBG2FramesDropped ",
963 "RxBG3FramesDropped ",
/*
 * The visible return yields the number of entries in stats_strings[].
 * NOTE(review): the switch on @sset and the other return are not visible in
 * this excerpt.
 */
978 static int get_sset_count(struct net_device *dev, int sset)
982 return ARRAY_SIZE(stats_strings);
/* Size in bytes of the register dump buffer (160 KiB). */
988 #define T4_REGMAP_SIZE (160 * 1024)
/* Report the register dump size, T4_REGMAP_SIZE. */
990 static int get_regs_len(struct net_device *dev)
992 return T4_REGMAP_SIZE;
/* NOTE(review): the body of this function is not visible in this excerpt. */
995 static int get_eeprom_len(struct net_device *dev)
/*
 * get_drvinfo - fill in driver name, version, bus info and firmware version.
 * @dev: the net device
 * @info: structure to fill
 *
 * The firmware version is "N/A" when params.fw_vers is zero; otherwise it
 * is formatted from the firmware and TP version words.
 * NOTE(review): the name, version and bus info fields are written with
 * strcpy(), which does not bound the copy to the destination size.
 */
1000 static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
1002 struct adapter *adapter = netdev2adap(dev);
1004 strcpy(info->driver, KBUILD_MODNAME);
1005 strcpy(info->version, DRV_VERSION);
1006 strcpy(info->bus_info, pci_name(adapter->pdev));
1008 if (!adapter->params.fw_vers)
1009 strcpy(info->fw_version, "N/A");
/* Format "major.minor.micro.build" for the firmware, then for TP. */
1011 snprintf(info->fw_version, sizeof(info->fw_version),
1012 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1013 FW_HDR_FW_VER_MAJOR_GET(adapter->params.fw_vers),
1014 FW_HDR_FW_VER_MINOR_GET(adapter->params.fw_vers),
1015 FW_HDR_FW_VER_MICRO_GET(adapter->params.fw_vers),
1016 FW_HDR_FW_VER_BUILD_GET(adapter->params.fw_vers),
1017 FW_HDR_FW_VER_MAJOR_GET(adapter->params.tp_vers),
1018 FW_HDR_FW_VER_MINOR_GET(adapter->params.tp_vers),
1019 FW_HDR_FW_VER_MICRO_GET(adapter->params.tp_vers),
1020 FW_HDR_FW_VER_BUILD_GET(adapter->params.tp_vers));
/*
 * Copy the whole stats_strings[] table into @data when @stringset is
 * ETH_SS_STATS; other string sets leave @data untouched.
 */
1023 static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
1025 if (stringset == ETH_SS_STATS)
1026 memcpy(data, stats_strings, sizeof(stats_strings));
1030 * port stats maintained per queue of the port. They should be in the same
1031 * order as in stats_strings above.
/*
 * NOTE(review): the members are not visible in this excerpt.
 * collect_sge_port_stats() writes tx_csum, rx_csum, vlan_ex, vlan_ins,
 * gro_pkts and gro_merged.
 */
1033 struct queue_port_stats {
/*
 * collect_sge_port_stats - sum per-queue counters into per-port totals.
 * @adap: the adapter
 * @p: the port whose queue sets are summed
 * @s: output; zeroed first, then accumulated
 *
 * Walks the port's nqsets Ethernet Tx/Rx queue pairs starting at
 * first_qset.
 * NOTE(review): at least one accumulation line is not visible in this
 * excerpt.
 */
1043 static void collect_sge_port_stats(const struct adapter *adap,
1044 const struct port_info *p, struct queue_port_stats *s)
1047 const struct sge_eth_txq *tx = &adap->sge.ethtxq[p->first_qset];
1048 const struct sge_eth_rxq *rx = &adap->sge.ethrxq[p->first_qset];
1050 memset(s, 0, sizeof(*s));
1051 for (i = 0; i < p->nqsets; i++, rx++, tx++) {
1053 s->tx_csum += tx->tx_cso;
1054 s->rx_csum += rx->stats.rx_cso;
1055 s->vlan_ex += rx->stats.vlan_ex;
1056 s->vlan_ins += tx->vlan_ins;
/* The gro_* totals are fed from the queues' lro_* counters. */
1057 s->gro_pkts += rx->stats.lro_pkts;
1058 s->gro_merged += rx->stats.lro_merged;
/*
 * get_stats - fill the statistics buffer.
 *
 * The buffer is written in two parts: first a struct port_stats filled by
 * t4_get_port_stats(), then a struct queue_port_stats filled by
 * collect_sge_port_stats().
 * NOTE(review): the declaration of the "data" parameter is not visible in
 * this excerpt.
 */
1062 static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
1065 struct port_info *pi = netdev_priv(dev);
1066 struct adapter *adapter = pi->adapter;
1068 t4_get_port_stats(adapter, pi->tx_chan, (struct port_stats *)data);
/* Skip past the port_stats area, measured in u64 elements. */
1070 data += sizeof(struct port_stats) / sizeof(u64);
1071 collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1075 * Return a version number to identify the type of adapter. The scheme is:
1076 * - bits 0..9: chip version
1077 * - bits 10..15: chip revision
1078 * - bits 16..23: register dump version
/*
 * Build the version word: constant 4 in the low bits, the adapter revision
 * shifted to bit 10, and 1 shifted to bit 16.
 */
1080 static inline unsigned int mk_adap_vers(const struct adapter *ap)
1082 return 4 | (ap->params.rev << 10) | (1 << 16);
/*
 * reg_block_dump - copy a range of registers into a dump buffer.
 * @ap: the adapter
 * @buf: dump buffer; each register lands at byte offset equal to its address
 * @start: first register address
 *
 * Reads 32-bit registers from @start up to and including "end".
 * NOTE(review): the declaration of the "end" parameter is not visible in
 * this excerpt.
 */
1085 static void reg_block_dump(struct adapter *ap, void *buf, unsigned int start,
/* Arithmetic on the void pointer makes "start" a byte offset into buf. */
1088 u32 *p = buf + start;
1090 for ( ; start <= end; start += sizeof(u32))
1091 *p++ = t4_read_reg(ap, start);
/*
 * get_regs - produce the register dump.
 *
 * Sets regs->version from mk_adap_vers(), zeroes the T4_REGMAP_SIZE-byte
 * buffer, then dumps each register range with reg_block_dump().
 * NOTE(review): the "buf" parameter declaration and the contents of
 * reg_ranges[] are not visible in this excerpt.
 */
1094 static void get_regs(struct net_device *dev, struct ethtool_regs *regs,
1097 static const unsigned int reg_ranges[] = {
1318 struct adapter *ap = netdev2adap(dev);
1320 regs->version = mk_adap_vers(ap);
1322 memset(buf, 0, T4_REGMAP_SIZE);
/* reg_ranges[] holds (first, last) pairs, hence the stride of 2. */
1323 for (i = 0; i < ARRAY_SIZE(reg_ranges); i += 2)
1324 reg_block_dump(ap, buf, reg_ranges[i], reg_ranges[i + 1]);
/*
 * restart_autoneg - restart link autonegotiation on a port.
 * @dev: the net device
 *
 * Checks that the device is running and that autonegotiation is enabled,
 * then calls t4_restart_aneg().
 * NOTE(review): the values returned after each check are not visible in
 * this excerpt.
 */
1327 static int restart_autoneg(struct net_device *dev)
1329 struct port_info *p = netdev_priv(dev);
1331 if (!netif_running(dev))
1333 if (p->link_cfg.autoneg != AUTONEG_ENABLE)
1335 t4_restart_aneg(p->adapter, p->adapter->fn, p->tx_chan);
/*
 * identify_port - ask the firmware to identify a port.
 * @dev: the net device
 * @data: duration in seconds; a default of 2 seconds is applied on the
 *        visible assignment
 *
 * NOTE(review): the condition guarding the default and the last argument
 * of t4_identify_port() are not visible in this excerpt.
 */
1339 static int identify_port(struct net_device *dev, u32 data)
1341 struct adapter *adap = netdev2adap(dev);
1344 data = 2; /* default to 2 seconds */
1346 return t4_identify_port(adap, adap->fn, netdev2pinfo(dev)->viid,
/*
 * from_fw_linkcaps - translate firmware port type/capabilities to ethtool
 * SUPPORTED_* bits.
 * @type: firmware port type (FW_PORT_TYPE_*)
 * @caps: firmware capability bits (FW_PORT_CAP_*)
 *
 * The port type selects the media family (BASE-T, backplane, fibre) and the
 * speed capabilities select the individual link modes.  SUPPORTED_Autoneg
 * is added when FW_PORT_CAP_ANEG is set.
 * NOTE(review): the declaration of "v", one line of the BASE-T branch and
 * the return statement are not visible in this excerpt.
 */
1350 static unsigned int from_fw_linkcaps(unsigned int type, unsigned int caps)
/* BASE-T port types: one link mode per supported speed. */
1354 if (type == FW_PORT_TYPE_BT_SGMII || type == FW_PORT_TYPE_BT_XFI ||
1355 type == FW_PORT_TYPE_BT_XAUI) {
1357 if (caps & FW_PORT_CAP_SPEED_100M)
1358 v |= SUPPORTED_100baseT_Full;
1359 if (caps & FW_PORT_CAP_SPEED_1G)
1360 v |= SUPPORTED_1000baseT_Full;
1361 if (caps & FW_PORT_CAP_SPEED_10G)
1362 v |= SUPPORTED_10000baseT_Full;
/* KX4/KX backplane types. */
1363 } else if (type == FW_PORT_TYPE_KX4 || type == FW_PORT_TYPE_KX) {
1364 v |= SUPPORTED_Backplane;
1365 if (caps & FW_PORT_CAP_SPEED_1G)
1366 v |= SUPPORTED_1000baseKX_Full;
1367 if (caps & FW_PORT_CAP_SPEED_10G)
1368 v |= SUPPORTED_10000baseKX4_Full;
/* The remaining backplane types get a fixed set of modes regardless of caps. */
1369 } else if (type == FW_PORT_TYPE_KR)
1370 v |= SUPPORTED_Backplane | SUPPORTED_10000baseKR_Full;
1371 else if (type == FW_PORT_TYPE_BP_AP)
1372 v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
1373 SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full;
1374 else if (type == FW_PORT_TYPE_BP4_AP)
1375 v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
1376 SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full |
1377 SUPPORTED_10000baseKX4_Full;
1378 else if (type == FW_PORT_TYPE_FIBER_XFI ||
1379 type == FW_PORT_TYPE_FIBER_XAUI || type == FW_PORT_TYPE_SFP)
1380 v |= SUPPORTED_FIBRE;
1382 if (caps & FW_PORT_CAP_ANEG)
1383 v |= SUPPORTED_Autoneg;
/*
 * to_fw_linkcaps - translate ethtool ADVERTISED_* bits to firmware speed
 * capabilities.
 * @caps: ethtool advertising mask
 *
 * Only the 100M/1G/10G BASE-T full-duplex bits are examined.
 * NOTE(review): the declaration of "v" and the return statement are not
 * visible in this excerpt.
 */
1387 static unsigned int to_fw_linkcaps(unsigned int caps)
1391 if (caps & ADVERTISED_100baseT_Full)
1392 v |= FW_PORT_CAP_SPEED_100M;
1393 if (caps & ADVERTISED_1000baseT_Full)
1394 v |= FW_PORT_CAP_SPEED_1G;
1395 if (caps & ADVERTISED_10000baseT_Full)
1396 v |= FW_PORT_CAP_SPEED_10G;
/*
 * get_settings - report link settings for a port.
 * @dev: the net device
 * @cmd: ethtool command structure to fill
 *
 * Fills in the connector type, PHY/MDIO information, supported and
 * advertised link modes, speed, duplex and autoneg state.
 * NOTE(review): some else keywords and the trailing statements are not
 * visible in this excerpt.
 */
1400 static int get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
1402 const struct port_info *p = netdev_priv(dev);
/* Connector type: BASE-T -> TP, fibre types -> FIBRE, SFP depends on module. */
1404 if (p->port_type == FW_PORT_TYPE_BT_SGMII ||
1405 p->port_type == FW_PORT_TYPE_BT_XFI ||
1406 p->port_type == FW_PORT_TYPE_BT_XAUI)
1407 cmd->port = PORT_TP;
1408 else if (p->port_type == FW_PORT_TYPE_FIBER_XFI ||
1409 p->port_type == FW_PORT_TYPE_FIBER_XAUI)
1410 cmd->port = PORT_FIBRE;
1411 else if (p->port_type == FW_PORT_TYPE_SFP) {
/* Twinax modules (passive or active) are reported as direct attach. */
1412 if (p->mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1413 p->mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1414 cmd->port = PORT_DA;
1416 cmd->port = PORT_FIBRE;
1418 cmd->port = PORT_OTHER;
/* A non-negative mdio_addr means an external PHY reachable over MDIO. */
1420 if (p->mdio_addr >= 0) {
1421 cmd->phy_address = p->mdio_addr;
1422 cmd->transceiver = XCVR_EXTERNAL;
1423 cmd->mdio_support = p->port_type == FW_PORT_TYPE_BT_SGMII ?
1424 MDIO_SUPPORTS_C22 : MDIO_SUPPORTS_C45;
1426 cmd->phy_address = 0; /* not really, but no better option */
1427 cmd->transceiver = XCVR_INTERNAL;
1428 cmd->mdio_support = 0;
1431 cmd->supported = from_fw_linkcaps(p->port_type, p->link_cfg.supported);
1432 cmd->advertising = from_fw_linkcaps(p->port_type,
1433 p->link_cfg.advertising);
/* Speed is reported as 0 while the carrier is down. */
1434 cmd->speed = netif_carrier_ok(dev) ? p->link_cfg.speed : 0;
1435 cmd->duplex = DUPLEX_FULL;
1436 cmd->autoneg = p->link_cfg.autoneg;
/*
 * speed_to_caps - map an ethtool SPEED_* value to a firmware speed
 * capability bit.
 * @speed: SPEED_100, SPEED_1000 or SPEED_10000
 *
 * NOTE(review): the value returned for any other speed is not visible in
 * this excerpt.
 */
1442 static unsigned int speed_to_caps(int speed)
1444 if (speed == SPEED_100)
1445 return FW_PORT_CAP_SPEED_100M;
1446 if (speed == SPEED_1000)
1447 return FW_PORT_CAP_SPEED_1G;
1448 if (speed == SPEED_10000)
1449 return FW_PORT_CAP_SPEED_10G;
/*
 * set_settings - apply link settings requested through ethtool.
 * @dev: the net device
 * @cmd: requested settings
 *
 * Only full duplex is accepted.  With autonegotiation disabled a forced
 * speed is recorded in lc->requested_speed; otherwise the advertised modes
 * are converted with to_fw_linkcaps() and FW_PORT_CAP_ANEG is added.  When
 * the device is running the new configuration is pushed with
 * t4_link_start().
 * NOTE(review): the return statements and else keywords are not visible in
 * this excerpt.
 */
1453 static int set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
1456 struct port_info *p = netdev_priv(dev);
1457 struct link_config *lc = &p->link_cfg;
1459 if (cmd->duplex != DUPLEX_FULL) /* only full-duplex supported */
1462 if (!(lc->supported & FW_PORT_CAP_ANEG)) {
1464 * PHY offers a single speed. See if that's what's
1467 if (cmd->autoneg == AUTONEG_DISABLE &&
1468 (lc->supported & speed_to_caps(cmd->speed)))
1473 if (cmd->autoneg == AUTONEG_DISABLE) {
1474 cap = speed_to_caps(cmd->speed);
/*
 * True when the speed is unsupported, or is 1000 or 10000; the statement
 * this condition guards is not visible in this excerpt.
 */
1476 if (!(lc->supported & cap) || cmd->speed == SPEED_1000 ||
1477 cmd->speed == SPEED_10000)
1479 lc->requested_speed = cap;
1480 lc->advertising = 0;
1482 cap = to_fw_linkcaps(cmd->advertising);
1483 if (!(lc->supported & cap))
1485 lc->requested_speed = 0;
1486 lc->advertising = cap | FW_PORT_CAP_ANEG;
1488 lc->autoneg = cmd->autoneg;
1490 if (netif_running(dev))
1491 return t4_link_start(p->adapter, p->adapter->fn, p->tx_chan,
/*
 * get_pauseparam - report pause (flow control) settings.
 * @dev: the net device
 * @epause: structure to fill
 *
 * autoneg reflects the requested flow-control setting, while rx_pause and
 * tx_pause reflect the current link_cfg.fc value.
 */
1496 static void get_pauseparam(struct net_device *dev,
1497 struct ethtool_pauseparam *epause)
1499 struct port_info *p = netdev_priv(dev);
1501 epause->autoneg = (p->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1502 epause->rx_pause = (p->link_cfg.fc & PAUSE_RX) != 0;
1503 epause->tx_pause = (p->link_cfg.fc & PAUSE_TX) != 0;
/*
 * set_pauseparam - apply pause (flow control) settings.
 * @dev: the net device
 * @epause: requested settings
 *
 * Rebuilds lc->requested_fc from the autoneg, rx_pause and tx_pause
 * requests; PAUSE_AUTONEG is only set when the link supports
 * autonegotiation.  When the device is running the new configuration is
 * pushed with t4_link_start().
 * NOTE(review): the branch taken when autoneg is requested but unsupported,
 * and the final return, are not visible in this excerpt.
 */
1506 static int set_pauseparam(struct net_device *dev,
1507 struct ethtool_pauseparam *epause)
1509 struct port_info *p = netdev_priv(dev);
1510 struct link_config *lc = &p->link_cfg;
1512 if (epause->autoneg == AUTONEG_DISABLE)
1513 lc->requested_fc = 0;
1514 else if (lc->supported & FW_PORT_CAP_ANEG)
1515 lc->requested_fc = PAUSE_AUTONEG;
1519 if (epause->rx_pause)
1520 lc->requested_fc |= PAUSE_RX;
1521 if (epause->tx_pause)
1522 lc->requested_fc |= PAUSE_TX;
1523 if (netif_running(dev))
1524 return t4_link_start(p->adapter, p->adapter->fn, p->tx_chan,
/* Return the RX_CSO bit of the port's rx_offload flags (non-zero when set). */
1529 static u32 get_rx_csum(struct net_device *dev)
1531 struct port_info *p = netdev_priv(dev);
1533 return p->rx_offload & RX_CSO;
/*
 * Set or clear the RX_CSO bit in the port's rx_offload flags.
 * NOTE(review): the condition on @data selecting between the two
 * assignments and the return statement are not visible in this excerpt.
 */
1536 static int set_rx_csum(struct net_device *dev, u32 data)
1538 struct port_info *p = netdev_priv(dev);
1541 p->rx_offload |= RX_CSO;
1543 p->rx_offload &= ~RX_CSO;
/*
 * get_sge_param - report ring sizes.
 * @dev: the net device
 * @e: structure to fill
 *
 * Current sizes are taken from the port's first queue set.  The free-list
 * size is reported minus 8, mirroring the "+ 8" that set_sge_param() adds
 * when storing it.  The response queue is reported as the "mini" ring.
 */
1547 static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
1549 const struct port_info *pi = netdev_priv(dev);
1550 const struct sge *s = &pi->adapter->sge;
1552 e->rx_max_pending = MAX_RX_BUFFERS;
1553 e->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1554 e->rx_jumbo_max_pending = 0;
1555 e->tx_max_pending = MAX_TXQ_ENTRIES;
1557 e->rx_pending = s->ethrxq[pi->first_qset].fl.size - 8;
1558 e->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1559 e->rx_jumbo_pending = 0;
1560 e->tx_pending = s->ethtxq[pi->first_qset].q.size;
/*
 * set_sge_param - change ring sizes.
 * @dev: the net device
 * @e: requested sizes
 *
 * Validates the request against the MIN_ and MAX_ limits (a jumbo ring is
 * not supported), checks the FULL_INIT_DONE flag, then stores the sizes in
 * every queue set of the port.
 * NOTE(review): the return statements after the two checks are not visible
 * in this excerpt.
 */
1563 static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
1566 const struct port_info *pi = netdev_priv(dev);
1567 struct adapter *adapter = pi->adapter;
1568 struct sge *s = &adapter->sge;
1570 if (e->rx_pending > MAX_RX_BUFFERS || e->rx_jumbo_pending ||
1571 e->tx_pending > MAX_TXQ_ENTRIES ||
1572 e->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1573 e->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1574 e->rx_pending < MIN_FL_ENTRIES || e->tx_pending < MIN_TXQ_ENTRIES)
1577 if (adapter->flags & FULL_INIT_DONE)
1580 for (i = 0; i < pi->nqsets; ++i) {
1581 s->ethtxq[pi->first_qset + i].q.size = e->tx_pending;
/* Free-list size is stored with 8 extra entries; get_sge_param() subtracts them. */
1582 s->ethrxq[pi->first_qset + i].fl.size = e->rx_pending + 8;
1583 s->ethrxq[pi->first_qset + i].rspq.size = e->rx_mini_pending;
/*
 * Scan the SGE timer_val[] table for the entry nearest to @time, tracking
 * the smallest difference seen so far in min_delta and a candidate index in
 * match (presumably the value returned -- TODO confirm).
 */
1588 static int closest_timer(const struct sge *s, int time)
1590 int i, delta, match = 0, min_delta = INT_MAX;
1592 for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
1593 delta = time - s->timer_val[i];
1596 if (delta < min_delta) {
/*
 * Scan the SGE counter_val[] table for the entry nearest to @thres, tracking
 * the smallest difference seen so far in min_delta and a candidate index in
 * match (presumably the value returned -- TODO confirm).
 */
1604 static int closest_thres(const struct sge *s, int thres)
1606 int i, delta, match = 0, min_delta = INT_MAX;
1608 for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
1609 delta = thres - s->counter_val[i];
1612 if (delta < min_delta) {
1621 * Return a queue's interrupt hold-off time in us. 0 means no timer.
1623 static unsigned int qtimer_val(const struct adapter *adap,
1624 const struct sge_rspq *q)
1626 unsigned int idx = q->intr_params >> 1;
1628 return idx < SGE_NTIMERS ? adap->sge.timer_val[idx] : 0;
1632 * set_rxq_intr_params - set a queue's interrupt holdoff parameters
1633 * @adap: the adapter
1635 * @us: the hold-off time in us, or 0 to disable timer
1636 * @cnt: the hold-off packet count, or 0 to disable counter
1638 * Sets an Rx queue's interrupt hold-off time and packet count. At least
1639 * one of the two needs to be enabled for the queue to generate interrupts.
/*
 * Program the hold-off timer index and packet-count threshold of response
 * queue @q.  The requested values are mapped to the nearest entries of the
 * SGE timer and counter tables.
 */
1641 static int set_rxq_intr_params(struct adapter *adap, struct sge_rspq *q,
1642 unsigned int us, unsigned int cnt)
/* both mechanisms disabled: see the requirement in the header comment */
1644 if ((us | cnt) == 0)
1651 new_idx = closest_thres(&adap->sge, cnt);
1652 if (q->desc && q->pktcnt_idx != new_idx) {
1653 /* the queue has already been created, update it */
1654 v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1655 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1656 FW_PARAMS_PARAM_YZ(q->cntxt_id);
1657 err = t4_set_params(adap, adap->fn, adap->fn, 0, 1, &v,
1662 q->pktcnt_idx = new_idx;
/*
 * NOTE(review): timer index 6 is used when @us is 0; qtimer_val() reports
 * indices >= SGE_NTIMERS as "no timer" -- confirm 6 is such an index.
 */
1665 us = us == 0 ? 6 : closest_timer(&adap->sge, us);
1666 q->intr_params = QINTR_TIMER_IDX(us) | (cnt > 0 ? QINTR_CNT_EN : 0);
1670 static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
1672 const struct port_info *pi = netdev_priv(dev);
1673 struct adapter *adap = pi->adapter;
1675 return set_rxq_intr_params(adap, &adap->sge.ethrxq[pi->first_qset].rspq,
1676 c->rx_coalesce_usecs, c->rx_max_coalesced_frames);
/*
 * ethtool get_coalesce handler: reports the hold-off time and packet count
 * of the response queue of the port's first queue set.
 */
1679 static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
1681 const struct port_info *pi = netdev_priv(dev);
1682 const struct adapter *adap = pi->adapter;
1683 const struct sge_rspq *rq = &adap->sge.ethrxq[pi->first_qset].rspq;
1685 c->rx_coalesce_usecs = qtimer_val(adap, rq);
/* the packet count is only meaningful while the counter is enabled */
1686 c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN) ?
1687 adap->sge.counter_val[rq->pktcnt_idx] : 0;
1692 * eeprom_ptov - translate a physical EEPROM address to virtual
1693 * @phys_addr: the physical EEPROM address
1694 * @fn: the PCI function number
1695 * @sz: size of function-specific area
1697 * Translate a physical EEPROM address to virtual. The first 1K is
1698 * accessed through virtual addresses starting at 31K, the rest is
1699 * accessed through virtual addresses starting at 0.
1701 * The mapping is as follows:
1702 * [0..1K) -> [31K..32K)
1703 * [1K..1K+A) -> [31K-A..31K)
1704 * [1K+A..ES) -> [0..ES-A-1K)
1706 * where A = @fn * @sz, and ES = EEPROM size.
1708 static int eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz)
/*
 * NOTE(review): the comparisons below use fn where the header comment uses
 * A = fn * sz; presumably fn has been scaled by sz by this point -- confirm.
 */
/* first 1K: mapped to the 1K window starting at 31K (31 << 10) */
1711 if (phys_addr < 1024)
1712 return phys_addr + (31 << 10);
/* function-specific area: mapped to the region ending at 31K (31744) */
1713 if (phys_addr < 1024 + fn)
1714 return 31744 - fn + phys_addr - 1024;
/* remainder of the EEPROM: mapped starting at virtual address 0 */
1715 if (phys_addr < EEPROMSIZE)
1716 return phys_addr - 1024 - fn;
1721 * The next two routines implement eeprom read/write from physical addresses.
/*
 * Read the 32-bit word at physical EEPROM address @phys_addr into @v.  The
 * address is translated with eeprom_ptov() for this adapter's function and
 * the word is fetched through the PCI VPD interface.  A negative result
 * from pci_read_vpd() is returned as is, anything else becomes 0.
 */
1723 static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v)
1725 int vaddr = eeprom_ptov(phys_addr, adap->fn, EEPROMPFSIZE);
1728 vaddr = pci_read_vpd(adap->pdev, vaddr, sizeof(u32), v);
1729 return vaddr < 0 ? vaddr : 0;
/*
 * Write the 32-bit word @v at physical EEPROM address @phys_addr.  The
 * address is translated with eeprom_ptov() for this adapter's function and
 * the word is stored through the PCI VPD interface.  A negative result
 * from pci_write_vpd() is returned as is, anything else becomes 0.
 */
1732 static int eeprom_wr_phys(struct adapter *adap, unsigned int phys_addr, u32 v)
1734 int vaddr = eeprom_ptov(phys_addr, adap->fn, EEPROMPFSIZE);
1737 vaddr = pci_write_vpd(adap->pdev, vaddr, sizeof(u32), &v);
1738 return vaddr < 0 ? vaddr : 0;
/* magic value reported by get_eeprom() and required by set_eeprom() */
1741 #define EEPROM_MAGIC 0x38E2F10C
/*
 * ethtool get_eeprom handler.  Reads the 32-bit words covering
 * [offset, offset + len) into a scratch buffer of EEPROMSIZE bytes indexed
 * by physical address, then copies the requested bytes to @data.
 */
1743 static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e,
1747 struct adapter *adapter = netdev2adap(dev);
1749 u8 *buf = kmalloc(EEPROMSIZE, GFP_KERNEL);
1753 e->magic = EEPROM_MAGIC;
/* start at the word containing e->offset; stop at the first read error */
1754 for (i = e->offset & ~3; !err && i < e->offset + e->len; i += 4)
1755 err = eeprom_rd_phys(adapter, i, (u32 *)&buf[i]);
1758 memcpy(data, buf + e->offset, e->len);
/*
 * ethtool set_eeprom handler.  Writes are performed in whole 32-bit words,
 * so an unaligned request is widened to word boundaries and the partial
 * first/last words are read back first (read-modify-write).  Write
 * protection is lifted with t4_seeprom_wp() around the write loop.
 */
1763 static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
1768 u32 aligned_offset, aligned_len, *p;
1769 struct adapter *adapter = netdev2adap(dev);
1771 if (eeprom->magic != EEPROM_MAGIC)
/* widen the request to 4-byte boundaries on both ends */
1774 aligned_offset = eeprom->offset & ~3;
1775 aligned_len = (eeprom->len + (eeprom->offset & 3) + 3) & ~3;
/*
 * Functions other than 0 are confined to their own EEPROMPFSIZE-sized
 * area, which starts at 1K + fn * EEPROMPFSIZE.
 */
1777 if (adapter->fn > 0) {
1778 u32 start = 1024 + adapter->fn * EEPROMPFSIZE;
1780 if (aligned_offset < start ||
1781 aligned_offset + aligned_len > start + EEPROMPFSIZE)
1785 if (aligned_offset != eeprom->offset || aligned_len != eeprom->len) {
1787 * RMW possibly needed for first or last words.
1789 buf = kmalloc(aligned_len, GFP_KERNEL);
/* fetch the first word and, if distinct, the last word */
1792 err = eeprom_rd_phys(adapter, aligned_offset, (u32 *)buf);
1793 if (!err && aligned_len > 4)
1794 err = eeprom_rd_phys(adapter,
1795 aligned_offset + aligned_len - 4,
1796 (u32 *)&buf[aligned_len - 4]);
/* overlay the caller's bytes at their position inside the first word */
1799 memcpy(buf + (eeprom->offset & 3), data, eeprom->len);
1803 err = t4_seeprom_wp(adapter, false);
/* write one word at a time, stopping at the first error */
1807 for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
1808 err = eeprom_wr_phys(adapter, aligned_offset, *p);
1809 aligned_offset += 4;
1813 err = t4_seeprom_wp(adapter, true);
/*
 * ethtool flash_device handler: loads the firmware image named by ef->data
 * with request_firmware(), hands it to t4_load_fw() and releases it again.
 */
1820 static int set_flash(struct net_device *netdev, struct ethtool_flash *ef)
1823 const struct firmware *fw;
1824 struct adapter *adap = netdev2adap(netdev);
/* force NUL termination of the caller-supplied file name */
1826 ef->data[sizeof(ef->data) - 1] = '\0';
1827 ret = request_firmware(&fw, ef->data, adap->pdev_dev);
1831 ret = t4_load_fw(adap, fw->data, fw->size);
1832 release_firmware(fw);
1834 dev_info(adap->pdev_dev, "loaded firmware %s\n", ef->data);
1838 #define WOL_SUPPORTED (WAKE_BCAST | WAKE_MAGIC)
1839 #define BCAST_CRC 0xa0ccc1a6
1841 static void get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
1843 wol->supported = WAKE_BCAST | WAKE_MAGIC;
1844 wol->wolopts = netdev2adap(dev)->wol;
1845 memset(&wol->sopass, 0, sizeof(wol->sopass));
/*
 * ethtool set_wol handler.  Rejects options outside WOL_SUPPORTED, then
 * programs magic-packet and broadcast-pattern wake-up for the port's Tx
 * channel.
 * NOTE(review): nothing visible here stores wol->wolopts in the adapter's
 * wol field that get_wol() reports -- confirm where that is updated.
 */
1848 static int set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
1851 struct port_info *pi = netdev_priv(dev);
1853 if (wol->wolopts & ~WOL_SUPPORTED)
/* a NULL address is passed when magic-packet wake-up is not requested */
1855 t4_wol_magic_enable(pi->adapter, pi->tx_chan,
1856 (wol->wolopts & WAKE_MAGIC) ? dev->dev_addr : NULL);
1857 if (wol->wolopts & WAKE_BCAST) {
1858 err = t4_wol_pat_enable(pi->adapter, pi->tx_chan, 0xfe, ~0ULL,
1861 err = t4_wol_pat_enable(pi->adapter, pi->tx_chan, 1,
1862 ~6ULL, ~0ULL, BCAST_CRC, true);
/* result of this call is intentionally not recorded in err */
1864 t4_wol_pat_enable(pi->adapter, pi->tx_chan, 0, 0, 0, 0, false);
/* all TSO-related feature bits toggled together by set_tso() */
1868 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
/*
 * ethtool set_tso handler: sets or clears all TSO_FLAGS bits in
 * dev->features (presumably selected by @value -- TODO confirm).
 */
1870 static int set_tso(struct net_device *dev, u32 value)
1873 dev->features |= TSO_FLAGS;
1875 dev->features &= ~TSO_FLAGS;
/*
 * ethtool set_flags handler.  Lets ethtool_op_set_flags() update
 * dev->features for the RXHASH/RXVLAN/TXVLAN flags, then, if the
 * NETIF_F_HW_VLAN_RX feature bit changed, reprograms the virtual
 * interface's Rx mode through t4_set_rxmode().
 */
1879 static int set_flags(struct net_device *dev, u32 flags)
/* remembered so the features can be restored (see the assignment below) */
1882 unsigned long old_feat = dev->features;
1884 err = ethtool_op_set_flags(dev, flags, ETH_FLAG_RXHASH |
1885 ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN);
1889 if ((old_feat ^ dev->features) & NETIF_F_HW_VLAN_RX) {
1890 const struct port_info *pi = netdev_priv(dev);
/* -1 arguments leave the corresponding Rx mode settings unchanged -- TODO confirm */
1892 err = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, -1,
1893 -1, -1, -1, !!(flags & ETH_FLAG_RXVLAN),
1896 dev->features = old_feat;
/*
 * ethtool get_rxfh_indir handler.  Copies entries of the port's RSS table
 * into the caller's array, never more than min(p->size, pi->rss_size), and
 * reports the full table size back in p->size.
 */
1901 static int get_rss_table(struct net_device *dev, struct ethtool_rxfh_indir *p)
1903 const struct port_info *pi = netdev_priv(dev);
/* computed before p->size is overwritten with the real table size */
1904 unsigned int n = min_t(unsigned int, p->size, pi->rss_size);
1906 p->size = pi->rss_size;
1908 p->ring_index[n] = pi->rss[n];
/*
 * ethtool set_rxfh_indir handler.  The supplied table must have exactly
 * pi->rss_size entries and every entry is checked against the port's
 * number of queue sets before the table is copied into pi->rss.  Once the
 * adapter is fully initialized the table is also written out with
 * write_rss().
 */
1912 static int set_rss_table(struct net_device *dev,
1913 const struct ethtool_rxfh_indir *p)
1916 struct port_info *pi = netdev_priv(dev);
1918 if (p->size != pi->rss_size)
/* validate the whole table first so a bad entry leaves pi->rss untouched */
1920 for (i = 0; i < p->size; i++)
1921 if (p->ring_index[i] >= pi->nqsets)
1923 for (i = 0; i < p->size; i++)
1924 pi->rss[i] = p->ring_index[i];
1925 if (pi->adapter->flags & FULL_INIT_DONE)
1926 return write_rss(pi, pi->rss);
/*
 * ethtool get_rxnfc handler.
 *
 * ETHTOOL_GRXFH: derives the hashed header fields for info->flow_type from
 * the port's RSS mode bits.  Four-tuple modes report IP source/destination
 * plus both L4 byte pairs, two-tuple modes report the IP addresses only;
 * the UDP variants additionally require FW_RSS_VI_CONFIG_CMD_UDPEN for the
 * four-tuple result.
 *
 * ETHTOOL_GRXRINGS: reports the port's number of queue sets.
 */
1930 static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
1933 const struct port_info *pi = netdev_priv(dev);
1935 switch (info->cmd) {
1936 case ETHTOOL_GRXFH: {
1937 unsigned int v = pi->rss_mode;
1940 switch (info->flow_type) {
1942 if (v & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
1943 info->data = RXH_IP_SRC | RXH_IP_DST |
1944 RXH_L4_B_0_1 | RXH_L4_B_2_3;
1945 else if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
1946 info->data = RXH_IP_SRC | RXH_IP_DST;
1949 if ((v & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) &&
1950 (v & FW_RSS_VI_CONFIG_CMD_UDPEN))
1951 info->data = RXH_IP_SRC | RXH_IP_DST |
1952 RXH_L4_B_0_1 | RXH_L4_B_2_3;
1953 else if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
1954 info->data = RXH_IP_SRC | RXH_IP_DST;
1957 case AH_ESP_V4_FLOW:
1959 if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
1960 info->data = RXH_IP_SRC | RXH_IP_DST;
/* IPv6 flow types follow the same pattern with the IP6 mode bits */
1963 if (v & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
1964 info->data = RXH_IP_SRC | RXH_IP_DST |
1965 RXH_L4_B_0_1 | RXH_L4_B_2_3;
1966 else if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
1967 info->data = RXH_IP_SRC | RXH_IP_DST;
1970 if ((v & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) &&
1971 (v & FW_RSS_VI_CONFIG_CMD_UDPEN))
1972 info->data = RXH_IP_SRC | RXH_IP_DST |
1973 RXH_L4_B_0_1 | RXH_L4_B_2_3;
1974 else if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
1975 info->data = RXH_IP_SRC | RXH_IP_DST;
1978 case AH_ESP_V6_FLOW:
1980 if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
1981 info->data = RXH_IP_SRC | RXH_IP_DST;
1986 case ETHTOOL_GRXRINGS:
1987 info->data = pi->nqsets;
1993 static struct ethtool_ops cxgb_ethtool_ops = {
1994 .get_settings = get_settings,
1995 .set_settings = set_settings,
1996 .get_drvinfo = get_drvinfo,
1997 .get_msglevel = get_msglevel,
1998 .set_msglevel = set_msglevel,
1999 .get_ringparam = get_sge_param,
2000 .set_ringparam = set_sge_param,
2001 .get_coalesce = get_coalesce,
2002 .set_coalesce = set_coalesce,
2003 .get_eeprom_len = get_eeprom_len,
2004 .get_eeprom = get_eeprom,
2005 .set_eeprom = set_eeprom,
2006 .get_pauseparam = get_pauseparam,
2007 .set_pauseparam = set_pauseparam,
2008 .get_rx_csum = get_rx_csum,
2009 .set_rx_csum = set_rx_csum,
2010 .set_tx_csum = ethtool_op_set_tx_ipv6_csum,
2011 .set_sg = ethtool_op_set_sg,
2012 .get_link = ethtool_op_get_link,
2013 .get_strings = get_strings,
2014 .phys_id = identify_port,
2015 .nway_reset = restart_autoneg,
2016 .get_sset_count = get_sset_count,
2017 .get_ethtool_stats = get_stats,
2018 .get_regs_len = get_regs_len,
2019 .get_regs = get_regs,
2023 .set_flags = set_flags,
2024 .get_rxnfc = get_rxnfc,
2025 .get_rxfh_indir = get_rss_table,
2026 .set_rxfh_indir = set_rss_table,
2027 .flash_device = set_flash,
/*
 * debugfs open handler: hands the inode's private pointer (see
 * add_debugfs_mem(), which stores adapter + memory index there) to the
 * file so that mem_read() can use it.
 */
2034 static int mem_open(struct inode *inode, struct file *file)
2036 file->private_data = inode->i_private;
/*
 * debugfs read handler for the adapter memory files.  The file size set in
 * add_debugfs_mem() bounds the read; data is fetched in fixed-size chunks
 * with t4_mc_read() or t4_edc_read() and copied out to user space.
 */
2040 static ssize_t mem_read(struct file *file, char __user *buf, size_t count,
2044 loff_t avail = file->f_path.dentry->d_inode->i_size;
/* low two bits of private_data select the memory, the rest is the adapter */
2045 unsigned int mem = (uintptr_t)file->private_data & 3;
2046 struct adapter *adap = file->private_data - mem;
/* clamp the request to what is left of the memory */
2052 if (count > avail - pos)
2053 count = avail - pos;
2061 ret = t4_mc_read(adap, pos, data, NULL);
2063 ret = t4_edc_read(adap, mem, pos, data, NULL);
/* copy only the part of the chunk at and after the requested offset */
2067 ofst = pos % sizeof(data);
2068 len = min(count, sizeof(data) - ofst);
2069 if (copy_to_user(buf, (u8 *)data + ofst, len))
/* number of bytes consumed since the starting position */
2076 count = pos - *ppos;
/* file operations of the debugfs adapter memory files */
2081 static const struct file_operations mem_debugfs_fops = {
2082 .owner = THIS_MODULE,
2085 .llseek = default_llseek,
/*
 * Create a read-only debugfs file @name exposing one adapter memory.  The
 * memory index @idx is folded into the private pointer (adapter + idx) and
 * recovered from its low bits in mem_read().  @size_mb is the memory size
 * in megabytes and becomes the file size.
 */
2088 static void __devinit add_debugfs_mem(struct adapter *adap, const char *name,
2089 unsigned int idx, unsigned int size_mb)
2093 de = debugfs_create_file(name, S_IRUSR, adap->debugfs_root,
2094 (void *)adap + idx, &mem_debugfs_fops);
2095 if (de && de->d_inode)
2096 de->d_inode->i_size = size_mb << 20;
/*
 * Populate the adapter's debugfs directory: one memory file per memory
 * enabled in MA_TARGET_MEM_ENABLE, plus the "l2t" file.
 */
2099 static int __devinit setup_debugfs(struct adapter *adap)
2103 if (IS_ERR_OR_NULL(adap->debugfs_root))
2106 i = t4_read_reg(adap, MA_TARGET_MEM_ENABLE);
/* the EDC memories are registered with a fixed size of 5 MB */
2107 if (i & EDRAM0_ENABLE)
2108 add_debugfs_mem(adap, "edc0", MEM_EDC0, 5);
2109 if (i & EDRAM1_ENABLE)
2110 add_debugfs_mem(adap, "edc1", MEM_EDC1, 5);
/* the external memory size is read from MA_EXT_MEMORY_BAR */
2111 if (i & EXT_MEM_ENABLE)
2112 add_debugfs_mem(adap, "mc", MEM_MC,
2113 EXT_MEM_SIZE_GET(t4_read_reg(adap, MA_EXT_MEMORY_BAR)));
2115 debugfs_create_file("l2t", S_IRUSR, adap->debugfs_root, adap,
2121 * upper-layer driver support
2125 * Allocate an active-open TID and set it to the supplied value.
/*
 * Takes an entry from the active-open TID free list (t->afree) under
 * atid_lock.  The atid is the entry's index within t->atid_tab.
 */
2127 int cxgb4_alloc_atid(struct tid_info *t, void *data)
2131 spin_lock_bh(&t->atid_lock);
2133 union aopen_entry *p = t->afree;
/* pointer difference == index of the entry in the table */
2135 atid = p - t->atid_tab;
2140 spin_unlock_bh(&t->atid_lock);
2143 EXPORT_SYMBOL(cxgb4_alloc_atid);
2146 * Release an active-open TID.
/*
 * Operates on entry @atid of t->atid_tab under atid_lock.  @atid is used
 * as an index without a range check, so callers must pass a value obtained
 * from cxgb4_alloc_atid().
 */
2148 void cxgb4_free_atid(struct tid_info *t, unsigned int atid)
2150 union aopen_entry *p = &t->atid_tab[atid];
2152 spin_lock_bh(&t->atid_lock);
2156 spin_unlock_bh(&t->atid_lock);
2158 EXPORT_SYMBOL(cxgb4_free_atid);
2161 * Allocate a server TID and set it to the supplied value.
/*
 * Server TIDs are tracked in the t->stid_bmap bitmap under stid_lock.  A
 * PF_INET server takes a single free bit; other families take a free
 * region of order 2 (four bits) from bitmap_find_free_region().  The value
 * handed back is offset by t->stid_base.
 */
2163 int cxgb4_alloc_stid(struct tid_info *t, int family, void *data)
2167 spin_lock_bh(&t->stid_lock);
2168 if (family == PF_INET) {
2169 stid = find_first_zero_bit(t->stid_bmap, t->nstids);
2170 if (stid < t->nstids)
2171 __set_bit(stid, t->stid_bmap);
2175 stid = bitmap_find_free_region(t->stid_bmap, t->nstids, 2);
/* record the caller's cookie, then convert the index to a HW stid */
2180 t->stid_tab[stid].data = data;
2181 stid += t->stid_base;
2184 spin_unlock_bh(&t->stid_lock);
2187 EXPORT_SYMBOL(cxgb4_alloc_stid);
2190 * Release a server TID.
/*
 * Reverses cxgb4_alloc_stid(): @stid is converted back to a table index by
 * subtracting t->stid_base, the bit (PF_INET) or order-2 region (other
 * families) is released and the entry's cookie is cleared.  @family must
 * match the one used at allocation time.
 */
2192 void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family)
2194 stid -= t->stid_base;
2195 spin_lock_bh(&t->stid_lock);
2196 if (family == PF_INET)
2197 __clear_bit(stid, t->stid_bmap);
2199 bitmap_release_region(t->stid_bmap, stid, 2);
2200 t->stid_tab[stid].data = NULL;
2202 spin_unlock_bh(&t->stid_lock);
2204 EXPORT_SYMBOL(cxgb4_free_stid);
2207 * Populate a TID_RELEASE WR. Caller must properly size the skb.
/*
 * Builds a CPL_TID_RELEASE message for @tid in @skb and selects the Tx
 * queue for channel @chan at CPL_PRIORITY_SETUP.  __skb_put() performs no
 * tailroom check, hence the sizing requirement on the caller.
 */
2209 static void mk_tid_release(struct sk_buff *skb, unsigned int chan,
2212 struct cpl_tid_release *req;
2214 set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
2215 req = (struct cpl_tid_release *)__skb_put(skb, sizeof(*req));
2216 INIT_TP_WR(req, tid);
2217 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
2221 * Queue a TID release request and if necessary schedule a work queue to
/*
 * The pending-release list is threaded through the tid_tab slots
 * themselves: the slot of @tid is made to point at the previous list head
 * and becomes the new head.
 */
2224 static void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan,
2227 void **p = &t->tid_tab[tid];
2228 struct adapter *adap = container_of(t, struct adapter, tids);
2230 spin_lock_bh(&adap->tid_release_lock);
2231 *p = adap->tid_release_head;
2232 /* Low 2 bits encode the Tx channel number */
2233 adap->tid_release_head = (void **)((uintptr_t)p | chan);
/* kick the worker only if it is not already scheduled or running */
2234 if (!adap->tid_release_task_busy) {
2235 adap->tid_release_task_busy = true;
2236 schedule_work(&adap->tid_release_task);
2238 spin_unlock_bh(&adap->tid_release_lock);
2242 * Process the list of pending TID release requests.
/*
 * Work handler draining the list built by cxgb4_queue_tid_release().  Each
 * entry is unlinked under tid_release_lock; the lock is dropped while the
 * release message is allocated and sent, then re-taken for the next entry.
 */
2244 static void process_tid_release_list(struct work_struct *work)
2246 struct sk_buff *skb;
2247 struct adapter *adap;
2249 adap = container_of(work, struct adapter, tid_release_task);
2251 spin_lock_bh(&adap->tid_release_lock);
2252 while (adap->tid_release_head) {
/* split the head into the slot pointer and the channel in its low 2 bits */
2253 void **p = adap->tid_release_head;
2254 unsigned int chan = (uintptr_t)p & 3;
2255 p = (void *)p - chan;
2257 adap->tid_release_head = *p;
2259 spin_unlock_bh(&adap->tid_release_lock);
/* retry the allocation until it succeeds, sleeping one tick in between */
2261 while (!(skb = alloc_skb(sizeof(struct cpl_tid_release),
2263 schedule_timeout_uninterruptible(1);
/* the slot's index within tid_tab is the TID being released */
2265 mk_tid_release(skb, chan, p - adap->tids.tid_tab);
2266 t4_ofld_send(adap, skb);
2267 spin_lock_bh(&adap->tid_release_lock);
/* list drained: allow cxgb4_queue_tid_release() to schedule us again */
2269 adap->tid_release_task_busy = false;
2270 spin_unlock_bh(&adap->tid_release_lock);
2274 * Release a TID and inform HW. If we are unable to allocate the release
2275 * message we defer to a work queue.
/*
 * The release message is allocated with GFP_ATOMIC.  On the direct path
 * the tid_tab entry is cleared and the message is sent at once; the
 * alternative path hands the TID to cxgb4_queue_tid_release().
 */
2277 void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid)
2280 struct sk_buff *skb;
2281 struct adapter *adap = container_of(t, struct adapter, tids);
/* sampled before the slot is cleared or reused as a list link */
2283 old = t->tid_tab[tid];
2284 skb = alloc_skb(sizeof(struct cpl_tid_release), GFP_ATOMIC);
2286 t->tid_tab[tid] = NULL;
2287 mk_tid_release(skb, chan, tid);
2288 t4_ofld_send(adap, skb);
2290 cxgb4_queue_tid_release(t, chan, tid);
2292 atomic_dec(&t->tids_in_use);
2294 EXPORT_SYMBOL(cxgb4_remove_tid);
2297 * Allocate and initialize the TID tables. Returns 0 on success.
/*
 * All four tables live in one t4_alloc_mem() allocation, laid out back to
 * back: tid_tab, atid_tab, stid_tab and finally the stid bitmap.
 */
2299 static int tid_init(struct tid_info *t)
2302 unsigned int natids = t->natids;
2304 size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) +
2305 t->nstids * sizeof(*t->stid_tab) +
2306 BITS_TO_LONGS(t->nstids) * sizeof(long);
2307 t->tid_tab = t4_alloc_mem(size);
/* carve the remaining tables out of the same allocation */
2311 t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
2312 t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
2313 t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids];
2314 spin_lock_init(&t->stid_lock);
2315 spin_lock_init(&t->atid_lock);
2317 t->stids_in_use = 0;
2319 t->atids_in_use = 0;
2320 atomic_set(&t->tids_in_use, 0);
2322 /* Setup the free list for atid_tab and clear the stid bitmap. */
/* each entry is linked to its successor; the list starts at entry 0 */
2325 t->atid_tab[natids - 1].next = &t->atid_tab[natids];
2326 t->afree = t->atid_tab;
2328 bitmap_zero(t->stid_bmap, t->nstids);
2333 * cxgb4_create_server - create an IP server
2335 * @stid: the server TID
2336 * @sip: local IP address to bind server to
2337 * @sport: the server's TCP port
2338 * @queue: queue to direct messages from this server to
2340 * Create an IP server for the given port and address.
2341 * Returns <0 on error and one of the %NET_XMIT_* values on success.
/*
 * Builds a CPL_PASS_OPEN_REQ for @stid in a freshly allocated skb and sends
 * it with t4_mgmt_tx().  The peer address and port are wildcards (0); the
 * Tx channel is derived from @queue with rxq_to_chan().
 */
2343 int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
2344 __be32 sip, __be16 sport, unsigned int queue)
2347 struct sk_buff *skb;
2348 struct adapter *adap;
2349 struct cpl_pass_open_req *req;
/* GFP_KERNEL: this function may sleep */
2351 skb = alloc_skb(sizeof(*req), GFP_KERNEL);
2355 adap = netdev2adap(dev);
2356 req = (struct cpl_pass_open_req *)__skb_put(skb, sizeof(*req));
2358 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
/* @sport and @sip are already big-endian and are stored unchanged */
2359 req->local_port = sport;
2360 req->peer_port = htons(0);
2361 req->local_ip = sip;
2362 req->peer_ip = htonl(0);
2363 chan = rxq_to_chan(&adap->sge, queue);
2364 req->opt0 = cpu_to_be64(TX_CHAN(chan));
/* SYNs for this server are steered to @queue */
2365 req->opt1 = cpu_to_be64(CONN_POLICY_ASK |
2366 SYN_RSS_ENABLE | SYN_RSS_QUEUE(queue));
2367 return t4_mgmt_tx(adap, skb);
2369 EXPORT_SYMBOL(cxgb4_create_server);
2372 * cxgb4_best_mtu - find the entry in the MTU table closest to an MTU
2373 * @mtus: the HW MTU table
2374 * @mtu: the target MTU
2375 * @idx: index of selected entry in the MTU table
2377 * Returns the index and the value in the HW MTU table that is closest to
2378 * but does not exceed @mtu, unless @mtu is smaller than any value in the
2379 * table, in which case that smallest available value is selected.
2381 unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu,
/*
 * Advance while the next table entry still fits within @mtu.  The loop
 * relies on the table being sorted in ascending order (assumed -- TODO
 * confirm) and never moves past the last of the NMTUS entries.
 */
2386 while (i < NMTUS - 1 && mtus[i + 1] <= mtu)
2392 EXPORT_SYMBOL(cxgb4_best_mtu);
2395 * cxgb4_port_chan - get the HW channel of a port
2396 * @dev: the net device for the port
2398 * Return the HW Tx channel of the given port.
2400 unsigned int cxgb4_port_chan(const struct net_device *dev)
2402 return netdev2pinfo(dev)->tx_chan;
2404 EXPORT_SYMBOL(cxgb4_port_chan);
2407 * cxgb4_port_viid - get the VI id of a port
2408 * @dev: the net device for the port
2410 * Return the VI id of the given port.
2412 unsigned int cxgb4_port_viid(const struct net_device *dev)
2414 return netdev2pinfo(dev)->viid;
2416 EXPORT_SYMBOL(cxgb4_port_viid);
2419 * cxgb4_port_idx - get the index of a port
2420 * @dev: the net device for the port
2422 * Return the index of the given port.
2424 unsigned int cxgb4_port_idx(const struct net_device *dev)
2426 return netdev2pinfo(dev)->port_id;
2428 EXPORT_SYMBOL(cxgb4_port_idx);
2430 void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4,
2431 struct tp_tcp_stats *v6)
2433 struct adapter *adap = pci_get_drvdata(pdev);
2435 spin_lock(&adap->stats_lock);
2436 t4_tp_get_tcp_stats(adap, v4, v6);
2437 spin_unlock(&adap->stats_lock);
2439 EXPORT_SYMBOL(cxgb4_get_tcp_stats);
2441 void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask,
2442 const unsigned int *pgsz_order)
2444 struct adapter *adap = netdev2adap(dev);
2446 t4_write_reg(adap, ULP_RX_ISCSI_TAGMASK, tag_mask);
2447 t4_write_reg(adap, ULP_RX_ISCSI_PSZ, HPZ0(pgsz_order[0]) |
2448 HPZ1(pgsz_order[1]) | HPZ2(pgsz_order[2]) |
2449 HPZ3(pgsz_order[3]));
2451 EXPORT_SYMBOL(cxgb4_iscsi_init);
2453 static struct pci_driver cxgb4_driver;
2455 static void check_neigh_update(struct neighbour *neigh)
2457 const struct device *parent;
2458 const struct net_device *netdev = neigh->dev;
2460 if (netdev->priv_flags & IFF_802_1Q_VLAN)
2461 netdev = vlan_dev_real_dev(netdev);
2462 parent = netdev->dev.parent;
2463 if (parent && parent->driver == &cxgb4_driver.driver)
2464 t4_l2t_update(dev_get_drvdata(parent), neigh);
/*
 * Netevent notifier callback.  Neighbour updates are passed on to
 * check_neigh_update(); the PMTU and redirect events have no handling code
 * of their own here.
 */
2467 static int netevent_cb(struct notifier_block *nb, unsigned long event,
2471 case NETEVENT_NEIGH_UPDATE:
2472 check_neigh_update(data);
2474 case NETEVENT_PMTU_UPDATE:
2475 case NETEVENT_REDIRECT:
/*
 * netevent_registered tracks whether cxgb4_netevent_nb is currently
 * registered; it is set in uld_attach() and cleared in detach_ulds().
 */
2482 static bool netevent_registered;
2483 static struct notifier_block cxgb4_netevent_nb = {
2484 .notifier_call = netevent_cb
/*
 * Attach upper-layer driver @uld to @adap: describe the adapter in a
 * cxgb4_lld_info, pass it to the ULD's add() method and remember the
 * returned handle.  Registers the netevent notifier on first use and tells
 * the ULD the adapter is up if it is already fully initialized.
 */
2487 static void uld_attach(struct adapter *adap, unsigned int uld)
2490 struct cxgb4_lld_info lli;
2492 lli.pdev = adap->pdev;
2493 lli.l2t = adap->l2t;
2494 lli.tids = &adap->tids;
2495 lli.ports = adap->port;
2496 lli.vr = &adap->vres;
2497 lli.mtus = adap->params.mtus;
/*
 * Each ULD type gets its own set of Rx queues.
 * NOTE(review): rxq_ids/nrxq are only assigned for these two types and lli
 * has no initializer -- confirm no other ULD type can reach this point.
 */
2498 if (uld == CXGB4_ULD_RDMA) {
2499 lli.rxq_ids = adap->sge.rdma_rxq;
2500 lli.nrxq = adap->sge.rdmaqs;
2501 } else if (uld == CXGB4_ULD_ISCSI) {
2502 lli.rxq_ids = adap->sge.ofld_rxq;
2503 lli.nrxq = adap->sge.ofldqsets;
2505 lli.ntxq = adap->sge.ofldqsets;
2506 lli.nchan = adap->params.nports;
2507 lli.nports = adap->params.nports;
2508 lli.wr_cred = adap->params.ofldq_wr_cred;
2509 lli.adapter_type = adap->params.rev;
2510 lli.iscsi_iolen = MAXRXDATA_GET(t4_read_reg(adap, TP_PARA_REG2));
/* doorbell/GTS densities are stored in the registers as log2 values */
2511 lli.udb_density = 1 << QUEUESPERPAGEPF0_GET(
2512 t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF) >>
2514 lli.ucq_density = 1 << QUEUESPERPAGEPF0_GET(
2515 t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF) >>
2517 lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS);
2518 lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL);
2519 lli.fw_vers = adap->params.fw_vers;
2521 handle = ulds[uld].add(&lli);
/* add() reports failure through an ERR_PTR-encoded handle */
2522 if (IS_ERR(handle)) {
2523 dev_warn(adap->pdev_dev,
2524 "could not attach to the %s driver, error %ld\n",
2525 uld_str[uld], PTR_ERR(handle));
2529 adap->uld_handle[uld] = handle;
2531 if (!netevent_registered) {
2532 register_netevent_notifier(&cxgb4_netevent_nb);
2533 netevent_registered = true;
2536 if (adap->flags & FULL_INIT_DONE)
2537 ulds[uld].state_change(handle, CXGB4_STATE_UP);
/*
 * Add @adap to the global adapter list and attach upper-layer drivers to
 * it via uld_attach(), all under uld_mutex.
 */
2540 static void attach_ulds(struct adapter *adap)
2544 mutex_lock(&uld_mutex);
2545 list_add_tail(&adap->list_node, &adapter_list);
2546 for (i = 0; i < CXGB4_ULD_MAX; i++)
2548 uld_attach(adap, i);
2549 mutex_unlock(&uld_mutex);
2552 static void detach_ulds(struct adapter *adap)
2556 mutex_lock(&uld_mutex);
2557 list_del(&adap->list_node);
2558 for (i = 0; i < CXGB4_ULD_MAX; i++)
2559 if (adap->uld_handle[i]) {
2560 ulds[i].state_change(adap->uld_handle[i],
2561 CXGB4_STATE_DETACH);
2562 adap->uld_handle[i] = NULL;
2564 if (netevent_registered && list_empty(&adapter_list)) {
2565 unregister_netevent_notifier(&cxgb4_netevent_nb);
2566 netevent_registered = false;
2568 mutex_unlock(&uld_mutex);
2571 static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
2575 mutex_lock(&uld_mutex);
2576 for (i = 0; i < CXGB4_ULD_MAX; i++)
2577 if (adap->uld_handle[i])
2578 ulds[i].state_change(adap->uld_handle[i], new_state);
2579 mutex_unlock(&uld_mutex);
2583 * cxgb4_register_uld - register an upper-layer driver
2584 * @type: the ULD type
2585 * @p: the ULD methods
2587 * Registers an upper-layer driver with this driver and notifies the ULD
2588 * about any presently available devices that support its type. Returns
2589 * %-EBUSY if a ULD of the same type is already registered.
2591 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p)
2594 struct adapter *adap;
/* reject types outside the ulds[] table */
2596 if (type >= CXGB4_ULD_MAX)
2598 mutex_lock(&uld_mutex);
/* a non-NULL add() method marks the slot as taken */
2599 if (ulds[type].add) {
/* offer every known adapter to the newly registered ULD */
2604 list_for_each_entry(adap, &adapter_list, list_node)
2605 uld_attach(adap, type);
2606 out: mutex_unlock(&uld_mutex);
2609 EXPORT_SYMBOL(cxgb4_register_uld);
2612 * cxgb4_unregister_uld - unregister an upper-layer driver
2613 * @type: the ULD type
2615 * Unregisters an existing upper-layer driver.
2617 int cxgb4_unregister_uld(enum cxgb4_uld type)
2619 struct adapter *adap;
/* reject types outside the ulds[] table */
2621 if (type >= CXGB4_ULD_MAX)
2623 mutex_lock(&uld_mutex);
/*
 * Drop the handle on every adapter; no state_change() call is made here,
 * so the ULD is expected to have cleaned up its own per-adapter state.
 */
2624 list_for_each_entry(adap, &adapter_list, list_node)
2625 adap->uld_handle[type] = NULL;
/* a NULL add() method marks the slot as free again */
2626 ulds[type].add = NULL;
2627 mutex_unlock(&uld_mutex);
2630 EXPORT_SYMBOL(cxgb4_unregister_uld);
2633 * cxgb_up - enable the adapter
2634 * @adap: adapter being enabled
2636 * Called when the first port is enabled, this function performs the
2637 * actions necessary to make an adapter operational, such as completing
2638 * the initialization of HW modules, and enabling interrupts.
2640 * Must be called with the rtnl lock held.
2642 static int cxgb_up(struct adapter *adap)
/* queues and RSS are set up before any interrupt is requested */
2646 err = setup_sge_queues(adap);
2649 err = setup_rss(adap);
/*
 * MSI-X: vector 0 serves non-data interrupts, the remaining vectors are
 * requested per queue by request_msix_queue_irqs().
 */
2653 if (adap->flags & USING_MSIX) {
2654 name_msix_vecs(adap);
2655 err = request_irq(adap->msix_info[0].vec, t4_nondata_intr, 0,
2656 adap->msix_info[0].desc, adap);
2660 err = request_msix_queue_irqs(adap);
/* undo the vector 0 request made above */
2662 free_irq(adap->msix_info[0].vec, adap);
/* MSI or legacy INTx: a single line, shared only when MSI is not in use */
2666 err = request_irq(adap->pdev->irq, t4_intr_handler(adap),
2667 (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
2668 adap->port[0]->name, adap);
/* success: enable interrupts, mark the adapter up and tell the ULDs */
2674 t4_intr_enable(adap);
2675 adap->flags |= FULL_INIT_DONE;
2676 notify_ulds(adap, CXGB4_STATE_UP);
/* error unwinding */
2680 dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err);
2682 t4_free_sge_resources(adap);
/*
 * Undo cxgb_up(): disable interrupts, stop the deferred TID release work
 * and discard its list, free the IRQs, quiesce and stop the SGE and free
 * its resources, then clear FULL_INIT_DONE.
 */
2686 static void cxgb_down(struct adapter *adapter)
2688 t4_intr_disable(adapter);
/* after the work is cancelled, pending TID releases are simply dropped */
2689 cancel_work_sync(&adapter->tid_release_task);
2690 adapter->tid_release_task_busy = false;
2691 adapter->tid_release_head = NULL;
2693 if (adapter->flags & USING_MSIX) {
2694 free_msix_queue_irqs(adapter);
2695 free_irq(adapter->msix_info[0].vec, adapter);
2697 free_irq(adapter->pdev->irq, adapter);
2698 quiesce_rx(adapter);
2699 t4_sge_stop(adapter);
2700 t4_free_sge_resources(adapter);
2701 adapter->flags &= ~FULL_INIT_DONE;
2705 * net_device operations
/*
 * ndo_open handler.  Brings the whole adapter up with cxgb_up() if that
 * has not happened yet (FULL_INIT_DONE clear), then starts the link and
 * the port's Tx queues.
 */
2707 static int cxgb_open(struct net_device *dev)
2710 struct port_info *pi = netdev_priv(dev);
2711 struct adapter *adapter = pi->adapter;
/* start with carrier off */
2713 netif_carrier_off(dev);
2715 if (!(adapter->flags & FULL_INIT_DONE)) {
2716 err = cxgb_up(adapter);
2721 err = link_start(dev);
2723 netif_tx_start_all_queues(dev);
2727 static int cxgb_close(struct net_device *dev)
2729 struct port_info *pi = netdev_priv(dev);
2730 struct adapter *adapter = pi->adapter;
2732 netif_tx_stop_all_queues(dev);
2733 netif_carrier_off(dev);
2734 return t4_enable_vi(adapter, adapter->fn, pi->viid, false, false);
/*
 * ndo_get_stats64 handler.  Reads the hardware port statistics for the
 * port's Tx channel under stats_lock and maps them onto @ns.  Counters
 * with no hardware equivalent are reported as 0.
 */
2737 static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev,
2738 struct rtnl_link_stats64 *ns)
2740 struct port_stats stats;
2741 struct port_info *p = netdev_priv(dev);
2742 struct adapter *adapter = p->adapter;
2744 spin_lock(&adapter->stats_lock);
2745 t4_get_port_stats(adapter, p->tx_chan, &stats);
2746 spin_unlock(&adapter->stats_lock);
2748 ns->tx_bytes = stats.tx_octets;
2749 ns->tx_packets = stats.tx_frames;
2750 ns->rx_bytes = stats.rx_octets;
2751 ns->rx_packets = stats.rx_frames;
2752 ns->multicast = stats.rx_mcast_frames;
2754 /* detailed rx_errors */
2755 ns->rx_length_errors = stats.rx_jabber + stats.rx_too_long +
2757 ns->rx_over_errors = 0;
2758 ns->rx_crc_errors = stats.rx_fcs_err;
2759 ns->rx_frame_errors = stats.rx_symbol_err;
/* FIFO errors combine the four overflow and the four truncation counters */
2760 ns->rx_fifo_errors = stats.rx_ovflow0 + stats.rx_ovflow1 +
2761 stats.rx_ovflow2 + stats.rx_ovflow3 +
2762 stats.rx_trunc0 + stats.rx_trunc1 +
2763 stats.rx_trunc2 + stats.rx_trunc3;
2764 ns->rx_missed_errors = 0;
2766 /* detailed tx_errors */
2767 ns->tx_aborted_errors = 0;
2768 ns->tx_carrier_errors = 0;
2769 ns->tx_fifo_errors = 0;
2770 ns->tx_heartbeat_errors = 0;
2771 ns->tx_window_errors = 0;
2773 ns->tx_errors = stats.tx_error_frames;
/* total built from the detailed values computed above plus rx_len_err */
2774 ns->rx_errors = stats.rx_symbol_err + stats.rx_fcs_err +
2775 ns->rx_length_errors + stats.rx_len_err + ns->rx_fifo_errors;
/*
 * ndo_do_ioctl handler for the MII ioctls.  Reports the port's MDIO
 * address and performs MDIO register reads/writes through the firmware
 * mailbox of this PCI function.
 */
2779 static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
2782 int ret = 0, prtad, devad;
2783 struct port_info *pi = netdev_priv(dev);
/* the MII request data is stored in place of ifr_data inside the ifreq */
2784 struct mii_ioctl_data *data = (struct mii_ioctl_data *)&req->ifr_data;
/* a negative mdio_addr means the port has no MDIO-accessible PHY -- TODO confirm */
2788 if (pi->mdio_addr < 0)
2790 data->phy_id = pi->mdio_addr;
/* clause 45 ids carry port and device address; plain ids below 32 are clause 22 */
2794 if (mdio_phy_id_is_c45(data->phy_id)) {
2795 prtad = mdio_phy_id_prtad(data->phy_id);
2796 devad = mdio_phy_id_devad(data->phy_id);
2797 } else if (data->phy_id < 32) {
2798 prtad = data->phy_id;
/* clause 22 register numbers are 5 bits wide */
2800 data->reg_num &= 0x1f;
2804 mbox = pi->adapter->fn;
2805 if (cmd == SIOCGMIIREG)
2806 ret = t4_mdio_rd(pi->adapter, mbox, prtad, devad,
2807 data->reg_num, &data->val_out);
2809 ret = t4_mdio_wr(pi->adapter, mbox, prtad, devad,
2810 data->reg_num, data->val_in);
2818 static void cxgb_set_rxmode(struct net_device *dev)
2820 /* unfortunately we can't return errors to the stack */
2821 set_rxmode(dev, -1, false);
/*
 * ndo_change_mtu handler: range-checks @new_mtu (at least 81, at most
 * MAX_MTU) and programs it into the virtual interface with
 * t4_set_rxmode().
 */
2824 static int cxgb_change_mtu(struct net_device *dev, int new_mtu)
2827 struct port_info *pi = netdev_priv(dev);
2829 if (new_mtu < 81 || new_mtu > MAX_MTU) /* accommodate SACK */
2831 ret = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, new_mtu, -1,
/*
 * ndo_set_mac_address handler: validates the new address, installs it in
 * the hardware with t4_change_mac() in place of the current exact-match
 * filter, then records it in the net device.
 */
2838 static int cxgb_set_mac_addr(struct net_device *dev, void *p)
2841 struct sockaddr *addr = p;
2842 struct port_info *pi = netdev_priv(dev);
2844 if (!is_valid_ether_addr(addr->sa_data))
2847 ret = t4_change_mac(pi->adapter, pi->adapter->fn, pi->viid,
2848 pi->xact_addr_filt, addr->sa_data, true, true);
2852 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
/* t4_change_mac()'s result is kept as the new filter index */
2853 pi->xact_addr_filt = ret;
2857 #ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * ndo_poll_controller handler: invokes the interrupt handlers directly.
 * With MSI-X each of the port's response queues is polled through
 * t4_sge_intr_msix(); otherwise the adapter-wide handler is called.
 */
2858 static void cxgb_netpoll(struct net_device *dev)
2860 struct port_info *pi = netdev_priv(dev);
2861 struct adapter *adap = pi->adapter;
2863 if (adap->flags & USING_MSIX) {
2865 struct sge_eth_rxq *rx = &adap->sge.ethrxq[pi->first_qset];
/* walk the port's nqsets consecutive queue sets */
2867 for (i = pi->nqsets; i; i--, rx++)
2868 t4_sge_intr_msix(0, &rx->rspq);
2870 t4_intr_handler(adap)(0, adap);
/* net_device operations of a cxgb4 port */
2874 static const struct net_device_ops cxgb4_netdev_ops = {
2875 .ndo_open = cxgb_open,
2876 .ndo_stop = cxgb_close,
2877 .ndo_start_xmit = t4_eth_xmit,
2878 .ndo_get_stats64 = cxgb_get_stats,
2879 .ndo_set_rx_mode = cxgb_set_rxmode,
2880 .ndo_set_mac_address = cxgb_set_mac_addr,
2881 .ndo_validate_addr = eth_validate_addr,
2882 .ndo_do_ioctl = cxgb_ioctl,
2883 .ndo_change_mtu = cxgb_change_mtu,
/* the poll handler only exists when netpoll support is configured */
2884 #ifdef CONFIG_NET_POLL_CONTROLLER
2885 .ndo_poll_controller = cxgb_netpoll,
2889 void t4_fatal_err(struct adapter *adap)
2891 t4_set_reg_field(adap, SGE_CONTROL, GLOBALENABLE, 0);
2892 t4_intr_disable(adap);
2893 dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n");
/*
 * Program the PCIe memory access windows.  Windows 0-2 are placed in BAR0
 * at the MEMWINn_BASE offsets; window 3 is set up in BAR2 only when an
 * on-chip queue region (vres.ocq) is configured.
 */
2896 static void setup_memwin(struct adapter *adap)
2900 bar0 = pci_resource_start(adap->pdev, 0); /* truncation intentional */
/* the WINDOW field takes log2 of the aperture size minus 10 */
2901 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 0),
2902 (bar0 + MEMWIN0_BASE) | BIR(0) |
2903 WINDOW(ilog2(MEMWIN0_APERTURE) - 10));
2904 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 1),
2905 (bar0 + MEMWIN1_BASE) | BIR(0) |
2906 WINDOW(ilog2(MEMWIN1_APERTURE) - 10));
2907 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 2),
2908 (bar0 + MEMWIN2_BASE) | BIR(0) |
2909 WINDOW(ilog2(MEMWIN2_APERTURE) - 10));
2910 if (adap->vres.ocq.size) {
2911 unsigned int start, sz_kb;
2913 start = pci_resource_start(adap->pdev, 2) +
2914 OCQ_WIN_OFFSET(adap->pdev, &adap->vres);
/* window size in KB, rounded up to a power of two */
2915 sz_kb = roundup_pow_of_two(adap->vres.ocq.size) >> 10;
2917 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 3),
2918 start | BIR(1) | WINDOW(ilog2(sz_kb)));
/* point the window at the start of the on-chip queue region */
2920 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, 3),
2921 adap->vres.ocq.start);
2923 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, 3));
/*
 * Capability negotiation and basic configuration, called from adap_init0()
 * and from eeh_slot_reset().
 *
 * @c is caller-provided scratch space for the FW_CAPS_CONFIG command: it is
 * zeroed, filled by a READ of the firmware capabilities, adjusted, and then
 * sent back as a WRITE.  Afterwards global RSS and the PF's resources are
 * configured, a few registers are tweaked, and the result of
 * t4_early_init() is returned.
 */
2927 static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
2932 /* get device capabilities */
2933 memset(c, 0, sizeof(*c));
2934 c->op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
2935 FW_CMD_REQUEST | FW_CMD_READ);
2936 c->retval_len16 = htonl(FW_LEN16(*c));
/* The same buffer is used for the request and for the reply. */
2937 ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), c);
2941 /* select capabilities we'll be using */
/*
 * When the firmware offers NIC_VM, the capability is either toggled off (^=)
 * or made the only NIC capability (=).
 * NOTE(review): the condition choosing between the two assignments is not
 * visible in this listing; presumably the vf_acls setting tested below.
 */
2942 if (c->niccaps & htons(FW_CAPS_CONFIG_NIC_VM)) {
2944 c->niccaps ^= htons(FW_CAPS_CONFIG_NIC_VM);
2946 c->niccaps = htons(FW_CAPS_CONFIG_NIC_VM);
2947 } else if (vf_acls) {
/*
 * NOTE(review): this message has no trailing newline, unlike the other
 * dev_err() messages in this file.
 */
2948 dev_err(adap->pdev_dev, "virtualization ACLs not supported");
/* Write the adjusted capability set back; no reply buffer is requested. */
2951 c->op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
2952 FW_CMD_REQUEST | FW_CMD_WRITE);
2953 ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), NULL);
2957 ret = t4_config_glbl_rss(adap, adap->fn,
2958 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL,
2959 FW_RSS_GLB_CONFIG_CMD_TNLMAPEN |
2960 FW_RSS_GLB_CONFIG_CMD_TNLALLLKP);
/* Resource provisioning for this PF itself (pf = adap->fn, vf = 0). */
2964 ret = t4_cfg_pfvf(adap, adap->fn, adap->fn, 0, MAX_EGRQ, 64, MAX_INGQ,
2965 0, 0, 4, 0xf, 0xf, 16, FW_CMD_CAP_PF, FW_CMD_CAP_PF);
2971 /* tweak some settings */
2972 t4_write_reg(adap, TP_SHIFT_CNT, 0x64f8849);
2973 t4_write_reg(adap, ULP_RX_TDDP_PSZ, HPZ0(PAGE_SHIFT - 12));
/*
 * Indirect read-modify-write through TP_PIO_ADDR/TP_PIO_DATA: clear
 * CSUM_HAS_PSEUDO_HDR in TP_INGRESS_CONFIG.
 */
2974 t4_write_reg(adap, TP_PIO_ADDR, TP_INGRESS_CONFIG);
2975 v = t4_read_reg(adap, TP_PIO_DATA);
2976 t4_write_reg(adap, TP_PIO_DATA, v & ~CSUM_HAS_PSEUDO_HDR);
2978 /* get basic stuff going */
2979 return t4_early_init(adap, adap->fn);
2983 * Max # of ATIDs. The absolute HW max is 16K but we keep it lower.
/* Used in adap_init0() as the upper bound for adap->tids.natids. */
2985 #define MAX_ATIDS 8192U
2988 * Phase 0 of initialization: contact FW, obtain config, perform basic init.
/*
 * Sequence visible below: firmware version check (with an upgrade attempt),
 * FW hello as master, FW reset, SGE timer/counter defaults, device and PF/VF
 * parameter queries, optional VF provisioning, and a "bye" error exit.
 */
2990 static int adap_init0(struct adapter *adap)
2994 enum dev_state state;
2995 u32 params[7], val[7];
2996 struct fw_caps_config_cmd c;
/*
 * If the version check reports -EINVAL or a positive value, try upgrade_fw()
 * and, when that does not fail, check the version again.
 */
2998 ret = t4_check_fw_version(adap);
2999 if (ret == -EINVAL || ret > 0) {
3000 if (upgrade_fw(adap) >= 0) /* recache FW version */
3001 ret = t4_check_fw_version(adap);
3006 /* contact FW, request master */
3007 ret = t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, &state);
3009 dev_err(adap->pdev_dev, "could not connect to FW, error %d\n",
3015 ret = t4_fw_reset(adap, adap->fn, PIORSTMODE | PIORST);
/*
 * Holdoff timers: all but the last come from intr_holdoff[], clamped to
 * MAX_SGE_TIMERVAL; the last one is fixed at MAX_SGE_TIMERVAL.
 */
3019 for (v = 0; v < SGE_NTIMERS - 1; v++)
3020 adap->sge.timer_val[v] = min(intr_holdoff[v], MAX_SGE_TIMERVAL);
3021 adap->sge.timer_val[SGE_NTIMERS - 1] = MAX_SGE_TIMERVAL;
/*
 * Packet counters: entry 0 is fixed at 1, the others come from intr_cnt[]
 * (shifted by one) under a min() whose second argument is not visible here.
 */
3022 adap->sge.counter_val[0] = 1;
3023 for (v = 1; v < SGE_NCOUNTERS; v++)
3024 adap->sge.counter_val[v] = min(intr_cnt[v - 1],
3026 #define FW_PARAM_DEV(param) \
3027 (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
3028 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
/* Single-parameter query; the value is stored as the VPD core clock. */
3030 params[0] = FW_PARAM_DEV(CCLK);
3031 ret = t4_query_params(adap, adap->fn, adap->fn, 0, 1, params, val);
3034 adap->params.vpd.cclk = val[0];
3036 ret = adap_init1(adap, &c);
3040 #define FW_PARAM_PFVF(param) \
3041 (FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
3042 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param) | \
3043 FW_PARAMS_PARAM_Y(adap->fn))
/*
 * Seven-parameter query.  For every START/END pair in this function the size
 * is computed as END - START + 1, i.e. END is treated as inclusive.
 */
3045 params[0] = FW_PARAM_DEV(PORTVEC);
3046 params[1] = FW_PARAM_PFVF(L2T_START);
3047 params[2] = FW_PARAM_PFVF(L2T_END);
3048 params[3] = FW_PARAM_PFVF(FILTER_START);
3049 params[4] = FW_PARAM_PFVF(FILTER_END);
3050 params[5] = FW_PARAM_PFVF(IQFLINT_START);
3051 params[6] = FW_PARAM_PFVF(EQ_START);
3052 ret = t4_query_params(adap, adap->fn, adap->fn, 0, 7, params, val);
3056 adap->tids.ftid_base = val[3];
3057 adap->tids.nftids = val[4] - val[3] + 1;
3058 adap->sge.ingr_start = val[5];
3059 adap->sge.egr_start = val[6];
/*
 * NOTE(review): the conditions guarding the offload, RDMA and iSCSI query
 * groups below are not visible in this listing.
 */
3062 /* query offload-related parameters */
3063 params[0] = FW_PARAM_DEV(NTID);
3064 params[1] = FW_PARAM_PFVF(SERVER_START);
3065 params[2] = FW_PARAM_PFVF(SERVER_END);
3066 params[3] = FW_PARAM_PFVF(TDDP_START);
3067 params[4] = FW_PARAM_PFVF(TDDP_END);
3068 params[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
3069 ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
3073 adap->tids.ntids = val[0];
/* natids is half of ntids, but never more than MAX_ATIDS. */
3074 adap->tids.natids = min(adap->tids.ntids / 2, MAX_ATIDS);
3075 adap->tids.stid_base = val[1];
3076 adap->tids.nstids = val[2] - val[1] + 1;
3077 adap->vres.ddp.start = val[3];
3078 adap->vres.ddp.size = val[4] - val[3] + 1;
3079 adap->params.ofldq_wr_cred = val[5];
3080 adap->params.offload = 1;
3083 params[0] = FW_PARAM_PFVF(STAG_START);
3084 params[1] = FW_PARAM_PFVF(STAG_END);
3085 params[2] = FW_PARAM_PFVF(RQ_START);
3086 params[3] = FW_PARAM_PFVF(RQ_END);
3087 params[4] = FW_PARAM_PFVF(PBL_START);
3088 params[5] = FW_PARAM_PFVF(PBL_END);
3089 ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
3093 adap->vres.stag.start = val[0];
3094 adap->vres.stag.size = val[1] - val[0] + 1;
3095 adap->vres.rq.start = val[2];
3096 adap->vres.rq.size = val[3] - val[2] + 1;
3097 adap->vres.pbl.start = val[4];
3098 adap->vres.pbl.size = val[5] - val[4] + 1;
3100 params[0] = FW_PARAM_PFVF(SQRQ_START);
3101 params[1] = FW_PARAM_PFVF(SQRQ_END);
3102 params[2] = FW_PARAM_PFVF(CQ_START);
3103 params[3] = FW_PARAM_PFVF(CQ_END);
3104 params[4] = FW_PARAM_PFVF(OCQ_START);
3105 params[5] = FW_PARAM_PFVF(OCQ_END);
3106 ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
3110 adap->vres.qp.start = val[0];
3111 adap->vres.qp.size = val[1] - val[0] + 1;
3112 adap->vres.cq.start = val[2];
3113 adap->vres.cq.size = val[3] - val[2] + 1;
3114 adap->vres.ocq.start = val[4];
3115 adap->vres.ocq.size = val[5] - val[4] + 1;
3118 params[0] = FW_PARAM_PFVF(ISCSI_START);
3119 params[1] = FW_PARAM_PFVF(ISCSI_END);
3120 ret = t4_query_params(adap, adap->fn, adap->fn, 0, 2, params,
3124 adap->vres.iscsi.start = val[0];
3125 adap->vres.iscsi.size = val[1] - val[0] + 1;
3127 #undef FW_PARAM_PFVF
/*
 * The number of ports is the number of bits set in port_vec.
 * NOTE(review): the assignment of port_vec is not visible in this listing;
 * presumably it comes from the PORTVEC query above.
 */
3130 adap->params.nports = hweight32(port_vec);
3131 adap->params.portvec = port_vec;
3132 adap->flags |= FW_OK;
3134 /* These are finalized by FW initialization, load their values now */
3135 v = t4_read_reg(adap, TP_TIMER_RESOLUTION);
3136 adap->params.tp.tre = TIMERRESOLUTION_GET(v);
3137 t4_read_mtu_tbl(adap, adap->params.mtus, NULL);
3138 t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
3139 adap->params.b_wnd);
3141 #ifdef CONFIG_PCI_IOV
3143 * Provision resource limits for Virtual Functions. We currently
3144 * grant them all the same static resource limits except for the Port
3145 * Access Rights Mask which we're assigning based on the PF. All of
3146 * the static provisioning stuff for both the PF and VF really needs
3147 * to be managed in a persistent manner for each device which the
3148 * firmware controls.
3153 for (pf = 0; pf < ARRAY_SIZE(num_vf); pf++) {
3154 if (num_vf[pf] <= 0)
3157 /* VF numbering starts at 1! */
3158 for (vf = 1; vf <= num_vf[pf]; vf++) {
3159 ret = t4_cfg_pfvf(adap, adap->fn, pf, vf,
3160 VFRES_NEQ, VFRES_NETHCTRL,
3161 VFRES_NIQFLINT, VFRES_NIQ,
3162 VFRES_TC, VFRES_NVI,
3163 FW_PFVF_CMD_CMASK_MASK,
3164 pfvfres_pmask(adap, pf, vf),
3166 VFRES_R_CAPS, VFRES_WX_CAPS);
/* A VF provisioning failure is reported with dev_warn(). */
3168 dev_warn(adap->pdev_dev, "failed to "
3169 "provision pf/vf=%d/%d; "
3170 "err=%d\n", pf, vf, ret);
3180 * If a command timed out or failed with EIO FW does not operate within
3181 * its spec or something catastrophic happened to HW/FW, stop issuing
/* Error exit: t4_fw_bye() is skipped when ret is -ETIMEDOUT or -EIO. */
3184 bye: if (ret != -ETIMEDOUT && ret != -EIO)
3185 t4_fw_bye(adap, adap->fn);
/*
 * PCI error-recovery callback, installed as .error_detected in cxgb4_eeh.
 * Clears FW_OK, notifies the ULDs that recovery is starting, detaches every
 * port's net device and disables the PCI device.
 *
 * Returns PCI_ERS_RESULT_DISCONNECT when @state is
 * pci_channel_io_perm_failure, otherwise PCI_ERS_RESULT_NEED_RESET.
 */
3191 static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev,
3192 pci_channel_state_t state)
3195 struct adapter *adap = pci_get_drvdata(pdev);
3201 adap->flags &= ~FW_OK;
3202 notify_ulds(adap, CXGB4_STATE_START_RECOVERY);
3203 for_each_port(adap, i) {
3204 struct net_device *dev = adap->port[i];
3206 netif_device_detach(dev);
3207 netif_carrier_off(dev);
/*
 * NOTE(review): the statement executed when FULL_INIT_DONE is set is not
 * visible in this listing.
 */
3209 if (adap->flags & FULL_INIT_DONE)
3212 pci_disable_device(pdev);
/*
 * NOTE(review): the jump that targets "out" is not visible in this listing;
 * presumably an early exit taken before the adapter is touched.
 */
3213 out: return state == pci_channel_io_perm_failure ?
3214 PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
/*
 * PCI error-recovery callback, installed as .slot_reset in cxgb4_eeh.
 * Re-enables the device after a slot reset and repeats the firmware
 * handshake and basic initialization.  Every failure path visible below
 * returns PCI_ERS_RESULT_DISCONNECT; success returns
 * PCI_ERS_RESULT_RECOVERED.
 */
3217 static pci_ers_result_t eeh_slot_reset(struct pci_dev *pdev)
3220 struct fw_caps_config_cmd c;
3221 struct adapter *adap = pci_get_drvdata(pdev);
/*
 * Short path: only restore and re-save the PCI state, then report recovery.
 * NOTE(review): the condition selecting this path is not visible in this
 * listing.
 */
3224 pci_restore_state(pdev);
3225 pci_save_state(pdev);
3226 return PCI_ERS_RESULT_RECOVERED;
3229 if (pci_enable_device(pdev)) {
3230 dev_err(&pdev->dev, "cannot reenable PCI device after reset\n");
3231 return PCI_ERS_RESULT_DISCONNECT;
3234 pci_set_master(pdev);
3235 pci_restore_state(pdev);
3236 pci_save_state(pdev);
3237 pci_cleanup_aer_uncorrect_error_status(pdev);
/* Wait for the device, then redo the FW hello with a NULL state pointer. */
3239 if (t4_wait_dev_ready(adap) < 0)
3240 return PCI_ERS_RESULT_DISCONNECT;
3241 if (t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, NULL))
3242 return PCI_ERS_RESULT_DISCONNECT;
3243 adap->flags |= FW_OK;
3244 if (adap_init1(adap, &c))
3245 return PCI_ERS_RESULT_DISCONNECT;
/*
 * Allocate a virtual interface for each port again and reset the port's
 * recorded MAC filter (xact_addr_filt) to -1.
 */
3247 for_each_port(adap, i) {
3248 struct port_info *p = adap2pinfo(adap, i);
3250 ret = t4_alloc_vi(adap, adap->fn, p->tx_chan, adap->fn, 0, 1,
3253 return PCI_ERS_RESULT_DISCONNECT;
3255 p->xact_addr_filt = -1;
3258 t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
3259 adap->params.b_wnd);
/* NOTE(review): the check that leads to this return is not visible here. */
3262 return PCI_ERS_RESULT_DISCONNECT;
3263 return PCI_ERS_RESULT_RECOVERED;
/*
 * PCI error-recovery callback, installed as .resume in cxgb4_eeh.
 * Walks all ports, re-applies the Rx mode on net devices that are running
 * and re-attaches the net devices.
 */
3266 static void eeh_resume(struct pci_dev *pdev)
3269 struct adapter *adap = pci_get_drvdata(pdev);
3275 for_each_port(adap, i) {
3276 struct net_device *dev = adap->port[i];
/*
 * NOTE(review): at least one statement inside this "if" is not visible in
 * this listing, and neither is the closing brace, so the exact scope of the
 * netif_device_attach() call cannot be confirmed from here.
 */
3278 if (netif_running(dev)) {
3280 cxgb_set_rxmode(dev);
3282 netif_device_attach(dev);
/*
 * PCI error-recovery callbacks for this driver; referenced from
 * cxgb4_driver.err_handler.
 */
3287 static struct pci_error_handlers cxgb4_eeh = {
3288 .error_detected = eeh_err_detected,
3289 .slot_reset = eeh_slot_reset,
3290 .resume = eeh_resume,
/*
 * Returns true when the link's supported-capabilities word has the
 * FW_PORT_CAP_SPEED_10G bit set.
 */
3293 static inline bool is_10g_port(const struct link_config *lc)
3295 return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0;
/*
 * Set the default parameters of a response queue.
 *
 * @timer_idx:   holdoff timer index, always encoded into intr_params
 * @pkt_cnt_idx: packet-count threshold index; packet counting is enabled
 *               only when this is below SGE_NCOUNTERS, otherwise the stored
 *               index is 0 and QINTR_CNT_EN is left clear
 * @size:        NOTE(review): its use is not visible in this listing
 * @iqe_size:    stored in q->iqe_len
 */
3298 static inline void init_rspq(struct sge_rspq *q, u8 timer_idx, u8 pkt_cnt_idx,
3299 unsigned int size, unsigned int iqe_size)
3301 q->intr_params = QINTR_TIMER_IDX(timer_idx) |
3302 (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0);
3303 q->pktcnt_idx = pkt_cnt_idx < SGE_NCOUNTERS ? pkt_cnt_idx : 0;
3304 q->iqe_len = iqe_size;
3309 * Perform default configuration of DMA queues depending on the number and type
3310 * of ports we found and the number of available CPUs. Most settings can be
3311 * modified by the admin prior to actual use.
/* Called from init_one() before the net devices are registered. */
3313 static void __devinit cfg_queues(struct adapter *adap)
3315 struct sge *s = &adap->sge;
3316 int i, q10g = 0, n10g = 0, qidx = 0;
/* Count the ports that report 10G support. */
3318 for_each_port(adap, i)
3319 n10g += is_10g_port(&adap2pinfo(adap, i)->link_cfg);
3322 * We default to 1 queue per non-10G port and up to # of cores queues
/*
 * Queue sets left after reserving one per non-10G port are divided evenly
 * among the 10G ports and capped at the number of online CPUs.
 * NOTE(review): this divides by n10g; the guard against n10g == 0 is not
 * visible in this listing -- confirm it exists.
 */
3326 q10g = (MAX_ETH_QSETS - (adap->params.nports - n10g)) / n10g;
3327 if (q10g > num_online_cpus())
3328 q10g = num_online_cpus();
/*
 * Give each port its first queue-set index and its queue-set count.
 * NOTE(review): the statement that advances qidx is not visible here.
 */
3330 for_each_port(adap, i) {
3331 struct port_info *pi = adap2pinfo(adap, i);
3333 pi->first_qset = qidx;
3334 pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1;
3339 s->max_ethqsets = qidx; /* MSI-X may lower it later */
3341 if (is_offload(adap)) {
3343 * For offload we use 1 queue/channel if all ports are up to 1G,
3344 * otherwise we divide all available queues amongst the channels
3345 * capped by the number of available cores.
/*
 * NOTE(review): the condition choosing between the two ofldqsets assignments
 * and the second argument of min_t() are not visible in this listing.
 */
3348 i = min_t(int, ARRAY_SIZE(s->ofldrxq),
3350 s->ofldqsets = roundup(i, adap->params.nports);
3352 s->ofldqsets = adap->params.nports;
3353 /* For RDMA one Rx queue per channel suffices */
3354 s->rdmaqs = adap->params.nports;
/* Defaults for every Ethernet Rx queue: timer 0, counter 0, 1024, 64. */
3357 for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
3358 struct sge_eth_rxq *r = &s->ethrxq[i];
3360 init_rspq(&r->rspq, 0, 0, 1024, 64);
/* Tx queue sizes: 1024 for Ethernet and offload, 512 for control. */
3364 for (i = 0; i < ARRAY_SIZE(s->ethtxq); i++)
3365 s->ethtxq[i].q.size = 1024;
3367 for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++)
3368 s->ctrlq[i].q.size = 512;
3370 for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
3371 s->ofldtxq[i].q.size = 1024;
/* Offload Rx queues are tagged with the iSCSI ULD. */
3373 for (i = 0; i < ARRAY_SIZE(s->ofldrxq); i++) {
3374 struct sge_ofld_rxq *r = &s->ofldrxq[i];
3376 init_rspq(&r->rspq, 0, 0, 1024, 64);
3377 r->rspq.uld = CXGB4_ULD_ISCSI;
/* RDMA Rx queues are tagged with the RDMA ULD; note the size of 511. */
3381 for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
3382 struct sge_ofld_rxq *r = &s->rdmarxq[i];
3384 init_rspq(&r->rspq, 0, 0, 511, 64);
3385 r->rspq.uld = CXGB4_ULD_RDMA;
/* The FW event queue and the interrupt queue both use timer index 6. */
3389 init_rspq(&s->fw_evtq, 6, 0, 512, 64);
3390 init_rspq(&s->intrq, 6, 0, 2 * MAX_INGQ, 64);
3394 * Reduce the number of Ethernet queues across all ports to at most n.
3395 * n provides at least one queue per port.
/* Called from enable_msix() when fewer vectors than queue sets remain. */
3397 static void __devinit reduce_ethqs(struct adapter *adap, int n)
3400 struct port_info *pi;
/*
 * Keep sweeping over the ports while more than n queue sets are in use.
 * Only ports with more than one queue set are reduced, and the sweep stops
 * once the total has dropped to n.
 * NOTE(review): the per-port decrement and the loop exit statement are not
 * visible in this listing.
 */
3402 while (n < adap->sge.ethqsets)
3403 for_each_port(adap, i) {
3404 pi = adap2pinfo(adap, i);
3405 if (pi->nqsets > 1) {
3407 adap->sge.ethqsets--;
3408 if (adap->sge.ethqsets <= n)
/*
 * Second pass over all ports.
 * NOTE(review): its body is not visible in this listing; presumably it
 * recomputes each port's first_qset -- confirm.
 */
3414 for_each_port(adap, i) {
3415 pi = adap2pinfo(adap, i);
3421 /* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
3422 #define EXTRA_VECS 2
/*
 * Try to enable MSI-X and distribute the granted vectors among the queue
 * groups.  init_one() treats a return value of 0 as success.
 */
3424 static int __devinit enable_msix(struct adapter *adap)
3427 int i, err, want, need;
3428 struct sge *s = &adap->sge;
3429 unsigned int nchan = adap->params.nports;
/* NOTE(review): MAX_INGQ + 1 msix_entry structures live on the stack here. */
3430 struct msix_entry entries[MAX_INGQ + 1];
3432 for (i = 0; i < ARRAY_SIZE(entries); ++i)
3433 entries[i].entry = i;
/*
 * want: one vector per possible Ethernet queue set plus EXTRA_VECS, and with
 *       offload also one per RDMA and offload queue set.
 * need: the minimum accepted: one per port plus EXTRA_VECS plus ofld_need.
 */
3435 want = s->max_ethqsets + EXTRA_VECS;
3436 if (is_offload(adap)) {
3437 want += s->rdmaqs + s->ofldqsets;
3438 /* need nchan for each possible ULD */
3439 ofld_need = 2 * nchan;
3441 need = adap->params.nports + EXTRA_VECS + ofld_need;
/*
 * Retry for as long as pci_enable_msix() returns a value that is at least
 * need.
 * NOTE(review): the loop body is not visible in this listing; presumably it
 * lowers want to the returned count -- confirm.
 */
3443 while ((err = pci_enable_msix(adap->pdev, entries, want)) >= need)
3448 * Distribute available vectors to the various queue groups.
3449 * Every group gets its minimum requirement and NIC gets top
3450 * priority for leftovers.
/* Vectors left for Ethernet after the fixed and offload minimums. */
3452 i = want - EXTRA_VECS - ofld_need;
3453 if (i < s->max_ethqsets) {
3454 s->max_ethqsets = i;
3455 if (i < s->ethqsets)
3456 reduce_ethqs(adap, i);
/*
 * Offload gets what remains after Ethernet and the other ULD's nchan
 * vectors, rounded down to a multiple of nchan.
 */
3458 if (is_offload(adap)) {
3459 i = want - EXTRA_VECS - s->max_ethqsets;
3460 i -= ofld_need - nchan;
3461 s->ofldqsets = (i / nchan) * nchan; /* round down */
3463 for (i = 0; i < want; ++i)
3464 adap->msix_info[i].vec = entries[i].vector;
/* Failure path: report the value last returned by pci_enable_msix(). */
3466 dev_info(adap->pdev_dev,
3467 "only %d MSI-X vectors left, not using MSI-X\n", err);
/*
 * Allocate and fill each port's RSS table.  Entry j is set to
 * j % pi->nqsets, so the table cycles through the port's queue sets.
 * The tables are freed in free_some_resources().
 */
3473 static int __devinit init_rss(struct adapter *adap)
3477 for_each_port(adap, i) {
3478 struct port_info *pi = adap2pinfo(adap, i);
/*
 * NOTE(review): the check of this allocation's result is not visible in this
 * listing, and the modulo below requires pi->nqsets to be non-zero.
 */
3480 pi->rss = kcalloc(pi->rss_size, sizeof(u16), GFP_KERNEL);
3483 for (j = 0; j < pi->rss_size; j++)
3484 pi->rss[j] = j % pi->nqsets;
/*
 * Log a description of a registered port: adapter id and revision, the
 * supported speeds, port type, PCIe width and speed, interrupt mode, and the
 * serial number and E/C from the VPD.  Called from init_one() after
 * register_netdev() succeeds.
 */
3489 static void __devinit print_port_info(const struct net_device *dev)
/* Port type names, indexed by pi->port_type. */
3491 static const char *base[] = {
3492 "R XFI", "R XAUI", "T SGMII", "T XFI", "T XAUI", "KX4", "CX4",
3493 "KX", "KR", "R SFP+", "KR/KX", "KR/KX/KX4"
3498 const char *spd = "";
3499 const struct port_info *pi = netdev_priv(dev);
3500 const struct adapter *adap = pi->adapter;
/* NOTE(review): the assignments to spd in both branches are not visible. */
3502 if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_2_5GB)
3504 else if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_5_0GB)
/*
 * Build a "100/1000/10G/" style prefix from the supported speed bits.
 * NOTE(review): these sprintf() calls are unbounded, and the declaration and
 * size of the destination buffer are not visible in this listing.
 */
3507 if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100M)
3508 bufp += sprintf(bufp, "100/");
3509 if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_1G)
3510 bufp += sprintf(bufp, "1000/");
3511 if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
3512 bufp += sprintf(bufp, "10G/");
/*
 * NOTE(review): pi->port_type indexes base[] with no range check visible in
 * this listing.
 */
3515 sprintf(bufp, "BASE-%s", base[pi->port_type]);
3517 netdev_info(dev, "Chelsio %s rev %d %s %sNIC PCIe x%d%s%s\n",
3518 adap->params.vpd.id, adap->params.rev, buf,
3519 is_offload(adap) ? "R" : "", adap->params.pci.width, spd,
3520 (adap->flags & USING_MSIX) ? " MSI-X" :
3521 (adap->flags & USING_MSI) ? " MSI" : "");
3522 netdev_info(dev, "S/N: %s, E/C: %s\n",
3523 adap->params.vpd.sn, adap->params.vpd.ec);
/*
 * Set PCI_EXP_DEVCTL_RELAX_EN in the device's PCIe Device Control register
 * with a read-modify-write of config space.  Called from init_one().
 */
3526 static void __devinit enable_pcie_relaxed_ordering(struct pci_dev *dev)
/*
 * NOTE(review): the check that the PCIe capability was found (pos != 0) is
 * not visible in this listing.
 */
3531 pos = pci_pcie_cap(dev);
3533 pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &v);
3534 v |= PCI_EXP_DEVCTL_RELAX_EN;
3535 pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, v);
3540 * Free the following resources:
3541 * - memory used for tables
3544 * - resources FW is holding for us
/* Used by init_one()'s error path and by remove_one(). */
3546 static void free_some_resources(struct adapter *adapter)
3550 t4_free_mem(adapter->l2t);
3551 t4_free_mem(adapter->tids.tid_tab);
3552 disable_msi(adapter);
/* Ports whose net device was never allocated are skipped. */
3554 for_each_port(adapter, i)
3555 if (adapter->port[i]) {
3556 kfree(adap2pinfo(adapter, i)->rss);
3557 free_netdev(adapter->port[i]);
/* Say goodbye to the firmware only if FW_OK is still set. */
3559 if (adapter->flags & FW_OK)
3560 t4_fw_bye(adapter, adapter->fn);
/* Mask applied to netdev->features to derive vlan_features in init_one(). */
3563 #define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
3564 NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
/*
 * PCI device setup: claims the PCI regions, enables the device, configures
 * DMA masks, allocates and initializes the adapter, creates one net device
 * per port, sets up queues, tables and interrupts, registers the net devices
 * and optionally enables SR-IOV.
 * NOTE(review): presumably the driver's .probe callback; that initializer
 * line is not visible in this listing.
 */
3566 static int __devinit init_one(struct pci_dev *pdev,
3567 const struct pci_device_id *ent)
3570 struct port_info *pi;
3571 unsigned int highdma = 0;
3572 struct adapter *adapter = NULL;
3574 printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
3576 err = pci_request_regions(pdev, KBUILD_MODNAME);
3578 /* Just info, some other driver may have claimed the device. */
3579 dev_info(&pdev->dev, "cannot obtain PCI resources\n");
3583 /* We control everything through one PF */
/*
 * Functions other than the one named by ent->driver_data only get their PCI
 * state saved.
 * NOTE(review): the rest of this branch is not visible in this listing.
 */
3584 func = PCI_FUNC(pdev->devfn);
3585 if (func != ent->driver_data) {
3586 pci_save_state(pdev); /* to restore SR-IOV later */
3590 err = pci_enable_device(pdev);
3592 dev_err(&pdev->dev, "cannot enable PCI device\n");
3593 goto out_release_regions;
/*
 * Try 64-bit DMA first.  When the streaming mask is accepted, the coherent
 * mask must be accepted too; otherwise fall back to a 32-bit streaming mask.
 */
3596 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
3597 highdma = NETIF_F_HIGHDMA;
3598 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3600 dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
3601 "coherent allocations\n");
3602 goto out_disable_device;
3605 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3607 dev_err(&pdev->dev, "no usable DMA configuration\n");
3608 goto out_disable_device;
3612 pci_enable_pcie_error_reporting(pdev);
3613 enable_pcie_relaxed_ordering(pdev);
3614 pci_set_master(pdev);
3615 pci_save_state(pdev);
3617 adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
3620 goto out_disable_device;
3623 adapter->regs = pci_ioremap_bar(pdev, 0);
3624 if (!adapter->regs) {
3625 dev_err(&pdev->dev, "cannot map device registers\n");
3627 goto out_free_adapter;
3630 adapter->pdev = pdev;
3631 adapter->pdev_dev = &pdev->dev;
3633 adapter->msg_enable = dflt_msg_enable;
/* Every chan_map byte starts as 0xff; entries are filled in at registration. */
3634 memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
3636 spin_lock_init(&adapter->stats_lock);
3637 spin_lock_init(&adapter->tid_release_lock);
3639 INIT_WORK(&adapter->tid_release_task, process_tid_release_list);
3641 err = t4_prep_adapter(adapter);
3644 err = adap_init0(adapter);
/* Allocate and pre-configure one multiqueue net device per port. */
3648 for_each_port(adapter, i) {
3649 struct net_device *netdev;
3651 netdev = alloc_etherdev_mq(sizeof(struct port_info),
3658 SET_NETDEV_DEV(netdev, &pdev->dev);
3660 adapter->port[i] = netdev;
3661 pi = netdev_priv(netdev);
3662 pi->adapter = adapter;
3663 pi->xact_addr_filt = -1;
3664 pi->rx_offload = RX_CSO;
3666 netdev->irq = pdev->irq;
/* highdma is NETIF_F_HIGHDMA only when the 64-bit DMA mask was accepted. */
3668 netdev->features |= NETIF_F_SG | TSO_FLAGS;
3669 netdev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
3670 netdev->features |= NETIF_F_GRO | NETIF_F_RXHASH | highdma;
3671 netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
3672 netdev->vlan_features = netdev->features & VLAN_FEAT;
3674 netdev->netdev_ops = &cxgb4_netdev_ops;
3675 SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops);
3678 pci_set_drvdata(pdev, adapter);
/* Port initialization is attempted only when firmware contact succeeded. */
3680 if (adapter->flags & FW_OK) {
3681 err = t4_port_init(adapter, func, func, 0);
3687 * Configure queues and allocate tables now, they can be needed as
3688 * soon as the first register_netdev completes.
3690 cfg_queues(adapter);
3692 adapter->l2t = t4_init_l2t();
3693 if (!adapter->l2t) {
3694 /* We tolerate a lack of L2T, giving up some functionality */
3695 dev_warn(&pdev->dev, "could not allocate L2T, continuing\n");
3696 adapter->params.offload = 0;
/* A TID table failure also just turns offload off. */
3699 if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
3700 dev_warn(&pdev->dev, "could not allocate TID table, "
3702 adapter->params.offload = 0;
3705 /* See what interrupts we'll be using */
/*
 * msi > 1 tries MSI-X first; msi > 0 allows plain MSI.  If neither flag ends
 * up set, neither USING_MSIX nor USING_MSI is recorded.
 */
3706 if (msi > 1 && enable_msix(adapter) == 0)
3707 adapter->flags |= USING_MSIX;
3708 else if (msi > 0 && pci_enable_msi(pdev) == 0)
3709 adapter->flags |= USING_MSI;
3711 err = init_rss(adapter);
3716 * The card is now ready to go. If any errors occur during device
3717 * registration we do not fail the whole card but rather proceed only
3718 * with the ports we manage to register successfully. However we must
3719 * register at least one net device.
3721 for_each_port(adapter, i) {
3722 pi = adap2pinfo(adapter, i);
3723 netif_set_real_num_tx_queues(adapter->port[i], pi->nqsets);
3724 netif_set_real_num_rx_queues(adapter->port[i], pi->nqsets);
3726 err = register_netdev(adapter->port[i]);
/* Record which port index serves this Tx channel. */
3729 adapter->chan_map[pi->tx_chan] = i;
3730 print_port_info(adapter->port[i]);
3733 dev_err(&pdev->dev, "could not register any net devices\n");
3737 dev_warn(&pdev->dev, "only %d net devices registered\n", i);
/* The per-adapter debugfs directory exists only if the module root does. */
3741 if (cxgb4_debugfs_root) {
3742 adapter->debugfs_root = debugfs_create_dir(pci_name(pdev),
3743 cxgb4_debugfs_root);
3744 setup_debugfs(adapter);
3747 if (is_offload(adapter))
3748 attach_ulds(adapter);
3751 #ifdef CONFIG_PCI_IOV
/* SR-IOV is enabled only for functions with a positive num_vf[] entry. */
3752 if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0)
3753 if (pci_enable_sriov(pdev, num_vf[func]) == 0)
3754 dev_info(&pdev->dev,
3755 "instantiated %u virtual functions\n",
/*
 * Error unwinding.
 * NOTE(review): apart from out_release_regions the labels of this section
 * are not visible in this listing.
 */
3761 free_some_resources(adapter);
3763 iounmap(adapter->regs);
3767 pci_disable_pcie_error_reporting(pdev);
3768 pci_disable_device(pdev);
3769 out_release_regions:
3770 pci_release_regions(pdev);
3771 pci_set_drvdata(pdev, NULL);
/*
 * PCI remove callback (cxgb4_driver.remove).  Disables SR-IOV, detaches the
 * ULDs, unregisters the net devices, removes the debugfs directory and
 * releases the adapter's resources and the PCI device.
 */
3775 static void __devexit remove_one(struct pci_dev *pdev)
3777 struct adapter *adapter = pci_get_drvdata(pdev);
3779 pci_disable_sriov(pdev);
/*
 * NOTE(review): the test deciding whether an adapter exists for this
 * function is not visible in this listing.
 */
3784 if (is_offload(adapter))
3785 detach_ulds(adapter);
/* Only ports that reached NETREG_REGISTERED are unregistered. */
3787 for_each_port(adapter, i)
3788 if (adapter->port[i]->reg_state == NETREG_REGISTERED)
3789 unregister_netdev(adapter->port[i]);
3791 if (adapter->debugfs_root)
3792 debugfs_remove_recursive(adapter->debugfs_root);
/*
 * NOTE(review): the statement executed when FULL_INIT_DONE is set is not
 * visible in this listing.
 */
3794 if (adapter->flags & FULL_INIT_DONE)
3797 free_some_resources(adapter);
3798 iounmap(adapter->regs);
3800 pci_disable_pcie_error_reporting(pdev);
3801 pci_disable_device(pdev);
3802 pci_release_regions(pdev);
3803 pci_set_drvdata(pdev, NULL);
/*
 * NOTE(review): this second pci_release_regions() presumably sits on a
 * separate branch for functions that have no adapter; the branch line is not
 * visible in this listing -- confirm it cannot run after the call above.
 */
3805 pci_release_regions(pdev);
/*
 * PCI driver descriptor; registered in cxgb4_init_module() and unregistered
 * in cxgb4_cleanup_module().
 */
3808 static struct pci_driver cxgb4_driver = {
3809 .name = KBUILD_MODNAME,
3810 .id_table = cxgb4_pci_tbl,
3812 .remove = __devexit_p(remove_one),
3813 .err_handler = &cxgb4_eeh,
/*
 * Module entry point: creates the module's debugfs root directory and
 * registers the PCI driver.
 */
3816 static int __init cxgb4_init_module(void)
3820 /* Debugfs support is optional, just warn if this fails */
3821 cxgb4_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3822 if (!cxgb4_debugfs_root)
3823 pr_warning("could not create debugfs entry, continuing\n");
3825 ret = pci_register_driver(&cxgb4_driver);
/*
 * NOTE(review): presumably executed only when registration failed; the test
 * on ret is not visible in this listing.
 */
3827 debugfs_remove(cxgb4_debugfs_root);
/*
 * Module exit point: unregisters the PCI driver first, then removes the
 * module's debugfs root directory.
 */
3831 static void __exit cxgb4_cleanup_module(void)
3833 pci_unregister_driver(&cxgb4_driver);
3834 debugfs_remove(cxgb4_debugfs_root); /* NULL ok */
/* Register the module's load and unload entry points. */
3837 module_init(cxgb4_init_module);
3838 module_exit(cxgb4_cleanup_module);