2 * This file is part of the Chelsio T4 Ethernet driver for Linux.
4 * Copyright (c) 2003-2010 Chelsio Communications, Inc. All rights reserved.
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
35 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
37 #include <linux/bitmap.h>
38 #include <linux/crc32.h>
39 #include <linux/ctype.h>
40 #include <linux/debugfs.h>
41 #include <linux/err.h>
42 #include <linux/etherdevice.h>
43 #include <linux/firmware.h>
44 #include <linux/if_vlan.h>
45 #include <linux/init.h>
46 #include <linux/log2.h>
47 #include <linux/mdio.h>
48 #include <linux/module.h>
49 #include <linux/moduleparam.h>
50 #include <linux/mutex.h>
51 #include <linux/netdevice.h>
52 #include <linux/pci.h>
53 #include <linux/aer.h>
54 #include <linux/rtnetlink.h>
55 #include <linux/sched.h>
56 #include <linux/seq_file.h>
57 #include <linux/sockios.h>
58 #include <linux/vmalloc.h>
59 #include <linux/workqueue.h>
60 #include <net/neighbour.h>
61 #include <net/netevent.h>
62 #include <asm/uaccess.h>
70 #define DRV_VERSION "1.3.0-ko"
71 #define DRV_DESC "Chelsio T4 Network Driver"
74 * Max interrupt hold-off timer value in us. Queues fall back to this value
75 * under extreme memory pressure so it's largish to give the system time to
76 * free resources.
78 #define MAX_SGE_TIMERVAL 200U
82 * Virtual Function provisioning constants. We need two extra Ingress Queues
83 * with Interrupt capability to serve as the VF's Firmware Event Queue and
84 * Forwarded Interrupt Queue (when using MSI mode) -- neither will have Free
85 * Lists associated with them. For each Ethernet/Control Egress Queue and
86 * for each Free List, we need an Egress Context.
89 VFRES_NPORTS = 1, /* # of "ports" per VF */
90 VFRES_NQSETS = 2, /* # of "Queue Sets" per VF */
92 VFRES_NVI = VFRES_NPORTS, /* # of Virtual Interfaces */
93 VFRES_NETHCTRL = VFRES_NQSETS, /* # of EQs used for ETH or CTRL Qs */
94 VFRES_NIQFLINT = VFRES_NQSETS+2,/* # of ingress Qs/w Free List(s)/intr */
95 VFRES_NIQ = 0, /* # of non-fl/int ingress queues */
96 VFRES_NEQ = VFRES_NQSETS*2, /* # of egress queues */
97 VFRES_TC = 0, /* PCI-E traffic class */
98 VFRES_NEXACTF = 16, /* # of exact MPS filters */
100 VFRES_R_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF|FW_CMD_CAP_PORT,
101 VFRES_WX_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF,
105 * Provide a Port Access Rights Mask for the specified PF/VF. This is very
106 * static and likely not to be useful in the long run. We really need to
107 * implement some form of persistent configuration which the firmware
108 * controls.
110 static unsigned int pfvfres_pmask(struct adapter *adapter,
111 unsigned int pf, unsigned int vf)
113 unsigned int portn, portvec;
116 * Give PFs access to all of the ports.
119 return FW_PFVF_CMD_PMASK_MASK;
122 * For VFs, we'll assign them access to the ports based purely on the
123 * PF. We assign active ports in order, wrapping around if there are
124 * fewer active ports than PFs: e.g. active port[pf % nports].
125 * Unfortunately the adapter's port_info structs haven't been
126 * initialized yet so we have to compute this.
128 if (adapter->params.nports == 0)
131 portn = pf % adapter->params.nports;
132 portvec = adapter->params.portvec;
135 * Isolate the lowest set bit in the port vector. If we're at
136 * the port number that we want, return that as the pmask.
137 * Otherwise mask that bit out of the port vector and
138 * decrement our port number ...
140 unsigned int pmask = portvec ^ (portvec & (portvec-1));
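/*
 * Illustrative sketch (not part of the driver): portvec ^ (portvec &
 * (portvec - 1)) above isolates the lowest set bit of the port vector.
 * v & (v - 1) clears the lowest set bit, so XORing the result with the
 * original value leaves exactly that bit, e.g. 0b0110 -> 0b0010.  The
 * surrounding loop peels one port bit per iteration until it reaches
 * port pf % nports.
 */
static inline unsigned int isolate_lowest_set_bit(unsigned int v)
{
	return v ^ (v & (v - 1));	/* 0 when v == 0 */
}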
151 MEMWIN0_APERTURE = 65536,
152 MEMWIN0_BASE = 0x30000,
153 MEMWIN1_APERTURE = 32768,
154 MEMWIN1_BASE = 0x28000,
155 MEMWIN2_APERTURE = 2048,
156 MEMWIN2_BASE = 0x1b800,
160 MAX_TXQ_ENTRIES = 16384,
161 MAX_CTRL_TXQ_ENTRIES = 1024,
162 MAX_RSPQ_ENTRIES = 16384,
163 MAX_RX_BUFFERS = 16384,
164 MIN_TXQ_ENTRIES = 32,
165 MIN_CTRL_TXQ_ENTRIES = 32,
166 MIN_RSPQ_ENTRIES = 128,
170 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
171 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
172 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
174 #define CH_DEVICE(devid, data) { PCI_VDEVICE(CHELSIO, devid), (data) }
176 static DEFINE_PCI_DEVICE_TABLE(cxgb4_pci_tbl) = {
177 CH_DEVICE(0xa000, 0), /* PE10K */
178 CH_DEVICE(0x4001, -1),
179 CH_DEVICE(0x4002, -1),
180 CH_DEVICE(0x4003, -1),
181 CH_DEVICE(0x4004, -1),
182 CH_DEVICE(0x4005, -1),
183 CH_DEVICE(0x4006, -1),
184 CH_DEVICE(0x4007, -1),
185 CH_DEVICE(0x4008, -1),
186 CH_DEVICE(0x4009, -1),
187 CH_DEVICE(0x400a, -1),
188 CH_DEVICE(0x4401, 4),
189 CH_DEVICE(0x4402, 4),
190 CH_DEVICE(0x4403, 4),
191 CH_DEVICE(0x4404, 4),
192 CH_DEVICE(0x4405, 4),
193 CH_DEVICE(0x4406, 4),
194 CH_DEVICE(0x4407, 4),
195 CH_DEVICE(0x4408, 4),
196 CH_DEVICE(0x4409, 4),
197 CH_DEVICE(0x440a, 4),
201 #define FW_FNAME "cxgb4/t4fw.bin"
203 MODULE_DESCRIPTION(DRV_DESC);
204 MODULE_AUTHOR("Chelsio Communications");
205 MODULE_LICENSE("Dual BSD/GPL");
206 MODULE_VERSION(DRV_VERSION);
207 MODULE_DEVICE_TABLE(pci, cxgb4_pci_tbl);
208 MODULE_FIRMWARE(FW_FNAME);
210 static int dflt_msg_enable = DFLT_MSG_ENABLE;
212 module_param(dflt_msg_enable, int, 0644);
213 MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T4 default message enable bitmap");
216 * The driver uses the best interrupt scheme available on a platform in the
217 * order MSI-X, MSI, legacy INTx interrupts. This parameter determines which
218 * of these schemes the driver may consider as follows:
220 * msi = 2: choose from among all three options
221 * msi = 1: only consider MSI and INTx interrupts
222 * msi = 0: force INTx interrupts
226 module_param(msi, int, 0644);
227 MODULE_PARM_DESC(msi, "whether to use INTx (0), MSI (1) or MSI-X (2)");
230 * Queue interrupt hold-off timer values. Queues default to the first of these
231 * upon creation.
233 static unsigned int intr_holdoff[SGE_NTIMERS - 1] = { 5, 10, 20, 50, 100 };
235 module_param_array(intr_holdoff, uint, NULL, 0644);
236 MODULE_PARM_DESC(intr_holdoff, "values for queue interrupt hold-off timers "
237 "0..4 in microseconds");
239 static unsigned int intr_cnt[SGE_NCOUNTERS - 1] = { 4, 8, 16 };
241 module_param_array(intr_cnt, uint, NULL, 0644);
242 MODULE_PARM_DESC(intr_cnt,
243 "thresholds 1..3 for queue interrupt packet counters");
247 #ifdef CONFIG_PCI_IOV
248 module_param(vf_acls, bool, 0644);
249 MODULE_PARM_DESC(vf_acls, "if set enable virtualization L2 ACL enforcement");
251 static unsigned int num_vf[4];
253 module_param_array(num_vf, uint, NULL, 0644);
254 MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3");
257 static struct dentry *cxgb4_debugfs_root;
259 static LIST_HEAD(adapter_list);
260 static DEFINE_MUTEX(uld_mutex);
261 static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
262 static const char *uld_str[] = { "RDMA", "iSCSI" };
264 static void link_report(struct net_device *dev)
266 if (!netif_carrier_ok(dev))
267 netdev_info(dev, "link down\n");
269 static const char *fc[] = { "no", "Rx", "Tx", "Tx/Rx" };
271 const char *s = "10Mbps";
272 const struct port_info *p = netdev_priv(dev);
274 switch (p->link_cfg.speed) {
286 netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s,
291 void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat)
293 struct net_device *dev = adapter->port[port_id];
295 /* Skip changes from disabled ports. */
296 if (netif_running(dev) && link_stat != netif_carrier_ok(dev)) {
298 netif_carrier_on(dev);
300 netif_carrier_off(dev);
306 void t4_os_portmod_changed(const struct adapter *adap, int port_id)
308 static const char *mod_str[] = {
309 NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
312 const struct net_device *dev = adap->port[port_id];
313 const struct port_info *pi = netdev_priv(dev);
315 if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
316 netdev_info(dev, "port module unplugged\n");
317 else if (pi->mod_type < ARRAY_SIZE(mod_str))
318 netdev_info(dev, "%s module inserted\n", mod_str[pi->mod_type]);
322 * Configure the exact and hash address filters to handle a port's multicast
323 * and secondary unicast MAC addresses.
325 static int set_addr_filters(const struct net_device *dev, bool sleep)
333 const struct netdev_hw_addr *ha;
334 int uc_cnt = netdev_uc_count(dev);
335 int mc_cnt = netdev_mc_count(dev);
336 const struct port_info *pi = netdev_priv(dev);
337 unsigned int mb = pi->adapter->fn;
339 /* first do the secondary unicast addresses */
340 netdev_for_each_uc_addr(ha, dev) {
341 addr[naddr++] = ha->addr;
342 if (--uc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
343 ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
344 naddr, addr, filt_idx, &uhash, sleep);
353 /* next set up the multicast addresses */
354 netdev_for_each_mc_addr(ha, dev) {
355 addr[naddr++] = ha->addr;
356 if (--mc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
357 ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
358 naddr, addr, filt_idx, &mhash, sleep);
367 return t4_set_addr_hash(pi->adapter, mb, pi->viid, uhash != 0,
368 uhash | mhash, sleep);
372 * Set Rx properties of a port, such as promiscuity, address filters, and MTU.
373 * If @mtu is -1 it is left unchanged.
375 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
378 struct port_info *pi = netdev_priv(dev);
380 ret = set_addr_filters(dev, sleep_ok);
382 ret = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, mtu,
383 (dev->flags & IFF_PROMISC) ? 1 : 0,
384 (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, -1,
390 * link_start - enable a port
391 * @dev: the port to enable
393 * Performs the MAC and PHY actions needed to enable a port.
395 static int link_start(struct net_device *dev)
398 struct port_info *pi = netdev_priv(dev);
399 unsigned int mb = pi->adapter->fn;
402 * We do not set address filters and promiscuity here; the stack does
403 * that step explicitly.
405 ret = t4_set_rxmode(pi->adapter, mb, pi->viid, dev->mtu, -1, -1, -1,
406 pi->vlan_grp != NULL, true);
408 ret = t4_change_mac(pi->adapter, mb, pi->viid,
409 pi->xact_addr_filt, dev->dev_addr, true,
412 pi->xact_addr_filt = ret;
417 ret = t4_link_start(pi->adapter, mb, pi->tx_chan,
420 ret = t4_enable_vi(pi->adapter, mb, pi->viid, true, true);
425 * Response queue handler for the FW event queue.
427 static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
428 const struct pkt_gl *gl)
430 u8 opcode = ((const struct rss_header *)rsp)->opcode;
432 rsp++; /* skip RSS header */
433 if (likely(opcode == CPL_SGE_EGR_UPDATE)) {
434 const struct cpl_sge_egr_update *p = (void *)rsp;
435 unsigned int qid = EGR_QID(ntohl(p->opcode_qid));
438 txq = q->adap->sge.egr_map[qid - q->adap->sge.egr_start];
440 if ((u8 *)txq < (u8 *)q->adap->sge.ofldtxq) {
441 struct sge_eth_txq *eq;
443 eq = container_of(txq, struct sge_eth_txq, q);
444 netif_tx_wake_queue(eq->txq);
446 struct sge_ofld_txq *oq;
448 oq = container_of(txq, struct sge_ofld_txq, q);
449 tasklet_schedule(&oq->qresume_tsk);
451 } else if (opcode == CPL_FW6_MSG || opcode == CPL_FW4_MSG) {
452 const struct cpl_fw6_msg *p = (void *)rsp;
455 t4_handle_fw_rpl(q->adap, p->data);
456 } else if (opcode == CPL_L2T_WRITE_RPL) {
457 const struct cpl_l2t_write_rpl *p = (void *)rsp;
459 do_l2t_write_rpl(q->adap, p);
461 dev_err(q->adap->pdev_dev,
462 "unexpected CPL %#x on FW event queue\n", opcode);
467 * uldrx_handler - response queue handler for ULD queues
468 * @q: the response queue that received the packet
469 * @rsp: the response queue descriptor holding the offload message
470 * @gl: the gather list of packet fragments
472 * Deliver an ingress offload packet to a ULD. All processing is done by
473 * the ULD; we just maintain statistics.
475 static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
476 const struct pkt_gl *gl)
478 struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);
480 if (ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], rsp, gl)) {
486 else if (gl == CXGB4_MSG_AN)
493 static void disable_msi(struct adapter *adapter)
495 if (adapter->flags & USING_MSIX) {
496 pci_disable_msix(adapter->pdev);
497 adapter->flags &= ~USING_MSIX;
498 } else if (adapter->flags & USING_MSI) {
499 pci_disable_msi(adapter->pdev);
500 adapter->flags &= ~USING_MSI;
505 * Interrupt handler for non-data events used with MSI-X.
507 static irqreturn_t t4_nondata_intr(int irq, void *cookie)
509 struct adapter *adap = cookie;
511 u32 v = t4_read_reg(adap, MYPF_REG(PL_PF_INT_CAUSE));
514 t4_write_reg(adap, MYPF_REG(PL_PF_INT_CAUSE), v);
516 t4_slow_intr_handler(adap);
521 * Name the MSI-X interrupts.
523 static void name_msix_vecs(struct adapter *adap)
525 int i, j, msi_idx = 2, n = sizeof(adap->msix_info[0].desc) - 1;
527 /* non-data interrupts */
528 snprintf(adap->msix_info[0].desc, n, "%s", adap->name);
529 adap->msix_info[0].desc[n] = 0;
532 snprintf(adap->msix_info[1].desc, n, "%s-FWeventq", adap->name);
533 adap->msix_info[1].desc[n] = 0;
535 /* Ethernet queues */
536 for_each_port(adap, j) {
537 struct net_device *d = adap->port[j];
538 const struct port_info *pi = netdev_priv(d);
540 for (i = 0; i < pi->nqsets; i++, msi_idx++) {
541 snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d",
543 adap->msix_info[msi_idx].desc[n] = 0;
548 for_each_ofldrxq(&adap->sge, i) {
549 snprintf(adap->msix_info[msi_idx].desc, n, "%s-ofld%d",
551 adap->msix_info[msi_idx++].desc[n] = 0;
553 for_each_rdmarxq(&adap->sge, i) {
554 snprintf(adap->msix_info[msi_idx].desc, n, "%s-rdma%d",
556 adap->msix_info[msi_idx++].desc[n] = 0;
560 static int request_msix_queue_irqs(struct adapter *adap)
562 struct sge *s = &adap->sge;
563 int err, ethqidx, ofldqidx = 0, rdmaqidx = 0, msi = 2;
565 err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
566 adap->msix_info[1].desc, &s->fw_evtq);
570 for_each_ethrxq(s, ethqidx) {
571 err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
572 adap->msix_info[msi].desc,
573 &s->ethrxq[ethqidx].rspq);
578 for_each_ofldrxq(s, ofldqidx) {
579 err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
580 adap->msix_info[msi].desc,
581 &s->ofldrxq[ofldqidx].rspq);
586 for_each_rdmarxq(s, rdmaqidx) {
587 err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
588 adap->msix_info[msi].desc,
589 &s->rdmarxq[rdmaqidx].rspq);
597 while (--rdmaqidx >= 0)
598 free_irq(adap->msix_info[--msi].vec,
599 &s->rdmarxq[rdmaqidx].rspq);
600 while (--ofldqidx >= 0)
601 free_irq(adap->msix_info[--msi].vec,
602 &s->ofldrxq[ofldqidx].rspq);
603 while (--ethqidx >= 0)
604 free_irq(adap->msix_info[--msi].vec, &s->ethrxq[ethqidx].rspq);
605 free_irq(adap->msix_info[1].vec, &s->fw_evtq);
609 static void free_msix_queue_irqs(struct adapter *adap)
612 struct sge *s = &adap->sge;
614 free_irq(adap->msix_info[1].vec, &s->fw_evtq);
615 for_each_ethrxq(s, i)
616 free_irq(adap->msix_info[msi++].vec, &s->ethrxq[i].rspq);
617 for_each_ofldrxq(s, i)
618 free_irq(adap->msix_info[msi++].vec, &s->ofldrxq[i].rspq);
619 for_each_rdmarxq(s, i)
620 free_irq(adap->msix_info[msi++].vec, &s->rdmarxq[i].rspq);
624 * write_rss - write the RSS table for a given port
626 * @queues: array of queue indices for RSS
628 * Sets up the portion of the HW RSS table for the port's VI to distribute
629 * packets to the Rx queues in @queues.
631 static int write_rss(const struct port_info *pi, const u16 *queues)
635 const struct sge_eth_rxq *q = &pi->adapter->sge.ethrxq[pi->first_qset];
637 rss = kmalloc(pi->rss_size * sizeof(u16), GFP_KERNEL);
641 /* map the queue indices to queue ids */
642 for (i = 0; i < pi->rss_size; i++, queues++)
643 rss[i] = q[*queues].rspq.abs_id;
645 err = t4_config_rss_range(pi->adapter, pi->adapter->fn, pi->viid, 0,
646 pi->rss_size, rss, pi->rss_size);
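/*
 * Illustrative example: a port with nqsets = 4 and rss_size = 128 might
 * spread { 0, 1, 2, 3, 0, 1, ... } across its 128 indirection slots;
 * the loop above rewrites each slot into the absolute response-queue id
 * (rspq.abs_id) of the chosen Rx queue before the range is written to
 * the firmware.
 */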
652 * setup_rss - configure RSS
655 * Sets up RSS for each port.
657 static int setup_rss(struct adapter *adap)
661 for_each_port(adap, i) {
662 const struct port_info *pi = adap2pinfo(adap, i);
664 err = write_rss(pi, pi->rss);
672 * Return the channel of the ingress queue with the given qid.
674 static unsigned int rxq_to_chan(const struct sge *p, unsigned int qid)
676 qid -= p->ingr_start;
677 return netdev2pinfo(p->ingr_map[qid]->netdev)->tx_chan;
681 * Wait until all NAPI handlers are descheduled.
683 static void quiesce_rx(struct adapter *adap)
687 for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
688 struct sge_rspq *q = adap->sge.ingr_map[i];
691 napi_disable(&q->napi);
696 * Enable NAPI scheduling and interrupt generation for all Rx queues.
698 static void enable_rx(struct adapter *adap)
702 for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
703 struct sge_rspq *q = adap->sge.ingr_map[i];
708 napi_enable(&q->napi);
709 /* 0-increment GTS to start the timer and enable interrupts */
710 t4_write_reg(adap, MYPF_REG(SGE_PF_GTS),
711 SEINTARM(q->intr_params) |
712 INGRESSQID(q->cntxt_id));
717 * setup_sge_queues - configure SGE Tx/Rx/response queues
720 * Determines how many sets of SGE queues to use and initializes them.
721 * We support multiple queue sets per port if we have MSI-X, otherwise
722 * just one queue set per port.
724 static int setup_sge_queues(struct adapter *adap)
726 int err, msi_idx, i, j;
727 struct sge *s = &adap->sge;
729 bitmap_zero(s->starving_fl, MAX_EGRQ);
730 bitmap_zero(s->txq_maperr, MAX_EGRQ);
732 if (adap->flags & USING_MSIX)
733 msi_idx = 1; /* vector 0 is for non-queue interrupts */
735 err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
739 msi_idx = -((int)s->intrq.abs_id + 1); /* forward intrs to intrq */
742 err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
743 msi_idx, NULL, fwevtq_handler);
745 freeout: t4_free_sge_resources(adap);
749 for_each_port(adap, i) {
750 struct net_device *dev = adap->port[i];
751 struct port_info *pi = netdev_priv(dev);
752 struct sge_eth_rxq *q = &s->ethrxq[pi->first_qset];
753 struct sge_eth_txq *t = &s->ethtxq[pi->first_qset];
755 for (j = 0; j < pi->nqsets; j++, q++) {
758 err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
764 memset(&q->stats, 0, sizeof(q->stats));
766 for (j = 0; j < pi->nqsets; j++, t++) {
767 err = t4_sge_alloc_eth_txq(adap, t, dev,
768 netdev_get_tx_queue(dev, j),
769 s->fw_evtq.cntxt_id);
775 j = s->ofldqsets / adap->params.nports; /* ofld queues per channel */
776 for_each_ofldrxq(s, i) {
777 struct sge_ofld_rxq *q = &s->ofldrxq[i];
778 struct net_device *dev = adap->port[i / j];
782 err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, msi_idx,
783 &q->fl, uldrx_handler);
786 memset(&q->stats, 0, sizeof(q->stats));
787 s->ofld_rxq[i] = q->rspq.abs_id;
788 err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i], dev,
789 s->fw_evtq.cntxt_id);
794 for_each_rdmarxq(s, i) {
795 struct sge_ofld_rxq *q = &s->rdmarxq[i];
799 err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i],
800 msi_idx, &q->fl, uldrx_handler);
803 memset(&q->stats, 0, sizeof(q->stats));
804 s->rdma_rxq[i] = q->rspq.abs_id;
807 for_each_port(adap, i) {
809 * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't
810 * have RDMA queues, and that's the right value.
812 err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i],
814 s->rdmarxq[i].rspq.cntxt_id);
819 t4_write_reg(adap, MPS_TRC_RSS_CONTROL,
820 RSSCONTROL(netdev2pinfo(adap->port[0])->tx_chan) |
821 QUEUENUMBER(s->ethrxq[0].rspq.abs_id));
826 * Returns 0 if new FW was successfully loaded, a positive errno if a load was
827 * started but failed, and a negative errno if flash load couldn't start.
829 static int upgrade_fw(struct adapter *adap)
833 const struct fw_hdr *hdr;
834 const struct firmware *fw;
835 struct device *dev = adap->pdev_dev;
837 ret = request_firmware(&fw, FW_FNAME, dev);
839 dev_err(dev, "unable to load firmware image " FW_FNAME
840 ", error %d\n", ret);
844 hdr = (const struct fw_hdr *)fw->data;
845 vers = ntohl(hdr->fw_ver);
846 if (FW_HDR_FW_VER_MAJOR_GET(vers) != FW_VERSION_MAJOR) {
847 ret = -EINVAL; /* wrong major version, won't do */
852 * If the flash FW is unusable or we found something newer, load it.
854 if (FW_HDR_FW_VER_MAJOR_GET(adap->params.fw_vers) != FW_VERSION_MAJOR ||
855 vers > adap->params.fw_vers) {
856 ret = -t4_load_fw(adap, fw->data, fw->size);
858 dev_info(dev, "firmware upgraded to version %pI4 from "
859 FW_FNAME "\n", &hdr->fw_ver);
861 out: release_firmware(fw);
866 * Allocate a chunk of memory using kmalloc or, if that fails, vmalloc.
867 * The allocated memory is cleared.
869 void *t4_alloc_mem(size_t size)
871 void *p = kmalloc(size, GFP_KERNEL);
881 * Free memory allocated through t4_alloc_mem().
883 void t4_free_mem(void *addr)
885 if (is_vmalloc_addr(addr))
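/*
 * Usage sketch (illustrative): the kmalloc-with-vmalloc-fallback pair
 * above lets callers allocate large, zeroed tables without requiring
 * physically contiguous memory:
 *
 *	void *tbl = t4_alloc_mem(size);
 *	if (!tbl)
 *		return -ENOMEM;
 *	...
 *	t4_free_mem(tbl);
 *
 * tid_init() below allocates the TID tables this way; t4_free_mem()
 * selects the matching free routine with is_vmalloc_addr().
 */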
891 static inline int is_offload(const struct adapter *adap)
893 return adap->params.offload;
897 * Implementation of ethtool operations.
900 static u32 get_msglevel(struct net_device *dev)
902 return netdev2adap(dev)->msg_enable;
905 static void set_msglevel(struct net_device *dev, u32 val)
907 netdev2adap(dev)->msg_enable = val;
910 static char stats_strings[][ETH_GSTRING_LEN] = {
913 "TxBroadcastFrames ",
914 "TxMulticastFrames ",
922 "TxFrames512To1023 ",
923 "TxFrames1024To1518 ",
924 "TxFrames1519ToMax ",
939 "RxBroadcastFrames ",
940 "RxMulticastFrames ",
954 "RxFrames512To1023 ",
955 "RxFrames1024To1518 ",
956 "RxFrames1519ToMax ",
968 "RxBG0FramesDropped ",
969 "RxBG1FramesDropped ",
970 "RxBG2FramesDropped ",
971 "RxBG3FramesDropped ",
986 static int get_sset_count(struct net_device *dev, int sset)
990 return ARRAY_SIZE(stats_strings);
996 #define T4_REGMAP_SIZE (160 * 1024)
998 static int get_regs_len(struct net_device *dev)
1000 return T4_REGMAP_SIZE;
1003 static int get_eeprom_len(struct net_device *dev)
1008 static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
1010 struct adapter *adapter = netdev2adap(dev);
1012 strcpy(info->driver, KBUILD_MODNAME);
1013 strcpy(info->version, DRV_VERSION);
1014 strcpy(info->bus_info, pci_name(adapter->pdev));
1016 if (!adapter->params.fw_vers)
1017 strcpy(info->fw_version, "N/A");
1019 snprintf(info->fw_version, sizeof(info->fw_version),
1020 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1021 FW_HDR_FW_VER_MAJOR_GET(adapter->params.fw_vers),
1022 FW_HDR_FW_VER_MINOR_GET(adapter->params.fw_vers),
1023 FW_HDR_FW_VER_MICRO_GET(adapter->params.fw_vers),
1024 FW_HDR_FW_VER_BUILD_GET(adapter->params.fw_vers),
1025 FW_HDR_FW_VER_MAJOR_GET(adapter->params.tp_vers),
1026 FW_HDR_FW_VER_MINOR_GET(adapter->params.tp_vers),
1027 FW_HDR_FW_VER_MICRO_GET(adapter->params.tp_vers),
1028 FW_HDR_FW_VER_BUILD_GET(adapter->params.tp_vers));
1031 static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
1033 if (stringset == ETH_SS_STATS)
1034 memcpy(data, stats_strings, sizeof(stats_strings));
1038 * port stats maintained per queue of the port. They should be in the same
1039 * order as in stats_strings above.
1041 struct queue_port_stats {
1051 static void collect_sge_port_stats(const struct adapter *adap,
1052 const struct port_info *p, struct queue_port_stats *s)
1055 const struct sge_eth_txq *tx = &adap->sge.ethtxq[p->first_qset];
1056 const struct sge_eth_rxq *rx = &adap->sge.ethrxq[p->first_qset];
1058 memset(s, 0, sizeof(*s));
1059 for (i = 0; i < p->nqsets; i++, rx++, tx++) {
1061 s->tx_csum += tx->tx_cso;
1062 s->rx_csum += rx->stats.rx_cso;
1063 s->vlan_ex += rx->stats.vlan_ex;
1064 s->vlan_ins += tx->vlan_ins;
1065 s->gro_pkts += rx->stats.lro_pkts;
1066 s->gro_merged += rx->stats.lro_merged;
1070 static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
1073 struct port_info *pi = netdev_priv(dev);
1074 struct adapter *adapter = pi->adapter;
1076 t4_get_port_stats(adapter, pi->tx_chan, (struct port_stats *)data);
1078 data += sizeof(struct port_stats) / sizeof(u64);
1079 collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1083 * Return a version number to identify the type of adapter. The scheme is:
1084 * - bits 0..9: chip version
1085 * - bits 10..15: chip revision
1086 * - bits 16..23: register dump version
1088 static inline unsigned int mk_adap_vers(const struct adapter *ap)
1090 return 4 | (ap->params.rev << 10) | (1 << 16);
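/*
 * Illustrative decode of the scheme above (a sketch, not driver code):
 * a revision-1 T4 yields 4 | (1 << 10) | (1 << 16) = 0x10404.
 */
static inline void decode_adap_vers(unsigned int v, unsigned int *chip,
				    unsigned int *rev, unsigned int *dump)
{
	*chip = v & 0x3ff;		/* bits 0..9: chip version (4 = T4) */
	*rev = (v >> 10) & 0x3f;	/* bits 10..15: chip revision */
	*dump = (v >> 16) & 0xff;	/* bits 16..23: register dump version */
}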
1093 static void reg_block_dump(struct adapter *ap, void *buf, unsigned int start,
1096 u32 *p = buf + start;
1098 for ( ; start <= end; start += sizeof(u32))
1099 *p++ = t4_read_reg(ap, start);
1102 static void get_regs(struct net_device *dev, struct ethtool_regs *regs,
1105 static const unsigned int reg_ranges[] = {
1326 struct adapter *ap = netdev2adap(dev);
1328 regs->version = mk_adap_vers(ap);
1330 memset(buf, 0, T4_REGMAP_SIZE);
1331 for (i = 0; i < ARRAY_SIZE(reg_ranges); i += 2)
1332 reg_block_dump(ap, buf, reg_ranges[i], reg_ranges[i + 1]);
1335 static int restart_autoneg(struct net_device *dev)
1337 struct port_info *p = netdev_priv(dev);
1339 if (!netif_running(dev))
1341 if (p->link_cfg.autoneg != AUTONEG_ENABLE)
1343 t4_restart_aneg(p->adapter, p->adapter->fn, p->tx_chan);
1347 static int identify_port(struct net_device *dev, u32 data)
1349 struct adapter *adap = netdev2adap(dev);
1352 data = 2; /* default to 2 seconds */
1354 return t4_identify_port(adap, adap->fn, netdev2pinfo(dev)->viid,
1358 static unsigned int from_fw_linkcaps(unsigned int type, unsigned int caps)
1362 if (type == FW_PORT_TYPE_BT_SGMII || type == FW_PORT_TYPE_BT_XFI ||
1363 type == FW_PORT_TYPE_BT_XAUI) {
1365 if (caps & FW_PORT_CAP_SPEED_100M)
1366 v |= SUPPORTED_100baseT_Full;
1367 if (caps & FW_PORT_CAP_SPEED_1G)
1368 v |= SUPPORTED_1000baseT_Full;
1369 if (caps & FW_PORT_CAP_SPEED_10G)
1370 v |= SUPPORTED_10000baseT_Full;
1371 } else if (type == FW_PORT_TYPE_KX4 || type == FW_PORT_TYPE_KX) {
1372 v |= SUPPORTED_Backplane;
1373 if (caps & FW_PORT_CAP_SPEED_1G)
1374 v |= SUPPORTED_1000baseKX_Full;
1375 if (caps & FW_PORT_CAP_SPEED_10G)
1376 v |= SUPPORTED_10000baseKX4_Full;
1377 } else if (type == FW_PORT_TYPE_KR)
1378 v |= SUPPORTED_Backplane | SUPPORTED_10000baseKR_Full;
1379 else if (type == FW_PORT_TYPE_BP_AP)
1380 v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC;
1381 else if (type == FW_PORT_TYPE_FIBER_XFI ||
1382 type == FW_PORT_TYPE_FIBER_XAUI || type == FW_PORT_TYPE_SFP)
1383 v |= SUPPORTED_FIBRE;
1385 if (caps & FW_PORT_CAP_ANEG)
1386 v |= SUPPORTED_Autoneg;
1390 static unsigned int to_fw_linkcaps(unsigned int caps)
1394 if (caps & ADVERTISED_100baseT_Full)
1395 v |= FW_PORT_CAP_SPEED_100M;
1396 if (caps & ADVERTISED_1000baseT_Full)
1397 v |= FW_PORT_CAP_SPEED_1G;
1398 if (caps & ADVERTISED_10000baseT_Full)
1399 v |= FW_PORT_CAP_SPEED_10G;
1403 static int get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
1405 const struct port_info *p = netdev_priv(dev);
1407 if (p->port_type == FW_PORT_TYPE_BT_SGMII ||
1408 p->port_type == FW_PORT_TYPE_BT_XFI ||
1409 p->port_type == FW_PORT_TYPE_BT_XAUI)
1410 cmd->port = PORT_TP;
1411 else if (p->port_type == FW_PORT_TYPE_FIBER_XFI ||
1412 p->port_type == FW_PORT_TYPE_FIBER_XAUI)
1413 cmd->port = PORT_FIBRE;
1414 else if (p->port_type == FW_PORT_TYPE_SFP) {
1415 if (p->mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1416 p->mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1417 cmd->port = PORT_DA;
1419 cmd->port = PORT_FIBRE;
1421 cmd->port = PORT_OTHER;
1423 if (p->mdio_addr >= 0) {
1424 cmd->phy_address = p->mdio_addr;
1425 cmd->transceiver = XCVR_EXTERNAL;
1426 cmd->mdio_support = p->port_type == FW_PORT_TYPE_BT_SGMII ?
1427 MDIO_SUPPORTS_C22 : MDIO_SUPPORTS_C45;
1429 cmd->phy_address = 0; /* not really, but no better option */
1430 cmd->transceiver = XCVR_INTERNAL;
1431 cmd->mdio_support = 0;
1434 cmd->supported = from_fw_linkcaps(p->port_type, p->link_cfg.supported);
1435 cmd->advertising = from_fw_linkcaps(p->port_type,
1436 p->link_cfg.advertising);
1437 cmd->speed = netif_carrier_ok(dev) ? p->link_cfg.speed : 0;
1438 cmd->duplex = DUPLEX_FULL;
1439 cmd->autoneg = p->link_cfg.autoneg;
1445 static unsigned int speed_to_caps(int speed)
1447 if (speed == SPEED_100)
1448 return FW_PORT_CAP_SPEED_100M;
1449 if (speed == SPEED_1000)
1450 return FW_PORT_CAP_SPEED_1G;
1451 if (speed == SPEED_10000)
1452 return FW_PORT_CAP_SPEED_10G;
1456 static int set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
1459 struct port_info *p = netdev_priv(dev);
1460 struct link_config *lc = &p->link_cfg;
1462 if (cmd->duplex != DUPLEX_FULL) /* only full-duplex supported */
1465 if (!(lc->supported & FW_PORT_CAP_ANEG)) {
1467 * PHY offers a single speed. See if that's what's
1468 * being requested.
1470 if (cmd->autoneg == AUTONEG_DISABLE &&
1471 (lc->supported & speed_to_caps(cmd->speed)))
1476 if (cmd->autoneg == AUTONEG_DISABLE) {
1477 cap = speed_to_caps(cmd->speed);
1479 if (!(lc->supported & cap) || cmd->speed == SPEED_1000 ||
1480 cmd->speed == SPEED_10000)
1482 lc->requested_speed = cap;
1483 lc->advertising = 0;
1485 cap = to_fw_linkcaps(cmd->advertising);
1486 if (!(lc->supported & cap))
1488 lc->requested_speed = 0;
1489 lc->advertising = cap | FW_PORT_CAP_ANEG;
1491 lc->autoneg = cmd->autoneg;
1493 if (netif_running(dev))
1494 return t4_link_start(p->adapter, p->adapter->fn, p->tx_chan,
1499 static void get_pauseparam(struct net_device *dev,
1500 struct ethtool_pauseparam *epause)
1502 struct port_info *p = netdev_priv(dev);
1504 epause->autoneg = (p->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1505 epause->rx_pause = (p->link_cfg.fc & PAUSE_RX) != 0;
1506 epause->tx_pause = (p->link_cfg.fc & PAUSE_TX) != 0;
1509 static int set_pauseparam(struct net_device *dev,
1510 struct ethtool_pauseparam *epause)
1512 struct port_info *p = netdev_priv(dev);
1513 struct link_config *lc = &p->link_cfg;
1515 if (epause->autoneg == AUTONEG_DISABLE)
1516 lc->requested_fc = 0;
1517 else if (lc->supported & FW_PORT_CAP_ANEG)
1518 lc->requested_fc = PAUSE_AUTONEG;
1522 if (epause->rx_pause)
1523 lc->requested_fc |= PAUSE_RX;
1524 if (epause->tx_pause)
1525 lc->requested_fc |= PAUSE_TX;
1526 if (netif_running(dev))
1527 return t4_link_start(p->adapter, p->adapter->fn, p->tx_chan,
1532 static u32 get_rx_csum(struct net_device *dev)
1534 struct port_info *p = netdev_priv(dev);
1536 return p->rx_offload & RX_CSO;
1539 static int set_rx_csum(struct net_device *dev, u32 data)
1541 struct port_info *p = netdev_priv(dev);
1544 p->rx_offload |= RX_CSO;
1546 p->rx_offload &= ~RX_CSO;
1550 static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
1552 const struct port_info *pi = netdev_priv(dev);
1553 const struct sge *s = &pi->adapter->sge;
1555 e->rx_max_pending = MAX_RX_BUFFERS;
1556 e->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1557 e->rx_jumbo_max_pending = 0;
1558 e->tx_max_pending = MAX_TXQ_ENTRIES;
1560 e->rx_pending = s->ethrxq[pi->first_qset].fl.size - 8;
1561 e->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1562 e->rx_jumbo_pending = 0;
1563 e->tx_pending = s->ethtxq[pi->first_qset].q.size;
1566 static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
1569 const struct port_info *pi = netdev_priv(dev);
1570 struct adapter *adapter = pi->adapter;
1571 struct sge *s = &adapter->sge;
1573 if (e->rx_pending > MAX_RX_BUFFERS || e->rx_jumbo_pending ||
1574 e->tx_pending > MAX_TXQ_ENTRIES ||
1575 e->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1576 e->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1577 e->rx_pending < MIN_FL_ENTRIES || e->tx_pending < MIN_TXQ_ENTRIES)
1580 if (adapter->flags & FULL_INIT_DONE)
1583 for (i = 0; i < pi->nqsets; ++i) {
1584 s->ethtxq[pi->first_qset + i].q.size = e->tx_pending;
1585 s->ethrxq[pi->first_qset + i].fl.size = e->rx_pending + 8;
1586 s->ethrxq[pi->first_qset + i].rspq.size = e->rx_mini_pending;
1591 static int closest_timer(const struct sge *s, int time)
1593 int i, delta, match = 0, min_delta = INT_MAX;
1595 for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
1596 delta = time - s->timer_val[i];
1599 if (delta < min_delta) {
1607 static int closest_thres(const struct sge *s, int thres)
1609 int i, delta, match = 0, min_delta = INT_MAX;
1611 for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
1612 delta = thres - s->counter_val[i];
1615 if (delta < min_delta) {
1624 * Return a queue's interrupt hold-off time in us. 0 means no timer.
1626 static unsigned int qtimer_val(const struct adapter *adap,
1627 const struct sge_rspq *q)
1629 unsigned int idx = q->intr_params >> 1;
1631 return idx < SGE_NTIMERS ? adap->sge.timer_val[idx] : 0;
1635 * set_rxq_intr_params - set a queue's interrupt holdoff parameters
1636 * @adap: the adapter
1638 * @us: the hold-off time in us, or 0 to disable timer
1639 * @cnt: the hold-off packet count, or 0 to disable counter
1641 * Sets an Rx queue's interrupt hold-off time and packet count. At least
1642 * one of the two needs to be enabled for the queue to generate interrupts.
1644 static int set_rxq_intr_params(struct adapter *adap, struct sge_rspq *q,
1645 unsigned int us, unsigned int cnt)
1647 if ((us | cnt) == 0)
1654 new_idx = closest_thres(&adap->sge, cnt);
1655 if (q->desc && q->pktcnt_idx != new_idx) {
1656 /* the queue has already been created, update it */
1657 v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1658 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1659 FW_PARAMS_PARAM_YZ(q->cntxt_id);
1660 err = t4_set_params(adap, adap->fn, adap->fn, 0, 1, &v,
1665 q->pktcnt_idx = new_idx;
1668 us = us == 0 ? 6 : closest_timer(&adap->sge, us); /* idx 6: no timer */
1669 q->intr_params = QINTR_TIMER_IDX(us) | (cnt > 0 ? QINTR_CNT_EN : 0);
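/*
 * Worked example (illustrative, assuming the default hold-off timers
 * {5, 10, 20, 50, 100} us from intr_holdoff above and counter
 * thresholds {1, 4, 8, 16}): set_rxq_intr_params(adap, q, 12, 7)
 * selects the 10 us timer (closest to 12) and the 8-packet threshold
 * (closest to 7).  set_coalesce() below is the path taken by
 * "ethtool -C ethX rx-usecs 12 rx-frames 7".
 */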
1673 static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
1675 const struct port_info *pi = netdev_priv(dev);
1676 struct adapter *adap = pi->adapter;
1678 return set_rxq_intr_params(adap, &adap->sge.ethrxq[pi->first_qset].rspq,
1679 c->rx_coalesce_usecs, c->rx_max_coalesced_frames);
1682 static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
1684 const struct port_info *pi = netdev_priv(dev);
1685 const struct adapter *adap = pi->adapter;
1686 const struct sge_rspq *rq = &adap->sge.ethrxq[pi->first_qset].rspq;
1688 c->rx_coalesce_usecs = qtimer_val(adap, rq);
1689 c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN) ?
1690 adap->sge.counter_val[rq->pktcnt_idx] : 0;
1695 * eeprom_ptov - translate a physical EEPROM address to virtual
1696 * @phys_addr: the physical EEPROM address
1697 * @fn: the PCI function number
1698 * @sz: size of function-specific area
1700 * Translate a physical EEPROM address to virtual. The first 1K is
1701 * accessed through virtual addresses starting at 31K, the rest is
1702 * accessed through virtual addresses starting at 0.
1704 * The mapping is as follows:
1705 * [0..1K) -> [31K..32K)
1706 * [1K..1K+A) -> [31K-A..31K)
1707 * [1K+A..ES) -> [0..ES-A-1K)
1709 * where A = @fn * @sz, and ES = EEPROM size.
1711 static int eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz)
1713 fn *= sz;
1714 if (phys_addr < 1024)
1715 return phys_addr + (31 << 10);
1716 if (phys_addr < 1024 + fn)
1717 return 31744 - fn + phys_addr - 1024;
1718 if (phys_addr < EEPROMSIZE)
1719 return phys_addr - 1024 - fn;
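/*
 * Worked example (illustrative): for PCI function 1 with
 * sz = EEPROMPFSIZE, the function-private area is A = EEPROMPFSIZE
 * bytes.  Physical address 0 then maps to virtual 31744 (31K),
 * physical 512 maps to 32256, and the first byte past the private
 * area, physical 1024 + A, maps to virtual 0.
 */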
1724 * The next two routines implement eeprom read/write from physical addresses.
1726 static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v)
1728 int vaddr = eeprom_ptov(phys_addr, adap->fn, EEPROMPFSIZE);
1731 vaddr = pci_read_vpd(adap->pdev, vaddr, sizeof(u32), v);
1732 return vaddr < 0 ? vaddr : 0;
1735 static int eeprom_wr_phys(struct adapter *adap, unsigned int phys_addr, u32 v)
1737 int vaddr = eeprom_ptov(phys_addr, adap->fn, EEPROMPFSIZE);
1740 vaddr = pci_write_vpd(adap->pdev, vaddr, sizeof(u32), &v);
1741 return vaddr < 0 ? vaddr : 0;
1744 #define EEPROM_MAGIC 0x38E2F10C
1746 static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e,
1750 struct adapter *adapter = netdev2adap(dev);
1752 u8 *buf = kmalloc(EEPROMSIZE, GFP_KERNEL);
1756 e->magic = EEPROM_MAGIC;
1757 for (i = e->offset & ~3; !err && i < e->offset + e->len; i += 4)
1758 err = eeprom_rd_phys(adapter, i, (u32 *)&buf[i]);
1761 memcpy(data, buf + e->offset, e->len);
1766 static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
1771 u32 aligned_offset, aligned_len, *p;
1772 struct adapter *adapter = netdev2adap(dev);
1774 if (eeprom->magic != EEPROM_MAGIC)
1777 aligned_offset = eeprom->offset & ~3;
1778 aligned_len = (eeprom->len + (eeprom->offset & 3) + 3) & ~3;
1780 if (adapter->fn > 0) {
1781 u32 start = 1024 + adapter->fn * EEPROMPFSIZE;
1783 if (aligned_offset < start ||
1784 aligned_offset + aligned_len > start + EEPROMPFSIZE)
1788 if (aligned_offset != eeprom->offset || aligned_len != eeprom->len) {
1790 * RMW possibly needed for first or last words.
1792 buf = kmalloc(aligned_len, GFP_KERNEL);
1795 err = eeprom_rd_phys(adapter, aligned_offset, (u32 *)buf);
1796 if (!err && aligned_len > 4)
1797 err = eeprom_rd_phys(adapter,
1798 aligned_offset + aligned_len - 4,
1799 (u32 *)&buf[aligned_len - 4]);
1802 memcpy(buf + (eeprom->offset & 3), data, eeprom->len);
1806 err = t4_seeprom_wp(adapter, false);
1810 for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
1811 err = eeprom_wr_phys(adapter, aligned_offset, *p);
1812 aligned_offset += 4;
1816 err = t4_seeprom_wp(adapter, true);
1823 static int set_flash(struct net_device *netdev, struct ethtool_flash *ef)
1826 const struct firmware *fw;
1827 struct adapter *adap = netdev2adap(netdev);
1829 ef->data[sizeof(ef->data) - 1] = '\0';
1830 ret = request_firmware(&fw, ef->data, adap->pdev_dev);
1834 ret = t4_load_fw(adap, fw->data, fw->size);
1835 release_firmware(fw);
1837 dev_info(adap->pdev_dev, "loaded firmware %s\n", ef->data);
1841 #define WOL_SUPPORTED (WAKE_BCAST | WAKE_MAGIC)
1842 #define BCAST_CRC 0xa0ccc1a6
1844 static void get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
1846 wol->supported = WAKE_BCAST | WAKE_MAGIC;
1847 wol->wolopts = netdev2adap(dev)->wol;
1848 memset(&wol->sopass, 0, sizeof(wol->sopass));
1851 static int set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
1854 struct port_info *pi = netdev_priv(dev);
1856 if (wol->wolopts & ~WOL_SUPPORTED)
1858 t4_wol_magic_enable(pi->adapter, pi->tx_chan,
1859 (wol->wolopts & WAKE_MAGIC) ? dev->dev_addr : NULL);
1860 if (wol->wolopts & WAKE_BCAST) {
1861 err = t4_wol_pat_enable(pi->adapter, pi->tx_chan, 0xfe, ~0ULL,
1864 err = t4_wol_pat_enable(pi->adapter, pi->tx_chan, 1,
1865 ~6ULL, ~0ULL, BCAST_CRC, true);
1867 t4_wol_pat_enable(pi->adapter, pi->tx_chan, 0, 0, 0, 0, false);
1871 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1873 static int set_tso(struct net_device *dev, u32 value)
1876 dev->features |= TSO_FLAGS;
1878 dev->features &= ~TSO_FLAGS;
1882 static int set_flags(struct net_device *dev, u32 flags)
1884 return ethtool_op_set_flags(dev, flags, ETH_FLAG_RXHASH);
1887 static int get_rss_table(struct net_device *dev, struct ethtool_rxfh_indir *p)
1889 const struct port_info *pi = netdev_priv(dev);
1890 unsigned int n = min_t(unsigned int, p->size, pi->rss_size);
1892 p->size = pi->rss_size;
1894 p->ring_index[n] = pi->rss[n];
1898 static int set_rss_table(struct net_device *dev,
1899 const struct ethtool_rxfh_indir *p)
1902 struct port_info *pi = netdev_priv(dev);
1904 if (p->size != pi->rss_size)
1906 for (i = 0; i < p->size; i++)
1907 if (p->ring_index[i] >= pi->nqsets)
1909 for (i = 0; i < p->size; i++)
1910 pi->rss[i] = p->ring_index[i];
1911 if (pi->adapter->flags & FULL_INIT_DONE)
1912 return write_rss(pi, pi->rss);
1916 static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
1919 const struct port_info *pi = netdev_priv(dev);
1921 switch (info->cmd) {
1922 case ETHTOOL_GRXFH: {
1923 unsigned int v = pi->rss_mode;
1926 switch (info->flow_type) {
1928 if (v & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
1929 info->data = RXH_IP_SRC | RXH_IP_DST |
1930 RXH_L4_B_0_1 | RXH_L4_B_2_3;
1931 else if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
1932 info->data = RXH_IP_SRC | RXH_IP_DST;
1935 if ((v & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) &&
1936 (v & FW_RSS_VI_CONFIG_CMD_UDPEN))
1937 info->data = RXH_IP_SRC | RXH_IP_DST |
1938 RXH_L4_B_0_1 | RXH_L4_B_2_3;
1939 else if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
1940 info->data = RXH_IP_SRC | RXH_IP_DST;
1943 case AH_ESP_V4_FLOW:
1945 if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
1946 info->data = RXH_IP_SRC | RXH_IP_DST;
1949 if (v & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
1950 info->data = RXH_IP_SRC | RXH_IP_DST |
1951 RXH_L4_B_0_1 | RXH_L4_B_2_3;
1952 else if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
1953 info->data = RXH_IP_SRC | RXH_IP_DST;
1956 if ((v & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) &&
1957 (v & FW_RSS_VI_CONFIG_CMD_UDPEN))
1958 info->data = RXH_IP_SRC | RXH_IP_DST |
1959 RXH_L4_B_0_1 | RXH_L4_B_2_3;
1960 else if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
1961 info->data = RXH_IP_SRC | RXH_IP_DST;
1964 case AH_ESP_V6_FLOW:
1966 if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
1967 info->data = RXH_IP_SRC | RXH_IP_DST;
1972 case ETHTOOL_GRXRINGS:
1973 info->data = pi->nqsets;
1979 static struct ethtool_ops cxgb_ethtool_ops = {
1980 .get_settings = get_settings,
1981 .set_settings = set_settings,
1982 .get_drvinfo = get_drvinfo,
1983 .get_msglevel = get_msglevel,
1984 .set_msglevel = set_msglevel,
1985 .get_ringparam = get_sge_param,
1986 .set_ringparam = set_sge_param,
1987 .get_coalesce = get_coalesce,
1988 .set_coalesce = set_coalesce,
1989 .get_eeprom_len = get_eeprom_len,
1990 .get_eeprom = get_eeprom,
1991 .set_eeprom = set_eeprom,
1992 .get_pauseparam = get_pauseparam,
1993 .set_pauseparam = set_pauseparam,
1994 .get_rx_csum = get_rx_csum,
1995 .set_rx_csum = set_rx_csum,
1996 .set_tx_csum = ethtool_op_set_tx_ipv6_csum,
1997 .set_sg = ethtool_op_set_sg,
1998 .get_link = ethtool_op_get_link,
1999 .get_strings = get_strings,
2000 .phys_id = identify_port,
2001 .nway_reset = restart_autoneg,
2002 .get_sset_count = get_sset_count,
2003 .get_ethtool_stats = get_stats,
2004 .get_regs_len = get_regs_len,
2005 .get_regs = get_regs,
2009 .set_flags = set_flags,
2010 .get_rxnfc = get_rxnfc,
2011 .get_rxfh_indir = get_rss_table,
2012 .set_rxfh_indir = set_rss_table,
2013 .flash_device = set_flash,
2020 static int mem_open(struct inode *inode, struct file *file)
2022 file->private_data = inode->i_private;
2026 static ssize_t mem_read(struct file *file, char __user *buf, size_t count,
2030 loff_t avail = file->f_path.dentry->d_inode->i_size;
2031 unsigned int mem = (uintptr_t)file->private_data & 3;
2032 struct adapter *adap = file->private_data - mem;
2038 if (count > avail - pos)
2039 count = avail - pos;
2047 ret = t4_mc_read(adap, pos, data, NULL);
2049 ret = t4_edc_read(adap, mem, pos, data, NULL);
2053 ofst = pos % sizeof(data);
2054 len = min(count, sizeof(data) - ofst);
2055 if (copy_to_user(buf, (u8 *)data + ofst, len))
2062 count = pos - *ppos;
2067 static const struct file_operations mem_debugfs_fops = {
2068 .owner = THIS_MODULE,
2073 static void __devinit add_debugfs_mem(struct adapter *adap, const char *name,
2074 unsigned int idx, unsigned int size_mb)
2078 de = debugfs_create_file(name, S_IRUSR, adap->debugfs_root,
2079 (void *)adap + idx, &mem_debugfs_fops);
2080 if (de && de->d_inode)
2081 de->d_inode->i_size = size_mb << 20;
2084 static int __devinit setup_debugfs(struct adapter *adap)
2088 if (IS_ERR_OR_NULL(adap->debugfs_root))
2091 i = t4_read_reg(adap, MA_TARGET_MEM_ENABLE);
2092 if (i & EDRAM0_ENABLE)
2093 add_debugfs_mem(adap, "edc0", MEM_EDC0, 5);
2094 if (i & EDRAM1_ENABLE)
2095 add_debugfs_mem(adap, "edc1", MEM_EDC1, 5);
2096 if (i & EXT_MEM_ENABLE)
2097 add_debugfs_mem(adap, "mc", MEM_MC,
2098 EXT_MEM_SIZE_GET(t4_read_reg(adap, MA_EXT_MEMORY_BAR)));
2100 debugfs_create_file("l2t", S_IRUSR, adap->debugfs_root, adap,
2106 * upper-layer driver support
2110 * Allocate an active-open TID and set it to the supplied value.
2112 int cxgb4_alloc_atid(struct tid_info *t, void *data)
2116 spin_lock_bh(&t->atid_lock);
2118 union aopen_entry *p = t->afree;
2120 atid = p - t->atid_tab;
2125 spin_unlock_bh(&t->atid_lock);
2128 EXPORT_SYMBOL(cxgb4_alloc_atid);
2131 * Release an active-open TID.
2133 void cxgb4_free_atid(struct tid_info *t, unsigned int atid)
2135 union aopen_entry *p = &t->atid_tab[atid];
2137 spin_lock_bh(&t->atid_lock);
2141 spin_unlock_bh(&t->atid_lock);
2143 EXPORT_SYMBOL(cxgb4_free_atid);
2146 * Allocate a server TID and set it to the supplied value.
2148 int cxgb4_alloc_stid(struct tid_info *t, int family, void *data)
2152 spin_lock_bh(&t->stid_lock);
2153 if (family == PF_INET) {
2154 stid = find_first_zero_bit(t->stid_bmap, t->nstids);
2155 if (stid < t->nstids)
2156 __set_bit(stid, t->stid_bmap);
2160 stid = bitmap_find_free_region(t->stid_bmap, t->nstids, 2);
2165 t->stid_tab[stid].data = data;
2166 stid += t->stid_base;
2169 spin_unlock_bh(&t->stid_lock);
2172 EXPORT_SYMBOL(cxgb4_alloc_stid);
2175 * Release a server TID.
2177 void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family)
2179 stid -= t->stid_base;
2180 spin_lock_bh(&t->stid_lock);
2181 if (family == PF_INET)
2182 __clear_bit(stid, t->stid_bmap);
2184 bitmap_release_region(t->stid_bmap, stid, 2);
2185 t->stid_tab[stid].data = NULL;
2187 spin_unlock_bh(&t->stid_lock);
2189 EXPORT_SYMBOL(cxgb4_free_stid);
2192 * Populate a TID_RELEASE WR. Caller must properly size the skb.
2194 static void mk_tid_release(struct sk_buff *skb, unsigned int chan,
2197 struct cpl_tid_release *req;
2199 set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
2200 req = (struct cpl_tid_release *)__skb_put(skb, sizeof(*req));
2201 INIT_TP_WR(req, tid);
2202 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
2206 * Queue a TID release request and if necessary schedule a work queue to
2207 * process it.
2209 void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan,
2212 void **p = &t->tid_tab[tid];
2213 struct adapter *adap = container_of(t, struct adapter, tids);
2215 spin_lock_bh(&adap->tid_release_lock);
2216 *p = adap->tid_release_head;
2217 /* Low 2 bits encode the Tx channel number */
2218 adap->tid_release_head = (void **)((uintptr_t)p | chan);
2219 if (!adap->tid_release_task_busy) {
2220 adap->tid_release_task_busy = true;
2221 schedule_work(&adap->tid_release_task);
2223 spin_unlock_bh(&adap->tid_release_lock);
2225 EXPORT_SYMBOL(cxgb4_queue_tid_release);
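/*
 * Illustrative sketch (not part of the driver) of the pointer tagging
 * used above: tid_tab slots are pointer-aligned, so the low 2 bits of
 * &tid_tab[tid] are always zero and can carry the Tx channel (0-3)
 * while the slot sits on the deferred-release list.
 */
static inline void **tag_release_chan(void **p, unsigned int chan)
{
	return (void **)((uintptr_t)p | (chan & 3));
}

static inline unsigned int tagged_release_chan(void **p)
{
	return (uintptr_t)p & 3;	/* undone in process_tid_release_list() */
}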
2228 * Process the list of pending TID release requests.
2230 static void process_tid_release_list(struct work_struct *work)
2232 struct sk_buff *skb;
2233 struct adapter *adap;
2235 adap = container_of(work, struct adapter, tid_release_task);
2237 spin_lock_bh(&adap->tid_release_lock);
2238 while (adap->tid_release_head) {
2239 void **p = adap->tid_release_head;
2240 unsigned int chan = (uintptr_t)p & 3;
2241 p = (void *)p - chan;
2243 adap->tid_release_head = *p;
2245 spin_unlock_bh(&adap->tid_release_lock);
2247 while (!(skb = alloc_skb(sizeof(struct cpl_tid_release),
2249 schedule_timeout_uninterruptible(1);
2251 mk_tid_release(skb, chan, p - adap->tids.tid_tab);
2252 t4_ofld_send(adap, skb);
2253 spin_lock_bh(&adap->tid_release_lock);
2255 adap->tid_release_task_busy = false;
2256 spin_unlock_bh(&adap->tid_release_lock);
2260 * Release a TID and inform HW. If we are unable to allocate the release
2261 * message we defer to a work queue.
2263 void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid)
2266 struct sk_buff *skb;
2267 struct adapter *adap = container_of(t, struct adapter, tids);
2269 old = t->tid_tab[tid];
2270 skb = alloc_skb(sizeof(struct cpl_tid_release), GFP_ATOMIC);
2272 t->tid_tab[tid] = NULL;
2273 mk_tid_release(skb, chan, tid);
2274 t4_ofld_send(adap, skb);
2276 cxgb4_queue_tid_release(t, chan, tid);
2278 atomic_dec(&t->tids_in_use);
2280 EXPORT_SYMBOL(cxgb4_remove_tid);
2283 * Allocate and initialize the TID tables. Returns 0 on success.
2285 static int tid_init(struct tid_info *t)
2288 unsigned int natids = t->natids;
2290 size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) +
2291 t->nstids * sizeof(*t->stid_tab) +
2292 BITS_TO_LONGS(t->nstids) * sizeof(long);
2293 t->tid_tab = t4_alloc_mem(size);
2297 t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
2298 t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
2299 t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids];
2300 spin_lock_init(&t->stid_lock);
2301 spin_lock_init(&t->atid_lock);
2303 t->stids_in_use = 0;
2305 t->atids_in_use = 0;
2306 atomic_set(&t->tids_in_use, 0);
2308 /* Set up the free list for atid_tab and clear the stid bitmap. */
2311 t->atid_tab[natids - 1].next = &t->atid_tab[natids];
2312 t->afree = t->atid_tab;
2314 bitmap_zero(t->stid_bmap, t->nstids);
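/*
 * Illustrative note: the loop above threads a singly linked free list
 * through the atid table itself, so cxgb4_alloc_atid() is a pop and
 * cxgb4_free_atid() a push, both O(1):
 *
 *	p = t->afree;		alloc: pop the head entry
 *	t->afree = p->next;
 *
 *	p->next = t->afree;	free: push the entry back
 *	t->afree = p;
 */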
2319 * cxgb4_create_server - create an IP server
2321 * @stid: the server TID
2322 * @sip: local IP address to bind server to
2323 * @sport: the server's TCP port
2324 * @queue: queue to direct messages from this server to
2326 * Create an IP server for the given port and address.
2327 * Returns <0 on error and one of the %NET_XMIT_* values on success.
2329 int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
2330 __be32 sip, __be16 sport, unsigned int queue)
2333 struct sk_buff *skb;
2334 struct adapter *adap;
2335 struct cpl_pass_open_req *req;
2337 skb = alloc_skb(sizeof(*req), GFP_KERNEL);
2341 adap = netdev2adap(dev);
2342 req = (struct cpl_pass_open_req *)__skb_put(skb, sizeof(*req));
2344 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
2345 req->local_port = sport;
2346 req->peer_port = htons(0);
2347 req->local_ip = sip;
2348 req->peer_ip = htonl(0);
2349 chan = rxq_to_chan(&adap->sge, queue);
2350 req->opt0 = cpu_to_be64(TX_CHAN(chan));
2351 req->opt1 = cpu_to_be64(CONN_POLICY_ASK |
2352 SYN_RSS_ENABLE | SYN_RSS_QUEUE(queue));
2353 return t4_mgmt_tx(adap, skb);
2355 EXPORT_SYMBOL(cxgb4_create_server);
2358 * cxgb4_create_server6 - create an IPv6 server
2360 * @stid: the server TID
2361 * @sip: local IPv6 address to bind server to
2362 * @sport: the server's TCP port
2363 * @queue: queue to direct messages from this server to
2365 * Create an IPv6 server for the given port and address.
2366 * Returns <0 on error and one of the %NET_XMIT_* values on success.
2368 int cxgb4_create_server6(const struct net_device *dev, unsigned int stid,
2369 const struct in6_addr *sip, __be16 sport,
2373 struct sk_buff *skb;
2374 struct adapter *adap;
2375 struct cpl_pass_open_req6 *req;
2377 skb = alloc_skb(sizeof(*req), GFP_KERNEL);
2381 adap = netdev2adap(dev);
2382 req = (struct cpl_pass_open_req6 *)__skb_put(skb, sizeof(*req));
2384 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, stid));
2385 req->local_port = sport;
2386 req->peer_port = htons(0);
2387 req->local_ip_hi = *(__be64 *)(sip->s6_addr);
2388 req->local_ip_lo = *(__be64 *)(sip->s6_addr + 8);
2389 req->peer_ip_hi = cpu_to_be64(0);
2390 req->peer_ip_lo = cpu_to_be64(0);
2391 chan = rxq_to_chan(&adap->sge, queue);
2392 req->opt0 = cpu_to_be64(TX_CHAN(chan));
2393 req->opt1 = cpu_to_be64(CONN_POLICY_ASK |
2394 SYN_RSS_ENABLE | SYN_RSS_QUEUE(queue));
2395 return t4_mgmt_tx(adap, skb);
2397 EXPORT_SYMBOL(cxgb4_create_server6);
2400 * cxgb4_best_mtu - find the entry in the MTU table closest to an MTU
2401 * @mtus: the HW MTU table
2402 * @mtu: the target MTU
2403 * @idx: index of selected entry in the MTU table
2405 * Returns the index and the value in the HW MTU table that is closest to
2406 * but does not exceed @mtu, unless @mtu is smaller than any value in the
2407 * table, in which case that smallest available value is selected.
2409 unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu,
2414 while (i < NMTUS - 1 && mtus[i + 1] <= mtu)
2420 EXPORT_SYMBOL(cxgb4_best_mtu);
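/*
 * Worked example (illustrative, assuming a typical MTU table beginning
 * {88, 256, 512, 576, 808, 1024, 1280, 1488, 1500, ...}): an MTU of
 * 1400 selects 1280, the largest entry not exceeding it, while an MTU
 * of 64, smaller than every entry, selects the first entry, 88.
 */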
2423 * cxgb4_port_chan - get the HW channel of a port
2424 * @dev: the net device for the port
2426 * Return the HW Tx channel of the given port.
2428 unsigned int cxgb4_port_chan(const struct net_device *dev)
2430 return netdev2pinfo(dev)->tx_chan;
2432 EXPORT_SYMBOL(cxgb4_port_chan);
2435 * cxgb4_port_viid - get the VI id of a port
2436 * @dev: the net device for the port
2438 * Return the VI id of the given port.
2440 unsigned int cxgb4_port_viid(const struct net_device *dev)
2442 return netdev2pinfo(dev)->viid;
2444 EXPORT_SYMBOL(cxgb4_port_viid);
2447 * cxgb4_port_idx - get the index of a port
2448 * @dev: the net device for the port
2450 * Return the index of the given port.
2452 unsigned int cxgb4_port_idx(const struct net_device *dev)
2454 return netdev2pinfo(dev)->port_id;
2456 EXPORT_SYMBOL(cxgb4_port_idx);
2459 * cxgb4_netdev_by_hwid - return the net device of a HW port
2460 * @pdev: identifies the adapter
2461 * @id: the HW port id
2463 * Return the net device associated with the interface with the given HW
2464 * id.
2466 struct net_device *cxgb4_netdev_by_hwid(struct pci_dev *pdev, unsigned int id)
2468 const struct adapter *adap = pci_get_drvdata(pdev);
2470 if (!adap || id >= NCHAN)
2472 id = adap->chan_map[id];
2473 return id < MAX_NPORTS ? adap->port[id] : NULL;
2475 EXPORT_SYMBOL(cxgb4_netdev_by_hwid);
2477 void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4,
2478 struct tp_tcp_stats *v6)
2480 struct adapter *adap = pci_get_drvdata(pdev);
2482 spin_lock(&adap->stats_lock);
2483 t4_tp_get_tcp_stats(adap, v4, v6);
2484 spin_unlock(&adap->stats_lock);
2486 EXPORT_SYMBOL(cxgb4_get_tcp_stats);
2488 void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask,
2489 const unsigned int *pgsz_order)
2491 struct adapter *adap = netdev2adap(dev);
2493 t4_write_reg(adap, ULP_RX_ISCSI_TAGMASK, tag_mask);
2494 t4_write_reg(adap, ULP_RX_ISCSI_PSZ, HPZ0(pgsz_order[0]) |
2495 HPZ1(pgsz_order[1]) | HPZ2(pgsz_order[2]) |
2496 HPZ3(pgsz_order[3]));
2498 EXPORT_SYMBOL(cxgb4_iscsi_init);
2500 static struct pci_driver cxgb4_driver;
2502 static void check_neigh_update(struct neighbour *neigh)
2504 const struct device *parent;
2505 const struct net_device *netdev = neigh->dev;
2507 if (netdev->priv_flags & IFF_802_1Q_VLAN)
2508 netdev = vlan_dev_real_dev(netdev);
2509 parent = netdev->dev.parent;
2510 if (parent && parent->driver == &cxgb4_driver.driver)
2511 t4_l2t_update(dev_get_drvdata(parent), neigh);
2514 static int netevent_cb(struct notifier_block *nb, unsigned long event,
2518 case NETEVENT_NEIGH_UPDATE:
2519 check_neigh_update(data);
2521 case NETEVENT_PMTU_UPDATE:
2522 case NETEVENT_REDIRECT:
2529 static bool netevent_registered;
2530 static struct notifier_block cxgb4_netevent_nb = {
2531 .notifier_call = netevent_cb
2534 static void uld_attach(struct adapter *adap, unsigned int uld)
2537 struct cxgb4_lld_info lli;
2539 lli.pdev = adap->pdev;
2540 lli.l2t = adap->l2t;
2541 lli.tids = &adap->tids;
2542 lli.ports = adap->port;
2543 lli.vr = &adap->vres;
2544 lli.mtus = adap->params.mtus;
2545 if (uld == CXGB4_ULD_RDMA) {
2546 lli.rxq_ids = adap->sge.rdma_rxq;
2547 lli.nrxq = adap->sge.rdmaqs;
2548 } else if (uld == CXGB4_ULD_ISCSI) {
2549 lli.rxq_ids = adap->sge.ofld_rxq;
2550 lli.nrxq = adap->sge.ofldqsets;
2552 lli.ntxq = adap->sge.ofldqsets;
2553 lli.nchan = adap->params.nports;
2554 lli.nports = adap->params.nports;
2555 lli.wr_cred = adap->params.ofldq_wr_cred;
2556 lli.adapter_type = adap->params.rev;
2557 lli.iscsi_iolen = MAXRXDATA_GET(t4_read_reg(adap, TP_PARA_REG2));
2558 lli.udb_density = 1 << QUEUESPERPAGEPF0_GET(
2559 t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF) >>
2561 lli.ucq_density = 1 << QUEUESPERPAGEPF0_GET(
2562 t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF) >>
2564 lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS);
2565 lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL);
2566 lli.fw_vers = adap->params.fw_vers;
2568 handle = ulds[uld].add(&lli);
2569 if (IS_ERR(handle)) {
2570 dev_warn(adap->pdev_dev,
2571 "could not attach to the %s driver, error %ld\n",
2572 uld_str[uld], PTR_ERR(handle));
2576 adap->uld_handle[uld] = handle;
2578 if (!netevent_registered) {
2579 register_netevent_notifier(&cxgb4_netevent_nb);
2580 netevent_registered = true;
2583 if (adap->flags & FULL_INIT_DONE)
2584 ulds[uld].state_change(handle, CXGB4_STATE_UP);

static void attach_ulds(struct adapter *adap)
{
	unsigned int i;

	mutex_lock(&uld_mutex);
	list_add_tail(&adap->list_node, &adapter_list);
	for (i = 0; i < CXGB4_ULD_MAX; i++)
		if (ulds[i].add)
			uld_attach(adap, i);
	mutex_unlock(&uld_mutex);
}

static void detach_ulds(struct adapter *adap)
{
	unsigned int i;

	mutex_lock(&uld_mutex);
	list_del(&adap->list_node);
	for (i = 0; i < CXGB4_ULD_MAX; i++)
		if (adap->uld_handle[i]) {
			ulds[i].state_change(adap->uld_handle[i],
					     CXGB4_STATE_DETACH);
			adap->uld_handle[i] = NULL;
		}
	if (netevent_registered && list_empty(&adapter_list)) {
		unregister_netevent_notifier(&cxgb4_netevent_nb);
		netevent_registered = false;
	}
	mutex_unlock(&uld_mutex);
}

static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
{
	unsigned int i;

	mutex_lock(&uld_mutex);
	for (i = 0; i < CXGB4_ULD_MAX; i++)
		if (adap->uld_handle[i])
			ulds[i].state_change(adap->uld_handle[i], new_state);
	mutex_unlock(&uld_mutex);
}

/**
 * cxgb4_register_uld - register an upper-layer driver
 * @type: the ULD type
 * @p: the ULD methods
 *
 * Registers an upper-layer driver with this driver and notifies the ULD
 * about any presently available devices that support its type.  Returns
 * %-EBUSY if a ULD of the same type is already registered.
 */
int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p)
{
	int ret = 0;
	struct adapter *adap;

	if (type >= CXGB4_ULD_MAX)
		return -EINVAL;
	mutex_lock(&uld_mutex);
	if (ulds[type].add) {
		ret = -EBUSY;
		goto out;
	}
	ulds[type] = *p;
	list_for_each_entry(adap, &adapter_list, list_node)
		uld_attach(adap, type);
out:	mutex_unlock(&uld_mutex);
	return ret;
}
EXPORT_SYMBOL(cxgb4_register_uld);
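
/*
 * Registration sketch (illustrative only, not an in-tree ULD): a module
 * fills in a struct cxgb4_uld_info with at least the methods this file
 * invokes -- .add() is called once per adapter with the lower-layer info
 * and returns the ULD's per-adapter handle, and .state_change() is called
 * on events such as CXGB4_STATE_UP.  The names "my_add", "my_state" and
 * "struct my_dev" below are hypothetical:
 *
 *	static void *my_add(const struct cxgb4_lld_info *lli)
 *	{
 *		struct my_dev *d = kzalloc(sizeof(*d), GFP_KERNEL);
 *
 *		// uld_attach() checks IS_ERR(), so fail with an ERR_PTR
 *		return d ? (void *)d : ERR_PTR(-ENOMEM);
 *	}
 *
 *	static int my_state(void *handle, enum cxgb4_state state)
 *	{
 *		return 0;
 *	}
 *
 *	static const struct cxgb4_uld_info my_uld_info = {
 *		.add		= my_add,
 *		.state_change	= my_state,
 *	};
 *
 *	err = cxgb4_register_uld(CXGB4_ULD_RDMA, &my_uld_info);
 */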

/**
 * cxgb4_unregister_uld - unregister an upper-layer driver
 * @type: the ULD type
 *
 * Unregisters an existing upper-layer driver.
 */
int cxgb4_unregister_uld(enum cxgb4_uld type)
{
	struct adapter *adap;

	if (type >= CXGB4_ULD_MAX)
		return -EINVAL;
	mutex_lock(&uld_mutex);
	list_for_each_entry(adap, &adapter_list, list_node)
		adap->uld_handle[type] = NULL;
	ulds[type].add = NULL;
	mutex_unlock(&uld_mutex);
	return 0;
}
EXPORT_SYMBOL(cxgb4_unregister_uld);

/**
 * cxgb_up - enable the adapter
 * @adap: adapter being enabled
 *
 * Called when the first port is enabled, this function performs the
 * actions necessary to make an adapter operational, such as completing
 * the initialization of HW modules, and enabling interrupts.
 *
 * Must be called with the rtnl lock held.
 */
static int cxgb_up(struct adapter *adap)
{
	int err;

	err = setup_sge_queues(adap);
	if (err)
		goto out;
	err = setup_rss(adap);
	if (err)
		goto freeq;

	if (adap->flags & USING_MSIX) {
		name_msix_vecs(adap);
		err = request_irq(adap->msix_info[0].vec, t4_nondata_intr, 0,
				  adap->msix_info[0].desc, adap);
		if (err)
			goto irq_err;

		err = request_msix_queue_irqs(adap);
		if (err) {
			free_irq(adap->msix_info[0].vec, adap);
			goto irq_err;
		}
	} else {
		err = request_irq(adap->pdev->irq, t4_intr_handler(adap),
				  (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
				  adap->name, adap);
		if (err)
			goto irq_err;
	}

	t4_sge_start(adap);
	t4_intr_enable(adap);
	adap->flags |= FULL_INIT_DONE;
	notify_ulds(adap, CXGB4_STATE_UP);
 out:
	return err;
 irq_err:
	dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err);
 freeq:
	t4_free_sge_resources(adap);
	goto out;
}

static void cxgb_down(struct adapter *adapter)
{
	t4_intr_disable(adapter);
	cancel_work_sync(&adapter->tid_release_task);
	adapter->tid_release_task_busy = false;
	adapter->tid_release_head = NULL;

	if (adapter->flags & USING_MSIX) {
		free_msix_queue_irqs(adapter);
		free_irq(adapter->msix_info[0].vec, adapter);
	} else
		free_irq(adapter->pdev->irq, adapter);
	quiesce_rx(adapter);
	t4_sge_stop(adapter);
	t4_free_sge_resources(adapter);
	adapter->flags &= ~FULL_INIT_DONE;
}

/*
 * net_device operations
 */
static int cxgb_open(struct net_device *dev)
{
	int err;
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;

	if (!(adapter->flags & FULL_INIT_DONE)) {
		err = cxgb_up(adapter);
		if (err < 0)
			return err;
	}

	netif_set_real_num_tx_queues(dev, pi->nqsets);
	err = netif_set_real_num_rx_queues(dev, pi->nqsets);
	if (err)
		return err;
	err = link_start(dev);
	if (!err)
		netif_tx_start_all_queues(dev);
	return err;
}

static int cxgb_close(struct net_device *dev)
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;

	netif_tx_stop_all_queues(dev);
	netif_carrier_off(dev);
	return t4_enable_vi(adapter, adapter->fn, pi->viid, false, false);
}

static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev,
						struct rtnl_link_stats64 *ns)
{
	struct port_stats stats;
	struct port_info *p = netdev_priv(dev);
	struct adapter *adapter = p->adapter;

	spin_lock(&adapter->stats_lock);
	t4_get_port_stats(adapter, p->tx_chan, &stats);
	spin_unlock(&adapter->stats_lock);

	ns->tx_bytes   = stats.tx_octets;
	ns->tx_packets = stats.tx_frames;
	ns->rx_bytes   = stats.rx_octets;
	ns->rx_packets = stats.rx_frames;
	ns->multicast  = stats.rx_mcast_frames;

	/* detailed rx_errors */
	ns->rx_length_errors = stats.rx_jabber + stats.rx_too_long +
			       stats.rx_runt;
	ns->rx_over_errors   = 0;
	ns->rx_crc_errors    = stats.rx_fcs_err;
	ns->rx_frame_errors  = stats.rx_symbol_err;
	ns->rx_fifo_errors   = stats.rx_ovflow0 + stats.rx_ovflow1 +
			       stats.rx_ovflow2 + stats.rx_ovflow3 +
			       stats.rx_trunc0 + stats.rx_trunc1 +
			       stats.rx_trunc2 + stats.rx_trunc3;
	ns->rx_missed_errors = 0;

	/* detailed tx_errors */
	ns->tx_aborted_errors   = 0;
	ns->tx_carrier_errors   = 0;
	ns->tx_fifo_errors      = 0;
	ns->tx_heartbeat_errors = 0;
	ns->tx_window_errors    = 0;

	ns->tx_errors = stats.tx_error_frames;
	ns->rx_errors = stats.rx_symbol_err + stats.rx_fcs_err +
		ns->rx_length_errors + stats.rx_len_err + ns->rx_fifo_errors;
	return ns;
}

static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
{
	unsigned int mbox;
	int ret = 0, prtad, devad;
	struct port_info *pi = netdev_priv(dev);
	struct mii_ioctl_data *data = (struct mii_ioctl_data *)&req->ifr_data;

	switch (cmd) {
	case SIOCGMIIPHY:
		if (pi->mdio_addr < 0)
			return -EOPNOTSUPP;
		data->phy_id = pi->mdio_addr;
		break;
	case SIOCGMIIREG:
	case SIOCSMIIREG:
		if (mdio_phy_id_is_c45(data->phy_id)) {
			prtad = mdio_phy_id_prtad(data->phy_id);
			devad = mdio_phy_id_devad(data->phy_id);
		} else if (data->phy_id < 32) {
			prtad = data->phy_id;
			devad = 0;
			data->reg_num &= 0x1f;
		} else
			return -EINVAL;

		mbox = pi->adapter->fn;
		if (cmd == SIOCGMIIREG)
			ret = t4_mdio_rd(pi->adapter, mbox, prtad, devad,
					 data->reg_num, &data->val_out);
		else
			ret = t4_mdio_wr(pi->adapter, mbox, prtad, devad,
					 data->reg_num, data->val_in);
		break;
	default:
		return -EOPNOTSUPP;
	}
	return ret;
}

static void cxgb_set_rxmode(struct net_device *dev)
{
	/* unfortunately we can't return errors to the stack */
	set_rxmode(dev, -1, false);
}

static int cxgb_change_mtu(struct net_device *dev, int new_mtu)
{
	int ret;
	struct port_info *pi = netdev_priv(dev);

	if (new_mtu < 81 || new_mtu > MAX_MTU)		/* accommodate SACK */
		return -EINVAL;
	ret = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, new_mtu, -1,
			    -1, -1, -1, true);
	if (!ret)
		dev->mtu = new_mtu;
	return ret;
}

static int cxgb_set_mac_addr(struct net_device *dev, void *p)
{
	int ret;
	struct sockaddr *addr = p;
	struct port_info *pi = netdev_priv(dev);

	if (!is_valid_ether_addr(addr->sa_data))
		return -EINVAL;

	ret = t4_change_mac(pi->adapter, pi->adapter->fn, pi->viid,
			    pi->xact_addr_filt, addr->sa_data, true, true);
	if (ret < 0)
		return ret;

	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
	pi->xact_addr_filt = ret;
	return 0;
}

static void vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
{
	struct port_info *pi = netdev_priv(dev);

	pi->vlan_grp = grp;
	t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, -1, -1, -1, -1,
		      grp != NULL, true);
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void cxgb_netpoll(struct net_device *dev)
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adap = pi->adapter;

	if (adap->flags & USING_MSIX) {
		int i;
		struct sge_eth_rxq *rx = &adap->sge.ethrxq[pi->first_qset];

		for (i = pi->nqsets; i; i--, rx++)
			t4_sge_intr_msix(0, &rx->rspq);
	} else
		t4_intr_handler(adap)(0, adap);
}
#endif

static const struct net_device_ops cxgb4_netdev_ops = {
	.ndo_open             = cxgb_open,
	.ndo_stop             = cxgb_close,
	.ndo_start_xmit       = t4_eth_xmit,
	.ndo_get_stats64      = cxgb_get_stats,
	.ndo_set_rx_mode      = cxgb_set_rxmode,
	.ndo_set_mac_address  = cxgb_set_mac_addr,
	.ndo_validate_addr    = eth_validate_addr,
	.ndo_do_ioctl         = cxgb_ioctl,
	.ndo_change_mtu       = cxgb_change_mtu,
	.ndo_vlan_rx_register = vlan_rx_register,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller  = cxgb_netpoll,
#endif
};

void t4_fatal_err(struct adapter *adap)
{
	t4_set_reg_field(adap, SGE_CONTROL, GLOBALENABLE, 0);
	t4_intr_disable(adap);
	dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n");
}

static void setup_memwin(struct adapter *adap)
{
	u32 bar0;

	bar0 = pci_resource_start(adap->pdev, 0);  /* truncation intentional */
	t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 0),
		     (bar0 + MEMWIN0_BASE) | BIR(0) |
		     WINDOW(ilog2(MEMWIN0_APERTURE) - 10));
	t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 1),
		     (bar0 + MEMWIN1_BASE) | BIR(0) |
		     WINDOW(ilog2(MEMWIN1_APERTURE) - 10));
	t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 2),
		     (bar0 + MEMWIN2_BASE) | BIR(0) |
		     WINDOW(ilog2(MEMWIN2_APERTURE) - 10));
	if (adap->vres.ocq.size) {
		unsigned int start, sz_kb;

		start = pci_resource_start(adap->pdev, 2) +
			OCQ_WIN_OFFSET(adap->pdev, &adap->vres);
		sz_kb = roundup_pow_of_two(adap->vres.ocq.size) >> 10;
		t4_write_reg(adap,
			     PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 3),
			     start | BIR(1) | WINDOW(ilog2(sz_kb)));
		t4_write_reg(adap,
			     PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, 3),
			     adap->vres.ocq.start);
		t4_read_reg(adap,
			    PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, 3));
	}
}
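
/*
 * Worked example of the WINDOW() encoding above: the window-size field is
 * log2(aperture in bytes) - 10, i.e. the aperture expressed as a power of
 * two in KB.  Assuming MEMWIN0_APERTURE were 64KB:
 *
 *	ilog2(65536) - 10 = 16 - 10 = 6		(a 64KB window)
 *
 * The on-chip queue window converts its size to KB first, so a 1MB OCQ
 * region gives sz_kb = 1024 and WINDOW(ilog2(1024)) = WINDOW(10), the
 * same value the byte-based formula would produce.
 */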

static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
{
	u32 v;
	int ret;

	/* get device capabilities */
	memset(c, 0, sizeof(*c));
	c->op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
			       FW_CMD_REQUEST | FW_CMD_READ);
	c->retval_len16 = htonl(FW_LEN16(*c));
	ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), c);
	if (ret < 0)
		return ret;

	/* select capabilities we'll be using */
	if (c->niccaps & htons(FW_CAPS_CONFIG_NIC_VM)) {
		if (!vf_acls)
			c->niccaps ^= htons(FW_CAPS_CONFIG_NIC_VM);
		else
			c->niccaps = htons(FW_CAPS_CONFIG_NIC_VM);
	} else if (vf_acls) {
		dev_err(adap->pdev_dev, "virtualization ACLs not supported");
		return ret;
	}
	c->op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
			       FW_CMD_REQUEST | FW_CMD_WRITE);
	ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), NULL);
	if (ret < 0)
		return ret;

	ret = t4_config_glbl_rss(adap, adap->fn,
				 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL,
				 FW_RSS_GLB_CONFIG_CMD_TNLMAPEN |
				 FW_RSS_GLB_CONFIG_CMD_TNLALLLKP);
	if (ret < 0)
		return ret;

	ret = t4_cfg_pfvf(adap, adap->fn, adap->fn, 0, MAX_EGRQ, 64, MAX_INGQ,
			  0, 0, 4, 0xf, 0xf, 16, FW_CMD_CAP_PF, FW_CMD_CAP_PF);
	if (ret < 0)
		return ret;

	t4_sge_init(adap);

	/* tweak some settings */
	t4_write_reg(adap, TP_SHIFT_CNT, 0x64f8849);
	t4_write_reg(adap, ULP_RX_TDDP_PSZ, HPZ0(PAGE_SHIFT - 12));
	t4_write_reg(adap, TP_PIO_ADDR, TP_INGRESS_CONFIG);
	v = t4_read_reg(adap, TP_PIO_DATA);
	t4_write_reg(adap, TP_PIO_DATA, v & ~CSUM_HAS_PSEUDO_HDR);

	/* get basic stuff going */
	return t4_early_init(adap, adap->fn);
}

/*
 * Max # of ATIDs.  The absolute HW max is 16K but we keep it lower.
 */
#define MAX_ATIDS 8192U

/*
 * Phase 0 of initialization: contact FW, obtain config, perform basic init.
 */
static int adap_init0(struct adapter *adap)
{
	int ret;
	u32 v, port_vec;
	enum dev_state state;
	u32 params[7], val[7];
	struct fw_caps_config_cmd c;

	ret = t4_check_fw_version(adap);
	if (ret == -EINVAL || ret > 0) {
		if (upgrade_fw(adap) >= 0)	/* recache FW version */
			ret = t4_check_fw_version(adap);
	}
	if (ret < 0)
		return ret;

	/* contact FW, request master */
	ret = t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, &state);
	if (ret < 0) {
		dev_err(adap->pdev_dev, "could not connect to FW, error %d\n",
			ret);
		return ret;
	}

	/* reset device */
	ret = t4_fw_reset(adap, adap->fn, PIORSTMODE | PIORST);
	if (ret < 0)
		goto bye;

	for (v = 0; v < SGE_NTIMERS - 1; v++)
		adap->sge.timer_val[v] = min(intr_holdoff[v], MAX_SGE_TIMERVAL);
	adap->sge.timer_val[SGE_NTIMERS - 1] = MAX_SGE_TIMERVAL;
	adap->sge.counter_val[0] = 1;
	for (v = 1; v < SGE_NCOUNTERS; v++)
		adap->sge.counter_val[v] = min(intr_cnt[v - 1],
					       THRESHOLD_3_MASK);
#define FW_PARAM_DEV(param) \
	(FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
	 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))

	params[0] = FW_PARAM_DEV(CCLK);
	ret = t4_query_params(adap, adap->fn, adap->fn, 0, 1, params, val);
	if (ret < 0)
		goto bye;
	adap->params.vpd.cclk = val[0];

	ret = adap_init1(adap, &c);
	if (ret < 0)
		goto bye;

#define FW_PARAM_PFVF(param) \
	(FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
	 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param) | \
	 FW_PARAMS_PARAM_Y(adap->fn))

	params[0] = FW_PARAM_DEV(PORTVEC);
	params[1] = FW_PARAM_PFVF(L2T_START);
	params[2] = FW_PARAM_PFVF(L2T_END);
	params[3] = FW_PARAM_PFVF(FILTER_START);
	params[4] = FW_PARAM_PFVF(FILTER_END);
	params[5] = FW_PARAM_PFVF(IQFLINT_START);
	params[6] = FW_PARAM_PFVF(EQ_START);
	ret = t4_query_params(adap, adap->fn, adap->fn, 0, 7, params, val);
	if (ret < 0)
		goto bye;
	port_vec = val[0];
	adap->tids.ftid_base = val[3];
	adap->tids.nftids = val[4] - val[3] + 1;
	adap->sge.ingr_start = val[5];
	adap->sge.egr_start = val[6];

	if (c.ofldcaps) {
		/* query offload-related parameters */
		params[0] = FW_PARAM_DEV(NTID);
		params[1] = FW_PARAM_PFVF(SERVER_START);
		params[2] = FW_PARAM_PFVF(SERVER_END);
		params[3] = FW_PARAM_PFVF(TDDP_START);
		params[4] = FW_PARAM_PFVF(TDDP_END);
		params[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
		ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
				      val);
		if (ret < 0)
			goto bye;
		adap->tids.ntids = val[0];
		adap->tids.natids = min(adap->tids.ntids / 2, MAX_ATIDS);
		adap->tids.stid_base = val[1];
		adap->tids.nstids = val[2] - val[1] + 1;
		adap->vres.ddp.start = val[3];
		adap->vres.ddp.size = val[4] - val[3] + 1;
		adap->params.ofldq_wr_cred = val[5];
		adap->params.offload = 1;
	}
	if (c.rdmacaps) {
		params[0] = FW_PARAM_PFVF(STAG_START);
		params[1] = FW_PARAM_PFVF(STAG_END);
		params[2] = FW_PARAM_PFVF(RQ_START);
		params[3] = FW_PARAM_PFVF(RQ_END);
		params[4] = FW_PARAM_PFVF(PBL_START);
		params[5] = FW_PARAM_PFVF(PBL_END);
		ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
				      val);
		if (ret < 0)
			goto bye;
		adap->vres.stag.start = val[0];
		adap->vres.stag.size = val[1] - val[0] + 1;
		adap->vres.rq.start = val[2];
		adap->vres.rq.size = val[3] - val[2] + 1;
		adap->vres.pbl.start = val[4];
		adap->vres.pbl.size = val[5] - val[4] + 1;

		params[0] = FW_PARAM_PFVF(SQRQ_START);
		params[1] = FW_PARAM_PFVF(SQRQ_END);
		params[2] = FW_PARAM_PFVF(CQ_START);
		params[3] = FW_PARAM_PFVF(CQ_END);
		params[4] = FW_PARAM_PFVF(OCQ_START);
		params[5] = FW_PARAM_PFVF(OCQ_END);
		ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
				      val);
		if (ret < 0)
			goto bye;
		adap->vres.qp.start = val[0];
		adap->vres.qp.size = val[1] - val[0] + 1;
		adap->vres.cq.start = val[2];
		adap->vres.cq.size = val[3] - val[2] + 1;
		adap->vres.ocq.start = val[4];
		adap->vres.ocq.size = val[5] - val[4] + 1;
	}
	if (c.iscsicaps) {
		params[0] = FW_PARAM_PFVF(ISCSI_START);
		params[1] = FW_PARAM_PFVF(ISCSI_END);
		ret = t4_query_params(adap, adap->fn, adap->fn, 0, 2, params,
				      val);
		if (ret < 0)
			goto bye;
		adap->vres.iscsi.start = val[0];
		adap->vres.iscsi.size = val[1] - val[0] + 1;
	}
#undef FW_PARAM_PFVF
#undef FW_PARAM_DEV

	adap->params.nports = hweight32(port_vec);
	adap->params.portvec = port_vec;
	adap->flags |= FW_OK;

	/* These are finalized by FW initialization, load their values now */
	v = t4_read_reg(adap, TP_TIMER_RESOLUTION);
	adap->params.tp.tre = TIMERRESOLUTION_GET(v);
	t4_read_mtu_tbl(adap, adap->params.mtus, NULL);
	t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
		     adap->params.b_wnd);

#ifdef CONFIG_PCI_IOV
	/*
	 * Provision resource limits for Virtual Functions.  We currently
	 * grant them all the same static resource limits except for the Port
	 * Access Rights Mask which we're assigning based on the PF.  All of
	 * the static provisioning stuff for both the PF and VF really needs
	 * to be managed in a persistent manner for each device which the
	 * firmware controls.
	 */
	{
		int pf, vf;

		for (pf = 0; pf < ARRAY_SIZE(num_vf); pf++) {
			if (num_vf[pf] <= 0)
				continue;

			/* VF numbering starts at 1! */
			for (vf = 1; vf <= num_vf[pf]; vf++) {
				ret = t4_cfg_pfvf(adap, adap->fn, pf, vf,
						  VFRES_NEQ, VFRES_NETHCTRL,
						  VFRES_NIQFLINT, VFRES_NIQ,
						  VFRES_TC, VFRES_NVI,
						  FW_PFVF_CMD_CMASK_MASK,
						  pfvfres_pmask(adap, pf, vf),
						  VFRES_NEXACTF,
						  VFRES_R_CAPS, VFRES_WX_CAPS);
				if (ret < 0)
					dev_warn(adap->pdev_dev, "failed to "
						 "provision pf/vf=%d/%d; "
						 "err=%d\n", pf, vf, ret);
			}
		}
	}
#endif

	setup_memwin(adap);
	return 0;

	/*
	 * If a command timed out or failed with EIO, the FW is not operating
	 * within its spec or something catastrophic happened to HW/FW; stop
	 * issuing commands.
	 */
bye:	if (ret != -ETIMEDOUT && ret != -EIO)
		t4_fw_bye(adap, adap->fn);
	return ret;
}

static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev,
					 pci_channel_state_t state)
{
	int i;
	struct adapter *adap = pci_get_drvdata(pdev);

	if (!adap)
		goto out;

	rtnl_lock();
	adap->flags &= ~FW_OK;
	notify_ulds(adap, CXGB4_STATE_START_RECOVERY);
	for_each_port(adap, i) {
		struct net_device *dev = adap->port[i];

		netif_device_detach(dev);
		netif_carrier_off(dev);
	}
	if (adap->flags & FULL_INIT_DONE)
		cxgb_down(adap);
	rtnl_unlock();
	pci_disable_device(pdev);
out:	return state == pci_channel_io_perm_failure ?
		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}

static pci_ers_result_t eeh_slot_reset(struct pci_dev *pdev)
{
	int i, ret;
	struct fw_caps_config_cmd c;
	struct adapter *adap = pci_get_drvdata(pdev);

	if (!adap) {
		pci_restore_state(pdev);
		pci_save_state(pdev);
		return PCI_ERS_RESULT_RECOVERED;
	}

	if (pci_enable_device(pdev)) {
		dev_err(&pdev->dev, "cannot reenable PCI device after reset\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}

	pci_set_master(pdev);
	pci_restore_state(pdev);
	pci_save_state(pdev);
	pci_cleanup_aer_uncorrect_error_status(pdev);

	if (t4_wait_dev_ready(adap) < 0)
		return PCI_ERS_RESULT_DISCONNECT;
	if (t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, NULL))
		return PCI_ERS_RESULT_DISCONNECT;
	adap->flags |= FW_OK;
	if (adap_init1(adap, &c))
		return PCI_ERS_RESULT_DISCONNECT;

	for_each_port(adap, i) {
		struct port_info *p = adap2pinfo(adap, i);

		ret = t4_alloc_vi(adap, adap->fn, p->tx_chan, adap->fn, 0, 1,
				  NULL, NULL);
		if (ret < 0)
			return PCI_ERS_RESULT_DISCONNECT;
		p->viid = ret;
		p->xact_addr_filt = -1;
	}

	t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
		     adap->params.b_wnd);
	setup_memwin(adap);
	if (cxgb_up(adap))
		return PCI_ERS_RESULT_DISCONNECT;
	return PCI_ERS_RESULT_RECOVERED;
}

static void eeh_resume(struct pci_dev *pdev)
{
	int i;
	struct adapter *adap = pci_get_drvdata(pdev);

	if (!adap)
		return;

	rtnl_lock();
	for_each_port(adap, i) {
		struct net_device *dev = adap->port[i];

		if (netif_running(dev)) {
			link_start(dev);
			cxgb_set_rxmode(dev);
		}
		netif_device_attach(dev);
	}
	rtnl_unlock();
}

static struct pci_error_handlers cxgb4_eeh = {
	.error_detected = eeh_err_detected,
	.slot_reset     = eeh_slot_reset,
	.resume         = eeh_resume,
};

static inline bool is_10g_port(const struct link_config *lc)
{
	return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0;
}

static inline void init_rspq(struct sge_rspq *q, u8 timer_idx, u8 pkt_cnt_idx,
			     unsigned int size, unsigned int iqe_size)
{
	q->intr_params = QINTR_TIMER_IDX(timer_idx) |
			 (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0);
	q->pktcnt_idx = pkt_cnt_idx < SGE_NCOUNTERS ? pkt_cnt_idx : 0;
	q->iqe_len = iqe_size;
	q->size = size;
}

/*
 * Perform default configuration of DMA queues depending on the number and type
 * of ports we found and the number of available CPUs.  Most settings can be
 * modified by the admin prior to actual use.
 */
static void __devinit cfg_queues(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	int i, q10g = 0, n10g = 0, qidx = 0;

	for_each_port(adap, i)
		n10g += is_10g_port(&adap2pinfo(adap, i)->link_cfg);

	/*
	 * We default to 1 queue per non-10G port and up to # of cores queues
	 * per 10G port.
	 */
	if (n10g)
		q10g = (MAX_ETH_QSETS - (adap->params.nports - n10g)) / n10g;
	if (q10g > num_online_cpus())
		q10g = num_online_cpus();

	for_each_port(adap, i) {
		struct port_info *pi = adap2pinfo(adap, i);

		pi->first_qset = qidx;
		pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1;
		qidx += pi->nqsets;
	}

	s->ethqsets = qidx;
	s->max_ethqsets = qidx;   /* MSI-X may lower it later */

	if (is_offload(adap)) {
		/*
		 * For offload we use 1 queue/channel if all ports are up to 1G,
		 * otherwise we divide all available queues amongst the channels
		 * capped by the number of available cores.
		 */
		if (n10g) {
			i = min_t(int, ARRAY_SIZE(s->ofldrxq),
				  num_online_cpus());
			s->ofldqsets = roundup(i, adap->params.nports);
		} else
			s->ofldqsets = adap->params.nports;
		/* For RDMA one Rx queue per channel suffices */
		s->rdmaqs = adap->params.nports;
	}

	for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
		struct sge_eth_rxq *r = &s->ethrxq[i];

		init_rspq(&r->rspq, 0, 0, 1024, 64);
		r->fl.size = 72;
	}

	for (i = 0; i < ARRAY_SIZE(s->ethtxq); i++)
		s->ethtxq[i].q.size = 1024;

	for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++)
		s->ctrlq[i].q.size = 512;

	for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
		s->ofldtxq[i].q.size = 1024;

	for (i = 0; i < ARRAY_SIZE(s->ofldrxq); i++) {
		struct sge_ofld_rxq *r = &s->ofldrxq[i];

		init_rspq(&r->rspq, 0, 0, 1024, 64);
		r->rspq.uld = CXGB4_ULD_ISCSI;
		r->fl.size = 72;
	}

	for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
		struct sge_ofld_rxq *r = &s->rdmarxq[i];

		init_rspq(&r->rspq, 0, 0, 511, 64);
		r->rspq.uld = CXGB4_ULD_RDMA;
		r->fl.size = 72;
	}

	init_rspq(&s->fw_evtq, 6, 0, 512, 64);
	init_rspq(&s->intrq, 6, 0, 2 * MAX_INGQ, 64);
}
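
/*
 * Worked example of the queue distribution above (illustrative numbers):
 * with 2 x 10G ports, 2 x 1G ports, MAX_ETH_QSETS = 32 and 8 online CPUs,
 *
 *	q10g = (32 - (4 - 2)) / 2 = 15, then capped to 8 (num_online_cpus)
 *
 * so each 10G port gets 8 queue sets, each 1G port gets 1, and
 * s->max_ethqsets = 8 + 8 + 1 + 1 = 18 (possibly lowered later by
 * enable_msix() if fewer MSI-X vectors are available).
 */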

/*
 * Reduce the number of Ethernet queues across all ports to at most n.
 * n provides at least one queue per port.
 */
static void __devinit reduce_ethqs(struct adapter *adap, int n)
{
	int i;
	struct port_info *pi;

	while (n < adap->sge.ethqsets)
		for_each_port(adap, i) {
			pi = adap2pinfo(adap, i);
			if (pi->nqsets > 1) {
				pi->nqsets--;
				adap->sge.ethqsets--;
				if (adap->sge.ethqsets <= n)
					break;
			}
		}

	n = 0;
	for_each_port(adap, i) {
		pi = adap2pinfo(adap, i);
		pi->first_qset = n;
		n += pi->nqsets;
	}
}

/* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
#define EXTRA_VECS 2

static int __devinit enable_msix(struct adapter *adap)
{
	int ofld_need = 0;
	int i, err, want, need;
	struct sge *s = &adap->sge;
	unsigned int nchan = adap->params.nports;
	struct msix_entry entries[MAX_INGQ + 1];

	for (i = 0; i < ARRAY_SIZE(entries); ++i)
		entries[i].entry = i;

	want = s->max_ethqsets + EXTRA_VECS;
	if (is_offload(adap)) {
		want += s->rdmaqs + s->ofldqsets;
		/* need nchan for each possible ULD */
		ofld_need = 2 * nchan;
	}
	need = adap->params.nports + EXTRA_VECS + ofld_need;

	while ((err = pci_enable_msix(adap->pdev, entries, want)) >= need)
		want = err;

	if (!err) {
		/*
		 * Distribute available vectors to the various queue groups.
		 * Every group gets its minimum requirement and NIC gets top
		 * priority for leftovers.
		 */
		i = want - EXTRA_VECS - ofld_need;
		if (i < s->max_ethqsets) {
			s->max_ethqsets = i;
			if (i < s->ethqsets)
				reduce_ethqs(adap, i);
		}
		if (is_offload(adap)) {
			i = want - EXTRA_VECS - s->max_ethqsets;
			i -= ofld_need - nchan;
			s->ofldqsets = (i / nchan) * nchan;  /* round down */
		}
		for (i = 0; i < want; ++i)
			adap->msix_info[i].vec = entries[i].vector;
	} else if (err > 0)
		dev_info(adap->pdev_dev,
			 "only %d MSI-X vectors left, not using MSI-X\n", err);
	return err;
}

static int __devinit init_rss(struct adapter *adap)
{
	unsigned int i, j;

	for_each_port(adap, i) {
		struct port_info *pi = adap2pinfo(adap, i);

		pi->rss = kcalloc(pi->rss_size, sizeof(u16), GFP_KERNEL);
		if (!pi->rss)
			return -ENOMEM;
		for (j = 0; j < pi->rss_size; j++)
			pi->rss[j] = j % pi->nqsets;
	}
	return 0;
}
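
/*
 * Example of the default RSS mapping built above: with nqsets = 4 and an
 * indirection table of, say, 128 entries, pi->rss becomes
 * 0,1,2,3,0,1,2,3,... so hashed flows spread evenly over the port's four
 * Rx queue sets.
 */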

static void __devinit print_port_info(struct adapter *adap)
{
	static const char *base[] = {
		"R XFI", "R XAUI", "T SGMII", "T XFI", "T XAUI", "KX4", "CX4",
		"KX", "KR", "KR SFP+", "KR FEC"
	};

	int i;
	char buf[80];
	const char *spd = "";

	if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_2_5GB)
		spd = " 2.5 GT/s";
	else if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_5_0GB)
		spd = " 5 GT/s";

	for_each_port(adap, i) {
		struct net_device *dev = adap->port[i];
		const struct port_info *pi = netdev_priv(dev);
		char *bufp = buf;

		if (!test_bit(i, &adap->registered_device_map))
			continue;

		if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100M)
			bufp += sprintf(bufp, "100/");
		if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_1G)
			bufp += sprintf(bufp, "1000/");
		if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
			bufp += sprintf(bufp, "10G/");
		if (bufp != buf)
			--bufp;
		sprintf(bufp, "BASE-%s", base[pi->port_type]);

		netdev_info(dev, "Chelsio %s rev %d %s %sNIC PCIe x%d%s%s\n",
			    adap->params.vpd.id, adap->params.rev,
			    buf, is_offload(adap) ? "R" : "",
			    adap->params.pci.width, spd,
			    (adap->flags & USING_MSIX) ? " MSI-X" :
			    (adap->flags & USING_MSI) ? " MSI" : "");
		if (adap->name == dev->name)
			netdev_info(dev, "S/N: %s, E/C: %s\n",
				    adap->params.vpd.sn, adap->params.vpd.ec);
	}
}

/*
 * Free the following resources:
 * - memory used for tables
 * - MSI/MSI-X
 * - net devices
 * - resources FW is holding for us
 */
static void free_some_resources(struct adapter *adapter)
{
	unsigned int i;

	t4_free_mem(adapter->l2t);
	t4_free_mem(adapter->tids.tid_tab);
	disable_msi(adapter);

	for_each_port(adapter, i)
		if (adapter->port[i]) {
			kfree(adap2pinfo(adapter, i)->rss);
			free_netdev(adapter->port[i]);
		}
	if (adapter->flags & FW_OK)
		t4_fw_bye(adapter, adapter->fn);
}

#define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
		   NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)

static int __devinit init_one(struct pci_dev *pdev,
			      const struct pci_device_id *ent)
{
	int func, i, err;
	struct port_info *pi;
	unsigned int highdma = 0;
	struct adapter *adapter = NULL;

	printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);

	err = pci_request_regions(pdev, KBUILD_MODNAME);
	if (err) {
		/* Just info, some other driver may have claimed the device. */
		dev_info(&pdev->dev, "cannot obtain PCI resources\n");
		return err;
	}

	/* We control everything through one PF */
	func = PCI_FUNC(pdev->devfn);
	if (func != ent->driver_data) {
		pci_save_state(pdev);        /* to restore SR-IOV later */
		goto sriov;
	}

	err = pci_enable_device(pdev);
	if (err) {
		dev_err(&pdev->dev, "cannot enable PCI device\n");
		goto out_release_regions;
	}

	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
		highdma = NETIF_F_HIGHDMA;
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (err) {
			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
				"coherent allocations\n");
			goto out_disable_device;
		}
	} else {
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "no usable DMA configuration\n");
			goto out_disable_device;
		}
	}

	pci_enable_pcie_error_reporting(pdev);
	pci_set_master(pdev);
	pci_save_state(pdev);

	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
	if (!adapter) {
		err = -ENOMEM;
		goto out_disable_device;
	}

	adapter->regs = pci_ioremap_bar(pdev, 0);
	if (!adapter->regs) {
		dev_err(&pdev->dev, "cannot map device registers\n");
		err = -ENOMEM;
		goto out_free_adapter;
	}

	adapter->pdev = pdev;
	adapter->pdev_dev = &pdev->dev;
	adapter->fn = func;
	adapter->name = pci_name(pdev);
	adapter->msg_enable = dflt_msg_enable;
	memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));

	spin_lock_init(&adapter->stats_lock);
	spin_lock_init(&adapter->tid_release_lock);

	INIT_WORK(&adapter->tid_release_task, process_tid_release_list);

	err = t4_prep_adapter(adapter);
	if (err)
		goto out_unmap_bar;
	err = adap_init0(adapter);
	if (err)
		goto out_unmap_bar;

	for_each_port(adapter, i) {
		struct net_device *netdev;

		netdev = alloc_etherdev_mq(sizeof(struct port_info),
					   MAX_ETH_QSETS);
		if (!netdev) {
			err = -ENOMEM;
			goto out_free_dev;
		}

		SET_NETDEV_DEV(netdev, &pdev->dev);

		adapter->port[i] = netdev;
		pi = netdev_priv(netdev);
		pi->adapter = adapter;
		pi->xact_addr_filt = -1;
		pi->rx_offload = RX_CSO;
		pi->port_id = i;
		netif_carrier_off(netdev);
		netif_tx_stop_all_queues(netdev);
		netdev->irq = pdev->irq;

		netdev->features |= NETIF_F_SG | TSO_FLAGS;
		netdev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
		netdev->features |= NETIF_F_GRO | NETIF_F_RXHASH | highdma;
		netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
		netdev->vlan_features = netdev->features & VLAN_FEAT;

		netdev->netdev_ops = &cxgb4_netdev_ops;
		SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops);
	}

	pci_set_drvdata(pdev, adapter);

	if (adapter->flags & FW_OK) {
		err = t4_port_init(adapter, func, func, 0);
		if (err)
			goto out_free_dev;
	}

	/*
	 * Configure queues and allocate tables now, they can be needed as
	 * soon as the first register_netdev completes.
	 */
	cfg_queues(adapter);

	adapter->l2t = t4_init_l2t();
	if (!adapter->l2t) {
		/* We tolerate a lack of L2T, giving up some functionality */
		dev_warn(&pdev->dev, "could not allocate L2T, continuing\n");
		adapter->params.offload = 0;
	}

	if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
		dev_warn(&pdev->dev, "could not allocate TID table, "
			 "continuing\n");
		adapter->params.offload = 0;
	}

	/* See what interrupts we'll be using */
	if (msi > 1 && enable_msix(adapter) == 0)
		adapter->flags |= USING_MSIX;
	else if (msi > 0 && pci_enable_msi(pdev) == 0)
		adapter->flags |= USING_MSI;

	err = init_rss(adapter);
	if (err)
		goto out_free_dev;

	/*
	 * The card is now ready to go.  If any errors occur during device
	 * registration we do not fail the whole card but rather proceed only
	 * with the ports we manage to register successfully.  However we must
	 * register at least one net device.
	 */
	for_each_port(adapter, i) {
		err = register_netdev(adapter->port[i]);
		if (err)
			dev_warn(&pdev->dev,
				 "cannot register net device %s, skipping\n",
				 adapter->port[i]->name);
		else {
			/*
			 * Change the name we use for messages to the name of
			 * the first successfully registered interface.
			 */
			if (!adapter->registered_device_map)
				adapter->name = adapter->port[i]->name;

			__set_bit(i, &adapter->registered_device_map);
			adapter->chan_map[adap2pinfo(adapter, i)->tx_chan] = i;
		}
	}
	if (!adapter->registered_device_map) {
		dev_err(&pdev->dev, "could not register any net devices\n");
		goto out_free_dev;
	}

	if (cxgb4_debugfs_root) {
		adapter->debugfs_root = debugfs_create_dir(pci_name(pdev),
							   cxgb4_debugfs_root);
		setup_debugfs(adapter);
	}

	if (is_offload(adapter))
		attach_ulds(adapter);

	print_port_info(adapter);

sriov:
#ifdef CONFIG_PCI_IOV
	if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0)
		if (pci_enable_sriov(pdev, num_vf[func]) == 0)
			dev_info(&pdev->dev,
				 "instantiated %u virtual functions\n",
				 num_vf[func]);
#endif
	return 0;

 out_free_dev:
	free_some_resources(adapter);
 out_unmap_bar:
	iounmap(adapter->regs);
 out_free_adapter:
	kfree(adapter);
 out_disable_device:
	pci_disable_pcie_error_reporting(pdev);
	pci_disable_device(pdev);
 out_release_regions:
	pci_release_regions(pdev);
	pci_set_drvdata(pdev, NULL);
	return err;
}

static void __devexit remove_one(struct pci_dev *pdev)
{
	struct adapter *adapter = pci_get_drvdata(pdev);

	pci_disable_sriov(pdev);

	if (adapter) {
		int i;

		if (is_offload(adapter))
			detach_ulds(adapter);

		for_each_port(adapter, i)
			if (test_bit(i, &adapter->registered_device_map))
				unregister_netdev(adapter->port[i]);

		if (adapter->debugfs_root)
			debugfs_remove_recursive(adapter->debugfs_root);

		if (adapter->flags & FULL_INIT_DONE)
			cxgb_down(adapter);

		free_some_resources(adapter);
		iounmap(adapter->regs);
		kfree(adapter);
		pci_disable_pcie_error_reporting(pdev);
		pci_disable_device(pdev);
		pci_release_regions(pdev);
		pci_set_drvdata(pdev, NULL);
	} else if (PCI_FUNC(pdev->devfn) > 0)
		pci_release_regions(pdev);
}

static struct pci_driver cxgb4_driver = {
	.name        = KBUILD_MODNAME,
	.id_table    = cxgb4_pci_tbl,
	.probe       = init_one,
	.remove      = __devexit_p(remove_one),
	.err_handler = &cxgb4_eeh,
};

static int __init cxgb4_init_module(void)
{
	int ret;

	/* Debugfs support is optional, just warn if this fails */
	cxgb4_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
	if (!cxgb4_debugfs_root)
		pr_warning("could not create debugfs entry, continuing\n");

	ret = pci_register_driver(&cxgb4_driver);
	if (ret < 0)
		debugfs_remove(cxgb4_debugfs_root);
	return ret;
}

static void __exit cxgb4_cleanup_module(void)
{
	pci_unregister_driver(&cxgb4_driver);
	debugfs_remove(cxgb4_debugfs_root);  /* NULL ok */
}

module_init(cxgb4_init_module);
module_exit(cxgb4_cleanup_module);