2 * This file is part of the Chelsio T4 Ethernet driver for Linux.
4 * Copyright (c) 2003-2010 Chelsio Communications, Inc. All rights reserved.
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
37 #include <linux/bitmap.h>
38 #include <linux/crc32.h>
39 #include <linux/ctype.h>
40 #include <linux/debugfs.h>
41 #include <linux/err.h>
42 #include <linux/etherdevice.h>
43 #include <linux/firmware.h>
45 #include <linux/if_vlan.h>
46 #include <linux/init.h>
47 #include <linux/log2.h>
48 #include <linux/mdio.h>
49 #include <linux/module.h>
50 #include <linux/moduleparam.h>
51 #include <linux/mutex.h>
52 #include <linux/netdevice.h>
53 #include <linux/pci.h>
54 #include <linux/aer.h>
55 #include <linux/rtnetlink.h>
56 #include <linux/sched.h>
57 #include <linux/seq_file.h>
58 #include <linux/sockios.h>
59 #include <linux/vmalloc.h>
60 #include <linux/workqueue.h>
61 #include <net/neighbour.h>
62 #include <net/netevent.h>
63 #include <asm/uaccess.h>
71 #define DRV_VERSION "1.3.0-ko"
72 #define DRV_DESC "Chelsio T4 Network Driver"
75 * Max interrupt hold-off timer value in us. Queues fall back to this value
76 * under extreme memory pressure so it's largish to give the system time to
79 #define MAX_SGE_TIMERVAL 200U
83 * Physical Function provisioning constants.
85 PFRES_NVI = 4, /* # of Virtual Interfaces */
86 PFRES_NETHCTRL = 128, /* # of EQs used for ETH or CTRL Qs */
87 PFRES_NIQFLINT = 128, /* # of ingress Qs/w Free List(s)/intr
89 PFRES_NEQ = 256, /* # of egress queues */
90 PFRES_NIQ = 0, /* # of ingress queues */
91 PFRES_TC = 0, /* PCI-E traffic class */
92 PFRES_NEXACTF = 128, /* # of exact MPS filters */
94 PFRES_R_CAPS = FW_CMD_CAP_PF,
95 PFRES_WX_CAPS = FW_CMD_CAP_PF,
99 * Virtual Function provisioning constants. We need two extra Ingress
100 * Queues with Interrupt capability to serve as the VF's Firmware
101 * Event Queue and Forwarded Interrupt Queue (when using MSI mode) --
102 * neither will have Free Lists associated with them). For each
103 * Ethernet/Control Egress Queue and for each Free List, we need an
106 VFRES_NPORTS = 1, /* # of "ports" per VF */
107 VFRES_NQSETS = 2, /* # of "Queue Sets" per VF */
109 VFRES_NVI = VFRES_NPORTS, /* # of Virtual Interfaces */
110 VFRES_NETHCTRL = VFRES_NQSETS, /* # of EQs used for ETH or CTRL Qs */
111 VFRES_NIQFLINT = VFRES_NQSETS+2,/* # of ingress Qs/w Free List(s)/intr */
112 VFRES_NEQ = VFRES_NQSETS*2, /* # of egress queues */
113 VFRES_NIQ = 0, /* # of non-fl/int ingress queues */
114 VFRES_TC = 0, /* PCI-E traffic class */
115 VFRES_NEXACTF = 16, /* # of exact MPS filters */
117 VFRES_R_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF|FW_CMD_CAP_PORT,
118 VFRES_WX_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF,
123 * Provide a Port Access Rights Mask for the specified PF/VF. This is very
124 * static and likely not to be useful in the long run. We really need to
125 * implement some form of persistent configuration which the firmware
128 static unsigned int pfvfres_pmask(struct adapter *adapter,
129 unsigned int pf, unsigned int vf)
131 unsigned int portn, portvec;
134 * Give PF's access to all of the ports.
137 return FW_PFVF_CMD_PMASK_MASK;
140 * For VFs, we'll assign them access to the ports based purely on the
141 * PF. We assign active ports in order, wrapping around if there are
142 * fewer active ports than PFs: e.g. active port[pf % nports].
143 * Unfortunately the adapter's port_info structs haven't been
144 * initialized yet so we have to compute this.
146 if (adapter->params.nports == 0)
149 portn = pf % adapter->params.nports;
150 portvec = adapter->params.portvec;
153 * Isolate the lowest set bit in the port vector. If we're at
154 * the port number that we want, return that as the pmask.
155 * otherwise mask that bit out of the port vector and
156 * decrement our port number ...
158 unsigned int pmask = portvec ^ (portvec & (portvec-1));
168 MAX_TXQ_ENTRIES = 16384,
169 MAX_CTRL_TXQ_ENTRIES = 1024,
170 MAX_RSPQ_ENTRIES = 16384,
171 MAX_RX_BUFFERS = 16384,
172 MIN_TXQ_ENTRIES = 32,
173 MIN_CTRL_TXQ_ENTRIES = 32,
174 MIN_RSPQ_ENTRIES = 128,
178 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
179 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
180 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
182 #define CH_DEVICE(devid, data) { PCI_VDEVICE(CHELSIO, devid), (data) }
184 static DEFINE_PCI_DEVICE_TABLE(cxgb4_pci_tbl) = {
185 CH_DEVICE(0xa000, 0), /* PE10K */
186 CH_DEVICE(0x4001, -1),
187 CH_DEVICE(0x4002, -1),
188 CH_DEVICE(0x4003, -1),
189 CH_DEVICE(0x4004, -1),
190 CH_DEVICE(0x4005, -1),
191 CH_DEVICE(0x4006, -1),
192 CH_DEVICE(0x4007, -1),
193 CH_DEVICE(0x4008, -1),
194 CH_DEVICE(0x4009, -1),
195 CH_DEVICE(0x400a, -1),
196 CH_DEVICE(0x4401, 4),
197 CH_DEVICE(0x4402, 4),
198 CH_DEVICE(0x4403, 4),
199 CH_DEVICE(0x4404, 4),
200 CH_DEVICE(0x4405, 4),
201 CH_DEVICE(0x4406, 4),
202 CH_DEVICE(0x4407, 4),
203 CH_DEVICE(0x4408, 4),
204 CH_DEVICE(0x4409, 4),
205 CH_DEVICE(0x440a, 4),
206 CH_DEVICE(0x440d, 4),
207 CH_DEVICE(0x440e, 4),
211 #define FW_FNAME "cxgb4/t4fw.bin"
212 #define FW_CFNAME "cxgb4/t4-config.txt"
214 MODULE_DESCRIPTION(DRV_DESC);
215 MODULE_AUTHOR("Chelsio Communications");
216 MODULE_LICENSE("Dual BSD/GPL");
217 MODULE_VERSION(DRV_VERSION);
218 MODULE_DEVICE_TABLE(pci, cxgb4_pci_tbl);
219 MODULE_FIRMWARE(FW_FNAME);
222 * Normally we're willing to become the firmware's Master PF but will be happy
223 * if another PF has already become the Master and initialized the adapter.
224 * Setting "force_init" will cause this driver to forcibly establish itself as
225 * the Master PF and initialize the adapter.
227 static uint force_init;
229 module_param(force_init, uint, 0644);
230 MODULE_PARM_DESC(force_init, "Forcibly become Master PF and initialize adapter");
233 * Normally if the firmware we connect to has Configuration File support, we
234 * use that and only fall back to the old Driver-based initialization if the
235 * Configuration File fails for some reason. If force_old_init is set, then
236 * we'll always use the old Driver-based initialization sequence.
238 static uint force_old_init;
240 module_param(force_old_init, uint, 0644);
241 MODULE_PARM_DESC(force_old_init, "Force old initialization sequence");
243 static int dflt_msg_enable = DFLT_MSG_ENABLE;
245 module_param(dflt_msg_enable, int, 0644);
246 MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T4 default message enable bitmap");
249 * The driver uses the best interrupt scheme available on a platform in the
250 * order MSI-X, MSI, legacy INTx interrupts. This parameter determines which
251 * of these schemes the driver may consider as follows:
253 * msi = 2: choose from among all three options
254 * msi = 1: only consider MSI and INTx interrupts
255 * msi = 0: force INTx interrupts
259 module_param(msi, int, 0644);
260 MODULE_PARM_DESC(msi, "whether to use INTx (0), MSI (1) or MSI-X (2)");
263 * Queue interrupt hold-off timer values. Queues default to the first of these
266 static unsigned int intr_holdoff[SGE_NTIMERS - 1] = { 5, 10, 20, 50, 100 };
268 module_param_array(intr_holdoff, uint, NULL, 0644);
269 MODULE_PARM_DESC(intr_holdoff, "values for queue interrupt hold-off timers "
270 "0..4 in microseconds");
272 static unsigned int intr_cnt[SGE_NCOUNTERS - 1] = { 4, 8, 16 };
274 module_param_array(intr_cnt, uint, NULL, 0644);
275 MODULE_PARM_DESC(intr_cnt,
276 "thresholds 1..3 for queue interrupt packet counters");
279 * Normally we tell the chip to deliver Ingress Packets into our DMA buffers
280 * offset by 2 bytes in order to have the IP headers line up on 4-byte
281 * boundaries. This is a requirement for many architectures which will throw
282 * a machine check fault if an attempt is made to access one of the 4-byte IP
283 * header fields on a non-4-byte boundary. And it's a major performance issue
284 * even on some architectures which allow it like some implementations of the
285 * x86 ISA. However, some architectures don't mind this and for some very
286 * edge-case performance sensitive applications (like forwarding large volumes
287 * of small packets), setting this DMA offset to 0 will decrease the number of
288 * PCI-E Bus transfers enough to measurably affect performance.
290 static int rx_dma_offset = 2;
294 #ifdef CONFIG_PCI_IOV
295 module_param(vf_acls, bool, 0644);
296 MODULE_PARM_DESC(vf_acls, "if set enable virtualization L2 ACL enforcement");
298 static unsigned int num_vf[4];
300 module_param_array(num_vf, uint, NULL, 0644);
301 MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3");
305 * The filter TCAM has a fixed portion and a variable portion. The fixed
306 * portion can match on source/destination IP IPv4/IPv6 addresses and TCP/UDP
307 * ports. The variable portion is 36 bits which can include things like Exact
308 * Match MAC Index (9 bits), Ether Type (16 bits), IP Protocol (8 bits),
309 * [Inner] VLAN Tag (17 bits), etc. which, if all were somehow selected, would
310 * far exceed the 36-bit budget for this "compressed" header portion of the
311 * filter. Thus, we have a scarce resource which must be carefully managed.
313 * By default we set this up to mostly match the set of filter matching
314 * capabilities of T3 but with accommodations for some of T4's more
315 * interesting features:
317 * { IP Fragment (1), MPS Match Type (3), IP Protocol (8),
318 * [Inner] VLAN (17), Port (3), FCoE (1) }
321 TP_VLAN_PRI_MAP_DEFAULT = HW_TPL_FR_MT_PR_IV_P_FC,
322 TP_VLAN_PRI_MAP_FIRST = FCOE_SHIFT,
323 TP_VLAN_PRI_MAP_LAST = FRAGMENTATION_SHIFT,
326 static unsigned int tp_vlan_pri_map = TP_VLAN_PRI_MAP_DEFAULT;
328 static struct dentry *cxgb4_debugfs_root;
330 static LIST_HEAD(adapter_list);
331 static DEFINE_MUTEX(uld_mutex);
332 static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
333 static const char *uld_str[] = { "RDMA", "iSCSI" };
335 static void link_report(struct net_device *dev)
337 if (!netif_carrier_ok(dev))
338 netdev_info(dev, "link down\n");
340 static const char *fc[] = { "no", "Rx", "Tx", "Tx/Rx" };
342 const char *s = "10Mbps";
343 const struct port_info *p = netdev_priv(dev);
345 switch (p->link_cfg.speed) {
357 netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s,
362 void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat)
364 struct net_device *dev = adapter->port[port_id];
366 /* Skip changes from disabled ports. */
367 if (netif_running(dev) && link_stat != netif_carrier_ok(dev)) {
369 netif_carrier_on(dev);
371 netif_carrier_off(dev);
377 void t4_os_portmod_changed(const struct adapter *adap, int port_id)
379 static const char *mod_str[] = {
380 NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
383 const struct net_device *dev = adap->port[port_id];
384 const struct port_info *pi = netdev_priv(dev);
386 if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
387 netdev_info(dev, "port module unplugged\n");
388 else if (pi->mod_type < ARRAY_SIZE(mod_str))
389 netdev_info(dev, "%s module inserted\n", mod_str[pi->mod_type]);
393 * Configure the exact and hash address filters to handle a port's multicast
394 * and secondary unicast MAC addresses.
396 static int set_addr_filters(const struct net_device *dev, bool sleep)
404 const struct netdev_hw_addr *ha;
405 int uc_cnt = netdev_uc_count(dev);
406 int mc_cnt = netdev_mc_count(dev);
407 const struct port_info *pi = netdev_priv(dev);
408 unsigned int mb = pi->adapter->fn;
410 /* first do the secondary unicast addresses */
411 netdev_for_each_uc_addr(ha, dev) {
412 addr[naddr++] = ha->addr;
413 if (--uc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
414 ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
415 naddr, addr, filt_idx, &uhash, sleep);
424 /* next set up the multicast addresses */
425 netdev_for_each_mc_addr(ha, dev) {
426 addr[naddr++] = ha->addr;
427 if (--mc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
428 ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
429 naddr, addr, filt_idx, &mhash, sleep);
438 return t4_set_addr_hash(pi->adapter, mb, pi->viid, uhash != 0,
439 uhash | mhash, sleep);
442 int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */
443 module_param(dbfifo_int_thresh, int, 0644);
444 MODULE_PARM_DESC(dbfifo_int_thresh, "doorbell fifo interrupt threshold");
446 int dbfifo_drain_delay = 1000; /* usecs to sleep while draining the dbfifo */
447 module_param(dbfifo_drain_delay, int, 0644);
448 MODULE_PARM_DESC(dbfifo_drain_delay,
449 "usecs to sleep while draining the dbfifo");
452 * Set Rx properties of a port, such as promiscruity, address filters, and MTU.
453 * If @mtu is -1 it is left unchanged.
455 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
458 struct port_info *pi = netdev_priv(dev);
460 ret = set_addr_filters(dev, sleep_ok);
462 ret = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, mtu,
463 (dev->flags & IFF_PROMISC) ? 1 : 0,
464 (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, -1,
469 static struct workqueue_struct *workq;
472 * link_start - enable a port
473 * @dev: the port to enable
475 * Performs the MAC and PHY actions needed to enable a port.
477 static int link_start(struct net_device *dev)
480 struct port_info *pi = netdev_priv(dev);
481 unsigned int mb = pi->adapter->fn;
484 * We do not set address filters and promiscuity here, the stack does
485 * that step explicitly.
487 ret = t4_set_rxmode(pi->adapter, mb, pi->viid, dev->mtu, -1, -1, -1,
488 !!(dev->features & NETIF_F_HW_VLAN_RX), true);
490 ret = t4_change_mac(pi->adapter, mb, pi->viid,
491 pi->xact_addr_filt, dev->dev_addr, true,
494 pi->xact_addr_filt = ret;
499 ret = t4_link_start(pi->adapter, mb, pi->tx_chan,
502 ret = t4_enable_vi(pi->adapter, mb, pi->viid, true, true);
507 * Response queue handler for the FW event queue.
509 static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
510 const struct pkt_gl *gl)
512 u8 opcode = ((const struct rss_header *)rsp)->opcode;
514 rsp++; /* skip RSS header */
515 if (likely(opcode == CPL_SGE_EGR_UPDATE)) {
516 const struct cpl_sge_egr_update *p = (void *)rsp;
517 unsigned int qid = EGR_QID(ntohl(p->opcode_qid));
520 txq = q->adap->sge.egr_map[qid - q->adap->sge.egr_start];
522 if ((u8 *)txq < (u8 *)q->adap->sge.ofldtxq) {
523 struct sge_eth_txq *eq;
525 eq = container_of(txq, struct sge_eth_txq, q);
526 netif_tx_wake_queue(eq->txq);
528 struct sge_ofld_txq *oq;
530 oq = container_of(txq, struct sge_ofld_txq, q);
531 tasklet_schedule(&oq->qresume_tsk);
533 } else if (opcode == CPL_FW6_MSG || opcode == CPL_FW4_MSG) {
534 const struct cpl_fw6_msg *p = (void *)rsp;
537 t4_handle_fw_rpl(q->adap, p->data);
538 } else if (opcode == CPL_L2T_WRITE_RPL) {
539 const struct cpl_l2t_write_rpl *p = (void *)rsp;
541 do_l2t_write_rpl(q->adap, p);
543 dev_err(q->adap->pdev_dev,
544 "unexpected CPL %#x on FW event queue\n", opcode);
549 * uldrx_handler - response queue handler for ULD queues
550 * @q: the response queue that received the packet
551 * @rsp: the response queue descriptor holding the offload message
552 * @gl: the gather list of packet fragments
554 * Deliver an ingress offload packet to a ULD. All processing is done by
555 * the ULD, we just maintain statistics.
557 static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
558 const struct pkt_gl *gl)
560 struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);
562 if (ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], rsp, gl)) {
568 else if (gl == CXGB4_MSG_AN)
575 static void disable_msi(struct adapter *adapter)
577 if (adapter->flags & USING_MSIX) {
578 pci_disable_msix(adapter->pdev);
579 adapter->flags &= ~USING_MSIX;
580 } else if (adapter->flags & USING_MSI) {
581 pci_disable_msi(adapter->pdev);
582 adapter->flags &= ~USING_MSI;
587 * Interrupt handler for non-data events used with MSI-X.
589 static irqreturn_t t4_nondata_intr(int irq, void *cookie)
591 struct adapter *adap = cookie;
593 u32 v = t4_read_reg(adap, MYPF_REG(PL_PF_INT_CAUSE));
596 t4_write_reg(adap, MYPF_REG(PL_PF_INT_CAUSE), v);
598 t4_slow_intr_handler(adap);
603 * Name the MSI-X interrupts.
605 static void name_msix_vecs(struct adapter *adap)
607 int i, j, msi_idx = 2, n = sizeof(adap->msix_info[0].desc);
609 /* non-data interrupts */
610 snprintf(adap->msix_info[0].desc, n, "%s", adap->port[0]->name);
613 snprintf(adap->msix_info[1].desc, n, "%s-FWeventq",
614 adap->port[0]->name);
616 /* Ethernet queues */
617 for_each_port(adap, j) {
618 struct net_device *d = adap->port[j];
619 const struct port_info *pi = netdev_priv(d);
621 for (i = 0; i < pi->nqsets; i++, msi_idx++)
622 snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d",
627 for_each_ofldrxq(&adap->sge, i)
628 snprintf(adap->msix_info[msi_idx++].desc, n, "%s-ofld%d",
629 adap->port[0]->name, i);
631 for_each_rdmarxq(&adap->sge, i)
632 snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d",
633 adap->port[0]->name, i);
636 static int request_msix_queue_irqs(struct adapter *adap)
638 struct sge *s = &adap->sge;
639 int err, ethqidx, ofldqidx = 0, rdmaqidx = 0, msi = 2;
641 err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
642 adap->msix_info[1].desc, &s->fw_evtq);
646 for_each_ethrxq(s, ethqidx) {
647 err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
648 adap->msix_info[msi].desc,
649 &s->ethrxq[ethqidx].rspq);
654 for_each_ofldrxq(s, ofldqidx) {
655 err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
656 adap->msix_info[msi].desc,
657 &s->ofldrxq[ofldqidx].rspq);
662 for_each_rdmarxq(s, rdmaqidx) {
663 err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
664 adap->msix_info[msi].desc,
665 &s->rdmarxq[rdmaqidx].rspq);
673 while (--rdmaqidx >= 0)
674 free_irq(adap->msix_info[--msi].vec,
675 &s->rdmarxq[rdmaqidx].rspq);
676 while (--ofldqidx >= 0)
677 free_irq(adap->msix_info[--msi].vec,
678 &s->ofldrxq[ofldqidx].rspq);
679 while (--ethqidx >= 0)
680 free_irq(adap->msix_info[--msi].vec, &s->ethrxq[ethqidx].rspq);
681 free_irq(adap->msix_info[1].vec, &s->fw_evtq);
685 static void free_msix_queue_irqs(struct adapter *adap)
688 struct sge *s = &adap->sge;
690 free_irq(adap->msix_info[1].vec, &s->fw_evtq);
691 for_each_ethrxq(s, i)
692 free_irq(adap->msix_info[msi++].vec, &s->ethrxq[i].rspq);
693 for_each_ofldrxq(s, i)
694 free_irq(adap->msix_info[msi++].vec, &s->ofldrxq[i].rspq);
695 for_each_rdmarxq(s, i)
696 free_irq(adap->msix_info[msi++].vec, &s->rdmarxq[i].rspq);
700 * write_rss - write the RSS table for a given port
702 * @queues: array of queue indices for RSS
704 * Sets up the portion of the HW RSS table for the port's VI to distribute
705 * packets to the Rx queues in @queues.
707 static int write_rss(const struct port_info *pi, const u16 *queues)
711 const struct sge_eth_rxq *q = &pi->adapter->sge.ethrxq[pi->first_qset];
713 rss = kmalloc(pi->rss_size * sizeof(u16), GFP_KERNEL);
717 /* map the queue indices to queue ids */
718 for (i = 0; i < pi->rss_size; i++, queues++)
719 rss[i] = q[*queues].rspq.abs_id;
721 err = t4_config_rss_range(pi->adapter, pi->adapter->fn, pi->viid, 0,
722 pi->rss_size, rss, pi->rss_size);
728 * setup_rss - configure RSS
731 * Sets up RSS for each port.
733 static int setup_rss(struct adapter *adap)
737 for_each_port(adap, i) {
738 const struct port_info *pi = adap2pinfo(adap, i);
740 err = write_rss(pi, pi->rss);
748 * Return the channel of the ingress queue with the given qid.
750 static unsigned int rxq_to_chan(const struct sge *p, unsigned int qid)
752 qid -= p->ingr_start;
753 return netdev2pinfo(p->ingr_map[qid]->netdev)->tx_chan;
757 * Wait until all NAPI handlers are descheduled.
759 static void quiesce_rx(struct adapter *adap)
763 for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
764 struct sge_rspq *q = adap->sge.ingr_map[i];
767 napi_disable(&q->napi);
772 * Enable NAPI scheduling and interrupt generation for all Rx queues.
774 static void enable_rx(struct adapter *adap)
778 for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
779 struct sge_rspq *q = adap->sge.ingr_map[i];
784 napi_enable(&q->napi);
785 /* 0-increment GTS to start the timer and enable interrupts */
786 t4_write_reg(adap, MYPF_REG(SGE_PF_GTS),
787 SEINTARM(q->intr_params) |
788 INGRESSQID(q->cntxt_id));
793 * setup_sge_queues - configure SGE Tx/Rx/response queues
796 * Determines how many sets of SGE queues to use and initializes them.
797 * We support multiple queue sets per port if we have MSI-X, otherwise
798 * just one queue set per port.
800 static int setup_sge_queues(struct adapter *adap)
802 int err, msi_idx, i, j;
803 struct sge *s = &adap->sge;
805 bitmap_zero(s->starving_fl, MAX_EGRQ);
806 bitmap_zero(s->txq_maperr, MAX_EGRQ);
808 if (adap->flags & USING_MSIX)
809 msi_idx = 1; /* vector 0 is for non-queue interrupts */
811 err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
815 msi_idx = -((int)s->intrq.abs_id + 1);
818 err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
819 msi_idx, NULL, fwevtq_handler);
821 freeout: t4_free_sge_resources(adap);
825 for_each_port(adap, i) {
826 struct net_device *dev = adap->port[i];
827 struct port_info *pi = netdev_priv(dev);
828 struct sge_eth_rxq *q = &s->ethrxq[pi->first_qset];
829 struct sge_eth_txq *t = &s->ethtxq[pi->first_qset];
831 for (j = 0; j < pi->nqsets; j++, q++) {
834 err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
840 memset(&q->stats, 0, sizeof(q->stats));
842 for (j = 0; j < pi->nqsets; j++, t++) {
843 err = t4_sge_alloc_eth_txq(adap, t, dev,
844 netdev_get_tx_queue(dev, j),
845 s->fw_evtq.cntxt_id);
851 j = s->ofldqsets / adap->params.nports; /* ofld queues per channel */
852 for_each_ofldrxq(s, i) {
853 struct sge_ofld_rxq *q = &s->ofldrxq[i];
854 struct net_device *dev = adap->port[i / j];
858 err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, msi_idx,
859 &q->fl, uldrx_handler);
862 memset(&q->stats, 0, sizeof(q->stats));
863 s->ofld_rxq[i] = q->rspq.abs_id;
864 err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i], dev,
865 s->fw_evtq.cntxt_id);
870 for_each_rdmarxq(s, i) {
871 struct sge_ofld_rxq *q = &s->rdmarxq[i];
875 err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i],
876 msi_idx, &q->fl, uldrx_handler);
879 memset(&q->stats, 0, sizeof(q->stats));
880 s->rdma_rxq[i] = q->rspq.abs_id;
883 for_each_port(adap, i) {
885 * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't
886 * have RDMA queues, and that's the right value.
888 err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i],
890 s->rdmarxq[i].rspq.cntxt_id);
895 t4_write_reg(adap, MPS_TRC_RSS_CONTROL,
896 RSSCONTROL(netdev2pinfo(adap->port[0])->tx_chan) |
897 QUEUENUMBER(s->ethrxq[0].rspq.abs_id));
902 * Returns 0 if new FW was successfully loaded, a positive errno if a load was
903 * started but failed, and a negative errno if flash load couldn't start.
905 static int upgrade_fw(struct adapter *adap)
909 const struct fw_hdr *hdr;
910 const struct firmware *fw;
911 struct device *dev = adap->pdev_dev;
913 ret = request_firmware(&fw, FW_FNAME, dev);
915 dev_err(dev, "unable to load firmware image " FW_FNAME
916 ", error %d\n", ret);
920 hdr = (const struct fw_hdr *)fw->data;
921 vers = ntohl(hdr->fw_ver);
922 if (FW_HDR_FW_VER_MAJOR_GET(vers) != FW_VERSION_MAJOR) {
923 ret = -EINVAL; /* wrong major version, won't do */
928 * If the flash FW is unusable or we found something newer, load it.
930 if (FW_HDR_FW_VER_MAJOR_GET(adap->params.fw_vers) != FW_VERSION_MAJOR ||
931 vers > adap->params.fw_vers) {
932 ret = -t4_load_fw(adap, fw->data, fw->size);
934 dev_info(dev, "firmware upgraded to version %pI4 from "
935 FW_FNAME "\n", &hdr->fw_ver);
938 * Tell our caller that we didn't upgrade the firmware.
943 out: release_firmware(fw);
948 * Allocate a chunk of memory using kmalloc or, if that fails, vmalloc.
949 * The allocated memory is cleared.
951 void *t4_alloc_mem(size_t size)
953 void *p = kzalloc(size, GFP_KERNEL);
/*
 * Free memory allocated through t4_alloc_mem(), whichever allocator
 * actually provided it.
 */
static void t4_free_mem(void *addr)
{
	if (is_vmalloc_addr(addr))
		vfree(addr);
	else
		kfree(addr);
}
971 static inline int is_offload(const struct adapter *adap)
973 return adap->params.offload;
977 * Implementation of ethtool operations.
980 static u32 get_msglevel(struct net_device *dev)
982 return netdev2adap(dev)->msg_enable;
985 static void set_msglevel(struct net_device *dev, u32 val)
987 netdev2adap(dev)->msg_enable = val;
/*
 * Ethtool statistics name table.  Entries must appear in exactly the order
 * the corresponding values are emitted by get_stats(): the hardware port
 * statistics (struct port_stats) first, then the per-queue software
 * statistics (struct queue_port_stats).  Names are space-padded so ethtool
 * output columns line up.
 *
 * NOTE(review): this extraction is missing a large number of the table's
 * entries (the embedded original line numbers are non-contiguous) -- the
 * full list must be restored from the upstream driver before this builds.
 */
990 static char stats_strings[][ETH_GSTRING_LEN] = {
993 "TxBroadcastFrames ",
994 "TxMulticastFrames ",
1000 "TxFrames128To255 ",
1001 "TxFrames256To511 ",
1002 "TxFrames512To1023 ",
1003 "TxFrames1024To1518 ",
1004 "TxFrames1519ToMax ",
1019 "RxBroadcastFrames ",
1020 "RxMulticastFrames ",
1032 "RxFrames128To255 ",
1033 "RxFrames256To511 ",
1034 "RxFrames512To1023 ",
1035 "RxFrames1024To1518 ",
1036 "RxFrames1519ToMax ",
1048 "RxBG0FramesDropped ",
1049 "RxBG1FramesDropped ",
1050 "RxBG2FramesDropped ",
1051 "RxBG3FramesDropped ",
1052 "RxBG0FramesTrunc ",
1053 "RxBG1FramesTrunc ",
1054 "RxBG2FramesTrunc ",
1055 "RxBG3FramesTrunc ",
1066 static int get_sset_count(struct net_device *dev, int sset)
1070 return ARRAY_SIZE(stats_strings);
1076 #define T4_REGMAP_SIZE (160 * 1024)
1078 static int get_regs_len(struct net_device *dev)
1080 return T4_REGMAP_SIZE;
1083 static int get_eeprom_len(struct net_device *dev)
1088 static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
1090 struct adapter *adapter = netdev2adap(dev);
1092 strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
1093 strlcpy(info->version, DRV_VERSION, sizeof(info->version));
1094 strlcpy(info->bus_info, pci_name(adapter->pdev),
1095 sizeof(info->bus_info));
1097 if (adapter->params.fw_vers)
1098 snprintf(info->fw_version, sizeof(info->fw_version),
1099 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1100 FW_HDR_FW_VER_MAJOR_GET(adapter->params.fw_vers),
1101 FW_HDR_FW_VER_MINOR_GET(adapter->params.fw_vers),
1102 FW_HDR_FW_VER_MICRO_GET(adapter->params.fw_vers),
1103 FW_HDR_FW_VER_BUILD_GET(adapter->params.fw_vers),
1104 FW_HDR_FW_VER_MAJOR_GET(adapter->params.tp_vers),
1105 FW_HDR_FW_VER_MINOR_GET(adapter->params.tp_vers),
1106 FW_HDR_FW_VER_MICRO_GET(adapter->params.tp_vers),
1107 FW_HDR_FW_VER_BUILD_GET(adapter->params.tp_vers));
1110 static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
1112 if (stringset == ETH_SS_STATS)
1113 memcpy(data, stats_strings, sizeof(stats_strings));
1117 * port stats maintained per queue of the port. They should be in the same
1118 * order as in stats_strings above.
1120 struct queue_port_stats {
1130 static void collect_sge_port_stats(const struct adapter *adap,
1131 const struct port_info *p, struct queue_port_stats *s)
1134 const struct sge_eth_txq *tx = &adap->sge.ethtxq[p->first_qset];
1135 const struct sge_eth_rxq *rx = &adap->sge.ethrxq[p->first_qset];
1137 memset(s, 0, sizeof(*s));
1138 for (i = 0; i < p->nqsets; i++, rx++, tx++) {
1140 s->tx_csum += tx->tx_cso;
1141 s->rx_csum += rx->stats.rx_cso;
1142 s->vlan_ex += rx->stats.vlan_ex;
1143 s->vlan_ins += tx->vlan_ins;
1144 s->gro_pkts += rx->stats.lro_pkts;
1145 s->gro_merged += rx->stats.lro_merged;
1149 static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
1152 struct port_info *pi = netdev_priv(dev);
1153 struct adapter *adapter = pi->adapter;
1155 t4_get_port_stats(adapter, pi->tx_chan, (struct port_stats *)data);
1157 data += sizeof(struct port_stats) / sizeof(u64);
1158 collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1162 * Return a version number to identify the type of adapter. The scheme is:
1163 * - bits 0..9: chip version
1164 * - bits 10..15: chip revision
1165 * - bits 16..23: register dump version
1167 static inline unsigned int mk_adap_vers(const struct adapter *ap)
1169 return 4 | (ap->params.rev << 10) | (1 << 16);
1172 static void reg_block_dump(struct adapter *ap, void *buf, unsigned int start,
1175 u32 *p = buf + start;
1177 for ( ; start <= end; start += sizeof(u32))
1178 *p++ = t4_read_reg(ap, start);
/*
 * ethtool: dump the adapter registers.  reg_ranges is a flat list of
 * inclusive (start, end) register-address pairs; each pair is copied into
 * the output buffer by reg_block_dump() at its natural offset.
 *
 * NOTE(review): the body of reg_ranges (original lines ~1185-1404, the
 * actual start/end address pairs) and the local declarations/braces were
 * lost in extraction -- restore them from the upstream driver before
 * building; the fragment below will not compile as-is.
 */
1181 static void get_regs(struct net_device *dev, struct ethtool_regs *regs,
1184 static const unsigned int reg_ranges[] = {
1405 struct adapter *ap = netdev2adap(dev);
1407 regs->version = mk_adap_vers(ap);
1409 memset(buf, 0, T4_REGMAP_SIZE);
1410 for (i = 0; i < ARRAY_SIZE(reg_ranges); i += 2)
1411 reg_block_dump(ap, buf, reg_ranges[i], reg_ranges[i + 1]);
1414 static int restart_autoneg(struct net_device *dev)
1416 struct port_info *p = netdev_priv(dev);
1418 if (!netif_running(dev))
1420 if (p->link_cfg.autoneg != AUTONEG_ENABLE)
1422 t4_restart_aneg(p->adapter, p->adapter->fn, p->tx_chan);
1426 static int identify_port(struct net_device *dev,
1427 enum ethtool_phys_id_state state)
1430 struct adapter *adap = netdev2adap(dev);
1432 if (state == ETHTOOL_ID_ACTIVE)
1434 else if (state == ETHTOOL_ID_INACTIVE)
1439 return t4_identify_port(adap, adap->fn, netdev2pinfo(dev)->viid, val);
1442 static unsigned int from_fw_linkcaps(unsigned int type, unsigned int caps)
1446 if (type == FW_PORT_TYPE_BT_SGMII || type == FW_PORT_TYPE_BT_XFI ||
1447 type == FW_PORT_TYPE_BT_XAUI) {
1449 if (caps & FW_PORT_CAP_SPEED_100M)
1450 v |= SUPPORTED_100baseT_Full;
1451 if (caps & FW_PORT_CAP_SPEED_1G)
1452 v |= SUPPORTED_1000baseT_Full;
1453 if (caps & FW_PORT_CAP_SPEED_10G)
1454 v |= SUPPORTED_10000baseT_Full;
1455 } else if (type == FW_PORT_TYPE_KX4 || type == FW_PORT_TYPE_KX) {
1456 v |= SUPPORTED_Backplane;
1457 if (caps & FW_PORT_CAP_SPEED_1G)
1458 v |= SUPPORTED_1000baseKX_Full;
1459 if (caps & FW_PORT_CAP_SPEED_10G)
1460 v |= SUPPORTED_10000baseKX4_Full;
1461 } else if (type == FW_PORT_TYPE_KR)
1462 v |= SUPPORTED_Backplane | SUPPORTED_10000baseKR_Full;
1463 else if (type == FW_PORT_TYPE_BP_AP)
1464 v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
1465 SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full;
1466 else if (type == FW_PORT_TYPE_BP4_AP)
1467 v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
1468 SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full |
1469 SUPPORTED_10000baseKX4_Full;
1470 else if (type == FW_PORT_TYPE_FIBER_XFI ||
1471 type == FW_PORT_TYPE_FIBER_XAUI || type == FW_PORT_TYPE_SFP)
1472 v |= SUPPORTED_FIBRE;
1474 if (caps & FW_PORT_CAP_ANEG)
1475 v |= SUPPORTED_Autoneg;
1479 static unsigned int to_fw_linkcaps(unsigned int caps)
1483 if (caps & ADVERTISED_100baseT_Full)
1484 v |= FW_PORT_CAP_SPEED_100M;
1485 if (caps & ADVERTISED_1000baseT_Full)
1486 v |= FW_PORT_CAP_SPEED_1G;
1487 if (caps & ADVERTISED_10000baseT_Full)
1488 v |= FW_PORT_CAP_SPEED_10G;
1492 static int get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
1494 const struct port_info *p = netdev_priv(dev);
1496 if (p->port_type == FW_PORT_TYPE_BT_SGMII ||
1497 p->port_type == FW_PORT_TYPE_BT_XFI ||
1498 p->port_type == FW_PORT_TYPE_BT_XAUI)
1499 cmd->port = PORT_TP;
1500 else if (p->port_type == FW_PORT_TYPE_FIBER_XFI ||
1501 p->port_type == FW_PORT_TYPE_FIBER_XAUI)
1502 cmd->port = PORT_FIBRE;
1503 else if (p->port_type == FW_PORT_TYPE_SFP) {
1504 if (p->mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1505 p->mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1506 cmd->port = PORT_DA;
1508 cmd->port = PORT_FIBRE;
1510 cmd->port = PORT_OTHER;
1512 if (p->mdio_addr >= 0) {
1513 cmd->phy_address = p->mdio_addr;
1514 cmd->transceiver = XCVR_EXTERNAL;
1515 cmd->mdio_support = p->port_type == FW_PORT_TYPE_BT_SGMII ?
1516 MDIO_SUPPORTS_C22 : MDIO_SUPPORTS_C45;
1518 cmd->phy_address = 0; /* not really, but no better option */
1519 cmd->transceiver = XCVR_INTERNAL;
1520 cmd->mdio_support = 0;
1523 cmd->supported = from_fw_linkcaps(p->port_type, p->link_cfg.supported);
1524 cmd->advertising = from_fw_linkcaps(p->port_type,
1525 p->link_cfg.advertising);
1526 ethtool_cmd_speed_set(cmd,
1527 netif_carrier_ok(dev) ? p->link_cfg.speed : 0);
1528 cmd->duplex = DUPLEX_FULL;
1529 cmd->autoneg = p->link_cfg.autoneg;
1535 static unsigned int speed_to_caps(int speed)
1537 if (speed == SPEED_100)
1538 return FW_PORT_CAP_SPEED_100M;
1539 if (speed == SPEED_1000)
1540 return FW_PORT_CAP_SPEED_1G;
1541 if (speed == SPEED_10000)
1542 return FW_PORT_CAP_SPEED_10G;
1546 static int set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
1549 struct port_info *p = netdev_priv(dev);
1550 struct link_config *lc = &p->link_cfg;
1551 u32 speed = ethtool_cmd_speed(cmd);
1553 if (cmd->duplex != DUPLEX_FULL) /* only full-duplex supported */
1556 if (!(lc->supported & FW_PORT_CAP_ANEG)) {
1558 * PHY offers a single speed. See if that's what's
1561 if (cmd->autoneg == AUTONEG_DISABLE &&
1562 (lc->supported & speed_to_caps(speed)))
1567 if (cmd->autoneg == AUTONEG_DISABLE) {
1568 cap = speed_to_caps(speed);
1570 if (!(lc->supported & cap) || (speed == SPEED_1000) ||
1571 (speed == SPEED_10000))
1573 lc->requested_speed = cap;
1574 lc->advertising = 0;
1576 cap = to_fw_linkcaps(cmd->advertising);
1577 if (!(lc->supported & cap))
1579 lc->requested_speed = 0;
1580 lc->advertising = cap | FW_PORT_CAP_ANEG;
1582 lc->autoneg = cmd->autoneg;
1584 if (netif_running(dev))
1585 return t4_link_start(p->adapter, p->adapter->fn, p->tx_chan,
1590 static void get_pauseparam(struct net_device *dev,
1591 struct ethtool_pauseparam *epause)
1593 struct port_info *p = netdev_priv(dev);
1595 epause->autoneg = (p->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1596 epause->rx_pause = (p->link_cfg.fc & PAUSE_RX) != 0;
1597 epause->tx_pause = (p->link_cfg.fc & PAUSE_TX) != 0;
1600 static int set_pauseparam(struct net_device *dev,
1601 struct ethtool_pauseparam *epause)
1603 struct port_info *p = netdev_priv(dev);
1604 struct link_config *lc = &p->link_cfg;
1606 if (epause->autoneg == AUTONEG_DISABLE)
1607 lc->requested_fc = 0;
1608 else if (lc->supported & FW_PORT_CAP_ANEG)
1609 lc->requested_fc = PAUSE_AUTONEG;
1613 if (epause->rx_pause)
1614 lc->requested_fc |= PAUSE_RX;
1615 if (epause->tx_pause)
1616 lc->requested_fc |= PAUSE_TX;
1617 if (netif_running(dev))
1618 return t4_link_start(p->adapter, p->adapter->fn, p->tx_chan,
1623 static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
1625 const struct port_info *pi = netdev_priv(dev);
1626 const struct sge *s = &pi->adapter->sge;
1628 e->rx_max_pending = MAX_RX_BUFFERS;
1629 e->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1630 e->rx_jumbo_max_pending = 0;
1631 e->tx_max_pending = MAX_TXQ_ENTRIES;
1633 e->rx_pending = s->ethrxq[pi->first_qset].fl.size - 8;
1634 e->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1635 e->rx_jumbo_pending = 0;
1636 e->tx_pending = s->ethtxq[pi->first_qset].q.size;
1639 static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
1642 const struct port_info *pi = netdev_priv(dev);
1643 struct adapter *adapter = pi->adapter;
1644 struct sge *s = &adapter->sge;
1646 if (e->rx_pending > MAX_RX_BUFFERS || e->rx_jumbo_pending ||
1647 e->tx_pending > MAX_TXQ_ENTRIES ||
1648 e->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1649 e->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1650 e->rx_pending < MIN_FL_ENTRIES || e->tx_pending < MIN_TXQ_ENTRIES)
1653 if (adapter->flags & FULL_INIT_DONE)
1656 for (i = 0; i < pi->nqsets; ++i) {
1657 s->ethtxq[pi->first_qset + i].q.size = e->tx_pending;
1658 s->ethrxq[pi->first_qset + i].fl.size = e->rx_pending + 8;
1659 s->ethrxq[pi->first_qset + i].rspq.size = e->rx_mini_pending;
1664 static int closest_timer(const struct sge *s, int time)
1666 int i, delta, match = 0, min_delta = INT_MAX;
1668 for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
1669 delta = time - s->timer_val[i];
1672 if (delta < min_delta) {
1680 static int closest_thres(const struct sge *s, int thres)
1682 int i, delta, match = 0, min_delta = INT_MAX;
1684 for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
1685 delta = thres - s->counter_val[i];
1688 if (delta < min_delta) {
1697 * Return a queue's interrupt hold-off time in us. 0 means no timer.
1699 static unsigned int qtimer_val(const struct adapter *adap,
1700 const struct sge_rspq *q)
1702 unsigned int idx = q->intr_params >> 1;
1704 return idx < SGE_NTIMERS ? adap->sge.timer_val[idx] : 0;
1708 * set_rxq_intr_params - set a queue's interrupt holdoff parameters
1709 * @adap: the adapter
1711 * @us: the hold-off time in us, or 0 to disable timer
1712 * @cnt: the hold-off packet count, or 0 to disable counter
1714 * Sets an Rx queue's interrupt hold-off time and packet count. At least
1715 * one of the two needs to be enabled for the queue to generate interrupts.
1717 static int set_rxq_intr_params(struct adapter *adap, struct sge_rspq *q,
1718 unsigned int us, unsigned int cnt)
1720 if ((us | cnt) == 0)
1727 new_idx = closest_thres(&adap->sge, cnt);
1728 if (q->desc && q->pktcnt_idx != new_idx) {
1729 /* the queue has already been created, update it */
1730 v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1731 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1732 FW_PARAMS_PARAM_YZ(q->cntxt_id);
1733 err = t4_set_params(adap, adap->fn, adap->fn, 0, 1, &v,
1738 q->pktcnt_idx = new_idx;
1741 us = us == 0 ? 6 : closest_timer(&adap->sge, us);
1742 q->intr_params = QINTR_TIMER_IDX(us) | (cnt > 0 ? QINTR_CNT_EN : 0);
1746 static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
1748 const struct port_info *pi = netdev_priv(dev);
1749 struct adapter *adap = pi->adapter;
1751 return set_rxq_intr_params(adap, &adap->sge.ethrxq[pi->first_qset].rspq,
1752 c->rx_coalesce_usecs, c->rx_max_coalesced_frames);
1755 static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
1757 const struct port_info *pi = netdev_priv(dev);
1758 const struct adapter *adap = pi->adapter;
1759 const struct sge_rspq *rq = &adap->sge.ethrxq[pi->first_qset].rspq;
1761 c->rx_coalesce_usecs = qtimer_val(adap, rq);
1762 c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN) ?
1763 adap->sge.counter_val[rq->pktcnt_idx] : 0;
1768 * eeprom_ptov - translate a physical EEPROM address to virtual
1769 * @phys_addr: the physical EEPROM address
1770 * @fn: the PCI function number
1771 * @sz: size of function-specific area
1773 * Translate a physical EEPROM address to virtual. The first 1K is
1774 * accessed through virtual addresses starting at 31K, the rest is
1775 * accessed through virtual addresses starting at 0.
1777 * The mapping is as follows:
1778 * [0..1K) -> [31K..32K)
1779 * [1K..1K+A) -> [31K-A..31K)
1780 * [1K+A..ES) -> [0..ES-A-1K)
1782 * where A = @fn * @sz, and ES = EEPROM size.
1784 static int eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz)
1787 if (phys_addr < 1024)
1788 return phys_addr + (31 << 10);
1789 if (phys_addr < 1024 + fn)
1790 return 31744 - fn + phys_addr - 1024;
1791 if (phys_addr < EEPROMSIZE)
1792 return phys_addr - 1024 - fn;
1797 * The next two routines implement eeprom read/write from physical addresses.
1799 static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v)
1801 int vaddr = eeprom_ptov(phys_addr, adap->fn, EEPROMPFSIZE);
1804 vaddr = pci_read_vpd(adap->pdev, vaddr, sizeof(u32), v);
1805 return vaddr < 0 ? vaddr : 0;
1808 static int eeprom_wr_phys(struct adapter *adap, unsigned int phys_addr, u32 v)
1810 int vaddr = eeprom_ptov(phys_addr, adap->fn, EEPROMPFSIZE);
1813 vaddr = pci_write_vpd(adap->pdev, vaddr, sizeof(u32), &v);
1814 return vaddr < 0 ? vaddr : 0;
#define EEPROM_MAGIC 0x38E2F10C	/* guards ethtool EEPROM read/write ops */
1819 static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e,
1823 struct adapter *adapter = netdev2adap(dev);
1825 u8 *buf = kmalloc(EEPROMSIZE, GFP_KERNEL);
1829 e->magic = EEPROM_MAGIC;
1830 for (i = e->offset & ~3; !err && i < e->offset + e->len; i += 4)
1831 err = eeprom_rd_phys(adapter, i, (u32 *)&buf[i]);
1834 memcpy(data, buf + e->offset, e->len);
1839 static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
1844 u32 aligned_offset, aligned_len, *p;
1845 struct adapter *adapter = netdev2adap(dev);
1847 if (eeprom->magic != EEPROM_MAGIC)
1850 aligned_offset = eeprom->offset & ~3;
1851 aligned_len = (eeprom->len + (eeprom->offset & 3) + 3) & ~3;
1853 if (adapter->fn > 0) {
1854 u32 start = 1024 + adapter->fn * EEPROMPFSIZE;
1856 if (aligned_offset < start ||
1857 aligned_offset + aligned_len > start + EEPROMPFSIZE)
1861 if (aligned_offset != eeprom->offset || aligned_len != eeprom->len) {
1863 * RMW possibly needed for first or last words.
1865 buf = kmalloc(aligned_len, GFP_KERNEL);
1868 err = eeprom_rd_phys(adapter, aligned_offset, (u32 *)buf);
1869 if (!err && aligned_len > 4)
1870 err = eeprom_rd_phys(adapter,
1871 aligned_offset + aligned_len - 4,
1872 (u32 *)&buf[aligned_len - 4]);
1875 memcpy(buf + (eeprom->offset & 3), data, eeprom->len);
1879 err = t4_seeprom_wp(adapter, false);
1883 for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
1884 err = eeprom_wr_phys(adapter, aligned_offset, *p);
1885 aligned_offset += 4;
1889 err = t4_seeprom_wp(adapter, true);
1896 static int set_flash(struct net_device *netdev, struct ethtool_flash *ef)
1899 const struct firmware *fw;
1900 struct adapter *adap = netdev2adap(netdev);
1902 ef->data[sizeof(ef->data) - 1] = '\0';
1903 ret = request_firmware(&fw, ef->data, adap->pdev_dev);
1907 ret = t4_load_fw(adap, fw->data, fw->size);
1908 release_firmware(fw);
1910 dev_info(adap->pdev_dev, "loaded firmware %s\n", ef->data);
/* Wake-on-LAN modes supported by the hardware */
#define WOL_SUPPORTED (WAKE_BCAST | WAKE_MAGIC)
/* CRC used when matching the broadcast WoL pattern */
#define BCAST_CRC 0xa0ccc1a6
1917 static void get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
1919 wol->supported = WAKE_BCAST | WAKE_MAGIC;
1920 wol->wolopts = netdev2adap(dev)->wol;
1921 memset(&wol->sopass, 0, sizeof(wol->sopass));
1924 static int set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
1927 struct port_info *pi = netdev_priv(dev);
1929 if (wol->wolopts & ~WOL_SUPPORTED)
1931 t4_wol_magic_enable(pi->adapter, pi->tx_chan,
1932 (wol->wolopts & WAKE_MAGIC) ? dev->dev_addr : NULL);
1933 if (wol->wolopts & WAKE_BCAST) {
1934 err = t4_wol_pat_enable(pi->adapter, pi->tx_chan, 0xfe, ~0ULL,
1937 err = t4_wol_pat_enable(pi->adapter, pi->tx_chan, 1,
1938 ~6ULL, ~0ULL, BCAST_CRC, true);
1940 t4_wol_pat_enable(pi->adapter, pi->tx_chan, 0, 0, 0, 0, false);
1944 static int cxgb_set_features(struct net_device *dev, netdev_features_t features)
1946 const struct port_info *pi = netdev_priv(dev);
1947 netdev_features_t changed = dev->features ^ features;
1950 if (!(changed & NETIF_F_HW_VLAN_RX))
1953 err = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, -1,
1955 !!(features & NETIF_F_HW_VLAN_RX), true);
1957 dev->features = features ^ NETIF_F_HW_VLAN_RX;
1961 static u32 get_rss_table_size(struct net_device *dev)
1963 const struct port_info *pi = netdev_priv(dev);
1965 return pi->rss_size;
1968 static int get_rss_table(struct net_device *dev, u32 *p)
1970 const struct port_info *pi = netdev_priv(dev);
1971 unsigned int n = pi->rss_size;
1978 static int set_rss_table(struct net_device *dev, const u32 *p)
1981 struct port_info *pi = netdev_priv(dev);
1983 for (i = 0; i < pi->rss_size; i++)
1985 if (pi->adapter->flags & FULL_INIT_DONE)
1986 return write_rss(pi, pi->rss);
1990 static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
1993 const struct port_info *pi = netdev_priv(dev);
1995 switch (info->cmd) {
1996 case ETHTOOL_GRXFH: {
1997 unsigned int v = pi->rss_mode;
2000 switch (info->flow_type) {
2002 if (v & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
2003 info->data = RXH_IP_SRC | RXH_IP_DST |
2004 RXH_L4_B_0_1 | RXH_L4_B_2_3;
2005 else if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
2006 info->data = RXH_IP_SRC | RXH_IP_DST;
2009 if ((v & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) &&
2010 (v & FW_RSS_VI_CONFIG_CMD_UDPEN))
2011 info->data = RXH_IP_SRC | RXH_IP_DST |
2012 RXH_L4_B_0_1 | RXH_L4_B_2_3;
2013 else if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
2014 info->data = RXH_IP_SRC | RXH_IP_DST;
2017 case AH_ESP_V4_FLOW:
2019 if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
2020 info->data = RXH_IP_SRC | RXH_IP_DST;
2023 if (v & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
2024 info->data = RXH_IP_SRC | RXH_IP_DST |
2025 RXH_L4_B_0_1 | RXH_L4_B_2_3;
2026 else if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
2027 info->data = RXH_IP_SRC | RXH_IP_DST;
2030 if ((v & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) &&
2031 (v & FW_RSS_VI_CONFIG_CMD_UDPEN))
2032 info->data = RXH_IP_SRC | RXH_IP_DST |
2033 RXH_L4_B_0_1 | RXH_L4_B_2_3;
2034 else if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
2035 info->data = RXH_IP_SRC | RXH_IP_DST;
2038 case AH_ESP_V6_FLOW:
2040 if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
2041 info->data = RXH_IP_SRC | RXH_IP_DST;
2046 case ETHTOOL_GRXRINGS:
2047 info->data = pi->nqsets;
2053 static const struct ethtool_ops cxgb_ethtool_ops = {
2054 .get_settings = get_settings,
2055 .set_settings = set_settings,
2056 .get_drvinfo = get_drvinfo,
2057 .get_msglevel = get_msglevel,
2058 .set_msglevel = set_msglevel,
2059 .get_ringparam = get_sge_param,
2060 .set_ringparam = set_sge_param,
2061 .get_coalesce = get_coalesce,
2062 .set_coalesce = set_coalesce,
2063 .get_eeprom_len = get_eeprom_len,
2064 .get_eeprom = get_eeprom,
2065 .set_eeprom = set_eeprom,
2066 .get_pauseparam = get_pauseparam,
2067 .set_pauseparam = set_pauseparam,
2068 .get_link = ethtool_op_get_link,
2069 .get_strings = get_strings,
2070 .set_phys_id = identify_port,
2071 .nway_reset = restart_autoneg,
2072 .get_sset_count = get_sset_count,
2073 .get_ethtool_stats = get_stats,
2074 .get_regs_len = get_regs_len,
2075 .get_regs = get_regs,
2078 .get_rxnfc = get_rxnfc,
2079 .get_rxfh_indir_size = get_rss_table_size,
2080 .get_rxfh_indir = get_rss_table,
2081 .set_rxfh_indir = set_rss_table,
2082 .flash_device = set_flash,
2088 static ssize_t mem_read(struct file *file, char __user *buf, size_t count,
2092 loff_t avail = file->f_path.dentry->d_inode->i_size;
2093 unsigned int mem = (uintptr_t)file->private_data & 3;
2094 struct adapter *adap = file->private_data - mem;
2100 if (count > avail - pos)
2101 count = avail - pos;
2109 ret = t4_mc_read(adap, pos, data, NULL);
2111 ret = t4_edc_read(adap, mem, pos, data, NULL);
2115 ofst = pos % sizeof(data);
2116 len = min(count, sizeof(data) - ofst);
2117 if (copy_to_user(buf, (u8 *)data + ofst, len))
2124 count = pos - *ppos;
2129 static const struct file_operations mem_debugfs_fops = {
2130 .owner = THIS_MODULE,
2131 .open = simple_open,
2133 .llseek = default_llseek,
2136 static void __devinit add_debugfs_mem(struct adapter *adap, const char *name,
2137 unsigned int idx, unsigned int size_mb)
2141 de = debugfs_create_file(name, S_IRUSR, adap->debugfs_root,
2142 (void *)adap + idx, &mem_debugfs_fops);
2143 if (de && de->d_inode)
2144 de->d_inode->i_size = size_mb << 20;
2147 static int __devinit setup_debugfs(struct adapter *adap)
2151 if (IS_ERR_OR_NULL(adap->debugfs_root))
2154 i = t4_read_reg(adap, MA_TARGET_MEM_ENABLE);
2155 if (i & EDRAM0_ENABLE)
2156 add_debugfs_mem(adap, "edc0", MEM_EDC0, 5);
2157 if (i & EDRAM1_ENABLE)
2158 add_debugfs_mem(adap, "edc1", MEM_EDC1, 5);
2159 if (i & EXT_MEM_ENABLE)
2160 add_debugfs_mem(adap, "mc", MEM_MC,
2161 EXT_MEM_SIZE_GET(t4_read_reg(adap, MA_EXT_MEMORY_BAR)));
2163 debugfs_create_file("l2t", S_IRUSR, adap->debugfs_root, adap,
2169 * upper-layer driver support
2173 * Allocate an active-open TID and set it to the supplied value.
2175 int cxgb4_alloc_atid(struct tid_info *t, void *data)
2179 spin_lock_bh(&t->atid_lock);
2181 union aopen_entry *p = t->afree;
2183 atid = p - t->atid_tab;
2188 spin_unlock_bh(&t->atid_lock);
2191 EXPORT_SYMBOL(cxgb4_alloc_atid);
2194 * Release an active-open TID.
2196 void cxgb4_free_atid(struct tid_info *t, unsigned int atid)
2198 union aopen_entry *p = &t->atid_tab[atid];
2200 spin_lock_bh(&t->atid_lock);
2204 spin_unlock_bh(&t->atid_lock);
2206 EXPORT_SYMBOL(cxgb4_free_atid);
2209 * Allocate a server TID and set it to the supplied value.
2211 int cxgb4_alloc_stid(struct tid_info *t, int family, void *data)
2215 spin_lock_bh(&t->stid_lock);
2216 if (family == PF_INET) {
2217 stid = find_first_zero_bit(t->stid_bmap, t->nstids);
2218 if (stid < t->nstids)
2219 __set_bit(stid, t->stid_bmap);
2223 stid = bitmap_find_free_region(t->stid_bmap, t->nstids, 2);
2228 t->stid_tab[stid].data = data;
2229 stid += t->stid_base;
2232 spin_unlock_bh(&t->stid_lock);
2235 EXPORT_SYMBOL(cxgb4_alloc_stid);
2238 * Release a server TID.
2240 void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family)
2242 stid -= t->stid_base;
2243 spin_lock_bh(&t->stid_lock);
2244 if (family == PF_INET)
2245 __clear_bit(stid, t->stid_bmap);
2247 bitmap_release_region(t->stid_bmap, stid, 2);
2248 t->stid_tab[stid].data = NULL;
2250 spin_unlock_bh(&t->stid_lock);
2252 EXPORT_SYMBOL(cxgb4_free_stid);
2255 * Populate a TID_RELEASE WR. Caller must properly size the skb.
2257 static void mk_tid_release(struct sk_buff *skb, unsigned int chan,
2260 struct cpl_tid_release *req;
2262 set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
2263 req = (struct cpl_tid_release *)__skb_put(skb, sizeof(*req));
2264 INIT_TP_WR(req, tid);
2265 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
2269 * Queue a TID release request and if necessary schedule a work queue to
2272 static void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan,
2275 void **p = &t->tid_tab[tid];
2276 struct adapter *adap = container_of(t, struct adapter, tids);
2278 spin_lock_bh(&adap->tid_release_lock);
2279 *p = adap->tid_release_head;
2280 /* Low 2 bits encode the Tx channel number */
2281 adap->tid_release_head = (void **)((uintptr_t)p | chan);
2282 if (!adap->tid_release_task_busy) {
2283 adap->tid_release_task_busy = true;
2284 queue_work(workq, &adap->tid_release_task);
2286 spin_unlock_bh(&adap->tid_release_lock);
2290 * Process the list of pending TID release requests.
2292 static void process_tid_release_list(struct work_struct *work)
2294 struct sk_buff *skb;
2295 struct adapter *adap;
2297 adap = container_of(work, struct adapter, tid_release_task);
2299 spin_lock_bh(&adap->tid_release_lock);
2300 while (adap->tid_release_head) {
2301 void **p = adap->tid_release_head;
2302 unsigned int chan = (uintptr_t)p & 3;
2303 p = (void *)p - chan;
2305 adap->tid_release_head = *p;
2307 spin_unlock_bh(&adap->tid_release_lock);
2309 while (!(skb = alloc_skb(sizeof(struct cpl_tid_release),
2311 schedule_timeout_uninterruptible(1);
2313 mk_tid_release(skb, chan, p - adap->tids.tid_tab);
2314 t4_ofld_send(adap, skb);
2315 spin_lock_bh(&adap->tid_release_lock);
2317 adap->tid_release_task_busy = false;
2318 spin_unlock_bh(&adap->tid_release_lock);
2322 * Release a TID and inform HW. If we are unable to allocate the release
2323 * message we defer to a work queue.
2325 void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid)
2328 struct sk_buff *skb;
2329 struct adapter *adap = container_of(t, struct adapter, tids);
2331 old = t->tid_tab[tid];
2332 skb = alloc_skb(sizeof(struct cpl_tid_release), GFP_ATOMIC);
2334 t->tid_tab[tid] = NULL;
2335 mk_tid_release(skb, chan, tid);
2336 t4_ofld_send(adap, skb);
2338 cxgb4_queue_tid_release(t, chan, tid);
2340 atomic_dec(&t->tids_in_use);
2342 EXPORT_SYMBOL(cxgb4_remove_tid);
2345 * Allocate and initialize the TID tables. Returns 0 on success.
2347 static int tid_init(struct tid_info *t)
2350 unsigned int natids = t->natids;
2352 size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) +
2353 t->nstids * sizeof(*t->stid_tab) +
2354 BITS_TO_LONGS(t->nstids) * sizeof(long);
2355 t->tid_tab = t4_alloc_mem(size);
2359 t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
2360 t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
2361 t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids];
2362 spin_lock_init(&t->stid_lock);
2363 spin_lock_init(&t->atid_lock);
2365 t->stids_in_use = 0;
2367 t->atids_in_use = 0;
2368 atomic_set(&t->tids_in_use, 0);
2370 /* Setup the free list for atid_tab and clear the stid bitmap. */
2373 t->atid_tab[natids - 1].next = &t->atid_tab[natids];
2374 t->afree = t->atid_tab;
2376 bitmap_zero(t->stid_bmap, t->nstids);
2381 * cxgb4_create_server - create an IP server
2383 * @stid: the server TID
2384 * @sip: local IP address to bind server to
2385 * @sport: the server's TCP port
2386 * @queue: queue to direct messages from this server to
2388 * Create an IP server for the given port and address.
2389 * Returns <0 on error and one of the %NET_XMIT_* values on success.
2391 int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
2392 __be32 sip, __be16 sport, unsigned int queue)
2395 struct sk_buff *skb;
2396 struct adapter *adap;
2397 struct cpl_pass_open_req *req;
2399 skb = alloc_skb(sizeof(*req), GFP_KERNEL);
2403 adap = netdev2adap(dev);
2404 req = (struct cpl_pass_open_req *)__skb_put(skb, sizeof(*req));
2406 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
2407 req->local_port = sport;
2408 req->peer_port = htons(0);
2409 req->local_ip = sip;
2410 req->peer_ip = htonl(0);
2411 chan = rxq_to_chan(&adap->sge, queue);
2412 req->opt0 = cpu_to_be64(TX_CHAN(chan));
2413 req->opt1 = cpu_to_be64(CONN_POLICY_ASK |
2414 SYN_RSS_ENABLE | SYN_RSS_QUEUE(queue));
2415 return t4_mgmt_tx(adap, skb);
2417 EXPORT_SYMBOL(cxgb4_create_server);
2420 * cxgb4_best_mtu - find the entry in the MTU table closest to an MTU
2421 * @mtus: the HW MTU table
2422 * @mtu: the target MTU
2423 * @idx: index of selected entry in the MTU table
2425 * Returns the index and the value in the HW MTU table that is closest to
2426 * but does not exceed @mtu, unless @mtu is smaller than any value in the
2427 * table, in which case that smallest available value is selected.
2429 unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu,
2434 while (i < NMTUS - 1 && mtus[i + 1] <= mtu)
2440 EXPORT_SYMBOL(cxgb4_best_mtu);
2443 * cxgb4_port_chan - get the HW channel of a port
2444 * @dev: the net device for the port
2446 * Return the HW Tx channel of the given port.
2448 unsigned int cxgb4_port_chan(const struct net_device *dev)
2450 return netdev2pinfo(dev)->tx_chan;
2452 EXPORT_SYMBOL(cxgb4_port_chan);
2454 unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo)
2456 struct adapter *adap = netdev2adap(dev);
2459 v = t4_read_reg(adap, A_SGE_DBFIFO_STATUS);
2460 return lpfifo ? G_LP_COUNT(v) : G_HP_COUNT(v);
2462 EXPORT_SYMBOL(cxgb4_dbfifo_count);
2465 * cxgb4_port_viid - get the VI id of a port
2466 * @dev: the net device for the port
2468 * Return the VI id of the given port.
2470 unsigned int cxgb4_port_viid(const struct net_device *dev)
2472 return netdev2pinfo(dev)->viid;
2474 EXPORT_SYMBOL(cxgb4_port_viid);
2477 * cxgb4_port_idx - get the index of a port
2478 * @dev: the net device for the port
2480 * Return the index of the given port.
2482 unsigned int cxgb4_port_idx(const struct net_device *dev)
2484 return netdev2pinfo(dev)->port_id;
2486 EXPORT_SYMBOL(cxgb4_port_idx);
2488 void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4,
2489 struct tp_tcp_stats *v6)
2491 struct adapter *adap = pci_get_drvdata(pdev);
2493 spin_lock(&adap->stats_lock);
2494 t4_tp_get_tcp_stats(adap, v4, v6);
2495 spin_unlock(&adap->stats_lock);
2497 EXPORT_SYMBOL(cxgb4_get_tcp_stats);
2499 void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask,
2500 const unsigned int *pgsz_order)
2502 struct adapter *adap = netdev2adap(dev);
2504 t4_write_reg(adap, ULP_RX_ISCSI_TAGMASK, tag_mask);
2505 t4_write_reg(adap, ULP_RX_ISCSI_PSZ, HPZ0(pgsz_order[0]) |
2506 HPZ1(pgsz_order[1]) | HPZ2(pgsz_order[2]) |
2507 HPZ3(pgsz_order[3]));
2509 EXPORT_SYMBOL(cxgb4_iscsi_init);
2511 int cxgb4_flush_eq_cache(struct net_device *dev)
2513 struct adapter *adap = netdev2adap(dev);
2516 ret = t4_fwaddrspace_write(adap, adap->mbox,
2517 0xe1000000 + A_SGE_CTXT_CMD, 0x20000000);
2520 EXPORT_SYMBOL(cxgb4_flush_eq_cache);
2522 static int read_eq_indices(struct adapter *adap, u16 qid, u16 *pidx, u16 *cidx)
2524 u32 addr = t4_read_reg(adap, A_SGE_DBQ_CTXT_BADDR) + 24 * qid + 8;
2528 ret = t4_mem_win_read_len(adap, addr, (__be32 *)&indices, 8);
2530 indices = be64_to_cpu(indices);
2531 *cidx = (indices >> 25) & 0xffff;
2532 *pidx = (indices >> 9) & 0xffff;
2537 int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx,
2540 struct adapter *adap = netdev2adap(dev);
2541 u16 hw_pidx, hw_cidx;
2544 ret = read_eq_indices(adap, qid, &hw_pidx, &hw_cidx);
2548 if (pidx != hw_pidx) {
2551 if (pidx >= hw_pidx)
2552 delta = pidx - hw_pidx;
2554 delta = size - hw_pidx + pidx;
2556 t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
2557 QID(qid) | PIDX(delta));
2562 EXPORT_SYMBOL(cxgb4_sync_txq_pidx);
/* Forward declaration: check_neigh_update() matches devices we own by driver */
static struct pci_driver cxgb4_driver;
2566 static void check_neigh_update(struct neighbour *neigh)
2568 const struct device *parent;
2569 const struct net_device *netdev = neigh->dev;
2571 if (netdev->priv_flags & IFF_802_1Q_VLAN)
2572 netdev = vlan_dev_real_dev(netdev);
2573 parent = netdev->dev.parent;
2574 if (parent && parent->driver == &cxgb4_driver.driver)
2575 t4_l2t_update(dev_get_drvdata(parent), neigh);
2578 static int netevent_cb(struct notifier_block *nb, unsigned long event,
2582 case NETEVENT_NEIGH_UPDATE:
2583 check_neigh_update(data);
2585 case NETEVENT_REDIRECT:
static bool netevent_registered;
/* Feeds neighbour-table updates into the L2T via netevent_cb() */
static struct notifier_block cxgb4_netevent_nb = {
	.notifier_call = netevent_cb
2597 static void drain_db_fifo(struct adapter *adap, int usecs)
2602 set_current_state(TASK_UNINTERRUPTIBLE);
2603 schedule_timeout(usecs_to_jiffies(usecs));
2604 v = t4_read_reg(adap, A_SGE_DBFIFO_STATUS);
2605 if (G_LP_COUNT(v) == 0 && G_HP_COUNT(v) == 0)
2610 static void disable_txq_db(struct sge_txq *q)
2612 spin_lock_irq(&q->db_lock);
2614 spin_unlock_irq(&q->db_lock);
2617 static void enable_txq_db(struct sge_txq *q)
2619 spin_lock_irq(&q->db_lock);
2621 spin_unlock_irq(&q->db_lock);
2624 static void disable_dbs(struct adapter *adap)
2628 for_each_ethrxq(&adap->sge, i)
2629 disable_txq_db(&adap->sge.ethtxq[i].q);
2630 for_each_ofldrxq(&adap->sge, i)
2631 disable_txq_db(&adap->sge.ofldtxq[i].q);
2632 for_each_port(adap, i)
2633 disable_txq_db(&adap->sge.ctrlq[i].q);
2636 static void enable_dbs(struct adapter *adap)
2640 for_each_ethrxq(&adap->sge, i)
2641 enable_txq_db(&adap->sge.ethtxq[i].q);
2642 for_each_ofldrxq(&adap->sge, i)
2643 enable_txq_db(&adap->sge.ofldtxq[i].q);
2644 for_each_port(adap, i)
2645 enable_txq_db(&adap->sge.ctrlq[i].q);
2648 static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q)
2650 u16 hw_pidx, hw_cidx;
2653 spin_lock_bh(&q->db_lock);
2654 ret = read_eq_indices(adap, (u16)q->cntxt_id, &hw_pidx, &hw_cidx);
2657 if (q->db_pidx != hw_pidx) {
2660 if (q->db_pidx >= hw_pidx)
2661 delta = q->db_pidx - hw_pidx;
2663 delta = q->size - hw_pidx + q->db_pidx;
2665 t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
2666 QID(q->cntxt_id) | PIDX(delta));
2670 spin_unlock_bh(&q->db_lock);
2672 CH_WARN(adap, "DB drop recovery failed.\n");
2674 static void recover_all_queues(struct adapter *adap)
2678 for_each_ethrxq(&adap->sge, i)
2679 sync_txq_pidx(adap, &adap->sge.ethtxq[i].q);
2680 for_each_ofldrxq(&adap->sge, i)
2681 sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q);
2682 for_each_port(adap, i)
2683 sync_txq_pidx(adap, &adap->sge.ctrlq[i].q);
2686 static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd)
2688 mutex_lock(&uld_mutex);
2689 if (adap->uld_handle[CXGB4_ULD_RDMA])
2690 ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA],
2692 mutex_unlock(&uld_mutex);
2695 static void process_db_full(struct work_struct *work)
2697 struct adapter *adap;
2699 adap = container_of(work, struct adapter, db_full_task);
2701 notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL);
2702 drain_db_fifo(adap, dbfifo_drain_delay);
2703 t4_set_reg_field(adap, SGE_INT_ENABLE3,
2704 DBFIFO_HP_INT | DBFIFO_LP_INT,
2705 DBFIFO_HP_INT | DBFIFO_LP_INT);
2706 notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY);
2709 static void process_db_drop(struct work_struct *work)
2711 struct adapter *adap;
2713 adap = container_of(work, struct adapter, db_drop_task);
2715 t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_DROPPED_DB, 0);
2717 notify_rdma_uld(adap, CXGB4_CONTROL_DB_DROP);
2718 drain_db_fifo(adap, 1);
2719 recover_all_queues(adap);
2723 void t4_db_full(struct adapter *adap)
2725 t4_set_reg_field(adap, SGE_INT_ENABLE3,
2726 DBFIFO_HP_INT | DBFIFO_LP_INT, 0);
2727 queue_work(workq, &adap->db_full_task);
2730 void t4_db_dropped(struct adapter *adap)
2732 queue_work(workq, &adap->db_drop_task);
2735 static void uld_attach(struct adapter *adap, unsigned int uld)
2738 struct cxgb4_lld_info lli;
2740 lli.pdev = adap->pdev;
2741 lli.l2t = adap->l2t;
2742 lli.tids = &adap->tids;
2743 lli.ports = adap->port;
2744 lli.vr = &adap->vres;
2745 lli.mtus = adap->params.mtus;
2746 if (uld == CXGB4_ULD_RDMA) {
2747 lli.rxq_ids = adap->sge.rdma_rxq;
2748 lli.nrxq = adap->sge.rdmaqs;
2749 } else if (uld == CXGB4_ULD_ISCSI) {
2750 lli.rxq_ids = adap->sge.ofld_rxq;
2751 lli.nrxq = adap->sge.ofldqsets;
2753 lli.ntxq = adap->sge.ofldqsets;
2754 lli.nchan = adap->params.nports;
2755 lli.nports = adap->params.nports;
2756 lli.wr_cred = adap->params.ofldq_wr_cred;
2757 lli.adapter_type = adap->params.rev;
2758 lli.iscsi_iolen = MAXRXDATA_GET(t4_read_reg(adap, TP_PARA_REG2));
2759 lli.udb_density = 1 << QUEUESPERPAGEPF0_GET(
2760 t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF) >>
2762 lli.ucq_density = 1 << QUEUESPERPAGEPF0_GET(
2763 t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF) >>
2765 lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS);
2766 lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL);
2767 lli.fw_vers = adap->params.fw_vers;
2768 lli.dbfifo_int_thresh = dbfifo_int_thresh;
2770 handle = ulds[uld].add(&lli);
2771 if (IS_ERR(handle)) {
2772 dev_warn(adap->pdev_dev,
2773 "could not attach to the %s driver, error %ld\n",
2774 uld_str[uld], PTR_ERR(handle));
2778 adap->uld_handle[uld] = handle;
2780 if (!netevent_registered) {
2781 register_netevent_notifier(&cxgb4_netevent_nb);
2782 netevent_registered = true;
2785 if (adap->flags & FULL_INIT_DONE)
2786 ulds[uld].state_change(handle, CXGB4_STATE_UP);
2789 static void attach_ulds(struct adapter *adap)
2793 mutex_lock(&uld_mutex);
2794 list_add_tail(&adap->list_node, &adapter_list);
2795 for (i = 0; i < CXGB4_ULD_MAX; i++)
2797 uld_attach(adap, i);
2798 mutex_unlock(&uld_mutex);
2801 static void detach_ulds(struct adapter *adap)
2805 mutex_lock(&uld_mutex);
2806 list_del(&adap->list_node);
2807 for (i = 0; i < CXGB4_ULD_MAX; i++)
2808 if (adap->uld_handle[i]) {
2809 ulds[i].state_change(adap->uld_handle[i],
2810 CXGB4_STATE_DETACH);
2811 adap->uld_handle[i] = NULL;
2813 if (netevent_registered && list_empty(&adapter_list)) {
2814 unregister_netevent_notifier(&cxgb4_netevent_nb);
2815 netevent_registered = false;
2817 mutex_unlock(&uld_mutex);
/*
 * notify_ulds - broadcast an adapter state change to every attached ULD.
 * Invokes each ULD's state_change() callback with @new_state for every slot
 * that has a live handle, under uld_mutex.
 */
2820 static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
2824 mutex_lock(&uld_mutex);
2825 for (i = 0; i < CXGB4_ULD_MAX; i++)
2826 if (adap->uld_handle[i])
2827 ulds[i].state_change(adap->uld_handle[i], new_state);
2828 mutex_unlock(&uld_mutex);
/*
 * NOTE(review): the kernel-doc opener and several body lines (error
 * returns, the ulds[type] assignment) are missing from this extract;
 * the visible "if (ulds[type].add)" branch presumably returns -EBUSY —
 * confirm against the full source.
 */
2832 * cxgb4_register_uld - register an upper-layer driver
2833 * @type: the ULD type
2834 * @p: the ULD methods
2836 * Registers an upper-layer driver with this driver and notifies the ULD
2837 * about any presently available devices that support its type. Returns
2838 * %-EBUSY if a ULD of the same type is already registered.
2840 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p)
2843 struct adapter *adap;
/* Reject out-of-range ULD types before touching global state. */
2845 if (type >= CXGB4_ULD_MAX)
2847 mutex_lock(&uld_mutex);
2848 if (ulds[type].add) {
/* Attach every known adapter to the newly registered ULD. */
2853 list_for_each_entry(adap, &adapter_list, list_node)
2854 uld_attach(adap, type);
2855 out: mutex_unlock(&uld_mutex);
2858 EXPORT_SYMBOL(cxgb4_register_uld);
/*
 * NOTE(review): kernel-doc opener, braces and the return statement are
 * missing from this extract (line numbers jump 2876->2879).
 */
2861 * cxgb4_unregister_uld - unregister an upper-layer driver
2862 * @type: the ULD type
2864 * Unregisters an existing upper-layer driver.
2866 int cxgb4_unregister_uld(enum cxgb4_uld type)
2868 struct adapter *adap;
2870 if (type >= CXGB4_ULD_MAX)
2872 mutex_lock(&uld_mutex);
/* Drop every adapter's handle for this ULD, then forget its methods. */
2873 list_for_each_entry(adap, &adapter_list, list_node)
2874 adap->uld_handle[type] = NULL;
2875 ulds[type].add = NULL;
2876 mutex_unlock(&uld_mutex);
2879 EXPORT_SYMBOL(cxgb4_unregister_uld);
/*
 * NOTE(review): this extract drops several lines (error labels, returns,
 * braces); the visible error-handling flow is therefore partial.
 */
2882 * cxgb_up - enable the adapter
2883 * @adap: adapter being enabled
2885 * Called when the first port is enabled, this function performs the
2886 * actions necessary to make an adapter operational, such as completing
2887 * the initialization of HW modules, and enabling interrupts.
2889 * Must be called with the rtnl lock held.
2891 static int cxgb_up(struct adapter *adap)
2895 err = setup_sge_queues(adap);
2898 err = setup_rss(adap);
/* MSI-X path: one non-data vector plus one IRQ per queue. */
2902 if (adap->flags & USING_MSIX) {
2903 name_msix_vecs(adap);
2904 err = request_irq(adap->msix_info[0].vec, t4_nondata_intr, 0,
2905 adap->msix_info[0].desc, adap);
2909 err = request_msix_queue_irqs(adap);
/* Queue-IRQ request failed: release the non-data vector again. */
2911 free_irq(adap->msix_info[0].vec, adap);
/* INTx/MSI path: a single shared handler selected by t4_intr_handler(). */
2915 err = request_irq(adap->pdev->irq, t4_intr_handler(adap),
2916 (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
2917 adap->port[0]->name, adap);
2923 t4_intr_enable(adap);
2924 adap->flags |= FULL_INIT_DONE;
2925 notify_ulds(adap, CXGB4_STATE_UP);
2929 dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err);
2931 t4_free_sge_resources(adap);
/*
 * cxgb_down - disable the adapter: the inverse of cxgb_up().
 * Masks interrupts, flushes deferred work, releases IRQs, quiesces and
 * frees the SGE, and clears FULL_INIT_DONE.
 */
2935 static void cxgb_down(struct adapter *adapter)
2937 t4_intr_disable(adapter);
/* Make sure no deferred work runs after teardown. */
2938 cancel_work_sync(&adapter->tid_release_task);
2939 cancel_work_sync(&adapter->db_full_task);
2940 cancel_work_sync(&adapter->db_drop_task);
2941 adapter->tid_release_task_busy = false;
2942 adapter->tid_release_head = NULL;
/* Release IRQs in the same configuration cxgb_up() requested them. */
2944 if (adapter->flags & USING_MSIX) {
2945 free_msix_queue_irqs(adapter);
2946 free_irq(adapter->msix_info[0].vec, adapter);
2948 free_irq(adapter->pdev->irq, adapter);
2949 quiesce_rx(adapter);
2950 t4_sge_stop(adapter);
2951 t4_free_sge_resources(adapter);
2952 adapter->flags &= ~FULL_INIT_DONE;
2956 * net_device operations
/*
 * cxgb_open - .ndo_open handler: bring a port up.
 * Performs one-time adapter bring-up via cxgb_up() on the first open, then
 * starts the link and the TX queues for this netdev.
 * NOTE(review): return statements are missing from this extract.
 */
2958 static int cxgb_open(struct net_device *dev)
2961 struct port_info *pi = netdev_priv(dev);
2962 struct adapter *adapter = pi->adapter;
2964 netif_carrier_off(dev);
/* First port to open initializes the whole adapter. */
2966 if (!(adapter->flags & FULL_INIT_DONE)) {
2967 err = cxgb_up(adapter);
2972 err = link_start(dev);
2974 netif_tx_start_all_queues(dev);
/*
 * cxgb_close - .ndo_stop handler: quiesce a port.
 * Stops TX, drops carrier, and disables the Virtual Interface in firmware
 * (both RX and TX disabled via t4_enable_vi(..., false, false)).
 */
2978 static int cxgb_close(struct net_device *dev)
2980 struct port_info *pi = netdev_priv(dev);
2981 struct adapter *adapter = pi->adapter;
2983 netif_tx_stop_all_queues(dev);
2984 netif_carrier_off(dev);
2985 return t4_enable_vi(adapter, adapter->fn, pi->viid, false, false);
/*
 * cxgb_get_stats - .ndo_get_stats64 handler.
 * Reads the port's hardware MAC statistics under stats_lock and translates
 * them into the generic rtnl_link_stats64 layout.
 */
2988 static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev,
2989 struct rtnl_link_stats64 *ns)
2991 struct port_stats stats;
2992 struct port_info *p = netdev_priv(dev);
2993 struct adapter *adapter = p->adapter;
/* stats_lock serializes access to the shared statistics registers. */
2995 spin_lock(&adapter->stats_lock);
2996 t4_get_port_stats(adapter, p->tx_chan, &stats);
2997 spin_unlock(&adapter->stats_lock);
2999 ns->tx_bytes = stats.tx_octets;
3000 ns->tx_packets = stats.tx_frames;
3001 ns->rx_bytes = stats.rx_octets;
3002 ns->rx_packets = stats.rx_frames;
3003 ns->multicast = stats.rx_mcast_frames;
3005 /* detailed rx_errors */
3006 ns->rx_length_errors = stats.rx_jabber + stats.rx_too_long +
3008 ns->rx_over_errors = 0;
3009 ns->rx_crc_errors = stats.rx_fcs_err;
3010 ns->rx_frame_errors = stats.rx_symbol_err;
/* FIFO errors: sum of per-channel overflow and truncation counters. */
3011 ns->rx_fifo_errors = stats.rx_ovflow0 + stats.rx_ovflow1 +
3012 stats.rx_ovflow2 + stats.rx_ovflow3 +
3013 stats.rx_trunc0 + stats.rx_trunc1 +
3014 stats.rx_trunc2 + stats.rx_trunc3;
3015 ns->rx_missed_errors = 0;
3017 /* detailed tx_errors */
3018 ns->tx_aborted_errors = 0;
3019 ns->tx_carrier_errors = 0;
3020 ns->tx_fifo_errors = 0;
3021 ns->tx_heartbeat_errors = 0;
3022 ns->tx_window_errors = 0;
3024 ns->tx_errors = stats.tx_error_frames;
3025 ns->rx_errors = stats.rx_symbol_err + stats.rx_fcs_err +
3026 ns->rx_length_errors + stats.rx_len_err + ns->rx_fifo_errors;
/*
 * cxgb_ioctl - .ndo_do_ioctl handler for MDIO register access.
 * Handles SIOCGMIIPHY/SIOCGMIIREG/SIOCSMIIREG-style requests by decoding a
 * Clause-22 or Clause-45 PHY id and forwarding the read/write to firmware.
 * NOTE(review): the switch/case labels and error returns are missing from
 * this extract; only the case bodies are visible.
 */
3030 static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
3033 int ret = 0, prtad, devad;
3034 struct port_info *pi = netdev_priv(dev);
3035 struct mii_ioctl_data *data = (struct mii_ioctl_data *)&req->ifr_data;
/* No MDIO address means this port has no manageable PHY. */
3039 if (pi->mdio_addr < 0)
3041 data->phy_id = pi->mdio_addr;
/* Clause 45 encodes both port and device address in phy_id. */
3045 if (mdio_phy_id_is_c45(data->phy_id)) {
3046 prtad = mdio_phy_id_prtad(data->phy_id);
3047 devad = mdio_phy_id_devad(data->phy_id);
3048 } else if (data->phy_id < 32) {
3049 prtad = data->phy_id;
3051 data->reg_num &= 0x1f;
3055 mbox = pi->adapter->fn;
3056 if (cmd == SIOCGMIIREG)
3057 ret = t4_mdio_rd(pi->adapter, mbox, prtad, devad,
3058 data->reg_num, &data->val_out);
3060 ret = t4_mdio_wr(pi->adapter, mbox, prtad, devad,
3061 data->reg_num, data->val_in);
/*
 * cxgb_set_rxmode - .ndo_set_rx_mode handler.
 * Pushes the netdev's RX filter mode to hardware; -1 keeps the MTU
 * unchanged and 'false' requests no sleeping (called in atomic context).
 */
3069 static void cxgb_set_rxmode(struct net_device *dev)
3071 /* unfortunately we can't return errors to the stack */
3072 set_rxmode(dev, -1, false);
/*
 * cxgb_change_mtu - .ndo_change_mtu handler.
 * Validates the requested MTU (minimum 81 to accommodate SACK) and asks
 * firmware to apply it via t4_set_rxmode(); -1 arguments leave the other
 * RX-mode settings unchanged.
 * NOTE(review): trailing arguments of t4_set_rxmode() and the success path
 * (dev->mtu assignment) are missing from this extract.
 */
3075 static int cxgb_change_mtu(struct net_device *dev, int new_mtu)
3078 struct port_info *pi = netdev_priv(dev);
3080 if (new_mtu < 81 || new_mtu > MAX_MTU) /* accommodate SACK */
3082 ret = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, new_mtu, -1,
/*
 * cxgb_set_mac_addr - .ndo_set_mac_address handler.
 * Validates the new address, programs it into the VI's exact-match filter
 * via firmware, and on success records the returned filter index and
 * updates dev->dev_addr.
 */
3089 static int cxgb_set_mac_addr(struct net_device *dev, void *p)
3092 struct sockaddr *addr = p;
3093 struct port_info *pi = netdev_priv(dev);
3095 if (!is_valid_ether_addr(addr->sa_data))
3096 return -EADDRNOTAVAIL;
/* t4_change_mac() returns the new exact-match filter index on success. */
3098 ret = t4_change_mac(pi->adapter, pi->adapter->fn, pi->viid,
3099 pi->xact_addr_filt, addr->sa_data, true, true);
3103 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
3104 pi->xact_addr_filt = ret;
3108 #ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * cxgb_netpoll - .ndo_poll_controller handler (netconsole et al.).
 * Manually invokes the interrupt handlers: per-queue MSI-X handlers for
 * this port's RX queues, or the single INTx/MSI handler otherwise.
 */
3109 static void cxgb_netpoll(struct net_device *dev)
3111 struct port_info *pi = netdev_priv(dev);
3112 struct adapter *adap = pi->adapter;
3114 if (adap->flags & USING_MSIX) {
3116 struct sge_eth_rxq *rx = &adap->sge.ethrxq[pi->first_qset];
3118 for (i = pi->nqsets; i; i--, rx++)
3119 t4_sge_intr_msix(0, &rx->rspq);
/* Non-MSI-X: t4_intr_handler() returns the right handler to call. */
3121 t4_intr_handler(adap)(0, adap);
/* net_device operations vector wired into each port's netdev. */
3125 static const struct net_device_ops cxgb4_netdev_ops = {
3126 .ndo_open = cxgb_open,
3127 .ndo_stop = cxgb_close,
3128 .ndo_start_xmit = t4_eth_xmit,
3129 .ndo_get_stats64 = cxgb_get_stats,
3130 .ndo_set_rx_mode = cxgb_set_rxmode,
3131 .ndo_set_mac_address = cxgb_set_mac_addr,
3132 .ndo_set_features = cxgb_set_features,
3133 .ndo_validate_addr = eth_validate_addr,
3134 .ndo_do_ioctl = cxgb_ioctl,
3135 .ndo_change_mtu = cxgb_change_mtu,
3136 #ifdef CONFIG_NET_POLL_CONTROLLER
3137 .ndo_poll_controller = cxgb_netpoll,
/*
 * t4_fatal_err - stop the adapter after an unrecoverable error.
 * Disables the SGE globally, masks all interrupts, and logs an alert.
 */
3141 void t4_fatal_err(struct adapter *adap)
3143 t4_set_reg_field(adap, SGE_CONTROL, GLOBALENABLE, 0);
3144 t4_intr_disable(adap);
3145 dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n");
/*
 * setup_memwin - program the three PCIe memory access windows.
 * Each window maps a region of BAR0 at a fixed offset; WINDOW() encodes
 * the aperture size as log2(size in KB) (hence the "- 10").
 */
3148 static void setup_memwin(struct adapter *adap)
3152 bar0 = pci_resource_start(adap->pdev, 0); /* truncation intentional */
3153 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 0),
3154 (bar0 + MEMWIN0_BASE) | BIR(0) |
3155 WINDOW(ilog2(MEMWIN0_APERTURE) - 10));
3156 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 1),
3157 (bar0 + MEMWIN1_BASE) | BIR(0) |
3158 WINDOW(ilog2(MEMWIN1_APERTURE) - 10));
3159 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 2),
3160 (bar0 + MEMWIN2_BASE) | BIR(0) |
3161 WINDOW(ilog2(MEMWIN2_APERTURE) - 10));
/*
 * setup_memwin_rdma - program memory window 3 for RDMA on-chip queues.
 * Only done when on-chip queue memory (vres.ocq) was provisioned; maps the
 * OCQ region of BAR2 and sets the window offset to the OCQ start.
 * NOTE(review): the t4_write_reg()/t4_read_reg() call openers around lines
 * 3172/3175/3178 are missing from this extract.
 */
3164 static void setup_memwin_rdma(struct adapter *adap)
3166 if (adap->vres.ocq.size) {
3167 unsigned int start, sz_kb;
3169 start = pci_resource_start(adap->pdev, 2) +
3170 OCQ_WIN_OFFSET(adap->pdev, &adap->vres);
3171 sz_kb = roundup_pow_of_two(adap->vres.ocq.size) >> 10;
3173 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 3),
3174 start | BIR(1) | WINDOW(ilog2(sz_kb)));
3176 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, 3),
3177 adap->vres.ocq.start);
3179 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, 3));
/*
 * adap_init1 - basic firmware-driven adapter initialization.
 * Reads device capabilities, writes back the selected subset, configures
 * global RSS and PF resources, applies a few register tweaks, and finishes
 * with t4_early_init(). Used on the EEH slot-reset recovery path.
 * NOTE(review): error-return lines after each t4_wr_mbox() are missing
 * from this extract.
 */
3183 static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
3188 /* get device capabilities */
3189 memset(c, 0, sizeof(*c));
3190 c->op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3191 FW_CMD_REQUEST | FW_CMD_READ);
3192 c->retval_len16 = htonl(FW_LEN16(*c));
3193 ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), c);
3197 /* select capabilities we'll be using */
3198 if (c->niccaps & htons(FW_CAPS_CONFIG_NIC_VM)) {
3200 c->niccaps ^= htons(FW_CAPS_CONFIG_NIC_VM);
3202 c->niccaps = htons(FW_CAPS_CONFIG_NIC_VM);
3203 } else if (vf_acls) {
3204 dev_err(adap->pdev_dev, "virtualization ACLs not supported");
3207 c->op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3208 FW_CMD_REQUEST | FW_CMD_WRITE);
3209 ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), NULL);
/* Basic-virtual RSS: per-VI RSS table slices with tunnel lookups on. */
3213 ret = t4_config_glbl_rss(adap, adap->fn,
3214 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL,
3215 FW_RSS_GLB_CONFIG_CMD_TNLMAPEN |
3216 FW_RSS_GLB_CONFIG_CMD_TNLALLLKP);
3220 ret = t4_cfg_pfvf(adap, adap->fn, adap->fn, 0, MAX_EGRQ, 64, MAX_INGQ,
3221 0, 0, 4, 0xf, 0xf, 16, FW_CMD_CAP_PF, FW_CMD_CAP_PF);
3227 /* tweak some settings */
3228 t4_write_reg(adap, TP_SHIFT_CNT, 0x64f8849);
3229 t4_write_reg(adap, ULP_RX_TDDP_PSZ, HPZ0(PAGE_SHIFT - 12));
/* Linux adds the IP pseudo header itself, so turn it off in TP. */
3230 t4_write_reg(adap, TP_PIO_ADDR, TP_INGRESS_CONFIG);
3231 v = t4_read_reg(adap, TP_PIO_DATA);
3232 t4_write_reg(adap, TP_PIO_DATA, v & ~CSUM_HAS_PSEUDO_HDR);
3234 /* get basic stuff going */
3235 return t4_early_init(adap, adap->fn);
3239 * Max # of ATIDs. The absolute HW max is 16K but we keep it lower.
3241 #define MAX_ATIDS 8192U
3244 * Phase 0 of initialization: contact FW, obtain config, perform basic init.
3246 * If the firmware we're dealing with has Configuration File support, then
3247 * we use that to perform all configuration
3251 * Tweak configuration based on module parameters, etc. Most of these have
3252 * defaults assigned to them by Firmware Configuration Files (if we're using
3253 * them) but need to be explicitly set if we're using hard-coded
3254 * initialization. But even in the case of using Firmware Configuration
3255 * Files, we'd like to expose the ability to change these via module
3256 * parameters so these are essentially common tweaks/settings for
3257 * Configuration Files and hard-coded initialization ...
/*
 * adap_init0_tweaks - host-dependent settings shared by both the
 * Configuration-File and hard-coded initialization paths: page/cache-line
 * sizes, RX DMA offset, and pseudo-header checksum behavior.
 */
3259 static int adap_init0_tweaks(struct adapter *adapter)
3262 * Fix up various Host-Dependent Parameters like Page Size, Cache
3263 * Line Size, etc. The firmware default is for a 4KB Page Size and
3264 * 64B Cache Line Size ...
3266 t4_fixup_host_params(adapter, PAGE_SIZE, L1_CACHE_BYTES);
3269 * Process module parameters which affect early initialization.
/* Only 0 and 2 are legal RX DMA offsets; fall back to 2 otherwise. */
3271 if (rx_dma_offset != 2 && rx_dma_offset != 0) {
3272 dev_err(&adapter->pdev->dev,
3273 "Ignoring illegal rx_dma_offset=%d, using 2\n",
3277 t4_set_reg_field(adapter, SGE_CONTROL,
3279 PKTSHIFT(rx_dma_offset));
3282 * Don't include the "IP Pseudo Header" in CPL_RX_PKT checksums: Linux
3283 * adds the pseudo header itself.
3285 t4_tp_wr_bits_indirect(adapter, TP_INGRESS_CONFIG,
3286 CSUM_HAS_PSEUDO_HDR, 0);
/*
 * adap_init0_config - initialize the adapter from a Firmware Configuration
 * File, preferring a host-side file (request_firmware) over the copy in
 * adapter flash. On success sets USING_SOFT_PARAMS.
 * NOTE(review): many lines are missing from this extract (gotos, the
 * "last" union declaration, several call openers); flow comments below are
 * best-effort against the visible code.
 */
3292 * Attempt to initialize the adapter via a Firmware Configuration File.
3294 static int adap_init0_config(struct adapter *adapter, int reset)
3296 struct fw_caps_config_cmd caps_cmd;
3297 const struct firmware *cf;
3298 unsigned long mtype = 0, maddr = 0;
3299 u32 finiver, finicsum, cfcsum;
3300 int ret, using_flash;
3303 * Reset device if necessary.
3306 ret = t4_fw_reset(adapter, adapter->mbox,
3307 PIORSTMODE | PIORST);
3313 * If we have a T4 configuration file under /lib/firmware/cxgb4/,
3314 * then use that. Otherwise, use the configuration file stored
3315 * in the adapter flash ...
3317 ret = request_firmware(&cf, FW_CFNAME, adapter->pdev_dev);
/* request_firmware failed: fall back to the copy in flash. */
3320 mtype = FW_MEMTYPE_CF_FLASH;
3321 maddr = t4_flash_cfg_addr(adapter);
3323 u32 params[7], val[7];
3326 if (cf->size >= FLASH_CFG_MAX_SIZE)
/* Ask firmware where in adapter memory to stage the host config file. */
3329 params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
3330 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_CF));
3331 ret = t4_query_params(adapter, adapter->mbox,
3332 adapter->fn, 0, 1, params, val);
3335 * For t4_memory_write() below addresses and
3336 * sizes have to be in terms of multiples of 4
3337 * bytes. So, if the Configuration File isn't
3338 * a multiple of 4 bytes in length we'll have
3339 * to write that out separately since we can't
3340 * guarantee that the bytes following the
3341 * residual byte in the buffer returned by
3342 * request_firmware() are zeroed out ...
3344 size_t resid = cf->size & 0x3;
3345 size_t size = cf->size & ~0x3;
3346 __be32 *data = (__be32 *)cf->data;
3348 mtype = FW_PARAMS_PARAM_Y_GET(val[0]);
3349 maddr = FW_PARAMS_PARAM_Z_GET(val[0]) << 16;
3351 ret = t4_memory_write(adapter, mtype, maddr,
/* Write the zero-padded residual word, if the file size wasn't 4-aligned. */
3353 if (ret == 0 && resid != 0) {
3360 last.word = data[size >> 2];
3361 for (i = resid; i < 4; i++)
3363 ret = t4_memory_write(adapter, mtype,
3370 release_firmware(cf);
3376 * Issue a Capability Configuration command to the firmware to get it
3377 * to parse the Configuration File. We don't use t4_fw_config_file()
3378 * because we want the ability to modify various features after we've
3379 * processed the configuration file ...
3381 memset(&caps_cmd, 0, sizeof(caps_cmd));
3382 caps_cmd.op_to_write =
3383 htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3386 caps_cmd.retval_len16 =
3387 htonl(FW_CAPS_CONFIG_CMD_CFVALID |
3388 FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
3389 FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(maddr >> 16) |
3390 FW_LEN16(caps_cmd));
3391 ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
/* Verify the config file's self-declared checksum against firmware's. */
3396 finiver = ntohl(caps_cmd.finiver);
3397 finicsum = ntohl(caps_cmd.finicsum);
3398 cfcsum = ntohl(caps_cmd.cfcsum);
3399 if (finicsum != cfcsum)
3400 dev_warn(adapter->pdev_dev, "Configuration File checksum "\
3401 "mismatch: [fini] csum=%#x, computed csum=%#x\n",
3405 * If we're a pure NIC driver then disable all offloading facilities.
3406 * This will allow the firmware to optimize aspects of the hardware
3407 * configuration which will result in improved performance.
3409 caps_cmd.ofldcaps = 0;
3410 caps_cmd.iscsicaps = 0;
3411 caps_cmd.rdmacaps = 0;
3412 caps_cmd.fcoecaps = 0;
3415 * And now tell the firmware to use the configuration we just loaded.
3417 caps_cmd.op_to_write =
3418 htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3421 caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd));
3422 ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
3428 * Tweak configuration based on system architecture, module
3431 ret = adap_init0_tweaks(adapter);
3436 * And finally tell the firmware to initialize itself using the
3437 * parameters from the Configuration File.
3439 ret = t4_fw_initialize(adapter, adapter->mbox);
3444 * Return successfully and note that we're operating with parameters
3445 * not supplied by the driver, rather than from hard-wired
3446 * initialization constants buried in the driver.
3448 adapter->flags |= USING_SOFT_PARAMS;
3449 dev_info(adapter->pdev_dev, "Successfully configured using Firmware "\
3450 "Configuration File %s, version %#x, computed checksum %#x\n",
3453 : "/lib/firmware/" FW_CFNAME),
3458 * Something bad happened. Return the error ... (If the "error"
3459 * is that there's no Configuration File on the adapter we don't
3460 * want to issue a warning since this is fairly common.)
3464 dev_warn(adapter->pdev_dev, "Configuration file error %d\n",
/*
 * adap_init0_no_config - initialize the adapter from hard-coded, driver
 * supplied parameters when no Firmware Configuration File is available:
 * capabilities, RSS global mode, PF/VF resource provisioning, SGE timers
 * and counters, filter mode, and finally t4_fw_initialize().
 * NOTE(review): this extract drops many lines (gotos, error returns, most
 * switch-case labels in the filter-mode sizing loop); comments below only
 * describe what is visible.
 */
3470 * Attempt to initialize the adapter via hard-coded, driver supplied
3473 static int adap_init0_no_config(struct adapter *adapter, int reset)
3475 struct sge *s = &adapter->sge;
3476 struct fw_caps_config_cmd caps_cmd;
3481 * Reset device if necessary
3484 ret = t4_fw_reset(adapter, adapter->mbox,
3485 PIORSTMODE | PIORST);
3491 * Get device capabilities and select which we'll be using.
3493 memset(&caps_cmd, 0, sizeof(caps_cmd));
3494 caps_cmd.op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3495 FW_CMD_REQUEST | FW_CMD_READ);
3496 caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd));
3497 ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
3502 #ifndef CONFIG_CHELSIO_T4_OFFLOAD
3504 * If we're a pure NIC driver then disable all offloading facilities.
3505 * This will allow the firmware to optimize aspects of the hardware
3506 * configuration which will result in improved performance.
3508 caps_cmd.ofldcaps = 0;
3509 caps_cmd.iscsicaps = 0;
3510 caps_cmd.rdmacaps = 0;
3511 caps_cmd.fcoecaps = 0;
3514 if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_VM)) {
3516 caps_cmd.niccaps ^= htons(FW_CAPS_CONFIG_NIC_VM);
3518 caps_cmd.niccaps = htons(FW_CAPS_CONFIG_NIC_VM);
3519 } else if (vf_acls) {
3520 dev_err(adapter->pdev_dev, "virtualization ACLs not supported");
/* Write back the (possibly trimmed) capability selection. */
3523 caps_cmd.op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3524 FW_CMD_REQUEST | FW_CMD_WRITE);
3525 ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
3531 * Tweak configuration based on system architecture, module
3534 ret = adap_init0_tweaks(adapter);
3539 * Select RSS Global Mode we want to use. We use "Basic Virtual"
3540 * mode which maps each Virtual Interface to its own section of
3541 * the RSS Table and we turn on all map and hash enables ...
3543 adapter->flags |= RSS_TNLALLLOOKUP;
3544 ret = t4_config_glbl_rss(adapter, adapter->mbox,
3545 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL,
3546 FW_RSS_GLB_CONFIG_CMD_TNLMAPEN |
3547 FW_RSS_GLB_CONFIG_CMD_HASHTOEPLITZ |
3548 ((adapter->flags & RSS_TNLALLLOOKUP) ?
3549 FW_RSS_GLB_CONFIG_CMD_TNLALLLKP : 0));
3554 * Set up our own fundamental resource provisioning ...
3556 ret = t4_cfg_pfvf(adapter, adapter->mbox, adapter->fn, 0,
3557 PFRES_NEQ, PFRES_NETHCTRL,
3558 PFRES_NIQFLINT, PFRES_NIQ,
3559 PFRES_TC, PFRES_NVI,
3560 FW_PFVF_CMD_CMASK_MASK,
3561 pfvfres_pmask(adapter, adapter->fn, 0),
3563 PFRES_R_CAPS, PFRES_WX_CAPS);
3568 * Perform low level SGE initialization. We need to do this before we
3569 * send the firmware the INITIALIZE command because that will cause
3570 * any other PF Drivers which are waiting for the Master
3571 * Initialization to proceed forward.
3573 for (i = 0; i < SGE_NTIMERS - 1; i++)
3574 s->timer_val[i] = min(intr_holdoff[i], MAX_SGE_TIMERVAL);
3575 s->timer_val[SGE_NTIMERS - 1] = MAX_SGE_TIMERVAL;
3576 s->counter_val[0] = 1;
3577 for (i = 1; i < SGE_NCOUNTERS; i++)
3578 s->counter_val[i] = min(intr_cnt[i - 1],
3579 THRESHOLD_0_GET(THRESHOLD_0_MASK));
3580 t4_sge_init(adapter);
3582 #ifdef CONFIG_PCI_IOV
3584 * Provision resource limits for Virtual Functions. We currently
3585 * grant them all the same static resource limits except for the Port
3586 * Access Rights Mask which we're assigning based on the PF. All of
3587 * the static provisioning stuff for both the PF and VF really needs
3588 * to be managed in a persistent manner for each device which the
3589 * firmware controls.
3594 for (pf = 0; pf < ARRAY_SIZE(num_vf); pf++) {
3595 if (num_vf[pf] <= 0)
3598 /* VF numbering starts at 1! */
3599 for (vf = 1; vf <= num_vf[pf]; vf++) {
3600 ret = t4_cfg_pfvf(adapter, adapter->mbox,
3602 VFRES_NEQ, VFRES_NETHCTRL,
3603 VFRES_NIQFLINT, VFRES_NIQ,
3604 VFRES_TC, VFRES_NVI,
3605 FW_PFVF_CMD_CMASK_GET(
3606 FW_PFVF_CMD_CMASK_MASK),
3610 VFRES_R_CAPS, VFRES_WX_CAPS);
3612 dev_warn(adapter->pdev_dev,
3614 "provision pf/vf=%d/%d; "
3615 "err=%d\n", pf, vf, ret);
3622 * Set up the default filter mode. Later we'll want to implement this
3623 * via a firmware command, etc. ... This needs to be done before the
3624 * firmware initialization command ... If the selected set of fields
3625 * isn't equal to the default value, we'll need to make sure that the
3626 * field selections will fit in the 36-bit budget.
3628 if (tp_vlan_pri_map != TP_VLAN_PRI_MAP_DEFAULT) {
/* Count the bit-budget each enabled compressed filter field consumes. */
3631 for (i = TP_VLAN_PRI_MAP_FIRST; i <= TP_VLAN_PRI_MAP_LAST; i++)
3632 switch (tp_vlan_pri_map & (1 << i)) {
3634 /* compressed filter field not enabled */
3654 case ETHERTYPE_MASK:
3660 case MPSHITTYPE_MASK:
3663 case FRAGMENTATION_MASK:
3669 dev_err(adapter->pdev_dev,
3670 "tp_vlan_pri_map=%#x needs %d bits > 36;"\
3671 " using %#x\n", tp_vlan_pri_map, bits,
3672 TP_VLAN_PRI_MAP_DEFAULT);
3673 tp_vlan_pri_map = TP_VLAN_PRI_MAP_DEFAULT;
3676 v = tp_vlan_pri_map;
3677 t4_write_indirect(adapter, TP_PIO_ADDR, TP_PIO_DATA,
3678 &v, 1, TP_VLAN_PRI_MAP);
3681 * We need Five Tuple Lookup mode to be set in TP_GLOBAL_CONFIG order
3682 * to support any of the compressed filter fields above. Newer
3683 * versions of the firmware do this automatically but it doesn't hurt
3684 * to set it here. Meanwhile, we do _not_ need to set Lookup Every
3685 * Packet in TP_INGRESS_CONFIG to support matching non-TCP packets
3686 * since the firmware automatically turns this on and off when we have
3687 * a non-zero number of filters active (since it does have a
3688 * performance impact).
3690 if (tp_vlan_pri_map)
3691 t4_set_reg_field(adapter, TP_GLOBAL_CONFIG,
3692 FIVETUPLELOOKUP_MASK,
3693 FIVETUPLELOOKUP_MASK);
3696 * Tweak some settings.
3698 t4_write_reg(adapter, TP_SHIFT_CNT, SYNSHIFTMAX(6) |
3699 RXTSHIFTMAXR1(4) | RXTSHIFTMAXR2(15) |
3700 PERSHIFTBACKOFFMAX(8) | PERSHIFTMAX(8) |
3701 KEEPALIVEMAXR1(4) | KEEPALIVEMAXR2(9));
3704 * Get basic stuff going by issuing the Firmware Initialize command.
3705 * Note that this _must_ be after all PFVF commands ...
3707 ret = t4_fw_initialize(adapter, adapter->mbox);
3712 * Return successfully!
3714 dev_info(adapter->pdev_dev, "Successfully configured using built-in "\
3715 "driver parameters\n");
3719 * Something bad happened. Return the error ...
/*
 * adap_init0 - phase 0 of initialization: contact firmware, possibly
 * upgrade it, pick config-file vs. hard-coded init, then pull all the
 * operating parameters (TID/L2T/filter ranges, offload/RDMA/iSCSI virtual
 * resources, TP timer resolutions, MTU table) the driver needs.
 * NOTE(review): this extract is missing many lines (declarations, goto
 * labels, error returns, some call openers); comments below describe only
 * the visible flow.
 */
3726 * Phase 0 of initialization: contact FW, obtain config, perform basic init.
3728 static int adap_init0(struct adapter *adap)
3732 enum dev_state state;
3733 u32 params[7], val[7];
3737 * Contact FW, advertising Master capability (and potentially forcing
3738 * ourselves as the Master PF if our module parameter force_init is
3741 ret = t4_fw_hello(adap, adap->mbox, adap->fn,
3742 force_init ? MASTER_MUST : MASTER_MAY,
3745 dev_err(adap->pdev_dev, "could not connect to FW, error %d\n",
/* t4_fw_hello() returns the Master PF's mailbox; equal means we're it. */
3749 if (ret == adap->mbox)
3750 adap->flags |= MASTER_PF;
3751 if (force_init && state == DEV_STATE_INIT)
3752 state = DEV_STATE_UNINIT;
3755 * If we're the Master PF Driver and the device is uninitialized,
3756 * then let's consider upgrading the firmware ... (We always want
3757 * to check the firmware version number in order to A. get it for
3758 * later reporting and B. to warn if the currently loaded firmware
3759 * is excessively mismatched relative to the driver.)
3761 ret = t4_check_fw_version(adap);
3762 if ((adap->flags & MASTER_PF) && state != DEV_STATE_INIT) {
3763 if (ret == -EINVAL || ret > 0) {
3764 if (upgrade_fw(adap) >= 0) {
3766 * Note that the chip was reset as part of the
3767 * firmware upgrade so we don't reset it again
3768 * below and grab the new firmware version.
3771 ret = t4_check_fw_version(adap);
3779 * Grab VPD parameters. This should be done after we establish a
3780 * connection to the firmware since some of the VPD parameters
3781 * (notably the Core Clock frequency) are retrieved via requests to
3782 * the firmware. On the other hand, we need these fairly early on
3783 * so we do this right after getting ahold of the firmware.
3785 ret = get_vpd_params(adap, &adap->params.vpd);
3790 * Find out what ports are available to us. Note that we need to do
3791 * this before calling adap_init0_no_config() since it needs nports
3795 FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
3796 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_PORTVEC);
3797 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 1, &v, &port_vec);
3801 adap->params.nports = hweight32(port_vec);
3802 adap->params.portvec = port_vec;
3805 * If the firmware is initialized already (and we're not forcing a
3806 * master initialization), note that we're living with existing
3807 * adapter parameters. Otherwise, it's time to try initializing the
3810 if (state == DEV_STATE_INIT) {
3811 dev_info(adap->pdev_dev, "Coming up as %s: "\
3812 "Adapter already initialized\n",
3813 adap->flags & MASTER_PF ? "MASTER" : "SLAVE");
3814 adap->flags |= USING_SOFT_PARAMS;
3816 dev_info(adap->pdev_dev, "Coming up as MASTER: "\
3817 "Initializing adapter\n");
3820 * If the firmware doesn't support Configuration
3821 * Files warn user and exit,
3824 dev_warn(adap->pdev_dev, "Firmware doesn't support "
3825 "configuration file.\n");
3827 ret = adap_init0_no_config(adap, reset);
3830 * Find out whether we're dealing with a version of
3831 * the firmware which has configuration file support.
3833 params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
3834 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_CF));
3835 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 1,
3839 * If the firmware doesn't support Configuration
3840 * Files, use the old Driver-based, hard-wired
3841 * initialization. Otherwise, try using the
3842 * Configuration File support and fall back to the
3843 * Driver-based initialization if there's no
3844 * Configuration File found.
3847 ret = adap_init0_no_config(adap, reset);
3850 * The firmware provides us with a memory
3851 * buffer where we can load a Configuration
3852 * File from the host if we want to override
3853 * the Configuration File in flash.
3856 ret = adap_init0_config(adap, reset);
3857 if (ret == -ENOENT) {
3858 dev_info(adap->pdev_dev,
3859 "No Configuration File present "
3860 "on adapter. Using hard-wired "
3861 "configuration parameters.\n");
3862 ret = adap_init0_no_config(adap, reset);
3867 dev_err(adap->pdev_dev,
3868 "could not initialize adapter, error %d\n",
3875 * If we're living with non-hard-coded parameters (either from a
3876 * Firmware Configuration File or values programmed by a different PF
3877 * Driver), give the SGE code a chance to pull in anything that it
3878 * needs ... Note that this must be called after we retrieve our VPD
3879 * parameters in order to know how to convert core ticks to seconds.
3881 if (adap->flags & USING_SOFT_PARAMS) {
3882 ret = t4_sge_init(adap);
3888 * Grab some of our basic fundamental operating parameters.
3890 #define FW_PARAM_DEV(param) \
3891 (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
3892 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
3894 #define FW_PARAM_PFVF(param) \
3895 FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
3896 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)| \
3897 FW_PARAMS_PARAM_Y(0) | \
3898 FW_PARAMS_PARAM_Z(0)
3900 params[0] = FW_PARAM_PFVF(EQ_START);
3901 params[1] = FW_PARAM_PFVF(L2T_START);
3902 params[2] = FW_PARAM_PFVF(L2T_END);
3903 params[3] = FW_PARAM_PFVF(FILTER_START);
3904 params[4] = FW_PARAM_PFVF(FILTER_END);
3905 params[5] = FW_PARAM_PFVF(IQFLINT_START);
3906 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6, params, val);
3909 adap->sge.egr_start = val[0];
3910 adap->l2t_start = val[1];
3911 adap->l2t_end = val[2];
3912 adap->tids.ftid_base = val[3];
3913 adap->tids.nftids = val[4] - val[3] + 1;
3914 adap->sge.ingr_start = val[5];
3916 /* query params related to active filter region */
3917 params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START);
3918 params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END);
3919 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params, val);
3920 /* If Active filter size is set we enable establishing
3921 * offload connection through firmware work request
3923 if ((val[0] != val[1]) && (ret >= 0)) {
3924 adap->flags |= FW_OFLD_CONN;
3925 adap->tids.aftid_base = val[0];
3926 adap->tids.aftid_end = val[1];
3929 #ifdef CONFIG_CHELSIO_T4_OFFLOAD
3931 * Get device capabilities so we can determine what resources we need
3934 memset(&caps_cmd, 0, sizeof(caps_cmd));
3935 caps_cmd.op_to_write = htonl(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3936 FW_CMD_REQUEST | FW_CMD_READ);
3937 caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd));
3938 ret = t4_wr_mbox(adap, adap->mbox, &caps_cmd, sizeof(caps_cmd),
3943 if (caps_cmd.ofldcaps) {
3944 /* query offload-related parameters */
3945 params[0] = FW_PARAM_DEV(NTID);
3946 params[1] = FW_PARAM_PFVF(SERVER_START);
3947 params[2] = FW_PARAM_PFVF(SERVER_END);
3948 params[3] = FW_PARAM_PFVF(TDDP_START);
3949 params[4] = FW_PARAM_PFVF(TDDP_END);
3950 params[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
3951 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6,
3955 adap->tids.ntids = val[0];
3956 adap->tids.natids = min(adap->tids.ntids / 2, MAX_ATIDS);
3957 adap->tids.stid_base = val[1];
3958 adap->tids.nstids = val[2] - val[1] + 1;
3960 * Setup server filter region. Divide the available filter
3961 * region into two parts. Regular filters get 1/3rd and server
3962 * filters get 2/3rd part. This is only enabled if workaround
3964 * 1. For regular filters.
3965 * 2. Server filter: This are special filters which are used
3966 * to redirect SYN packets to offload queue.
3968 if (adap->flags & FW_OFLD_CONN && !is_bypass(adap)) {
3969 adap->tids.sftid_base = adap->tids.ftid_base +
3970 DIV_ROUND_UP(adap->tids.nftids, 3);
3971 adap->tids.nsftids = adap->tids.nftids -
3972 DIV_ROUND_UP(adap->tids.nftids, 3);
3973 adap->tids.nftids = adap->tids.sftid_base -
3974 adap->tids.ftid_base;
3976 adap->vres.ddp.start = val[3];
3977 adap->vres.ddp.size = val[4] - val[3] + 1;
3978 adap->params.ofldq_wr_cred = val[5];
3980 params[0] = FW_PARAM_PFVF(ETHOFLD_START);
3981 params[1] = FW_PARAM_PFVF(ETHOFLD_END);
3982 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2,
3984 if ((val[0] != val[1]) && (ret >= 0)) {
3985 adap->tids.uotid_base = val[0];
3986 adap->tids.nuotids = val[1] - val[0] + 1;
3989 adap->params.offload = 1;
3991 if (caps_cmd.rdmacaps) {
3992 params[0] = FW_PARAM_PFVF(STAG_START);
3993 params[1] = FW_PARAM_PFVF(STAG_END);
3994 params[2] = FW_PARAM_PFVF(RQ_START);
3995 params[3] = FW_PARAM_PFVF(RQ_END);
3996 params[4] = FW_PARAM_PFVF(PBL_START);
3997 params[5] = FW_PARAM_PFVF(PBL_END);
3998 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6,
4002 adap->vres.stag.start = val[0];
4003 adap->vres.stag.size = val[1] - val[0] + 1;
4004 adap->vres.rq.start = val[2];
4005 adap->vres.rq.size = val[3] - val[2] + 1;
4006 adap->vres.pbl.start = val[4];
4007 adap->vres.pbl.size = val[5] - val[4] + 1;
4009 params[0] = FW_PARAM_PFVF(SQRQ_START);
4010 params[1] = FW_PARAM_PFVF(SQRQ_END);
4011 params[2] = FW_PARAM_PFVF(CQ_START);
4012 params[3] = FW_PARAM_PFVF(CQ_END);
4013 params[4] = FW_PARAM_PFVF(OCQ_START);
4014 params[5] = FW_PARAM_PFVF(OCQ_END);
/* NOTE(review): this call passes mbox/pf/vf as 0, unlike the sibling
 * queries above which use adap->mbox/adap->fn — confirm intentional. */
4015 ret = t4_query_params(adap, 0, 0, 0, 6, params, val);
4018 adap->vres.qp.start = val[0];
4019 adap->vres.qp.size = val[1] - val[0] + 1;
4020 adap->vres.cq.start = val[2];
4021 adap->vres.cq.size = val[3] - val[2] + 1;
4022 adap->vres.ocq.start = val[4];
4023 adap->vres.ocq.size = val[5] - val[4] + 1;
4025 if (caps_cmd.iscsicaps) {
4026 params[0] = FW_PARAM_PFVF(ISCSI_START);
4027 params[1] = FW_PARAM_PFVF(ISCSI_END);
4028 ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2,
4032 adap->vres.iscsi.start = val[0];
4033 adap->vres.iscsi.size = val[1] - val[0] + 1;
4035 #undef FW_PARAM_PFVF
4037 #endif /* CONFIG_CHELSIO_T4_OFFLOAD */
4040 * These are finalized by FW initialization, load their values now.
4042 v = t4_read_reg(adap, TP_TIMER_RESOLUTION);
4043 adap->params.tp.tre = TIMERRESOLUTION_GET(v);
4044 adap->params.tp.dack_re = DELAYEDACKRESOLUTION_GET(v);
4045 t4_read_mtu_tbl(adap, adap->params.mtus, NULL);
4046 t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
4047 adap->params.b_wnd);
4049 /* MODQ_REQ_MAP defaults to setting queues 0-3 to chan 0-3 */
4050 for (j = 0; j < NCHAN; j++)
4051 adap->params.tp.tx_modq[j] = j;
4053 adap->flags |= FW_OK;
4057 * Something bad happened. If a command timed out or failed with EIO
4058 * FW does not operate within its spec or something catastrophic
4059 * happened to HW/FW, stop issuing commands.
4062 if (ret != -ETIMEDOUT && ret != -EIO)
4063 t4_fw_bye(adap, adap->mbox);
/*
 * eeh_err_detected - PCI error-recovery "error detected" callback
 * @pdev: the PCI device
 * @state: channel state reported by the PCI core
 *
 * Marks the FW connection as down, notifies upper-layer drivers that
 * recovery is starting, and detaches all net devices while the slot is
 * being recovered.
 *
 * NOTE(review): local declarations, the !adap early-out, rtnl locking,
 * the cxgb_down() call after the FULL_INIT_DONE test, and several braces
 * are elided from this excerpt.
 */
4069 static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev,
4070 pci_channel_state_t state)
4073 struct adapter *adap = pci_get_drvdata(pdev);
/* FW mailbox is unusable until the slot is reset and FW re-contacted. */
4079 adap->flags &= ~FW_OK;
4080 notify_ulds(adap, CXGB4_STATE_START_RECOVERY);
4081 for_each_port(adap, i) {
4082 struct net_device *dev = adap->port[i];
4084 netif_device_detach(dev);
4085 netif_carrier_off(dev);
/* presumably cxgb_down(adap) follows here — line elided in excerpt */
4087 if (adap->flags & FULL_INIT_DONE)
4090 pci_disable_device(pdev);
/* A permanently failed channel cannot be recovered; otherwise ask for reset. */
4091 out: return state == pci_channel_io_perm_failure ?
4092 PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
/*
 * eeh_slot_reset - PCI error-recovery "slot reset" callback
 * @pdev: the PCI device
 *
 * Re-enables the device after a slot reset, re-establishes contact with
 * the firmware (t4_fw_hello with MASTER_MUST), redoes basic adapter
 * initialization and re-allocates a virtual interface for each port.
 *
 * NOTE(review): local declarations, several if-bodies/braces and error
 * checks are elided from this excerpt.
 */
4095 static pci_ers_result_t eeh_slot_reset(struct pci_dev *pdev)
4098 struct fw_caps_config_cmd c;
4099 struct adapter *adap = pci_get_drvdata(pdev);
/*
 * NOTE(review): this restore/save/early-return path presumably runs only
 * when no adapter is attached ("if (!adap) { ... }") — the guard line is
 * elided in this excerpt; as written it would return unconditionally.
 */
4102 pci_restore_state(pdev);
4103 pci_save_state(pdev);
4104 return PCI_ERS_RESULT_RECOVERED;
4107 if (pci_enable_device(pdev)) {
4108 dev_err(&pdev->dev, "cannot reenable PCI device after reset\n");
4109 return PCI_ERS_RESULT_DISCONNECT;
4112 pci_set_master(pdev);
4113 pci_restore_state(pdev);
4114 pci_save_state(pdev);
/* Clear any AER uncorrectable status left over from the error. */
4115 pci_cleanup_aer_uncorrect_error_status(pdev);
/* Re-contact FW; any failure here means the adapter is unusable. */
4117 if (t4_wait_dev_ready(adap) < 0)
4118 return PCI_ERS_RESULT_DISCONNECT;
4119 if (t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, NULL))
4120 return PCI_ERS_RESULT_DISCONNECT;
4121 adap->flags |= FW_OK;
4122 if (adap_init1(adap, &c))
4123 return PCI_ERS_RESULT_DISCONNECT;
4125 for_each_port(adap, i) {
4126 struct port_info *p = adap2pinfo(adap, i);
/* Re-allocate each port's virtual interface; error check elided here. */
4128 ret = t4_alloc_vi(adap, adap->fn, p->tx_chan, adap->fn, 0, 1,
4131 return PCI_ERS_RESULT_DISCONNECT;
/* Exact-match MAC filter must be re-programmed; -1 means "none yet". */
4133 p->xact_addr_filt = -1;
4136 t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
4137 adap->params.b_wnd);
4140 return PCI_ERS_RESULT_DISCONNECT;
4141 return PCI_ERS_RESULT_RECOVERED;
/*
 * eeh_resume - PCI error-recovery "resume" callback
 * @pdev: the PCI device
 *
 * Re-attaches the net devices once recovery is complete, restoring the
 * Rx mode of interfaces that were running when the error hit.
 *
 * NOTE(review): locals, rtnl locking, the !adap early-out and the
 * link_start() call inside the netif_running() branch are elided from
 * this excerpt.
 */
4144 static void eeh_resume(struct pci_dev *pdev)
4147 struct adapter *adap = pci_get_drvdata(pdev);
4153 for_each_port(adap, i) {
4154 struct net_device *dev = adap->port[i];
4156 if (netif_running(dev)) {
4158 cxgb_set_rxmode(dev);
4160 netif_device_attach(dev);
4165 static struct pci_error_handlers cxgb4_eeh = {
4166 .error_detected = eeh_err_detected,
4167 .slot_reset = eeh_slot_reset,
4168 .resume = eeh_resume,
4171 static inline bool is_10g_port(const struct link_config *lc)
4173 return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0;
4176 static inline void init_rspq(struct sge_rspq *q, u8 timer_idx, u8 pkt_cnt_idx,
4177 unsigned int size, unsigned int iqe_size)
4179 q->intr_params = QINTR_TIMER_IDX(timer_idx) |
4180 (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0);
4181 q->pktcnt_idx = pkt_cnt_idx < SGE_NCOUNTERS ? pkt_cnt_idx : 0;
4182 q->iqe_len = iqe_size;
4187 * Perform default configuration of DMA queues depending on the number and type
4188 * of ports we found and the number of available CPUs. Most settings can be
4189 * modified by the admin prior to actual use.
4191 static void __devinit cfg_queues(struct adapter *adap)
4193 struct sge *s = &adap->sge;
4194 int i, q10g = 0, n10g = 0, qidx = 0;
/* Count the 10G-capable ports; they get more queue sets than 1G ports. */
4196 for_each_port(adap, i)
4197 n10g += is_10g_port(&adap2pinfo(adap, i)->link_cfg);
4200 * We default to 1 queue per non-10G port and up to # of cores queues
/*
 * NOTE(review): this divide needs an "if (n10g)" guard (elided in this
 * excerpt) — as written it divides by zero when no 10G ports exist.
 */
4204 q10g = (MAX_ETH_QSETS - (adap->params.nports - n10g)) / n10g;
4205 if (q10g > netif_get_num_default_rss_queues())
4206 q10g = netif_get_num_default_rss_queues();
/* Assign each port its contiguous range of queue sets. */
4208 for_each_port(adap, i) {
4209 struct port_info *pi = adap2pinfo(adap, i);
4211 pi->first_qset = qidx;
4212 pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1;
/* presumably "qidx += pi->nqsets;" here — line elided in excerpt */
4217 s->max_ethqsets = qidx; /* MSI-X may lower it later */
4219 if (is_offload(adap)) {
4221 * For offload we use 1 queue/channel if all ports are up to 1G,
4222 * otherwise we divide all available queues amongst the channels
4223 * capped by the number of available cores.
4226 i = min_t(int, ARRAY_SIZE(s->ofldrxq),
/* Round up so queues divide evenly across the ports/channels. */
4228 s->ofldqsets = roundup(i, adap->params.nports);
4230 s->ofldqsets = adap->params.nports;
4231 /* For RDMA one Rx queue per channel suffices */
4232 s->rdmaqs = adap->params.nports;
/* Default sizes/parameters for the Ethernet Rx queues. */
4235 for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
4236 struct sge_eth_rxq *r = &s->ethrxq[i];
4238 init_rspq(&r->rspq, 0, 0, 1024, 64);
4242 for (i = 0; i < ARRAY_SIZE(s->ethtxq); i++)
4243 s->ethtxq[i].q.size = 1024;
4245 for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++)
4246 s->ctrlq[i].q.size = 512;
4248 for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
4249 s->ofldtxq[i].q.size = 1024;
/* Offload (iSCSI) Rx queues. */
4251 for (i = 0; i < ARRAY_SIZE(s->ofldrxq); i++) {
4252 struct sge_ofld_rxq *r = &s->ofldrxq[i];
4254 init_rspq(&r->rspq, 0, 0, 1024, 64);
4255 r->rspq.uld = CXGB4_ULD_ISCSI;
/* RDMA Rx queues. */
4259 for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
4260 struct sge_ofld_rxq *r = &s->rdmarxq[i];
4262 init_rspq(&r->rspq, 0, 0, 511, 64);
4263 r->rspq.uld = CXGB4_ULD_RDMA;
/* Firmware event queue and the MSI/INTx interrupt queue. */
4267 init_rspq(&s->fw_evtq, 6, 0, 512, 64);
4268 init_rspq(&s->intrq, 6, 0, 2 * MAX_INGQ, 64);
4272 * Reduce the number of Ethernet queues across all ports to at most n.
4273 * n provides at least one queue per port.
4275 static void __devinit reduce_ethqs(struct adapter *adap, int n)
4278 struct port_info *pi;
/*
 * Round-robin over the ports, taking one queue set away from any port
 * that still has more than one, until the total drops to n.
 * NOTE(review): the "int i;" declaration, the pi->nqsets-- line and the
 * break/braces are elided from this excerpt.
 */
4280 while (n < adap->sge.ethqsets)
4281 for_each_port(adap, i) {
4282 pi = adap2pinfo(adap, i);
4283 if (pi->nqsets > 1) {
4285 adap->sge.ethqsets--;
4286 if (adap->sge.ethqsets <= n)
/* Recompute each port's first_qset after the shrink (body elided). */
4292 for_each_port(adap, i) {
4293 pi = adap2pinfo(adap, i);
4299 /* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
4300 #define EXTRA_VECS 2
/*
 * enable_msix - allocate MSI-X vectors for all queue groups
 * @adap: the adapter
 *
 * Requests enough vectors for the Ethernet, offload and RDMA queues plus
 * EXTRA_VECS, retrying with whatever pci_enable_msix() reports available
 * as long as that still meets the minimum "need".  On success distributes
 * the granted vectors, shrinking the Ethernet/offload queue counts to fit.
 *
 * NOTE(review): the "ofld_need" declaration/initialization for the
 * non-offload case, the "want = err;" retry assignment in the while loop,
 * the success return and several braces are elided from this excerpt.
 */
4302 static int __devinit enable_msix(struct adapter *adap)
4305 int i, err, want, need;
4306 struct sge *s = &adap->sge;
4307 unsigned int nchan = adap->params.nports;
4308 struct msix_entry entries[MAX_INGQ + 1];
4310 for (i = 0; i < ARRAY_SIZE(entries); ++i)
4311 entries[i].entry = i;
4313 want = s->max_ethqsets + EXTRA_VECS;
4314 if (is_offload(adap)) {
4315 want += s->rdmaqs + s->ofldqsets;
4316 /* need nchan for each possible ULD */
4317 ofld_need = 2 * nchan;
4319 need = adap->params.nports + EXTRA_VECS + ofld_need;
/* pci_enable_msix() returns >0 = vectors available; retry at that count. */
4321 while ((err = pci_enable_msix(adap->pdev, entries, want)) >= need)
4326 * Distribute available vectors to the various queue groups.
4327 * Every group gets its minimum requirement and NIC gets top
4328 * priority for leftovers.
4330 i = want - EXTRA_VECS - ofld_need;
4331 if (i < s->max_ethqsets) {
4332 s->max_ethqsets = i;
4333 if (i < s->ethqsets)
4334 reduce_ethqs(adap, i);
4336 if (is_offload(adap)) {
4337 i = want - EXTRA_VECS - s->max_ethqsets;
4338 i -= ofld_need - nchan;
4339 s->ofldqsets = (i / nchan) * nchan; /* round down */
/* Record the granted vector numbers for later request_irq() use. */
4341 for (i = 0; i < want; ++i)
4342 adap->msix_info[i].vec = entries[i].vector;
/* Fallback path: not enough vectors; caller will try MSI/INTx. */
4344 dev_info(adap->pdev_dev,
4345 "only %d MSI-X vectors left, not using MSI-X\n", err);
/*
 * init_rss - allocate and fill a default RSS indirection table per port
 * @adap: the adapter
 *
 * NOTE(review): the "int i, j;" declarations, the kcalloc failure return
 * (-ENOMEM) and the final "return 0;" are elided from this excerpt.
 */
4351 static int __devinit init_rss(struct adapter *adap)
4355 for_each_port(adap, i) {
4356 struct port_info *pi = adap2pinfo(adap, i);
4358 pi->rss = kcalloc(pi->rss_size, sizeof(u16), GFP_KERNEL);
/* Spread the table entries evenly across the port's queue sets. */
4361 for (j = 0; j < pi->rss_size; j++)
4362 pi->rss[j] = ethtool_rxfh_indir_default(j, pi->nqsets);
/*
 * print_port_info - announce a newly registered net device
 * @dev: the net device
 *
 * Logs the adapter description, supported link speeds, connector type,
 * PCIe width/speed and interrupt mode, followed by VPD serial/EC data.
 *
 * NOTE(review): the "char buf[...]" / "char *bufp = buf;" declarations,
 * the spd assignments in the speed branches and the trailing-slash
 * trimming before the BASE- suffix are elided from this excerpt.
 */
4367 static void __devinit print_port_info(const struct net_device *dev)
/* Connector/transceiver names indexed by pi->port_type. */
4369 static const char *base[] = {
4370 "R XFI", "R XAUI", "T SGMII", "T XFI", "T XAUI", "KX4", "CX4",
4371 "KX", "KR", "R SFP+", "KR/KX", "KR/KX/KX4"
4376 const char *spd = "";
4377 const struct port_info *pi = netdev_priv(dev);
4378 const struct adapter *adap = pi->adapter;
/* Map the PCIe link-status encoding to a printable speed suffix. */
4380 if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_2_5GB)
4382 else if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_5_0GB)
/* Build a "100/1000/10G" style list of supported link speeds. */
4385 if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100M)
4386 bufp += sprintf(bufp, "100/");
4387 if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_1G)
4388 bufp += sprintf(bufp, "1000/");
4389 if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
4390 bufp += sprintf(bufp, "10G/");
4393 sprintf(bufp, "BASE-%s", base[pi->port_type]);
4395 netdev_info(dev, "Chelsio %s rev %d %s %sNIC PCIe x%d%s%s\n",
4396 adap->params.vpd.id, adap->params.rev, buf,
4397 is_offload(adap) ? "R" : "", adap->params.pci.width, spd,
4398 (adap->flags & USING_MSIX) ? " MSI-X" :
4399 (adap->flags & USING_MSI) ? " MSI" : "");
4400 netdev_info(dev, "S/N: %s, E/C: %s\n",
4401 adap->params.vpd.sn, adap->params.vpd.ec);
/*
 * enable_pcie_relaxed_ordering - set the Relaxed Ordering enable bit in
 * the device's PCIe Device Control register.
 * @dev: the PCI device
 *
 * NOTE(review): the local "u16 v; int pos;" declarations and the
 * "if (pos > 0)" guard around the capability access are elided from
 * this excerpt — as written, a device without a PCIe capability would
 * read/write at a bogus config offset.
 */
4404 static void __devinit enable_pcie_relaxed_ordering(struct pci_dev *dev)
4409 pos = pci_pcie_cap(dev);
4411 pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &v);
4412 v |= PCI_EXP_DEVCTL_RELAX_EN;
4413 pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, v);
4418 * Free the following resources:
4419 * - memory used for tables
4422 * - resources FW is holding for us
4424 static void free_some_resources(struct adapter *adapter)
/* Free the L2 table and TID table allocated during init_one(). */
4428 t4_free_mem(adapter->l2t);
4429 t4_free_mem(adapter->tids.tid_tab);
4430 disable_msi(adapter);
/* Release per-port RSS tables and net devices that were allocated. */
4432 for_each_port(adapter, i)
4433 if (adapter->port[i]) {
4434 kfree(adap2pinfo(adapter, i)->rss);
4435 free_netdev(adapter->port[i]);
/* Tell FW we are going away only if contact was ever established. */
4437 if (adapter->flags & FW_OK)
4438 t4_fw_bye(adapter, adapter->fn);
4441 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
4442 #define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
4443 NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
/*
 * init_one - PCI probe callback: bring up one adapter instance
 * @pdev: the PCI device
 * @ent: matching entry in cxgb4_pci_tbl (driver_data = managing PF number)
 *
 * Claims PCI resources, sets up DMA masks, maps BAR0, initializes the
 * adapter and firmware, allocates one net device per port, configures
 * queues/interrupts and registers the net devices.
 *
 * NOTE(review): this excerpt is heavily elided — locals ("int func, i,
 * err;"), many "if (err)" checks, goto labels, closing braces and some
 * statement halves are missing; comments below annotate only the visible
 * lines.
 */
4445 static int __devinit init_one(struct pci_dev *pdev,
4446 const struct pci_device_id *ent)
4449 struct port_info *pi;
4450 bool highdma = false;
4451 struct adapter *adapter = NULL;
4453 printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
4455 err = pci_request_regions(pdev, KBUILD_MODNAME);
4457 /* Just info, some other driver may have claimed the device. */
4458 dev_info(&pdev->dev, "cannot obtain PCI resources\n");
4462 /* We control everything through one PF */
4463 func = PCI_FUNC(pdev->devfn);
4464 if (func != ent->driver_data) {
4465 pci_save_state(pdev); /* to restore SR-IOV later */
4469 err = pci_enable_device(pdev);
4471 dev_err(&pdev->dev, "cannot enable PCI device\n");
4472 goto out_release_regions;
/* Prefer 64-bit DMA; fall back to a 32-bit mask if unavailable. */
4475 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
4477 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
4479 dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
4480 "coherent allocations\n");
4481 goto out_disable_device;
4484 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
4486 dev_err(&pdev->dev, "no usable DMA configuration\n");
4487 goto out_disable_device;
4491 pci_enable_pcie_error_reporting(pdev);
4492 enable_pcie_relaxed_ordering(pdev);
4493 pci_set_master(pdev);
4494 pci_save_state(pdev);
4496 adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
4499 goto out_disable_device;
4502 adapter->regs = pci_ioremap_bar(pdev, 0);
4503 if (!adapter->regs) {
4504 dev_err(&pdev->dev, "cannot map device registers\n");
4506 goto out_free_adapter;
4509 adapter->pdev = pdev;
4510 adapter->pdev_dev = &pdev->dev;
/* The managing PF's number doubles as our FW mailbox. */
4511 adapter->mbox = func;
4513 adapter->msg_enable = dflt_msg_enable;
/* 0xff = "no port on this channel" until chan_map is filled in below. */
4514 memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
4516 spin_lock_init(&adapter->stats_lock);
4517 spin_lock_init(&adapter->tid_release_lock);
4519 INIT_WORK(&adapter->tid_release_task, process_tid_release_list);
4520 INIT_WORK(&adapter->db_full_task, process_db_full);
4521 INIT_WORK(&adapter->db_drop_task, process_db_drop);
4523 err = t4_prep_adapter(adapter);
4526 setup_memwin(adapter);
4527 err = adap_init0(adapter);
4528 setup_memwin_rdma(adapter);
/* Allocate and minimally initialize one net device per port. */
4532 for_each_port(adapter, i) {
4533 struct net_device *netdev;
4535 netdev = alloc_etherdev_mq(sizeof(struct port_info),
4542 SET_NETDEV_DEV(netdev, &pdev->dev);
4544 adapter->port[i] = netdev;
4545 pi = netdev_priv(netdev);
4546 pi->adapter = adapter;
4547 pi->xact_addr_filt = -1;
4549 netdev->irq = pdev->irq;
4551 netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
4552 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4553 NETIF_F_RXCSUM | NETIF_F_RXHASH |
4554 NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
4556 netdev->hw_features |= NETIF_F_HIGHDMA;
4557 netdev->features |= netdev->hw_features;
4558 netdev->vlan_features = netdev->features & VLAN_FEAT;
4560 netdev->priv_flags |= IFF_UNICAST_FLT;
4562 netdev->netdev_ops = &cxgb4_netdev_ops;
4563 SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops);
4566 pci_set_drvdata(pdev, adapter);
4568 if (adapter->flags & FW_OK) {
4569 err = t4_port_init(adapter, func, func, 0);
4575 * Configure queues and allocate tables now, they can be needed as
4576 * soon as the first register_netdev completes.
4578 cfg_queues(adapter);
4580 adapter->l2t = t4_init_l2t();
4581 if (!adapter->l2t) {
4582 /* We tolerate a lack of L2T, giving up some functionality */
4583 dev_warn(&pdev->dev, "could not allocate L2T, continuing\n");
4584 adapter->params.offload = 0;
4587 if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
4588 dev_warn(&pdev->dev, "could not allocate TID table, "
4590 adapter->params.offload = 0;
4593 /* See what interrupts we'll be using */
4594 if (msi > 1 && enable_msix(adapter) == 0)
4595 adapter->flags |= USING_MSIX;
4596 else if (msi > 0 && pci_enable_msi(pdev) == 0)
4597 adapter->flags |= USING_MSI;
4599 err = init_rss(adapter);
4604 * The card is now ready to go. If any errors occur during device
4605 * registration we do not fail the whole card but rather proceed only
4606 * with the ports we manage to register successfully. However we must
4607 * register at least one net device.
4609 for_each_port(adapter, i) {
4610 pi = adap2pinfo(adapter, i);
4611 netif_set_real_num_tx_queues(adapter->port[i], pi->nqsets);
4612 netif_set_real_num_rx_queues(adapter->port[i], pi->nqsets);
4614 err = register_netdev(adapter->port[i]);
4617 adapter->chan_map[pi->tx_chan] = i;
4618 print_port_info(adapter->port[i]);
4621 dev_err(&pdev->dev, "could not register any net devices\n");
4625 dev_warn(&pdev->dev, "only %d net devices registered\n", i);
/* Debugfs support is optional; skip if the root dir wasn't created. */
4629 if (cxgb4_debugfs_root) {
4630 adapter->debugfs_root = debugfs_create_dir(pci_name(pdev),
4631 cxgb4_debugfs_root);
4632 setup_debugfs(adapter);
4635 /* PCIe EEH recovery on powerpc platforms needs fundamental reset */
4636 pdev->needs_freset = 1;
4638 if (is_offload(adapter))
4639 attach_ulds(adapter);
4642 #ifdef CONFIG_PCI_IOV
4643 if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0)
4644 if (pci_enable_sriov(pdev, num_vf[func]) == 0)
4645 dev_info(&pdev->dev,
4646 "instantiated %u virtual functions\n",
/* Error unwinding (the goto labels themselves are elided in excerpt). */
4652 free_some_resources(adapter);
4654 iounmap(adapter->regs);
4658 pci_disable_pcie_error_reporting(pdev);
4659 pci_disable_device(pdev);
4660 out_release_regions:
4661 pci_release_regions(pdev);
4662 pci_set_drvdata(pdev, NULL);
/*
 * remove_one - PCI remove callback: tear down one adapter instance
 * @pdev: the PCI device
 *
 * NOTE(review): the "int i;" declaration, the "if (adapter) { ... } else"
 * wrapper and several braces are elided from this excerpt.  The trailing
 * pci_release_regions() at the bottom is presumably the else-branch for
 * the no-adapter case, not a double release — verify against the full
 * file.
 */
4666 static void __devexit remove_one(struct pci_dev *pdev)
4668 struct adapter *adapter = pci_get_drvdata(pdev);
4670 #ifdef CONFIG_PCI_IOV
4671 pci_disable_sriov(pdev);
4678 if (is_offload(adapter))
4679 detach_ulds(adapter);
/* Only unregister devices that made it through register_netdev(). */
4681 for_each_port(adapter, i)
4682 if (adapter->port[i]->reg_state == NETREG_REGISTERED)
4683 unregister_netdev(adapter->port[i]);
4685 if (adapter->debugfs_root)
4686 debugfs_remove_recursive(adapter->debugfs_root);
/* presumably cxgb_down(adapter) in this branch — line elided in excerpt */
4688 if (adapter->flags & FULL_INIT_DONE)
4691 free_some_resources(adapter);
4692 iounmap(adapter->regs);
4694 pci_disable_pcie_error_reporting(pdev);
4695 pci_disable_device(pdev);
4696 pci_release_regions(pdev);
4697 pci_set_drvdata(pdev, NULL);
4699 pci_release_regions(pdev);
4702 static struct pci_driver cxgb4_driver = {
4703 .name = KBUILD_MODNAME,
4704 .id_table = cxgb4_pci_tbl,
4706 .remove = __devexit_p(remove_one),
4707 .err_handler = &cxgb4_eeh,
/*
 * cxgb4_init_module - module load entry point
 *
 * Creates the single-threaded driver workqueue, the optional debugfs
 * root, and registers the PCI driver.
 *
 * NOTE(review): the "int ret;" declaration, the workqueue-NULL
 * (-ENOMEM) check, the "if (ret < 0)" guard before the debugfs_remove
 * and the final "return ret;" are elided from this excerpt.
 */
4710 static int __init cxgb4_init_module(void)
4714 workq = create_singlethread_workqueue("cxgb4");
4718 /* Debugfs support is optional, just warn if this fails */
4719 cxgb4_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
4720 if (!cxgb4_debugfs_root)
4721 pr_warning("could not create debugfs entry, continuing\n");
4723 ret = pci_register_driver(&cxgb4_driver);
/* Cleanup path: remove the debugfs dir if registration failed. */
4725 debugfs_remove(cxgb4_debugfs_root);
4729 static void __exit cxgb4_cleanup_module(void)
4731 pci_unregister_driver(&cxgb4_driver);
4732 debugfs_remove(cxgb4_debugfs_root); /* NULL ok */
4733 flush_workqueue(workq);
4734 destroy_workqueue(workq);
/* Register the module's load/unload entry points with the kernel. */
4737 module_init(cxgb4_init_module);
4738 module_exit(cxgb4_cleanup_module);