/* bnx2x_cmn.c: Broadcom Everest network driver.
 *
 * Copyright (c) 2007-2011 Broadcom Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation.
 *
 * Maintained by: Eilon Greenstein <eilong@broadcom.com>
 * Written by: Eliezer Tamir
 * Based on code from Michael Chan's bnx2 driver
 * UDP CSUM errata workaround by Arik Gendelman
 * Slowpath and fastpath rework by Vladislav Zolotarov
 * Statistics and Link management by Yitchak Gertner
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/interrupt.h>
#include <net/ip6_checksum.h>
#include <linux/firmware.h>
#include <linux/prefetch.h>
#include "bnx2x_cmn.h"
#include "bnx2x_init.h"

/**
 * bnx2x_bz_fp - zero content of the fastpath structure.
 *
 * @index: fastpath index to be zeroed
 *
 * Makes sure the contents of bp->fp[index].napi are kept
 */
static inline void bnx2x_bz_fp(struct bnx2x *bp, int index)
	struct bnx2x_fastpath *fp = &bp->fp[index];
	struct napi_struct orig_napi = fp->napi;

	/* bzero bnx2x_fastpath contents */
	memset(fp, 0, sizeof(*fp));

	/* Restore the NAPI object as it has been already initialized */
	fp->max_cos = bp->max_cos;

	/* Special queues support only one CoS */

	/* set the tpa flag for each queue. The tpa flag determines the queue
	 * minimal size so it must be set prior to queue memory allocation
	 */
	fp->disable_tpa = ((bp->flags & TPA_ENABLE_FLAG) == 0);

	/* We don't want TPA on FCoE, FWD and OOO L2 rings */
	bnx2x_fcoe(bp, disable_tpa) = 1;

/**
 * bnx2x_move_fp - move content of the fastpath structure.
 *
 * @from: source FP index
 * @to: destination FP index
 *
 * Makes sure the contents of bp->fp[to].napi are kept
 */
static inline void bnx2x_move_fp(struct bnx2x *bp, int from, int to)
	struct bnx2x_fastpath *from_fp = &bp->fp[from];
	struct bnx2x_fastpath *to_fp = &bp->fp[to];
	struct napi_struct orig_napi = to_fp->napi;

	/* Move bnx2x_fastpath contents */
	memcpy(to_fp, from_fp, sizeof(*to_fp));

	/* Restore the NAPI object as it has been already initialized */
	to_fp->napi = orig_napi;

int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */
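
/* Usage (see the BP_NOMCP() path of bnx2x_nic_load() below):
 * load_count[path][0] counts all functions loaded on the path and
 * load_count[path][1 + port] those loaded on one port.  The first loader
 * gets FW_MSG_CODE_DRV_LOAD_COMMON, the first on a port _LOAD_PORT, and
 * everyone else _LOAD_FUNCTION.
 */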
/* free skb in the packet ring at pos idx
 * return idx of last bd freed
 */
static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata,
	struct sw_tx_bd *tx_buf = &txdata->tx_buf_ring[idx];
	struct eth_tx_start_bd *tx_start_bd;
	struct eth_tx_bd *tx_data_bd;
	struct sk_buff *skb = tx_buf->skb;
	u16 bd_idx = TX_BD(tx_buf->first_bd), new_cons;

	/* prefetch skb end pointer to speed up dev_kfree_skb() */

	DP(BNX2X_MSG_FP, "fp[%d]: pkt_idx %d buff @(%p)->skb %p\n",
	   txdata->txq_index, idx, tx_buf, skb);

	DP(BNX2X_MSG_OFF, "free bd_idx %d\n", bd_idx);
	tx_start_bd = &txdata->tx_desc_ring[bd_idx].start_bd;
	dma_unmap_single(&bp->pdev->dev, BD_UNMAP_ADDR(tx_start_bd),
			 BD_UNMAP_LEN(tx_start_bd), DMA_TO_DEVICE);
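
	/* BD chain of a single packet, as walked below: a start BD, then a
	 * parse BD, optionally a TSO split-header BD (neither of which has
	 * its own DMA mapping), and finally one data BD per fragment.
	 */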
	nbd = le16_to_cpu(tx_start_bd->nbd) - 1;
#ifdef BNX2X_STOP_ON_ERROR
	if ((nbd - 1) > (MAX_SKB_FRAGS + 2)) {
		BNX2X_ERR("BAD nbd!\n");
	new_cons = nbd + tx_buf->first_bd;

	/* Get the next bd */
	bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));

	/* Skip a parse bd... */
	bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));

	/* ...and the TSO split header bd since they have no mapping */
	if (tx_buf->flags & BNX2X_TSO_SPLIT_BD) {
		bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));

		DP(BNX2X_MSG_OFF, "free frag bd_idx %d\n", bd_idx);
		tx_data_bd = &txdata->tx_desc_ring[bd_idx].reg_bd;
		dma_unmap_page(&bp->pdev->dev, BD_UNMAP_ADDR(tx_data_bd),
			       BD_UNMAP_LEN(tx_data_bd), DMA_TO_DEVICE);
		bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));

	dev_kfree_skb_any(skb);
	tx_buf->first_bd = 0;

int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata)
	struct netdev_queue *txq;
	u16 hw_cons, sw_cons, bd_cons = txdata->tx_bd_cons;

#ifdef BNX2X_STOP_ON_ERROR
	if (unlikely(bp->panic))

	txq = netdev_get_tx_queue(bp->dev, txdata->txq_index);
	hw_cons = le16_to_cpu(*txdata->tx_cons_sb);
	sw_cons = txdata->tx_pkt_cons;

	while (sw_cons != hw_cons) {
		pkt_cons = TX_BD(sw_cons);

		DP(NETIF_MSG_TX_DONE, "queue[%d]: hw_cons %u sw_cons %u "
		   txdata->txq_index, hw_cons, sw_cons, pkt_cons);

		bd_cons = bnx2x_free_tx_pkt(bp, txdata, pkt_cons);

	txdata->tx_pkt_cons = sw_cons;
	txdata->tx_bd_cons = bd_cons;

	/* Need to make the tx_bd_cons update visible to start_xmit()
	 * before checking for netif_tx_queue_stopped(). Without the
	 * memory barrier, there is a small possibility that
	 * start_xmit() will miss it and cause the queue to be stopped
	 * On the other hand we need an rmb() here to ensure the proper
	 * ordering of bit testing in the following
	 * netif_tx_queue_stopped(txq) call.
	 */

	if (unlikely(netif_tx_queue_stopped(txq))) {
		/* Taking tx_lock() is needed to prevent re-enabling the queue
		 * while it's empty. This could happen if rx_action() gets
		 * suspended in bnx2x_tx_int() after the condition before
		 * netif_tx_wake_queue(), while tx_action (bnx2x_start_xmit()):
		 *
		 * stops the queue->sees fresh tx_bd_cons->releases the queue->
		 * sends some packets consuming the whole queue again->
		 */
		__netif_tx_lock(txq, smp_processor_id());

		if ((netif_tx_queue_stopped(txq)) &&
		    (bp->state == BNX2X_STATE_OPEN) &&
		    (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 3))
			netif_tx_wake_queue(txq);

		__netif_tx_unlock(txq);

static inline void bnx2x_update_last_max_sge(struct bnx2x_fastpath *fp,
	u16 last_max = fp->last_max_sge;

	if (SUB_S16(idx, last_max) > 0)
		fp->last_max_sge = idx;

static void bnx2x_update_sge_prod(struct bnx2x_fastpath *fp,
				  struct eth_fast_path_rx_cqe *fp_cqe)
	struct bnx2x *bp = fp->bp;
	u16 sge_len = SGE_PAGE_ALIGN(le16_to_cpu(fp_cqe->pkt_len) -
				     le16_to_cpu(fp_cqe->len_on_bd)) >>
	u16 last_max, last_elem, first_elem;
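
	/* Bookkeeping sketch: fp->sge_mask holds one bit per SGE, grouped
	 * into 64-bit elements of BIT_VEC64_ELEM_SZ bits.  A cleared bit
	 * means "page consumed by the chip"; the producer below is advanced
	 * only across fully-consumed elements, in whole-element steps.
	 */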
	/* First mark all used pages */
	for (i = 0; i < sge_len; i++)
		BIT_VEC64_CLEAR_BIT(fp->sge_mask,
			RX_SGE(le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[i])));

	DP(NETIF_MSG_RX_STATUS, "fp_cqe->sgl[%d] = %d\n",
	   sge_len - 1, le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[sge_len - 1]));

	/* Here we assume that the last SGE index is the biggest */
	prefetch((void *)(fp->sge_mask));
	bnx2x_update_last_max_sge(fp,
		le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[sge_len - 1]));

	last_max = RX_SGE(fp->last_max_sge);
	last_elem = last_max >> BIT_VEC64_ELEM_SHIFT;
	first_elem = RX_SGE(fp->rx_sge_prod) >> BIT_VEC64_ELEM_SHIFT;

	/* If ring is not full */
	if (last_elem + 1 != first_elem)

	/* Now update the prod */
	for (i = first_elem; i != last_elem; i = NEXT_SGE_MASK_ELEM(i)) {
		if (likely(fp->sge_mask[i]))

		fp->sge_mask[i] = BIT_VEC64_ELEM_ONE_MASK;
		delta += BIT_VEC64_ELEM_SZ;

		fp->rx_sge_prod += delta;
		/* clear page-end entries */
		bnx2x_clear_sge_mask_next_elems(fp);

	DP(NETIF_MSG_RX_STATUS,
	   "fp->last_max_sge = %d fp->rx_sge_prod = %d\n",
	   fp->last_max_sge, fp->rx_sge_prod);

static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
			    struct sk_buff *skb, u16 cons, u16 prod,
			    struct eth_fast_path_rx_cqe *cqe)
	struct bnx2x *bp = fp->bp;
	struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
	struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
	struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
	struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
	struct sw_rx_bd *first_buf = &tpa_info->first_buf;

	/* print error if current state != stop */
	if (tpa_info->tpa_state != BNX2X_TPA_STOP)
		BNX2X_ERR("start of bin not in stop [%d]\n", queue);

	/* Try to map an empty skb from the aggregation info */
	mapping = dma_map_single(&bp->pdev->dev,
				 first_buf->skb->data,
				 fp->rx_buf_size, DMA_FROM_DEVICE);
	/*
	 * ...if it fails - move the skb from the consumer to the producer
	 * and set the current aggregation state as ERROR to drop it
	 * when TPA_STOP arrives.
	 */
	if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
		/* Move the BD from the consumer to the producer */
		bnx2x_reuse_rx_skb(fp, cons, prod);
		tpa_info->tpa_state = BNX2X_TPA_ERROR;

	/* move empty skb from pool to prod */
	prod_rx_buf->skb = first_buf->skb;
	dma_unmap_addr_set(prod_rx_buf, mapping, mapping);
	/* point prod_bd to new skb */
	prod_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
	prod_bd->addr_lo = cpu_to_le32(U64_LO(mapping));

	/* move partial skb from cons to pool (don't unmap yet) */
	*first_buf = *cons_rx_buf;

	/* mark bin state as START */
	tpa_info->parsing_flags =
		le16_to_cpu(cqe->pars_flags.flags);
	tpa_info->vlan_tag = le16_to_cpu(cqe->vlan_tag);
	tpa_info->tpa_state = BNX2X_TPA_START;
	tpa_info->len_on_bd = le16_to_cpu(cqe->len_on_bd);
	tpa_info->placement_offset = cqe->placement_offset;

#ifdef BNX2X_STOP_ON_ERROR
	fp->tpa_queue_used |= (1 << queue);
#ifdef _ASM_GENERIC_INT_L64_H
	DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%lx\n",
	DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%llx\n",

/* Timestamp option length allowed for TPA aggregation:
 *
 *		nop nop kind length echo val
 */
#define TPA_TSTAMP_OPT_LEN	12
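
/* Breakdown of the 12 bytes: 1 (nop) + 1 (nop) + 1 (kind) + 1 (length) +
 * 4 (TS value) + 4 (TS echo reply).
 */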
/**
 * bnx2x_set_lro_mss - calculate the approximate value of the MSS
 *
 * @parsing_flags: parsing flags from the START CQE
 * @len_on_bd: total length of the first packet for the
 *
 * Approximate value of the MSS for this aggregation calculated using
 * the first packet of it.
 */
static inline u16 bnx2x_set_lro_mss(struct bnx2x *bp, u16 parsing_flags,
	/*
	 * TPA aggregation won't have either IP options or TCP options
	 * other than timestamp or IPv6 extension headers.
	 */
	u16 hdrs_len = ETH_HLEN + sizeof(struct tcphdr);

	if (GET_FLAG(parsing_flags, PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
	    PRS_FLAG_OVERETH_IPV6)
		hdrs_len += sizeof(struct ipv6hdr);
		hdrs_len += sizeof(struct iphdr);

	/* Check if there was a TCP timestamp; if there was, it will
	 * always be 12 bytes long: nop nop kind length echo val.
	 *
	 * Otherwise FW would close the aggregation.
	 */
	if (parsing_flags & PARSING_FLAGS_TIME_STAMP_EXIST_FLAG)
		hdrs_len += TPA_TSTAMP_OPT_LEN;
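
	/* Worked example: an IPv4 aggregation with timestamps gives
	 * hdrs_len = 14 (ETH_HLEN) + 20 (iphdr) + 20 (tcphdr) + 12 = 66,
	 * so a 1514-byte first packet yields an MSS of 1448.
	 */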
	return len_on_bd - hdrs_len;

static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
			       u16 queue, struct sk_buff *skb,
			       struct eth_end_agg_rx_cqe *cqe,
	struct sw_rx_page *rx_pg, old_rx_pg;
	u32 i, frag_len, frag_size, pages;
	struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
	u16 len_on_bd = tpa_info->len_on_bd;

	frag_size = le16_to_cpu(cqe->pkt_len) - len_on_bd;
	pages = SGE_PAGE_ALIGN(frag_size) >> SGE_PAGE_SHIFT;

	/* This is needed in order to enable forwarding support */
		skb_shinfo(skb)->gso_size = bnx2x_set_lro_mss(bp,
					tpa_info->parsing_flags, len_on_bd);

#ifdef BNX2X_STOP_ON_ERROR
	if (pages > min_t(u32, 8, MAX_SKB_FRAGS)*SGE_PAGE_SIZE*PAGES_PER_SGE) {
		BNX2X_ERR("SGL length is too long: %d. CQE index is %d\n",
		BNX2X_ERR("cqe->pkt_len = %d\n", cqe->pkt_len);

	/* Run through the SGL and compose the fragmented skb */
	for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) {
		u16 sge_idx = RX_SGE(le16_to_cpu(cqe->sgl_or_raw_data.sgl[j]));

		/* FW gives the indices of the SGE as if the ring is an array
		   (meaning that "next" element will consume 2 indices) */
		frag_len = min(frag_size, (u32)(SGE_PAGE_SIZE*PAGES_PER_SGE));
		rx_pg = &fp->rx_page_ring[sge_idx];

		/* If we fail to allocate a substitute page, we simply stop
		   where we are and drop the whole packet */
		err = bnx2x_alloc_rx_sge(bp, fp, sge_idx);
			fp->eth_q_stats.rx_skb_alloc_failed++;

		/* Unmap the page as we are going to pass it to the stack */
		dma_unmap_page(&bp->pdev->dev,
			       dma_unmap_addr(&old_rx_pg, mapping),
			       SGE_PAGE_SIZE*PAGES_PER_SGE, DMA_FROM_DEVICE);

		/* Add one frag and update the appropriate fields in the skb */
		skb_fill_page_desc(skb, j, old_rx_pg.page, 0, frag_len);

		skb->data_len += frag_len;
		skb->truesize += frag_len;
		skb->len += frag_len;

		frag_size -= frag_len;

static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
			   u16 queue, struct eth_end_agg_rx_cqe *cqe,
	struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
	struct sw_rx_bd *rx_buf = &tpa_info->first_buf;
	u8 pad = tpa_info->placement_offset;
	u16 len = tpa_info->len_on_bd;
	struct sk_buff *skb = rx_buf->skb;
	struct sk_buff *new_skb;
	u8 old_tpa_state = tpa_info->tpa_state;

	tpa_info->tpa_state = BNX2X_TPA_STOP;

	/* If there was an error during the handling of the TPA_START -
	 * drop this aggregation.
	 */
	if (old_tpa_state == BNX2X_TPA_ERROR)

	/* Try to allocate the new skb */
	new_skb = netdev_alloc_skb(bp->dev, fp->rx_buf_size);

	/* Unmap skb in the pool anyway, as we are going to change
	   pool entry status to BNX2X_TPA_STOP even if new skb allocation
	   fails. */
	dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(rx_buf, mapping),
			 fp->rx_buf_size, DMA_FROM_DEVICE);

	if (likely(new_skb)) {
		prefetch(((char *)(skb)) + L1_CACHE_BYTES);

#ifdef BNX2X_STOP_ON_ERROR
		if (pad + len > fp->rx_buf_size) {
			BNX2X_ERR("skb_put is about to fail... "
				  "pad %d len %d rx_buf_size %d\n",
				  pad, len, fp->rx_buf_size);

		skb_reserve(skb, pad);

		skb->protocol = eth_type_trans(skb, bp->dev);
		skb->ip_summed = CHECKSUM_UNNECESSARY;

		if (!bnx2x_fill_frag_skb(bp, fp, queue, skb, cqe, cqe_idx)) {
			if (tpa_info->parsing_flags & PARSING_FLAGS_VLAN)
				__vlan_hwaccel_put_tag(skb, tpa_info->vlan_tag);
			napi_gro_receive(&fp->napi, skb);
			DP(NETIF_MSG_RX_STATUS, "Failed to allocate new pages"
			   " - dropping packet!\n");
			dev_kfree_skb_any(skb);

		/* put new skb in bin */
		rx_buf->skb = new_skb;

	/* drop the packet and keep the buffer in the bin */
	DP(NETIF_MSG_RX_STATUS,
	   "Failed to allocate or map a new skb - dropping packet!\n");
	fp->eth_q_stats.rx_skb_alloc_failed++;

/* Set Toeplitz hash value in the skb using the value from the
 * CQE (calculated by HW).
 */
static inline void bnx2x_set_skb_rxhash(struct bnx2x *bp, union eth_rx_cqe *cqe,
	/* Set Toeplitz hash from CQE */
	if ((bp->dev->features & NETIF_F_RXHASH) &&
	    (cqe->fast_path_cqe.status_flags &
	     ETH_FAST_PATH_RX_CQE_RSS_HASH_FLG))
			le32_to_cpu(cqe->fast_path_cqe.rss_hash_result);
int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
	struct bnx2x *bp = fp->bp;
	u16 bd_cons, bd_prod, bd_prod_fw, comp_ring_cons;
	u16 hw_comp_cons, sw_comp_cons, sw_comp_prod;

#ifdef BNX2X_STOP_ON_ERROR
	if (unlikely(bp->panic))

	/* CQ "next element" is of the size of the regular element,
	   that's why it's ok here */
	hw_comp_cons = le16_to_cpu(*fp->rx_cons_sb);
	if ((hw_comp_cons & MAX_RCQ_DESC_CNT) == MAX_RCQ_DESC_CNT)

	bd_cons = fp->rx_bd_cons;
	bd_prod = fp->rx_bd_prod;
	bd_prod_fw = bd_prod;
	sw_comp_cons = fp->rx_comp_cons;
	sw_comp_prod = fp->rx_comp_prod;

	/* Memory barrier necessary as speculative reads of the rx
	 * buffer can be ahead of the index in the status block
	 */

	DP(NETIF_MSG_RX_STATUS,
	   "queue[%d]: hw_comp_cons %u sw_comp_cons %u\n",
	   fp->index, hw_comp_cons, sw_comp_cons);

	while (sw_comp_cons != hw_comp_cons) {
		struct sw_rx_bd *rx_buf = NULL;
		union eth_rx_cqe *cqe;
		struct eth_fast_path_rx_cqe *cqe_fp;
		enum eth_rx_cqe_type cqe_fp_type;

#ifdef BNX2X_STOP_ON_ERROR
		if (unlikely(bp->panic))

		comp_ring_cons = RCQ_BD(sw_comp_cons);
		bd_prod = RX_BD(bd_prod);
		bd_cons = RX_BD(bd_cons);

		/* Prefetch the page containing the BD descriptor
		   at producer's index. It will be needed when new skb is
		   allocated */
		prefetch((void *)(PAGE_ALIGN((unsigned long)
					     (&fp->rx_desc_ring[bd_prod])) -

		cqe = &fp->rx_comp_ring[comp_ring_cons];
		cqe_fp = &cqe->fast_path_cqe;
		cqe_fp_flags = cqe_fp->type_error_flags;
		cqe_fp_type = cqe_fp_flags & ETH_FAST_PATH_RX_CQE_TYPE;

		DP(NETIF_MSG_RX_STATUS, "CQE type %x err %x status %x"
		   " queue %x vlan %x len %u\n", CQE_TYPE(cqe_fp_flags),
		   cqe_fp_flags, cqe_fp->status_flags,
		   le32_to_cpu(cqe_fp->rss_hash_result),
		   le16_to_cpu(cqe_fp->vlan_tag), le16_to_cpu(cqe_fp->pkt_len));

		/* is this a slowpath msg? */
		if (unlikely(CQE_TYPE_SLOW(cqe_fp_type))) {
			bnx2x_sp_event(fp, cqe);

		/* this is an rx packet */
			rx_buf = &fp->rx_buf_ring[bd_cons];

			if (!CQE_TYPE_FAST(cqe_fp_type)) {
#ifdef BNX2X_STOP_ON_ERROR

				if (fp->disable_tpa &&
				    (CQE_TYPE_START(cqe_fp_type) ||
				     CQE_TYPE_STOP(cqe_fp_type)))
					BNX2X_ERR("START/STOP packet while "
						  "disable_tpa type %x\n",
						  CQE_TYPE(cqe_fp_type));

				if (CQE_TYPE_START(cqe_fp_type)) {
					u16 queue = cqe_fp->queue_index;
					DP(NETIF_MSG_RX_STATUS,
					   "calling tpa_start on queue %d\n",

					bnx2x_tpa_start(fp, queue, skb,

					/* Set Toeplitz hash for LRO skb */
					bnx2x_set_skb_rxhash(bp, cqe, skb);

						cqe->end_agg_cqe.queue_index;
					DP(NETIF_MSG_RX_STATUS,
					   "calling tpa_stop on queue %d\n",

					bnx2x_tpa_stop(bp, fp, queue,
#ifdef BNX2X_STOP_ON_ERROR

					bnx2x_update_sge_prod(fp, cqe_fp);

			len = le16_to_cpu(cqe_fp->pkt_len);
			pad = cqe_fp->placement_offset;
			dma_sync_single_for_cpu(&bp->pdev->dev,
					dma_unmap_addr(rx_buf, mapping),
					pad + RX_COPY_THRESH,
			prefetch(((char *)(skb)) + L1_CACHE_BYTES);

			/* is this an error packet? */
			if (unlikely(cqe_fp_flags & ETH_RX_ERROR_FALGS)) {
				   "ERROR flags %x rx packet %u\n",
				   cqe_fp_flags, sw_comp_cons);
				fp->eth_q_stats.rx_err_discard_pkt++;

			/* Since we don't have a jumbo ring,
			 * copy small packets if mtu > 1500
			 */
			if ((bp->dev->mtu > ETH_MAX_PACKET_SIZE) &&
			    (len <= RX_COPY_THRESH)) {
				struct sk_buff *new_skb;

				new_skb = netdev_alloc_skb(bp->dev, len + pad);
				if (new_skb == NULL) {
					   "ERROR packet dropped "
					   "because of alloc failure\n");
					fp->eth_q_stats.rx_skb_alloc_failed++;

				skb_copy_from_linear_data_offset(skb, pad,
						 new_skb->data + pad, len);
				skb_reserve(new_skb, pad);
				skb_put(new_skb, len);

				bnx2x_reuse_rx_skb(fp, bd_cons, bd_prod);

			if (likely(bnx2x_alloc_rx_skb(bp, fp, bd_prod) == 0)) {
				dma_unmap_single(&bp->pdev->dev,
						 dma_unmap_addr(rx_buf, mapping),
				skb_reserve(skb, pad);
				   "ERROR packet dropped because "
				   "of alloc failure\n");
				fp->eth_q_stats.rx_skb_alloc_failed++;

				bnx2x_reuse_rx_skb(fp, bd_cons, bd_prod);

			skb->protocol = eth_type_trans(skb, bp->dev);

			/* Set Toeplitz hash for a non-LRO skb */
			bnx2x_set_skb_rxhash(bp, cqe, skb);

			skb_checksum_none_assert(skb);

			if (bp->dev->features & NETIF_F_RXCSUM) {
				if (likely(BNX2X_RX_CSUM_OK(cqe)))
					skb->ip_summed = CHECKSUM_UNNECESSARY;
					fp->eth_q_stats.hw_csum_err++;

		skb_record_rx_queue(skb, fp->index);

		if (le16_to_cpu(cqe_fp->pars_flags.flags) &
			__vlan_hwaccel_put_tag(skb,
					       le16_to_cpu(cqe_fp->vlan_tag));
		napi_gro_receive(&fp->napi, skb);

		bd_cons = NEXT_RX_IDX(bd_cons);
		bd_prod = NEXT_RX_IDX(bd_prod);
		bd_prod_fw = NEXT_RX_IDX(bd_prod_fw);

		sw_comp_prod = NEXT_RCQ_IDX(sw_comp_prod);
		sw_comp_cons = NEXT_RCQ_IDX(sw_comp_cons);

		if (rx_pkt == budget)

	fp->rx_bd_cons = bd_cons;
	fp->rx_bd_prod = bd_prod_fw;
	fp->rx_comp_cons = sw_comp_cons;
	fp->rx_comp_prod = sw_comp_prod;

	/* Update producers */
	bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod,

	fp->rx_pkt += rx_pkt;
static irqreturn_t bnx2x_msix_fp_int(int irq, void *fp_cookie)
	struct bnx2x_fastpath *fp = fp_cookie;
	struct bnx2x *bp = fp->bp;

	DP(BNX2X_MSG_FP, "got an MSI-X interrupt on IDX:SB "
	   "[fp %d fw_sd %d igusb %d]\n",
	   fp->index, fp->fw_sb_id, fp->igu_sb_id);
	bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0);

#ifdef BNX2X_STOP_ON_ERROR
	if (unlikely(bp->panic))

	/* Handle Rx and Tx according to MSI-X vector */
	prefetch(fp->rx_cons_sb);

	for_each_cos_in_tx_queue(fp, cos)
		prefetch(fp->txdata[cos].tx_cons_sb);

	prefetch(&fp->sb_running_index[SM_RX_ID]);
	napi_schedule(&bnx2x_fp(bp, fp->index, napi));

/* HW Lock for shared dual port PHYs */
void bnx2x_acquire_phy_lock(struct bnx2x *bp)
	mutex_lock(&bp->port.phy_mutex);

	if (bp->port.need_hw_lock)
		bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_MDIO);

void bnx2x_release_phy_lock(struct bnx2x *bp)
	if (bp->port.need_hw_lock)
		bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_MDIO);

	mutex_unlock(&bp->port.phy_mutex);

/* calculates MF speed according to current linespeed and MF configuration */
u16 bnx2x_get_mf_speed(struct bnx2x *bp)
	u16 line_speed = bp->link_vars.line_speed;
		u16 maxCfg = bnx2x_extract_max_cfg(bp,
						   bp->mf_config[BP_VN(bp)]);

		/* Calculate the current MAX line speed limit for the MF
		 */
			line_speed = (line_speed * maxCfg) / 100;

			u16 vn_max_rate = maxCfg * 100;

			if (vn_max_rate < line_speed)
				line_speed = vn_max_rate;

/**
 * bnx2x_fill_report_data - fill link report data to report
 *
 * @data: link state to update
 *
 * It uses non-atomic bit operations because it is called under the mutex.
 */
static inline void bnx2x_fill_report_data(struct bnx2x *bp,
					  struct bnx2x_link_report_data *data)
	u16 line_speed = bnx2x_get_mf_speed(bp);

	memset(data, 0, sizeof(*data));

	/* Fill the report data: effective line speed */
	data->line_speed = line_speed;

	if (!bp->link_vars.link_up || (bp->flags & MF_FUNC_DIS))
		__set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
			  &data->link_report_flags);

	if (bp->link_vars.duplex == DUPLEX_FULL)
		__set_bit(BNX2X_LINK_REPORT_FD, &data->link_report_flags);

	/* Rx Flow Control is ON */
	if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_RX)
		__set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);

	/* Tx Flow Control is ON */
	if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_TX)
		__set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);

/**
 * bnx2x_link_report - report link status to OS.
 *
 * Calls the __bnx2x_link_report() under the same locking scheme
 * as a link/PHY state managing code to ensure a consistent link
 */
void bnx2x_link_report(struct bnx2x *bp)
	bnx2x_acquire_phy_lock(bp);
	__bnx2x_link_report(bp);
	bnx2x_release_phy_lock(bp);

/**
 * __bnx2x_link_report - report link status to OS.
 *
 * Non-atomic implementation.
 * Should be called under the phy_lock.
 */
void __bnx2x_link_report(struct bnx2x *bp)
	struct bnx2x_link_report_data cur_data;

		bnx2x_read_mf_cfg(bp);

	/* Read the current link report info */
	bnx2x_fill_report_data(bp, &cur_data);

	/* Don't report link down or exactly the same link status twice */
	if (!memcmp(&cur_data, &bp->last_reported_link, sizeof(cur_data)) ||
	    (test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
		      &bp->last_reported_link.link_report_flags) &&
	     test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
		      &cur_data.link_report_flags)))

	/* We are going to report new link parameters now -
	 * remember the current data for the next time.
	 */
	memcpy(&bp->last_reported_link, &cur_data, sizeof(cur_data));

	if (test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
		     &cur_data.link_report_flags)) {
		netif_carrier_off(bp->dev);
		netdev_err(bp->dev, "NIC Link is Down\n");

		netif_carrier_on(bp->dev);

		if (test_and_clear_bit(BNX2X_LINK_REPORT_FD,
				       &cur_data.link_report_flags))

		/* Handle the FC at the end so that only these flags would be
		 * possibly set. This way we may easily check if there is no FC
		 */
		if (cur_data.link_report_flags) {
			if (test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
				     &cur_data.link_report_flags)) {
				if (test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
					     &cur_data.link_report_flags))
					flow = "ON - receive & transmit";
					flow = "ON - receive";
				flow = "ON - transmit";

		netdev_info(bp->dev, "NIC Link is Up, %d Mbps %s duplex, Flow control: %s\n",
			    cur_data.line_speed, duplex, flow);
void bnx2x_init_rx_rings(struct bnx2x *bp)
	int func = BP_FUNC(bp);
	int max_agg_queues = CHIP_IS_E1(bp) ? ETH_MAX_AGGREGATION_QUEUES_E1 :
					      ETH_MAX_AGGREGATION_QUEUES_E1H_E2;

	/* Allocate TPA resources */
	for_each_rx_queue(bp, j) {
		struct bnx2x_fastpath *fp = &bp->fp[j];

		   "mtu %d rx_buf_size %d\n", bp->dev->mtu, fp->rx_buf_size);

		if (!fp->disable_tpa) {
			/* Fill the per-aggregation pool */
			for (i = 0; i < max_agg_queues; i++) {
				struct bnx2x_agg_info *tpa_info =
				struct sw_rx_bd *first_buf =
					&tpa_info->first_buf;

				first_buf->skb = netdev_alloc_skb(bp->dev,
				if (!first_buf->skb) {
					BNX2X_ERR("Failed to allocate TPA "
						  "skb pool for queue[%d] - "
						  "disabling TPA on this "
					bnx2x_free_tpa_pool(bp, fp, i);
					fp->disable_tpa = 1;
				dma_unmap_addr_set(first_buf, mapping, 0);
				tpa_info->tpa_state = BNX2X_TPA_STOP;

			/* "next page" elements initialization */
			bnx2x_set_next_page_sgl(fp);

			/* set SGEs bit mask */
			bnx2x_init_sge_ring_bit_mask(fp);

			/* Allocate SGEs and initialize the ring elements */
			for (i = 0, ring_prod = 0;
			     i < MAX_RX_SGE_CNT*NUM_RX_SGE_PAGES; i++) {

				if (bnx2x_alloc_rx_sge(bp, fp, ring_prod) < 0) {
					BNX2X_ERR("was only able to allocate "
					BNX2X_ERR("disabling TPA for "
					/* Cleanup already allocated elements */
					bnx2x_free_rx_sge_range(bp, fp,
					bnx2x_free_tpa_pool(bp, fp,
					fp->disable_tpa = 1;

				ring_prod = NEXT_SGE_IDX(ring_prod);

			fp->rx_sge_prod = ring_prod;

	for_each_rx_queue(bp, j) {
		struct bnx2x_fastpath *fp = &bp->fp[j];

		/* Activate BD ring */
		/*
		 * this will generate an interrupt (to the TSTORM)
		 * must only be done after chip is initialized
		 */
		bnx2x_update_rx_prod(bp, fp, fp->rx_bd_prod, fp->rx_comp_prod,

		if (CHIP_IS_E1(bp)) {
			REG_WR(bp, BAR_USTRORM_INTMEM +
			       USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func),
			       U64_LO(fp->rx_comp_mapping));
			REG_WR(bp, BAR_USTRORM_INTMEM +
			       USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func) + 4,
			       U64_HI(fp->rx_comp_mapping));

static void bnx2x_free_tx_skbs(struct bnx2x *bp)
	for_each_tx_queue(bp, i) {
		struct bnx2x_fastpath *fp = &bp->fp[i];

		for_each_cos_in_tx_queue(fp, cos) {
			struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];

			u16 bd_cons = txdata->tx_bd_cons;
			u16 sw_prod = txdata->tx_pkt_prod;
			u16 sw_cons = txdata->tx_pkt_cons;

			while (sw_cons != sw_prod) {
				bd_cons = bnx2x_free_tx_pkt(bp, txdata,

static void bnx2x_free_rx_bds(struct bnx2x_fastpath *fp)
	struct bnx2x *bp = fp->bp;

	/* ring wasn't allocated */
	if (fp->rx_buf_ring == NULL)

	for (i = 0; i < NUM_RX_BD; i++) {
		struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[i];
		struct sk_buff *skb = rx_buf->skb;

		dma_unmap_single(&bp->pdev->dev,
				 dma_unmap_addr(rx_buf, mapping),
				 fp->rx_buf_size, DMA_FROM_DEVICE);

static void bnx2x_free_rx_skbs(struct bnx2x *bp)
	for_each_rx_queue(bp, j) {
		struct bnx2x_fastpath *fp = &bp->fp[j];

		bnx2x_free_rx_bds(fp);

		if (!fp->disable_tpa)
			bnx2x_free_tpa_pool(bp, fp, CHIP_IS_E1(bp) ?
					    ETH_MAX_AGGREGATION_QUEUES_E1 :
					    ETH_MAX_AGGREGATION_QUEUES_E1H_E2);

void bnx2x_free_skbs(struct bnx2x *bp)
	bnx2x_free_tx_skbs(bp);
	bnx2x_free_rx_skbs(bp);

void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value)
	/* load old values */
	u32 mf_cfg = bp->mf_config[BP_VN(bp)];

	if (value != bnx2x_extract_max_cfg(bp, mf_cfg)) {
		/* leave all but MAX value */
		mf_cfg &= ~FUNC_MF_CFG_MAX_BW_MASK;

		/* set new MAX value */
		mf_cfg |= (value << FUNC_MF_CFG_MAX_BW_SHIFT)
				& FUNC_MF_CFG_MAX_BW_MASK;

		bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, mf_cfg);

/**
 * bnx2x_free_msix_irqs - free previously requested MSI-X IRQ vectors
 *
 * @bp: driver handle
 * @nvecs: number of vectors to be released
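 *
 * The vector layout mirrors bnx2x_enable_msix(): entry 0 is the slowpath
 * vector, then (when CNIC is compiled in) one CNIC vector, then one vector
 * per ETH queue; they are released in that order.
 */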
static void bnx2x_free_msix_irqs(struct bnx2x *bp, int nvecs)
	if (nvecs == offset)
	free_irq(bp->msix_table[offset].vector, bp->dev);
	DP(NETIF_MSG_IFDOWN, "released sp irq (%d)\n",
	   bp->msix_table[offset].vector);

	if (nvecs == offset)

	for_each_eth_queue(bp, i) {
		if (nvecs == offset)
		DP(NETIF_MSG_IFDOWN, "about to release fp #%d->%d "
		   "irq\n", i, bp->msix_table[offset].vector);

		free_irq(bp->msix_table[offset++].vector, &bp->fp[i]);

void bnx2x_free_irq(struct bnx2x *bp)
	if (bp->flags & USING_MSIX_FLAG)
		bnx2x_free_msix_irqs(bp, BNX2X_NUM_ETH_QUEUES(bp) +
	else if (bp->flags & USING_MSI_FLAG)
		free_irq(bp->pdev->irq, bp->dev);
		free_irq(bp->pdev->irq, bp->dev);

int bnx2x_enable_msix(struct bnx2x *bp)
	int msix_vec = 0, i, rc, req_cnt;

	bp->msix_table[msix_vec].entry = msix_vec;
	DP(NETIF_MSG_IFUP, "msix_table[0].entry = %d (slowpath)\n",
	   bp->msix_table[0].entry);

	bp->msix_table[msix_vec].entry = msix_vec;
	DP(NETIF_MSG_IFUP, "msix_table[%d].entry = %d (CNIC)\n",
	   bp->msix_table[msix_vec].entry, bp->msix_table[msix_vec].entry);

	/* We need separate vectors for ETH queues only (not FCoE) */
	for_each_eth_queue(bp, i) {
		bp->msix_table[msix_vec].entry = msix_vec;
		DP(NETIF_MSG_IFUP, "msix_table[%d].entry = %d "
		   "(fastpath #%u)\n", msix_vec, msix_vec, i);

	req_cnt = BNX2X_NUM_ETH_QUEUES(bp) + CNIC_PRESENT + 1;

	rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], req_cnt);
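
	/* With the pci_enable_msix() semantics of this kernel generation,
	 * rc == 0 means all req_cnt vectors were granted, a positive rc is
	 * the number of vectors that could be allocated instead, and a
	 * negative rc is an error - hence the retry and fallback below.
	 */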
	/*
	 * reconfigure number of tx/rx queues according to available
	 * MSI-X vectors
	 */
	if (rc >= BNX2X_MIN_MSIX_VEC_CNT) {
		/* how many fewer vectors will we have? */
		int diff = req_cnt - rc;

		   "Trying to use fewer MSI-X vectors: %d\n", rc);

		rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], rc);

			   "MSI-X is not attainable rc %d\n", rc);
		/*
		 * decrease number of queues by number of unallocated entries
		 */
		bp->num_queues -= diff;

		DP(NETIF_MSG_IFUP, "New queue configuration set: %d\n",

		/* fall back to INTx if not enough memory */
			bp->flags |= DISABLE_MSI_FLAG;
		DP(NETIF_MSG_IFUP, "MSI-X is not attainable rc %d\n", rc);

	bp->flags |= USING_MSIX_FLAG;

static int bnx2x_req_msix_irqs(struct bnx2x *bp)
	int i, rc, offset = 0;

	rc = request_irq(bp->msix_table[offset++].vector,
			 bnx2x_msix_sp_int, 0,
			 bp->dev->name, bp->dev);
		BNX2X_ERR("request sp irq failed\n");

	for_each_eth_queue(bp, i) {
		struct bnx2x_fastpath *fp = &bp->fp[i];
		snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",

		rc = request_irq(bp->msix_table[offset].vector,
				 bnx2x_msix_fp_int, 0, fp->name, fp);
			BNX2X_ERR("request fp #%d irq (%d) failed rc %d\n", i,
				  bp->msix_table[offset].vector, rc);
			bnx2x_free_msix_irqs(bp, offset);

	i = BNX2X_NUM_ETH_QUEUES(bp);
	offset = 1 + CNIC_PRESENT;
	netdev_info(bp->dev, "using MSI-X IRQs: sp %d fp[%d] %d"
		    bp->msix_table[0].vector,
		    0, bp->msix_table[offset].vector,
		    i - 1, bp->msix_table[offset + i - 1].vector);

int bnx2x_enable_msi(struct bnx2x *bp)
	rc = pci_enable_msi(bp->pdev);
		DP(NETIF_MSG_IFUP, "MSI is not attainable\n");

	bp->flags |= USING_MSI_FLAG;

static int bnx2x_req_irq(struct bnx2x *bp)
	unsigned long flags;

	if (bp->flags & USING_MSI_FLAG)
		flags = IRQF_SHARED;

	rc = request_irq(bp->pdev->irq, bnx2x_interrupt, flags,
			 bp->dev->name, bp->dev);

static inline int bnx2x_setup_irqs(struct bnx2x *bp)
	if (bp->flags & USING_MSIX_FLAG) {
		rc = bnx2x_req_msix_irqs(bp);
		rc = bnx2x_req_irq(bp);
			BNX2X_ERR("IRQ request failed rc %d, aborting\n", rc);
		if (bp->flags & USING_MSI_FLAG) {
			bp->dev->irq = bp->pdev->irq;
			netdev_info(bp->dev, "using MSI IRQ %d\n",

static inline void bnx2x_napi_enable(struct bnx2x *bp)
	for_each_rx_queue(bp, i)
		napi_enable(&bnx2x_fp(bp, i, napi));

static inline void bnx2x_napi_disable(struct bnx2x *bp)
	for_each_rx_queue(bp, i)
		napi_disable(&bnx2x_fp(bp, i, napi));

void bnx2x_netif_start(struct bnx2x *bp)
	if (netif_running(bp->dev)) {
		bnx2x_napi_enable(bp);
		bnx2x_int_enable(bp);
		if (bp->state == BNX2X_STATE_OPEN)
			netif_tx_wake_all_queues(bp->dev);

void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
	bnx2x_int_disable_sync(bp, disable_hw);
	bnx2x_napi_disable(bp);

u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb)
	struct bnx2x *bp = netdev_priv(dev);

		return skb_tx_hash(dev, skb);

		struct ethhdr *hdr = (struct ethhdr *)skb->data;
		u16 ether_type = ntohs(hdr->h_proto);

		/* Skip VLAN tag if present */
		if (ether_type == ETH_P_8021Q) {
			struct vlan_ethhdr *vhdr =
				(struct vlan_ethhdr *)skb->data;

			ether_type = ntohs(vhdr->h_vlan_encapsulated_proto);

		/* If ethertype is FCoE or FIP - use FCoE ring */
		if ((ether_type == ETH_P_FCOE) || (ether_type == ETH_P_FIP))
			return bnx2x_fcoe_tx(bp, txq_index);

	/* Select a non-FCoE queue: if FCoE is enabled, exclude FCoE L2 ring
	 */
	return __skb_tx_hash(dev, skb, BNX2X_NUM_ETH_QUEUES(bp));

void bnx2x_set_num_queues(struct bnx2x *bp)
	switch (bp->multi_mode) {
	case ETH_RSS_MODE_DISABLED:
	case ETH_RSS_MODE_REGULAR:
		bp->num_queues = bnx2x_calc_num_queues(bp);

	/* Add special queues */
	bp->num_queues += NON_ETH_CONTEXT_USE;

static inline int bnx2x_set_real_num_queues(struct bnx2x *bp)
	tx = MAX_TXQS_PER_COS * bp->max_cos;
	rx = BNX2X_NUM_ETH_QUEUES(bp);

	/* account for fcoe queue */

	rc = netif_set_real_num_tx_queues(bp->dev, tx);
		BNX2X_ERR("Failed to set real number of Tx queues: %d\n", rc);
	rc = netif_set_real_num_rx_queues(bp->dev, rx);
		BNX2X_ERR("Failed to set real number of Rx queues: %d\n", rc);

	DP(NETIF_MSG_DRV, "Setting real num queues to (tx, rx) (%d, %d)\n",

static inline void bnx2x_set_rx_buf_size(struct bnx2x *bp)
	for_each_queue(bp, i) {
		struct bnx2x_fastpath *fp = &bp->fp[i];
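
		/* Both branches below size the buffer as
		 * MTU + ETH_OVREHEAD + BNX2X_FW_RX_ALIGN +
		 * IP_HEADER_ALIGNMENT_PADDING; only the MTU term differs
		 * (mini-jumbo for the FCoE L2 ring vs. the netdev MTU).
		 */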
		/* Always use a mini-jumbo MTU for the FCoE L2 ring */
			/*
			 * Although there are no IP frames expected to arrive on
			 * this ring we still want to add an
			 * IP_HEADER_ALIGNMENT_PADDING to prevent a buffer
			 */
				BNX2X_FCOE_MINI_JUMBO_MTU + ETH_OVREHEAD +
				BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING;
				bp->dev->mtu + ETH_OVREHEAD +
				BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING;

static inline int bnx2x_init_rss_pf(struct bnx2x *bp)
	u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0};
	u8 num_eth_queues = BNX2X_NUM_ETH_QUEUES(bp);

	/*
	 * Prepare the initial contents of the indirection table if RSS is
	 * enabled.
	 */
	if (bp->multi_mode != ETH_RSS_MODE_DISABLED) {
		for (i = 0; i < sizeof(ind_table); i++)
				bp->fp->cl_id + (i % num_eth_queues);
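
	/* e.g., with four ETH queues the entries cycle through cl_id,
	 * cl_id + 1, cl_id + 2, cl_id + 3 across all
	 * T_ETH_INDIRECTION_TABLE_SIZE slots.
	 */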
	/*
	 * For 57710 and 57711 SEARCHER configuration (rss_keys) is
	 * per-port, so if explicit configuration is needed, do it only
	 *
	 * For 57712 and newer on the other hand it's a per-function
	 */
	return bnx2x_config_rss_pf(bp, ind_table,
				   bp->port.pmf || !CHIP_IS_E1x(bp));

int bnx2x_config_rss_pf(struct bnx2x *bp, u8 *ind_table, bool config_hash)
	struct bnx2x_config_rss_params params = {0};

	/* Although RSS is meaningless when there is a single HW queue we
	 * still need it enabled in order to have HW Rx hash generated.
	 *
	 * if (!is_eth_multi(bp))
	 *	bp->multi_mode = ETH_RSS_MODE_DISABLED;
	 */
	params.rss_obj = &bp->rss_conf_obj;

	__set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);

	switch (bp->multi_mode) {
	case ETH_RSS_MODE_DISABLED:
		__set_bit(BNX2X_RSS_MODE_DISABLED, &params.rss_flags);
	case ETH_RSS_MODE_REGULAR:
		__set_bit(BNX2X_RSS_MODE_REGULAR, &params.rss_flags);
	case ETH_RSS_MODE_VLAN_PRI:
		__set_bit(BNX2X_RSS_MODE_VLAN_PRI, &params.rss_flags);
	case ETH_RSS_MODE_E1HOV_PRI:
		__set_bit(BNX2X_RSS_MODE_E1HOV_PRI, &params.rss_flags);
	case ETH_RSS_MODE_IP_DSCP:
		__set_bit(BNX2X_RSS_MODE_IP_DSCP, &params.rss_flags);
		BNX2X_ERR("Unknown multi_mode: %d\n", bp->multi_mode);

	/* If RSS is enabled */
	if (bp->multi_mode != ETH_RSS_MODE_DISABLED) {
		/* RSS configuration */
		__set_bit(BNX2X_RSS_IPV4, &params.rss_flags);
		__set_bit(BNX2X_RSS_IPV4_TCP, &params.rss_flags);
		__set_bit(BNX2X_RSS_IPV6, &params.rss_flags);
		__set_bit(BNX2X_RSS_IPV6_TCP, &params.rss_flags);

		params.rss_result_mask = MULTI_MASK;

		memcpy(params.ind_table, ind_table, sizeof(params.ind_table));

			for (i = 0; i < sizeof(params.rss_key) / 4; i++)
				params.rss_key[i] = random32();

			__set_bit(BNX2X_RSS_SET_SRCH, &params.rss_flags);

	return bnx2x_config_rss(bp, &params);

static inline int bnx2x_init_hw(struct bnx2x *bp, u32 load_code)
	struct bnx2x_func_state_params func_params = {0};

	/* Prepare parameters for function state transitions */
	__set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);

	func_params.f_obj = &bp->func_obj;
	func_params.cmd = BNX2X_F_CMD_HW_INIT;

	func_params.params.hw_init.load_phase = load_code;

	return bnx2x_func_state_change(bp, &func_params);

/*
 * Cleans the objects that have internal lists without sending
 * ramrods. Should be run when interrupts are disabled.
 */
static void bnx2x_squeeze_objects(struct bnx2x *bp)
	unsigned long ramrod_flags = 0, vlan_mac_flags = 0;
	struct bnx2x_mcast_ramrod_params rparam = {0};
	struct bnx2x_vlan_mac_obj *mac_obj = &bp->fp->mac_obj;

	/***************** Cleanup MACs' object first *************************/

	/* Wait for completion of requested */
	__set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
	/* Perform a dry cleanup */
	__set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);

	/* Clean ETH primary MAC */
	__set_bit(BNX2X_ETH_MAC, &vlan_mac_flags);
	rc = mac_obj->delete_all(bp, &bp->fp->mac_obj, &vlan_mac_flags,
		BNX2X_ERR("Failed to clean ETH MACs: %d\n", rc);

	/* Cleanup UC list */
	__set_bit(BNX2X_UC_LIST_MAC, &vlan_mac_flags);
	rc = mac_obj->delete_all(bp, mac_obj, &vlan_mac_flags,
		BNX2X_ERR("Failed to clean UC list MACs: %d\n", rc);

	/***************** Now clean mcast object *****************************/
	rparam.mcast_obj = &bp->mcast_obj;
	__set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);

	/* Add a DEL command... */
	rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
		BNX2X_ERR("Failed to add a new DEL command to a multi-cast "
			  "object: %d\n", rc);

	/* ...and wait until all pending commands are cleared */
	rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
			BNX2X_ERR("Failed to clean multi-cast object: %d\n",

		rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);

#ifndef BNX2X_STOP_ON_ERROR
#define LOAD_ERROR_EXIT(bp, label) \
		(bp)->state = BNX2X_STATE_ERROR; \
#define LOAD_ERROR_EXIT(bp, label) \
		(bp)->state = BNX2X_STATE_ERROR; \
/* must be called with rtnl_lock */
int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
	int port = BP_PORT(bp);

#ifdef BNX2X_STOP_ON_ERROR
	if (unlikely(bp->panic))

	bp->state = BNX2X_STATE_OPENING_WAIT4_LOAD;

	/* Set the initial link reported state to link down */
	bnx2x_acquire_phy_lock(bp);
	memset(&bp->last_reported_link, 0, sizeof(bp->last_reported_link));
	__set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
		  &bp->last_reported_link.link_report_flags);
	bnx2x_release_phy_lock(bp);

	/* must be called before memory allocation and HW init */
	bnx2x_ilt_set_info(bp);

	/*
	 * Zero fastpath structures preserving invariants like napi, which are
	 * allocated only once, fp index, max_cos, bp pointer.
	 * Also set fp->disable_tpa.
	 */
	for_each_queue(bp, i)

	/* Set the receive queues buffer size */
	bnx2x_set_rx_buf_size(bp);

	if (bnx2x_alloc_mem(bp))

	/* As long as bnx2x_alloc_mem() may possibly update
	 * bp->num_queues, bnx2x_set_real_num_queues() should always
	 */
	rc = bnx2x_set_real_num_queues(bp);
		BNX2X_ERR("Unable to set real_num_queues\n");
		LOAD_ERROR_EXIT(bp, load_error0);

	/* configure multi cos mappings in kernel.
	 * this configuration may be overridden by a multi class queue discipline
	 * or by a dcbx negotiation result.
	 */
	bnx2x_setup_tc(bp->dev, bp->max_cos);

	bnx2x_napi_enable(bp);

	/* Send LOAD_REQUEST command to MCP
	 * Returns the type of LOAD command:
	 * if it is the first port to be initialized
	 * common blocks should be initialized, otherwise - not
	 */
	if (!BP_NOMCP(bp)) {
		load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, 0);
			BNX2X_ERR("MCP response failure, aborting\n");
			LOAD_ERROR_EXIT(bp, load_error1);
		if (load_code == FW_MSG_CODE_DRV_LOAD_REFUSED) {
			rc = -EBUSY; /* other port in diagnostic mode */
			LOAD_ERROR_EXIT(bp, load_error1);

		int path = BP_PATH(bp);

		DP(NETIF_MSG_IFUP, "NO MCP - load counts[%d] %d, %d, %d\n",
		   path, load_count[path][0], load_count[path][1],
		   load_count[path][2]);
		load_count[path][0]++;
		load_count[path][1 + port]++;
		DP(NETIF_MSG_IFUP, "NO MCP - new load counts[%d] %d, %d, %d\n",
		   path, load_count[path][0], load_count[path][1],
		   load_count[path][2]);
		if (load_count[path][0] == 1)
			load_code = FW_MSG_CODE_DRV_LOAD_COMMON;
		else if (load_count[path][1 + port] == 1)
			load_code = FW_MSG_CODE_DRV_LOAD_PORT;
			load_code = FW_MSG_CODE_DRV_LOAD_FUNCTION;

	if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
	    (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) ||
	    (load_code == FW_MSG_CODE_DRV_LOAD_PORT)) {
		/*
		 * We need the barrier to ensure the ordering between the
		 * writing to bp->port.pmf here and reading it from the
		 * bnx2x_periodic_task().
		 */
		queue_delayed_work(bnx2x_wq, &bp->period_task, 0);

	DP(NETIF_MSG_LINK, "pmf %d\n", bp->port.pmf);

	/* Init Function state controlling object */
	bnx2x__init_func_obj(bp);

	rc = bnx2x_init_hw(bp, load_code);
		BNX2X_ERR("HW init failed, aborting\n");
		bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
		LOAD_ERROR_EXIT(bp, load_error2);

	/* Connect to IRQs */
	rc = bnx2x_setup_irqs(bp);
		bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
		LOAD_ERROR_EXIT(bp, load_error2);

	/* Setup NIC internals and enable interrupts */
	bnx2x_nic_init(bp, load_code);

	/* Init per-function objects */
	bnx2x_init_bp_objs(bp);

	if (((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
	     (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP)) &&
	    (bp->common.shmem2_base)) {
		if (SHMEM2_HAS(bp, dcc_support))
			SHMEM2_WR(bp, dcc_support,
				  (SHMEM_DCC_SUPPORT_DISABLE_ENABLE_PF_TLV |
				   SHMEM_DCC_SUPPORT_BANDWIDTH_ALLOCATION_TLV));

	bp->state = BNX2X_STATE_OPENING_WAIT4_PORT;
	rc = bnx2x_func_start(bp);
		BNX2X_ERR("Function start failed!\n");
		bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
		LOAD_ERROR_EXIT(bp, load_error3);

	/* Send LOAD_DONE command to MCP */
	if (!BP_NOMCP(bp)) {
		load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
			BNX2X_ERR("MCP response failure, aborting\n");
			LOAD_ERROR_EXIT(bp, load_error3);

	rc = bnx2x_setup_leading(bp);
		BNX2X_ERR("Setup leading failed!\n");
		LOAD_ERROR_EXIT(bp, load_error3);

	/* Enable Timer scan */
	REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 1);

	for_each_nondefault_queue(bp, i) {
		rc = bnx2x_setup_queue(bp, &bp->fp[i], 0);
			LOAD_ERROR_EXIT(bp, load_error4);

	rc = bnx2x_init_rss_pf(bp);
		LOAD_ERROR_EXIT(bp, load_error4);

	/* Now that Clients are configured we are ready to work */
	bp->state = BNX2X_STATE_OPEN;

	/* Configure a ucast MAC */
	rc = bnx2x_set_eth_mac(bp, true);
		LOAD_ERROR_EXIT(bp, load_error4);

	if (bp->pending_max) {
		bnx2x_update_max_mf_config(bp, bp->pending_max);
		bp->pending_max = 0;

		bnx2x_initial_phy_init(bp, load_mode);

	/* Start fast path */

	/* Initialize Rx filter. */
	netif_addr_lock_bh(bp->dev);
	bnx2x_set_rx_mode(bp->dev);
	netif_addr_unlock_bh(bp->dev);

	switch (load_mode) {
		/* Tx queue should be only re-enabled */
		netif_tx_wake_all_queues(bp->dev);

		netif_tx_start_all_queues(bp->dev);
		smp_mb__after_clear_bit();

		bp->state = BNX2X_STATE_DIAG;

		bnx2x__link_status_update(bp);

	/* start the timer */
	mod_timer(&bp->timer, jiffies + bp->current_interval);

	bnx2x_setup_cnic_irq_info(bp);
	if (bp->state == BNX2X_STATE_OPEN)
		bnx2x_cnic_notify(bp, CNIC_CTL_START_CMD);

	bnx2x_inc_load_cnt(bp);

	/* Wait for all pending SP commands to complete */
	if (!bnx2x_wait_sp_comp(bp, ~0x0UL)) {
		BNX2X_ERR("Timeout waiting for SP elements to complete\n");
		bnx2x_nic_unload(bp, UNLOAD_CLOSE);

	bnx2x_dcbx_init(bp);

#ifndef BNX2X_STOP_ON_ERROR

	/* Disable Timer scan */
	REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 0);

	bnx2x_int_disable_sync(bp, 1);

	/* Clean queueable objects */
	bnx2x_squeeze_objects(bp);

	/* Free SKBs, SGEs, TPA pool and driver internals */
	bnx2x_free_skbs(bp);
	for_each_rx_queue(bp, i)
		bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);

	if (!BP_NOMCP(bp)) {
		bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0);
		bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, 0);

	bnx2x_napi_disable(bp);

#endif /* ! BNX2X_STOP_ON_ERROR */
/* must be called with rtnl_lock */
int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
	bool global = false;

	if ((bp->state == BNX2X_STATE_CLOSED) ||
	    (bp->state == BNX2X_STATE_ERROR)) {
		/* We can get here if the driver has been unloaded
		 * during parity error recovery and is either waiting for a
		 * leader to complete or for other functions to unload and
		 * then ifdown has been issued. In this case we want to
		 * unload and let other functions complete a recovery
		 */
		bp->recovery_state = BNX2X_RECOVERY_DONE;

		bnx2x_release_leader_lock(bp);

		DP(NETIF_MSG_HW, "Releasing a leadership...\n");

	/*
	 * It's important to set the bp->state to a value different from
	 * BNX2X_STATE_OPEN and only then stop the Tx. Otherwise bnx2x_tx_int()
	 * may restart the Tx from the NAPI context (see bnx2x_tx_int()).
	 */
	bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT;

	bnx2x_tx_disable(bp);

	bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD);

	bp->rx_mode = BNX2X_RX_MODE_NONE;

	del_timer_sync(&bp->timer);

	/* Set ALWAYS_ALIVE bit in shmem */
	bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;

	bnx2x_drv_pulse(bp);

	bnx2x_stats_handle(bp, STATS_EVENT_STOP);

	/* Cleanup the chip if needed */
	if (unload_mode != UNLOAD_RECOVERY)
		bnx2x_chip_cleanup(bp, unload_mode);

		/* Send the UNLOAD_REQUEST to the MCP */
		bnx2x_send_unload_req(bp, unload_mode);

		/*
		 * Prevent transactions to host from the functions on the
		 * engine that doesn't reset global blocks in case of global
		 * attention once global blocks are reset and gates are opened
		 * (the engine which leader will perform the recovery
		 */
		if (!CHIP_IS_E1x(bp))
			bnx2x_pf_disable(bp);

		/* Disable HW interrupts, NAPI */
		bnx2x_netif_stop(bp, 1);

		/* Report UNLOAD_DONE to MCP */
		bnx2x_send_unload_done(bp);

	/*
	 * At this stage no more interrupts will arrive so we may safely clean
	 * the queueable objects here in case they failed to get cleaned so far.
	 */
	bnx2x_squeeze_objects(bp);

	/* There should be no more pending SP commands at this stage */

	/* Free SKBs, SGEs, TPA pool and driver internals */
	bnx2x_free_skbs(bp);
	for_each_rx_queue(bp, i)
		bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);

	bp->state = BNX2X_STATE_CLOSED;

	/* Check if there are pending parity attentions. If there are - set
	 * RECOVERY_IN_PROGRESS.
	 */
	if (bnx2x_chk_parity_attn(bp, &global, false)) {
		bnx2x_set_reset_in_progress(bp);

		/* Set RESET_IS_GLOBAL if needed */
			bnx2x_set_reset_global(bp);

	/* The last driver must disable a "close the gate" if there is no
	 * parity attention or "process kill" pending.
	 */
	if (!bnx2x_dec_load_cnt(bp) && bnx2x_reset_is_done(bp, BP_PATH(bp)))
		bnx2x_disable_close_the_gate(bp);

int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state)
	/* If there is no power capability, silently succeed */
		DP(NETIF_MSG_HW, "No power capability. Breaking.\n");

	pci_read_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL, &pmcsr);

		pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL,
				      ((pmcsr & ~PCI_PM_CTRL_STATE_MASK) |
				       PCI_PM_CTRL_PME_STATUS));

		if (pmcsr & PCI_PM_CTRL_STATE_MASK)
			/* delay required during transition out of D3hot */

		/* If there are other clients above don't
		   shut down the power */
		if (atomic_read(&bp->pdev->enable_cnt) != 1)

		/* Don't shut down the power for emulation and FPGA */
		if (CHIP_REV_IS_SLOW(bp))

		pmcsr &= ~PCI_PM_CTRL_STATE_MASK;

			pmcsr |= PCI_PM_CTRL_PME_ENABLE;

		pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL,

		/* No more memory access after this point until
		 * device is brought back to D0.
		 */
/*
 * net_device service functions
 */
int bnx2x_poll(struct napi_struct *napi, int budget)
	struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath,
	struct bnx2x *bp = fp->bp;

#ifdef BNX2X_STOP_ON_ERROR
		if (unlikely(bp->panic)) {
			napi_complete(napi);

		for_each_cos_in_tx_queue(fp, cos)
			if (bnx2x_tx_queue_has_work(&fp->txdata[cos]))
				bnx2x_tx_int(bp, &fp->txdata[cos]);

		if (bnx2x_has_rx_work(fp)) {
			work_done += bnx2x_rx_int(fp, budget - work_done);

			/* must not complete if we consumed full budget */
			if (work_done >= budget)

		/* Fall out from the NAPI loop if needed */
		if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {

			/* No need to update SB for FCoE L2 ring as long as
			 * it's connected to the default SB and the SB
			 * has been updated when NAPI was scheduled.
			 */
			if (IS_FCOE_FP(fp)) {
				napi_complete(napi);

			bnx2x_update_fpsb_idx(fp);
			/* bnx2x_has_rx_work() reads the status block,
			 * thus we need to ensure that status block indices
			 * have been actually read (bnx2x_update_fpsb_idx)
			 * prior to this check (bnx2x_has_rx_work) so that
			 * we won't write the "newer" value of the status block
			 * to IGU (if there was a DMA right after
			 * bnx2x_has_rx_work and if there is no rmb, the memory
			 * reading (bnx2x_update_fpsb_idx) may be postponed
			 * to right before bnx2x_ack_sb). In this case there
			 * will never be another interrupt until there is
			 * another update of the status block, while there
			 * is still unhandled work.
			 */

			if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
				napi_complete(napi);
				/* Re-enable interrupts */
				   "Update index to %d\n", fp->fp_hc_idx);
				bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID,
					     le16_to_cpu(fp->fp_hc_idx),

/* we split the first BD into headers and data BDs
 * to ease the pain of our fellow microcode engineers
 * we use one mapping for both BDs
 * So far this has only been observed to happen
 * in Other Operating Systems(TM)
 */
static noinline u16 bnx2x_tx_split(struct bnx2x *bp,
				   struct bnx2x_fp_txdata *txdata,
				   struct sw_tx_bd *tx_buf,
				   struct eth_tx_start_bd **tx_bd, u16 hlen,
				   u16 bd_prod, int nbd)
	struct eth_tx_start_bd *h_tx_bd = *tx_bd;
	struct eth_tx_bd *d_tx_bd;
	int old_len = le16_to_cpu(h_tx_bd->nbytes);

	/* first fix first BD */
	h_tx_bd->nbd = cpu_to_le16(nbd);
	h_tx_bd->nbytes = cpu_to_le16(hlen);

	DP(NETIF_MSG_TX_QUEUED, "TSO split header size is %d "
	   "(%x:%x) nbd %d\n", h_tx_bd->nbytes, h_tx_bd->addr_hi,
	   h_tx_bd->addr_lo, h_tx_bd->nbd);

	/* now get a new data BD
	 * (after the pbd) and fill it */
	bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
	d_tx_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;

	mapping = HILO_U64(le32_to_cpu(h_tx_bd->addr_hi),
			   le32_to_cpu(h_tx_bd->addr_lo)) + hlen;

	d_tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
	d_tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
	d_tx_bd->nbytes = cpu_to_le16(old_len - hlen);
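
	/* The two BDs now describe one contiguous DMA mapping: the header
	 * BD covers bytes [0, hlen) and the data BD bytes [hlen, old_len),
	 * so no second dma_map_single() is needed.
	 */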
	/* this marks the BD as one that has no individual mapping */
	tx_buf->flags |= BNX2X_TSO_SPLIT_BD;

	DP(NETIF_MSG_TX_QUEUED,
	   "TSO split data size is %d (%x:%x)\n",
	   d_tx_bd->nbytes, d_tx_bd->addr_hi, d_tx_bd->addr_lo);

	*tx_bd = (struct eth_tx_start_bd *)d_tx_bd;

static inline u16 bnx2x_csum_fix(unsigned char *t_header, u16 csum, s8 fix)
		csum = (u16) ~csum_fold(csum_sub(csum,
				csum_partial(t_header - fix, fix, 0)));

		csum = (u16) ~csum_fold(csum_add(csum,
				csum_partial(t_header, -fix, 0)));
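
	/* A sketch of the intent: a positive fix means the checksum start
	 * preceded the transport header by fix bytes, so their partial sum
	 * is subtracted; a negative fix means it started after it, so the
	 * missing bytes are added back.  swab16() then returns the result
	 * in the byte order the parse BD expects.
	 */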
	return swab16(csum);

static inline u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb)
	if (skb->ip_summed != CHECKSUM_PARTIAL)

	if (vlan_get_protocol(skb) == htons(ETH_P_IPV6)) {
		if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
			rc |= XMIT_CSUM_TCP;

		if (ip_hdr(skb)->protocol == IPPROTO_TCP)
			rc |= XMIT_CSUM_TCP;

	if (skb_is_gso_v6(skb))
		rc |= XMIT_GSO_V6 | XMIT_CSUM_TCP | XMIT_CSUM_V6;
	else if (skb_is_gso(skb))
		rc |= XMIT_GSO_V4 | XMIT_CSUM_V4 | XMIT_CSUM_TCP;

#if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
/* check if packet requires linearization (packet is too fragmented)
   no need to check fragmentation if page size > 8K (there will be no
   violation to FW restrictions) */
static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb,
	int first_bd_sz = 0;

	/* 3 = 1 (for linear data BD) + 2 (for PBD and last BD) */
	if (skb_shinfo(skb)->nr_frags >= (MAX_FETCH_BD - 3)) {

		if (xmit_type & XMIT_GSO) {
			unsigned short lso_mss = skb_shinfo(skb)->gso_size;
			/* Check if LSO packet needs to be copied:
			   3 = 1 (for headers BD) + 2 (for PBD and last BD) */
			int wnd_size = MAX_FETCH_BD - 3;
			/* Number of windows to check */
			int num_wnds = skb_shinfo(skb)->nr_frags - wnd_size;
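
			/* The rule encoded below: every window of wnd_size
			 * consecutive frags must carry at least lso_mss
			 * bytes so the FW can always fetch a full MSS;
			 * if any window falls short, the skb must be
			 * linearized.
			 */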
2337 /* Headers length */
2338 hlen = (int)(skb_transport_header(skb) - skb->data) +
2341 /* Amount of data (w/o headers) on linear part of SKB*/
2342 first_bd_sz = skb_headlen(skb) - hlen;
2344 wnd_sum = first_bd_sz;
2346 /* Calculate the first sum - it's special */
2347 for (frag_idx = 0; frag_idx < wnd_size - 1; frag_idx++)
2349 skb_shinfo(skb)->frags[frag_idx].size;
2351 /* If there was data on linear skb data - check it */
2352 if (first_bd_sz > 0) {
2353 if (unlikely(wnd_sum < lso_mss)) {
2358 wnd_sum -= first_bd_sz;
2361 /* Others are easier: run through the frag list and
2362 check all windows */
2363 for (wnd_idx = 0; wnd_idx <= num_wnds; wnd_idx++) {
2365 skb_shinfo(skb)->frags[wnd_idx + wnd_size - 1].size;
2367 if (unlikely(wnd_sum < lso_mss)) {
2372 skb_shinfo(skb)->frags[wnd_idx].size;
2375 /* in non-LSO too fragmented packet should always
2382 if (unlikely(to_copy))
2383 DP(NETIF_MSG_TX_QUEUED,
2384 "Linearization IS REQUIRED for %s packet. "
2385 "num_frags %d hlen %d first_bd_sz %d\n",
2386 (xmit_type & XMIT_GSO) ? "LSO" : "non-LSO",
2387 skb_shinfo(skb)->nr_frags, hlen, first_bd_sz);
2393 static inline void bnx2x_set_pbd_gso_e2(struct sk_buff *skb, u32 *parsing_data,
2396 *parsing_data |= (skb_shinfo(skb)->gso_size <<
2397 ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT) &
2398 ETH_TX_PARSE_BD_E2_LSO_MSS;
2399 if ((xmit_type & XMIT_GSO_V6) &&
2400 (ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6))
2401 *parsing_data |= ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR;
2405 * bnx2x_set_pbd_gso - update PBD in GSO case.
2409 * @xmit_type: xmit flags
2411 static inline void bnx2x_set_pbd_gso(struct sk_buff *skb,
2412 struct eth_tx_parse_bd_e1x *pbd,
2415 pbd->lso_mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
2416 pbd->tcp_send_seq = swab32(tcp_hdr(skb)->seq);
2417 pbd->tcp_flags = pbd_tcp_flags(skb);
2419 if (xmit_type & XMIT_GSO_V4) {
2420 pbd->ip_id = swab16(ip_hdr(skb)->id);
2421 pbd->tcp_pseudo_csum =
2422 swab16(~csum_tcpudp_magic(ip_hdr(skb)->saddr,
2424 0, IPPROTO_TCP, 0));
2427 pbd->tcp_pseudo_csum =
2428 swab16(~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
2429 &ipv6_hdr(skb)->daddr,
2430 0, IPPROTO_TCP, 0));
2432 pbd->global_data |= ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN;
2436 * bnx2x_set_pbd_csum_e2 - update PBD with checksum and return header length
2438 * @bp: driver handle
2440 * @parsing_data: data to be updated
2441 * @xmit_type: xmit flags
2445 static inline u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb,
2446 u32 *parsing_data, u32 xmit_type)
2449 ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) <<
2450 ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W_SHIFT) &
2451 ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W;
2453 if (xmit_type & XMIT_CSUM_TCP) {
2454 *parsing_data |= ((tcp_hdrlen(skb) / 4) <<
2455 ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT) &
2456 ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW;
2458 return skb_transport_header(skb) + tcp_hdrlen(skb) - skb->data;
2460 /* We support checksum offload for TCP and UDP only.
2461 * No need to pass the UDP header length - it's a constant.
2463 return skb_transport_header(skb) +
2464 sizeof(struct udphdr) - skb->data;
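/* Worked example (illustrative figures): for an untagged IPv4/TCP frame
 * with no TCP options, skb_transport_header(skb) - skb->data is
 * 14 (ETH) + 20 (IP) = 34 bytes and tcp_hdrlen() is 20, so this function
 * returns hlen = 54 bytes. Note the unit conventions in parsing_data: the
 * TCP header start offset is stored in 16-bit words (>> 1) and the TCP
 * header length in 32-bit dwords (/ 4), as the _W and _DW field-name
 * suffixes indicate.
 */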
2467 static inline void bnx2x_set_sbd_csum(struct bnx2x *bp, struct sk_buff *skb,
2468 struct eth_tx_start_bd *tx_start_bd, u32 xmit_type)
2470 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_L4_CSUM;
2472 if (xmit_type & XMIT_CSUM_V4)
2473 tx_start_bd->bd_flags.as_bitfield |=
2474 ETH_TX_BD_FLAGS_IP_CSUM;
2476 tx_start_bd->bd_flags.as_bitfield |=
2477 ETH_TX_BD_FLAGS_IPV6;
2479 if (!(xmit_type & XMIT_CSUM_TCP))
2480 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IS_UDP;
2484 * bnx2x_set_pbd_csum - update PBD with checksum and return header length
2486 * @bp: driver handle
2488 * @pbd: parse BD to be updated
2489 * @xmit_type: xmit flags
2491 static inline u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb,
2492 struct eth_tx_parse_bd_e1x *pbd,
2495 u8 hlen = (skb_network_header(skb) - skb->data) >> 1;
2497 /* for now NS flag is not used in Linux */
2499 (hlen | ((skb->protocol == cpu_to_be16(ETH_P_8021Q)) <<
2500 ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT));
2502 pbd->ip_hlen_w = (skb_transport_header(skb) -
2503 skb_network_header(skb)) >> 1;
2505 hlen += pbd->ip_hlen_w;
2507 /* We support checksum offload for TCP and UDP only */
2508 if (xmit_type & XMIT_CSUM_TCP)
2509 hlen += tcp_hdrlen(skb) / 2;
2511 hlen += sizeof(struct udphdr) / 2;
2513 pbd->total_hlen_w = cpu_to_le16(hlen);
2516 if (xmit_type & XMIT_CSUM_TCP) {
2517 pbd->tcp_pseudo_csum = swab16(tcp_hdr(skb)->check);
2520 s8 fix = SKB_CS_OFF(skb); /* signed! */
2522 DP(NETIF_MSG_TX_QUEUED,
2523 "hlen %d fix %d csum before fix %x\n",
2524 le16_to_cpu(pbd->total_hlen_w), fix, SKB_CS(skb));
2526 /* HW bug: fixup the CSUM */
2527 pbd->tcp_pseudo_csum =
2528 bnx2x_csum_fix(skb_transport_header(skb),
2531 DP(NETIF_MSG_TX_QUEUED, "csum after fix %x\n",
2532 pbd->tcp_pseudo_csum);
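/* Worked example of the word arithmetic above (illustrative): for an
 * untagged IPv4/TCP frame with no options, hlen starts at 14 / 2 = 7
 * words up to the IP header, ip_hlen_w adds 20 / 2 = 10 words and TCP
 * adds another 20 / 2 = 10, so total_hlen_w = 27 words (54 bytes). All
 * E1x PBD header lengths are 16-bit words, hence the ">> 1" and "/ 2"
 * conversions throughout.
 */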
2538 /* called with netif_tx_lock
2539 * bnx2x_tx_int() runs without netif_tx_lock unless it needs to call
2540 * netif_wake_queue()
2542 netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
2544 struct bnx2x *bp = netdev_priv(dev);
2546 struct bnx2x_fastpath *fp;
2547 struct netdev_queue *txq;
2548 struct bnx2x_fp_txdata *txdata;
2549 struct sw_tx_bd *tx_buf;
2550 struct eth_tx_start_bd *tx_start_bd, *first_bd;
2551 struct eth_tx_bd *tx_data_bd, *total_pkt_bd = NULL;
2552 struct eth_tx_parse_bd_e1x *pbd_e1x = NULL;
2553 struct eth_tx_parse_bd_e2 *pbd_e2 = NULL;
2554 u32 pbd_e2_parsing_data = 0;
2555 u16 pkt_prod, bd_prod;
2556 int nbd, txq_index, fp_index, txdata_index;
2558 u32 xmit_type = bnx2x_xmit_type(bp, skb);
2561 __le16 pkt_size = 0;
2563 u8 mac_type = UNICAST_ADDRESS;
2565 #ifdef BNX2X_STOP_ON_ERROR
2566 if (unlikely(bp->panic))
2567 return NETDEV_TX_BUSY;
2570 txq_index = skb_get_queue_mapping(skb);
2571 txq = netdev_get_tx_queue(dev, txq_index);
2573 BUG_ON(txq_index >= MAX_ETH_TXQ_IDX(bp) + FCOE_PRESENT);
2575 /* decode the fastpath index and the cos index from the txq */
2576 fp_index = TXQ_TO_FP(txq_index);
2577 txdata_index = TXQ_TO_COS(txq_index);
2581 * Override the above for the FCoE queue:
2582 * - FCoE fp entry is right after the ETH entries.
2583 * - FCoE L2 queue uses bp->txdata[0] only.
2585 if (unlikely(!NO_FCOE(bp) && (txq_index ==
2586 bnx2x_fcoe_tx(bp, txq_index)))) {
2587 fp_index = FCOE_IDX;
2592 /* enable this debug print to view the transmission queue being used
2593 DP(BNX2X_MSG_FP, "indices: txq %d, fp %d, txdata %d\n",
2594 txq_index, fp_index, txdata_index); */
2596 /* locate the fastpath and the txdata */
2597 fp = &bp->fp[fp_index];
2598 txdata = &fp->txdata[txdata_index];
2600 /* enable this debug print to view the transmission details
2601 DP(BNX2X_MSG_FP,"transmitting packet cid %d fp index %d txdata_index %d"
2602 " tx_data ptr %p fp pointer %p\n",
2603 txdata->cid, fp_index, txdata_index, txdata, fp); */
2605 if (unlikely(bnx2x_tx_avail(bp, txdata) <
2606 (skb_shinfo(skb)->nr_frags + 3))) {
2607 fp->eth_q_stats.driver_xoff++;
2608 netif_tx_stop_queue(txq);
2609 BNX2X_ERR("BUG! Tx ring full when queue awake!\n");
2610 return NETDEV_TX_BUSY;
2613 DP(NETIF_MSG_TX_QUEUED, "queue[%d]: SKB: summed %x protocol %x "
2614 "protocol(%x,%x) gso type %x xmit_type %x\n",
2615 txq_index, skb->ip_summed, skb->protocol, ipv6_hdr(skb)->nexthdr,
2616 ip_hdr(skb)->protocol, skb_shinfo(skb)->gso_type, xmit_type);
2618 eth = (struct ethhdr *)skb->data;
2620 /* set flag according to packet type (UNICAST_ADDRESS is default) */
2621 if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
2622 if (is_broadcast_ether_addr(eth->h_dest))
2623 mac_type = BROADCAST_ADDRESS;
2625 mac_type = MULTICAST_ADDRESS;
2628 #if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
2629 /* First, check if we need to linearize the skb (due to FW
2630 restrictions). There is no need to check fragmentation if the page
2631 size is > 8K, since FW restrictions cannot be violated in that case */
2632 if (bnx2x_pkt_req_lin(bp, skb, xmit_type)) {
2633 /* Statistics of linearization */
2635 if (skb_linearize(skb) != 0) {
2636 DP(NETIF_MSG_TX_QUEUED, "SKB linearization failed - "
2637 "silently dropping this SKB\n");
2638 dev_kfree_skb_any(skb);
2639 return NETDEV_TX_OK;
2643 /* Map skb linear data for DMA */
2644 mapping = dma_map_single(&bp->pdev->dev, skb->data,
2645 skb_headlen(skb), DMA_TO_DEVICE);
2646 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
2647 DP(NETIF_MSG_TX_QUEUED, "SKB mapping failed - "
2648 "silently dropping this SKB\n");
2649 dev_kfree_skb_any(skb);
2650 return NETDEV_TX_OK;
2653 Please read carefully. First we use one BD which we mark as start,
2654 then we have a parsing info BD (used for TSO or xsum),
2655 and only then we have the rest of the TSO BDs.
2656 (don't forget to mark the last one as last,
2657 and to unmap only AFTER you write to the BD ...)
2658 And above all, all PBD sizes are in words - NOT DWORDS!
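/* An illustrative picture of the resulting BD chain for a two-frag TSO
 * packet (a sketch, not normative):
 *
 *   start BD (headers, marked START)
 *     -> parse BD (PBD, e1x or e2 flavour)
 *     -> [TSO split-header BD, only if skb_headlen(skb) > hlen]
 *     -> data BD (frag 0)  <- total_pkt_bytes lives here
 *     -> data BD (frag 1, marked LAST)
 *
 * nbd counts all of these BDs, and first_bd->nbd is only written once the
 * final count is known.
 */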
2661 /* get current pkt produced now - advance it just before sending packet
2662 * since mapping of pages may fail and cause packet to be dropped
2664 pkt_prod = txdata->tx_pkt_prod;
2665 bd_prod = TX_BD(txdata->tx_bd_prod);
2667 /* get a tx_buf and first BD
2668 * tx_start_bd may be changed during SPLIT,
2669 * but first_bd will always stay first
2671 tx_buf = &txdata->tx_buf_ring[TX_BD(pkt_prod)];
2672 tx_start_bd = &txdata->tx_desc_ring[bd_prod].start_bd;
2673 first_bd = tx_start_bd;
2675 tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
2676 SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_ETH_ADDR_TYPE,
2680 SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_HDR_NBDS, 1);
2682 /* remember the first BD of the packet */
2683 tx_buf->first_bd = txdata->tx_bd_prod;
2687 DP(NETIF_MSG_TX_QUEUED,
2688 "sending pkt %u @%p next_idx %u bd %u @%p\n",
2689 pkt_prod, tx_buf, txdata->tx_pkt_prod, bd_prod, tx_start_bd);
2691 if (vlan_tx_tag_present(skb)) {
2692 tx_start_bd->vlan_or_ethertype =
2693 cpu_to_le16(vlan_tx_tag_get(skb));
2694 tx_start_bd->bd_flags.as_bitfield |=
2695 (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
2697 tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
2699 /* turn on parsing and get a BD */
2700 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2702 if (xmit_type & XMIT_CSUM)
2703 bnx2x_set_sbd_csum(bp, skb, tx_start_bd, xmit_type);
2705 if (!CHIP_IS_E1x(bp)) {
2706 pbd_e2 = &txdata->tx_desc_ring[bd_prod].parse_bd_e2;
2707 memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2));
2708 /* Set PBD in checksum offload case */
2709 if (xmit_type & XMIT_CSUM)
2710 hlen = bnx2x_set_pbd_csum_e2(bp, skb,
2711 &pbd_e2_parsing_data,
2715 * fill in the MAC addresses in the PBD - for local
2718 bnx2x_set_fw_mac_addr(&pbd_e2->src_mac_addr_hi,
2719 &pbd_e2->src_mac_addr_mid,
2720 &pbd_e2->src_mac_addr_lo,
2722 bnx2x_set_fw_mac_addr(&pbd_e2->dst_mac_addr_hi,
2723 &pbd_e2->dst_mac_addr_mid,
2724 &pbd_e2->dst_mac_addr_lo,
2728 pbd_e1x = &txdata->tx_desc_ring[bd_prod].parse_bd_e1x;
2729 memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x));
2730 /* Set PBD in checksum offload case */
2731 if (xmit_type & XMIT_CSUM)
2732 hlen = bnx2x_set_pbd_csum(bp, skb, pbd_e1x, xmit_type);
2736 /* Setup the data pointer of the first BD of the packet */
2737 tx_start_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2738 tx_start_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2739 nbd = 2; /* start_bd + pbd + frags (updated when pages are mapped) */
2740 tx_start_bd->nbytes = cpu_to_le16(skb_headlen(skb));
2741 pkt_size = tx_start_bd->nbytes;
2743 DP(NETIF_MSG_TX_QUEUED, "first bd @%p addr (%x:%x) nbd %d"
2744 " nbytes %d flags %x vlan %x\n",
2745 tx_start_bd, tx_start_bd->addr_hi, tx_start_bd->addr_lo,
2746 le16_to_cpu(tx_start_bd->nbd), le16_to_cpu(tx_start_bd->nbytes),
2747 tx_start_bd->bd_flags.as_bitfield,
2748 le16_to_cpu(tx_start_bd->vlan_or_ethertype));
2750 if (xmit_type & XMIT_GSO) {
2752 DP(NETIF_MSG_TX_QUEUED,
2753 "TSO packet len %d hlen %d total len %d tso size %d\n",
2754 skb->len, hlen, skb_headlen(skb),
2755 skb_shinfo(skb)->gso_size);
2757 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO;
2759 if (unlikely(skb_headlen(skb) > hlen))
2760 bd_prod = bnx2x_tx_split(bp, txdata, tx_buf,
2763 if (!CHIP_IS_E1x(bp))
2764 bnx2x_set_pbd_gso_e2(skb, &pbd_e2_parsing_data,
2767 bnx2x_set_pbd_gso(skb, pbd_e1x, xmit_type);
2770 /* Set the PBD's parsing_data field if not zero
2771 * (for the chips newer than 57711).
2773 if (pbd_e2_parsing_data)
2774 pbd_e2->parsing_data = cpu_to_le32(pbd_e2_parsing_data);
2776 tx_data_bd = (struct eth_tx_bd *)tx_start_bd;
2778 /* Handle fragmented skb */
2779 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2780 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2782 mapping = dma_map_page(&bp->pdev->dev, frag->page,
2783 frag->page_offset, frag->size,
2785 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
2787 DP(NETIF_MSG_TX_QUEUED, "Unable to map page - "
2788 "dropping packet...\n");
2790 /* we need to unmap all buffers already mapped
2792 * first_bd->nbd needs to be properly updated
2793 * before the call to bnx2x_free_tx_pkt
2795 first_bd->nbd = cpu_to_le16(nbd);
2796 bnx2x_free_tx_pkt(bp, txdata,
2797 TX_BD(txdata->tx_pkt_prod));
2798 return NETDEV_TX_OK;
2801 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2802 tx_data_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2803 if (total_pkt_bd == NULL)
2804 total_pkt_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2806 tx_data_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2807 tx_data_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2808 tx_data_bd->nbytes = cpu_to_le16(frag->size);
2809 le16_add_cpu(&pkt_size, frag->size);
2812 DP(NETIF_MSG_TX_QUEUED,
2813 "frag %d bd @%p addr (%x:%x) nbytes %d\n",
2814 i, tx_data_bd, tx_data_bd->addr_hi, tx_data_bd->addr_lo,
2815 le16_to_cpu(tx_data_bd->nbytes));
2818 DP(NETIF_MSG_TX_QUEUED, "last bd @%p\n", tx_data_bd);
2820 /* update with actual num BDs */
2821 first_bd->nbd = cpu_to_le16(nbd);
2823 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2825 /* now send a tx doorbell, counting the next BD
2826 * if the packet contains or ends with it
2828 if (TX_BD_POFF(bd_prod) < nbd)
2831 /* total_pkt_bytes should be set on the first data BD if
2832 * it's not an LSO packet and there is more than one
2833 * data BD. In this case pkt_size is limited by an MTU value.
2834 * However we prefer to set it for an LSO packet (while we don't
2835 * have to) in order to save some CPU cycles in the non-LSO
2836 * case, where we care much more about them.
2838 if (total_pkt_bd != NULL)
2839 total_pkt_bd->total_pkt_bytes = pkt_size;
2842 DP(NETIF_MSG_TX_QUEUED,
2843 "PBD (E1X) @%p ip_data %x ip_hlen %u ip_id %u lso_mss %u"
2844 " tcp_flags %x xsum %x seq %u hlen %u\n",
2845 pbd_e1x, pbd_e1x->global_data, pbd_e1x->ip_hlen_w,
2846 pbd_e1x->ip_id, pbd_e1x->lso_mss, pbd_e1x->tcp_flags,
2847 pbd_e1x->tcp_pseudo_csum, pbd_e1x->tcp_send_seq,
2848 le16_to_cpu(pbd_e1x->total_hlen_w));
2850 DP(NETIF_MSG_TX_QUEUED,
2851 "PBD (E2) @%p dst %x %x %x src %x %x %x parsing_data %x\n",
2852 pbd_e2, pbd_e2->dst_mac_addr_hi, pbd_e2->dst_mac_addr_mid,
2853 pbd_e2->dst_mac_addr_lo, pbd_e2->src_mac_addr_hi,
2854 pbd_e2->src_mac_addr_mid, pbd_e2->src_mac_addr_lo,
2855 pbd_e2->parsing_data);
2856 DP(NETIF_MSG_TX_QUEUED, "doorbell: nbd %d bd %u\n", nbd, bd_prod);
2858 txdata->tx_pkt_prod++;
2860 * Make sure that the BD data is updated before updating the producer
2861 * since FW might read the BD right after the producer is updated.
2862 * This is only applicable for weak-ordered memory model archs such
2863 * as IA-64. The following barrier is also mandatory since FW
2864 * assumes packets must have BDs.
2868 txdata->tx_db.data.prod += nbd;
2871 DOORBELL(bp, txdata->cid, txdata->tx_db.raw);
2875 txdata->tx_bd_prod += nbd;
2877 if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_SKB_FRAGS + 3)) {
2878 netif_tx_stop_queue(txq);
2880 /* paired memory barrier is in bnx2x_tx_int(); we have to keep
2881 * ordering of set_bit() in netif_tx_stop_queue() and the read of
* txdata->tx_bd_cons */
2885 fp->eth_q_stats.driver_xoff++;
2886 if (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 3)
2887 netif_tx_wake_queue(txq);
2891 return NETDEV_TX_OK;
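/* The stop/wake sequence above is the usual lock-free producer/consumer
 * pattern for a TX ring. A condensed sketch of both sides (illustrative
 * and compiled out; tx_avail() is a hypothetical stand-in for
 * bnx2x_tx_avail(bp, txdata)):
 */
#if 0
static void example_producer_side(struct netdev_queue *txq)
{
	if (tx_avail() < MAX_SKB_FRAGS + 3) {
		netif_tx_stop_queue(txq);
		smp_mb();	/* order the stop vs. the re-check below */
		if (tx_avail() >= MAX_SKB_FRAGS + 3)
			netif_tx_wake_queue(txq); /* the consumer raced us */
	}
}

static void example_consumer_side(struct netdev_queue *txq)
{
	/* runs in bnx2x_tx_int() after BDs have been freed */
	smp_mb();	/* pairs with the producer's barrier */
	if (netif_tx_queue_stopped(txq) &&
	    tx_avail() >= MAX_SKB_FRAGS + 3)
		netif_tx_wake_queue(txq);
}
#endif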
2895 * bnx2x_setup_tc - routine to configure net_device for multi tc
2897 * @netdev: net device to configure
2898 * @tc: number of traffic classes to enable
2900 * callback connected to the ndo_setup_tc function pointer
2902 int bnx2x_setup_tc(struct net_device *dev, u8 num_tc)
2904 int cos, prio, count, offset;
2905 struct bnx2x *bp = netdev_priv(dev);
2907 /* setup tc must be called under rtnl lock */
2910 /* no traffic classes requested. aborting */
2912 netdev_reset_tc(dev);
2916 /* requested to support too many traffic classes */
2917 if (num_tc > bp->max_cos) {
2918 DP(NETIF_MSG_TX_ERR, "support for too many traffic classes"
2919 " requested: %d. max supported is %d\n",
2920 num_tc, bp->max_cos);
2924 /* declare amount of supported traffic classes */
2925 if (netdev_set_num_tc(dev, num_tc)) {
2926 DP(NETIF_MSG_TX_ERR, "failed to declare %d traffic classes\n",
2931 /* configure priority to traffic class mapping */
2932 for (prio = 0; prio < BNX2X_MAX_PRIORITY; prio++) {
2933 netdev_set_prio_tc_map(dev, prio, bp->prio_to_cos[prio]);
2934 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d\n",
2935 prio, bp->prio_to_cos[prio]);
2939 /* Use this configuration to differentiate tc0 from other COSes.
2940 This can be used for ETS or PFC, and saves the effort of setting
2941 up a multi-class queueing discipline or negotiating DCBX with a switch
2942 netdev_set_prio_tc_map(dev, 0, 0);
2943 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d\n", 0, 0);
2944 for (prio = 1; prio < 16; prio++) {
2945 netdev_set_prio_tc_map(dev, prio, 1);
2946 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d\n", prio, 1);
2949 /* configure traffic class to transmission queue mapping */
2950 for (cos = 0; cos < bp->max_cos; cos++) {
2951 count = BNX2X_NUM_ETH_QUEUES(bp);
2952 offset = cos * MAX_TXQS_PER_COS;
2953 netdev_set_tc_queue(dev, cos, count, offset);
2954 DP(BNX2X_MSG_SP, "mapping tc %d to offset %d count %d\n",
2955 cos, offset, count);
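/* Worked example (illustrative numbers): with bp->max_cos = 3 and
 * BNX2X_NUM_ETH_QUEUES(bp) = 4, tc0 maps to txqs [0..3] at offset 0,
 * tc1 to the 4 txqs starting at 1 * MAX_TXQS_PER_COS and tc2 to the 4
 * txqs starting at 2 * MAX_TXQS_PER_COS, i.e. each traffic class owns a
 * fixed-size block of transmission queues.
 */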
2961 /* called with rtnl_lock */
2962 int bnx2x_change_mac_addr(struct net_device *dev, void *p)
2964 struct sockaddr *addr = p;
2965 struct bnx2x *bp = netdev_priv(dev);
2968 if (!is_valid_ether_addr((u8 *)(addr->sa_data)))
2971 if (netif_running(dev)) {
2972 rc = bnx2x_set_eth_mac(bp, false);
2977 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
2979 if (netif_running(dev))
2980 rc = bnx2x_set_eth_mac(bp, true);
2985 static void bnx2x_free_fp_mem_at(struct bnx2x *bp, int fp_index)
2987 union host_hc_status_block *sb = &bnx2x_fp(bp, fp_index, status_blk);
2988 struct bnx2x_fastpath *fp = &bp->fp[fp_index];
2993 if (IS_FCOE_IDX(fp_index)) {
2994 memset(sb, 0, sizeof(union host_hc_status_block));
2995 fp->status_blk_mapping = 0;
3000 if (!CHIP_IS_E1x(bp))
3001 BNX2X_PCI_FREE(sb->e2_sb,
3002 bnx2x_fp(bp, fp_index,
3003 status_blk_mapping),
3004 sizeof(struct host_hc_status_block_e2));
3006 BNX2X_PCI_FREE(sb->e1x_sb,
3007 bnx2x_fp(bp, fp_index,
3008 status_blk_mapping),
3009 sizeof(struct host_hc_status_block_e1x));
3014 if (!skip_rx_queue(bp, fp_index)) {
3015 bnx2x_free_rx_bds(fp);
3017 /* fastpath rx rings: rx_buf rx_desc rx_comp */
3018 BNX2X_FREE(bnx2x_fp(bp, fp_index, rx_buf_ring));
3019 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_desc_ring),
3020 bnx2x_fp(bp, fp_index, rx_desc_mapping),
3021 sizeof(struct eth_rx_bd) * NUM_RX_BD);
3023 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_comp_ring),
3024 bnx2x_fp(bp, fp_index, rx_comp_mapping),
3025 sizeof(struct eth_fast_path_rx_cqe) *
3029 BNX2X_FREE(bnx2x_fp(bp, fp_index, rx_page_ring));
3030 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_sge_ring),
3031 bnx2x_fp(bp, fp_index, rx_sge_mapping),
3032 BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
3036 if (!skip_tx_queue(bp, fp_index)) {
3037 /* fastpath tx rings: tx_buf tx_desc */
3038 for_each_cos_in_tx_queue(fp, cos) {
3039 struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
3042 "freeing tx memory of fp %d cos %d cid %d\n",
3043 fp_index, cos, txdata->cid);
3045 BNX2X_FREE(txdata->tx_buf_ring);
3046 BNX2X_PCI_FREE(txdata->tx_desc_ring,
3047 txdata->tx_desc_mapping,
3048 sizeof(union eth_tx_bd_types) * NUM_TX_BD);
3051 /* end of fastpath */
3054 void bnx2x_free_fp_mem(struct bnx2x *bp)
3057 for_each_queue(bp, i)
3058 bnx2x_free_fp_mem_at(bp, i);
3061 static inline void set_sb_shortcuts(struct bnx2x *bp, int index)
3063 union host_hc_status_block status_blk = bnx2x_fp(bp, index, status_blk);
3064 if (!CHIP_IS_E1x(bp)) {
3065 bnx2x_fp(bp, index, sb_index_values) =
3066 (__le16 *)status_blk.e2_sb->sb.index_values;
3067 bnx2x_fp(bp, index, sb_running_index) =
3068 (__le16 *)status_blk.e2_sb->sb.running_index;
3070 bnx2x_fp(bp, index, sb_index_values) =
3071 (__le16 *)status_blk.e1x_sb->sb.index_values;
3072 bnx2x_fp(bp, index, sb_running_index) =
3073 (__le16 *)status_blk.e1x_sb->sb.running_index;
3077 static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index)
3079 union host_hc_status_block *sb;
3080 struct bnx2x_fastpath *fp = &bp->fp[index];
3084 /* if rx_ring_size specified - use it */
3085 int rx_ring_size = bp->rx_ring_size ? bp->rx_ring_size :
3086 MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp);
3088 /* allocate at least the number of buffers required by FW */
3089 rx_ring_size = max_t(int, bp->disable_tpa ? MIN_RX_SIZE_NONTPA :
3094 sb = &bnx2x_fp(bp, index, status_blk);
3096 if (!IS_FCOE_IDX(index)) {
3099 if (!CHIP_IS_E1x(bp))
3100 BNX2X_PCI_ALLOC(sb->e2_sb,
3101 &bnx2x_fp(bp, index, status_blk_mapping),
3102 sizeof(struct host_hc_status_block_e2));
3104 BNX2X_PCI_ALLOC(sb->e1x_sb,
3105 &bnx2x_fp(bp, index, status_blk_mapping),
3106 sizeof(struct host_hc_status_block_e1x));
3111 /* FCoE Queue uses Default SB and doesn't ACK the SB, thus no need to
3112 * set shortcuts for it.
3114 if (!IS_FCOE_IDX(index))
3115 set_sb_shortcuts(bp, index);
3118 if (!skip_tx_queue(bp, index)) {
3119 /* fastpath tx rings: tx_buf tx_desc */
3120 for_each_cos_in_tx_queue(fp, cos) {
3121 struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
3123 DP(BNX2X_MSG_SP, "allocating tx memory of "
3127 BNX2X_ALLOC(txdata->tx_buf_ring,
3128 sizeof(struct sw_tx_bd) * NUM_TX_BD);
3129 BNX2X_PCI_ALLOC(txdata->tx_desc_ring,
3130 &txdata->tx_desc_mapping,
3131 sizeof(union eth_tx_bd_types) * NUM_TX_BD);
3136 if (!skip_rx_queue(bp, index)) {
3137 /* fastpath rx rings: rx_buf rx_desc rx_comp */
3138 BNX2X_ALLOC(bnx2x_fp(bp, index, rx_buf_ring),
3139 sizeof(struct sw_rx_bd) * NUM_RX_BD);
3140 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_desc_ring),
3141 &bnx2x_fp(bp, index, rx_desc_mapping),
3142 sizeof(struct eth_rx_bd) * NUM_RX_BD);
3144 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_comp_ring),
3145 &bnx2x_fp(bp, index, rx_comp_mapping),
3146 sizeof(struct eth_fast_path_rx_cqe) *
3150 BNX2X_ALLOC(bnx2x_fp(bp, index, rx_page_ring),
3151 sizeof(struct sw_rx_page) * NUM_RX_SGE);
3152 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_sge_ring),
3153 &bnx2x_fp(bp, index, rx_sge_mapping),
3154 BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
3156 bnx2x_set_next_page_rx_bd(fp);
3159 bnx2x_set_next_page_rx_cq(fp);
3162 ring_size = bnx2x_alloc_rx_bds(fp, rx_ring_size);
3163 if (ring_size < rx_ring_size)
3169 /* handles low memory cases */
3171 BNX2X_ERR("Unable to allocate full memory for queue %d (size %d)\n",
3173 /* FW will drop all packets if the queue is not big enough;
3174 * in these cases we disable the queue.
3175 * Min size is different for OOO, TPA and non-TPA queues.
3177 if (ring_size < (fp->disable_tpa ?
3178 MIN_RX_SIZE_NONTPA : MIN_RX_SIZE_TPA)) {
3179 /* release memory allocated for this queue */
3180 bnx2x_free_fp_mem_at(bp, index);
3186 int bnx2x_alloc_fp_mem(struct bnx2x *bp)
3191 * 1. Allocate FP for leading - fatal if error
3192 * 2. {CNIC} Allocate FCoE FP - fatal if error
3193 * 3. {CNIC} Allocate OOO + FWD - disable OOO if error
3194 * 4. Allocate RSS - fix number of queues if error
3198 if (bnx2x_alloc_fp_mem_at(bp, 0))
3204 if (bnx2x_alloc_fp_mem_at(bp, FCOE_IDX))
3205 /* we will fail the load process instead of marking
* NO_FCOE_FLAG */
3212 for_each_nondefault_eth_queue(bp, i)
3213 if (bnx2x_alloc_fp_mem_at(bp, i))
3216 /* handle memory failures */
3217 if (i != BNX2X_NUM_ETH_QUEUES(bp)) {
3218 int delta = BNX2X_NUM_ETH_QUEUES(bp) - i;
3223 * move the non-eth FPs next to the last eth FP;
3224 * this must be done in the following order:
3225 * FCOE_IDX < FWD_IDX < OOO_IDX
3228 /* move the FCoE fp even if NO_FCOE_FLAG is on */
3229 bnx2x_move_fp(bp, FCOE_IDX, FCOE_IDX - delta);
3231 bp->num_queues -= delta;
3232 BNX2X_ERR("Adjusted num of queues from %d to %d\n",
3233 bp->num_queues + delta, bp->num_queues);
3239 void bnx2x_free_mem_bp(struct bnx2x *bp)
3242 kfree(bp->msix_table);
3246 int __devinit bnx2x_alloc_mem_bp(struct bnx2x *bp)
3248 struct bnx2x_fastpath *fp;
3249 struct msix_entry *tbl;
3250 struct bnx2x_ilt *ilt;
3251 int msix_table_size = 0;
3254 * The biggest MSI-X table we might need is the maximum number of fast
3255 * path IGU SBs plus the default SB (for the PF).
3257 msix_table_size = bp->igu_sb_cnt + 1;
3259 /* fp array: RSS plus CNIC related L2 queues */
3260 fp = kzalloc((BNX2X_MAX_RSS_COUNT(bp) + NON_ETH_CONTEXT_USE) *
3261 sizeof(*fp), GFP_KERNEL);
3267 tbl = kzalloc(msix_table_size * sizeof(*tbl), GFP_KERNEL);
3270 bp->msix_table = tbl;
3273 ilt = kzalloc(sizeof(*ilt), GFP_KERNEL);
3280 bnx2x_free_mem_bp(bp);
3285 int bnx2x_reload_if_running(struct net_device *dev)
3287 struct bnx2x *bp = netdev_priv(dev);
3289 if (unlikely(!netif_running(dev)))
3292 bnx2x_nic_unload(bp, UNLOAD_NORMAL);
3293 return bnx2x_nic_load(bp, LOAD_NORMAL);
3296 int bnx2x_get_cur_phy_idx(struct bnx2x *bp)
3298 u32 sel_phy_idx = 0;
3299 if (bp->link_params.num_phys <= 1)
3302 if (bp->link_vars.link_up) {
3303 sel_phy_idx = EXT_PHY1;
3304 /* In case link is SERDES, check if the EXT_PHY2 is the one */
3305 if ((bp->link_vars.link_status & LINK_STATUS_SERDES_LINK) &&
3306 (bp->link_params.phy[EXT_PHY2].supported & SUPPORTED_FIBRE))
3307 sel_phy_idx = EXT_PHY2;
3310 switch (bnx2x_phy_selection(&bp->link_params)) {
3311 case PORT_HW_CFG_PHY_SELECTION_HARDWARE_DEFAULT:
3312 case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY:
3313 case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY:
3314 sel_phy_idx = EXT_PHY1;
3316 case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY:
3317 case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY_PRIORITY:
3318 sel_phy_idx = EXT_PHY2;
3326 int bnx2x_get_link_cfg_idx(struct bnx2x *bp)
3328 u32 sel_phy_idx = bnx2x_get_cur_phy_idx(bp);
3330 * The selected active PHY is always after swapping (in case PHY
3331 * swapping is enabled). So when swapping is enabled, we need to reverse
* the configuration */
3335 if (bp->link_params.multi_phy_config &
3336 PORT_HW_CFG_PHY_SWAPPED_ENABLED) {
3337 if (sel_phy_idx == EXT_PHY1)
3338 sel_phy_idx = EXT_PHY2;
3339 else if (sel_phy_idx == EXT_PHY2)
3340 sel_phy_idx = EXT_PHY1;
3342 return LINK_CONFIG_IDX(sel_phy_idx);
3345 #if defined(NETDEV_FCOE_WWNN) && defined(BCM_CNIC)
3346 int bnx2x_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
3348 struct bnx2x *bp = netdev_priv(dev);
3349 struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
3352 case NETDEV_FCOE_WWNN:
3353 *wwn = HILO_U64(cp->fcoe_wwn_node_name_hi,
3354 cp->fcoe_wwn_node_name_lo);
3356 case NETDEV_FCOE_WWPN:
3357 *wwn = HILO_U64(cp->fcoe_wwn_port_name_hi,
3358 cp->fcoe_wwn_port_name_lo);
3368 /* called with rtnl_lock */
3369 int bnx2x_change_mtu(struct net_device *dev, int new_mtu)
3371 struct bnx2x *bp = netdev_priv(dev);
3373 if (bp->recovery_state != BNX2X_RECOVERY_DONE) {
3374 pr_err("Handling parity error recovery. Try again later\n");
3378 if ((new_mtu > ETH_MAX_JUMBO_PACKET_SIZE) ||
3379 ((new_mtu + ETH_HLEN) < ETH_MIN_PACKET_SIZE))
3382 /* This does not race with packet allocation
3383 * because the actual alloc size is
3384 * only updated as part of load
3388 return bnx2x_reload_if_running(dev);
3391 u32 bnx2x_fix_features(struct net_device *dev, u32 features)
3393 struct bnx2x *bp = netdev_priv(dev);
3395 /* TPA requires Rx CSUM offloading */
3396 if (!(features & NETIF_F_RXCSUM) || bp->disable_tpa)
3397 features &= ~NETIF_F_LRO;
3402 int bnx2x_set_features(struct net_device *dev, u32 features)
3404 struct bnx2x *bp = netdev_priv(dev);
3405 u32 flags = bp->flags;
3406 bool bnx2x_reload = false;
3408 if (features & NETIF_F_LRO)
3409 flags |= TPA_ENABLE_FLAG;
3411 flags &= ~TPA_ENABLE_FLAG;
3413 if (features & NETIF_F_LOOPBACK) {
3414 if (bp->link_params.loopback_mode != LOOPBACK_BMAC) {
3415 bp->link_params.loopback_mode = LOOPBACK_BMAC;
3416 bnx2x_reload = true;
3419 if (bp->link_params.loopback_mode != LOOPBACK_NONE) {
3420 bp->link_params.loopback_mode = LOOPBACK_NONE;
3421 bnx2x_reload = true;
3425 if (flags ^ bp->flags) {
3427 bnx2x_reload = true;
3431 if (bp->recovery_state == BNX2X_RECOVERY_DONE)
3432 return bnx2x_reload_if_running(dev);
3433 /* else: bnx2x_nic_load() will be called at end of recovery */
3439 void bnx2x_tx_timeout(struct net_device *dev)
3441 struct bnx2x *bp = netdev_priv(dev);
3443 #ifdef BNX2X_STOP_ON_ERROR
3448 smp_mb__before_clear_bit();
3449 set_bit(BNX2X_SP_RTNL_TX_TIMEOUT, &bp->sp_rtnl_state);
3450 smp_mb__after_clear_bit();
3452 /* This allows the netif to be shut down gracefully before resetting */
3453 schedule_delayed_work(&bp->sp_rtnl_task, 0);
3456 int bnx2x_suspend(struct pci_dev *pdev, pm_message_t state)
3458 struct net_device *dev = pci_get_drvdata(pdev);
3462 dev_err(&pdev->dev, "BAD net device from bnx2x_init_one\n");
3465 bp = netdev_priv(dev);
3469 pci_save_state(pdev);
3471 if (!netif_running(dev)) {
3476 netif_device_detach(dev);
3478 bnx2x_nic_unload(bp, UNLOAD_CLOSE);
3480 bnx2x_set_power_state(bp, pci_choose_state(pdev, state));
3487 int bnx2x_resume(struct pci_dev *pdev)
3489 struct net_device *dev = pci_get_drvdata(pdev);
3494 dev_err(&pdev->dev, "BAD net device from bnx2x_init_one\n");
3497 bp = netdev_priv(dev);
3499 if (bp->recovery_state != BNX2X_RECOVERY_DONE) {
3500 pr_err("Handling parity error recovery. Try again later\n");
3506 pci_restore_state(pdev);
3508 if (!netif_running(dev)) {
3513 bnx2x_set_power_state(bp, PCI_D0);
3514 netif_device_attach(dev);
3516 /* Since the chip was reset, clear the FW sequence number */
3518 rc = bnx2x_nic_load(bp, LOAD_OPEN);
3526 void bnx2x_set_ctx_validation(struct bnx2x *bp, struct eth_context *cxt,
3529 /* ustorm cxt validation */
3530 cxt->ustorm_ag_context.cdu_usage =
3531 CDU_RSRVD_VALUE_TYPE_A(HW_CID(bp, cid),
3532 CDU_REGION_NUMBER_UCM_AG, ETH_CONNECTION_TYPE);
3533 /* xcontext validation */
3534 cxt->xstorm_ag_context.cdu_reserved =
3535 CDU_RSRVD_VALUE_TYPE_A(HW_CID(bp, cid),
3536 CDU_REGION_NUMBER_XCM_AG, ETH_CONNECTION_TYPE);
3539 static inline void storm_memset_hc_timeout(struct bnx2x *bp, u8 port,
3540 u8 fw_sb_id, u8 sb_index,
3544 u32 addr = BAR_CSTRORM_INTMEM +
3545 CSTORM_STATUS_BLOCK_DATA_TIMEOUT_OFFSET(fw_sb_id, sb_index);
3546 REG_WR8(bp, addr, ticks);
3547 DP(NETIF_MSG_HW, "port %x fw_sb_id %d sb_index %d ticks %d\n",
3548 port, fw_sb_id, sb_index, ticks);
3551 static inline void storm_memset_hc_disable(struct bnx2x *bp, u8 port,
3552 u16 fw_sb_id, u8 sb_index,
3555 u32 enable_flag = disable ? 0 : (1 << HC_INDEX_DATA_HC_ENABLED_SHIFT);
3556 u32 addr = BAR_CSTRORM_INTMEM +
3557 CSTORM_STATUS_BLOCK_DATA_FLAGS_OFFSET(fw_sb_id, sb_index);
3558 u16 flags = REG_RD16(bp, addr);
3560 flags &= ~HC_INDEX_DATA_HC_ENABLED;
3561 flags |= enable_flag;
3562 REG_WR16(bp, addr, flags);
3563 DP(NETIF_MSG_HW, "port %x fw_sb_id %d sb_index %d disable %d\n",
3564 port, fw_sb_id, sb_index, disable);
3567 void bnx2x_update_coalesce_sb_index(struct bnx2x *bp, u8 fw_sb_id,
3568 u8 sb_index, u8 disable, u16 usec)
3570 int port = BP_PORT(bp);
3571 u8 ticks = usec / BNX2X_BTR;
3573 storm_memset_hc_timeout(bp, port, fw_sb_id, sb_index, ticks);
3575 disable = disable ? 1 : (usec ? 0 : 1);
3576 storm_memset_hc_disable(bp, port, fw_sb_id, sb_index, disable);
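/* Example of the conversion above (illustrative; BNX2X_BTR is the
 * coalescing timer resolution in usec - 4 in this driver unless changed):
 * usec = 100 gives ticks = 100 / 4 = 25. Also note that a zero timeout
 * with disable == 0 still disables the index, since (usec ? 0 : 1)
 * evaluates to 1.
 */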