1 /* bnx2x_cmn.c: Broadcom Everest network driver.
3 * Copyright (c) 2007-2011 Broadcom Corporation
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation.
9 * Maintained by: Eilon Greenstein <eilong@broadcom.com>
10 * Written by: Eliezer Tamir
11 * Based on code from Michael Chan's bnx2 driver
12 * UDP CSUM errata workaround by Arik Gendelman
13 * Slowpath and fastpath rework by Vladislav Zolotarov
14 * Statistics and Link management by Yitchak Gertner
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20 #include <linux/etherdevice.h>
21 #include <linux/if_vlan.h>
22 #include <linux/interrupt.h>
25 #include <net/ip6_checksum.h>
26 #include <linux/firmware.h>
27 #include <linux/prefetch.h>
28 #include "bnx2x_cmn.h"
29 #include "bnx2x_init.h"
35 * bnx2x_bz_fp - zero content of the fastpath structure.
38 * @index: fastpath index to be zeroed
40 * Makes sure the contents of bp->fp[index].napi are kept intact.
43 static inline void bnx2x_bz_fp(struct bnx2x *bp, int index)
45 struct bnx2x_fastpath *fp = &bp->fp[index];
46 struct napi_struct orig_napi = fp->napi;
47 /* bzero bnx2x_fastpath contents */
48 memset(fp, 0, sizeof(*fp));
50 /* Restore the NAPI object, as it has already been initialized */
56 fp->max_cos = bp->max_cos;
58 /* Special queues support only one CoS */
62 * set the TPA flag for each queue. The TPA flag determines the queue's
63 * minimal size, so it must be set prior to queue memory allocation
65 fp->disable_tpa = ((bp->flags & TPA_ENABLE_FLAG) == 0);
68 /* We don't want TPA on an FCoE L2 ring */
75 * bnx2x_move_fp - move content of the fastpath structure.
78 * @from: source FP index
79 * @to: destination FP index
81 * Makes sure the contents of bp->fp[to].napi are kept intact.
84 static inline void bnx2x_move_fp(struct bnx2x *bp, int from, int to)
86 struct bnx2x_fastpath *from_fp = &bp->fp[from];
87 struct bnx2x_fastpath *to_fp = &bp->fp[to];
88 struct napi_struct orig_napi = to_fp->napi;
89 /* Move bnx2x_fastpath contents */
90 memcpy(to_fp, from_fp, sizeof(*to_fp));
93 /* Restore the NAPI object, as it has already been initialized */
94 to_fp->napi = orig_napi;
97 int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */
99 /* free skb in the packet ring at pos idx
100 * return idx of last bd freed
102 static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata,
105 struct sw_tx_bd *tx_buf = &txdata->tx_buf_ring[idx];
106 struct eth_tx_start_bd *tx_start_bd;
107 struct eth_tx_bd *tx_data_bd;
108 struct sk_buff *skb = tx_buf->skb;
109 u16 bd_idx = TX_BD(tx_buf->first_bd), new_cons;
112 /* prefetch skb end pointer to speed up dev_kfree_skb() */
115 DP(BNX2X_MSG_FP, "fp[%d]: pkt_idx %d buff @(%p)->skb %p\n",
116 txdata->txq_index, idx, tx_buf, skb);
119 DP(BNX2X_MSG_OFF, "free bd_idx %d\n", bd_idx);
120 tx_start_bd = &txdata->tx_desc_ring[bd_idx].start_bd;
121 dma_unmap_single(&bp->pdev->dev, BD_UNMAP_ADDR(tx_start_bd),
122 BD_UNMAP_LEN(tx_start_bd), DMA_TO_DEVICE);
125 nbd = le16_to_cpu(tx_start_bd->nbd) - 1;
126 #ifdef BNX2X_STOP_ON_ERROR
127 if ((nbd - 1) > (MAX_SKB_FRAGS + 2)) {
128 BNX2X_ERR("BAD nbd!\n");
132 new_cons = nbd + tx_buf->first_bd;
134 /* Get the next bd */
135 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
137 /* Skip a parse bd... */
139 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
141 /* ...and the TSO split header bd since they have no mapping */
142 if (tx_buf->flags & BNX2X_TSO_SPLIT_BD) {
144 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
150 DP(BNX2X_MSG_OFF, "free frag bd_idx %d\n", bd_idx);
151 tx_data_bd = &txdata->tx_desc_ring[bd_idx].reg_bd;
152 dma_unmap_page(&bp->pdev->dev, BD_UNMAP_ADDR(tx_data_bd),
153 BD_UNMAP_LEN(tx_data_bd), DMA_TO_DEVICE);
155 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
160 dev_kfree_skb_any(skb);
161 tx_buf->first_bd = 0;
167 int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata)
169 struct netdev_queue *txq;
170 u16 hw_cons, sw_cons, bd_cons = txdata->tx_bd_cons;
172 #ifdef BNX2X_STOP_ON_ERROR
173 if (unlikely(bp->panic))
177 txq = netdev_get_tx_queue(bp->dev, txdata->txq_index);
178 hw_cons = le16_to_cpu(*txdata->tx_cons_sb);
179 sw_cons = txdata->tx_pkt_cons;
181 while (sw_cons != hw_cons) {
184 pkt_cons = TX_BD(sw_cons);
186 DP(NETIF_MSG_TX_DONE, "queue[%d]: hw_cons %u sw_cons %u "
188 txdata->txq_index, hw_cons, sw_cons, pkt_cons);
190 bd_cons = bnx2x_free_tx_pkt(bp, txdata, pkt_cons);
194 txdata->tx_pkt_cons = sw_cons;
195 txdata->tx_bd_cons = bd_cons;
197 /* Need to make the tx_bd_cons update visible to start_xmit()
198 * before checking for netif_tx_queue_stopped(). Without the
199 * memory barrier, there is a small possibility that
200 * start_xmit() will miss it and cause the queue to be stopped forever.
202 * On the other hand we need an rmb() here to ensure the proper
203 * ordering of bit testing in the following
204 * netif_tx_queue_stopped(txq) call.
208 if (unlikely(netif_tx_queue_stopped(txq))) {
209 /* Taking tx_lock() is needed to prevent reenabling the queue
210 * while it's empty. This could have happened if rx_action() gets
211 * suspended in bnx2x_tx_int() after the condition before
212 * netif_tx_wake_queue(), while tx_action (bnx2x_start_xmit()):
214 * stops the queue->sees fresh tx_bd_cons->releases the queue->
215 * sends some packets consuming the whole queue again -> stops the queue.
219 __netif_tx_lock(txq, smp_processor_id());
221 if ((netif_tx_queue_stopped(txq)) &&
222 (bp->state == BNX2X_STATE_OPEN) &&
223 (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 4))
224 netif_tx_wake_queue(txq);
226 __netif_tx_unlock(txq);
231 static inline void bnx2x_update_last_max_sge(struct bnx2x_fastpath *fp,
234 u16 last_max = fp->last_max_sge;
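/* The comparison is done on signed 16-bit values (SUB_S16) so it stays
 * correct when the SGE index wraps around the ring. */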
236 if (SUB_S16(idx, last_max) > 0)
237 fp->last_max_sge = idx;
240 static void bnx2x_update_sge_prod(struct bnx2x_fastpath *fp,
241 struct eth_fast_path_rx_cqe *fp_cqe)
243 struct bnx2x *bp = fp->bp;
244 u16 sge_len = SGE_PAGE_ALIGN(le16_to_cpu(fp_cqe->pkt_len) -
245 le16_to_cpu(fp_cqe->len_on_bd)) >> SGE_PAGE_SHIFT;
247 u16 last_max, last_elem, first_elem;
254 /* First mark all used pages */
255 for (i = 0; i < sge_len; i++)
256 BIT_VEC64_CLEAR_BIT(fp->sge_mask,
257 RX_SGE(le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[i])));
259 DP(NETIF_MSG_RX_STATUS, "fp_cqe->sgl[%d] = %d\n",
260 sge_len - 1, le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[sge_len - 1]));
262 /* Here we assume that the last SGE index is the biggest */
263 prefetch((void *)(fp->sge_mask));
264 bnx2x_update_last_max_sge(fp,
265 le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[sge_len - 1]));
267 last_max = RX_SGE(fp->last_max_sge);
268 last_elem = last_max >> BIT_VEC64_ELEM_SHIFT;
269 first_elem = RX_SGE(fp->rx_sge_prod) >> BIT_VEC64_ELEM_SHIFT;
271 /* If ring is not full */
272 if (last_elem + 1 != first_elem)
275 /* Now update the prod */
276 for (i = first_elem; i != last_elem; i = NEXT_SGE_MASK_ELEM(i)) {
277 if (likely(fp->sge_mask[i]))
280 fp->sge_mask[i] = BIT_VEC64_ELEM_ONE_MASK;
281 delta += BIT_VEC64_ELEM_SZ;
285 fp->rx_sge_prod += delta;
286 /* clear page-end entries */
287 bnx2x_clear_sge_mask_next_elems(fp);
290 DP(NETIF_MSG_RX_STATUS,
291 "fp->last_max_sge = %d fp->rx_sge_prod = %d\n",
292 fp->last_max_sge, fp->rx_sge_prod);
295 static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
296 struct sk_buff *skb, u16 cons, u16 prod,
297 struct eth_fast_path_rx_cqe *cqe)
299 struct bnx2x *bp = fp->bp;
300 struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
301 struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
302 struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
304 struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
305 struct sw_rx_bd *first_buf = &tpa_info->first_buf;
307 /* print error if current state != stop */
308 if (tpa_info->tpa_state != BNX2X_TPA_STOP)
309 BNX2X_ERR("start of bin not in stop [%d]\n", queue);
311 /* Try to map an empty skb from the aggregation info */
312 mapping = dma_map_single(&bp->pdev->dev,
313 first_buf->skb->data,
314 fp->rx_buf_size, DMA_FROM_DEVICE);
316 * ...if it fails - move the skb from the consumer to the producer
317 * and set the current aggregation state as ERROR to drop it
318 * when TPA_STOP arrives.
321 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
322 /* Move the BD from the consumer to the producer */
323 bnx2x_reuse_rx_skb(fp, cons, prod);
324 tpa_info->tpa_state = BNX2X_TPA_ERROR;
328 /* move empty skb from pool to prod */
329 prod_rx_buf->skb = first_buf->skb;
330 dma_unmap_addr_set(prod_rx_buf, mapping, mapping);
331 /* point prod_bd to new skb */
332 prod_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
333 prod_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
335 /* move partial skb from cons to pool (don't unmap yet) */
336 *first_buf = *cons_rx_buf;
338 /* mark bin state as START */
339 tpa_info->parsing_flags =
340 le16_to_cpu(cqe->pars_flags.flags);
341 tpa_info->vlan_tag = le16_to_cpu(cqe->vlan_tag);
342 tpa_info->tpa_state = BNX2X_TPA_START;
343 tpa_info->len_on_bd = le16_to_cpu(cqe->len_on_bd);
344 tpa_info->placement_offset = cqe->placement_offset;
346 #ifdef BNX2X_STOP_ON_ERROR
347 fp->tpa_queue_used |= (1 << queue);
348 #ifdef _ASM_GENERIC_INT_L64_H
349 DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%lx\n",
351 DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%llx\n",
357 /* Timestamp option length allowed for TPA aggregation:
359 * nop nop kind length echo val
361 #define TPA_TSTAMP_OPT_LEN 12
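/* 12 = nop (1) + nop (1) + kind (1) + length (1) + TS value (4) + TS echo reply (4) */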
363 * bnx2x_set_lro_mss - calculate the approximate value of the MSS
366 * @parsing_flags: parsing flags from the START CQE
367 * @len_on_bd: total length of the first packet for the
370 * Approximate value of the MSS for this aggregation calculated using
371 * the first packet of it.
373 static inline u16 bnx2x_set_lro_mss(struct bnx2x *bp, u16 parsing_flags,
377 * TPA aggregation won't have either IP options or TCP options
378 * other than timestamp or IPv6 extension headers.
380 u16 hdrs_len = ETH_HLEN + sizeof(struct tcphdr);
382 if (GET_FLAG(parsing_flags, PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
383 PRS_FLAG_OVERETH_IPV6)
384 hdrs_len += sizeof(struct ipv6hdr);
386 hdrs_len += sizeof(struct iphdr);
389 /* Check if there was a TCP timestamp; if there was, it will
390 * always be 12 bytes long: nop nop kind length echo val.
392 * Otherwise the FW would close the aggregation.
394 if (parsing_flags & PARSING_FLAGS_TIME_STAMP_EXIST_FLAG)
395 hdrs_len += TPA_TSTAMP_OPT_LEN;
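/* e.g. IPv4 + TCP with timestamps: 14 (ETH) + 20 (IP) + 20 (TCP) + 12 = 66 bytes of headers */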
397 return len_on_bd - hdrs_len;
400 static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
401 u16 queue, struct sk_buff *skb,
402 struct eth_end_agg_rx_cqe *cqe,
405 struct sw_rx_page *rx_pg, old_rx_pg;
406 u32 i, frag_len, frag_size, pages;
409 struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
410 u16 len_on_bd = tpa_info->len_on_bd;
412 frag_size = le16_to_cpu(cqe->pkt_len) - len_on_bd;
413 pages = SGE_PAGE_ALIGN(frag_size) >> SGE_PAGE_SHIFT;
415 /* This is needed in order to enable forwarding support */
417 skb_shinfo(skb)->gso_size = bnx2x_set_lro_mss(bp,
418 tpa_info->parsing_flags, len_on_bd);
420 #ifdef BNX2X_STOP_ON_ERROR
421 if (pages > min_t(u32, 8, MAX_SKB_FRAGS)*SGE_PAGE_SIZE*PAGES_PER_SGE) {
422 BNX2X_ERR("SGL length is too long: %d. CQE index is %d\n",
424 BNX2X_ERR("cqe->pkt_len = %d\n", cqe->pkt_len);
430 /* Run through the SGL and compose the fragmented skb */
431 for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) {
432 u16 sge_idx = RX_SGE(le16_to_cpu(cqe->sgl_or_raw_data.sgl[j]));
434 /* FW gives the indices of the SGE as if the ring is an array
435 (meaning that "next" element will consume 2 indices) */
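/* Each SGE entry maps PAGES_PER_SGE pages, so a fragment holds at most
 * SGE_PAGE_SIZE * PAGES_PER_SGE bytes; the last SGE takes whatever of
 * frag_size is left. */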
436 frag_len = min(frag_size, (u32)(SGE_PAGE_SIZE*PAGES_PER_SGE));
437 rx_pg = &fp->rx_page_ring[sge_idx];
440 /* If we fail to allocate a substitute page, we simply stop
441 where we are and drop the whole packet */
442 err = bnx2x_alloc_rx_sge(bp, fp, sge_idx);
444 fp->eth_q_stats.rx_skb_alloc_failed++;
448 /* Unmap the page as we are going to pass it to the stack */
449 dma_unmap_page(&bp->pdev->dev,
450 dma_unmap_addr(&old_rx_pg, mapping),
451 SGE_PAGE_SIZE*PAGES_PER_SGE, DMA_FROM_DEVICE);
453 /* Add one frag and update the appropriate fields in the skb */
454 skb_fill_page_desc(skb, j, old_rx_pg.page, 0, frag_len);
456 skb->data_len += frag_len;
457 skb->truesize += SGE_PAGE_SIZE * PAGES_PER_SGE;
458 skb->len += frag_len;
460 frag_size -= frag_len;
466 static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
467 u16 queue, struct eth_end_agg_rx_cqe *cqe,
470 struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
471 struct sw_rx_bd *rx_buf = &tpa_info->first_buf;
472 u8 pad = tpa_info->placement_offset;
473 u16 len = tpa_info->len_on_bd;
474 struct sk_buff *skb = rx_buf->skb;
476 struct sk_buff *new_skb;
477 u8 old_tpa_state = tpa_info->tpa_state;
479 tpa_info->tpa_state = BNX2X_TPA_STOP;
481 /* If there was an error during the handling of the TPA_START -
482 * drop this aggregation.
484 if (old_tpa_state == BNX2X_TPA_ERROR)
487 /* Try to allocate the new skb */
488 new_skb = netdev_alloc_skb(bp->dev, fp->rx_buf_size);
490 /* Unmap skb in the pool anyway, as we are going to change
491 pool entry status to BNX2X_TPA_STOP even if new skb allocation fails */
493 dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(rx_buf, mapping),
494 fp->rx_buf_size, DMA_FROM_DEVICE);
496 if (likely(new_skb)) {
498 prefetch(((char *)(skb)) + L1_CACHE_BYTES);
500 #ifdef BNX2X_STOP_ON_ERROR
501 if (pad + len > fp->rx_buf_size) {
502 BNX2X_ERR("skb_put is about to fail... "
503 "pad %d len %d rx_buf_size %d\n",
504 pad, len, fp->rx_buf_size);
510 skb_reserve(skb, pad);
513 skb->protocol = eth_type_trans(skb, bp->dev);
514 skb->ip_summed = CHECKSUM_UNNECESSARY;
516 if (!bnx2x_fill_frag_skb(bp, fp, queue, skb, cqe, cqe_idx)) {
517 if (tpa_info->parsing_flags & PARSING_FLAGS_VLAN)
518 __vlan_hwaccel_put_tag(skb, tpa_info->vlan_tag);
519 napi_gro_receive(&fp->napi, skb);
521 DP(NETIF_MSG_RX_STATUS, "Failed to allocate new pages"
522 " - dropping packet!\n");
523 dev_kfree_skb_any(skb);
527 /* put new skb in bin */
528 rx_buf->skb = new_skb;
534 /* drop the packet and keep the buffer in the bin */
535 DP(NETIF_MSG_RX_STATUS,
536 "Failed to allocate or map a new skb - dropping packet!\n");
537 fp->eth_q_stats.rx_skb_alloc_failed++;
540 /* Set Toeplitz hash value in the skb using the value from the
541 * CQE (calculated by HW).
543 static inline void bnx2x_set_skb_rxhash(struct bnx2x *bp, union eth_rx_cqe *cqe,
546 /* Set Toeplitz hash from CQE */
547 if ((bp->dev->features & NETIF_F_RXHASH) &&
548 (cqe->fast_path_cqe.status_flags &
549 ETH_FAST_PATH_RX_CQE_RSS_HASH_FLG))
551 le32_to_cpu(cqe->fast_path_cqe.rss_hash_result);
554 static void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe,
555 struct bnx2x_fastpath *fp)
557 /* Do nothing if no L4 csum validation was done.
558 * We do not check whether IP csum was validated. For IPv4 we assume
559 * that if the card got as far as validating the L4 csum, it also
560 * validated the IP csum. IPv6 has no IP csum.
562 if (cqe->fast_path_cqe.status_flags &
563 ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)
566 /* If L4 validation was done, check if an error was found. */
568 if (cqe->fast_path_cqe.type_error_flags &
569 (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG |
570 ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG))
571 fp->eth_q_stats.hw_csum_err++;
573 skb->ip_summed = CHECKSUM_UNNECESSARY;
576 int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
578 struct bnx2x *bp = fp->bp;
579 u16 bd_cons, bd_prod, bd_prod_fw, comp_ring_cons;
580 u16 hw_comp_cons, sw_comp_cons, sw_comp_prod;
583 #ifdef BNX2X_STOP_ON_ERROR
584 if (unlikely(bp->panic))
588 /* The CQ "next element" is of the same size as a regular element,
589 that's why it's OK here */
590 hw_comp_cons = le16_to_cpu(*fp->rx_cons_sb);
591 if ((hw_comp_cons & MAX_RCQ_DESC_CNT) == MAX_RCQ_DESC_CNT)
594 bd_cons = fp->rx_bd_cons;
595 bd_prod = fp->rx_bd_prod;
596 bd_prod_fw = bd_prod;
597 sw_comp_cons = fp->rx_comp_cons;
598 sw_comp_prod = fp->rx_comp_prod;
600 /* Memory barrier necessary as speculative reads of the rx
601 * buffer can be ahead of the index in the status block
605 DP(NETIF_MSG_RX_STATUS,
606 "queue[%d]: hw_comp_cons %u sw_comp_cons %u\n",
607 fp->index, hw_comp_cons, sw_comp_cons);
609 while (sw_comp_cons != hw_comp_cons) {
610 struct sw_rx_bd *rx_buf = NULL;
612 union eth_rx_cqe *cqe;
613 struct eth_fast_path_rx_cqe *cqe_fp;
615 enum eth_rx_cqe_type cqe_fp_type;
618 #ifdef BNX2X_STOP_ON_ERROR
619 if (unlikely(bp->panic))
623 comp_ring_cons = RCQ_BD(sw_comp_cons);
624 bd_prod = RX_BD(bd_prod);
625 bd_cons = RX_BD(bd_cons);
627 /* Prefetch the page containing the BD descriptor
628 at producer's index. It will be needed when a new skb is allocated */
630 prefetch((void *)(PAGE_ALIGN((unsigned long)
631 (&fp->rx_desc_ring[bd_prod])) -
634 cqe = &fp->rx_comp_ring[comp_ring_cons];
635 cqe_fp = &cqe->fast_path_cqe;
636 cqe_fp_flags = cqe_fp->type_error_flags;
637 cqe_fp_type = cqe_fp_flags & ETH_FAST_PATH_RX_CQE_TYPE;
639 DP(NETIF_MSG_RX_STATUS, "CQE type %x err %x status %x"
640 " queue %x vlan %x len %u\n", CQE_TYPE(cqe_fp_flags),
641 cqe_fp_flags, cqe_fp->status_flags,
642 le32_to_cpu(cqe_fp->rss_hash_result),
643 le16_to_cpu(cqe_fp->vlan_tag), le16_to_cpu(cqe_fp->pkt_len));
645 /* is this a slowpath msg? */
646 if (unlikely(CQE_TYPE_SLOW(cqe_fp_type))) {
647 bnx2x_sp_event(fp, cqe);
650 /* this is an rx packet */
652 rx_buf = &fp->rx_buf_ring[bd_cons];
656 if (!CQE_TYPE_FAST(cqe_fp_type)) {
657 #ifdef BNX2X_STOP_ON_ERROR
659 if (fp->disable_tpa &&
660 (CQE_TYPE_START(cqe_fp_type) ||
661 CQE_TYPE_STOP(cqe_fp_type)))
662 BNX2X_ERR("START/STOP packet while "
663 "disable_tpa type %x\n",
664 CQE_TYPE(cqe_fp_type));
667 if (CQE_TYPE_START(cqe_fp_type)) {
668 u16 queue = cqe_fp->queue_index;
669 DP(NETIF_MSG_RX_STATUS,
670 "calling tpa_start on queue %d\n",
673 bnx2x_tpa_start(fp, queue, skb,
677 /* Set Toeplitz hash for LRO skb */
678 bnx2x_set_skb_rxhash(bp, cqe, skb);
684 cqe->end_agg_cqe.queue_index;
685 DP(NETIF_MSG_RX_STATUS,
686 "calling tpa_stop on queue %d\n",
689 bnx2x_tpa_stop(bp, fp, queue,
692 #ifdef BNX2X_STOP_ON_ERROR
697 bnx2x_update_sge_prod(fp, cqe_fp);
702 len = le16_to_cpu(cqe_fp->pkt_len);
703 pad = cqe_fp->placement_offset;
704 dma_sync_single_for_cpu(&bp->pdev->dev,
705 dma_unmap_addr(rx_buf, mapping),
706 pad + RX_COPY_THRESH,
708 prefetch(((char *)(skb)) + L1_CACHE_BYTES);
710 /* is this an error packet? */
711 if (unlikely(cqe_fp_flags & ETH_RX_ERROR_FALGS)) {
713 "ERROR flags %x rx packet %u\n",
714 cqe_fp_flags, sw_comp_cons);
715 fp->eth_q_stats.rx_err_discard_pkt++;
719 /* Since we don't have a jumbo ring
720 * copy small packets if mtu > 1500
722 if ((bp->dev->mtu > ETH_MAX_PACKET_SIZE) &&
723 (len <= RX_COPY_THRESH)) {
724 struct sk_buff *new_skb;
726 new_skb = netdev_alloc_skb(bp->dev, len + pad);
727 if (new_skb == NULL) {
729 "ERROR packet dropped "
730 "because of alloc failure\n");
731 fp->eth_q_stats.rx_skb_alloc_failed++;
736 skb_copy_from_linear_data_offset(skb, pad,
737 new_skb->data + pad, len);
738 skb_reserve(new_skb, pad);
739 skb_put(new_skb, len);
741 bnx2x_reuse_rx_skb(fp, bd_cons, bd_prod);
746 if (likely(bnx2x_alloc_rx_skb(bp, fp, bd_prod) == 0)) {
747 dma_unmap_single(&bp->pdev->dev,
748 dma_unmap_addr(rx_buf, mapping),
751 skb_reserve(skb, pad);
756 "ERROR packet dropped because "
757 "of alloc failure\n");
758 fp->eth_q_stats.rx_skb_alloc_failed++;
760 bnx2x_reuse_rx_skb(fp, bd_cons, bd_prod);
764 skb->protocol = eth_type_trans(skb, bp->dev);
766 /* Set Toeplitz hash for a non-LRO skb */
767 bnx2x_set_skb_rxhash(bp, cqe, skb);
769 skb_checksum_none_assert(skb);
771 if (bp->dev->features & NETIF_F_RXCSUM)
772 bnx2x_csum_validate(skb, cqe, fp);
776 skb_record_rx_queue(skb, fp->index);
778 if (le16_to_cpu(cqe_fp->pars_flags.flags) &
780 __vlan_hwaccel_put_tag(skb,
781 le16_to_cpu(cqe_fp->vlan_tag));
782 napi_gro_receive(&fp->napi, skb);
788 bd_cons = NEXT_RX_IDX(bd_cons);
789 bd_prod = NEXT_RX_IDX(bd_prod);
790 bd_prod_fw = NEXT_RX_IDX(bd_prod_fw);
793 sw_comp_prod = NEXT_RCQ_IDX(sw_comp_prod);
794 sw_comp_cons = NEXT_RCQ_IDX(sw_comp_cons);
796 if (rx_pkt == budget)
800 fp->rx_bd_cons = bd_cons;
801 fp->rx_bd_prod = bd_prod_fw;
802 fp->rx_comp_cons = sw_comp_cons;
803 fp->rx_comp_prod = sw_comp_prod;
805 /* Update producers */
806 bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod,
809 fp->rx_pkt += rx_pkt;
815 static irqreturn_t bnx2x_msix_fp_int(int irq, void *fp_cookie)
817 struct bnx2x_fastpath *fp = fp_cookie;
818 struct bnx2x *bp = fp->bp;
821 DP(BNX2X_MSG_FP, "got an MSI-X interrupt on IDX:SB "
822 "[fp %d fw_sd %d igusb %d]\n",
823 fp->index, fp->fw_sb_id, fp->igu_sb_id);
824 bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0);
826 #ifdef BNX2X_STOP_ON_ERROR
827 if (unlikely(bp->panic))
831 /* Handle Rx and Tx according to MSI-X vector */
832 prefetch(fp->rx_cons_sb);
834 for_each_cos_in_tx_queue(fp, cos)
835 prefetch(fp->txdata[cos].tx_cons_sb);
837 prefetch(&fp->sb_running_index[SM_RX_ID]);
838 napi_schedule(&bnx2x_fp(bp, fp->index, napi));
843 /* HW Lock for shared dual port PHYs */
844 void bnx2x_acquire_phy_lock(struct bnx2x *bp)
846 mutex_lock(&bp->port.phy_mutex);
848 if (bp->port.need_hw_lock)
849 bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_MDIO);
852 void bnx2x_release_phy_lock(struct bnx2x *bp)
854 if (bp->port.need_hw_lock)
855 bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_MDIO);
857 mutex_unlock(&bp->port.phy_mutex);
860 /* calculates MF speed according to current linespeed and MF configuration */
861 u16 bnx2x_get_mf_speed(struct bnx2x *bp)
863 u16 line_speed = bp->link_vars.line_speed;
865 u16 maxCfg = bnx2x_extract_max_cfg(bp,
866 bp->mf_config[BP_VN(bp)]);
868 /* Calculate the current MAX line speed limit for the MF devices */
872 line_speed = (line_speed * maxCfg) / 100;
874 u16 vn_max_rate = maxCfg * 100;
876 if (vn_max_rate < line_speed)
877 line_speed = vn_max_rate;
885 * bnx2x_fill_report_data - fill link report data to report
888 * @data: link state to update
890 * It uses non-atomic bit operations because it is called under the mutex.
892 static inline void bnx2x_fill_report_data(struct bnx2x *bp,
893 struct bnx2x_link_report_data *data)
895 u16 line_speed = bnx2x_get_mf_speed(bp);
897 memset(data, 0, sizeof(*data));
899 /* Fill the report data: effective line speed */
900 data->line_speed = line_speed;
903 if (!bp->link_vars.link_up || (bp->flags & MF_FUNC_DIS))
904 __set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
905 &data->link_report_flags);
908 if (bp->link_vars.duplex == DUPLEX_FULL)
909 __set_bit(BNX2X_LINK_REPORT_FD, &data->link_report_flags);
911 /* Rx Flow Control is ON */
912 if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_RX)
913 __set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
915 /* Tx Flow Control is ON */
916 if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_TX)
917 __set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
921 * bnx2x_link_report - report link status to OS.
925 * Calls the __bnx2x_link_report() under the same locking scheme
926 * as the link/PHY state managing code to ensure consistent link reporting.
930 void bnx2x_link_report(struct bnx2x *bp)
932 bnx2x_acquire_phy_lock(bp);
933 __bnx2x_link_report(bp);
934 bnx2x_release_phy_lock(bp);
938 * __bnx2x_link_report - report link status to OS.
942 * Non-atomic implementation.
943 * Should be called under the phy_lock.
945 void __bnx2x_link_report(struct bnx2x *bp)
947 struct bnx2x_link_report_data cur_data;
951 bnx2x_read_mf_cfg(bp);
953 /* Read the current link report info */
954 bnx2x_fill_report_data(bp, &cur_data);
956 /* Don't report link down or exactly the same link status twice */
957 if (!memcmp(&cur_data, &bp->last_reported_link, sizeof(cur_data)) ||
958 (test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
959 &bp->last_reported_link.link_report_flags) &&
960 test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
961 &cur_data.link_report_flags)))
966 /* We are going to report new link parameters now -
967 * remember the current data for the next time.
969 memcpy(&bp->last_reported_link, &cur_data, sizeof(cur_data));
971 if (test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
972 &cur_data.link_report_flags)) {
973 netif_carrier_off(bp->dev);
974 netdev_err(bp->dev, "NIC Link is Down\n");
980 netif_carrier_on(bp->dev);
982 if (test_and_clear_bit(BNX2X_LINK_REPORT_FD,
983 &cur_data.link_report_flags))
988 /* Handle the FC at the end so that only these flags could possibly be
989 * set. This way we may easily check if there is no FC enabled.
992 if (cur_data.link_report_flags) {
993 if (test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
994 &cur_data.link_report_flags)) {
995 if (test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
996 &cur_data.link_report_flags))
997 flow = "ON - receive & transmit";
999 flow = "ON - receive";
1001 flow = "ON - transmit";
1006 netdev_info(bp->dev, "NIC Link is Up, %d Mbps %s duplex, Flow control: %s\n",
1007 cur_data.line_speed, duplex, flow);
1011 void bnx2x_init_rx_rings(struct bnx2x *bp)
1013 int func = BP_FUNC(bp);
1017 /* Allocate TPA resources */
1018 for_each_rx_queue(bp, j) {
1019 struct bnx2x_fastpath *fp = &bp->fp[j];
1022 "mtu %d rx_buf_size %d\n", bp->dev->mtu, fp->rx_buf_size);
1024 if (!fp->disable_tpa) {
1025 /* Fill the per-aggregation pool */
1026 for (i = 0; i < MAX_AGG_QS(bp); i++) {
1027 struct bnx2x_agg_info *tpa_info =
1029 struct sw_rx_bd *first_buf =
1030 &tpa_info->first_buf;
1032 first_buf->skb = netdev_alloc_skb(bp->dev,
1034 if (!first_buf->skb) {
1035 BNX2X_ERR("Failed to allocate TPA "
1036 "skb pool for queue[%d] - "
1037 "disabling TPA on this "
1039 bnx2x_free_tpa_pool(bp, fp, i);
1040 fp->disable_tpa = 1;
1043 dma_unmap_addr_set(first_buf, mapping, 0);
1044 tpa_info->tpa_state = BNX2X_TPA_STOP;
1047 /* "next page" elements initialization */
1048 bnx2x_set_next_page_sgl(fp);
1050 /* set SGEs bit mask */
1051 bnx2x_init_sge_ring_bit_mask(fp);
1053 /* Allocate SGEs and initialize the ring elements */
1054 for (i = 0, ring_prod = 0;
1055 i < MAX_RX_SGE_CNT*NUM_RX_SGE_PAGES; i++) {
1057 if (bnx2x_alloc_rx_sge(bp, fp, ring_prod) < 0) {
1058 BNX2X_ERR("was only able to allocate "
1060 BNX2X_ERR("disabling TPA for "
1062 /* Cleanup already allocated elements */
1063 bnx2x_free_rx_sge_range(bp, fp,
1065 bnx2x_free_tpa_pool(bp, fp,
1067 fp->disable_tpa = 1;
1071 ring_prod = NEXT_SGE_IDX(ring_prod);
1074 fp->rx_sge_prod = ring_prod;
1078 for_each_rx_queue(bp, j) {
1079 struct bnx2x_fastpath *fp = &bp->fp[j];
1083 /* Activate BD ring */
1085 * this will generate an interrupt (to the TSTORM)
1086 * and must only be done after the chip is initialized
1088 bnx2x_update_rx_prod(bp, fp, fp->rx_bd_prod, fp->rx_comp_prod,
1094 if (CHIP_IS_E1(bp)) {
1095 REG_WR(bp, BAR_USTRORM_INTMEM +
1096 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func),
1097 U64_LO(fp->rx_comp_mapping));
1098 REG_WR(bp, BAR_USTRORM_INTMEM +
1099 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func) + 4,
1100 U64_HI(fp->rx_comp_mapping));
1105 static void bnx2x_free_tx_skbs(struct bnx2x *bp)
1110 for_each_tx_queue(bp, i) {
1111 struct bnx2x_fastpath *fp = &bp->fp[i];
1112 for_each_cos_in_tx_queue(fp, cos) {
1113 struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
1115 u16 bd_cons = txdata->tx_bd_cons;
1116 u16 sw_prod = txdata->tx_pkt_prod;
1117 u16 sw_cons = txdata->tx_pkt_cons;
1119 while (sw_cons != sw_prod) {
1120 bd_cons = bnx2x_free_tx_pkt(bp, txdata,
1128 static void bnx2x_free_rx_bds(struct bnx2x_fastpath *fp)
1130 struct bnx2x *bp = fp->bp;
1133 /* ring wasn't allocated */
1134 if (fp->rx_buf_ring == NULL)
1137 for (i = 0; i < NUM_RX_BD; i++) {
1138 struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[i];
1139 struct sk_buff *skb = rx_buf->skb;
1143 dma_unmap_single(&bp->pdev->dev,
1144 dma_unmap_addr(rx_buf, mapping),
1145 fp->rx_buf_size, DMA_FROM_DEVICE);
1152 static void bnx2x_free_rx_skbs(struct bnx2x *bp)
1156 for_each_rx_queue(bp, j) {
1157 struct bnx2x_fastpath *fp = &bp->fp[j];
1159 bnx2x_free_rx_bds(fp);
1161 if (!fp->disable_tpa)
1162 bnx2x_free_tpa_pool(bp, fp, MAX_AGG_QS(bp));
1166 void bnx2x_free_skbs(struct bnx2x *bp)
1168 bnx2x_free_tx_skbs(bp);
1169 bnx2x_free_rx_skbs(bp);
1172 void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value)
1174 /* load old values */
1175 u32 mf_cfg = bp->mf_config[BP_VN(bp)];
1177 if (value != bnx2x_extract_max_cfg(bp, mf_cfg)) {
1178 /* leave all but MAX value */
1179 mf_cfg &= ~FUNC_MF_CFG_MAX_BW_MASK;
1181 /* set new MAX value */
1182 mf_cfg |= (value << FUNC_MF_CFG_MAX_BW_SHIFT)
1183 & FUNC_MF_CFG_MAX_BW_MASK;
1185 bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, mf_cfg);
1190 * bnx2x_free_msix_irqs - free previously requested MSI-X IRQ vectors
1192 * @bp: driver handle
1193 * @nvecs: number of vectors to be released
1195 static void bnx2x_free_msix_irqs(struct bnx2x *bp, int nvecs)
1199 if (nvecs == offset)
1201 free_irq(bp->msix_table[offset].vector, bp->dev);
1202 DP(NETIF_MSG_IFDOWN, "released sp irq (%d)\n",
1203 bp->msix_table[offset].vector);
1206 if (nvecs == offset)
1211 for_each_eth_queue(bp, i) {
1212 if (nvecs == offset)
1214 DP(NETIF_MSG_IFDOWN, "about to release fp #%d->%d "
1215 "irq\n", i, bp->msix_table[offset].vector);
1217 free_irq(bp->msix_table[offset++].vector, &bp->fp[i]);
1221 void bnx2x_free_irq(struct bnx2x *bp)
1223 if (bp->flags & USING_MSIX_FLAG)
1224 bnx2x_free_msix_irqs(bp, BNX2X_NUM_ETH_QUEUES(bp) +
1226 else if (bp->flags & USING_MSI_FLAG)
1227 free_irq(bp->pdev->irq, bp->dev);
1229 free_irq(bp->pdev->irq, bp->dev);
1232 int bnx2x_enable_msix(struct bnx2x *bp)
1234 int msix_vec = 0, i, rc, req_cnt;
1236 bp->msix_table[msix_vec].entry = msix_vec;
1237 DP(NETIF_MSG_IFUP, "msix_table[0].entry = %d (slowpath)\n",
1238 bp->msix_table[0].entry);
1242 bp->msix_table[msix_vec].entry = msix_vec;
1243 DP(NETIF_MSG_IFUP, "msix_table[%d].entry = %d (CNIC)\n",
1244 bp->msix_table[msix_vec].entry, bp->msix_table[msix_vec].entry);
1247 /* We need separate vectors for ETH queues only (not FCoE) */
1248 for_each_eth_queue(bp, i) {
1249 bp->msix_table[msix_vec].entry = msix_vec;
1250 DP(NETIF_MSG_IFUP, "msix_table[%d].entry = %d "
1251 "(fastpath #%u)\n", msix_vec, msix_vec, i);
1255 req_cnt = BNX2X_NUM_ETH_QUEUES(bp) + CNIC_PRESENT + 1;
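/* req_cnt = 1 slowpath vector + one vector per ETH queue + one for CNIC when it is compiled in */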
1257 rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], req_cnt);
1260 * reconfigure the number of tx/rx queues according to the available MSI-X vectors
1263 if (rc >= BNX2X_MIN_MSIX_VEC_CNT) {
1264 /* how many fewer vectors will we have? */
1265 int diff = req_cnt - rc;
1268 "Trying to use less MSI-X vectors: %d\n", rc);
1270 rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], rc);
1274 "MSI-X is not attainable rc %d\n", rc);
1278 * decrease number of queues by number of unallocated entries
1280 bp->num_queues -= diff;
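/* e.g. if 9 vectors were requested but only 5 were granted, diff == 4 and four ETH queues are given up */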
1282 DP(NETIF_MSG_IFUP, "New queue configuration set: %d\n",
1285 /* fall back to INTx if not enough memory */
1287 bp->flags |= DISABLE_MSI_FLAG;
1288 DP(NETIF_MSG_IFUP, "MSI-X is not attainable rc %d\n", rc);
1292 bp->flags |= USING_MSIX_FLAG;
1297 static int bnx2x_req_msix_irqs(struct bnx2x *bp)
1299 int i, rc, offset = 0;
1301 rc = request_irq(bp->msix_table[offset++].vector,
1302 bnx2x_msix_sp_int, 0,
1303 bp->dev->name, bp->dev);
1305 BNX2X_ERR("request sp irq failed\n");
1312 for_each_eth_queue(bp, i) {
1313 struct bnx2x_fastpath *fp = &bp->fp[i];
1314 snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
1317 rc = request_irq(bp->msix_table[offset].vector,
1318 bnx2x_msix_fp_int, 0, fp->name, fp);
1320 BNX2X_ERR("request fp #%d irq (%d) failed rc %d\n", i,
1321 bp->msix_table[offset].vector, rc);
1322 bnx2x_free_msix_irqs(bp, offset);
1329 i = BNX2X_NUM_ETH_QUEUES(bp);
1330 offset = 1 + CNIC_PRESENT;
1331 netdev_info(bp->dev, "using MSI-X IRQs: sp %d fp[%d] %d"
1333 bp->msix_table[0].vector,
1334 0, bp->msix_table[offset].vector,
1335 i - 1, bp->msix_table[offset + i - 1].vector);
1340 int bnx2x_enable_msi(struct bnx2x *bp)
1344 rc = pci_enable_msi(bp->pdev);
1346 DP(NETIF_MSG_IFUP, "MSI is not attainable\n");
1349 bp->flags |= USING_MSI_FLAG;
1354 static int bnx2x_req_irq(struct bnx2x *bp)
1356 unsigned long flags;
1359 if (bp->flags & USING_MSI_FLAG)
1362 flags = IRQF_SHARED;
1364 rc = request_irq(bp->pdev->irq, bnx2x_interrupt, flags,
1365 bp->dev->name, bp->dev);
1369 static inline int bnx2x_setup_irqs(struct bnx2x *bp)
1372 if (bp->flags & USING_MSIX_FLAG) {
1373 rc = bnx2x_req_msix_irqs(bp);
1378 rc = bnx2x_req_irq(bp);
1380 BNX2X_ERR("IRQ request failed rc %d, aborting\n", rc);
1383 if (bp->flags & USING_MSI_FLAG) {
1384 bp->dev->irq = bp->pdev->irq;
1385 netdev_info(bp->dev, "using MSI IRQ %d\n",
1393 static inline void bnx2x_napi_enable(struct bnx2x *bp)
1397 for_each_rx_queue(bp, i)
1398 napi_enable(&bnx2x_fp(bp, i, napi));
1401 static inline void bnx2x_napi_disable(struct bnx2x *bp)
1405 for_each_rx_queue(bp, i)
1406 napi_disable(&bnx2x_fp(bp, i, napi));
1409 void bnx2x_netif_start(struct bnx2x *bp)
1411 if (netif_running(bp->dev)) {
1412 bnx2x_napi_enable(bp);
1413 bnx2x_int_enable(bp);
1414 if (bp->state == BNX2X_STATE_OPEN)
1415 netif_tx_wake_all_queues(bp->dev);
1419 void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
1421 bnx2x_int_disable_sync(bp, disable_hw);
1422 bnx2x_napi_disable(bp);
1425 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb)
1427 struct bnx2x *bp = netdev_priv(dev);
1431 struct ethhdr *hdr = (struct ethhdr *)skb->data;
1432 u16 ether_type = ntohs(hdr->h_proto);
1434 /* Skip VLAN tag if present */
1435 if (ether_type == ETH_P_8021Q) {
1436 struct vlan_ethhdr *vhdr =
1437 (struct vlan_ethhdr *)skb->data;
1439 ether_type = ntohs(vhdr->h_vlan_encapsulated_proto);
1442 /* If ethertype is FCoE or FIP - use FCoE ring */
1443 if ((ether_type == ETH_P_FCOE) || (ether_type == ETH_P_FIP))
1444 return bnx2x_fcoe_tx(bp, txq_index);
1447 /* select a non-FCoE queue */
1448 return __skb_tx_hash(dev, skb, BNX2X_NUM_ETH_QUEUES(bp));
1451 void bnx2x_set_num_queues(struct bnx2x *bp)
1453 switch (bp->multi_mode) {
1454 case ETH_RSS_MODE_DISABLED:
1457 case ETH_RSS_MODE_REGULAR:
1458 bp->num_queues = bnx2x_calc_num_queues(bp);
1466 /* Add special queues */
1467 bp->num_queues += NON_ETH_CONTEXT_USE;
1471 * bnx2x_set_real_num_queues - configure netdev->real_num_[tx,rx]_queues
1473 * @bp: Driver handle
1475 * We currently support at most 16 Tx queues for each CoS, thus we will
1476 * allocate a multiple of 16 for ETH L2 rings according to the value of the
1479 * If there is an FCoE L2 queue the appropriate Tx queue will have the next
1480 * index after all ETH L2 indices.
1482 * If the actual number of Tx queues (for each CoS) is less than 16 then there
1483 * will be holes at the end of each group of 16 ETH L2 indices (0..15,
1484 * 16..31, ...) with indices that are not coupled with any real Tx queue.
1486 * The proper configuration of skb->queue_mapping is handled by
1487 * bnx2x_select_queue() and __skb_tx_hash().
1489 * bnx2x_setup_tc() takes care of the proper TC mappings so that __skb_tx_hash()
1490 * will return a proper Tx index if TC is enabled (netdev->num_tc > 0).
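* For example, with 8 ETH L2 queues and max_cos == 2, Tx indices 0..7 and
* 16..23 map to real queues while 8..15 and 24..31 are such holes.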
1492 static inline int bnx2x_set_real_num_queues(struct bnx2x *bp)
1496 tx = MAX_TXQS_PER_COS * bp->max_cos;
1497 rx = BNX2X_NUM_ETH_QUEUES(bp);
1499 /* account for fcoe queue */
1507 rc = netif_set_real_num_tx_queues(bp->dev, tx);
1509 BNX2X_ERR("Failed to set real number of Tx queues: %d\n", rc);
1512 rc = netif_set_real_num_rx_queues(bp->dev, rx);
1514 BNX2X_ERR("Failed to set real number of Rx queues: %d\n", rc);
1518 DP(NETIF_MSG_DRV, "Setting real num queues to (tx, rx) (%d, %d)\n",
1524 static inline void bnx2x_set_rx_buf_size(struct bnx2x *bp)
1528 for_each_queue(bp, i) {
1529 struct bnx2x_fastpath *fp = &bp->fp[i];
1531 /* Always use a mini-jumbo MTU for the FCoE L2 ring */
1534 * Although there are no IP frames expected to arrive on
1535 * this ring, we still want to add an
1536 * IP_HEADER_ALIGNMENT_PADDING to prevent a buffer overrun.
1540 BNX2X_FCOE_MINI_JUMBO_MTU + ETH_OVREHEAD +
1541 BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING;
1544 bp->dev->mtu + ETH_OVREHEAD +
1545 BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING;
1549 static inline int bnx2x_init_rss_pf(struct bnx2x *bp)
1552 u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0};
1553 u8 num_eth_queues = BNX2X_NUM_ETH_QUEUES(bp);
1556 * Prepare the initial contents of the indirection table if RSS is enabled.
1559 if (bp->multi_mode != ETH_RSS_MODE_DISABLED) {
1560 for (i = 0; i < sizeof(ind_table); i++)
1562 bp->fp->cl_id + (i % num_eth_queues);
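/* e.g. with 4 ETH queues the table cycles through cl_id, cl_id + 1, cl_id + 2,
 * cl_id + 3, so the RSS hash buckets are spread evenly across the queues */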
1566 * For 57710 and 57711 SEARCHER configuration (rss_keys) is
1567 * per-port, so if explicit configuration is needed, do it only
1570 * For 57712 and newer on the other hand it's a per-function
1573 return bnx2x_config_rss_pf(bp, ind_table,
1574 bp->port.pmf || !CHIP_IS_E1x(bp));
1577 int bnx2x_config_rss_pf(struct bnx2x *bp, u8 *ind_table, bool config_hash)
1579 struct bnx2x_config_rss_params params = {0};
1582 /* Although RSS is meaningless when there is a single HW queue, we
1583 * still need it enabled in order to have the HW Rx hash generated.
1585 * if (!is_eth_multi(bp))
1586 * bp->multi_mode = ETH_RSS_MODE_DISABLED;
1589 params.rss_obj = &bp->rss_conf_obj;
1591 __set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
1594 switch (bp->multi_mode) {
1595 case ETH_RSS_MODE_DISABLED:
1596 __set_bit(BNX2X_RSS_MODE_DISABLED, &params.rss_flags);
1598 case ETH_RSS_MODE_REGULAR:
1599 __set_bit(BNX2X_RSS_MODE_REGULAR, &params.rss_flags);
1601 case ETH_RSS_MODE_VLAN_PRI:
1602 __set_bit(BNX2X_RSS_MODE_VLAN_PRI, &params.rss_flags);
1604 case ETH_RSS_MODE_E1HOV_PRI:
1605 __set_bit(BNX2X_RSS_MODE_E1HOV_PRI, &params.rss_flags);
1607 case ETH_RSS_MODE_IP_DSCP:
1608 __set_bit(BNX2X_RSS_MODE_IP_DSCP, &params.rss_flags);
1611 BNX2X_ERR("Unknown multi_mode: %d\n", bp->multi_mode);
1615 /* If RSS is enabled */
1616 if (bp->multi_mode != ETH_RSS_MODE_DISABLED) {
1617 /* RSS configuration */
1618 __set_bit(BNX2X_RSS_IPV4, &params.rss_flags);
1619 __set_bit(BNX2X_RSS_IPV4_TCP, &params.rss_flags);
1620 __set_bit(BNX2X_RSS_IPV6, &params.rss_flags);
1621 __set_bit(BNX2X_RSS_IPV6_TCP, &params.rss_flags);
1624 params.rss_result_mask = MULTI_MASK;
1626 memcpy(params.ind_table, ind_table, sizeof(params.ind_table));
1630 for (i = 0; i < sizeof(params.rss_key) / 4; i++)
1631 params.rss_key[i] = random32();
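/* fill the RSS hash key one 32-bit word at a time with random values */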
1633 __set_bit(BNX2X_RSS_SET_SRCH, &params.rss_flags);
1637 return bnx2x_config_rss(bp, &params);
1640 static inline int bnx2x_init_hw(struct bnx2x *bp, u32 load_code)
1642 struct bnx2x_func_state_params func_params = {0};
1644 /* Prepare parameters for function state transitions */
1645 __set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
1647 func_params.f_obj = &bp->func_obj;
1648 func_params.cmd = BNX2X_F_CMD_HW_INIT;
1650 func_params.params.hw_init.load_phase = load_code;
1652 return bnx2x_func_state_change(bp, &func_params);
1656 * Cleans the objects that have internal lists without sending
1657 * ramrods. Should be run when interrupts are disabled.
1659 static void bnx2x_squeeze_objects(struct bnx2x *bp)
1662 unsigned long ramrod_flags = 0, vlan_mac_flags = 0;
1663 struct bnx2x_mcast_ramrod_params rparam = {0};
1664 struct bnx2x_vlan_mac_obj *mac_obj = &bp->fp->mac_obj;
1666 /***************** Cleanup MACs' object first *************************/
1668 /* Wait for completion of requested */
1669 __set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
1670 /* Perform a dry cleanup */
1671 __set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
1673 /* Clean ETH primary MAC */
1674 __set_bit(BNX2X_ETH_MAC, &vlan_mac_flags);
1675 rc = mac_obj->delete_all(bp, &bp->fp->mac_obj, &vlan_mac_flags,
1678 BNX2X_ERR("Failed to clean ETH MACs: %d\n", rc);
1680 /* Cleanup UC list */
1682 __set_bit(BNX2X_UC_LIST_MAC, &vlan_mac_flags);
1683 rc = mac_obj->delete_all(bp, mac_obj, &vlan_mac_flags,
1686 BNX2X_ERR("Failed to clean UC list MACs: %d\n", rc);
1688 /***************** Now clean mcast object *****************************/
1689 rparam.mcast_obj = &bp->mcast_obj;
1690 __set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
1692 /* Add a DEL command... */
1693 rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
1695 BNX2X_ERR("Failed to add a new DEL command to a multi-cast "
1696 "object: %d\n", rc);
1698 /* ...and wait until all pending commands are cleared */
1699 rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
1702 BNX2X_ERR("Failed to clean multi-cast object: %d\n",
1707 rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
1711 #ifndef BNX2X_STOP_ON_ERROR
1712 #define LOAD_ERROR_EXIT(bp, label) \
1714 (bp)->state = BNX2X_STATE_ERROR; \
1718 #define LOAD_ERROR_EXIT(bp, label) \
1720 (bp)->state = BNX2X_STATE_ERROR; \
1726 /* must be called with rtnl_lock */
1727 int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
1729 int port = BP_PORT(bp);
1733 #ifdef BNX2X_STOP_ON_ERROR
1734 if (unlikely(bp->panic))
1738 bp->state = BNX2X_STATE_OPENING_WAIT4_LOAD;
1740 /* Set the initial link reported state to link down */
1741 bnx2x_acquire_phy_lock(bp);
1742 memset(&bp->last_reported_link, 0, sizeof(bp->last_reported_link));
1743 __set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
1744 &bp->last_reported_link.link_report_flags);
1745 bnx2x_release_phy_lock(bp);
1747 /* must be called before memory allocation and HW init */
1748 bnx2x_ilt_set_info(bp);
1751 * Zero fastpath structures preserving invariants like napi, which are
1752 * allocated only once, fp index, max_cos, bp pointer.
1753 * Also set fp->disable_tpa.
1755 for_each_queue(bp, i)
1759 /* Set the receive queues buffer size */
1760 bnx2x_set_rx_buf_size(bp);
1762 if (bnx2x_alloc_mem(bp))
1765 /* As long as bnx2x_alloc_mem() may possibly update
1766 * bp->num_queues, bnx2x_set_real_num_queues() should always be called after it.
1769 rc = bnx2x_set_real_num_queues(bp);
1771 BNX2X_ERR("Unable to set real_num_queues\n");
1772 LOAD_ERROR_EXIT(bp, load_error0);
1775 /* configure multi-CoS mappings in the kernel.
1776 * This configuration may be overridden by a multi-class queue discipline
1777 * or by a DCBX negotiation result.
1779 bnx2x_setup_tc(bp->dev, bp->max_cos);
1781 bnx2x_napi_enable(bp);
1783 /* Send LOAD_REQUEST command to MCP
1784 * Returns the type of LOAD command:
1785 * if it is the first port to be initialized
1786 * common blocks should be initialized, otherwise - not
1788 if (!BP_NOMCP(bp)) {
1789 load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, 0);
1791 BNX2X_ERR("MCP response failure, aborting\n");
1793 LOAD_ERROR_EXIT(bp, load_error1);
1795 if (load_code == FW_MSG_CODE_DRV_LOAD_REFUSED) {
1796 rc = -EBUSY; /* other port in diagnostic mode */
1797 LOAD_ERROR_EXIT(bp, load_error1);
1801 int path = BP_PATH(bp);
1803 DP(NETIF_MSG_IFUP, "NO MCP - load counts[%d] %d, %d, %d\n",
1804 path, load_count[path][0], load_count[path][1],
1805 load_count[path][2]);
1806 load_count[path][0]++;
1807 load_count[path][1 + port]++;
1808 DP(NETIF_MSG_IFUP, "NO MCP - new load counts[%d] %d, %d, %d\n",
1809 path, load_count[path][0], load_count[path][1],
1810 load_count[path][2]);
1811 if (load_count[path][0] == 1)
1812 load_code = FW_MSG_CODE_DRV_LOAD_COMMON;
1813 else if (load_count[path][1 + port] == 1)
1814 load_code = FW_MSG_CODE_DRV_LOAD_PORT;
1816 load_code = FW_MSG_CODE_DRV_LOAD_FUNCTION;
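/* Without an MCP the counters above decide the init scope: the first
 * function on the path does COMMON init, the first one on each port does
 * PORT init, and all others do FUNCTION-only init. */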
1819 if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
1820 (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) ||
1821 (load_code == FW_MSG_CODE_DRV_LOAD_PORT)) {
1824 * We need the barrier to ensure the ordering between the
1825 * writing to bp->port.pmf here and reading it from the
1826 * bnx2x_periodic_task().
1829 queue_delayed_work(bnx2x_wq, &bp->period_task, 0);
1833 DP(NETIF_MSG_LINK, "pmf %d\n", bp->port.pmf);
1835 /* Init Function state controlling object */
1836 bnx2x__init_func_obj(bp);
1839 rc = bnx2x_init_hw(bp, load_code);
1841 BNX2X_ERR("HW init failed, aborting\n");
1842 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1843 LOAD_ERROR_EXIT(bp, load_error2);
1846 /* Connect to IRQs */
1847 rc = bnx2x_setup_irqs(bp);
1849 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1850 LOAD_ERROR_EXIT(bp, load_error2);
1853 /* Setup NIC internals and enable interrupts */
1854 bnx2x_nic_init(bp, load_code);
1856 /* Init per-function objects */
1857 bnx2x_init_bp_objs(bp);
1859 if (((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
1860 (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP)) &&
1861 (bp->common.shmem2_base)) {
1862 if (SHMEM2_HAS(bp, dcc_support))
1863 SHMEM2_WR(bp, dcc_support,
1864 (SHMEM_DCC_SUPPORT_DISABLE_ENABLE_PF_TLV |
1865 SHMEM_DCC_SUPPORT_BANDWIDTH_ALLOCATION_TLV));
1868 bp->state = BNX2X_STATE_OPENING_WAIT4_PORT;
1869 rc = bnx2x_func_start(bp);
1871 BNX2X_ERR("Function start failed!\n");
1872 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1873 LOAD_ERROR_EXIT(bp, load_error3);
1876 /* Send LOAD_DONE command to MCP */
1877 if (!BP_NOMCP(bp)) {
1878 load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1880 BNX2X_ERR("MCP response failure, aborting\n");
1882 LOAD_ERROR_EXIT(bp, load_error3);
1886 rc = bnx2x_setup_leading(bp);
1888 BNX2X_ERR("Setup leading failed!\n");
1889 LOAD_ERROR_EXIT(bp, load_error3);
1893 /* Enable Timer scan */
1894 REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 1);
1897 for_each_nondefault_queue(bp, i) {
1898 rc = bnx2x_setup_queue(bp, &bp->fp[i], 0);
1900 LOAD_ERROR_EXIT(bp, load_error4);
1903 rc = bnx2x_init_rss_pf(bp);
1905 LOAD_ERROR_EXIT(bp, load_error4);
1907 /* Now that clients are configured we are ready to work */
1908 bp->state = BNX2X_STATE_OPEN;
1910 /* Configure a ucast MAC */
1911 rc = bnx2x_set_eth_mac(bp, true);
1913 LOAD_ERROR_EXIT(bp, load_error4);
1915 if (bp->pending_max) {
1916 bnx2x_update_max_mf_config(bp, bp->pending_max);
1917 bp->pending_max = 0;
1921 bnx2x_initial_phy_init(bp, load_mode);
1923 /* Start fast path */
1925 /* Initialize Rx filter. */
1926 netif_addr_lock_bh(bp->dev);
1927 bnx2x_set_rx_mode(bp->dev);
1928 netif_addr_unlock_bh(bp->dev);
1931 switch (load_mode) {
1933 /* Tx queues should only be re-enabled */
1934 netif_tx_wake_all_queues(bp->dev);
1938 netif_tx_start_all_queues(bp->dev);
1939 smp_mb__after_clear_bit();
1943 bp->state = BNX2X_STATE_DIAG;
1951 bnx2x__link_status_update(bp);
1953 /* start the timer */
1954 mod_timer(&bp->timer, jiffies + bp->current_interval);
1957 bnx2x_setup_cnic_irq_info(bp);
1958 if (bp->state == BNX2X_STATE_OPEN)
1959 bnx2x_cnic_notify(bp, CNIC_CTL_START_CMD);
1961 bnx2x_inc_load_cnt(bp);
1963 /* Wait for all pending SP commands to complete */
1964 if (!bnx2x_wait_sp_comp(bp, ~0x0UL)) {
1965 BNX2X_ERR("Timeout waiting for SP elements to complete\n");
1966 bnx2x_nic_unload(bp, UNLOAD_CLOSE);
1970 bnx2x_dcbx_init(bp);
1973 #ifndef BNX2X_STOP_ON_ERROR
1976 /* Disable Timer scan */
1977 REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 0);
1980 bnx2x_int_disable_sync(bp, 1);
1982 /* Clean queueable objects */
1983 bnx2x_squeeze_objects(bp);
1985 /* Free SKBs, SGEs, TPA pool and driver internals */
1986 bnx2x_free_skbs(bp);
1987 for_each_rx_queue(bp, i)
1988 bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
1993 if (!BP_NOMCP(bp)) {
1994 bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0);
1995 bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, 0);
2000 bnx2x_napi_disable(bp);
2005 #endif /* ! BNX2X_STOP_ON_ERROR */
2008 /* must be called with rtnl_lock */
2009 int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
2012 bool global = false;
2014 if ((bp->state == BNX2X_STATE_CLOSED) ||
2015 (bp->state == BNX2X_STATE_ERROR)) {
2016 /* We can get here if the driver has been unloaded
2017 * during parity error recovery and is either waiting for a
2018 * leader to complete or for other functions to unload and
2019 * then ifdown has been issued. In this case we want to
2020 * unload and let other functions complete a recovery process.
2023 bp->recovery_state = BNX2X_RECOVERY_DONE;
2025 bnx2x_release_leader_lock(bp);
2028 DP(NETIF_MSG_HW, "Releasing a leadership...\n");
2034 * It's important to set bp->state to a value different from
2035 * BNX2X_STATE_OPEN and only then stop the Tx. Otherwise bnx2x_tx_int()
2036 * may restart the Tx from the NAPI context (see bnx2x_tx_int()).
2038 bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT;
2042 bnx2x_tx_disable(bp);
2045 bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD);
2048 bp->rx_mode = BNX2X_RX_MODE_NONE;
2050 del_timer_sync(&bp->timer);
2052 /* Set ALWAYS_ALIVE bit in shmem */
2053 bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;
2055 bnx2x_drv_pulse(bp);
2057 bnx2x_stats_handle(bp, STATS_EVENT_STOP);
2059 /* Cleanup the chip if needed */
2060 if (unload_mode != UNLOAD_RECOVERY)
2061 bnx2x_chip_cleanup(bp, unload_mode);
2063 /* Send the UNLOAD_REQUEST to the MCP */
2064 bnx2x_send_unload_req(bp, unload_mode);
2067 * Prevent transactions to host from the functions on the
2068 * engine that doesn't reset global blocks in case of global
2069 * attention once global blocks are reset and gates are opened
2070 * (the engine whose leader will perform the recovery procedure).
2073 if (!CHIP_IS_E1x(bp))
2074 bnx2x_pf_disable(bp);
2076 /* Disable HW interrupts, NAPI */
2077 bnx2x_netif_stop(bp, 1);
2082 /* Report UNLOAD_DONE to MCP */
2083 bnx2x_send_unload_done(bp);
2087 * At this stage no more interrupts will arrive, so we may safely clean
2088 * the queueable objects here in case they failed to get cleaned so far.
2090 bnx2x_squeeze_objects(bp);
2092 /* There should be no more pending SP commands at this stage */
2097 /* Free SKBs, SGEs, TPA pool and driver internals */
2098 bnx2x_free_skbs(bp);
2099 for_each_rx_queue(bp, i)
2100 bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
2104 bp->state = BNX2X_STATE_CLOSED;
2106 /* Check if there are pending parity attentions. If there are - set
2107 * RECOVERY_IN_PROGRESS.
2109 if (bnx2x_chk_parity_attn(bp, &global, false)) {
2110 bnx2x_set_reset_in_progress(bp);
2112 /* Set RESET_IS_GLOBAL if needed */
2114 bnx2x_set_reset_global(bp);
2118 /* The last driver must disable a "close the gate" if there is no
2119 * parity attention or "process kill" pending.
2121 if (!bnx2x_dec_load_cnt(bp) && bnx2x_reset_is_done(bp, BP_PATH(bp)))
2122 bnx2x_disable_close_the_gate(bp);
2127 int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state)
2131 /* If there is no power capability, silently succeed */
2133 DP(NETIF_MSG_HW, "No power capability. Breaking.\n");
2137 pci_read_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL, &pmcsr);
2141 pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL,
2142 ((pmcsr & ~PCI_PM_CTRL_STATE_MASK) |
2143 PCI_PM_CTRL_PME_STATUS));
2145 if (pmcsr & PCI_PM_CTRL_STATE_MASK)
2146 /* delay required during transition out of D3hot */
2151 /* If there are other clients above don't
2152 shut down the power */
2153 if (atomic_read(&bp->pdev->enable_cnt) != 1)
2155 /* Don't shut down the power for emulation and FPGA */
2156 if (CHIP_REV_IS_SLOW(bp))
2159 pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
2163 pmcsr |= PCI_PM_CTRL_PME_ENABLE;
2165 pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL,
2168 /* No more memory access after this point until
2169 * device is brought back to D0.
2180 * net_device service functions
2182 int bnx2x_poll(struct napi_struct *napi, int budget)
2186 struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath,
2188 struct bnx2x *bp = fp->bp;
2191 #ifdef BNX2X_STOP_ON_ERROR
2192 if (unlikely(bp->panic)) {
2193 napi_complete(napi);
2198 for_each_cos_in_tx_queue(fp, cos)
2199 if (bnx2x_tx_queue_has_work(&fp->txdata[cos]))
2200 bnx2x_tx_int(bp, &fp->txdata[cos]);
2203 if (bnx2x_has_rx_work(fp)) {
2204 work_done += bnx2x_rx_int(fp, budget - work_done);
2206 /* must not complete if we consumed full budget */
2207 if (work_done >= budget)
2211 /* Fall out from the NAPI loop if needed */
2212 if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
2214 /* No need to update SB for FCoE L2 ring as long as
2215 * it's connected to the default SB and the SB
2216 * has been updated when NAPI was scheduled.
2218 if (IS_FCOE_FP(fp)) {
2219 napi_complete(napi);
2224 bnx2x_update_fpsb_idx(fp);
2225 /* bnx2x_has_rx_work() reads the status block,
2226 * thus we need to ensure that status block indices
2227 * have been actually read (bnx2x_update_fpsb_idx)
2228 * prior to this check (bnx2x_has_rx_work) so that
2229 * we won't write the "newer" value of the status block
2230 * to IGU (if there was a DMA right after
2231 * bnx2x_has_rx_work and if there is no rmb, the memory
2232 * reading (bnx2x_update_fpsb_idx) may be postponed
2233 * to right before bnx2x_ack_sb). In this case there
2234 * will never be another interrupt until there is
2235 * another update of the status block, while there
2236 * is still unhandled work.
2240 if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
2241 napi_complete(napi);
2242 /* Re-enable interrupts */
2244 "Update index to %d\n", fp->fp_hc_idx);
2245 bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID,
2246 le16_to_cpu(fp->fp_hc_idx),
2256 /* we split the first BD into headers and data BDs
2257 * to ease the pain of our fellow microcode engineers
2258 * we use one mapping for both BDs
2260 static noinline u16 bnx2x_tx_split(struct bnx2x *bp,
2261 struct bnx2x_fp_txdata *txdata,
2262 struct sw_tx_bd *tx_buf,
2263 struct eth_tx_start_bd **tx_bd, u16 hlen,
2264 u16 bd_prod, int nbd)
2266 struct eth_tx_start_bd *h_tx_bd = *tx_bd;
2267 struct eth_tx_bd *d_tx_bd;
2269 int old_len = le16_to_cpu(h_tx_bd->nbytes);
2271 /* first fix first BD */
2272 h_tx_bd->nbd = cpu_to_le16(nbd);
2273 h_tx_bd->nbytes = cpu_to_le16(hlen);
2275 DP(NETIF_MSG_TX_QUEUED, "TSO split header size is %d "
2276 "(%x:%x) nbd %d\n", h_tx_bd->nbytes, h_tx_bd->addr_hi,
2277 h_tx_bd->addr_lo, h_tx_bd->nbd);
2279 /* now get a new data BD
2280 * (after the pbd) and fill it */
2281 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2282 d_tx_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2284 mapping = HILO_U64(le32_to_cpu(h_tx_bd->addr_hi),
2285 le32_to_cpu(h_tx_bd->addr_lo)) + hlen;
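/* The data BD reuses the header BD's DMA mapping, just offset by hlen,
 * so no additional dma_map call is needed (see the comment above this
 * function). */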
2287 d_tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2288 d_tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2289 d_tx_bd->nbytes = cpu_to_le16(old_len - hlen);
2291 /* this marks the BD as one that has no individual mapping */
2292 tx_buf->flags |= BNX2X_TSO_SPLIT_BD;
2294 DP(NETIF_MSG_TX_QUEUED,
2295 "TSO split data size is %d (%x:%x)\n",
2296 d_tx_bd->nbytes, d_tx_bd->addr_hi, d_tx_bd->addr_lo);
2299 *tx_bd = (struct eth_tx_start_bd *)d_tx_bd;
2304 static inline u16 bnx2x_csum_fix(unsigned char *t_header, u16 csum, s8 fix)
2307 csum = (u16) ~csum_fold(csum_sub(csum,
2308 csum_partial(t_header - fix, fix, 0)));
2311 csum = (u16) ~csum_fold(csum_add(csum,
2312 csum_partial(t_header, -fix, 0)));
2314 return swab16(csum);
2317 static inline u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb)
2321 if (skb->ip_summed != CHECKSUM_PARTIAL)
2325 if (vlan_get_protocol(skb) == htons(ETH_P_IPV6)) {
2327 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
2328 rc |= XMIT_CSUM_TCP;
2332 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
2333 rc |= XMIT_CSUM_TCP;
2337 if (skb_is_gso_v6(skb))
2338 rc |= XMIT_GSO_V6 | XMIT_CSUM_TCP | XMIT_CSUM_V6;
2339 else if (skb_is_gso(skb))
2340 rc |= XMIT_GSO_V4 | XMIT_CSUM_V4 | XMIT_CSUM_TCP;
2345 #if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
2346 /* check if packet requires linearization (packet is too fragmented)
2347 no need to check fragmentation if page size > 8K (there will be no
2348 violation of FW restrictions) */
2349 static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb,
2354 int first_bd_sz = 0;
2356 /* 3 = 1 (for linear data BD) + 2 (for PBD and last BD) */
2357 if (skb_shinfo(skb)->nr_frags >= (MAX_FETCH_BD - 3)) {
2359 if (xmit_type & XMIT_GSO) {
2360 unsigned short lso_mss = skb_shinfo(skb)->gso_size;
2361 /* Check if LSO packet needs to be copied:
2362 3 = 1 (for headers BD) + 2 (for PBD and last BD) */
2363 int wnd_size = MAX_FETCH_BD - 3;
2364 /* Number of windows to check */
2365 int num_wnds = skb_shinfo(skb)->nr_frags - wnd_size;
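/* Every window of wnd_size consecutive frags must carry at least one full
 * MSS; if any window falls short, the FW limit on BDs per LSO segment
 * would be violated and the packet has to be linearized. */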
2370 /* Headers length */
2371 hlen = (int)(skb_transport_header(skb) - skb->data) +
2374 /* Amount of data (w/o headers) on linear part of SKB*/
2375 first_bd_sz = skb_headlen(skb) - hlen;
2377 wnd_sum = first_bd_sz;
2379 /* Calculate the first sum - it's special */
2380 for (frag_idx = 0; frag_idx < wnd_size - 1; frag_idx++)
2382 skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]);
2384 /* If there was data on linear skb data - check it */
2385 if (first_bd_sz > 0) {
2386 if (unlikely(wnd_sum < lso_mss)) {
2391 wnd_sum -= first_bd_sz;
2394 /* Others are easier: run through the frag list and
2395 check all windows */
2396 for (wnd_idx = 0; wnd_idx <= num_wnds; wnd_idx++) {
2398 skb_frag_size(&skb_shinfo(skb)->frags[wnd_idx + wnd_size - 1]);
2400 if (unlikely(wnd_sum < lso_mss)) {
2405 skb_frag_size(&skb_shinfo(skb)->frags[wnd_idx]);
2408 /* in the non-LSO case, a too fragmented packet should always be linearized */
2415 if (unlikely(to_copy))
2416 DP(NETIF_MSG_TX_QUEUED,
2417 "Linearization IS REQUIRED for %s packet. "
2418 "num_frags %d hlen %d first_bd_sz %d\n",
2419 (xmit_type & XMIT_GSO) ? "LSO" : "non-LSO",
2420 skb_shinfo(skb)->nr_frags, hlen, first_bd_sz);
2426 static inline void bnx2x_set_pbd_gso_e2(struct sk_buff *skb, u32 *parsing_data,
2429 *parsing_data |= (skb_shinfo(skb)->gso_size <<
2430 ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT) &
2431 ETH_TX_PARSE_BD_E2_LSO_MSS;
2432 if ((xmit_type & XMIT_GSO_V6) &&
2433 (ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6))
2434 *parsing_data |= ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR;
2438 * bnx2x_set_pbd_gso - update PBD in GSO case.
2442 * @xmit_type: xmit flags
2444 static inline void bnx2x_set_pbd_gso(struct sk_buff *skb,
2445 struct eth_tx_parse_bd_e1x *pbd,
2448 pbd->lso_mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
2449 pbd->tcp_send_seq = swab32(tcp_hdr(skb)->seq);
2450 pbd->tcp_flags = pbd_tcp_flags(skb);
2452 if (xmit_type & XMIT_GSO_V4) {
2453 pbd->ip_id = swab16(ip_hdr(skb)->id);
2454 pbd->tcp_pseudo_csum =
2455 swab16(~csum_tcpudp_magic(ip_hdr(skb)->saddr,
2457 0, IPPROTO_TCP, 0));
2460 pbd->tcp_pseudo_csum =
2461 swab16(~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
2462 &ipv6_hdr(skb)->daddr,
2463 0, IPPROTO_TCP, 0));
2465 pbd->global_data |= ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN;
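	/* Note: the pseudo-header checksum above is computed with a zero
	 * length field; the PSEUDO_CS_WITHOUT_LEN flag presumably tells the
	 * chip that the segment length still has to be folded in for each
	 * generated segment.
	 */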
2469 * bnx2x_set_pbd_csum_e2 - update PBD with checksum and return header length
2471 * @bp: driver handle
2473 * @parsing_data: data to be updated
2474 * @xmit_type: xmit flags
2478 static inline u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb,
2479 u32 *parsing_data, u32 xmit_type)
2482 ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) <<
2483 ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W_SHIFT) &
2484 ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W;
2486 if (xmit_type & XMIT_CSUM_TCP) {
2487 *parsing_data |= ((tcp_hdrlen(skb) / 4) <<
2488 ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT) &
2489 ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW;
2491 return skb_transport_header(skb) + tcp_hdrlen(skb) - skb->data;
2493 /* We support checksum offload for TCP and UDP only.
2494 * No need to pass the UDP header length - it's a constant.
2496 return skb_transport_header(skb) +
2497 sizeof(struct udphdr) - skb->data;
2500 static inline void bnx2x_set_sbd_csum(struct bnx2x *bp, struct sk_buff *skb,
2501 struct eth_tx_start_bd *tx_start_bd, u32 xmit_type)
2503 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_L4_CSUM;
2505 if (xmit_type & XMIT_CSUM_V4)
2506 tx_start_bd->bd_flags.as_bitfield |=
2507 ETH_TX_BD_FLAGS_IP_CSUM;
2509 tx_start_bd->bd_flags.as_bitfield |=
2510 ETH_TX_BD_FLAGS_IPV6;
2512 if (!(xmit_type & XMIT_CSUM_TCP))
2513 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IS_UDP;
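/* The start BD only carries coarse checksum hints (L4 csum, IPv4 csum,
 * IPv6, UDP-vs-TCP).  The exact header offsets and lengths are written
 * into the parse BD by bnx2x_set_pbd_csum_e2() above and
 * bnx2x_set_pbd_csum() below.
 */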
2517 * bnx2x_set_pbd_csum - update PBD with checksum and return header length
2519 * @bp: driver handle
2521 * @pbd: parse BD to be updated
2522 * @xmit_type: xmit flags
2524 static inline u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb,
2525 struct eth_tx_parse_bd_e1x *pbd,
2528 u8 hlen = (skb_network_header(skb) - skb->data) >> 1;
2530 /* for now NS flag is not used in Linux */
2532 (hlen | ((skb->protocol == cpu_to_be16(ETH_P_8021Q)) <<
2533 ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT));
2535 pbd->ip_hlen_w = (skb_transport_header(skb) -
2536 skb_network_header(skb)) >> 1;
2538 hlen += pbd->ip_hlen_w;
2540 /* We support checksum offload for TCP and UDP only */
2541 if (xmit_type & XMIT_CSUM_TCP)
2542 hlen += tcp_hdrlen(skb) / 2;
2544 hlen += sizeof(struct udphdr) / 2;
2546 pbd->total_hlen_w = cpu_to_le16(hlen);
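	/* hlen is accumulated in 16-bit words here (hence the >> 1 and the
	 * /2 divisions), matching the *_w naming of the PBD fields it feeds.
	 */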
2549 if (xmit_type & XMIT_CSUM_TCP) {
2550 pbd->tcp_pseudo_csum = swab16(tcp_hdr(skb)->check);
2553 s8 fix = SKB_CS_OFF(skb); /* signed! */
2555 DP(NETIF_MSG_TX_QUEUED,
2556 "hlen %d fix %d csum before fix %x\n",
2557 le16_to_cpu(pbd->total_hlen_w), fix, SKB_CS(skb));
2559 /* HW bug: fixup the CSUM */
2560 pbd->tcp_pseudo_csum =
2561 bnx2x_csum_fix(skb_transport_header(skb),
2564 DP(NETIF_MSG_TX_QUEUED, "csum after fix %x\n",
2565 pbd->tcp_pseudo_csum);
2571 /* called with netif_tx_lock
2572 * bnx2x_tx_int() runs without netif_tx_lock unless it needs to call
2573 * netif_wake_queue()
2575 netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
2577 struct bnx2x *bp = netdev_priv(dev);
2579 struct bnx2x_fastpath *fp;
2580 struct netdev_queue *txq;
2581 struct bnx2x_fp_txdata *txdata;
2582 struct sw_tx_bd *tx_buf;
2583 struct eth_tx_start_bd *tx_start_bd, *first_bd;
2584 struct eth_tx_bd *tx_data_bd, *total_pkt_bd = NULL;
2585 struct eth_tx_parse_bd_e1x *pbd_e1x = NULL;
2586 struct eth_tx_parse_bd_e2 *pbd_e2 = NULL;
2587 u32 pbd_e2_parsing_data = 0;
2588 u16 pkt_prod, bd_prod;
2589 int nbd, txq_index, fp_index, txdata_index;
2591 u32 xmit_type = bnx2x_xmit_type(bp, skb);
2594 __le16 pkt_size = 0;
2596 u8 mac_type = UNICAST_ADDRESS;
2598 #ifdef BNX2X_STOP_ON_ERROR
2599 if (unlikely(bp->panic))
2600 return NETDEV_TX_BUSY;
2603 txq_index = skb_get_queue_mapping(skb);
2604 txq = netdev_get_tx_queue(dev, txq_index);
2606 BUG_ON(txq_index >= MAX_ETH_TXQ_IDX(bp) + FCOE_PRESENT);
2608 /* decode the fastpath index and the cos index from the txq */
2609 fp_index = TXQ_TO_FP(txq_index);
2610 txdata_index = TXQ_TO_COS(txq_index);
2614 * Override the above for the FCoE queue:
2615 * - FCoE fp entry is right after the ETH entries.
2616 * - FCoE L2 queue uses bp->txdata[0] only.
2618 if (unlikely(!NO_FCOE(bp) && (txq_index ==
2619 bnx2x_fcoe_tx(bp, txq_index)))) {
2620 fp_index = FCOE_IDX;
2625 /* enable this debug print to view the transmission queue being used
2626 DP(BNX2X_MSG_FP, "indices: txq %d, fp %d, txdata %d\n",
2627 txq_index, fp_index, txdata_index); */
2629 /* locate the fastpath and the txdata */
2630 fp = &bp->fp[fp_index];
2631 txdata = &fp->txdata[txdata_index];
2633 /* enable this debug print to view the transmission details
2634 DP(BNX2X_MSG_FP,"transmitting packet cid %d fp index %d txdata_index %d"
2635 " tx_data ptr %p fp pointer %p\n",
2636 txdata->cid, fp_index, txdata_index, txdata, fp); */
2638 if (unlikely(bnx2x_tx_avail(bp, txdata) <
2639 (skb_shinfo(skb)->nr_frags + 3))) {
2640 fp->eth_q_stats.driver_xoff++;
2641 netif_tx_stop_queue(txq);
2642 BNX2X_ERR("BUG! Tx ring full when queue awake!\n");
2643 return NETDEV_TX_BUSY;
2646 DP(NETIF_MSG_TX_QUEUED, "queue[%d]: SKB: summed %x protocol %x "
2647 "protocol(%x,%x) gso type %x xmit_type %x\n",
2648 txq_index, skb->ip_summed, skb->protocol, ipv6_hdr(skb)->nexthdr,
2649 ip_hdr(skb)->protocol, skb_shinfo(skb)->gso_type, xmit_type);
2651 eth = (struct ethhdr *)skb->data;
2653 /* set flag according to packet type (UNICAST_ADDRESS is default)*/
2654 if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
2655 if (is_broadcast_ether_addr(eth->h_dest))
2656 mac_type = BROADCAST_ADDRESS;
2658 mac_type = MULTICAST_ADDRESS;
2661 #if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
2662 /* First, check if we need to linearize the skb (due to FW
2663 restrictions). No need to check fragmentation if page size > 8K
2664 (there will be no violation of FW restrictions) */
2665 if (bnx2x_pkt_req_lin(bp, skb, xmit_type)) {
2666 /* Statistics of linearization */
2668 if (skb_linearize(skb) != 0) {
2669 DP(NETIF_MSG_TX_QUEUED, "SKB linearization failed - "
2670 "silently dropping this SKB\n");
2671 dev_kfree_skb_any(skb);
2672 return NETDEV_TX_OK;
2676 /* Map skb linear data for DMA */
2677 mapping = dma_map_single(&bp->pdev->dev, skb->data,
2678 skb_headlen(skb), DMA_TO_DEVICE);
2679 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
2680 DP(NETIF_MSG_TX_QUEUED, "SKB mapping failed - "
2681 "silently dropping this SKB\n");
2682 dev_kfree_skb_any(skb);
2683 return NETDEV_TX_OK;
2686 Please read carefully. First we use one BD which we mark as start,
2687 then we have a parsing info BD (used for TSO or xsum),
2688 and only then we have the rest of the TSO BDs.
2689 (don't forget to mark the last one as last,
2690 and to unmap only AFTER you write to the BD ...)
2691 And above all, all pbd sizes are in words - NOT DWORDS!
2694 /* get current pkt produced now - advance it just before sending packet
2695 * since mapping of pages may fail and cause packet to be dropped
2697 pkt_prod = txdata->tx_pkt_prod;
2698 bd_prod = TX_BD(txdata->tx_bd_prod);
2700 /* get a tx_buf and first BD
2701 * tx_start_bd may be changed during SPLIT,
2702 * but first_bd will always stay first
2704 tx_buf = &txdata->tx_buf_ring[TX_BD(pkt_prod)];
2705 tx_start_bd = &txdata->tx_desc_ring[bd_prod].start_bd;
2706 first_bd = tx_start_bd;
2708 tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
2709 SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_ETH_ADDR_TYPE,
2713 SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_HDR_NBDS, 1);
2715 /* remember the first BD of the packet */
2716 tx_buf->first_bd = txdata->tx_bd_prod;
2720 DP(NETIF_MSG_TX_QUEUED,
2721 "sending pkt %u @%p next_idx %u bd %u @%p\n",
2722 pkt_prod, tx_buf, txdata->tx_pkt_prod, bd_prod, tx_start_bd);
2724 if (vlan_tx_tag_present(skb)) {
2725 tx_start_bd->vlan_or_ethertype =
2726 cpu_to_le16(vlan_tx_tag_get(skb));
2727 tx_start_bd->bd_flags.as_bitfield |=
2728 (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
2730 tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
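	/* When no VLAN tag is present the field is loaded with pkt_prod
	 * instead; this is presumably consumed by the FW for packet
	 * accounting rather than as a real VLAN/ethertype value.
	 */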
2732 /* turn on parsing and get a BD */
2733 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2735 if (xmit_type & XMIT_CSUM)
2736 bnx2x_set_sbd_csum(bp, skb, tx_start_bd, xmit_type);
2738 if (!CHIP_IS_E1x(bp)) {
2739 pbd_e2 = &txdata->tx_desc_ring[bd_prod].parse_bd_e2;
2740 memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2));
2741 /* Set PBD in checksum offload case */
2742 if (xmit_type & XMIT_CSUM)
2743 hlen = bnx2x_set_pbd_csum_e2(bp, skb,
2744 &pbd_e2_parsing_data,
2748 * fill in the MAC addresses in the PBD - for local
2751 bnx2x_set_fw_mac_addr(&pbd_e2->src_mac_addr_hi,
2752 &pbd_e2->src_mac_addr_mid,
2753 &pbd_e2->src_mac_addr_lo,
2755 bnx2x_set_fw_mac_addr(&pbd_e2->dst_mac_addr_hi,
2756 &pbd_e2->dst_mac_addr_mid,
2757 &pbd_e2->dst_mac_addr_lo,
2761 pbd_e1x = &txdata->tx_desc_ring[bd_prod].parse_bd_e1x;
2762 memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x));
2763 /* Set PBD in checksum offload case */
2764 if (xmit_type & XMIT_CSUM)
2765 hlen = bnx2x_set_pbd_csum(bp, skb, pbd_e1x, xmit_type);
2769 /* Setup the data pointer of the first BD of the packet */
2770 tx_start_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2771 tx_start_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2772 nbd = 2; /* start_bd + pbd + frags (updated when pages are mapped) */
2773 tx_start_bd->nbytes = cpu_to_le16(skb_headlen(skb));
2774 pkt_size = tx_start_bd->nbytes;
2776 DP(NETIF_MSG_TX_QUEUED, "first bd @%p addr (%x:%x) nbd %d"
2777 " nbytes %d flags %x vlan %x\n",
2778 tx_start_bd, tx_start_bd->addr_hi, tx_start_bd->addr_lo,
2779 le16_to_cpu(tx_start_bd->nbd), le16_to_cpu(tx_start_bd->nbytes),
2780 tx_start_bd->bd_flags.as_bitfield,
2781 le16_to_cpu(tx_start_bd->vlan_or_ethertype));
2783 if (xmit_type & XMIT_GSO) {
2785 DP(NETIF_MSG_TX_QUEUED,
2786 "TSO packet len %d hlen %d total len %d tso size %d\n",
2787 skb->len, hlen, skb_headlen(skb),
2788 skb_shinfo(skb)->gso_size);
2790 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO;
2792 if (unlikely(skb_headlen(skb) > hlen))
2793 bd_prod = bnx2x_tx_split(bp, txdata, tx_buf,
2796 if (!CHIP_IS_E1x(bp))
2797 bnx2x_set_pbd_gso_e2(skb, &pbd_e2_parsing_data,
2800 bnx2x_set_pbd_gso(skb, pbd_e1x, xmit_type);
2803 /* Set the PBD's parsing_data field if not zero
2804 * (for the chips newer than 57711).
2806 if (pbd_e2_parsing_data)
2807 pbd_e2->parsing_data = cpu_to_le32(pbd_e2_parsing_data);
2809 tx_data_bd = (struct eth_tx_bd *)tx_start_bd;
2811 /* Handle fragmented skb */
2812 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2813 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2815 mapping = skb_frag_dma_map(&bp->pdev->dev, frag, 0,
2816 skb_frag_size(frag), DMA_TO_DEVICE);
2817 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
2819 DP(NETIF_MSG_TX_QUEUED, "Unable to map page - "
2820 "dropping packet...\n");
2822 /* we need to unmap all buffers already mapped
2824 * first_bd->nbd needs to be properly updated
2825 * before the call to bnx2x_free_tx_pkt
2827 first_bd->nbd = cpu_to_le16(nbd);
2828 bnx2x_free_tx_pkt(bp, txdata,
2829 TX_BD(txdata->tx_pkt_prod));
2830 return NETDEV_TX_OK;
2833 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2834 tx_data_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2835 if (total_pkt_bd == NULL)
2836 total_pkt_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2838 tx_data_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2839 tx_data_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2840 tx_data_bd->nbytes = cpu_to_le16(skb_frag_size(frag));
2841 le16_add_cpu(&pkt_size, skb_frag_size(frag));
2844 DP(NETIF_MSG_TX_QUEUED,
2845 "frag %d bd @%p addr (%x:%x) nbytes %d\n",
2846 i, tx_data_bd, tx_data_bd->addr_hi, tx_data_bd->addr_lo,
2847 le16_to_cpu(tx_data_bd->nbytes));
2850 DP(NETIF_MSG_TX_QUEUED, "last bd @%p\n", tx_data_bd);
2852 /* update with actual num BDs */
2853 first_bd->nbd = cpu_to_le16(nbd);
2855 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2857 /* now send a tx doorbell, counting the next BD
2858 * if the packet contains or ends with it
2860 if (TX_BD_POFF(bd_prod) < nbd)
2863 /* total_pkt_bytes should be set on the first data BD if
2864 * it's not an LSO packet and there is more than one
2865 * data BD. In this case pkt_size is limited by an MTU value.
2866 * However we prefer to set it for an LSO packet (while we don't
2867 * have to) in order to save some CPU cycles in the non-LSO
2868 * case, where we care about them much more.
2870 if (total_pkt_bd != NULL)
2871 total_pkt_bd->total_pkt_bytes = pkt_size;
2874 DP(NETIF_MSG_TX_QUEUED,
2875 "PBD (E1X) @%p ip_data %x ip_hlen %u ip_id %u lso_mss %u"
2876 " tcp_flags %x xsum %x seq %u hlen %u\n",
2877 pbd_e1x, pbd_e1x->global_data, pbd_e1x->ip_hlen_w,
2878 pbd_e1x->ip_id, pbd_e1x->lso_mss, pbd_e1x->tcp_flags,
2879 pbd_e1x->tcp_pseudo_csum, pbd_e1x->tcp_send_seq,
2880 le16_to_cpu(pbd_e1x->total_hlen_w));
2882 DP(NETIF_MSG_TX_QUEUED,
2883 "PBD (E2) @%p dst %x %x %x src %x %x %x parsing_data %x\n",
2884 pbd_e2, pbd_e2->dst_mac_addr_hi, pbd_e2->dst_mac_addr_mid,
2885 pbd_e2->dst_mac_addr_lo, pbd_e2->src_mac_addr_hi,
2886 pbd_e2->src_mac_addr_mid, pbd_e2->src_mac_addr_lo,
2887 pbd_e2->parsing_data);
2888 DP(NETIF_MSG_TX_QUEUED, "doorbell: nbd %d bd %u\n", nbd, bd_prod);
2890 txdata->tx_pkt_prod++;
2892 * Make sure that the BD data is updated before updating the producer
2893 * since FW might read the BD right after the producer is updated.
2894 * This is only applicable for weak-ordered memory model archs such
2895 * as IA-64. The following barrier is also mandatory since FW
2896 * assumes packets must have BDs.
2900 txdata->tx_db.data.prod += nbd;
2903 DOORBELL(bp, txdata->cid, txdata->tx_db.raw);
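	/* The doorbell write above hands the updated producer to the chip so
	 * it can start fetching the BDs we just built for this connection
	 * (cid).
	 */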
2907 txdata->tx_bd_prod += nbd;
2909 if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_SKB_FRAGS + 4)) {
2910 netif_tx_stop_queue(txq);
2912 /* paired memory barrier is in bnx2x_tx_int(), we have to keep
2913 * ordering of set_bit() in netif_tx_stop_queue() and read of
2917 fp->eth_q_stats.driver_xoff++;
2918 if (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 4)
2919 netif_tx_wake_queue(txq);
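		/* Stopping the queue and then re-checking availability (and
		 * waking it if room reappeared) closes the race with the
		 * completion path: bnx2x_tx_int() may have freed BDs between
		 * our availability test and netif_tx_stop_queue().
		 */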
2923 return NETDEV_TX_OK;
2927 * bnx2x_setup_tc - routine to configure net_device for multi tc
2929 * @dev: net device to configure
2930 * @num_tc: number of traffic classes to enable
2932 * callback connected to the ndo_setup_tc function pointer
2934 int bnx2x_setup_tc(struct net_device *dev, u8 num_tc)
2936 int cos, prio, count, offset;
2937 struct bnx2x *bp = netdev_priv(dev);
2939 /* setup tc must be called under rtnl lock */
2942 /* no traffic classes requested. aborting */
2944 netdev_reset_tc(dev);
2948 /* requested to support too many traffic classes */
2949 if (num_tc > bp->max_cos) {
2950 DP(NETIF_MSG_TX_ERR, "support for too many traffic classes"
2951 " requested: %d. max supported is %d\n",
2952 num_tc, bp->max_cos);
2956 /* declare amount of supported traffic classes */
2957 if (netdev_set_num_tc(dev, num_tc)) {
2958 DP(NETIF_MSG_TX_ERR, "failed to declare %d traffic classes\n",
2963 /* configure priority to traffic class mapping */
2964 for (prio = 0; prio < BNX2X_MAX_PRIORITY; prio++) {
2965 netdev_set_prio_tc_map(dev, prio, bp->prio_to_cos[prio]);
2966 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d\n",
2967 prio, bp->prio_to_cos[prio]);
2971 /* Use this configuration to differentiate tc0 from other COSes
2972 This can be used for ets or pfc, and saves the effort of setting
2973 up a multi-class queueing discipline or negotiating DCBX with a switch
2974 netdev_set_prio_tc_map(dev, 0, 0);
2975 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d\n", 0, 0);
2976 for (prio = 1; prio < 16; prio++) {
2977 netdev_set_prio_tc_map(dev, prio, 1);
2978 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d\n", prio, 1);
2981 /* configure traffic class to transmission queue mapping */
2982 for (cos = 0; cos < bp->max_cos; cos++) {
2983 count = BNX2X_NUM_ETH_QUEUES(bp);
2984 offset = cos * MAX_TXQS_PER_COS;
2985 netdev_set_tc_queue(dev, cos, count, offset);
2986 DP(BNX2X_MSG_SP, "mapping tc %d to offset %d count %d\n",
2987 cos, offset, count);
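	/* With the mapping above, traffic class N is served by the
	 * contiguous block of transmit queues starting at
	 * N * MAX_TXQS_PER_COS and spanning BNX2X_NUM_ETH_QUEUES(bp)
	 * entries (e.g. tc 1 -> queues [MAX_TXQS_PER_COS,
	 * MAX_TXQS_PER_COS + count)).
	 */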
2993 /* called with rtnl_lock */
2994 int bnx2x_change_mac_addr(struct net_device *dev, void *p)
2996 struct sockaddr *addr = p;
2997 struct bnx2x *bp = netdev_priv(dev);
3000 if (!is_valid_ether_addr((u8 *)(addr->sa_data)))
3003 if (netif_running(dev)) {
3004 rc = bnx2x_set_eth_mac(bp, false);
3009 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
3011 if (netif_running(dev))
3012 rc = bnx2x_set_eth_mac(bp, true);
3017 static void bnx2x_free_fp_mem_at(struct bnx2x *bp, int fp_index)
3019 union host_hc_status_block *sb = &bnx2x_fp(bp, fp_index, status_blk);
3020 struct bnx2x_fastpath *fp = &bp->fp[fp_index];
3025 if (IS_FCOE_IDX(fp_index)) {
3026 memset(sb, 0, sizeof(union host_hc_status_block));
3027 fp->status_blk_mapping = 0;
3032 if (!CHIP_IS_E1x(bp))
3033 BNX2X_PCI_FREE(sb->e2_sb,
3034 bnx2x_fp(bp, fp_index,
3035 status_blk_mapping),
3036 sizeof(struct host_hc_status_block_e2));
3038 BNX2X_PCI_FREE(sb->e1x_sb,
3039 bnx2x_fp(bp, fp_index,
3040 status_blk_mapping),
3041 sizeof(struct host_hc_status_block_e1x));
3046 if (!skip_rx_queue(bp, fp_index)) {
3047 bnx2x_free_rx_bds(fp);
3049 /* fastpath rx rings: rx_buf rx_desc rx_comp */
3050 BNX2X_FREE(bnx2x_fp(bp, fp_index, rx_buf_ring));
3051 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_desc_ring),
3052 bnx2x_fp(bp, fp_index, rx_desc_mapping),
3053 sizeof(struct eth_rx_bd) * NUM_RX_BD);
3055 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_comp_ring),
3056 bnx2x_fp(bp, fp_index, rx_comp_mapping),
3057 sizeof(struct eth_fast_path_rx_cqe) *
3061 BNX2X_FREE(bnx2x_fp(bp, fp_index, rx_page_ring));
3062 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_sge_ring),
3063 bnx2x_fp(bp, fp_index, rx_sge_mapping),
3064 BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
3068 if (!skip_tx_queue(bp, fp_index)) {
3069 /* fastpath tx rings: tx_buf tx_desc */
3070 for_each_cos_in_tx_queue(fp, cos) {
3071 struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
3074 "freeing tx memory of fp %d cos %d cid %d\n",
3075 fp_index, cos, txdata->cid);
3077 BNX2X_FREE(txdata->tx_buf_ring);
3078 BNX2X_PCI_FREE(txdata->tx_desc_ring,
3079 txdata->tx_desc_mapping,
3080 sizeof(union eth_tx_bd_types) * NUM_TX_BD);
3083 /* end of fastpath */
3086 void bnx2x_free_fp_mem(struct bnx2x *bp)
3089 for_each_queue(bp, i)
3090 bnx2x_free_fp_mem_at(bp, i);
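/* set_sb_shortcuts() below caches pointers into the chip-specific status
 * block layout (E2 vs E1x), presumably so the fastpath can read the
 * index_values and running_index arrays without re-deciding the chip
 * family on every access.
 */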
3093 static inline void set_sb_shortcuts(struct bnx2x *bp, int index)
3095 union host_hc_status_block status_blk = bnx2x_fp(bp, index, status_blk);
3096 if (!CHIP_IS_E1x(bp)) {
3097 bnx2x_fp(bp, index, sb_index_values) =
3098 (__le16 *)status_blk.e2_sb->sb.index_values;
3099 bnx2x_fp(bp, index, sb_running_index) =
3100 (__le16 *)status_blk.e2_sb->sb.running_index;
3102 bnx2x_fp(bp, index, sb_index_values) =
3103 (__le16 *)status_blk.e1x_sb->sb.index_values;
3104 bnx2x_fp(bp, index, sb_running_index) =
3105 (__le16 *)status_blk.e1x_sb->sb.running_index;
3109 static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index)
3111 union host_hc_status_block *sb;
3112 struct bnx2x_fastpath *fp = &bp->fp[index];
3115 int rx_ring_size = 0;
3117 /* if rx_ring_size specified - use it */
3118 if (!bp->rx_ring_size) {
3120 rx_ring_size = MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp);
3122 /* allocate at least number of buffers required by FW */
3123 rx_ring_size = max_t(int, bp->disable_tpa ? MIN_RX_SIZE_NONTPA :
3124 MIN_RX_SIZE_TPA, rx_ring_size);
3126 bp->rx_ring_size = rx_ring_size;
3128 rx_ring_size = bp->rx_ring_size;
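	/* If bp->rx_ring_size was not set, the default above spreads
	 * MAX_RX_AVAIL across the RX queues and clamps the result to the FW
	 * minimum (TPA vs non-TPA); otherwise the stored value is used as-is.
	 */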
3131 sb = &bnx2x_fp(bp, index, status_blk);
3133 if (!IS_FCOE_IDX(index)) {
3136 if (!CHIP_IS_E1x(bp))
3137 BNX2X_PCI_ALLOC(sb->e2_sb,
3138 &bnx2x_fp(bp, index, status_blk_mapping),
3139 sizeof(struct host_hc_status_block_e2));
3141 BNX2X_PCI_ALLOC(sb->e1x_sb,
3142 &bnx2x_fp(bp, index, status_blk_mapping),
3143 sizeof(struct host_hc_status_block_e1x));
3148 /* FCoE Queue uses Default SB and doesn't ACK the SB, thus no need to
3149 * set shortcuts for it.
3151 if (!IS_FCOE_IDX(index))
3152 set_sb_shortcuts(bp, index);
3155 if (!skip_tx_queue(bp, index)) {
3156 /* fastpath tx rings: tx_buf tx_desc */
3157 for_each_cos_in_tx_queue(fp, cos) {
3158 struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
3160 DP(BNX2X_MSG_SP, "allocating tx memory of "
3164 BNX2X_ALLOC(txdata->tx_buf_ring,
3165 sizeof(struct sw_tx_bd) * NUM_TX_BD);
3166 BNX2X_PCI_ALLOC(txdata->tx_desc_ring,
3167 &txdata->tx_desc_mapping,
3168 sizeof(union eth_tx_bd_types) * NUM_TX_BD);
3173 if (!skip_rx_queue(bp, index)) {
3174 /* fastpath rx rings: rx_buf rx_desc rx_comp */
3175 BNX2X_ALLOC(bnx2x_fp(bp, index, rx_buf_ring),
3176 sizeof(struct sw_rx_bd) * NUM_RX_BD);
3177 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_desc_ring),
3178 &bnx2x_fp(bp, index, rx_desc_mapping),
3179 sizeof(struct eth_rx_bd) * NUM_RX_BD);
3181 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_comp_ring),
3182 &bnx2x_fp(bp, index, rx_comp_mapping),
3183 sizeof(struct eth_fast_path_rx_cqe) *
3187 BNX2X_ALLOC(bnx2x_fp(bp, index, rx_page_ring),
3188 sizeof(struct sw_rx_page) * NUM_RX_SGE);
3189 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_sge_ring),
3190 &bnx2x_fp(bp, index, rx_sge_mapping),
3191 BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
3193 bnx2x_set_next_page_rx_bd(fp);
3196 bnx2x_set_next_page_rx_cq(fp);
3199 ring_size = bnx2x_alloc_rx_bds(fp, rx_ring_size);
3200 if (ring_size < rx_ring_size)
3206 /* handles low memory cases */
3208 BNX2X_ERR("Unable to allocate full memory for queue %d (size %d)\n",
3210 /* FW will drop all packets if the queue is not big enough.
3211 * In these cases we disable the queue.
3212 * Min size is different for OOO, TPA and non-TPA queues
3214 if (ring_size < (fp->disable_tpa ?
3215 MIN_RX_SIZE_NONTPA : MIN_RX_SIZE_TPA)) {
3216 /* release memory allocated for this queue */
3217 bnx2x_free_fp_mem_at(bp, index);
3223 int bnx2x_alloc_fp_mem(struct bnx2x *bp)
3228 * 1. Allocate FP for leading - fatal if error
3229 * 2. {CNIC} Allocate FCoE FP - fatal if error
3230 * 3. {CNIC} Allocate OOO + FWD - disable OOO if error
3231 * 4. Allocate RSS - fix number of queues if error
3235 if (bnx2x_alloc_fp_mem_at(bp, 0))
3241 if (bnx2x_alloc_fp_mem_at(bp, FCOE_IDX))
3242 /* we will fail the load process instead of marking NO_FCOE_FLAG */
3249 for_each_nondefault_eth_queue(bp, i)
3250 if (bnx2x_alloc_fp_mem_at(bp, i))
3253 /* handle memory failures */
3254 if (i != BNX2X_NUM_ETH_QUEUES(bp)) {
3255 int delta = BNX2X_NUM_ETH_QUEUES(bp) - i;
3260 * move non-eth FPs next to the last eth FP
3261 * must be done in this order:
3262 * FCOE_IDX < FWD_IDX < OOO_IDX
3265 /* move the FCoE fp even if NO_FCOE_FLAG is on */
3266 bnx2x_move_fp(bp, FCOE_IDX, FCOE_IDX - delta);
3268 bp->num_queues -= delta;