1 /* bnx2x_cmn.c: Broadcom Everest network driver.
3 * Copyright (c) 2007-2011 Broadcom Corporation
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation.
9 * Maintained by: Eilon Greenstein <eilong@broadcom.com>
10 * Written by: Eliezer Tamir
11 * Based on code from Michael Chan's bnx2 driver
12 * UDP CSUM errata workaround by Arik Gendelman
13 * Slowpath and fastpath rework by Vladislav Zolotarov
14 * Statistics and Link management by Yitchak Gertner
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20 #include <linux/etherdevice.h>
21 #include <linux/if_vlan.h>
22 #include <linux/interrupt.h>
25 #include <net/ip6_checksum.h>
26 #include <linux/firmware.h>
27 #include <linux/prefetch.h>
28 #include "bnx2x_cmn.h"
29 #include "bnx2x_init.h"
35 * bnx2x_bz_fp - zero content of the fastpath structure.
38 * @index: fastpath index to be zeroed
40 * Makes sure the contents of bp->fp[index].napi are kept intact.
43 static inline void bnx2x_bz_fp(struct bnx2x *bp, int index)
45 struct bnx2x_fastpath *fp = &bp->fp[index];
46 struct napi_struct orig_napi = fp->napi;
47 /* bzero bnx2x_fastpath contents */
48 memset(fp, 0, sizeof(*fp));
50 /* Restore the NAPI object as it has been already initialized */
56 fp->max_cos = bp->max_cos;
58 /* Special queues support only one CoS */
62 * set the tpa flag for each queue. The tpa flag determines the queue
63 * minimal size so it must be set prior to queue memory allocation
65 fp->disable_tpa = ((bp->flags & TPA_ENABLE_FLAG) == 0);
68 /* We don't want TPA on an FCoE L2 ring */
75 * bnx2x_move_fp - move content of the fastpath structure.
78 * @from: source FP index
79 * @to: destination FP index
81 * Makes sure the contents of bp->fp[to].napi are kept intact.
84 static inline void bnx2x_move_fp(struct bnx2x *bp, int from, int to)
86 struct bnx2x_fastpath *from_fp = &bp->fp[from];
87 struct bnx2x_fastpath *to_fp = &bp->fp[to];
88 struct napi_struct orig_napi = to_fp->napi;
89 /* Move bnx2x_fastpath contents */
90 memcpy(to_fp, from_fp, sizeof(*to_fp));
93 /* Restore the NAPI object as it has been already initialized */
94 to_fp->napi = orig_napi;
97 int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */
99 /* free skb in the packet ring at pos idx
100 * return idx of last bd freed
102 static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata,
105 struct sw_tx_bd *tx_buf = &txdata->tx_buf_ring[idx];
106 struct eth_tx_start_bd *tx_start_bd;
107 struct eth_tx_bd *tx_data_bd;
108 struct sk_buff *skb = tx_buf->skb;
109 u16 bd_idx = TX_BD(tx_buf->first_bd), new_cons;
112 /* prefetch skb end pointer to speed up dev_kfree_skb() */
115 DP(BNX2X_MSG_FP, "fp[%d]: pkt_idx %d buff @(%p)->skb %p\n",
116 txdata->txq_index, idx, tx_buf, skb);
119 DP(BNX2X_MSG_OFF, "free bd_idx %d\n", bd_idx);
120 tx_start_bd = &txdata->tx_desc_ring[bd_idx].start_bd;
121 dma_unmap_single(&bp->pdev->dev, BD_UNMAP_ADDR(tx_start_bd),
122 BD_UNMAP_LEN(tx_start_bd), DMA_TO_DEVICE);
125 nbd = le16_to_cpu(tx_start_bd->nbd) - 1;
126 #ifdef BNX2X_STOP_ON_ERROR
127 if ((nbd - 1) > (MAX_SKB_FRAGS + 2)) {
128 BNX2X_ERR("BAD nbd!\n");
132 new_cons = nbd + tx_buf->first_bd;
134 /* Get the next bd */
135 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
137 /* Skip a parse bd... */
139 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
141 /* ...and the TSO split header bd since they have no mapping */
142 if (tx_buf->flags & BNX2X_TSO_SPLIT_BD) {
144 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
150 DP(BNX2X_MSG_OFF, "free frag bd_idx %d\n", bd_idx);
151 tx_data_bd = &txdata->tx_desc_ring[bd_idx].reg_bd;
152 dma_unmap_page(&bp->pdev->dev, BD_UNMAP_ADDR(tx_data_bd),
153 BD_UNMAP_LEN(tx_data_bd), DMA_TO_DEVICE);
155 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
160 dev_kfree_skb_any(skb);
161 tx_buf->first_bd = 0;
167 int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata)
169 struct netdev_queue *txq;
170 u16 hw_cons, sw_cons, bd_cons = txdata->tx_bd_cons;
172 #ifdef BNX2X_STOP_ON_ERROR
173 if (unlikely(bp->panic))
177 txq = netdev_get_tx_queue(bp->dev, txdata->txq_index);
178 hw_cons = le16_to_cpu(*txdata->tx_cons_sb);
179 sw_cons = txdata->tx_pkt_cons;
181 while (sw_cons != hw_cons) {
184 pkt_cons = TX_BD(sw_cons);
186 DP(NETIF_MSG_TX_DONE, "queue[%d]: hw_cons %u sw_cons %u "
188 txdata->txq_index, hw_cons, sw_cons, pkt_cons);
190 bd_cons = bnx2x_free_tx_pkt(bp, txdata, pkt_cons);
194 txdata->tx_pkt_cons = sw_cons;
195 txdata->tx_bd_cons = bd_cons;
197 /* Need to make the tx_bd_cons update visible to start_xmit()
198 * before checking for netif_tx_queue_stopped(). Without the
199 * memory barrier, there is a small possibility that
200 * start_xmit() will miss it and cause the queue to be stopped forever.
202 * On the other hand we need an rmb() here to ensure the proper
203 * ordering of bit testing in the following
204 * netif_tx_queue_stopped(txq) call.
208 if (unlikely(netif_tx_queue_stopped(txq))) {
209 /* Taking tx_lock() is needed to prevent re-enabling the queue
210 * while it's empty. This could have happened if rx_action() gets
211 * suspended in bnx2x_tx_int() after the condition before
212 * netif_tx_wake_queue(), while tx_action (bnx2x_start_xmit()):
214 * stops the queue->sees fresh tx_bd_cons->releases the queue->
215 * sends some packets consuming the whole queue again->
219 __netif_tx_lock(txq, smp_processor_id());
221 if ((netif_tx_queue_stopped(txq)) &&
222 (bp->state == BNX2X_STATE_OPEN) &&
223 (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 4))
224 netif_tx_wake_queue(txq);
226 __netif_tx_unlock(txq);
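/* Track the highest SGE index reported by the FW; the signed 16-bit
 * subtraction keeps the comparison correct across index wraparound.
 */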
231 static inline void bnx2x_update_last_max_sge(struct bnx2x_fastpath *fp,
234 u16 last_max = fp->last_max_sge;
236 if (SUB_S16(idx, last_max) > 0)
237 fp->last_max_sge = idx;
240 static void bnx2x_update_sge_prod(struct bnx2x_fastpath *fp,
241 struct eth_fast_path_rx_cqe *fp_cqe)
243 struct bnx2x *bp = fp->bp;
244 u16 sge_len = SGE_PAGE_ALIGN(le16_to_cpu(fp_cqe->pkt_len) -
245 le16_to_cpu(fp_cqe->len_on_bd)) >>
247 u16 last_max, last_elem, first_elem;
254 /* First mark all used pages */
255 for (i = 0; i < sge_len; i++)
256 BIT_VEC64_CLEAR_BIT(fp->sge_mask,
257 RX_SGE(le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[i])));
259 DP(NETIF_MSG_RX_STATUS, "fp_cqe->sgl[%d] = %d\n",
260 sge_len - 1, le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[sge_len - 1]));
262 /* Here we assume that the last SGE index is the biggest */
263 prefetch((void *)(fp->sge_mask));
264 bnx2x_update_last_max_sge(fp,
265 le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[sge_len - 1]));
267 last_max = RX_SGE(fp->last_max_sge);
268 last_elem = last_max >> BIT_VEC64_ELEM_SHIFT;
269 first_elem = RX_SGE(fp->rx_sge_prod) >> BIT_VEC64_ELEM_SHIFT;
271 /* If ring is not full */
272 if (last_elem + 1 != first_elem)
275 /* Now update the prod */
276 for (i = first_elem; i != last_elem; i = NEXT_SGE_MASK_ELEM(i)) {
277 if (likely(fp->sge_mask[i]))
280 fp->sge_mask[i] = BIT_VEC64_ELEM_ONE_MASK;
281 delta += BIT_VEC64_ELEM_SZ;
285 fp->rx_sge_prod += delta;
286 /* clear page-end entries */
287 bnx2x_clear_sge_mask_next_elems(fp);
290 DP(NETIF_MSG_RX_STATUS,
291 "fp->last_max_sge = %d fp->rx_sge_prod = %d\n",
292 fp->last_max_sge, fp->rx_sge_prod);
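/* bnx2x_tpa_start - open a TPA aggregation bin: hand the bin's spare skb to
 * the RX ring producer and park the partially filled buffer in the per-queue
 * bin until the matching TPA_STOP CQE arrives.
 */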
295 static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
296 struct sk_buff *skb, u16 cons, u16 prod,
297 struct eth_fast_path_rx_cqe *cqe)
299 struct bnx2x *bp = fp->bp;
300 struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
301 struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
302 struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
304 struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
305 struct sw_rx_bd *first_buf = &tpa_info->first_buf;
307 /* print error if current state != stop */
308 if (tpa_info->tpa_state != BNX2X_TPA_STOP)
309 BNX2X_ERR("start of bin not in stop [%d]\n", queue);
311 /* Try to map an empty skb from the aggregation info */
312 mapping = dma_map_single(&bp->pdev->dev,
313 first_buf->skb->data,
314 fp->rx_buf_size, DMA_FROM_DEVICE);
316 * ...if it fails - move the skb from the consumer to the producer
317 * and set the current aggregation state as ERROR to drop it
318 * when TPA_STOP arrives.
321 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
322 /* Move the BD from the consumer to the producer */
323 bnx2x_reuse_rx_skb(fp, cons, prod);
324 tpa_info->tpa_state = BNX2X_TPA_ERROR;
328 /* move empty skb from pool to prod */
329 prod_rx_buf->skb = first_buf->skb;
330 dma_unmap_addr_set(prod_rx_buf, mapping, mapping);
331 /* point prod_bd to new skb */
332 prod_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
333 prod_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
335 /* move partial skb from cons to pool (don't unmap yet) */
336 *first_buf = *cons_rx_buf;
338 /* mark bin state as START */
339 tpa_info->parsing_flags =
340 le16_to_cpu(cqe->pars_flags.flags);
341 tpa_info->vlan_tag = le16_to_cpu(cqe->vlan_tag);
342 tpa_info->tpa_state = BNX2X_TPA_START;
343 tpa_info->len_on_bd = le16_to_cpu(cqe->len_on_bd);
344 tpa_info->placement_offset = cqe->placement_offset;
346 #ifdef BNX2X_STOP_ON_ERROR
347 fp->tpa_queue_used |= (1 << queue);
348 #ifdef _ASM_GENERIC_INT_L64_H
349 DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%lx\n",
351 DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%llx\n",
357 /* Timestamp option length allowed for TPA aggregation:
359 * nop nop kind length echo val
361 #define TPA_TSTAMP_OPT_LEN 12
363 * bnx2x_set_lro_mss - calculate the approximate value of the MSS
366 * @parsing_flags: parsing flags from the START CQE
367 * @len_on_bd: total length of the first packet for the
370 * Approximate value of the MSS for this aggregation, calculated from
371 * its first packet.
373 static inline u16 bnx2x_set_lro_mss(struct bnx2x *bp, u16 parsing_flags,
377 * TPA aggregation won't have IP options, TCP options other than the
378 * timestamp option, or IPv6 extension headers.
380 u16 hdrs_len = ETH_HLEN + sizeof(struct tcphdr);
382 if (GET_FLAG(parsing_flags, PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
383 PRS_FLAG_OVERETH_IPV6)
384 hdrs_len += sizeof(struct ipv6hdr);
386 hdrs_len += sizeof(struct iphdr);
389 /* Check if there was a TCP timestamp option; if there was, it will
390 * always be 12 bytes long: nop nop kind length echo val.
392 * Otherwise the FW would close the aggregation.
394 if (parsing_flags & PARSING_FLAGS_TIME_STAMP_EXIST_FLAG)
395 hdrs_len += TPA_TSTAMP_OPT_LEN;
397 return len_on_bd - hdrs_len;
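/* bnx2x_fill_frag_skb - attach the aggregation's SGE pages to the skb as page
 * fragments, allocating a replacement page for each consumed ring slot, and
 * set gso_size so the stack can re-segment the aggregated packet if it is
 * forwarded.
 */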
400 static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
401 u16 queue, struct sk_buff *skb,
402 struct eth_end_agg_rx_cqe *cqe,
405 struct sw_rx_page *rx_pg, old_rx_pg;
406 u32 i, frag_len, frag_size, pages;
409 struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
410 u16 len_on_bd = tpa_info->len_on_bd;
412 frag_size = le16_to_cpu(cqe->pkt_len) - len_on_bd;
413 pages = SGE_PAGE_ALIGN(frag_size) >> SGE_PAGE_SHIFT;
415 /* This is needed in order to enable forwarding support */
417 skb_shinfo(skb)->gso_size = bnx2x_set_lro_mss(bp,
418 tpa_info->parsing_flags, len_on_bd);
420 #ifdef BNX2X_STOP_ON_ERROR
421 if (pages > min_t(u32, 8, MAX_SKB_FRAGS)*SGE_PAGE_SIZE*PAGES_PER_SGE) {
422 BNX2X_ERR("SGL length is too long: %d. CQE index is %d\n",
424 BNX2X_ERR("cqe->pkt_len = %d\n", cqe->pkt_len);
430 /* Run through the SGL and compose the fragmented skb */
431 for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) {
432 u16 sge_idx = RX_SGE(le16_to_cpu(cqe->sgl_or_raw_data.sgl[j]));
434 /* FW gives the indices of the SGE as if the ring is an array
435 (meaning that "next" element will consume 2 indices) */
436 frag_len = min(frag_size, (u32)(SGE_PAGE_SIZE*PAGES_PER_SGE));
437 rx_pg = &fp->rx_page_ring[sge_idx];
440 /* If we fail to allocate a substitute page, we simply stop
441 where we are and drop the whole packet */
442 err = bnx2x_alloc_rx_sge(bp, fp, sge_idx);
444 fp->eth_q_stats.rx_skb_alloc_failed++;
448 /* Unmap the page as we are going to pass it to the stack */
449 dma_unmap_page(&bp->pdev->dev,
450 dma_unmap_addr(&old_rx_pg, mapping),
451 SGE_PAGE_SIZE*PAGES_PER_SGE, DMA_FROM_DEVICE);
453 /* Add one frag and update the appropriate fields in the skb */
454 skb_fill_page_desc(skb, j, old_rx_pg.page, 0, frag_len);
456 skb->data_len += frag_len;
457 skb->truesize += SGE_PAGE_SIZE * PAGES_PER_SGE;
458 skb->len += frag_len;
460 frag_size -= frag_len;
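/* bnx2x_tpa_stop - close a TPA aggregation bin: unmap the first buffer,
 * attach the SGE fragments and pass the skb up via GRO, then refill the bin
 * with a newly allocated skb. The aggregation is dropped on a prior error or
 * on allocation failure.
 */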
466 static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
467 u16 queue, struct eth_end_agg_rx_cqe *cqe,
470 struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
471 struct sw_rx_bd *rx_buf = &tpa_info->first_buf;
472 u8 pad = tpa_info->placement_offset;
473 u16 len = tpa_info->len_on_bd;
474 struct sk_buff *skb = rx_buf->skb;
476 struct sk_buff *new_skb;
477 u8 old_tpa_state = tpa_info->tpa_state;
479 tpa_info->tpa_state = BNX2X_TPA_STOP;
481 /* If there was an error during the handling of the TPA_START -
482 * drop this aggregation.
484 if (old_tpa_state == BNX2X_TPA_ERROR)
487 /* Try to allocate the new skb */
488 new_skb = netdev_alloc_skb(bp->dev, fp->rx_buf_size);
490 /* Unmap skb in the pool anyway, as we are going to change
491 pool entry status to BNX2X_TPA_STOP even if new skb allocation fails. */
493 dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(rx_buf, mapping),
494 fp->rx_buf_size, DMA_FROM_DEVICE);
496 if (likely(new_skb)) {
498 prefetch(((char *)(skb)) + L1_CACHE_BYTES);
500 #ifdef BNX2X_STOP_ON_ERROR
501 if (pad + len > fp->rx_buf_size) {
502 BNX2X_ERR("skb_put is about to fail... "
503 "pad %d len %d rx_buf_size %d\n",
504 pad, len, fp->rx_buf_size);
510 skb_reserve(skb, pad);
513 skb->protocol = eth_type_trans(skb, bp->dev);
514 skb->ip_summed = CHECKSUM_UNNECESSARY;
516 if (!bnx2x_fill_frag_skb(bp, fp, queue, skb, cqe, cqe_idx)) {
517 if (tpa_info->parsing_flags & PARSING_FLAGS_VLAN)
518 __vlan_hwaccel_put_tag(skb, tpa_info->vlan_tag);
519 napi_gro_receive(&fp->napi, skb);
521 DP(NETIF_MSG_RX_STATUS, "Failed to allocate new pages"
522 " - dropping packet!\n");
523 dev_kfree_skb_any(skb);
527 /* put new skb in bin */
528 rx_buf->skb = new_skb;
534 /* drop the packet and keep the buffer in the bin */
535 DP(NETIF_MSG_RX_STATUS,
536 "Failed to allocate or map a new skb - dropping packet!\n");
537 fp->eth_q_stats.rx_skb_alloc_failed++;
540 /* Set Toeplitz hash value in the skb using the value from the
541 * CQE (calculated by HW).
543 static inline void bnx2x_set_skb_rxhash(struct bnx2x *bp, union eth_rx_cqe *cqe,
546 /* Set Toeplitz hash from CQE */
547 if ((bp->dev->features & NETIF_F_RXHASH) &&
548 (cqe->fast_path_cqe.status_flags &
549 ETH_FAST_PATH_RX_CQE_RSS_HASH_FLG))
551 le32_to_cpu(cqe->fast_path_cqe.rss_hash_result);
554 static void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe,
555 struct bnx2x_fastpath *fp)
557 /* Do nothing if no IP/L4 csum validation was done */
559 if (cqe->fast_path_cqe.status_flags &
560 (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG |
561 ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG))
564 /* If both IP/L4 validation were done, check if an error was found. */
566 if (cqe->fast_path_cqe.type_error_flags &
567 (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG |
568 ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG))
569 fp->eth_q_stats.hw_csum_err++;
571 skb->ip_summed = CHECKSUM_UNNECESSARY;
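/* bnx2x_rx_int - process up to @budget completions from the RX completion
 * queue: slow-path CQEs are dispatched to bnx2x_sp_event(), TPA start/stop
 * CQEs to the TPA handlers, and regular packets are checksum-validated and
 * handed to napi_gro_receive().
 */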
574 int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
576 struct bnx2x *bp = fp->bp;
577 u16 bd_cons, bd_prod, bd_prod_fw, comp_ring_cons;
578 u16 hw_comp_cons, sw_comp_cons, sw_comp_prod;
581 #ifdef BNX2X_STOP_ON_ERROR
582 if (unlikely(bp->panic))
586 /* CQ "next element" is the same size as a regular element,
587 that's why it's ok here */
588 hw_comp_cons = le16_to_cpu(*fp->rx_cons_sb);
589 if ((hw_comp_cons & MAX_RCQ_DESC_CNT) == MAX_RCQ_DESC_CNT)
592 bd_cons = fp->rx_bd_cons;
593 bd_prod = fp->rx_bd_prod;
594 bd_prod_fw = bd_prod;
595 sw_comp_cons = fp->rx_comp_cons;
596 sw_comp_prod = fp->rx_comp_prod;
598 /* Memory barrier necessary as speculative reads of the rx
599 * buffer can be ahead of the index in the status block
603 DP(NETIF_MSG_RX_STATUS,
604 "queue[%d]: hw_comp_cons %u sw_comp_cons %u\n",
605 fp->index, hw_comp_cons, sw_comp_cons);
607 while (sw_comp_cons != hw_comp_cons) {
608 struct sw_rx_bd *rx_buf = NULL;
610 union eth_rx_cqe *cqe;
611 struct eth_fast_path_rx_cqe *cqe_fp;
613 enum eth_rx_cqe_type cqe_fp_type;
616 #ifdef BNX2X_STOP_ON_ERROR
617 if (unlikely(bp->panic))
621 comp_ring_cons = RCQ_BD(sw_comp_cons);
622 bd_prod = RX_BD(bd_prod);
623 bd_cons = RX_BD(bd_cons);
625 /* Prefetch the page containing the BD descriptor
626 at producer's index. It will be needed when a new skb is allocated. */
628 prefetch((void *)(PAGE_ALIGN((unsigned long)
629 (&fp->rx_desc_ring[bd_prod])) -
632 cqe = &fp->rx_comp_ring[comp_ring_cons];
633 cqe_fp = &cqe->fast_path_cqe;
634 cqe_fp_flags = cqe_fp->type_error_flags;
635 cqe_fp_type = cqe_fp_flags & ETH_FAST_PATH_RX_CQE_TYPE;
637 DP(NETIF_MSG_RX_STATUS, "CQE type %x err %x status %x"
638 " queue %x vlan %x len %u\n", CQE_TYPE(cqe_fp_flags),
639 cqe_fp_flags, cqe_fp->status_flags,
640 le32_to_cpu(cqe_fp->rss_hash_result),
641 le16_to_cpu(cqe_fp->vlan_tag), le16_to_cpu(cqe_fp->pkt_len));
643 /* is this a slowpath msg? */
644 if (unlikely(CQE_TYPE_SLOW(cqe_fp_type))) {
645 bnx2x_sp_event(fp, cqe);
648 /* this is an rx packet */
650 rx_buf = &fp->rx_buf_ring[bd_cons];
654 if (!CQE_TYPE_FAST(cqe_fp_type)) {
655 #ifdef BNX2X_STOP_ON_ERROR
657 if (fp->disable_tpa &&
658 (CQE_TYPE_START(cqe_fp_type) ||
659 CQE_TYPE_STOP(cqe_fp_type)))
660 BNX2X_ERR("START/STOP packet while "
661 "disable_tpa type %x\n",
662 CQE_TYPE(cqe_fp_type));
665 if (CQE_TYPE_START(cqe_fp_type)) {
666 u16 queue = cqe_fp->queue_index;
667 DP(NETIF_MSG_RX_STATUS,
668 "calling tpa_start on queue %d\n",
671 bnx2x_tpa_start(fp, queue, skb,
675 /* Set Toeplitz hash for LRO skb */
676 bnx2x_set_skb_rxhash(bp, cqe, skb);
682 cqe->end_agg_cqe.queue_index;
683 DP(NETIF_MSG_RX_STATUS,
684 "calling tpa_stop on queue %d\n",
687 bnx2x_tpa_stop(bp, fp, queue,
690 #ifdef BNX2X_STOP_ON_ERROR
695 bnx2x_update_sge_prod(fp, cqe_fp);
700 len = le16_to_cpu(cqe_fp->pkt_len);
701 pad = cqe_fp->placement_offset;
702 dma_sync_single_for_cpu(&bp->pdev->dev,
703 dma_unmap_addr(rx_buf, mapping),
704 pad + RX_COPY_THRESH,
706 prefetch(((char *)(skb)) + L1_CACHE_BYTES);
708 /* is this an error packet? */
709 if (unlikely(cqe_fp_flags & ETH_RX_ERROR_FALGS)) {
711 "ERROR flags %x rx packet %u\n",
712 cqe_fp_flags, sw_comp_cons);
713 fp->eth_q_stats.rx_err_discard_pkt++;
717 /* Since we don't have a jumbo ring
718 * copy small packets if mtu > 1500
720 if ((bp->dev->mtu > ETH_MAX_PACKET_SIZE) &&
721 (len <= RX_COPY_THRESH)) {
722 struct sk_buff *new_skb;
724 new_skb = netdev_alloc_skb(bp->dev, len + pad);
725 if (new_skb == NULL) {
727 "ERROR packet dropped "
728 "because of alloc failure\n");
729 fp->eth_q_stats.rx_skb_alloc_failed++;
734 skb_copy_from_linear_data_offset(skb, pad,
735 new_skb->data + pad, len);
736 skb_reserve(new_skb, pad);
737 skb_put(new_skb, len);
739 bnx2x_reuse_rx_skb(fp, bd_cons, bd_prod);
744 if (likely(bnx2x_alloc_rx_skb(bp, fp, bd_prod) == 0)) {
745 dma_unmap_single(&bp->pdev->dev,
746 dma_unmap_addr(rx_buf, mapping),
749 skb_reserve(skb, pad);
754 "ERROR packet dropped because "
755 "of alloc failure\n");
756 fp->eth_q_stats.rx_skb_alloc_failed++;
758 bnx2x_reuse_rx_skb(fp, bd_cons, bd_prod);
762 skb->protocol = eth_type_trans(skb, bp->dev);
764 /* Set Toeplitz hash for a non-LRO skb */
765 bnx2x_set_skb_rxhash(bp, cqe, skb);
767 skb_checksum_none_assert(skb);
769 if (bp->dev->features & NETIF_F_RXCSUM)
770 bnx2x_csum_validate(skb, cqe, fp);
774 skb_record_rx_queue(skb, fp->index);
776 if (le16_to_cpu(cqe_fp->pars_flags.flags) &
778 __vlan_hwaccel_put_tag(skb,
779 le16_to_cpu(cqe_fp->vlan_tag));
780 napi_gro_receive(&fp->napi, skb);
786 bd_cons = NEXT_RX_IDX(bd_cons);
787 bd_prod = NEXT_RX_IDX(bd_prod);
788 bd_prod_fw = NEXT_RX_IDX(bd_prod_fw);
791 sw_comp_prod = NEXT_RCQ_IDX(sw_comp_prod);
792 sw_comp_cons = NEXT_RCQ_IDX(sw_comp_cons);
794 if (rx_pkt == budget)
798 fp->rx_bd_cons = bd_cons;
799 fp->rx_bd_prod = bd_prod_fw;
800 fp->rx_comp_cons = sw_comp_cons;
801 fp->rx_comp_prod = sw_comp_prod;
803 /* Update producers */
804 bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod,
807 fp->rx_pkt += rx_pkt;
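/* MSI-X fastpath interrupt handler: acknowledge the status block with further
 * interrupts disabled for this fastpath and schedule its NAPI context.
 */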
813 static irqreturn_t bnx2x_msix_fp_int(int irq, void *fp_cookie)
815 struct bnx2x_fastpath *fp = fp_cookie;
816 struct bnx2x *bp = fp->bp;
819 DP(BNX2X_MSG_FP, "got an MSI-X interrupt on IDX:SB "
820 "[fp %d fw_sd %d igusb %d]\n",
821 fp->index, fp->fw_sb_id, fp->igu_sb_id);
822 bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0);
824 #ifdef BNX2X_STOP_ON_ERROR
825 if (unlikely(bp->panic))
829 /* Handle Rx and Tx according to MSI-X vector */
830 prefetch(fp->rx_cons_sb);
832 for_each_cos_in_tx_queue(fp, cos)
833 prefetch(fp->txdata[cos].tx_cons_sb);
835 prefetch(&fp->sb_running_index[SM_RX_ID]);
836 napi_schedule(&bnx2x_fp(bp, fp->index, napi));
841 /* HW Lock for shared dual port PHYs */
842 void bnx2x_acquire_phy_lock(struct bnx2x *bp)
844 mutex_lock(&bp->port.phy_mutex);
846 if (bp->port.need_hw_lock)
847 bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_MDIO);
850 void bnx2x_release_phy_lock(struct bnx2x *bp)
852 if (bp->port.need_hw_lock)
853 bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_MDIO);
855 mutex_unlock(&bp->port.phy_mutex);
858 /* calculates MF speed according to current line speed and MF configuration */
859 u16 bnx2x_get_mf_speed(struct bnx2x *bp)
861 u16 line_speed = bp->link_vars.line_speed;
863 u16 maxCfg = bnx2x_extract_max_cfg(bp,
864 bp->mf_config[BP_VN(bp)]);
866 /* Calculate the current MAX line speed limit for the MF devices */
870 line_speed = (line_speed * maxCfg) / 100;
872 u16 vn_max_rate = maxCfg * 100;
874 if (vn_max_rate < line_speed)
875 line_speed = vn_max_rate;
883 * bnx2x_fill_report_data - fill link report data to report
886 * @data: link state to update
888 * It uses non-atomic bit operations because it is called under the mutex.
890 static inline void bnx2x_fill_report_data(struct bnx2x *bp,
891 struct bnx2x_link_report_data *data)
893 u16 line_speed = bnx2x_get_mf_speed(bp);
895 memset(data, 0, sizeof(*data));
897 /* Fill the report data: effective line speed */
898 data->line_speed = line_speed;
901 if (!bp->link_vars.link_up || (bp->flags & MF_FUNC_DIS))
902 __set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
903 &data->link_report_flags);
906 if (bp->link_vars.duplex == DUPLEX_FULL)
907 __set_bit(BNX2X_LINK_REPORT_FD, &data->link_report_flags);
909 /* Rx Flow Control is ON */
910 if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_RX)
911 __set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
913 /* Tx Flow Control is ON */
914 if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_TX)
915 __set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
919 * bnx2x_link_report - report link status to OS.
923 * Calls __bnx2x_link_report() under the same locking scheme
924 * as the link/PHY state managing code to ensure consistent link reporting.
928 void bnx2x_link_report(struct bnx2x *bp)
930 bnx2x_acquire_phy_lock(bp);
931 __bnx2x_link_report(bp);
932 bnx2x_release_phy_lock(bp);
936 * __bnx2x_link_report - report link status to OS.
940 * Non-atomic implementation.
941 * Should be called under the phy_lock.
943 void __bnx2x_link_report(struct bnx2x *bp)
945 struct bnx2x_link_report_data cur_data;
949 bnx2x_read_mf_cfg(bp);
951 /* Read the current link report info */
952 bnx2x_fill_report_data(bp, &cur_data);
954 /* Don't report link down or exactly the same link status twice */
955 if (!memcmp(&cur_data, &bp->last_reported_link, sizeof(cur_data)) ||
956 (test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
957 &bp->last_reported_link.link_report_flags) &&
958 test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
959 &cur_data.link_report_flags)))
964 /* We are going to report new link parameters now -
965 * remember the current data for the next time.
967 memcpy(&bp->last_reported_link, &cur_data, sizeof(cur_data));
969 if (test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
970 &cur_data.link_report_flags)) {
971 netif_carrier_off(bp->dev);
972 netdev_err(bp->dev, "NIC Link is Down\n");
978 netif_carrier_on(bp->dev);
980 if (test_and_clear_bit(BNX2X_LINK_REPORT_FD,
981 &cur_data.link_report_flags))
986 /* Handle the FC at the end so that only these flags could possibly be
987 * set. This way we may easily check if there is no FC enabled. */
990 if (cur_data.link_report_flags) {
991 if (test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
992 &cur_data.link_report_flags)) {
993 if (test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
994 &cur_data.link_report_flags))
995 flow = "ON - receive & transmit";
997 flow = "ON - receive";
999 flow = "ON - transmit";
1004 netdev_info(bp->dev, "NIC Link is Up, %d Mbps %s duplex, Flow control: %s\n",
1005 cur_data.line_speed, duplex, flow);
1009 void bnx2x_init_rx_rings(struct bnx2x *bp)
1011 int func = BP_FUNC(bp);
1015 /* Allocate TPA resources */
1016 for_each_rx_queue(bp, j) {
1017 struct bnx2x_fastpath *fp = &bp->fp[j];
1020 "mtu %d rx_buf_size %d\n", bp->dev->mtu, fp->rx_buf_size);
1022 if (!fp->disable_tpa) {
1023 /* Fill the per-aggregation pool */
1024 for (i = 0; i < MAX_AGG_QS(bp); i++) {
1025 struct bnx2x_agg_info *tpa_info =
1027 struct sw_rx_bd *first_buf =
1028 &tpa_info->first_buf;
1030 first_buf->skb = netdev_alloc_skb(bp->dev,
1032 if (!first_buf->skb) {
1033 BNX2X_ERR("Failed to allocate TPA "
1034 "skb pool for queue[%d] - "
1035 "disabling TPA on this "
1037 bnx2x_free_tpa_pool(bp, fp, i);
1038 fp->disable_tpa = 1;
1041 dma_unmap_addr_set(first_buf, mapping, 0);
1042 tpa_info->tpa_state = BNX2X_TPA_STOP;
1045 /* "next page" elements initialization */
1046 bnx2x_set_next_page_sgl(fp);
1048 /* set SGEs bit mask */
1049 bnx2x_init_sge_ring_bit_mask(fp);
1051 /* Allocate SGEs and initialize the ring elements */
1052 for (i = 0, ring_prod = 0;
1053 i < MAX_RX_SGE_CNT*NUM_RX_SGE_PAGES; i++) {
1055 if (bnx2x_alloc_rx_sge(bp, fp, ring_prod) < 0) {
1056 BNX2X_ERR("was only able to allocate "
1058 BNX2X_ERR("disabling TPA for "
1060 /* Cleanup already allocated elements */
1061 bnx2x_free_rx_sge_range(bp, fp,
1063 bnx2x_free_tpa_pool(bp, fp,
1065 fp->disable_tpa = 1;
1069 ring_prod = NEXT_SGE_IDX(ring_prod);
1072 fp->rx_sge_prod = ring_prod;
1076 for_each_rx_queue(bp, j) {
1077 struct bnx2x_fastpath *fp = &bp->fp[j];
1081 /* Activate BD ring */
1083 * this will generate an interrupt (to the TSTORM);
1084 * it must only be done after the chip is initialized
1086 bnx2x_update_rx_prod(bp, fp, fp->rx_bd_prod, fp->rx_comp_prod,
1092 if (CHIP_IS_E1(bp)) {
1093 REG_WR(bp, BAR_USTRORM_INTMEM +
1094 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func),
1095 U64_LO(fp->rx_comp_mapping));
1096 REG_WR(bp, BAR_USTRORM_INTMEM +
1097 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func) + 4,
1098 U64_HI(fp->rx_comp_mapping));
1103 static void bnx2x_free_tx_skbs(struct bnx2x *bp)
1108 for_each_tx_queue(bp, i) {
1109 struct bnx2x_fastpath *fp = &bp->fp[i];
1110 for_each_cos_in_tx_queue(fp, cos) {
1111 struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
1113 u16 bd_cons = txdata->tx_bd_cons;
1114 u16 sw_prod = txdata->tx_pkt_prod;
1115 u16 sw_cons = txdata->tx_pkt_cons;
1117 while (sw_cons != sw_prod) {
1118 bd_cons = bnx2x_free_tx_pkt(bp, txdata,
1126 static void bnx2x_free_rx_bds(struct bnx2x_fastpath *fp)
1128 struct bnx2x *bp = fp->bp;
1131 /* ring wasn't allocated */
1132 if (fp->rx_buf_ring == NULL)
1135 for (i = 0; i < NUM_RX_BD; i++) {
1136 struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[i];
1137 struct sk_buff *skb = rx_buf->skb;
1141 dma_unmap_single(&bp->pdev->dev,
1142 dma_unmap_addr(rx_buf, mapping),
1143 fp->rx_buf_size, DMA_FROM_DEVICE);
1150 static void bnx2x_free_rx_skbs(struct bnx2x *bp)
1154 for_each_rx_queue(bp, j) {
1155 struct bnx2x_fastpath *fp = &bp->fp[j];
1157 bnx2x_free_rx_bds(fp);
1159 if (!fp->disable_tpa)
1160 bnx2x_free_tpa_pool(bp, fp, MAX_AGG_QS(bp));
1164 void bnx2x_free_skbs(struct bnx2x *bp)
1166 bnx2x_free_tx_skbs(bp);
1167 bnx2x_free_rx_skbs(bp);
1170 void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value)
1172 /* load old values */
1173 u32 mf_cfg = bp->mf_config[BP_VN(bp)];
1175 if (value != bnx2x_extract_max_cfg(bp, mf_cfg)) {
1176 /* leave all but MAX value */
1177 mf_cfg &= ~FUNC_MF_CFG_MAX_BW_MASK;
1179 /* set new MAX value */
1180 mf_cfg |= (value << FUNC_MF_CFG_MAX_BW_SHIFT)
1181 & FUNC_MF_CFG_MAX_BW_MASK;
1183 bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, mf_cfg);
1188 * bnx2x_free_msix_irqs - free previously requested MSI-X IRQ vectors
1190 * @bp: driver handle
1191 * @nvecs: number of vectors to be released
1193 static void bnx2x_free_msix_irqs(struct bnx2x *bp, int nvecs)
1197 if (nvecs == offset)
1199 free_irq(bp->msix_table[offset].vector, bp->dev);
1200 DP(NETIF_MSG_IFDOWN, "released sp irq (%d)\n",
1201 bp->msix_table[offset].vector);
1204 if (nvecs == offset)
1209 for_each_eth_queue(bp, i) {
1210 if (nvecs == offset)
1212 DP(NETIF_MSG_IFDOWN, "about to release fp #%d->%d "
1213 "irq\n", i, bp->msix_table[offset].vector);
1215 free_irq(bp->msix_table[offset++].vector, &bp->fp[i]);
1219 void bnx2x_free_irq(struct bnx2x *bp)
1221 if (bp->flags & USING_MSIX_FLAG)
1222 bnx2x_free_msix_irqs(bp, BNX2X_NUM_ETH_QUEUES(bp) +
1224 else if (bp->flags & USING_MSI_FLAG)
1225 free_irq(bp->pdev->irq, bp->dev);
1227 free_irq(bp->pdev->irq, bp->dev);
1230 int bnx2x_enable_msix(struct bnx2x *bp)
1232 int msix_vec = 0, i, rc, req_cnt;
1234 bp->msix_table[msix_vec].entry = msix_vec;
1235 DP(NETIF_MSG_IFUP, "msix_table[0].entry = %d (slowpath)\n",
1236 bp->msix_table[0].entry);
1240 bp->msix_table[msix_vec].entry = msix_vec;
1241 DP(NETIF_MSG_IFUP, "msix_table[%d].entry = %d (CNIC)\n",
1242 bp->msix_table[msix_vec].entry, bp->msix_table[msix_vec].entry);
1245 /* We need separate vectors for ETH queues only (not FCoE) */
1246 for_each_eth_queue(bp, i) {
1247 bp->msix_table[msix_vec].entry = msix_vec;
1248 DP(NETIF_MSG_IFUP, "msix_table[%d].entry = %d "
1249 "(fastpath #%u)\n", msix_vec, msix_vec, i);
1253 req_cnt = BNX2X_NUM_ETH_QUEUES(bp) + CNIC_PRESENT + 1;
1255 rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], req_cnt);
1258 * reconfigure number of tx/rx queues according to available MSI-X vectors
1261 if (rc >= BNX2X_MIN_MSIX_VEC_CNT) {
1262 /* how many fewer vectors will we have? */
1263 int diff = req_cnt - rc;
1266 "Trying to use less MSI-X vectors: %d\n", rc);
1268 rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], rc);
1272 "MSI-X is not attainable rc %d\n", rc);
1276 * decrease number of queues by number of unallocated entries
1278 bp->num_queues -= diff;
1280 DP(NETIF_MSG_IFUP, "New queue configuration set: %d\n",
1283 /* fall back to INTx if not enough memory */
1285 bp->flags |= DISABLE_MSI_FLAG;
1286 DP(NETIF_MSG_IFUP, "MSI-X is not attainable rc %d\n", rc);
1290 bp->flags |= USING_MSIX_FLAG;
1295 static int bnx2x_req_msix_irqs(struct bnx2x *bp)
1297 int i, rc, offset = 0;
1299 rc = request_irq(bp->msix_table[offset++].vector,
1300 bnx2x_msix_sp_int, 0,
1301 bp->dev->name, bp->dev);
1303 BNX2X_ERR("request sp irq failed\n");
1310 for_each_eth_queue(bp, i) {
1311 struct bnx2x_fastpath *fp = &bp->fp[i];
1312 snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
1315 rc = request_irq(bp->msix_table[offset].vector,
1316 bnx2x_msix_fp_int, 0, fp->name, fp);
1318 BNX2X_ERR("request fp #%d irq (%d) failed rc %d\n", i,
1319 bp->msix_table[offset].vector, rc);
1320 bnx2x_free_msix_irqs(bp, offset);
1327 i = BNX2X_NUM_ETH_QUEUES(bp);
1328 offset = 1 + CNIC_PRESENT;
1329 netdev_info(bp->dev, "using MSI-X IRQs: sp %d fp[%d] %d"
1331 bp->msix_table[0].vector,
1332 0, bp->msix_table[offset].vector,
1333 i - 1, bp->msix_table[offset + i - 1].vector);
1338 int bnx2x_enable_msi(struct bnx2x *bp)
1342 rc = pci_enable_msi(bp->pdev);
1344 DP(NETIF_MSG_IFUP, "MSI is not attainable\n");
1347 bp->flags |= USING_MSI_FLAG;
1352 static int bnx2x_req_irq(struct bnx2x *bp)
1354 unsigned long flags;
1357 if (bp->flags & USING_MSI_FLAG)
1360 flags = IRQF_SHARED;
1362 rc = request_irq(bp->pdev->irq, bnx2x_interrupt, flags,
1363 bp->dev->name, bp->dev);
1367 static inline int bnx2x_setup_irqs(struct bnx2x *bp)
1370 if (bp->flags & USING_MSIX_FLAG) {
1371 rc = bnx2x_req_msix_irqs(bp);
1376 rc = bnx2x_req_irq(bp);
1378 BNX2X_ERR("IRQ request failed rc %d, aborting\n", rc);
1381 if (bp->flags & USING_MSI_FLAG) {
1382 bp->dev->irq = bp->pdev->irq;
1383 netdev_info(bp->dev, "using MSI IRQ %d\n",
1391 static inline void bnx2x_napi_enable(struct bnx2x *bp)
1395 for_each_rx_queue(bp, i)
1396 napi_enable(&bnx2x_fp(bp, i, napi));
1399 static inline void bnx2x_napi_disable(struct bnx2x *bp)
1403 for_each_rx_queue(bp, i)
1404 napi_disable(&bnx2x_fp(bp, i, napi));
1407 void bnx2x_netif_start(struct bnx2x *bp)
1409 if (netif_running(bp->dev)) {
1410 bnx2x_napi_enable(bp);
1411 bnx2x_int_enable(bp);
1412 if (bp->state == BNX2X_STATE_OPEN)
1413 netif_tx_wake_all_queues(bp->dev);
1417 void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
1419 bnx2x_int_disable_sync(bp, disable_hw);
1420 bnx2x_napi_disable(bp);
1423 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb)
1425 struct bnx2x *bp = netdev_priv(dev);
1429 struct ethhdr *hdr = (struct ethhdr *)skb->data;
1430 u16 ether_type = ntohs(hdr->h_proto);
1432 /* Skip VLAN tag if present */
1433 if (ether_type == ETH_P_8021Q) {
1434 struct vlan_ethhdr *vhdr =
1435 (struct vlan_ethhdr *)skb->data;
1437 ether_type = ntohs(vhdr->h_vlan_encapsulated_proto);
1440 /* If ethertype is FCoE or FIP - use FCoE ring */
1441 if ((ether_type == ETH_P_FCOE) || (ether_type == ETH_P_FIP))
1442 return bnx2x_fcoe_tx(bp, txq_index);
1445 /* select a non-FCoE queue */
1446 return __skb_tx_hash(dev, skb, BNX2X_NUM_ETH_QUEUES(bp));
1449 void bnx2x_set_num_queues(struct bnx2x *bp)
1451 switch (bp->multi_mode) {
1452 case ETH_RSS_MODE_DISABLED:
1455 case ETH_RSS_MODE_REGULAR:
1456 bp->num_queues = bnx2x_calc_num_queues(bp);
1464 /* Add special queues */
1465 bp->num_queues += NON_ETH_CONTEXT_USE;
1469 * bnx2x_set_real_num_queues - configure netdev->real_num_[tx,rx]_queues
1471 * @bp: Driver handle
1473 * We currently support at most 16 Tx queues for each CoS, thus we will
1474 * allocate a multiple of 16 for ETH L2 rings according to the value of the
1477 * If there is an FCoE L2 queue the appropriate Tx queue will have the next
1478 * index after all ETH L2 indices.
1480 * If the actual number of Tx queues (for each CoS) is less than 16, then there
1481 * will be holes at the end of each group of 16 ETH L2 indices (0..15,
1482 * 16..31, ...) with indices that are not coupled with any real Tx queue.
1484 * The proper configuration of skb->queue_mapping is handled by
1485 * bnx2x_select_queue() and __skb_tx_hash().
1487 * bnx2x_setup_tc() takes care of the proper TC mappings so that __skb_tx_hash()
1488 * will return a proper Tx index if TC is enabled (netdev->num_tc > 0).
1490 static inline int bnx2x_set_real_num_queues(struct bnx2x *bp)
1494 tx = MAX_TXQS_PER_COS * bp->max_cos;
1495 rx = BNX2X_NUM_ETH_QUEUES(bp);
1497 /* account for fcoe queue */
1505 rc = netif_set_real_num_tx_queues(bp->dev, tx);
1507 BNX2X_ERR("Failed to set real number of Tx queues: %d\n", rc);
1510 rc = netif_set_real_num_rx_queues(bp->dev, rx);
1512 BNX2X_ERR("Failed to set real number of Rx queues: %d\n", rc);
1516 DP(NETIF_MSG_DRV, "Setting real num queues to (tx, rx) (%d, %d)\n",
1522 static inline void bnx2x_set_rx_buf_size(struct bnx2x *bp)
1526 for_each_queue(bp, i) {
1527 struct bnx2x_fastpath *fp = &bp->fp[i];
1529 /* Always use a mini-jumbo MTU for the FCoE L2 ring */
1532 * Although no IP frames are expected to arrive on
1533 * this ring, we still want to add an
1534 * IP_HEADER_ALIGNMENT_PADDING to prevent a buffer overrun.
1538 BNX2X_FCOE_MINI_JUMBO_MTU + ETH_OVREHEAD +
1539 BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING;
1542 bp->dev->mtu + ETH_OVREHEAD +
1543 BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING;
1547 static inline int bnx2x_init_rss_pf(struct bnx2x *bp)
1550 u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0};
1551 u8 num_eth_queues = BNX2X_NUM_ETH_QUEUES(bp);
1554 * Prepare the initial contents of the indirection table if RSS is enabled.
1557 if (bp->multi_mode != ETH_RSS_MODE_DISABLED) {
1558 for (i = 0; i < sizeof(ind_table); i++)
1560 bp->fp->cl_id + (i % num_eth_queues);
1564 * For 57710 and 57711 SEARCHER configuration (rss_keys) is
1565 * per-port, so if explicit configuration is needed, do it only for a PMF.
1568 * For 57712 and newer, on the other hand, it's a per-function configuration.
1571 return bnx2x_config_rss_pf(bp, ind_table,
1572 bp->port.pmf || !CHIP_IS_E1x(bp));
1575 int bnx2x_config_rss_pf(struct bnx2x *bp, u8 *ind_table, bool config_hash)
1577 struct bnx2x_config_rss_params params = {0};
1580 /* Although RSS is meaningless when there is a single HW queue we
1581 * still need it enabled in order to have HW Rx hash generated.
1583 * if (!is_eth_multi(bp))
1584 * bp->multi_mode = ETH_RSS_MODE_DISABLED;
1587 params.rss_obj = &bp->rss_conf_obj;
1589 __set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
1592 switch (bp->multi_mode) {
1593 case ETH_RSS_MODE_DISABLED:
1594 __set_bit(BNX2X_RSS_MODE_DISABLED, &params.rss_flags);
1596 case ETH_RSS_MODE_REGULAR:
1597 __set_bit(BNX2X_RSS_MODE_REGULAR, &params.rss_flags);
1599 case ETH_RSS_MODE_VLAN_PRI:
1600 __set_bit(BNX2X_RSS_MODE_VLAN_PRI, &params.rss_flags);
1602 case ETH_RSS_MODE_E1HOV_PRI:
1603 __set_bit(BNX2X_RSS_MODE_E1HOV_PRI, &params.rss_flags);
1605 case ETH_RSS_MODE_IP_DSCP:
1606 __set_bit(BNX2X_RSS_MODE_IP_DSCP, &params.rss_flags);
1609 BNX2X_ERR("Unknown multi_mode: %d\n", bp->multi_mode);
1613 /* If RSS is enabled */
1614 if (bp->multi_mode != ETH_RSS_MODE_DISABLED) {
1615 /* RSS configuration */
1616 __set_bit(BNX2X_RSS_IPV4, &params.rss_flags);
1617 __set_bit(BNX2X_RSS_IPV4_TCP, &params.rss_flags);
1618 __set_bit(BNX2X_RSS_IPV6, &params.rss_flags);
1619 __set_bit(BNX2X_RSS_IPV6_TCP, &params.rss_flags);
1622 params.rss_result_mask = MULTI_MASK;
1624 memcpy(params.ind_table, ind_table, sizeof(params.ind_table));
1628 for (i = 0; i < sizeof(params.rss_key) / 4; i++)
1629 params.rss_key[i] = random32();
1631 __set_bit(BNX2X_RSS_SET_SRCH, &params.rss_flags);
1635 return bnx2x_config_rss(bp, &params);
1638 static inline int bnx2x_init_hw(struct bnx2x *bp, u32 load_code)
1640 struct bnx2x_func_state_params func_params = {0};
1642 /* Prepare parameters for function state transitions */
1643 __set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
1645 func_params.f_obj = &bp->func_obj;
1646 func_params.cmd = BNX2X_F_CMD_HW_INIT;
1648 func_params.params.hw_init.load_phase = load_code;
1650 return bnx2x_func_state_change(bp, &func_params);
1654 * Cleans the objects that have internal lists without sending
1655 * ramrods. Should be run when interrupts are disabled.
1657 static void bnx2x_squeeze_objects(struct bnx2x *bp)
1660 unsigned long ramrod_flags = 0, vlan_mac_flags = 0;
1661 struct bnx2x_mcast_ramrod_params rparam = {0};
1662 struct bnx2x_vlan_mac_obj *mac_obj = &bp->fp->mac_obj;
1664 /***************** Cleanup MACs' object first *************************/
1666 /* Wait for completion of requested */
1667 __set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
1668 /* Perform a dry cleanup */
1669 __set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
1671 /* Clean ETH primary MAC */
1672 __set_bit(BNX2X_ETH_MAC, &vlan_mac_flags);
1673 rc = mac_obj->delete_all(bp, &bp->fp->mac_obj, &vlan_mac_flags,
1676 BNX2X_ERR("Failed to clean ETH MACs: %d\n", rc);
1678 /* Cleanup UC list */
1680 __set_bit(BNX2X_UC_LIST_MAC, &vlan_mac_flags);
1681 rc = mac_obj->delete_all(bp, mac_obj, &vlan_mac_flags,
1684 BNX2X_ERR("Failed to clean UC list MACs: %d\n", rc);
1686 /***************** Now clean mcast object *****************************/
1687 rparam.mcast_obj = &bp->mcast_obj;
1688 __set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
1690 /* Add a DEL command... */
1691 rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
1693 BNX2X_ERR("Failed to add a new DEL command to a multi-cast "
1694 "object: %d\n", rc);
1696 /* ...and wait until all pending commands are cleared */
1697 rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
1700 BNX2X_ERR("Failed to clean multi-cast object: %d\n",
1705 rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
1709 #ifndef BNX2X_STOP_ON_ERROR
1710 #define LOAD_ERROR_EXIT(bp, label) \
1712 (bp)->state = BNX2X_STATE_ERROR; \
1716 #define LOAD_ERROR_EXIT(bp, label) \
1718 (bp)->state = BNX2X_STATE_ERROR; \
1724 /* must be called with rtnl_lock */
1725 int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
1727 int port = BP_PORT(bp);
1731 #ifdef BNX2X_STOP_ON_ERROR
1732 if (unlikely(bp->panic))
1736 bp->state = BNX2X_STATE_OPENING_WAIT4_LOAD;
1738 /* Set the initial link reported state to link down */
1739 bnx2x_acquire_phy_lock(bp);
1740 memset(&bp->last_reported_link, 0, sizeof(bp->last_reported_link));
1741 __set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
1742 &bp->last_reported_link.link_report_flags);
1743 bnx2x_release_phy_lock(bp);
1745 /* must be called before memory allocation and HW init */
1746 bnx2x_ilt_set_info(bp);
1749 * Zero fastpath structures while preserving invariants such as napi, which is
1750 * allocated only once, the fp index, max_cos and the bp pointer.
1751 * Also set fp->disable_tpa.
1753 for_each_queue(bp, i)
1757 /* Set the receive queues buffer size */
1758 bnx2x_set_rx_buf_size(bp);
1760 if (bnx2x_alloc_mem(bp))
1763 /* As long as bnx2x_alloc_mem() may possibly update
1764 * bp->num_queues, bnx2x_set_real_num_queues() should always be called after it. */
1767 rc = bnx2x_set_real_num_queues(bp);
1769 BNX2X_ERR("Unable to set real_num_queues\n");
1770 LOAD_ERROR_EXIT(bp, load_error0);
1773 /* configure multi-CoS mappings in the kernel.
1774 * This configuration may be overridden by a multi-class queue discipline
1775 * or by a DCBX negotiation result.
1777 bnx2x_setup_tc(bp->dev, bp->max_cos);
1779 bnx2x_napi_enable(bp);
1781 /* Send LOAD_REQUEST command to MCP
1782 * Returns the type of LOAD command:
1783 * if it is the first port to be initialized
1784 * common blocks should be initialized, otherwise - not
1786 if (!BP_NOMCP(bp)) {
1787 load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, 0);
1789 BNX2X_ERR("MCP response failure, aborting\n");
1791 LOAD_ERROR_EXIT(bp, load_error1);
1793 if (load_code == FW_MSG_CODE_DRV_LOAD_REFUSED) {
1794 rc = -EBUSY; /* other port in diagnostic mode */
1795 LOAD_ERROR_EXIT(bp, load_error1);
1799 int path = BP_PATH(bp);
1801 DP(NETIF_MSG_IFUP, "NO MCP - load counts[%d] %d, %d, %d\n",
1802 path, load_count[path][0], load_count[path][1],
1803 load_count[path][2]);
1804 load_count[path][0]++;
1805 load_count[path][1 + port]++;
1806 DP(NETIF_MSG_IFUP, "NO MCP - new load counts[%d] %d, %d, %d\n",
1807 path, load_count[path][0], load_count[path][1],
1808 load_count[path][2]);
1809 if (load_count[path][0] == 1)
1810 load_code = FW_MSG_CODE_DRV_LOAD_COMMON;
1811 else if (load_count[path][1 + port] == 1)
1812 load_code = FW_MSG_CODE_DRV_LOAD_PORT;
1814 load_code = FW_MSG_CODE_DRV_LOAD_FUNCTION;
1817 if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
1818 (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) ||
1819 (load_code == FW_MSG_CODE_DRV_LOAD_PORT)) {
1822 * We need the barrier to ensure the ordering between the
1823 * writing to bp->port.pmf here and reading it from the
1824 * bnx2x_periodic_task().
1827 queue_delayed_work(bnx2x_wq, &bp->period_task, 0);
1831 DP(NETIF_MSG_LINK, "pmf %d\n", bp->port.pmf);
1833 /* Init Function state controlling object */
1834 bnx2x__init_func_obj(bp);
1837 rc = bnx2x_init_hw(bp, load_code);
1839 BNX2X_ERR("HW init failed, aborting\n");
1840 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1841 LOAD_ERROR_EXIT(bp, load_error2);
1844 /* Connect to IRQs */
1845 rc = bnx2x_setup_irqs(bp);
1847 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1848 LOAD_ERROR_EXIT(bp, load_error2);
1851 /* Setup NIC internals and enable interrupts */
1852 bnx2x_nic_init(bp, load_code);
1854 /* Init per-function objects */
1855 bnx2x_init_bp_objs(bp);
1857 if (((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
1858 (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP)) &&
1859 (bp->common.shmem2_base)) {
1860 if (SHMEM2_HAS(bp, dcc_support))
1861 SHMEM2_WR(bp, dcc_support,
1862 (SHMEM_DCC_SUPPORT_DISABLE_ENABLE_PF_TLV |
1863 SHMEM_DCC_SUPPORT_BANDWIDTH_ALLOCATION_TLV));
1866 bp->state = BNX2X_STATE_OPENING_WAIT4_PORT;
1867 rc = bnx2x_func_start(bp);
1869 BNX2X_ERR("Function start failed!\n");
1870 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1871 LOAD_ERROR_EXIT(bp, load_error3);
1874 /* Send LOAD_DONE command to MCP */
1875 if (!BP_NOMCP(bp)) {
1876 load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1878 BNX2X_ERR("MCP response failure, aborting\n");
1880 LOAD_ERROR_EXIT(bp, load_error3);
1884 rc = bnx2x_setup_leading(bp);
1886 BNX2X_ERR("Setup leading failed!\n");
1887 LOAD_ERROR_EXIT(bp, load_error3);
1891 /* Enable Timer scan */
1892 REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 1);
1895 for_each_nondefault_queue(bp, i) {
1896 rc = bnx2x_setup_queue(bp, &bp->fp[i], 0);
1898 LOAD_ERROR_EXIT(bp, load_error4);
1901 rc = bnx2x_init_rss_pf(bp);
1903 LOAD_ERROR_EXIT(bp, load_error4);
1905 /* Now that clients are configured we are ready to work */
1906 bp->state = BNX2X_STATE_OPEN;
1908 /* Configure a ucast MAC */
1909 rc = bnx2x_set_eth_mac(bp, true);
1911 LOAD_ERROR_EXIT(bp, load_error4);
1913 if (bp->pending_max) {
1914 bnx2x_update_max_mf_config(bp, bp->pending_max);
1915 bp->pending_max = 0;
1919 bnx2x_initial_phy_init(bp, load_mode);
1921 /* Start fast path */
1923 /* Initialize Rx filter. */
1924 netif_addr_lock_bh(bp->dev);
1925 bnx2x_set_rx_mode(bp->dev);
1926 netif_addr_unlock_bh(bp->dev);
1929 switch (load_mode) {
1931 /* Tx queues should only be re-enabled */
1932 netif_tx_wake_all_queues(bp->dev);
1936 netif_tx_start_all_queues(bp->dev);
1937 smp_mb__after_clear_bit();
1941 bp->state = BNX2X_STATE_DIAG;
1949 bnx2x__link_status_update(bp);
1951 /* start the timer */
1952 mod_timer(&bp->timer, jiffies + bp->current_interval);
1955 bnx2x_setup_cnic_irq_info(bp);
1956 if (bp->state == BNX2X_STATE_OPEN)
1957 bnx2x_cnic_notify(bp, CNIC_CTL_START_CMD);
1959 bnx2x_inc_load_cnt(bp);
1961 /* Wait for all pending SP commands to complete */
1962 if (!bnx2x_wait_sp_comp(bp, ~0x0UL)) {
1963 BNX2X_ERR("Timeout waiting for SP elements to complete\n");
1964 bnx2x_nic_unload(bp, UNLOAD_CLOSE);
1968 bnx2x_dcbx_init(bp);
1971 #ifndef BNX2X_STOP_ON_ERROR
1974 /* Disable Timer scan */
1975 REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 0);
1978 bnx2x_int_disable_sync(bp, 1);
1980 /* Clean queueable objects */
1981 bnx2x_squeeze_objects(bp);
1983 /* Free SKBs, SGEs, TPA pool and driver internals */
1984 bnx2x_free_skbs(bp);
1985 for_each_rx_queue(bp, i)
1986 bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
1991 if (!BP_NOMCP(bp)) {
1992 bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0);
1993 bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, 0);
1998 bnx2x_napi_disable(bp);
2003 #endif /* ! BNX2X_STOP_ON_ERROR */
2006 /* must be called with rtnl_lock */
2007 int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
2010 bool global = false;
2012 if ((bp->state == BNX2X_STATE_CLOSED) ||
2013 (bp->state == BNX2X_STATE_ERROR)) {
2014 /* We can get here if the driver has been unloaded
2015 * during parity error recovery and is either waiting for a
2016 * leader to complete or for other functions to unload and
2017 * then ifdown has been issued. In this case we want to
2018 * unload and let other functions complete a recovery process. */
2021 bp->recovery_state = BNX2X_RECOVERY_DONE;
2023 bnx2x_release_leader_lock(bp);
2026 DP(NETIF_MSG_HW, "Releasing a leadership...\n");
2032 * It's important to set bp->state to a value different from
2033 * BNX2X_STATE_OPEN and only then stop the Tx. Otherwise bnx2x_tx_int()
2034 * may restart the Tx from the NAPI context (see bnx2x_tx_int()).
2036 bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT;
2040 bnx2x_tx_disable(bp);
2043 bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD);
2046 bp->rx_mode = BNX2X_RX_MODE_NONE;
2048 del_timer_sync(&bp->timer);
2050 /* Set ALWAYS_ALIVE bit in shmem */
2051 bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;
2053 bnx2x_drv_pulse(bp);
2055 bnx2x_stats_handle(bp, STATS_EVENT_STOP);
2057 /* Cleanup the chip if needed */
2058 if (unload_mode != UNLOAD_RECOVERY)
2059 bnx2x_chip_cleanup(bp, unload_mode);
2061 /* Send the UNLOAD_REQUEST to the MCP */
2062 bnx2x_send_unload_req(bp, unload_mode);
2065 * Prevent transactions to host from the functions on the
2066 * engine that doesn't reset global blocks in case of global
2067 * attention once global blocks are reset and gates are opened
2068 * (the engine whose leader will perform the recovery last).
2071 if (!CHIP_IS_E1x(bp))
2072 bnx2x_pf_disable(bp);
2074 /* Disable HW interrupts, NAPI */
2075 bnx2x_netif_stop(bp, 1);
2080 /* Report UNLOAD_DONE to MCP */
2081 bnx2x_send_unload_done(bp);
2085 * At this stage no more interrupts will arrive so we may safely clean
2086 * the queueable objects here in case they failed to get cleaned so far.
2088 bnx2x_squeeze_objects(bp);
2090 /* There should be no more pending SP commands at this stage */
2095 /* Free SKBs, SGEs, TPA pool and driver internals */
2096 bnx2x_free_skbs(bp);
2097 for_each_rx_queue(bp, i)
2098 bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
2102 bp->state = BNX2X_STATE_CLOSED;
2104 /* Check if there are pending parity attentions. If there are - set
2105 * RECOVERY_IN_PROGRESS.
2107 if (bnx2x_chk_parity_attn(bp, &global, false)) {
2108 bnx2x_set_reset_in_progress(bp);
2110 /* Set RESET_IS_GLOBAL if needed */
2112 bnx2x_set_reset_global(bp);
2116 /* The last driver must disable a "close the gate" if there is no
2117 * parity attention or "process kill" pending.
2119 if (!bnx2x_dec_load_cnt(bp) && bnx2x_reset_is_done(bp, BP_PATH(bp)))
2120 bnx2x_disable_close_the_gate(bp);
2125 int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state)
2129 /* If there is no power capability, silently succeed */
2131 DP(NETIF_MSG_HW, "No power capability. Breaking.\n");
2135 pci_read_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL, &pmcsr);
2139 pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL,
2140 ((pmcsr & ~PCI_PM_CTRL_STATE_MASK) |
2141 PCI_PM_CTRL_PME_STATUS));
2143 if (pmcsr & PCI_PM_CTRL_STATE_MASK)
2144 /* delay required during transition out of D3hot */
2149 /* If there are other clients above don't
2150 shut down the power */
2151 if (atomic_read(&bp->pdev->enable_cnt) != 1)
2153 /* Don't shut down the power for emulation and FPGA */
2154 if (CHIP_REV_IS_SLOW(bp))
2157 pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
2161 pmcsr |= PCI_PM_CTRL_PME_ENABLE;
2163 pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL,
2166 /* No more memory access after this point until
2167 * device is brought back to D0.
2178 * net_device service functions
2180 int bnx2x_poll(struct napi_struct *napi, int budget)
2184 struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath,
2186 struct bnx2x *bp = fp->bp;
2189 #ifdef BNX2X_STOP_ON_ERROR
2190 if (unlikely(bp->panic)) {
2191 napi_complete(napi);
2196 for_each_cos_in_tx_queue(fp, cos)
2197 if (bnx2x_tx_queue_has_work(&fp->txdata[cos]))
2198 bnx2x_tx_int(bp, &fp->txdata[cos]);
2201 if (bnx2x_has_rx_work(fp)) {
2202 work_done += bnx2x_rx_int(fp, budget - work_done);
2204 /* must not complete if we consumed full budget */
2205 if (work_done >= budget)
2209 /* Fall out from the NAPI loop if needed */
2210 if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
2212 /* No need to update SB for FCoE L2 ring as long as
2213 * it's connected to the default SB and the SB
2214 * has been updated when NAPI was scheduled.
2216 if (IS_FCOE_FP(fp)) {
2217 napi_complete(napi);
2222 bnx2x_update_fpsb_idx(fp);
2223 /* bnx2x_has_rx_work() reads the status block,
2224 * thus we need to ensure that status block indices
2225 * have been actually read (bnx2x_update_fpsb_idx)
2226 * prior to this check (bnx2x_has_rx_work) so that
2227 * we won't write the "newer" value of the status block
2228 * to IGU (if there was a DMA right after
2229 * bnx2x_has_rx_work and if there is no rmb, the memory
2230 * reading (bnx2x_update_fpsb_idx) may be postponed
2231 * to right before bnx2x_ack_sb). In this case there
2232 * will never be another interrupt until there is
2233 * another update of the status block, while there
2234 * is still unhandled work.
2238 if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
2239 napi_complete(napi);
2240 /* Re-enable interrupts */
2242 "Update index to %d\n", fp->fp_hc_idx);
2243 bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID,
2244 le16_to_cpu(fp->fp_hc_idx),
2254 /* we split the first BD into headers and data BDs
2255 * to ease the pain of our fellow microcode engineers
2256 * we use one mapping for both BDs
2258 static noinline u16 bnx2x_tx_split(struct bnx2x *bp,
2259 struct bnx2x_fp_txdata *txdata,
2260 struct sw_tx_bd *tx_buf,
2261 struct eth_tx_start_bd **tx_bd, u16 hlen,
2262 u16 bd_prod, int nbd)
2264 struct eth_tx_start_bd *h_tx_bd = *tx_bd;
2265 struct eth_tx_bd *d_tx_bd;
2267 int old_len = le16_to_cpu(h_tx_bd->nbytes);
2269 /* first fix first BD */
2270 h_tx_bd->nbd = cpu_to_le16(nbd);
2271 h_tx_bd->nbytes = cpu_to_le16(hlen);
2273 DP(NETIF_MSG_TX_QUEUED, "TSO split header size is %d "
2274 "(%x:%x) nbd %d\n", h_tx_bd->nbytes, h_tx_bd->addr_hi,
2275 h_tx_bd->addr_lo, h_tx_bd->nbd);
2277 /* now get a new data BD
2278 * (after the pbd) and fill it */
2279 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2280 d_tx_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2282 mapping = HILO_U64(le32_to_cpu(h_tx_bd->addr_hi),
2283 le32_to_cpu(h_tx_bd->addr_lo)) + hlen;
2285 d_tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2286 d_tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2287 d_tx_bd->nbytes = cpu_to_le16(old_len - hlen);
2289 /* this marks the BD as one that has no individual mapping */
2290 tx_buf->flags |= BNX2X_TSO_SPLIT_BD;
2292 DP(NETIF_MSG_TX_QUEUED,
2293 "TSO split data size is %d (%x:%x)\n",
2294 d_tx_bd->nbytes, d_tx_bd->addr_hi, d_tx_bd->addr_lo);
2297 *tx_bd = (struct eth_tx_start_bd *)d_tx_bd;
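/* Adjust a partial checksum whose summation started 'fix' bytes away from the
 * transport header: subtract the bytes that were summed before the header
 * (fix > 0) or add the bytes that were skipped (fix < 0), then return the
 * folded result byte-swapped for the parse BD.
 */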
2302 static inline u16 bnx2x_csum_fix(unsigned char *t_header, u16 csum, s8 fix)
2305 csum = (u16) ~csum_fold(csum_sub(csum,
2306 csum_partial(t_header - fix, fix, 0)));
2309 csum = (u16) ~csum_fold(csum_add(csum,
2310 csum_partial(t_header, -fix, 0)));
2312 return swab16(csum);
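/* Build a mask of XMIT_CSUM_* and XMIT_GSO_* flags describing the offload
 * work (IPv4/IPv6 checksum, TCP checksum, GSO v4/v6) this skb requires.
 */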
2315 static inline u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb)
2319 if (skb->ip_summed != CHECKSUM_PARTIAL)
2323 if (vlan_get_protocol(skb) == htons(ETH_P_IPV6)) {
2325 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
2326 rc |= XMIT_CSUM_TCP;
2330 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
2331 rc |= XMIT_CSUM_TCP;
2335 if (skb_is_gso_v6(skb))
2336 rc |= XMIT_GSO_V6 | XMIT_CSUM_TCP | XMIT_CSUM_V6;
2337 else if (skb_is_gso(skb))
2338 rc |= XMIT_GSO_V4 | XMIT_CSUM_V4 | XMIT_CSUM_TCP;
2343 #if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
2344 /* check if packet requires linearization (packet is too fragmented)
2345 no need to check fragmentation if page size > 8K (there will be no
2346 violation of FW restrictions) */
2347 static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb,
2352 int first_bd_sz = 0;
2354 /* 3 = 1 (for linear data BD) + 2 (for PBD and last BD) */
2355 if (skb_shinfo(skb)->nr_frags >= (MAX_FETCH_BD - 3)) {
2357 if (xmit_type & XMIT_GSO) {
2358 unsigned short lso_mss = skb_shinfo(skb)->gso_size;
2359 /* Check if LSO packet needs to be copied:
2360 3 = 1 (for headers BD) + 2 (for PBD and last BD) */
2361 int wnd_size = MAX_FETCH_BD - 3;
2362 /* Number of windows to check */
2363 int num_wnds = skb_shinfo(skb)->nr_frags - wnd_size;
2368 /* Headers length */
2369 hlen = (int)(skb_transport_header(skb) - skb->data) +
2372 /* Amount of data (w/o headers) on linear part of SKB*/
2373 first_bd_sz = skb_headlen(skb) - hlen;
2375 wnd_sum = first_bd_sz;
2377 /* Calculate the first sum - it's special */
2378 for (frag_idx = 0; frag_idx < wnd_size - 1; frag_idx++)
2380 skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]);
2382 /* If there was data on linear skb data - check it */
2383 if (first_bd_sz > 0) {
2384 if (unlikely(wnd_sum < lso_mss)) {
2389 wnd_sum -= first_bd_sz;
2392 /* Others are easier: run through the frag list and
2393 check all windows */
2394 for (wnd_idx = 0; wnd_idx <= num_wnds; wnd_idx++) {
2396 skb_frag_size(&skb_shinfo(skb)->frags[wnd_idx + wnd_size - 1]);
2398 if (unlikely(wnd_sum < lso_mss)) {
2403 skb_frag_size(&skb_shinfo(skb)->frags[wnd_idx]);
2406 /* in non-LSO, a too fragmented packet should always be linearized */
2413 if (unlikely(to_copy))
2414 DP(NETIF_MSG_TX_QUEUED,
2415 "Linearization IS REQUIRED for %s packet. "
2416 "num_frags %d hlen %d first_bd_sz %d\n",
2417 (xmit_type & XMIT_GSO) ? "LSO" : "non-LSO",
2418 skb_shinfo(skb)->nr_frags, hlen, first_bd_sz);
2424 static inline void bnx2x_set_pbd_gso_e2(struct sk_buff *skb, u32 *parsing_data,
2427 *parsing_data |= (skb_shinfo(skb)->gso_size <<
2428 ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT) &
2429 ETH_TX_PARSE_BD_E2_LSO_MSS;
2430 if ((xmit_type & XMIT_GSO_V6) &&
2431 (ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6))
2432 *parsing_data |= ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR;
2436 * bnx2x_set_pbd_gso - update PBD in GSO case.
2440 * @xmit_type: xmit flags
2442 static inline void bnx2x_set_pbd_gso(struct sk_buff *skb,
2443 struct eth_tx_parse_bd_e1x *pbd,
2446 pbd->lso_mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
2447 pbd->tcp_send_seq = swab32(tcp_hdr(skb)->seq);
2448 pbd->tcp_flags = pbd_tcp_flags(skb);
2450 if (xmit_type & XMIT_GSO_V4) {
2451 pbd->ip_id = swab16(ip_hdr(skb)->id);
2452 pbd->tcp_pseudo_csum =
2453 swab16(~csum_tcpudp_magic(ip_hdr(skb)->saddr,
2455 0, IPPROTO_TCP, 0));
2458 pbd->tcp_pseudo_csum =
2459 swab16(~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
2460 &ipv6_hdr(skb)->daddr,
2461 0, IPPROTO_TCP, 0));
2463 pbd->global_data |= ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN;
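/* Hedged rationale: the pseudo checksum above is computed with a zero length
 * (and the PSEUDO_CS_WITHOUT_LEN flag set), presumably so that the chip can
 * patch in the correct per-segment TCP length while segmenting the LSO packet.
 */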
2467 * bnx2x_set_pbd_csum_e2 - update PBD with checksum and return header length
2469 * @bp: driver handle
2471 * @parsing_data: data to be updated
2472 * @xmit_type: xmit flags
2476 static inline u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb,
2477 u32 *parsing_data, u32 xmit_type)
2480 ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) <<
2481 ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W_SHIFT) &
2482 ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W;
2484 if (xmit_type & XMIT_CSUM_TCP) {
2485 *parsing_data |= ((tcp_hdrlen(skb) / 4) <<
2486 ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT) &
2487 ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW;
2489 return skb_transport_header(skb) + tcp_hdrlen(skb) - skb->data;
2491 /* We support checksum offload for TCP and UDP only.
2492 * No need to pass the UDP header length - it's a constant.
2494 return skb_transport_header(skb) +
2495 sizeof(struct udphdr) - skb->data;
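/* Illustrative arithmetic: the returned value is the header length in bytes
 * counted from the start of the frame; for an untagged Ethernet/IPv4/TCP
 * packet with no options that is typically 14 + 20 + 20 = 54 bytes.
 */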
2498 static inline void bnx2x_set_sbd_csum(struct bnx2x *bp, struct sk_buff *skb,
2499 struct eth_tx_start_bd *tx_start_bd, u32 xmit_type)
2501 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_L4_CSUM;
2503 if (xmit_type & XMIT_CSUM_V4)
2504 tx_start_bd->bd_flags.as_bitfield |=
2505 ETH_TX_BD_FLAGS_IP_CSUM;
2507 tx_start_bd->bd_flags.as_bitfield |=
2508 ETH_TX_BD_FLAGS_IPV6;
2510 if (!(xmit_type & XMIT_CSUM_TCP))
2511 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IS_UDP;
2515 * bnx2x_set_pbd_csum - update PBD with checksum and return header length
2517 * @bp: driver handle
2519 * @pbd: parse BD to be updated
2520 * @xmit_type: xmit flags
2522 static inline u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb,
2523 struct eth_tx_parse_bd_e1x *pbd,
2526 u8 hlen = (skb_network_header(skb) - skb->data) >> 1;
2528 /* for now NS flag is not used in Linux */
2530 (hlen | ((skb->protocol == cpu_to_be16(ETH_P_8021Q)) <<
2531 ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT));
2533 pbd->ip_hlen_w = (skb_transport_header(skb) -
2534 skb_network_header(skb)) >> 1;
2536 hlen += pbd->ip_hlen_w;
2538 /* We support checksum offload for TCP and UDP only */
2539 if (xmit_type & XMIT_CSUM_TCP)
2540 hlen += tcp_hdrlen(skb) / 2;
2542 hlen += sizeof(struct udphdr) / 2;
2544 pbd->total_hlen_w = cpu_to_le16(hlen);
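/* Illustrative arithmetic: hlen is counted in 16-bit words here, so for an
 * untagged Ethernet/IPv4/TCP frame without options total_hlen_w is typically
 * 7 + 10 + 10 = 27 words (54 bytes).
 */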
2547 if (xmit_type & XMIT_CSUM_TCP) {
2548 pbd->tcp_pseudo_csum = swab16(tcp_hdr(skb)->check);
2551 s8 fix = SKB_CS_OFF(skb); /* signed! */
2553 DP(NETIF_MSG_TX_QUEUED,
2554 "hlen %d fix %d csum before fix %x\n",
2555 le16_to_cpu(pbd->total_hlen_w), fix, SKB_CS(skb));
2557 /* HW bug: fixup the CSUM */
2558 pbd->tcp_pseudo_csum =
2559 bnx2x_csum_fix(skb_transport_header(skb),
2562 DP(NETIF_MSG_TX_QUEUED, "csum after fix %x\n",
2563 pbd->tcp_pseudo_csum);
2569 /* called with netif_tx_lock
2570 * bnx2x_tx_int() runs without netif_tx_lock unless it needs to call
2571 * netif_wake_queue()
2573 netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
2575 struct bnx2x *bp = netdev_priv(dev);
2577 struct bnx2x_fastpath *fp;
2578 struct netdev_queue *txq;
2579 struct bnx2x_fp_txdata *txdata;
2580 struct sw_tx_bd *tx_buf;
2581 struct eth_tx_start_bd *tx_start_bd, *first_bd;
2582 struct eth_tx_bd *tx_data_bd, *total_pkt_bd = NULL;
2583 struct eth_tx_parse_bd_e1x *pbd_e1x = NULL;
2584 struct eth_tx_parse_bd_e2 *pbd_e2 = NULL;
2585 u32 pbd_e2_parsing_data = 0;
2586 u16 pkt_prod, bd_prod;
2587 int nbd, txq_index, fp_index, txdata_index;
2589 u32 xmit_type = bnx2x_xmit_type(bp, skb);
2592 __le16 pkt_size = 0;
2594 u8 mac_type = UNICAST_ADDRESS;
2596 #ifdef BNX2X_STOP_ON_ERROR
2597 if (unlikely(bp->panic))
2598 return NETDEV_TX_BUSY;
2601 txq_index = skb_get_queue_mapping(skb);
2602 txq = netdev_get_tx_queue(dev, txq_index);
2604 BUG_ON(txq_index >= MAX_ETH_TXQ_IDX(bp) + FCOE_PRESENT);
2606 /* decode the fastpath index and the cos index from the txq */
2607 fp_index = TXQ_TO_FP(txq_index);
2608 txdata_index = TXQ_TO_COS(txq_index);
2612 * Override the above for the FCoE queue:
2613 * - FCoE fp entry is right after the ETH entries.
2614 * - FCoE L2 queue uses bp->txdata[0] only.
2616 if (unlikely(!NO_FCOE(bp) && (txq_index ==
2617 bnx2x_fcoe_tx(bp, txq_index)))) {
2618 fp_index = FCOE_IDX;
2623 /* enable this debug print to view the transmission queue being used
2624 DP(BNX2X_MSG_FP, "indices: txq %d, fp %d, txdata %d\n",
2625 txq_index, fp_index, txdata_index); */
2627 /* locate the fastpath and the txdata */
2628 fp = &bp->fp[fp_index];
2629 txdata = &fp->txdata[txdata_index];
2631 	/* enable this debug print to view the transmission details
2632 DP(BNX2X_MSG_FP,"transmitting packet cid %d fp index %d txdata_index %d"
2633 " tx_data ptr %p fp pointer %p\n",
2634 txdata->cid, fp_index, txdata_index, txdata, fp); */
2636 if (unlikely(bnx2x_tx_avail(bp, txdata) <
2637 (skb_shinfo(skb)->nr_frags + 3))) {
2638 fp->eth_q_stats.driver_xoff++;
2639 netif_tx_stop_queue(txq);
2640 BNX2X_ERR("BUG! Tx ring full when queue awake!\n");
2641 return NETDEV_TX_BUSY;
2644 DP(NETIF_MSG_TX_QUEUED, "queue[%d]: SKB: summed %x protocol %x "
2645 "protocol(%x,%x) gso type %x xmit_type %x\n",
2646 txq_index, skb->ip_summed, skb->protocol, ipv6_hdr(skb)->nexthdr,
2647 ip_hdr(skb)->protocol, skb_shinfo(skb)->gso_type, xmit_type);
2649 eth = (struct ethhdr *)skb->data;
2651 /* set flag according to packet type (UNICAST_ADDRESS is default)*/
2652 if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
2653 if (is_broadcast_ether_addr(eth->h_dest))
2654 mac_type = BROADCAST_ADDRESS;
2656 mac_type = MULTICAST_ADDRESS;
2659 #if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
2660 /* First, check if we need to linearize the skb (due to FW
2661 restrictions). No need to check fragmentation if page size > 8K
2662 	   (there will be no violation of FW restrictions) */
2663 if (bnx2x_pkt_req_lin(bp, skb, xmit_type)) {
2664 /* Statistics of linearization */
2666 if (skb_linearize(skb) != 0) {
2667 DP(NETIF_MSG_TX_QUEUED, "SKB linearization failed - "
2668 "silently dropping this SKB\n");
2669 dev_kfree_skb_any(skb);
2670 return NETDEV_TX_OK;
2674 /* Map skb linear data for DMA */
2675 mapping = dma_map_single(&bp->pdev->dev, skb->data,
2676 skb_headlen(skb), DMA_TO_DEVICE);
2677 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
2678 DP(NETIF_MSG_TX_QUEUED, "SKB mapping failed - "
2679 "silently dropping this SKB\n");
2680 dev_kfree_skb_any(skb);
2681 return NETDEV_TX_OK;
2684 Please read carefully. First we use one BD which we mark as start,
2685 then we have a parsing info BD (used for TSO or xsum),
2686 and only then we have the rest of the TSO BDs.
2687 (don't forget to mark the last one as last,
2688 and to unmap only AFTER you write to the BD ...)
2689 	And above all, all PBD sizes are in words - NOT DWORDS!
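	(Editor's sketch, not part of the original note: a typical chain is
	 start BD -> parse BD (e1x or e2) -> optional split-header data BD for
	 TSO -> one data BD per fragment, with the last one marked as last.)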
2692 	/* get the current packet producer now - it is advanced only just before
2693 	 * sending the packet, since mapping of pages may fail and cause the packet to be dropped
2695 pkt_prod = txdata->tx_pkt_prod;
2696 bd_prod = TX_BD(txdata->tx_bd_prod);
2698 /* get a tx_buf and first BD
2699 * tx_start_bd may be changed during SPLIT,
2700 * but first_bd will always stay first
2702 tx_buf = &txdata->tx_buf_ring[TX_BD(pkt_prod)];
2703 tx_start_bd = &txdata->tx_desc_ring[bd_prod].start_bd;
2704 first_bd = tx_start_bd;
2706 tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
2707 SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_ETH_ADDR_TYPE,
2711 SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_HDR_NBDS, 1);
2713 /* remember the first BD of the packet */
2714 tx_buf->first_bd = txdata->tx_bd_prod;
2718 DP(NETIF_MSG_TX_QUEUED,
2719 "sending pkt %u @%p next_idx %u bd %u @%p\n",
2720 pkt_prod, tx_buf, txdata->tx_pkt_prod, bd_prod, tx_start_bd);
2722 if (vlan_tx_tag_present(skb)) {
2723 tx_start_bd->vlan_or_ethertype =
2724 cpu_to_le16(vlan_tx_tag_get(skb));
2725 tx_start_bd->bd_flags.as_bitfield |=
2726 (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
2728 tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
2730 /* turn on parsing and get a BD */
2731 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2733 if (xmit_type & XMIT_CSUM)
2734 bnx2x_set_sbd_csum(bp, skb, tx_start_bd, xmit_type);
2736 if (!CHIP_IS_E1x(bp)) {
2737 pbd_e2 = &txdata->tx_desc_ring[bd_prod].parse_bd_e2;
2738 memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2));
2739 /* Set PBD in checksum offload case */
2740 if (xmit_type & XMIT_CSUM)
2741 hlen = bnx2x_set_pbd_csum_e2(bp, skb,
2742 &pbd_e2_parsing_data,
2746 * fill in the MAC addresses in the PBD - for local
2749 bnx2x_set_fw_mac_addr(&pbd_e2->src_mac_addr_hi,
2750 &pbd_e2->src_mac_addr_mid,
2751 &pbd_e2->src_mac_addr_lo,
2753 bnx2x_set_fw_mac_addr(&pbd_e2->dst_mac_addr_hi,
2754 &pbd_e2->dst_mac_addr_mid,
2755 &pbd_e2->dst_mac_addr_lo,
2759 pbd_e1x = &txdata->tx_desc_ring[bd_prod].parse_bd_e1x;
2760 memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x));
2761 /* Set PBD in checksum offload case */
2762 if (xmit_type & XMIT_CSUM)
2763 hlen = bnx2x_set_pbd_csum(bp, skb, pbd_e1x, xmit_type);
2767 /* Setup the data pointer of the first BD of the packet */
2768 tx_start_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2769 tx_start_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2770 nbd = 2; /* start_bd + pbd + frags (updated when pages are mapped) */
2771 tx_start_bd->nbytes = cpu_to_le16(skb_headlen(skb));
2772 pkt_size = tx_start_bd->nbytes;
2774 DP(NETIF_MSG_TX_QUEUED, "first bd @%p addr (%x:%x) nbd %d"
2775 " nbytes %d flags %x vlan %x\n",
2776 tx_start_bd, tx_start_bd->addr_hi, tx_start_bd->addr_lo,
2777 le16_to_cpu(tx_start_bd->nbd), le16_to_cpu(tx_start_bd->nbytes),
2778 tx_start_bd->bd_flags.as_bitfield,
2779 le16_to_cpu(tx_start_bd->vlan_or_ethertype));
2781 if (xmit_type & XMIT_GSO) {
2783 DP(NETIF_MSG_TX_QUEUED,
2784 "TSO packet len %d hlen %d total len %d tso size %d\n",
2785 skb->len, hlen, skb_headlen(skb),
2786 skb_shinfo(skb)->gso_size);
2788 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO;
2790 if (unlikely(skb_headlen(skb) > hlen))
2791 bd_prod = bnx2x_tx_split(bp, txdata, tx_buf,
2794 if (!CHIP_IS_E1x(bp))
2795 bnx2x_set_pbd_gso_e2(skb, &pbd_e2_parsing_data,
2798 bnx2x_set_pbd_gso(skb, pbd_e1x, xmit_type);
2801 /* Set the PBD's parsing_data field if not zero
2802 * (for the chips newer than 57711).
2804 if (pbd_e2_parsing_data)
2805 pbd_e2->parsing_data = cpu_to_le32(pbd_e2_parsing_data);
2807 tx_data_bd = (struct eth_tx_bd *)tx_start_bd;
2809 /* Handle fragmented skb */
2810 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2811 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2813 mapping = skb_frag_dma_map(&bp->pdev->dev, frag, 0,
2814 skb_frag_size(frag), DMA_TO_DEVICE);
2815 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
2817 DP(NETIF_MSG_TX_QUEUED, "Unable to map page - "
2818 "dropping packet...\n");
2820 			/* we need to unmap all buffers already mapped
2822 			 * first_bd->nbd needs to be properly updated
2823 			 * before the call to bnx2x_free_tx_pkt
2825 first_bd->nbd = cpu_to_le16(nbd);
2826 bnx2x_free_tx_pkt(bp, txdata,
2827 TX_BD(txdata->tx_pkt_prod));
2828 return NETDEV_TX_OK;
2831 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2832 tx_data_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2833 if (total_pkt_bd == NULL)
2834 total_pkt_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2836 tx_data_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2837 tx_data_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2838 tx_data_bd->nbytes = cpu_to_le16(skb_frag_size(frag));
2839 le16_add_cpu(&pkt_size, skb_frag_size(frag));
2842 DP(NETIF_MSG_TX_QUEUED,
2843 "frag %d bd @%p addr (%x:%x) nbytes %d\n",
2844 i, tx_data_bd, tx_data_bd->addr_hi, tx_data_bd->addr_lo,
2845 le16_to_cpu(tx_data_bd->nbytes));
2848 DP(NETIF_MSG_TX_QUEUED, "last bd @%p\n", tx_data_bd);
2850 /* update with actual num BDs */
2851 first_bd->nbd = cpu_to_le16(nbd);
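	/* Illustrative count: a non-TSO skb with a linear part and two page
	 * fragments typically ends up with nbd = 4 here - the start BD, the
	 * parse BD and one data BD per fragment.
	 */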
2853 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2855 /* now send a tx doorbell, counting the next BD
2856 * if the packet contains or ends with it
2858 if (TX_BD_POFF(bd_prod) < nbd)
2861 /* total_pkt_bytes should be set on the first data BD if
2862 * it's not an LSO packet and there is more than one
2863 * data BD. In this case pkt_size is limited by an MTU value.
2864 * However we prefer to set it for an LSO packet (while we don't
2865 	 * have to) in order to save some CPU cycles in the non-LSO
2866 	 * case, when we care much more about them.
2868 if (total_pkt_bd != NULL)
2869 total_pkt_bd->total_pkt_bytes = pkt_size;
2872 DP(NETIF_MSG_TX_QUEUED,
2873 "PBD (E1X) @%p ip_data %x ip_hlen %u ip_id %u lso_mss %u"
2874 " tcp_flags %x xsum %x seq %u hlen %u\n",
2875 pbd_e1x, pbd_e1x->global_data, pbd_e1x->ip_hlen_w,
2876 pbd_e1x->ip_id, pbd_e1x->lso_mss, pbd_e1x->tcp_flags,
2877 pbd_e1x->tcp_pseudo_csum, pbd_e1x->tcp_send_seq,
2878 le16_to_cpu(pbd_e1x->total_hlen_w));
2880 DP(NETIF_MSG_TX_QUEUED,
2881 "PBD (E2) @%p dst %x %x %x src %x %x %x parsing_data %x\n",
2882 pbd_e2, pbd_e2->dst_mac_addr_hi, pbd_e2->dst_mac_addr_mid,
2883 pbd_e2->dst_mac_addr_lo, pbd_e2->src_mac_addr_hi,
2884 pbd_e2->src_mac_addr_mid, pbd_e2->src_mac_addr_lo,
2885 pbd_e2->parsing_data);
2886 DP(NETIF_MSG_TX_QUEUED, "doorbell: nbd %d bd %u\n", nbd, bd_prod);
2888 txdata->tx_pkt_prod++;
2890 * Make sure that the BD data is updated before updating the producer
2891 * since FW might read the BD right after the producer is updated.
2892 * This is only applicable for weak-ordered memory model archs such
2893 	 * as IA-64. The following barrier is also mandatory since FW
2894 	 * assumes packets must have BDs.
2898 txdata->tx_db.data.prod += nbd;
2901 DOORBELL(bp, txdata->cid, txdata->tx_db.raw);
2905 txdata->tx_bd_prod += nbd;
2907 if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_SKB_FRAGS + 4)) {
2908 netif_tx_stop_queue(txq);
2910 /* paired memory barrier is in bnx2x_tx_int(), we have to keep
2911 * ordering of set_bit() in netif_tx_stop_queue() and read of
2915 fp->eth_q_stats.driver_xoff++;
2916 if (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 4)
2917 netif_tx_wake_queue(txq);
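		/* Editor's note: this is the usual stop / re-check / wake
		 * pattern - after stopping the queue the available BD count is
		 * read again because a concurrent bnx2x_tx_int() may have just
		 * freed descriptors; waking here avoids leaving the queue
		 * stopped forever in that race.
		 */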
2921 return NETDEV_TX_OK;
2925 * bnx2x_setup_tc - routine to configure net_device for multi tc
2927  * @dev:	net device to configure
2928  * @num_tc:	number of traffic classes to enable
2930 * callback connected to the ndo_setup_tc function pointer
2932 int bnx2x_setup_tc(struct net_device *dev, u8 num_tc)
2934 int cos, prio, count, offset;
2935 struct bnx2x *bp = netdev_priv(dev);
2937 /* setup tc must be called under rtnl lock */
2940 /* no traffic classes requested. aborting */
2942 netdev_reset_tc(dev);
2946 /* requested to support too many traffic classes */
2947 if (num_tc > bp->max_cos) {
2948 DP(NETIF_MSG_TX_ERR, "support for too many traffic classes"
2949 " requested: %d. max supported is %d\n",
2950 num_tc, bp->max_cos);
2954 	/* declare the number of supported traffic classes */
2955 if (netdev_set_num_tc(dev, num_tc)) {
2956 DP(NETIF_MSG_TX_ERR, "failed to declare %d traffic classes\n",
2961 /* configure priority to traffic class mapping */
2962 for (prio = 0; prio < BNX2X_MAX_PRIORITY; prio++) {
2963 netdev_set_prio_tc_map(dev, prio, bp->prio_to_cos[prio]);
2964 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d\n",
2965 prio, bp->prio_to_cos[prio]);
2969 	/* Use this configuration to differentiate tc0 from other COSes.
2970 	   This can be used for ETS or PFC, and saves the effort of setting
2971 	   up a multi-class queueing discipline or negotiating DCBX with a switch
2972 netdev_set_prio_tc_map(dev, 0, 0);
2973 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d\n", 0, 0);
2974 for (prio = 1; prio < 16; prio++) {
2975 netdev_set_prio_tc_map(dev, prio, 1);
2976 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d\n", prio, 1);
2979 /* configure traffic class to transmission queue mapping */
2980 for (cos = 0; cos < bp->max_cos; cos++) {
2981 count = BNX2X_NUM_ETH_QUEUES(bp);
2982 offset = cos * MAX_TXQS_PER_COS;
2983 netdev_set_tc_queue(dev, cos, count, offset);
2984 DP(BNX2X_MSG_SP, "mapping tc %d to offset %d count %d\n",
2985 cos, offset, count);
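		/* Illustrative mapping: with, say, 4 ETH queues, traffic class
		 * 'cos' is backed by the 4 txqs starting at offset
		 * cos * MAX_TXQS_PER_COS.
		 */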
2991 /* called with rtnl_lock */
2992 int bnx2x_change_mac_addr(struct net_device *dev, void *p)
2994 struct sockaddr *addr = p;
2995 struct bnx2x *bp = netdev_priv(dev);
2998 if (!is_valid_ether_addr((u8 *)(addr->sa_data)))
3001 if (netif_running(dev)) {
3002 rc = bnx2x_set_eth_mac(bp, false);
3007 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
3009 if (netif_running(dev))
3010 rc = bnx2x_set_eth_mac(bp, true);
3015 static void bnx2x_free_fp_mem_at(struct bnx2x *bp, int fp_index)
3017 union host_hc_status_block *sb = &bnx2x_fp(bp, fp_index, status_blk);
3018 struct bnx2x_fastpath *fp = &bp->fp[fp_index];
3023 if (IS_FCOE_IDX(fp_index)) {
3024 memset(sb, 0, sizeof(union host_hc_status_block));
3025 fp->status_blk_mapping = 0;
3030 if (!CHIP_IS_E1x(bp))
3031 BNX2X_PCI_FREE(sb->e2_sb,
3032 bnx2x_fp(bp, fp_index,
3033 status_blk_mapping),
3034 sizeof(struct host_hc_status_block_e2));
3036 BNX2X_PCI_FREE(sb->e1x_sb,
3037 bnx2x_fp(bp, fp_index,
3038 status_blk_mapping),
3039 sizeof(struct host_hc_status_block_e1x));
3044 if (!skip_rx_queue(bp, fp_index)) {
3045 bnx2x_free_rx_bds(fp);
3047 /* fastpath rx rings: rx_buf rx_desc rx_comp */
3048 BNX2X_FREE(bnx2x_fp(bp, fp_index, rx_buf_ring));
3049 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_desc_ring),
3050 bnx2x_fp(bp, fp_index, rx_desc_mapping),
3051 sizeof(struct eth_rx_bd) * NUM_RX_BD);
3053 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_comp_ring),
3054 bnx2x_fp(bp, fp_index, rx_comp_mapping),
3055 sizeof(struct eth_fast_path_rx_cqe) *
3059 BNX2X_FREE(bnx2x_fp(bp, fp_index, rx_page_ring));
3060 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_sge_ring),
3061 bnx2x_fp(bp, fp_index, rx_sge_mapping),
3062 BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
3066 if (!skip_tx_queue(bp, fp_index)) {
3067 /* fastpath tx rings: tx_buf tx_desc */
3068 for_each_cos_in_tx_queue(fp, cos) {
3069 struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
3072 "freeing tx memory of fp %d cos %d cid %d\n",
3073 fp_index, cos, txdata->cid);
3075 BNX2X_FREE(txdata->tx_buf_ring);
3076 BNX2X_PCI_FREE(txdata->tx_desc_ring,
3077 txdata->tx_desc_mapping,
3078 sizeof(union eth_tx_bd_types) * NUM_TX_BD);
3081 /* end of fastpath */
3084 void bnx2x_free_fp_mem(struct bnx2x *bp)
3087 for_each_queue(bp, i)
3088 bnx2x_free_fp_mem_at(bp, i);
3091 static inline void set_sb_shortcuts(struct bnx2x *bp, int index)
3093 union host_hc_status_block status_blk = bnx2x_fp(bp, index, status_blk);
3094 if (!CHIP_IS_E1x(bp)) {
3095 bnx2x_fp(bp, index, sb_index_values) =
3096 (__le16 *)status_blk.e2_sb->sb.index_values;
3097 bnx2x_fp(bp, index, sb_running_index) =
3098 (__le16 *)status_blk.e2_sb->sb.running_index;
3100 bnx2x_fp(bp, index, sb_index_values) =
3101 (__le16 *)status_blk.e1x_sb->sb.index_values;
3102 bnx2x_fp(bp, index, sb_running_index) =
3103 (__le16 *)status_blk.e1x_sb->sb.running_index;
3107 static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index)
3109 union host_hc_status_block *sb;
3110 struct bnx2x_fastpath *fp = &bp->fp[index];
3113 int rx_ring_size = 0;
3115 	/* if rx_ring_size was specified - use it; otherwise derive a default */
3116 if (!bp->rx_ring_size) {
3118 rx_ring_size = MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp);
3120 /* allocate at least number of buffers required by FW */
3121 rx_ring_size = max_t(int, bp->disable_tpa ? MIN_RX_SIZE_NONTPA :
3122 MIN_RX_SIZE_TPA, rx_ring_size);
3124 bp->rx_ring_size = rx_ring_size;
3126 rx_ring_size = bp->rx_ring_size;
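	/* Illustrative sizing with hypothetical numbers: with 8 RX queues and
	 * MAX_RX_AVAIL of, say, 4096, each ring would get 512 buffers, bumped
	 * up to the MIN_RX_SIZE_TPA/MIN_RX_SIZE_NONTPA floor required by the
	 * FW when the quotient is smaller.
	 */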
3129 sb = &bnx2x_fp(bp, index, status_blk);
3131 if (!IS_FCOE_IDX(index)) {
3134 if (!CHIP_IS_E1x(bp))
3135 BNX2X_PCI_ALLOC(sb->e2_sb,
3136 &bnx2x_fp(bp, index, status_blk_mapping),
3137 sizeof(struct host_hc_status_block_e2));
3139 BNX2X_PCI_ALLOC(sb->e1x_sb,
3140 &bnx2x_fp(bp, index, status_blk_mapping),
3141 sizeof(struct host_hc_status_block_e1x));
3146 /* FCoE Queue uses Default SB and doesn't ACK the SB, thus no need to
3147 * set shortcuts for it.
3149 if (!IS_FCOE_IDX(index))
3150 set_sb_shortcuts(bp, index);
3153 if (!skip_tx_queue(bp, index)) {
3154 /* fastpath tx rings: tx_buf tx_desc */
3155 for_each_cos_in_tx_queue(fp, cos) {
3156 struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
3158 DP(BNX2X_MSG_SP, "allocating tx memory of "
3162 BNX2X_ALLOC(txdata->tx_buf_ring,
3163 sizeof(struct sw_tx_bd) * NUM_TX_BD);
3164 BNX2X_PCI_ALLOC(txdata->tx_desc_ring,
3165 &txdata->tx_desc_mapping,
3166 sizeof(union eth_tx_bd_types) * NUM_TX_BD);
3171 if (!skip_rx_queue(bp, index)) {
3172 /* fastpath rx rings: rx_buf rx_desc rx_comp */
3173 BNX2X_ALLOC(bnx2x_fp(bp, index, rx_buf_ring),
3174 sizeof(struct sw_rx_bd) * NUM_RX_BD);
3175 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_desc_ring),
3176 &bnx2x_fp(bp, index, rx_desc_mapping),
3177 sizeof(struct eth_rx_bd) * NUM_RX_BD);
3179 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_comp_ring),
3180 &bnx2x_fp(bp, index, rx_comp_mapping),
3181 sizeof(struct eth_fast_path_rx_cqe) *
3185 BNX2X_ALLOC(bnx2x_fp(bp, index, rx_page_ring),
3186 sizeof(struct sw_rx_page) * NUM_RX_SGE);
3187 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_sge_ring),
3188 &bnx2x_fp(bp, index, rx_sge_mapping),
3189 BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
3191 bnx2x_set_next_page_rx_bd(fp);
3194 bnx2x_set_next_page_rx_cq(fp);
3197 ring_size = bnx2x_alloc_rx_bds(fp, rx_ring_size);
3198 if (ring_size < rx_ring_size)
3204 /* handles low memory cases */
3206 BNX2X_ERR("Unable to allocate full memory for queue %d (size %d)\n",
3208 	/* FW will drop all packets if the queue is not big enough;
3209 	 * in these cases we disable the queue.
3210 	 * Min size is different for OOO, TPA and non-TPA queues
3212 if (ring_size < (fp->disable_tpa ?
3213 MIN_RX_SIZE_NONTPA : MIN_RX_SIZE_TPA)) {
3214 /* release memory allocated for this queue */
3215 bnx2x_free_fp_mem_at(bp, index);
3221 int bnx2x_alloc_fp_mem(struct bnx2x *bp)
3226 * 1. Allocate FP for leading - fatal if error
3227 * 2. {CNIC} Allocate FCoE FP - fatal if error
3228 * 3. {CNIC} Allocate OOO + FWD - disable OOO if error
3229 * 4. Allocate RSS - fix number of queues if error
3233 if (bnx2x_alloc_fp_mem_at(bp, 0))
3239 if (bnx2x_alloc_fp_mem_at(bp, FCOE_IDX))
3240 /* we will fail load process instead of mark
3247 for_each_nondefault_eth_queue(bp, i)
3248 if (bnx2x_alloc_fp_mem_at(bp, i))
3251 /* handle memory failures */
3252 if (i != BNX2X_NUM_ETH_QUEUES(bp)) {
3253 int delta = BNX2X_NUM_ETH_QUEUES(bp) - i;
3258 		 * move non-ETH FPs next to the last ETH FP;
3259 		 * must be done in that order:
3260 		 * FCOE_IDX < FWD_IDX < OOO_IDX
3263 		/* move the FCoE fp even if NO_FCOE_FLAG is on */
3264 bnx2x_move_fp(bp, FCOE_IDX, FCOE_IDX - delta);
3266 bp->num_queues -= delta;
3267 BNX2X_ERR("Adjusted num of queues from %d to %d\n",
3268 bp->num_queues + delta, bp->num_queues);
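	/* Illustrative example: if 2 of 8 RSS queues failed to allocate then
	 * delta = 2, the FCoE fastpath is moved down by 2 so it stays right
	 * after the last usable ETH fastpath, and num_queues shrinks
	 * accordingly.
	 */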
3274 void bnx2x_free_mem_bp(struct bnx2x *bp)
3277 kfree(bp->msix_table);
3281 int __devinit bnx2x_alloc_mem_bp(struct bnx2x *bp)
3283 struct bnx2x_fastpath *fp;
3284 struct msix_entry *tbl;
3285 struct bnx2x_ilt *ilt;
3286 int msix_table_size = 0;
3289 	 * The biggest MSI-X table we might need is the maximum number of fast
3290 	 * path IGU SBs plus the default SB (for the PF).
3292 msix_table_size = bp->igu_sb_cnt + 1;
3294 /* fp array: RSS plus CNIC related L2 queues */
3295 fp = kzalloc((BNX2X_MAX_RSS_COUNT(bp) + NON_ETH_CONTEXT_USE) *
3296 sizeof(*fp), GFP_KERNEL);
3302 tbl = kzalloc(msix_table_size * sizeof(*tbl), GFP_KERNEL);
3305 bp->msix_table = tbl;
3308 ilt = kzalloc(sizeof(*ilt), GFP_KERNEL);
3315 bnx2x_free_mem_bp(bp);
3320 int bnx2x_reload_if_running(struct net_device *dev)
3322 struct bnx2x *bp = netdev_priv(dev);
3324 if (unlikely(!netif_running(dev)))
3327 bnx2x_nic_unload(bp, UNLOAD_NORMAL);
3328 return bnx2x_nic_load(bp, LOAD_NORMAL);
3331 int bnx2x_get_cur_phy_idx(struct bnx2x *bp)
3333 u32 sel_phy_idx = 0;
3334 if (bp->link_params.num_phys <= 1)
3337 if (bp->link_vars.link_up) {
3338 sel_phy_idx = EXT_PHY1;
3339 /* In case link is SERDES, check if the EXT_PHY2 is the one */
3340 if ((bp->link_vars.link_status & LINK_STATUS_SERDES_LINK) &&
3341 (bp->link_params.phy[EXT_PHY2].supported & SUPPORTED_FIBRE))
3342 sel_phy_idx = EXT_PHY2;
3345 switch (bnx2x_phy_selection(&bp->link_params)) {
3346 case PORT_HW_CFG_PHY_SELECTION_HARDWARE_DEFAULT:
3347 case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY:
3348 case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY:
3349 sel_phy_idx = EXT_PHY1;
3351 case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY:
3352 case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY_PRIORITY:
3353 sel_phy_idx = EXT_PHY2;
3361 int bnx2x_get_link_cfg_idx(struct bnx2x *bp)
3363 u32 sel_phy_idx = bnx2x_get_cur_phy_idx(bp);
3365 	 * The selected active PHY is always the one after swapping (in case PHY
3366 	 * swapping is enabled). So when swapping is enabled, we need to reverse
3370 if (bp->link_params.multi_phy_config &
3371 PORT_HW_CFG_PHY_SWAPPED_ENABLED) {
3372 if (sel_phy_idx == EXT_PHY1)
3373 sel_phy_idx = EXT_PHY2;
3374 else if (sel_phy_idx == EXT_PHY2)
3375 sel_phy_idx = EXT_PHY1;
3377 return LINK_CONFIG_IDX(sel_phy_idx);
3380 #if defined(NETDEV_FCOE_WWNN) && defined(BCM_CNIC)
3381 int bnx2x_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
3383 struct bnx2x *bp = netdev_priv(dev);
3384 struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
3387 case NETDEV_FCOE_WWNN:
3388 *wwn = HILO_U64(cp->fcoe_wwn_node_name_hi,
3389 cp->fcoe_wwn_node_name_lo);
3391 case NETDEV_FCOE_WWPN:
3392 *wwn = HILO_U64(cp->fcoe_wwn_port_name_hi,
3393 cp->fcoe_wwn_port_name_lo);
3403 /* called with rtnl_lock */
3404 int bnx2x_change_mtu(struct net_device *dev, int new_mtu)
3406 struct bnx2x *bp = netdev_priv(dev);
3408 if (bp->recovery_state != BNX2X_RECOVERY_DONE) {
3409 pr_err("Handling parity error recovery. Try again later\n");
3413 if ((new_mtu > ETH_MAX_JUMBO_PACKET_SIZE) ||
3414 ((new_mtu + ETH_HLEN) < ETH_MIN_PACKET_SIZE))
3417 /* This does not race with packet allocation
3418 * because the actual alloc size is
3419 * only updated as part of load
3423 return bnx2x_reload_if_running(dev);
3426 u32 bnx2x_fix_features(struct net_device *dev, u32 features)
3428 struct bnx2x *bp = netdev_priv(dev);
3430 /* TPA requires Rx CSUM offloading */
3431 if (!(features & NETIF_F_RXCSUM) || bp->disable_tpa)
3432 features &= ~NETIF_F_LRO;
3437 int bnx2x_set_features(struct net_device *dev, u32 features)
3439 struct bnx2x *bp = netdev_priv(dev);
3440 u32 flags = bp->flags;
3441 bool bnx2x_reload = false;
3443 if (features & NETIF_F_LRO)
3444 flags |= TPA_ENABLE_FLAG;
3446 flags &= ~TPA_ENABLE_FLAG;
3448 if (features & NETIF_F_LOOPBACK) {
3449 if (bp->link_params.loopback_mode != LOOPBACK_BMAC) {
3450 bp->link_params.loopback_mode = LOOPBACK_BMAC;
3451 bnx2x_reload = true;
3454 if (bp->link_params.loopback_mode != LOOPBACK_NONE) {
3455 bp->link_params.loopback_mode = LOOPBACK_NONE;
3456 bnx2x_reload = true;
3460 if (flags ^ bp->flags) {
3462 bnx2x_reload = true;
3466 if (bp->recovery_state == BNX2X_RECOVERY_DONE)
3467 return bnx2x_reload_if_running(dev);
3468 /* else: bnx2x_nic_load() will be called at end of recovery */
3474 void bnx2x_tx_timeout(struct net_device *dev)
3476 struct bnx2x *bp = netdev_priv(dev);
3478 #ifdef BNX2X_STOP_ON_ERROR
3483 smp_mb__before_clear_bit();
3484 set_bit(BNX2X_SP_RTNL_TX_TIMEOUT, &bp->sp_rtnl_state);
3485 smp_mb__after_clear_bit();
3487 	/* This allows the netif to be shut down gracefully before resetting */
3488 schedule_delayed_work(&bp->sp_rtnl_task, 0);
3491 int bnx2x_suspend(struct pci_dev *pdev, pm_message_t state)
3493 struct net_device *dev = pci_get_drvdata(pdev);
3497 dev_err(&pdev->dev, "BAD net device from bnx2x_init_one\n");
3500 bp = netdev_priv(dev);
3504 pci_save_state(pdev);
3506 if (!netif_running(dev)) {
3511 netif_device_detach(dev);
3513 bnx2x_nic_unload(bp, UNLOAD_CLOSE);
3515 bnx2x_set_power_state(bp, pci_choose_state(pdev, state));
3522 int bnx2x_resume(struct pci_dev *pdev)
3524 struct net_device *dev = pci_get_drvdata(pdev);
3529 dev_err(&pdev->dev, "BAD net device from bnx2x_init_one\n");
3532 bp = netdev_priv(dev);
3534 if (bp->recovery_state != BNX2X_RECOVERY_DONE) {
3535 pr_err("Handling parity error recovery. Try again later\n");
3541 pci_restore_state(pdev);
3543 if (!netif_running(dev)) {
3548 bnx2x_set_power_state(bp, PCI_D0);
3549 netif_device_attach(dev);
3551 /* Since the chip was reset, clear the FW sequence number */
3553 rc = bnx2x_nic_load(bp, LOAD_OPEN);
3561 void bnx2x_set_ctx_validation(struct bnx2x *bp, struct eth_context *cxt,
3564 /* ustorm cxt validation */
3565 cxt->ustorm_ag_context.cdu_usage =
3566 CDU_RSRVD_VALUE_TYPE_A(HW_CID(bp, cid),
3567 CDU_REGION_NUMBER_UCM_AG, ETH_CONNECTION_TYPE);
3568 /* xcontext validation */
3569 cxt->xstorm_ag_context.cdu_reserved =
3570 CDU_RSRVD_VALUE_TYPE_A(HW_CID(bp, cid),
3571 CDU_REGION_NUMBER_XCM_AG, ETH_CONNECTION_TYPE);
3574 static inline void storm_memset_hc_timeout(struct bnx2x *bp, u8 port,
3575 u8 fw_sb_id, u8 sb_index,
3579 u32 addr = BAR_CSTRORM_INTMEM +
3580 CSTORM_STATUS_BLOCK_DATA_TIMEOUT_OFFSET(fw_sb_id, sb_index);
3581 REG_WR8(bp, addr, ticks);
3582 DP(NETIF_MSG_HW, "port %x fw_sb_id %d sb_index %d ticks %d\n",
3583 port, fw_sb_id, sb_index, ticks);
3586 static inline void storm_memset_hc_disable(struct bnx2x *bp, u8 port,
3587 u16 fw_sb_id, u8 sb_index,
3590 u32 enable_flag = disable ? 0 : (1 << HC_INDEX_DATA_HC_ENABLED_SHIFT);
3591 u32 addr = BAR_CSTRORM_INTMEM +
3592 CSTORM_STATUS_BLOCK_DATA_FLAGS_OFFSET(fw_sb_id, sb_index);
3593 u16 flags = REG_RD16(bp, addr);
3595 flags &= ~HC_INDEX_DATA_HC_ENABLED;
3596 flags |= enable_flag;
3597 REG_WR16(bp, addr, flags);
3598 DP(NETIF_MSG_HW, "port %x fw_sb_id %d sb_index %d disable %d\n",
3599 port, fw_sb_id, sb_index, disable);
3602 void bnx2x_update_coalesce_sb_index(struct bnx2x *bp, u8 fw_sb_id,
3603 u8 sb_index, u8 disable, u16 usec)
3605 int port = BP_PORT(bp);
3606 u8 ticks = usec / BNX2X_BTR;
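	/* Illustrative conversion, assuming BNX2X_BTR is the coalescing timer
	 * resolution in usec: e.g. a 48 usec request with a hypothetical
	 * 4 usec resolution becomes 12 ticks.
	 */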
3608 storm_memset_hc_timeout(bp, port, fw_sb_id, sb_index, ticks);
3610 disable = disable ? 1 : (usec ? 0 : 1);
3611 storm_memset_hc_disable(bp, port, fw_sb_id, sb_index, disable);