1 /* bnx2x_cmn.c: Broadcom Everest network driver.
3 * Copyright (c) 2007-2011 Broadcom Corporation
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation.
9 * Maintained by: Eilon Greenstein <eilong@broadcom.com>
10 * Written by: Eliezer Tamir
11 * Based on code from Michael Chan's bnx2 driver
12 * UDP CSUM errata workaround by Arik Gendelman
13 * Slowpath and fastpath rework by Vladislav Zolotarov
14 * Statistics and Link management by Yitchak Gertner
18 #include <linux/etherdevice.h>
19 #include <linux/if_vlan.h>
20 #include <linux/interrupt.h>
23 #include <net/ip6_checksum.h>
24 #include <linux/firmware.h>
25 #include <linux/prefetch.h>
26 #include "bnx2x_cmn.h"
27 #include "bnx2x_init.h"
/* NOTE(review): this dump embeds the original kernel line numbers and is
 * non-contiguous -- statements are missing between the lines shown (e.g.
 * the "fp->napi = orig_napi" restore implied by the comment at line 48).
 * Purpose: zero the fastpath struct for re-init while preserving its
 * already-initialized NAPI object, then recompute per-queue flags.
 */
33 * bnx2x_bz_fp - zero content of the fastpath structure.
36 * @index: fastpath index to be zeroed
38 * Makes sure the contents of the bp->fp[index].napi is kept
41 static inline void bnx2x_bz_fp(struct bnx2x *bp, int index)
43 struct bnx2x_fastpath *fp = &bp->fp[index];
44 struct napi_struct orig_napi = fp->napi;
45 /* bzero bnx2x_fastpath contents */
46 memset(fp, 0, sizeof(*fp));
48 /* Restore the NAPI object as it has been already initialized */
54 fp->max_cos = bp->max_cos;
56 /* Special queues support only one CoS */
60 * set the tpa flag for each queue. The tpa flag determines the queue
61 * minimal size so it must be set prior to queue memory allocation
63 fp->disable_tpa = ((bp->flags & TPA_ENABLE_FLAG) == 0);
66 /* We don't want TPA on FCoE, FWD and OOO L2 rings */
67 bnx2x_fcoe(bp, disable_tpa) = 1;
72 * bnx2x_move_fp - move content of the fastpath structure.
75 * @from: source FP index
76 * @to: destination FP index
78 * Makes sure the contents of the bp->fp[to].napi is kept
81 static inline void bnx2x_move_fp(struct bnx2x *bp, int from, int to)
83 struct bnx2x_fastpath *from_fp = &bp->fp[from];
84 struct bnx2x_fastpath *to_fp = &bp->fp[to];
85 struct napi_struct orig_napi = to_fp->napi;
86 /* Move bnx2x_fastpath contents */
87 memcpy(to_fp, from_fp, sizeof(*to_fp));
90 /* Restore the NAPI object as it has been already initialized */
91 to_fp->napi = orig_napi;
/* Per-path load reference counts used during nic load/unload;
 * index meaning per the trailing comment: 0-common, 1-port0, 2-port1. */
94 int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */
/* NOTE(review): non-contiguous dump -- the loop header that walks the
 * fragment BDs (between lines 152 and 157) and several braces are
 * missing here.
 * Purpose: unmap and free the skb and all BDs of one completed TX packet
 * at ring position @idx; returns the consumer index past the last BD freed.
 */
96 /* free skb in the packet ring at pos idx
97 * return idx of last bd freed
99 static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata,
102 struct sw_tx_bd *tx_buf = &txdata->tx_buf_ring[idx];
103 struct eth_tx_start_bd *tx_start_bd;
104 struct eth_tx_bd *tx_data_bd;
105 struct sk_buff *skb = tx_buf->skb;
106 u16 bd_idx = TX_BD(tx_buf->first_bd), new_cons;
109 /* prefetch skb end pointer to speedup dev_kfree_skb() */
112 DP(BNX2X_MSG_FP, "fp[%d]: pkt_idx %d buff @(%p)->skb %p\n",
113 txdata->txq_index, idx, tx_buf, skb);
116 DP(BNX2X_MSG_OFF, "free bd_idx %d\n", bd_idx);
117 tx_start_bd = &txdata->tx_desc_ring[bd_idx].start_bd;
118 dma_unmap_single(&bp->pdev->dev, BD_UNMAP_ADDR(tx_start_bd),
119 BD_UNMAP_LEN(tx_start_bd), DMA_TO_DEVICE);
/* nbd counts remaining BDs after the start BD */
122 nbd = le16_to_cpu(tx_start_bd->nbd) - 1;
123 #ifdef BNX2X_STOP_ON_ERROR
124 if ((nbd - 1) > (MAX_SKB_FRAGS + 2)) {
125 BNX2X_ERR("BAD nbd!\n");
129 new_cons = nbd + tx_buf->first_bd;
131 /* Get the next bd */
132 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
134 /* Skip a parse bd... */
136 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
138 /* ...and the TSO split header bd since they have no mapping */
139 if (tx_buf->flags & BNX2X_TSO_SPLIT_BD) {
141 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
147 DP(BNX2X_MSG_OFF, "free frag bd_idx %d\n", bd_idx);
148 tx_data_bd = &txdata->tx_desc_ring[bd_idx].reg_bd;
149 dma_unmap_page(&bp->pdev->dev, BD_UNMAP_ADDR(tx_data_bd),
150 BD_UNMAP_LEN(tx_data_bd), DMA_TO_DEVICE);
152 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
157 dev_kfree_skb_any(skb);
158 tx_buf->first_bd = 0;
/* NOTE(review): non-contiguous dump -- loop-advance statements and the
 * return are missing between the lines shown.
 * Purpose: TX completion handler; frees packets up to the HW consumer,
 * publishes the new consumer indices, and (under the tx lock, to avoid
 * racing start_xmit) re-wakes the queue if it was stopped and there is
 * now room for a maximal packet.
 */
164 int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata)
166 struct netdev_queue *txq;
167 u16 hw_cons, sw_cons, bd_cons = txdata->tx_bd_cons;
169 #ifdef BNX2X_STOP_ON_ERROR
170 if (unlikely(bp->panic))
174 txq = netdev_get_tx_queue(bp->dev, txdata->txq_index);
175 hw_cons = le16_to_cpu(*txdata->tx_cons_sb);
176 sw_cons = txdata->tx_pkt_cons;
178 while (sw_cons != hw_cons) {
181 pkt_cons = TX_BD(sw_cons);
183 DP(NETIF_MSG_TX_DONE, "queue[%d]: hw_cons %u sw_cons %u "
185 txdata->txq_index, hw_cons, sw_cons, pkt_cons);
187 bd_cons = bnx2x_free_tx_pkt(bp, txdata, pkt_cons);
191 txdata->tx_pkt_cons = sw_cons;
192 txdata->tx_bd_cons = bd_cons;
194 /* Need to make the tx_bd_cons update visible to start_xmit()
195 * before checking for netif_tx_queue_stopped(). Without the
196 * memory barrier, there is a small possibility that
197 * start_xmit() will miss it and cause the queue to be stopped
199 * On the other hand we need an rmb() here to ensure the proper
200 * ordering of bit testing in the following
201 * netif_tx_queue_stopped(txq) call.
205 if (unlikely(netif_tx_queue_stopped(txq))) {
206 /* Taking tx_lock() is needed to prevent reenabling the queue
207 * while it's empty. This could have happen if rx_action() gets
208 * suspended in bnx2x_tx_int() after the condition before
209 * netif_tx_wake_queue(), while tx_action (bnx2x_start_xmit()):
211 * stops the queue->sees fresh tx_bd_cons->releases the queue->
212 * sends some packets consuming the whole queue again->
216 __netif_tx_lock(txq, smp_processor_id());
218 if ((netif_tx_queue_stopped(txq)) &&
219 (bp->state == BNX2X_STATE_OPEN) &&
220 (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 3))
221 netif_tx_wake_queue(txq);
223 __netif_tx_unlock(txq);
228 static inline void bnx2x_update_last_max_sge(struct bnx2x_fastpath *fp,
231 u16 last_max = fp->last_max_sge;
233 if (SUB_S16(idx, last_max) > 0)
234 fp->last_max_sge = idx;
/* NOTE(review): non-contiguous dump -- local declarations (i, delta) and
 * some braces/early-return lines are missing.
 * Purpose: after a TPA aggregation consumes SGEs, clear the consumed bits
 * in fp->sge_mask and advance fp->rx_sge_prod over fully-consumed
 * 64-bit mask elements.
 */
237 static void bnx2x_update_sge_prod(struct bnx2x_fastpath *fp,
238 struct eth_fast_path_rx_cqe *fp_cqe)
240 struct bnx2x *bp = fp->bp;
241 u16 sge_len = SGE_PAGE_ALIGN(le16_to_cpu(fp_cqe->pkt_len) -
242 le16_to_cpu(fp_cqe->len_on_bd)) >>
244 u16 last_max, last_elem, first_elem;
251 /* First mark all used pages */
252 for (i = 0; i < sge_len; i++)
253 BIT_VEC64_CLEAR_BIT(fp->sge_mask,
254 RX_SGE(le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[i])));
256 DP(NETIF_MSG_RX_STATUS, "fp_cqe->sgl[%d] = %d\n",
257 sge_len - 1, le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[sge_len - 1]));
259 /* Here we assume that the last SGE index is the biggest */
260 prefetch((void *)(fp->sge_mask));
261 bnx2x_update_last_max_sge(fp,
262 le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[sge_len - 1]));
264 last_max = RX_SGE(fp->last_max_sge);
265 last_elem = last_max >> BIT_VEC64_ELEM_SHIFT;
266 first_elem = RX_SGE(fp->rx_sge_prod) >> BIT_VEC64_ELEM_SHIFT;
268 /* If ring is not full */
269 if (last_elem + 1 != first_elem)
272 /* Now update the prod */
273 for (i = first_elem; i != last_elem; i = NEXT_SGE_MASK_ELEM(i)) {
274 if (likely(fp->sge_mask[i]))
277 fp->sge_mask[i] = BIT_VEC64_ELEM_ONE_MASK;
278 delta += BIT_VEC64_ELEM_SZ;
282 fp->rx_sge_prod += delta;
283 /* clear page-end entries */
284 bnx2x_clear_sge_mask_next_elems(fp);
287 DP(NETIF_MSG_RX_STATUS,
288 "fp->last_max_sge = %d fp->rx_sge_prod = %d\n",
289 fp->last_max_sge, fp->rx_sge_prod);
/* NOTE(review): non-contiguous dump -- the declaration of `mapping`, a
 * return after the mapping-error path, and the #else/#endif of the
 * debug printout are missing.
 * Purpose: begin a TPA aggregation bin: map a fresh skb from the
 * per-queue pool onto the producer BD, stash the partially-received skb
 * in the bin, and record parsing/vlan/len metadata from the START CQE.
 */
292 static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
293 struct sk_buff *skb, u16 cons, u16 prod,
294 struct eth_fast_path_rx_cqe *cqe)
296 struct bnx2x *bp = fp->bp;
297 struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
298 struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
299 struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
301 struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
302 struct sw_rx_bd *first_buf = &tpa_info->first_buf;
304 /* print error if current state != stop */
305 if (tpa_info->tpa_state != BNX2X_TPA_STOP)
306 BNX2X_ERR("start of bin not in stop [%d]\n", queue);
308 /* Try to map an empty skb from the aggregation info */
309 mapping = dma_map_single(&bp->pdev->dev,
310 first_buf->skb->data,
311 fp->rx_buf_size, DMA_FROM_DEVICE);
313 * ...if it fails - move the skb from the consumer to the producer
314 * and set the current aggregation state as ERROR to drop it
315 * when TPA_STOP arrives.
318 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
319 /* Move the BD from the consumer to the producer */
320 bnx2x_reuse_rx_skb(fp, cons, prod);
321 tpa_info->tpa_state = BNX2X_TPA_ERROR;
325 /* move empty skb from pool to prod */
326 prod_rx_buf->skb = first_buf->skb;
327 dma_unmap_addr_set(prod_rx_buf, mapping, mapping);
328 /* point prod_bd to new skb */
329 prod_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
330 prod_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
332 /* move partial skb from cons to pool (don't unmap yet) */
333 *first_buf = *cons_rx_buf;
335 /* mark bin state as START */
336 tpa_info->parsing_flags =
337 le16_to_cpu(cqe->pars_flags.flags);
338 tpa_info->vlan_tag = le16_to_cpu(cqe->vlan_tag);
339 tpa_info->tpa_state = BNX2X_TPA_START;
340 tpa_info->len_on_bd = le16_to_cpu(cqe->len_on_bd);
341 tpa_info->placement_offset = cqe->placement_offset;
343 #ifdef BNX2X_STOP_ON_ERROR
344 fp->tpa_queue_used |= (1 << queue);
345 #ifdef _ASM_GENERIC_INT_L64_H
346 DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%lx\n",
348 DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%llx\n",
354 /* Timestamp option length allowed for TPA aggregation:
356 * nop nop kind length echo val
358 #define TPA_TSTAMP_OPT_LEN 12
360 * bnx2x_set_lro_mss - calculate the approximate value of the MSS
363 * @parsing_flags: parsing flags from the START CQE
364 * @len_on_bd: total length of the first packet for the
367 * Approximate value of the MSS for this aggregation calculated using
368 * the first packet of it.
370 static inline u16 bnx2x_set_lro_mss(struct bnx2x *bp, u16 parsing_flags,
374 * TPA arrgregation won't have either IP options or TCP options
375 * other than timestamp or IPv6 extension headers.
377 u16 hdrs_len = ETH_HLEN + sizeof(struct tcphdr);
379 if (GET_FLAG(parsing_flags, PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
380 PRS_FLAG_OVERETH_IPV6)
381 hdrs_len += sizeof(struct ipv6hdr);
383 hdrs_len += sizeof(struct iphdr);
386 /* Check if there was a TCP timestamp, if there is it's will
387 * always be 12 bytes length: nop nop kind length echo val.
389 * Otherwise FW would close the aggregation.
391 if (parsing_flags & PARSING_FLAGS_TIME_STAMP_EXIST_FLAG)
392 hdrs_len += TPA_TSTAMP_OPT_LEN;
394 return len_on_bd - hdrs_len;
/* NOTE(review): non-contiguous dump -- declarations (j, err), the
 * old_rx_pg copy, error-path returns and the final return are missing.
 * Purpose: walk the end-aggregation CQE's SGL and attach the SGE pages
 * as skb frags, replacing each consumed page in the ring; also sets
 * gso_size so the stack can forward the LRO skb.
 */
397 static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
398 u16 queue, struct sk_buff *skb,
399 struct eth_end_agg_rx_cqe *cqe,
402 struct sw_rx_page *rx_pg, old_rx_pg;
403 u32 i, frag_len, frag_size, pages;
406 struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
407 u16 len_on_bd = tpa_info->len_on_bd;
409 frag_size = le16_to_cpu(cqe->pkt_len) - len_on_bd;
410 pages = SGE_PAGE_ALIGN(frag_size) >> SGE_PAGE_SHIFT;
412 /* This is needed in order to enable forwarding support */
414 skb_shinfo(skb)->gso_size = bnx2x_set_lro_mss(bp,
415 tpa_info->parsing_flags, len_on_bd);
417 #ifdef BNX2X_STOP_ON_ERROR
418 if (pages > min_t(u32, 8, MAX_SKB_FRAGS)*SGE_PAGE_SIZE*PAGES_PER_SGE) {
419 BNX2X_ERR("SGL length is too long: %d. CQE index is %d\n",
421 BNX2X_ERR("cqe->pkt_len = %d\n", cqe->pkt_len);
427 /* Run through the SGL and compose the fragmented skb */
428 for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) {
429 u16 sge_idx = RX_SGE(le16_to_cpu(cqe->sgl_or_raw_data.sgl[j]));
431 /* FW gives the indices of the SGE as if the ring is an array
432 (meaning that "next" element will consume 2 indices) */
433 frag_len = min(frag_size, (u32)(SGE_PAGE_SIZE*PAGES_PER_SGE));
434 rx_pg = &fp->rx_page_ring[sge_idx];
437 /* If we fail to allocate a substitute page, we simply stop
438 where we are and drop the whole packet */
439 err = bnx2x_alloc_rx_sge(bp, fp, sge_idx);
441 fp->eth_q_stats.rx_skb_alloc_failed++;
445 /* Unmap the page as we r going to pass it to the stack */
446 dma_unmap_page(&bp->pdev->dev,
447 dma_unmap_addr(&old_rx_pg, mapping),
448 SGE_PAGE_SIZE*PAGES_PER_SGE, DMA_FROM_DEVICE);
450 /* Add one frag and update the appropriate fields in the skb */
451 skb_fill_page_desc(skb, j, old_rx_pg.page, 0, frag_len);
453 skb->data_len += frag_len;
454 skb->truesize += frag_len;
455 skb->len += frag_len;
457 frag_size -= frag_len;
/* NOTE(review): non-contiguous dump -- goto labels, several braces,
 * an skb_put and a return are missing between the lines shown.
 * Purpose: finish a TPA aggregation: allocate a replacement skb for the
 * bin, unmap and (on success) hand the aggregated skb to GRO, or drop
 * it on allocation/fill failure or a prior TPA_ERROR state.
 */
463 static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
464 u16 queue, struct eth_end_agg_rx_cqe *cqe,
467 struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
468 struct sw_rx_bd *rx_buf = &tpa_info->first_buf;
469 u8 pad = tpa_info->placement_offset;
470 u16 len = tpa_info->len_on_bd;
471 struct sk_buff *skb = rx_buf->skb;
473 struct sk_buff *new_skb;
474 u8 old_tpa_state = tpa_info->tpa_state;
476 tpa_info->tpa_state = BNX2X_TPA_STOP;
478 /* If we there was an error during the handling of the TPA_START -
479 * drop this aggregation.
481 if (old_tpa_state == BNX2X_TPA_ERROR)
484 /* Try to allocate the new skb */
485 new_skb = netdev_alloc_skb(bp->dev, fp->rx_buf_size);
487 /* Unmap skb in the pool anyway, as we are going to change
488 pool entry status to BNX2X_TPA_STOP even if new skb allocation
490 dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(rx_buf, mapping),
491 fp->rx_buf_size, DMA_FROM_DEVICE);
493 if (likely(new_skb)) {
495 prefetch(((char *)(skb)) + L1_CACHE_BYTES);
497 #ifdef BNX2X_STOP_ON_ERROR
498 if (pad + len > fp->rx_buf_size) {
499 BNX2X_ERR("skb_put is about to fail... "
500 "pad %d len %d rx_buf_size %d\n",
501 pad, len, fp->rx_buf_size);
507 skb_reserve(skb, pad);
510 skb->protocol = eth_type_trans(skb, bp->dev);
511 skb->ip_summed = CHECKSUM_UNNECESSARY;
513 if (!bnx2x_fill_frag_skb(bp, fp, queue, skb, cqe, cqe_idx)) {
514 if (tpa_info->parsing_flags & PARSING_FLAGS_VLAN)
515 __vlan_hwaccel_put_tag(skb, tpa_info->vlan_tag);
516 napi_gro_receive(&fp->napi, skb);
518 DP(NETIF_MSG_RX_STATUS, "Failed to allocate new pages"
519 " - dropping packet!\n");
520 dev_kfree_skb_any(skb);
524 /* put new skb in bin */
525 rx_buf->skb = new_skb;
531 /* drop the packet and keep the buffer in the bin */
532 DP(NETIF_MSG_RX_STATUS,
533 "Failed to allocate or map a new skb - dropping packet!\n");
534 fp->eth_q_stats.rx_skb_alloc_failed++;
/* NOTE(review): non-contiguous dump -- the assignment target (presumably
 * skb->rxhash = ... based on the function name -- TODO confirm) on the
 * line before 548 is missing.
 */
537 /* Set Toeplitz hash value in the skb using the value from the
538 * CQE (calculated by HW).
540 static inline void bnx2x_set_skb_rxhash(struct bnx2x *bp, union eth_rx_cqe *cqe,
543 /* Set Toeplitz hash from CQE */
544 if ((bp->dev->features & NETIF_F_RXHASH) &&
545 (cqe->fast_path_cqe.status_flags &
546 ETH_FAST_PATH_RX_CQE_RSS_HASH_FLG))
548 le32_to_cpu(cqe->fast_path_cqe.rss_hash_result);
/* NOTE(review): non-contiguous dump -- declarations (rx_pkt, skb, len,
 * pad, cqe_fp_flags), goto labels (next_rx/next_cqe), braces and the
 * final return are missing between the lines shown.
 * Purpose: RX completion-queue poll loop (NAPI): for each CQE up to
 * @budget, dispatch slowpath events, TPA start/stop, error drops,
 * small-packet copies, and normal receive into the stack via GRO,
 * then publish the new producers to the chip.
 */
551 int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
553 struct bnx2x *bp = fp->bp;
554 u16 bd_cons, bd_prod, bd_prod_fw, comp_ring_cons;
555 u16 hw_comp_cons, sw_comp_cons, sw_comp_prod;
558 #ifdef BNX2X_STOP_ON_ERROR
559 if (unlikely(bp->panic))
563 /* CQ "next element" is of the size of the regular element,
564 that's why it's ok here */
565 hw_comp_cons = le16_to_cpu(*fp->rx_cons_sb);
566 if ((hw_comp_cons & MAX_RCQ_DESC_CNT) == MAX_RCQ_DESC_CNT)
569 bd_cons = fp->rx_bd_cons;
570 bd_prod = fp->rx_bd_prod;
571 bd_prod_fw = bd_prod;
572 sw_comp_cons = fp->rx_comp_cons;
573 sw_comp_prod = fp->rx_comp_prod;
575 /* Memory barrier necessary as speculative reads of the rx
576 * buffer can be ahead of the index in the status block
580 DP(NETIF_MSG_RX_STATUS,
581 "queue[%d]: hw_comp_cons %u sw_comp_cons %u\n",
582 fp->index, hw_comp_cons, sw_comp_cons);
584 while (sw_comp_cons != hw_comp_cons) {
585 struct sw_rx_bd *rx_buf = NULL;
587 union eth_rx_cqe *cqe;
588 struct eth_fast_path_rx_cqe *cqe_fp;
590 enum eth_rx_cqe_type cqe_fp_type;
593 #ifdef BNX2X_STOP_ON_ERROR
594 if (unlikely(bp->panic))
598 comp_ring_cons = RCQ_BD(sw_comp_cons);
599 bd_prod = RX_BD(bd_prod);
600 bd_cons = RX_BD(bd_cons);
602 /* Prefetch the page containing the BD descriptor
603 at producer's index. It will be needed when new skb is
605 prefetch((void *)(PAGE_ALIGN((unsigned long)
606 (&fp->rx_desc_ring[bd_prod])) -
609 cqe = &fp->rx_comp_ring[comp_ring_cons];
610 cqe_fp = &cqe->fast_path_cqe;
611 cqe_fp_flags = cqe_fp->type_error_flags;
612 cqe_fp_type = cqe_fp_flags & ETH_FAST_PATH_RX_CQE_TYPE;
614 DP(NETIF_MSG_RX_STATUS, "CQE type %x err %x status %x"
615 " queue %x vlan %x len %u\n", CQE_TYPE(cqe_fp_flags),
616 cqe_fp_flags, cqe_fp->status_flags,
617 le32_to_cpu(cqe_fp->rss_hash_result),
618 le16_to_cpu(cqe_fp->vlan_tag), le16_to_cpu(cqe_fp->pkt_len));
620 /* is this a slowpath msg? */
621 if (unlikely(CQE_TYPE_SLOW(cqe_fp_type))) {
622 bnx2x_sp_event(fp, cqe);
625 /* this is an rx packet */
627 rx_buf = &fp->rx_buf_ring[bd_cons];
631 if (!CQE_TYPE_FAST(cqe_fp_type)) {
632 #ifdef BNX2X_STOP_ON_ERROR
634 if (fp->disable_tpa &&
635 (CQE_TYPE_START(cqe_fp_type) ||
636 CQE_TYPE_STOP(cqe_fp_type)))
637 BNX2X_ERR("START/STOP packet while "
638 "disable_tpa type %x\n",
639 CQE_TYPE(cqe_fp_type));
642 if (CQE_TYPE_START(cqe_fp_type)) {
643 u16 queue = cqe_fp->queue_index;
644 DP(NETIF_MSG_RX_STATUS,
645 "calling tpa_start on queue %d\n",
648 bnx2x_tpa_start(fp, queue, skb,
652 /* Set Toeplitz hash for LRO skb */
653 bnx2x_set_skb_rxhash(bp, cqe, skb);
659 cqe->end_agg_cqe.queue_index;
660 DP(NETIF_MSG_RX_STATUS,
661 "calling tpa_stop on queue %d\n",
664 bnx2x_tpa_stop(bp, fp, queue,
667 #ifdef BNX2X_STOP_ON_ERROR
672 bnx2x_update_sge_prod(fp, cqe_fp);
677 len = le16_to_cpu(cqe_fp->pkt_len);
678 pad = cqe_fp->placement_offset;
679 dma_sync_single_for_cpu(&bp->pdev->dev,
680 dma_unmap_addr(rx_buf, mapping),
681 pad + RX_COPY_THRESH,
683 prefetch(((char *)(skb)) + L1_CACHE_BYTES);
685 /* is this an error packet? */
686 if (unlikely(cqe_fp_flags & ETH_RX_ERROR_FALGS)) {
688 "ERROR flags %x rx packet %u\n",
689 cqe_fp_flags, sw_comp_cons);
690 fp->eth_q_stats.rx_err_discard_pkt++;
694 /* Since we don't have a jumbo ring
695 * copy small packets if mtu > 1500
697 if ((bp->dev->mtu > ETH_MAX_PACKET_SIZE) &&
698 (len <= RX_COPY_THRESH)) {
699 struct sk_buff *new_skb;
701 new_skb = netdev_alloc_skb(bp->dev, len + pad);
702 if (new_skb == NULL) {
704 "ERROR packet dropped "
705 "because of alloc failure\n");
706 fp->eth_q_stats.rx_skb_alloc_failed++;
711 skb_copy_from_linear_data_offset(skb, pad,
712 new_skb->data + pad, len);
713 skb_reserve(new_skb, pad);
714 skb_put(new_skb, len);
716 bnx2x_reuse_rx_skb(fp, bd_cons, bd_prod);
721 if (likely(bnx2x_alloc_rx_skb(bp, fp, bd_prod) == 0)) {
722 dma_unmap_single(&bp->pdev->dev,
723 dma_unmap_addr(rx_buf, mapping),
726 skb_reserve(skb, pad);
731 "ERROR packet dropped because "
732 "of alloc failure\n");
733 fp->eth_q_stats.rx_skb_alloc_failed++;
735 bnx2x_reuse_rx_skb(fp, bd_cons, bd_prod);
739 skb->protocol = eth_type_trans(skb, bp->dev);
741 /* Set Toeplitz hash for a none-LRO skb */
742 bnx2x_set_skb_rxhash(bp, cqe, skb);
744 skb_checksum_none_assert(skb);
746 if (bp->dev->features & NETIF_F_RXCSUM) {
748 if (likely(BNX2X_RX_CSUM_OK(cqe)))
749 skb->ip_summed = CHECKSUM_UNNECESSARY;
751 fp->eth_q_stats.hw_csum_err++;
755 skb_record_rx_queue(skb, fp->index);
757 if (le16_to_cpu(cqe_fp->pars_flags.flags) &
759 __vlan_hwaccel_put_tag(skb,
760 le16_to_cpu(cqe_fp->vlan_tag));
761 napi_gro_receive(&fp->napi, skb);
767 bd_cons = NEXT_RX_IDX(bd_cons);
768 bd_prod = NEXT_RX_IDX(bd_prod);
769 bd_prod_fw = NEXT_RX_IDX(bd_prod_fw);
772 sw_comp_prod = NEXT_RCQ_IDX(sw_comp_prod);
773 sw_comp_cons = NEXT_RCQ_IDX(sw_comp_cons);
775 if (rx_pkt == budget)
779 fp->rx_bd_cons = bd_cons;
780 fp->rx_bd_prod = bd_prod_fw;
781 fp->rx_comp_cons = sw_comp_cons;
782 fp->rx_comp_prod = sw_comp_prod;
784 /* Update producers */
785 bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod,
788 fp->rx_pkt += rx_pkt;
/* NOTE(review): non-contiguous dump -- the `u8 cos` declaration and the
 * IRQ_HANDLED/IRQ_NONE returns are missing.
 * Purpose: per-fastpath MSI-X handler: ack the status block with
 * interrupts disabled, prefetch the consumer indices and schedule NAPI.
 */
794 static irqreturn_t bnx2x_msix_fp_int(int irq, void *fp_cookie)
796 struct bnx2x_fastpath *fp = fp_cookie;
797 struct bnx2x *bp = fp->bp;
800 DP(BNX2X_MSG_FP, "got an MSI-X interrupt on IDX:SB "
801 "[fp %d fw_sd %d igusb %d]\n",
802 fp->index, fp->fw_sb_id, fp->igu_sb_id);
803 bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0);
805 #ifdef BNX2X_STOP_ON_ERROR
806 if (unlikely(bp->panic))
810 /* Handle Rx and Tx according to MSI-X vector */
811 prefetch(fp->rx_cons_sb);
813 for_each_cos_in_tx_queue(fp, cos)
814 prefetch(fp->txdata[cos].tx_cons_sb);
816 prefetch(&fp->sb_running_index[SM_RX_ID]);
817 napi_schedule(&bnx2x_fp(bp, fp->index, napi));
822 /* HW Lock for shared dual port PHYs */
823 void bnx2x_acquire_phy_lock(struct bnx2x *bp)
825 mutex_lock(&bp->port.phy_mutex);
827 if (bp->port.need_hw_lock)
828 bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_MDIO);
831 void bnx2x_release_phy_lock(struct bnx2x *bp)
833 if (bp->port.need_hw_lock)
834 bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_MDIO);
836 mutex_unlock(&bp->port.phy_mutex);
/* NOTE(review): non-contiguous dump -- the branch structure selecting
 * between the percentage-of-linespeed and absolute-rate calculations
 * (and the final return) is missing between the lines shown.
 */
839 /* calculates MF speed according to current linespeed and MF configuration */
840 u16 bnx2x_get_mf_speed(struct bnx2x *bp)
842 u16 line_speed = bp->link_vars.line_speed;
844 u16 maxCfg = bnx2x_extract_max_cfg(bp,
845 bp->mf_config[BP_VN(bp)]);
847 /* Calculate the current MAX line speed limit for the MF
851 line_speed = (line_speed * maxCfg) / 100;
853 u16 vn_max_rate = maxCfg * 100;
855 if (vn_max_rate < line_speed)
856 line_speed = vn_max_rate;
/* NOTE(review): non-contiguous dump -- the "link up" branch structure
 * guarding the duplex/flow-control flags appears to be missing.
 * Purpose: snapshot the current link state (speed, up/down, duplex,
 * RX/TX flow control) into a bnx2x_link_report_data for reporting.
 */
864 * bnx2x_fill_report_data - fill link report data to report
867 * @data: link state to update
869 * It uses a none-atomic bit operations because is called under the mutex.
871 static inline void bnx2x_fill_report_data(struct bnx2x *bp,
872 struct bnx2x_link_report_data *data)
874 u16 line_speed = bnx2x_get_mf_speed(bp);
876 memset(data, 0, sizeof(*data));
878 /* Fill the report data: efective line speed */
879 data->line_speed = line_speed;
882 if (!bp->link_vars.link_up || (bp->flags & MF_FUNC_DIS))
883 __set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
884 &data->link_report_flags);
887 if (bp->link_vars.duplex == DUPLEX_FULL)
888 __set_bit(BNX2X_LINK_REPORT_FD, &data->link_report_flags);
890 /* Rx Flow Control is ON */
891 if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_RX)
892 __set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
894 /* Tx Flow Control is ON */
895 if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_TX)
896 __set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
/**
 * bnx2x_link_report - report link status to OS.
 *
 * @bp:		driver handle
 *
 * Calls the __bnx2x_link_report() under the same locking scheme
 * as a link/PHY state managing code to ensure a consistent link
 * reporting.
 */
void bnx2x_link_report(struct bnx2x *bp)
{
	bnx2x_acquire_phy_lock(bp);
	__bnx2x_link_report(bp);
	bnx2x_release_phy_lock(bp);
}
/* NOTE(review): non-contiguous dump -- the IS_MF guard around
 * bnx2x_read_mf_cfg(), several else-branches and closing braces are
 * missing between the lines shown.
 * Purpose: build the current link report, suppress duplicate reports,
 * toggle the carrier and print speed/duplex/flow-control to the log.
 */
917 * __bnx2x_link_report - report link status to OS.
921 * None atomic inmlementation.
922 * Should be called under the phy_lock.
924 void __bnx2x_link_report(struct bnx2x *bp)
926 struct bnx2x_link_report_data cur_data;
930 bnx2x_read_mf_cfg(bp);
932 /* Read the current link report info */
933 bnx2x_fill_report_data(bp, &cur_data);
935 /* Don't report link down or exactly the same link status twice */
936 if (!memcmp(&cur_data, &bp->last_reported_link, sizeof(cur_data)) ||
937 (test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
938 &bp->last_reported_link.link_report_flags) &&
939 test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
940 &cur_data.link_report_flags)))
945 /* We are going to report a new link parameters now -
946 * remember the current data for the next time.
948 memcpy(&bp->last_reported_link, &cur_data, sizeof(cur_data));
950 if (test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
951 &cur_data.link_report_flags)) {
952 netif_carrier_off(bp->dev);
953 netdev_err(bp->dev, "NIC Link is Down\n");
956 netif_carrier_on(bp->dev);
957 netdev_info(bp->dev, "NIC Link is Up, ");
958 pr_cont("%d Mbps ", cur_data.line_speed);
960 if (test_and_clear_bit(BNX2X_LINK_REPORT_FD,
961 &cur_data.link_report_flags))
962 pr_cont("full duplex");
964 pr_cont("half duplex");
966 /* Handle the FC at the end so that only these flags would be
967 * possibly set. This way we may easily check if there is no FC
970 if (cur_data.link_report_flags) {
971 if (test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
972 &cur_data.link_report_flags)) {
973 pr_cont(", receive ");
974 if (test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
975 &cur_data.link_report_flags))
976 pr_cont("& transmit ");
978 pr_cont(", transmit ");
980 pr_cont("flow control ON");
/* NOTE(review): non-contiguous dump -- loop variable declarations
 * (i, j, ring_prod), breaks on the TPA-disable paths, and several
 * braces are missing between the lines shown.
 * Purpose: allocate per-queue TPA skb pools and SGE rings (disabling
 * TPA per-queue on allocation failure), then activate the BD rings and
 * program the E1 USTORM memory workaround registers.
 */
986 void bnx2x_init_rx_rings(struct bnx2x *bp)
988 int func = BP_FUNC(bp);
989 int max_agg_queues = CHIP_IS_E1(bp) ? ETH_MAX_AGGREGATION_QUEUES_E1 :
990 ETH_MAX_AGGREGATION_QUEUES_E1H_E2;
994 /* Allocate TPA resources */
995 for_each_rx_queue(bp, j) {
996 struct bnx2x_fastpath *fp = &bp->fp[j];
999 "mtu %d rx_buf_size %d\n", bp->dev->mtu, fp->rx_buf_size);
1001 if (!fp->disable_tpa) {
1002 /* Fill the per-aggregtion pool */
1003 for (i = 0; i < max_agg_queues; i++) {
1004 struct bnx2x_agg_info *tpa_info =
1006 struct sw_rx_bd *first_buf =
1007 &tpa_info->first_buf;
1009 first_buf->skb = netdev_alloc_skb(bp->dev,
1011 if (!first_buf->skb) {
1012 BNX2X_ERR("Failed to allocate TPA "
1013 "skb pool for queue[%d] - "
1014 "disabling TPA on this "
1016 bnx2x_free_tpa_pool(bp, fp, i);
1017 fp->disable_tpa = 1;
1020 dma_unmap_addr_set(first_buf, mapping, 0);
1021 tpa_info->tpa_state = BNX2X_TPA_STOP;
1024 /* "next page" elements initialization */
1025 bnx2x_set_next_page_sgl(fp);
1027 /* set SGEs bit mask */
1028 bnx2x_init_sge_ring_bit_mask(fp);
1030 /* Allocate SGEs and initialize the ring elements */
1031 for (i = 0, ring_prod = 0;
1032 i < MAX_RX_SGE_CNT*NUM_RX_SGE_PAGES; i++) {
1034 if (bnx2x_alloc_rx_sge(bp, fp, ring_prod) < 0) {
1035 BNX2X_ERR("was only able to allocate "
1037 BNX2X_ERR("disabling TPA for "
1039 /* Cleanup already allocated elements */
1040 bnx2x_free_rx_sge_range(bp, fp,
1042 bnx2x_free_tpa_pool(bp, fp,
1044 fp->disable_tpa = 1;
1048 ring_prod = NEXT_SGE_IDX(ring_prod);
1051 fp->rx_sge_prod = ring_prod;
1055 for_each_rx_queue(bp, j) {
1056 struct bnx2x_fastpath *fp = &bp->fp[j];
1060 /* Activate BD ring */
1062 * this will generate an interrupt (to the TSTORM)
1063 * must only be done after chip is initialized
1065 bnx2x_update_rx_prod(bp, fp, fp->rx_bd_prod, fp->rx_comp_prod,
1071 if (CHIP_IS_E1(bp)) {
1072 REG_WR(bp, BAR_USTRORM_INTMEM +
1073 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func),
1074 U64_LO(fp->rx_comp_mapping));
1075 REG_WR(bp, BAR_USTRORM_INTMEM +
1076 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func) + 4,
1077 U64_HI(fp->rx_comp_mapping));
/* NOTE(review): non-contiguous dump -- declarations (i, cos), the
 * consumer-advance inside the while loop and closing braces are missing.
 * Purpose: on teardown, walk every CoS txdata of every TX queue and
 * free all still-pending TX packets via bnx2x_free_tx_pkt().
 */
1082 static void bnx2x_free_tx_skbs(struct bnx2x *bp)
1087 for_each_tx_queue(bp, i) {
1088 struct bnx2x_fastpath *fp = &bp->fp[i];
1089 for_each_cos_in_tx_queue(fp, cos) {
1090 struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
1092 u16 bd_cons = txdata->tx_bd_cons;
1093 u16 sw_prod = txdata->tx_pkt_prod;
1094 u16 sw_cons = txdata->tx_pkt_cons;
1096 while (sw_cons != sw_prod) {
1097 bd_cons = bnx2x_free_tx_pkt(bp, txdata,
/* NOTE(review): non-contiguous dump -- the NULL-skb skip, the
 * dev_kfree_skb()/rx_buf->skb reset after the unmap, and the early
 * return are missing between the lines shown.
 * Purpose: unmap and free every skb still sitting in the RX BD ring.
 */
1105 static void bnx2x_free_rx_bds(struct bnx2x_fastpath *fp)
1107 struct bnx2x *bp = fp->bp;
1110 /* ring wasn't allocated */
1111 if (fp->rx_buf_ring == NULL)
1114 for (i = 0; i < NUM_RX_BD; i++) {
1115 struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[i];
1116 struct sk_buff *skb = rx_buf->skb;
1120 dma_unmap_single(&bp->pdev->dev,
1121 dma_unmap_addr(rx_buf, mapping),
1122 fp->rx_buf_size, DMA_FROM_DEVICE);
/* NOTE(review): non-contiguous dump -- the `int j` declaration and
 * closing braces are missing.
 * Purpose: per-queue RX cleanup: free the BD-ring skbs and, when TPA is
 * active, the chip-generation-sized TPA pool.
 */
1129 static void bnx2x_free_rx_skbs(struct bnx2x *bp)
1133 for_each_rx_queue(bp, j) {
1134 struct bnx2x_fastpath *fp = &bp->fp[j];
1136 bnx2x_free_rx_bds(fp);
1138 if (!fp->disable_tpa)
1139 bnx2x_free_tpa_pool(bp, fp, CHIP_IS_E1(bp) ?
1140 ETH_MAX_AGGREGATION_QUEUES_E1 :
1141 ETH_MAX_AGGREGATION_QUEUES_E1H_E2);
/* Free every skb the driver still owns: all pending TX packets first,
 * then the RX rings and TPA pools. */
void bnx2x_free_skbs(struct bnx2x *bp)
{
	bnx2x_free_tx_skbs(bp);
	bnx2x_free_rx_skbs(bp);
}
1151 void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value)
1153 /* load old values */
1154 u32 mf_cfg = bp->mf_config[BP_VN(bp)];
1156 if (value != bnx2x_extract_max_cfg(bp, mf_cfg)) {
1157 /* leave all but MAX value */
1158 mf_cfg &= ~FUNC_MF_CFG_MAX_BW_MASK;
1160 /* set new MAX value */
1161 mf_cfg |= (value << FUNC_MF_CFG_MAX_BW_SHIFT)
1162 & FUNC_MF_CFG_MAX_BW_MASK;
1164 bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, mf_cfg);
/* NOTE(review): non-contiguous dump -- the `int i, offset = 0`
 * declaration, offset increments on the sp/CNIC vectors, and the early
 * returns after the `nvecs == offset` checks are missing.
 * Purpose: release up to @nvecs previously requested MSI-X vectors:
 * slowpath first, then (CNIC) then one per ETH queue.
 */
1169 * bnx2x_free_msix_irqs - free previously requested MSI-X IRQ vectors
1171 * @bp: driver handle
1172 * @nvecs: number of vectors to be released
1174 static void bnx2x_free_msix_irqs(struct bnx2x *bp, int nvecs)
1178 if (nvecs == offset)
1180 free_irq(bp->msix_table[offset].vector, bp->dev);
1181 DP(NETIF_MSG_IFDOWN, "released sp irq (%d)\n",
1182 bp->msix_table[offset].vector);
1185 if (nvecs == offset)
1190 for_each_eth_queue(bp, i) {
1191 if (nvecs == offset)
1193 DP(NETIF_MSG_IFDOWN, "about to release fp #%d->%d "
1194 "irq\n", i, bp->msix_table[offset].vector);
1196 free_irq(bp->msix_table[offset++].vector, &bp->fp[i]);
/* NOTE(review): non-contiguous dump -- the second argument continuation
 * of bnx2x_free_msix_irqs() and the `else` before the final free_irq
 * (the INTx case, presumably -- TODO confirm) are missing.
 * Purpose: release the IRQ(s) according to the active interrupt mode.
 */
1200 void bnx2x_free_irq(struct bnx2x *bp)
1202 if (bp->flags & USING_MSIX_FLAG)
1203 bnx2x_free_msix_irqs(bp, BNX2X_NUM_ETH_QUEUES(bp) +
1205 else if (bp->flags & USING_MSI_FLAG)
1206 free_irq(bp->pdev->irq, bp->dev);
1208 free_irq(bp->pdev->irq, bp->dev);
/* NOTE(review): non-contiguous dump -- msix_vec increments, CNIC
 * ifdefs, error-branch structure around the pci_enable_msix() retry,
 * and the returns are missing between the lines shown.
 * Purpose: populate the MSI-X table (slowpath + CNIC + one per ETH
 * queue) and enable MSI-X, retrying with fewer vectors and shrinking
 * bp->num_queues when the full request cannot be granted.
 */
1211 int bnx2x_enable_msix(struct bnx2x *bp)
1213 int msix_vec = 0, i, rc, req_cnt;
1215 bp->msix_table[msix_vec].entry = msix_vec;
1216 DP(NETIF_MSG_IFUP, "msix_table[0].entry = %d (slowpath)\n",
1217 bp->msix_table[0].entry);
1221 bp->msix_table[msix_vec].entry = msix_vec;
1222 DP(NETIF_MSG_IFUP, "msix_table[%d].entry = %d (CNIC)\n",
1223 bp->msix_table[msix_vec].entry, bp->msix_table[msix_vec].entry);
1226 /* We need separate vectors for ETH queues only (not FCoE) */
1227 for_each_eth_queue(bp, i) {
1228 bp->msix_table[msix_vec].entry = msix_vec;
1229 DP(NETIF_MSG_IFUP, "msix_table[%d].entry = %d "
1230 "(fastpath #%u)\n", msix_vec, msix_vec, i);
1234 req_cnt = BNX2X_NUM_ETH_QUEUES(bp) + CNIC_PRESENT + 1;
1236 rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], req_cnt);
1239 * reconfigure number of tx/rx queues according to available
1242 if (rc >= BNX2X_MIN_MSIX_VEC_CNT) {
1243 /* how less vectors we will have? */
1244 int diff = req_cnt - rc;
1247 "Trying to use less MSI-X vectors: %d\n", rc);
1249 rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], rc);
1253 "MSI-X is not attainable rc %d\n", rc);
1257 * decrease number of queues by number of unallocated entries
1259 bp->num_queues -= diff;
1261 DP(NETIF_MSG_IFUP, "New queue configuration set: %d\n",
1264 /* fall to INTx if not enough memory */
1266 bp->flags |= DISABLE_MSI_FLAG;
1267 DP(NETIF_MSG_IFUP, "MSI-X is not attainable rc %d\n", rc);
1271 bp->flags |= USING_MSIX_FLAG;
/* NOTE(review): non-contiguous dump -- error returns, the CNIC offset
 * adjustment, the offset++ in the ETH loop and the final return are
 * missing between the lines shown.
 * Purpose: request the slowpath MSI-X vector and one vector per ETH
 * queue (named "%s-fp-%d"), unwinding with bnx2x_free_msix_irqs() on
 * failure, then log the assigned vector numbers.
 */
1276 static int bnx2x_req_msix_irqs(struct bnx2x *bp)
1278 int i, rc, offset = 0;
1280 rc = request_irq(bp->msix_table[offset++].vector,
1281 bnx2x_msix_sp_int, 0,
1282 bp->dev->name, bp->dev);
1284 BNX2X_ERR("request sp irq failed\n");
1291 for_each_eth_queue(bp, i) {
1292 struct bnx2x_fastpath *fp = &bp->fp[i];
1293 snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
1296 rc = request_irq(bp->msix_table[offset].vector,
1297 bnx2x_msix_fp_int, 0, fp->name, fp);
1299 BNX2X_ERR("request fp #%d irq (%d) failed rc %d\n", i,
1300 bp->msix_table[offset].vector, rc);
1301 bnx2x_free_msix_irqs(bp, offset);
1308 i = BNX2X_NUM_ETH_QUEUES(bp);
1309 offset = 1 + CNIC_PRESENT;
1310 netdev_info(bp->dev, "using MSI-X IRQs: sp %d fp[%d] %d"
1312 bp->msix_table[0].vector,
1313 0, bp->msix_table[offset].vector,
1314 i - 1, bp->msix_table[offset + i - 1].vector);
/* NOTE(review): non-contiguous dump -- the `int rc` declaration, the
 * error-branch structure around pci_enable_msi() and the returns are
 * missing.
 * Purpose: enable MSI on the PCI device and flag the mode in bp->flags.
 */
1319 int bnx2x_enable_msi(struct bnx2x *bp)
1323 rc = pci_enable_msi(bp->pdev);
1325 DP(NETIF_MSG_IFUP, "MSI is not attainable\n");
1328 bp->flags |= USING_MSI_FLAG;
/* NOTE(review): non-contiguous dump -- the `int rc` declaration, the
 * assignment clearing/setting `flags` for the MSI case, and the return
 * of rc are missing.
 * Purpose: request the single (MSI or shared INTx) interrupt line.
 */
1333 static int bnx2x_req_irq(struct bnx2x *bp)
1335 unsigned long flags;
1338 if (bp->flags & USING_MSI_FLAG)
1341 flags = IRQF_SHARED;
1343 rc = request_irq(bp->pdev->irq, bnx2x_interrupt, flags,
1344 bp->dev->name, bp->dev);
/* NOTE(review): non-contiguous dump -- the `int rc` declaration, error
 * returns, the else joining the MSI-X and MSI/INTx paths, and the final
 * return are missing.
 * Purpose: request interrupts according to the negotiated mode
 * (MSI-X, else MSI/INTx), recording the irq number for MSI.
 */
1348 static inline int bnx2x_setup_irqs(struct bnx2x *bp)
1351 if (bp->flags & USING_MSIX_FLAG) {
1352 rc = bnx2x_req_msix_irqs(bp);
1357 rc = bnx2x_req_irq(bp);
1359 BNX2X_ERR("IRQ request failed rc %d, aborting\n", rc);
1362 if (bp->flags & USING_MSI_FLAG) {
1363 bp->dev->irq = bp->pdev->irq;
1364 netdev_info(bp->dev, "using MSI IRQ %d\n",
/* bnx2x_napi_enable - enable NAPI polling on every Rx queue. */
1372 static inline void bnx2x_napi_enable(struct bnx2x *bp)
1376 for_each_rx_queue(bp, i)
1377 napi_enable(&bnx2x_fp(bp, i, napi));
/* bnx2x_napi_disable - disable NAPI polling on every Rx queue
 * (napi_disable() waits for any in-progress poll to finish).
 */
1380 static inline void bnx2x_napi_disable(struct bnx2x *bp)
1384 for_each_rx_queue(bp, i)
1385 napi_disable(&bnx2x_fp(bp, i, napi));
/*
 * bnx2x_netif_start - resume network activity on a running interface.
 *
 * Re-enables NAPI and HW interrupts; Tx queues are woken only when the
 * driver state is fully OPEN.  No-op when the netdev is not running.
 */
1388 void bnx2x_netif_start(struct bnx2x *bp)
1390 if (netif_running(bp->dev)) {
1391 bnx2x_napi_enable(bp);
1392 bnx2x_int_enable(bp);
1393 if (bp->state == BNX2X_STATE_OPEN)
1394 netif_tx_wake_all_queues(bp->dev);
/*
 * bnx2x_netif_stop - quiesce interrupts and NAPI.
 * @disable_hw: forwarded to bnx2x_int_disable_sync() to also mask HW.
 */
1398 void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
1400 bnx2x_int_disable_sync(bp, disable_hw);
1401 bnx2x_napi_disable(bp);
/*
 * bnx2x_select_queue - ndo_select_queue hook.
 *
 * FCoE/FIP frames (including VLAN-tagged ones) are steered to the
 * dedicated FCoE Tx ring; everything else is hashed across the
 * ethernet queues only, so the FCoE L2 ring is never picked by the
 * generic hash.  NOTE(review): the branch guarding the FCoE path
 * (presumably a NO_FCOE() check before the plain skb_tx_hash return)
 * is elided in this extraction.
 */
1404 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb)
1406 struct bnx2x *bp = netdev_priv(dev);
1409 return skb_tx_hash(dev, skb);
1411 struct ethhdr *hdr = (struct ethhdr *)skb->data;
1412 u16 ether_type = ntohs(hdr->h_proto);
1414 /* Skip VLAN tag if present */
1415 if (ether_type == ETH_P_8021Q) {
1416 struct vlan_ethhdr *vhdr =
1417 (struct vlan_ethhdr *)skb->data;
1419 ether_type = ntohs(vhdr->h_vlan_encapsulated_proto);
1422 /* If ethertype is FCoE or FIP - use FCoE ring */
1423 if ((ether_type == ETH_P_FCOE) || (ether_type == ETH_P_FIP))
1424 return bnx2x_fcoe_tx(bp, txq_index);
1427 /* Select a none-FCoE queue: if FCoE is enabled, exclude FCoE L2 ring
1429 return __skb_tx_hash(dev, skb, BNX2X_NUM_ETH_QUEUES(bp));
/*
 * bnx2x_set_num_queues - compute bp->num_queues from the RSS mode.
 *
 * REGULAR mode uses the calculated ethernet queue count; the special
 * (non-ethernet context) queues are added on top.  NOTE(review): the
 * DISABLED-mode assignment and default case are elided in this
 * extraction.
 */
1432 void bnx2x_set_num_queues(struct bnx2x *bp)
1434 switch (bp->multi_mode) {
1435 case ETH_RSS_MODE_DISABLED:
1438 case ETH_RSS_MODE_REGULAR:
1439 bp->num_queues = bnx2x_calc_num_queues(bp);
1447 /* Add special queues */
1448 bp->num_queues += NON_ETH_CONTEXT_USE;
/*
 * bnx2x_set_real_num_queues - publish the Tx/Rx queue counts to the
 * networking core.
 *
 * Tx count covers all CoS instances (MAX_TXQS_PER_COS * max_cos);
 * Rx count is the ethernet queue count.  The FCoE queue adjustment
 * (see the "account for fcoe queue" comment) is elided in this
 * extraction.
 */
1451 static inline int bnx2x_set_real_num_queues(struct bnx2x *bp)
1455 tx = MAX_TXQS_PER_COS * bp->max_cos;
1456 rx = BNX2X_NUM_ETH_QUEUES(bp);
1458 /* account for fcoe queue */
1466 rc = netif_set_real_num_tx_queues(bp->dev, tx);
1468 BNX2X_ERR("Failed to set real number of Tx queues: %d\n", rc);
1471 rc = netif_set_real_num_rx_queues(bp->dev, rx);
1473 BNX2X_ERR("Failed to set real number of Rx queues: %d\n", rc);
1477 DP(NETIF_MSG_DRV, "Setting real num queues to (tx, rx) (%d, %d)\n",
/*
 * bnx2x_set_rx_buf_size - size each queue's Rx buffer.
 *
 * FCoE uses a fixed mini-jumbo MTU; regular queues derive the size
 * from the device MTU.  Both add ethernet overhead, the FW Rx
 * alignment and the IP header alignment padding.
 */
1483 static inline void bnx2x_set_rx_buf_size(struct bnx2x *bp)
1487 for_each_queue(bp, i) {
1488 struct bnx2x_fastpath *fp = &bp->fp[i];
1490 /* Always use a mini-jumbo MTU for the FCoE L2 ring */
1493 * Although there are no IP frames expected to arrive to
1494 * this ring we still want to add an
1495 * IP_HEADER_ALIGNMENT_PADDING to prevent a buffer
1499 BNX2X_FCOE_MINI_JUMBO_MTU + ETH_OVREHEAD +
1500 BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING;
/* Non-FCoE queues: buffer follows the configured device MTU. */
1503 bp->dev->mtu + ETH_OVREHEAD +
1504 BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING;
/*
 * bnx2x_init_rss_pf - build the initial RSS indirection table and
 * program it.
 *
 * When RSS is enabled the table entries cycle through the ethernet
 * queues' client ids.  The config_hash argument reflects the per-port
 * (57710/57711, PMF only) vs per-function (57712+) SEARCHER key
 * ownership described in the in-line comment below.
 */
1508 static inline int bnx2x_init_rss_pf(struct bnx2x *bp)
1511 u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0};
1512 u8 num_eth_queues = BNX2X_NUM_ETH_QUEUES(bp);
1515 * Prepare the inital contents fo the indirection table if RSS is
1518 if (bp->multi_mode != ETH_RSS_MODE_DISABLED) {
1519 for (i = 0; i < sizeof(ind_table); i++)
1521 bp->fp->cl_id + (i % num_eth_queues);
1525 * For 57710 and 57711 SEARCHER configuration (rss_keys) is
1526 * per-port, so if explicit configuration is needed , do it only
1529 * For 57712 and newer on the other hand it's a per-function
1532 return bnx2x_config_rss_pf(bp, ind_table,
1533 bp->port.pmf || !CHIP_IS_E1x(bp));
/*
 * bnx2x_config_rss_pf - translate bp->multi_mode into RSS ramrod
 * parameters and submit them.
 * @ind_table:   indirection table to program.
 * @config_hash: when true, also generate and set random RSS keys
 *               (SET_SRCH flag).
 *
 * RSS stays "enabled" even for a single queue so HW Rx hashing keeps
 * working (see the in-line comment).  NOTE(review): break statements
 * between the switch cases are elided in this extraction; the full
 * source has one per case.
 */
1536 int bnx2x_config_rss_pf(struct bnx2x *bp, u8 *ind_table, bool config_hash)
1538 struct bnx2x_config_rss_params params = {0};
1541 /* Although RSS is meaningless when there is a single HW queue we
1542 * still need it enabled in order to have HW Rx hash generated.
1544 * if (!is_eth_multi(bp))
1545 * bp->multi_mode = ETH_RSS_MODE_DISABLED;
1548 params.rss_obj = &bp->rss_conf_obj;
1550 __set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
/* Map the driver-level RSS mode onto the ramrod mode flag. */
1553 switch (bp->multi_mode) {
1554 case ETH_RSS_MODE_DISABLED:
1555 __set_bit(BNX2X_RSS_MODE_DISABLED, &params.rss_flags);
1557 case ETH_RSS_MODE_REGULAR:
1558 __set_bit(BNX2X_RSS_MODE_REGULAR, &params.rss_flags);
1560 case ETH_RSS_MODE_VLAN_PRI:
1561 __set_bit(BNX2X_RSS_MODE_VLAN_PRI, &params.rss_flags);
1563 case ETH_RSS_MODE_E1HOV_PRI:
1564 __set_bit(BNX2X_RSS_MODE_E1HOV_PRI, &params.rss_flags);
1566 case ETH_RSS_MODE_IP_DSCP:
1567 __set_bit(BNX2X_RSS_MODE_IP_DSCP, &params.rss_flags);
1570 BNX2X_ERR("Unknown multi_mode: %d\n", bp->multi_mode);
1574 /* If RSS is enabled */
1575 if (bp->multi_mode != ETH_RSS_MODE_DISABLED) {
1576 /* RSS configuration */
1577 __set_bit(BNX2X_RSS_IPV4, &params.rss_flags);
1578 __set_bit(BNX2X_RSS_IPV4_TCP, &params.rss_flags);
1579 __set_bit(BNX2X_RSS_IPV6, &params.rss_flags);
1580 __set_bit(BNX2X_RSS_IPV6_TCP, &params.rss_flags);
1583 params.rss_result_mask = MULTI_MASK;
1585 memcpy(params.ind_table, ind_table, sizeof(params.ind_table));
/* Randomize the hash keys when the caller owns key configuration. */
1589 for (i = 0; i < sizeof(params.rss_key) / 4; i++)
1590 params.rss_key[i] = random32();
1592 __set_bit(BNX2X_RSS_SET_SRCH, &params.rss_flags);
1596 return bnx2x_config_rss(bp, &params);
/*
 * bnx2x_init_hw - run the HW_INIT function state transition.
 * @load_code: MCP load phase (COMMON/PORT/FUNCTION) passed to the
 *             state machine as the hw_init load phase.
 *
 * Blocks until the ramrod completes (RAMROD_COMP_WAIT).
 */
1599 static inline int bnx2x_init_hw(struct bnx2x *bp, u32 load_code)
1601 struct bnx2x_func_state_params func_params = {0};
1603 /* Prepare parameters for function state transitions */
1604 __set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
1606 func_params.f_obj = &bp->func_obj;
1607 func_params.cmd = BNX2X_F_CMD_HW_INIT;
1609 func_params.params.hw_init.load_phase = load_code;
1611 return bnx2x_func_state_change(bp, &func_params);
/*
 * bnx2x_squeeze_objects - driver-only ("dry") cleanup of list-backed
 * objects without issuing ramrods; for use with interrupts disabled.
 *
 * Order: ETH primary MAC -> UC list MACs -> multicast object.  The
 * RAMROD_DRV_CLR_ONLY flag makes each delete a driver-side cleanup,
 * and RAMROD_COMP_WAIT makes the calls synchronous.  The trailing
 * CMD_CONT loop drains any pending multicast commands (loop structure
 * partially elided in this extraction).
 */
1615 * Cleans the object that have internal lists without sending
1616 * ramrods. Should be run when interrutps are disabled.
1618 static void bnx2x_squeeze_objects(struct bnx2x *bp)
1621 unsigned long ramrod_flags = 0, vlan_mac_flags = 0;
1622 struct bnx2x_mcast_ramrod_params rparam = {0};
1623 struct bnx2x_vlan_mac_obj *mac_obj = &bp->fp->mac_obj;
1625 /***************** Cleanup MACs' object first *************************/
1627 /* Wait for completion of requested */
1628 __set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
1629 /* Perform a dry cleanup */
1630 __set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
1632 /* Clean ETH primary MAC */
1633 __set_bit(BNX2X_ETH_MAC, &vlan_mac_flags);
1634 rc = mac_obj->delete_all(bp, &bp->fp->mac_obj, &vlan_mac_flags,
1637 BNX2X_ERR("Failed to clean ETH MACs: %d\n", rc);
1639 /* Cleanup UC list */
1641 __set_bit(BNX2X_UC_LIST_MAC, &vlan_mac_flags);
1642 rc = mac_obj->delete_all(bp, mac_obj, &vlan_mac_flags,
1645 BNX2X_ERR("Failed to clean UC list MACs: %d\n", rc);
1647 /***************** Now clean mcast object *****************************/
1648 rparam.mcast_obj = &bp->mcast_obj;
1649 __set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
1651 /* Add a DEL command... */
1652 rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
1654 BNX2X_ERR("Failed to add a new DEL command to a multi-cast "
1655 "object: %d\n", rc);
1657 /* ...and wait until all pending commands are cleared */
1658 rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
1661 BNX2X_ERR("Failed to clean multi-cast object: %d\n",
1666 rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
/*
 * LOAD_ERROR_EXIT - bnx2x_nic_load() error-path helper.
 *
 * Sets the error state and (in the !BNX2X_STOP_ON_ERROR build) jumps
 * to the given cleanup label; the debug build variant differs.
 * NOTE(review): the do/while bodies and goto/BUG lines of both
 * variants are elided in this extraction.
 */
1670 #ifndef BNX2X_STOP_ON_ERROR
1671 #define LOAD_ERROR_EXIT(bp, label) \
1673 (bp)->state = BNX2X_STATE_ERROR; \
1677 #define LOAD_ERROR_EXIT(bp, label) \
1679 (bp)->state = BNX2X_STATE_ERROR; \
/*
 * bnx2x_nic_load - bring the NIC up (called with rtnl_lock held).
 * @load_mode: LOAD_OPEN / LOAD_DIAG style mode selecting how Tx queues
 *             and final state are set up.
 *
 * Sequence visible below: report link-down, set up ILT, zero fastpath
 * structures, size Rx buffers, allocate memory, publish queue counts,
 * set up multi-CoS tc mappings, enable NAPI, negotiate the load type
 * with the MCP (or emulate it via load_count[] when there is no MCP),
 * init HW and IRQs, start the function, set up the leading and
 * non-default queues, configure RSS and the unicast MAC, init the PHY
 * and Rx filters, then start timers/CNIC and wait for slowpath
 * completion.  Error paths unwind through the load_error* labels.
 *
 * NOTE(review): many interior lines (returns, "if (rc)" guards,
 * braces, labels) are elided in this extraction; the label targets of
 * LOAD_ERROR_EXIT below are not all visible.
 */
1685 /* must be called with rtnl_lock */
1686 int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
1688 int port = BP_PORT(bp);
1692 #ifdef BNX2X_STOP_ON_ERROR
1693 if (unlikely(bp->panic))
1697 bp->state = BNX2X_STATE_OPENING_WAIT4_LOAD;
1699 /* Set the initial link reported state to link down */
1700 bnx2x_acquire_phy_lock(bp);
1701 memset(&bp->last_reported_link, 0, sizeof(bp->last_reported_link));
1702 __set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
1703 &bp->last_reported_link.link_report_flags);
1704 bnx2x_release_phy_lock(bp);
1706 /* must be called before memory allocation and HW init */
1707 bnx2x_ilt_set_info(bp);
1710 * Zero fastpath structures preserving invariants like napi, which are
1711 * allocated only once, fp index, max_cos, bp pointer.
1712 * Also set fp->disable_tpa.
1714 for_each_queue(bp, i)
1718 /* Set the receive queues buffer size */
1719 bnx2x_set_rx_buf_size(bp);
1721 if (bnx2x_alloc_mem(bp))
1724 /* As long as bnx2x_alloc_mem() may possibly update
1725 * bp->num_queues, bnx2x_set_real_num_queues() should always
1728 rc = bnx2x_set_real_num_queues(bp);
1730 BNX2X_ERR("Unable to set real_num_queues\n");
1731 LOAD_ERROR_EXIT(bp, load_error0);
1734 /* configure multi cos mappings in kernel.
1735 * this configuration may be overriden by a multi class queue discipline
1736 * or by a dcbx negotiation result.
1738 bnx2x_setup_tc(bp->dev, bp->max_cos);
1740 bnx2x_napi_enable(bp);
1742 /* Send LOAD_REQUEST command to MCP
1743 * Returns the type of LOAD command:
1744 * if it is the first port to be initialized
1745 * common blocks should be initialized, otherwise - not
1747 if (!BP_NOMCP(bp)) {
1748 load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, 0);
1750 BNX2X_ERR("MCP response failure, aborting\n");
1752 LOAD_ERROR_EXIT(bp, load_error1);
1754 if (load_code == FW_MSG_CODE_DRV_LOAD_REFUSED) {
1755 rc = -EBUSY; /* other port in diagnostic mode */
1756 LOAD_ERROR_EXIT(bp, load_error1);
/* No MCP: emulate the load arbitration with per-path counters. */
1760 int path = BP_PATH(bp);
1762 DP(NETIF_MSG_IFUP, "NO MCP - load counts[%d] %d, %d, %d\n",
1763 path, load_count[path][0], load_count[path][1],
1764 load_count[path][2]);
1765 load_count[path][0]++;
1766 load_count[path][1 + port]++;
1767 DP(NETIF_MSG_IFUP, "NO MCP - new load counts[%d] %d, %d, %d\n",
1768 path, load_count[path][0], load_count[path][1],
1769 load_count[path][2]);
1770 if (load_count[path][0] == 1)
1771 load_code = FW_MSG_CODE_DRV_LOAD_COMMON;
1772 else if (load_count[path][1 + port] == 1)
1773 load_code = FW_MSG_CODE_DRV_LOAD_PORT;
1775 load_code = FW_MSG_CODE_DRV_LOAD_FUNCTION;
/* COMMON/PORT load means this function becomes the port PMF. */
1778 if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
1779 (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) ||
1780 (load_code == FW_MSG_CODE_DRV_LOAD_PORT)) {
1783 * We need the barrier to ensure the ordering between the
1784 * writing to bp->port.pmf here and reading it from the
1785 * bnx2x_periodic_task().
1788 queue_delayed_work(bnx2x_wq, &bp->period_task, 0);
1792 DP(NETIF_MSG_LINK, "pmf %d\n", bp->port.pmf);
1794 /* Init Function state controlling object */
1795 bnx2x__init_func_obj(bp);
1798 rc = bnx2x_init_hw(bp, load_code);
1800 BNX2X_ERR("HW init failed, aborting\n");
1801 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1802 LOAD_ERROR_EXIT(bp, load_error2);
1805 /* Connect to IRQs */
1806 rc = bnx2x_setup_irqs(bp);
1808 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1809 LOAD_ERROR_EXIT(bp, load_error2);
1812 /* Setup NIC internals and enable interrupts */
1813 bnx2x_nic_init(bp, load_code);
1815 /* Init per-function objects */
1816 bnx2x_init_bp_objs(bp);
1818 if (((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
1819 (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP)) &&
1820 (bp->common.shmem2_base)) {
1821 if (SHMEM2_HAS(bp, dcc_support))
1822 SHMEM2_WR(bp, dcc_support,
1823 (SHMEM_DCC_SUPPORT_DISABLE_ENABLE_PF_TLV |
1824 SHMEM_DCC_SUPPORT_BANDWIDTH_ALLOCATION_TLV));
1827 bp->state = BNX2X_STATE_OPENING_WAIT4_PORT;
1828 rc = bnx2x_func_start(bp);
1830 BNX2X_ERR("Function start failed!\n");
1831 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1832 LOAD_ERROR_EXIT(bp, load_error3);
1835 /* Send LOAD_DONE command to MCP */
1836 if (!BP_NOMCP(bp)) {
1837 load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1839 BNX2X_ERR("MCP response failure, aborting\n");
1841 LOAD_ERROR_EXIT(bp, load_error3);
1845 rc = bnx2x_setup_leading(bp);
1847 BNX2X_ERR("Setup leading failed!\n");
1848 LOAD_ERROR_EXIT(bp, load_error3);
1852 /* Enable Timer scan */
1853 REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 1);
1856 for_each_nondefault_queue(bp, i) {
1857 rc = bnx2x_setup_queue(bp, &bp->fp[i], 0);
1859 LOAD_ERROR_EXIT(bp, load_error4);
1862 rc = bnx2x_init_rss_pf(bp);
1864 LOAD_ERROR_EXIT(bp, load_error4);
1866 /* Now when Clients are configured we are ready to work */
1867 bp->state = BNX2X_STATE_OPEN;
1869 /* Configure a ucast MAC */
1870 rc = bnx2x_set_eth_mac(bp, true);
1872 LOAD_ERROR_EXIT(bp, load_error4);
1874 if (bp->pending_max) {
1875 bnx2x_update_max_mf_config(bp, bp->pending_max);
1876 bp->pending_max = 0;
1880 bnx2x_initial_phy_init(bp, load_mode);
1882 /* Start fast path */
1884 /* Initialize Rx filter. */
1885 netif_addr_lock_bh(bp->dev);
1886 bnx2x_set_rx_mode(bp->dev);
1887 netif_addr_unlock_bh(bp->dev);
1890 switch (load_mode) {
1892 /* Tx queue should be only reenabled */
1893 netif_tx_wake_all_queues(bp->dev);
1897 netif_tx_start_all_queues(bp->dev);
1898 smp_mb__after_clear_bit();
1902 bp->state = BNX2X_STATE_DIAG;
1910 bnx2x__link_status_update(bp);
1912 /* start the timer */
1913 mod_timer(&bp->timer, jiffies + bp->current_interval);
1916 bnx2x_setup_cnic_irq_info(bp);
1917 if (bp->state == BNX2X_STATE_OPEN)
1918 bnx2x_cnic_notify(bp, CNIC_CTL_START_CMD);
1920 bnx2x_inc_load_cnt(bp);
1922 /* Wait for all pending SP commands to complete */
1923 if (!bnx2x_wait_sp_comp(bp, ~0x0UL)) {
1924 BNX2X_ERR("Timeout waiting for SP elements to complete\n");
1925 bnx2x_nic_unload(bp, UNLOAD_CLOSE);
1929 bnx2x_dcbx_init(bp);
/* Error unwind (load_error4..0, labels elided in this extraction). */
1932 #ifndef BNX2X_STOP_ON_ERROR
1935 /* Disable Timer scan */
1936 REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 0);
1939 bnx2x_int_disable_sync(bp, 1);
1941 /* Clean queueable objects */
1942 bnx2x_squeeze_objects(bp);
1944 /* Free SKBs, SGEs, TPA pool and driver internals */
1945 bnx2x_free_skbs(bp);
1946 for_each_rx_queue(bp, i)
1947 bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
1952 if (!BP_NOMCP(bp)) {
1953 bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0);
1954 bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, 0);
1959 bnx2x_napi_disable(bp);
1964 #endif /* ! BNX2X_STOP_ON_ERROR */
/*
 * bnx2x_nic_unload - bring the NIC down (called with rtnl_lock held).
 * @unload_mode: UNLOAD_NORMAL/UNLOAD_CLOSE/UNLOAD_RECOVERY — RECOVERY
 *               skips the chip cleanup.
 *
 * Handles the already-CLOSED/ERROR recovery case first, then: move to
 * CLOSING_WAIT4_HALT before disabling Tx (ordering vs bnx2x_tx_int is
 * explained in line), notify CNIC, stop timers/stats, clean up the
 * chip, send UNLOAD_REQ, disable the PF on non-E1x, stop
 * interrupts/NAPI, report UNLOAD_DONE, squeeze leftover objects, free
 * SKBs/SGEs, and finally record any pending parity state for the
 * recovery flow.
 *
 * NOTE(review): interior lines (returns, braces) are elided in this
 * extraction.
 */
1967 /* must be called with rtnl_lock */
1968 int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
1971 bool global = false;
1973 if ((bp->state == BNX2X_STATE_CLOSED) ||
1974 (bp->state == BNX2X_STATE_ERROR)) {
1975 /* We can get here if the driver has been unloaded
1976 * during parity error recovery and is either waiting for a
1977 * leader to complete or for other functions to unload and
1978 * then ifdown has been issued. In this case we want to
1979 * unload and let other functions to complete a recovery
1982 bp->recovery_state = BNX2X_RECOVERY_DONE;
1984 bnx2x_release_leader_lock(bp);
1987 DP(NETIF_MSG_HW, "Releasing a leadership...\n");
1993 * It's important to set the bp->state to the value different from
1994 * BNX2X_STATE_OPEN and only then stop the Tx. Otherwise bnx2x_tx_int()
1995 * may restart the Tx from the NAPI context (see bnx2x_tx_int()).
1997 bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT;
2001 bnx2x_tx_disable(bp);
2004 bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD);
2007 bp->rx_mode = BNX2X_RX_MODE_NONE;
2009 del_timer_sync(&bp->timer);
2011 /* Set ALWAYS_ALIVE bit in shmem */
2012 bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;
2014 bnx2x_drv_pulse(bp);
2016 bnx2x_stats_handle(bp, STATS_EVENT_STOP);
2018 /* Cleanup the chip if needed */
2019 if (unload_mode != UNLOAD_RECOVERY)
2020 bnx2x_chip_cleanup(bp, unload_mode);
2022 /* Send the UNLOAD_REQUEST to the MCP */
2023 bnx2x_send_unload_req(bp, unload_mode);
2026 * Prevent transactions to host from the functions on the
2027 * engine that doesn't reset global blocks in case of global
2028 * attention once gloabl blocks are reset and gates are opened
2029 * (the engine which leader will perform the recovery
2032 if (!CHIP_IS_E1x(bp))
2033 bnx2x_pf_disable(bp);
2035 /* Disable HW interrupts, NAPI */
2036 bnx2x_netif_stop(bp, 1);
2041 /* Report UNLOAD_DONE to MCP */
2042 bnx2x_send_unload_done(bp);
2046 * At this stage no more interrupts will arrive so we may safly clean
2047 * the queueable objects here in case they failed to get cleaned so far.
2049 bnx2x_squeeze_objects(bp);
2051 /* There should be no more pending SP commands at this stage */
2056 /* Free SKBs, SGEs, TPA pool and driver internals */
2057 bnx2x_free_skbs(bp);
2058 for_each_rx_queue(bp, i)
2059 bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
2063 bp->state = BNX2X_STATE_CLOSED;
2065 /* Check if there are pending parity attentions. If there are - set
2066 * RECOVERY_IN_PROGRESS.
2068 if (bnx2x_chk_parity_attn(bp, &global, false)) {
2069 bnx2x_set_reset_in_progress(bp);
2071 /* Set RESET_IS_GLOBAL if needed */
2073 bnx2x_set_reset_global(bp);
2077 /* The last driver must disable a "close the gate" if there is no
2078 * parity attention or "process kill" pending.
2080 if (!bnx2x_dec_load_cnt(bp) && bnx2x_reset_is_done(bp, BP_PATH(bp)))
2081 bnx2x_disable_close_the_gate(bp);
/*
 * bnx2x_set_power_state - move the device between PCI power states via
 * the PM control register.
 * @state: target pci_power_t (the D0 and D3hot branches are visible
 *         below; the switch skeleton is elided in this extraction).
 *
 * Succeeds silently when the device exposes no PM capability.  For
 * D3hot the power is kept on when other clients still hold the device
 * (enable_cnt != 1) or on emulation/FPGA platforms.
 */
2086 int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state)
2090 /* If there is no power capability, silently succeed */
2092 DP(NETIF_MSG_HW, "No power capability. Breaking.\n");
2096 pci_read_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL, &pmcsr);
/* D0: clear the state field and any pending PME status. */
2100 pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL,
2101 ((pmcsr & ~PCI_PM_CTRL_STATE_MASK) |
2102 PCI_PM_CTRL_PME_STATUS));
2104 if (pmcsr & PCI_PM_CTRL_STATE_MASK)
2105 /* delay required during transition out of D3hot */
2110 /* If there are other clients above don't
2111 shut down the power */
2112 if (atomic_read(&bp->pdev->enable_cnt) != 1)
2114 /* Don't shut down the power for emulation and FPGA */
2115 if (CHIP_REV_IS_SLOW(bp))
2118 pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
2122 pmcsr |= PCI_PM_CTRL_PME_ENABLE;
2124 pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL,
2127 /* No more memory access after this point until
2128 * device is brought back to D0.
/*
 * bnx2x_poll - NAPI poll handler for one fastpath.
 * @budget: maximum Rx packets to process this pass.
 *
 * Drains Tx completions for every CoS, then processes Rx up to the
 * budget.  When no work remains it updates the fastpath status block
 * index, re-checks for work (the long in-line comment explains the
 * required rmb/ordering against a concurrent status-block DMA) and
 * only then completes NAPI and re-enables the IGU interrupt.  FCoE
 * completes immediately since its SB is the default one.
 *
 * NOTE(review): the enclosing loop, work_done init and returns are
 * elided in this extraction.
 */
2139 * net_device service functions
2141 int bnx2x_poll(struct napi_struct *napi, int budget)
2145 struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath,
2147 struct bnx2x *bp = fp->bp;
2150 #ifdef BNX2X_STOP_ON_ERROR
2151 if (unlikely(bp->panic)) {
2152 napi_complete(napi);
/* Service Tx completions on every CoS ring of this fastpath. */
2157 for_each_cos_in_tx_queue(fp, cos)
2158 if (bnx2x_tx_queue_has_work(&fp->txdata[cos]))
2159 bnx2x_tx_int(bp, &fp->txdata[cos]);
2162 if (bnx2x_has_rx_work(fp)) {
2163 work_done += bnx2x_rx_int(fp, budget - work_done);
2165 /* must not complete if we consumed full budget */
2166 if (work_done >= budget)
2170 /* Fall out from the NAPI loop if needed */
2171 if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
2173 /* No need to update SB for FCoE L2 ring as long as
2174 * it's connected to the default SB and the SB
2175 * has been updated when NAPI was scheduled.
2177 if (IS_FCOE_FP(fp)) {
2178 napi_complete(napi);
2183 bnx2x_update_fpsb_idx(fp);
2184 /* bnx2x_has_rx_work() reads the status block,
2185 * thus we need to ensure that status block indices
2186 * have been actually read (bnx2x_update_fpsb_idx)
2187 * prior to this check (bnx2x_has_rx_work) so that
2188 * we won't write the "newer" value of the status block
2189 * to IGU (if there was a DMA right after
2190 * bnx2x_has_rx_work and if there is no rmb, the memory
2191 * reading (bnx2x_update_fpsb_idx) may be postponed
2192 * to right before bnx2x_ack_sb). In this case there
2193 * will never be another interrupt until there is
2194 * another update of the status block, while there
2195 * is still unhandled work.
2199 if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
2200 napi_complete(napi);
2201 /* Re-enable interrupts */
2203 "Update index to %d\n", fp->fp_hc_idx);
2204 bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID,
2205 le16_to_cpu(fp->fp_hc_idx),
/*
 * bnx2x_tx_split - split the first Tx BD into a headers BD and a data
 * BD that share one DMA mapping (FW TSO requirement).
 * @hlen:    header length in bytes; the first BD is trimmed to it.
 * @bd_prod: current BD producer; advanced past the new data BD.
 * @nbd:     total BD count written into the first BD.
 *
 * The data BD's address is the header BD's address advanced by hlen,
 * and BNX2X_TSO_SPLIT_BD marks the buffer so the unmap path knows the
 * second BD has no mapping of its own.  *tx_bd is repointed at the new
 * data BD; the (elided) return is the updated bd_prod.
 */
2215 /* we split the first BD into headers and data BDs
2216 * to ease the pain of our fellow microcode engineers
2217 * we use one mapping for both BDs
2218 * So far this has only been observed to happen
2219 * in Other Operating Systems(TM)
2221 static noinline u16 bnx2x_tx_split(struct bnx2x *bp,
2222 struct bnx2x_fp_txdata *txdata,
2223 struct sw_tx_bd *tx_buf,
2224 struct eth_tx_start_bd **tx_bd, u16 hlen,
2225 u16 bd_prod, int nbd)
2227 struct eth_tx_start_bd *h_tx_bd = *tx_bd;
2228 struct eth_tx_bd *d_tx_bd;
2230 int old_len = le16_to_cpu(h_tx_bd->nbytes);
2232 /* first fix first BD */
2233 h_tx_bd->nbd = cpu_to_le16(nbd);
2234 h_tx_bd->nbytes = cpu_to_le16(hlen);
2236 DP(NETIF_MSG_TX_QUEUED, "TSO split header size is %d "
2237 "(%x:%x) nbd %d\n", h_tx_bd->nbytes, h_tx_bd->addr_hi,
2238 h_tx_bd->addr_lo, h_tx_bd->nbd);
2240 /* now get a new data BD
2241 * (after the pbd) and fill it */
2242 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2243 d_tx_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
/* Data BD points into the same mapping, offset by the header length. */
2245 mapping = HILO_U64(le32_to_cpu(h_tx_bd->addr_hi),
2246 le32_to_cpu(h_tx_bd->addr_lo)) + hlen;
2248 d_tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2249 d_tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2250 d_tx_bd->nbytes = cpu_to_le16(old_len - hlen);
2252 /* this marks the BD as one that has no individual mapping */
2253 tx_buf->flags |= BNX2X_TSO_SPLIT_BD;
2255 DP(NETIF_MSG_TX_QUEUED,
2256 "TSO split data size is %d (%x:%x)\n",
2257 d_tx_bd->nbytes, d_tx_bd->addr_hi, d_tx_bd->addr_lo);
2260 *tx_bd = (struct eth_tx_start_bd *)d_tx_bd;
/*
 * bnx2x_csum_fix - adjust a pseudo checksum by +/-fix bytes around the
 * transport header (UDP checksum errata workaround) and return it
 * byte-swapped.  Positive fix subtracts the preceding bytes' partial
 * sum; negative fix adds the following bytes' sum.
 */
2265 static inline u16 bnx2x_csum_fix(unsigned char *t_header, u16 csum, s8 fix)
2268 csum = (u16) ~csum_fold(csum_sub(csum,
2269 csum_partial(t_header - fix, fix, 0)));
2272 csum = (u16) ~csum_fold(csum_add(csum,
2273 csum_partial(t_header, -fix, 0)));
2275 return swab16(csum);
/*
 * bnx2x_xmit_type - classify an outgoing skb into XMIT_* flags.
 *
 * Returns a bitmask combining checksum flags (V4/V6, TCP) and GSO
 * flags (V4/V6) based on ip_summed, the L3 protocol and the GSO state.
 * Non-CHECKSUM_PARTIAL skbs short-circuit (return elided here).
 */
2278 static inline u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb)
2282 if (skb->ip_summed != CHECKSUM_PARTIAL)
2286 if (vlan_get_protocol(skb) == htons(ETH_P_IPV6)) {
2288 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
2289 rc |= XMIT_CSUM_TCP;
2293 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
2294 rc |= XMIT_CSUM_TCP;
2298 if (skb_is_gso_v6(skb))
2299 rc |= XMIT_GSO_V6 | XMIT_CSUM_TCP | XMIT_CSUM_V6;
2300 else if (skb_is_gso(skb))
2301 rc |= XMIT_GSO_V4 | XMIT_CSUM_V4 | XMIT_CSUM_TCP;
/*
 * bnx2x_pkt_req_lin - decide whether an skb must be linearized to meet
 * the FW fragment-count restriction.
 *
 * Compiled only when MAX_SKB_FRAGS can exceed MAX_FETCH_BD - 3.  For
 * LSO packets it slides a window of wnd_size frags over the frag list
 * and requires each window to sum to at least one MSS; non-LSO packets
 * that are too fragmented always need copying.  Returns nonzero
 * (to_copy) when linearization is required.
 *
 * NOTE(review): the to_copy assignments, window-exit breaks and loop
 * closings are elided in this extraction.
 */
2306 #if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
2307 /* check if packet requires linearization (packet is too fragmented)
2308 no need to check fragmentation if page size > 8K (there will be no
2309 violation to FW restrictions) */
2310 static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb,
2315 int first_bd_sz = 0;
2317 /* 3 = 1 (for linear data BD) + 2 (for PBD and last BD) */
2318 if (skb_shinfo(skb)->nr_frags >= (MAX_FETCH_BD - 3)) {
2320 if (xmit_type & XMIT_GSO) {
2321 unsigned short lso_mss = skb_shinfo(skb)->gso_size;
2322 /* Check if LSO packet needs to be copied:
2323 3 = 1 (for headers BD) + 2 (for PBD and last BD) */
2324 int wnd_size = MAX_FETCH_BD - 3;
2325 /* Number of windows to check */
2326 int num_wnds = skb_shinfo(skb)->nr_frags - wnd_size;
2331 /* Headers length */
2332 hlen = (int)(skb_transport_header(skb) - skb->data) +
2335 /* Amount of data (w/o headers) on linear part of SKB*/
2336 first_bd_sz = skb_headlen(skb) - hlen;
2338 wnd_sum = first_bd_sz;
2340 /* Calculate the first sum - it's special */
2341 for (frag_idx = 0; frag_idx < wnd_size - 1; frag_idx++)
2343 skb_shinfo(skb)->frags[frag_idx].size;
2345 /* If there was data on linear skb data - check it */
2346 if (first_bd_sz > 0) {
2347 if (unlikely(wnd_sum < lso_mss)) {
2352 wnd_sum -= first_bd_sz;
2355 /* Others are easier: run through the frag list and
2356 check all windows */
2357 for (wnd_idx = 0; wnd_idx <= num_wnds; wnd_idx++) {
2359 skb_shinfo(skb)->frags[wnd_idx + wnd_size - 1].size;
2361 if (unlikely(wnd_sum < lso_mss)) {
2366 skb_shinfo(skb)->frags[wnd_idx].size;
2369 /* in non-LSO too fragmented packet should always
2376 if (unlikely(to_copy))
2377 DP(NETIF_MSG_TX_QUEUED,
2378 "Linearization IS REQUIRED for %s packet. "
2379 "num_frags %d hlen %d first_bd_sz %d\n",
2380 (xmit_type & XMIT_GSO) ? "LSO" : "non-LSO",
2381 skb_shinfo(skb)->nr_frags, hlen, first_bd_sz);
/*
 * bnx2x_set_pbd_gso_e2 - fold GSO parameters into the E2 parsing data
 * word: the MSS, and the IPv6-with-extension-header flag when the v6
 * next header is itself IPv6.
 */
2387 static inline void bnx2x_set_pbd_gso_e2(struct sk_buff *skb, u32 *parsing_data,
2390 *parsing_data |= (skb_shinfo(skb)->gso_size <<
2391 ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT) &
2392 ETH_TX_PARSE_BD_E2_LSO_MSS;
2393 if ((xmit_type & XMIT_GSO_V6) &&
2394 (ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6))
2395 *parsing_data |= ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR;
/*
 * Fills the E1x parse BD for a GSO packet: MSS, TCP sequence/flags,
 * and the pseudo-header checksum — computed over the IPv4 addresses
 * and IP id for XMIT_GSO_V4, or the IPv6 addresses otherwise.  Also
 * sets the "pseudo checksum without length" global-data flag.
 */
2399 * bnx2x_set_pbd_gso - update PBD in GSO case.
2403 * @xmit_type: xmit flags
2405 static inline void bnx2x_set_pbd_gso(struct sk_buff *skb,
2406 struct eth_tx_parse_bd_e1x *pbd,
2409 pbd->lso_mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
2410 pbd->tcp_send_seq = swab32(tcp_hdr(skb)->seq);
2411 pbd->tcp_flags = pbd_tcp_flags(skb);
2413 if (xmit_type & XMIT_GSO_V4) {
2414 pbd->ip_id = swab16(ip_hdr(skb)->id);
2415 pbd->tcp_pseudo_csum =
2416 swab16(~csum_tcpudp_magic(ip_hdr(skb)->saddr,
2418 0, IPPROTO_TCP, 0));
2421 pbd->tcp_pseudo_csum =
2422 swab16(~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
2423 &ipv6_hdr(skb)->daddr,
2424 0, IPPROTO_TCP, 0));
2426 pbd->global_data |= ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN;
/*
 * Encodes the transport-header start offset (in 16-bit words) into the
 * E2 parsing data; for TCP also the TCP header length in dwords.
 * Returns the total header length in bytes — up to the end of the TCP
 * header for TCP, or a fixed-size UDP header otherwise.
 */
2430 * bnx2x_set_pbd_csum_e2 - update PBD with checksum and return header length
2432 * @bp: driver handle
2434 * @parsing_data: data to be updated
2435 * @xmit_type: xmit flags
2439 static inline u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb,
2440 u32 *parsing_data, u32 xmit_type)
2443 ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) <<
2444 ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W_SHIFT) &
2445 ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W;
2447 if (xmit_type & XMIT_CSUM_TCP) {
2448 *parsing_data |= ((tcp_hdrlen(skb) / 4) <<
2449 ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT) &
2450 ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW;
2452 return skb_transport_header(skb) + tcp_hdrlen(skb) - skb->data;
2454 /* We support checksum offload for TCP and UDP only.
2455 * No need to pass the UDP header length - it's a constant.
2457 return skb_transport_header(skb) +
2458 sizeof(struct udphdr) - skb->data;
/*
 * bnx2x_set_sbd_csum - set checksum-offload flags in the start BD:
 * always L4_CSUM, plus IP_CSUM for IPv4 / IPV6 flag for IPv6, and
 * IS_UDP when the L4 protocol is not TCP.
 */
2461 static inline void bnx2x_set_sbd_csum(struct bnx2x *bp, struct sk_buff *skb,
2462 struct eth_tx_start_bd *tx_start_bd, u32 xmit_type)
2464 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_L4_CSUM;
2466 if (xmit_type & XMIT_CSUM_V4)
2467 tx_start_bd->bd_flags.as_bitfield |=
2468 ETH_TX_BD_FLAGS_IP_CSUM;
2470 tx_start_bd->bd_flags.as_bitfield |=
2471 ETH_TX_BD_FLAGS_IPV6;
2473 if (!(xmit_type & XMIT_CSUM_TCP))
2474 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IS_UDP;
/*
 * Fills the E1x parse BD for checksum offload and returns the total
 * header length in 16-bit words: L2 header (plus VLAN/LLC-SNAP flag),
 * IP header, and the L4 header (TCP length or fixed UDP size).  For
 * TCP it stores the pseudo checksum, applying the bnx2x_csum_fix()
 * errata correction when the checksum start offset differs from the
 * transport header (fix != 0 path; some guard lines elided in this
 * extraction).
 */
2478 * bnx2x_set_pbd_csum - update PBD with checksum and return header length
2480 * @bp: driver handle
2482 * @pbd: parse BD to be updated
2483 * @xmit_type: xmit flags
2485 static inline u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb,
2486 struct eth_tx_parse_bd_e1x *pbd,
2489 u8 hlen = (skb_network_header(skb) - skb->data) >> 1;
2491 /* for now NS flag is not used in Linux */
2493 (hlen | ((skb->protocol == cpu_to_be16(ETH_P_8021Q)) <<
2494 ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT));
2496 pbd->ip_hlen_w = (skb_transport_header(skb) -
2497 skb_network_header(skb)) >> 1;
2499 hlen += pbd->ip_hlen_w;
2501 /* We support checksum offload for TCP and UDP only */
2502 if (xmit_type & XMIT_CSUM_TCP)
2503 hlen += tcp_hdrlen(skb) / 2;
2505 hlen += sizeof(struct udphdr) / 2;
2507 pbd->total_hlen_w = cpu_to_le16(hlen);
2510 if (xmit_type & XMIT_CSUM_TCP) {
2511 pbd->tcp_pseudo_csum = swab16(tcp_hdr(skb)->check);
2514 s8 fix = SKB_CS_OFF(skb); /* signed! */
2516 DP(NETIF_MSG_TX_QUEUED,
2517 "hlen %d fix %d csum before fix %x\n",
2518 le16_to_cpu(pbd->total_hlen_w), fix, SKB_CS(skb));
2520 /* HW bug: fixup the CSUM */
2521 pbd->tcp_pseudo_csum =
2522 bnx2x_csum_fix(skb_transport_header(skb),
2525 DP(NETIF_MSG_TX_QUEUED, "csum after fix %x\n",
2526 pbd->tcp_pseudo_csum);
2532 /* called with netif_tx_lock
2533 * bnx2x_tx_int() runs without netif_tx_lock unless it needs to call
2534 * netif_wake_queue()
2536 netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
2538 struct bnx2x *bp = netdev_priv(dev);
2540 struct bnx2x_fastpath *fp;
2541 struct netdev_queue *txq;
2542 struct bnx2x_fp_txdata *txdata;
2543 struct sw_tx_bd *tx_buf;
2544 struct eth_tx_start_bd *tx_start_bd, *first_bd;
2545 struct eth_tx_bd *tx_data_bd, *total_pkt_bd = NULL;
2546 struct eth_tx_parse_bd_e1x *pbd_e1x = NULL;
2547 struct eth_tx_parse_bd_e2 *pbd_e2 = NULL;
2548 u32 pbd_e2_parsing_data = 0;
2549 u16 pkt_prod, bd_prod;
2550 int nbd, txq_index, fp_index, txdata_index;
2552 u32 xmit_type = bnx2x_xmit_type(bp, skb);
2555 __le16 pkt_size = 0;
2557 u8 mac_type = UNICAST_ADDRESS;
2559 #ifdef BNX2X_STOP_ON_ERROR
2560 if (unlikely(bp->panic))
2561 return NETDEV_TX_BUSY;
2564 txq_index = skb_get_queue_mapping(skb);
2565 txq = netdev_get_tx_queue(dev, txq_index);
2567 BUG_ON(txq_index >= MAX_ETH_TXQ_IDX(bp) + FCOE_PRESENT);
2569 /* decode the fastpath index and the cos index from the txq */
2570 fp_index = TXQ_TO_FP(txq_index);
2571 txdata_index = TXQ_TO_COS(txq_index);
2575 * Override the above for the FCoE queue:
2576 * - FCoE fp entry is right after the ETH entries.
2577 * - FCoE L2 queue uses bp->txdata[0] only.
2579 if (unlikely(!NO_FCOE(bp) && (txq_index ==
2580 bnx2x_fcoe_tx(bp, txq_index)))) {
2581 fp_index = FCOE_IDX;
2586 /* enable this debug print to view the transmission queue being used
2587 DP(BNX2X_MSG_FP, "indices: txq %d, fp %d, txdata %d",
2588 txq_index, fp_index, txdata_index); */
2590 /* locate the fastpath and the txdata */
2591 fp = &bp->fp[fp_index];
2592 txdata = &fp->txdata[txdata_index];
2594 /* enable this debug print to view the tranmission details
2595 DP(BNX2X_MSG_FP,"transmitting packet cid %d fp index %d txdata_index %d"
2596 " tx_data ptr %p fp pointer %p",
2597 txdata->cid, fp_index, txdata_index, txdata, fp); */
2599 if (unlikely(bnx2x_tx_avail(bp, txdata) <
2600 (skb_shinfo(skb)->nr_frags + 3))) {
2601 fp->eth_q_stats.driver_xoff++;
2602 netif_tx_stop_queue(txq);
2603 BNX2X_ERR("BUG! Tx ring full when queue awake!\n");
2604 return NETDEV_TX_BUSY;
2607 DP(NETIF_MSG_TX_QUEUED, "queue[%d]: SKB: summed %x protocol %x "
2608 "protocol(%x,%x) gso type %x xmit_type %x\n",
2609 txq_index, skb->ip_summed, skb->protocol, ipv6_hdr(skb)->nexthdr,
2610 ip_hdr(skb)->protocol, skb_shinfo(skb)->gso_type, xmit_type);
2612 eth = (struct ethhdr *)skb->data;
2614 /* set flag according to packet type (UNICAST_ADDRESS is default)*/
2615 if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
2616 if (is_broadcast_ether_addr(eth->h_dest))
2617 mac_type = BROADCAST_ADDRESS;
2619 mac_type = MULTICAST_ADDRESS;
2622 #if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
2623 /* First, check if we need to linearize the skb (due to FW
2624 restrictions). No need to check fragmentation if page size > 8K
2625 (there will be no violation to FW restrictions) */
2626 if (bnx2x_pkt_req_lin(bp, skb, xmit_type)) {
2627 /* Statistics of linearization */
2629 if (skb_linearize(skb) != 0) {
2630 DP(NETIF_MSG_TX_QUEUED, "SKB linearization failed - "
2631 "silently dropping this SKB\n");
2632 dev_kfree_skb_any(skb);
2633 return NETDEV_TX_OK;
2637 /* Map skb linear data for DMA */
2638 mapping = dma_map_single(&bp->pdev->dev, skb->data,
2639 skb_headlen(skb), DMA_TO_DEVICE);
2640 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
2641 DP(NETIF_MSG_TX_QUEUED, "SKB mapping failed - "
2642 "silently dropping this SKB\n");
2643 dev_kfree_skb_any(skb);
2644 return NETDEV_TX_OK;
2647 Please read carefully. First we use one BD which we mark as start,
2648 then we have a parsing info BD (used for TSO or xsum),
2649 and only then we have the rest of the TSO BDs.
2650 (don't forget to mark the last one as last,
2651 and to unmap only AFTER you write to the BD ...)
2652 And above all, all pdb sizes are in words - NOT DWORDS!
2655 /* get current pkt produced now - advance it just before sending packet
2656 * since mapping of pages may fail and cause packet to be dropped
2658 pkt_prod = txdata->tx_pkt_prod;
2659 bd_prod = TX_BD(txdata->tx_bd_prod);
2661 /* get a tx_buf and first BD
2662 * tx_start_bd may be changed during SPLIT,
2663 * but first_bd will always stay first
2665 tx_buf = &txdata->tx_buf_ring[TX_BD(pkt_prod)];
2666 tx_start_bd = &txdata->tx_desc_ring[bd_prod].start_bd;
2667 first_bd = tx_start_bd;
2669 tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
2670 SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_ETH_ADDR_TYPE,
2674 SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_HDR_NBDS, 1);
2676 /* remember the first BD of the packet */
2677 tx_buf->first_bd = txdata->tx_bd_prod;
2681 DP(NETIF_MSG_TX_QUEUED,
2682 "sending pkt %u @%p next_idx %u bd %u @%p\n",
2683 pkt_prod, tx_buf, txdata->tx_pkt_prod, bd_prod, tx_start_bd);
2685 if (vlan_tx_tag_present(skb)) {
2686 tx_start_bd->vlan_or_ethertype =
2687 cpu_to_le16(vlan_tx_tag_get(skb));
2688 tx_start_bd->bd_flags.as_bitfield |=
2689 (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
2691 tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
2693 /* turn on parsing and get a BD */
2694 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2696 if (xmit_type & XMIT_CSUM)
2697 bnx2x_set_sbd_csum(bp, skb, tx_start_bd, xmit_type);
2699 if (!CHIP_IS_E1x(bp)) {
2700 pbd_e2 = &txdata->tx_desc_ring[bd_prod].parse_bd_e2;
2701 memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2));
2702 /* Set PBD in checksum offload case */
2703 if (xmit_type & XMIT_CSUM)
2704 hlen = bnx2x_set_pbd_csum_e2(bp, skb,
2705 &pbd_e2_parsing_data,
2709 * fill in the MAC addresses in the PBD - for local
2712 bnx2x_set_fw_mac_addr(&pbd_e2->src_mac_addr_hi,
2713 &pbd_e2->src_mac_addr_mid,
2714 &pbd_e2->src_mac_addr_lo,
2716 bnx2x_set_fw_mac_addr(&pbd_e2->dst_mac_addr_hi,
2717 &pbd_e2->dst_mac_addr_mid,
2718 &pbd_e2->dst_mac_addr_lo,
2722 pbd_e1x = &txdata->tx_desc_ring[bd_prod].parse_bd_e1x;
2723 memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x));
2724 /* Set PBD in checksum offload case */
2725 if (xmit_type & XMIT_CSUM)
2726 hlen = bnx2x_set_pbd_csum(bp, skb, pbd_e1x, xmit_type);
2730 /* Setup the data pointer of the first BD of the packet */
2731 tx_start_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2732 tx_start_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2733 nbd = 2; /* start_bd + pbd + frags (updated when pages are mapped) */
2734 tx_start_bd->nbytes = cpu_to_le16(skb_headlen(skb));
2735 pkt_size = tx_start_bd->nbytes;
2737 DP(NETIF_MSG_TX_QUEUED, "first bd @%p addr (%x:%x) nbd %d"
2738 " nbytes %d flags %x vlan %x\n",
2739 tx_start_bd, tx_start_bd->addr_hi, tx_start_bd->addr_lo,
2740 le16_to_cpu(tx_start_bd->nbd), le16_to_cpu(tx_start_bd->nbytes),
2741 tx_start_bd->bd_flags.as_bitfield,
2742 le16_to_cpu(tx_start_bd->vlan_or_ethertype));
2744 if (xmit_type & XMIT_GSO) {
2746 DP(NETIF_MSG_TX_QUEUED,
2747 "TSO packet len %d hlen %d total len %d tso size %d\n",
2748 skb->len, hlen, skb_headlen(skb),
2749 skb_shinfo(skb)->gso_size);
2751 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO;
2753 if (unlikely(skb_headlen(skb) > hlen))
2754 bd_prod = bnx2x_tx_split(bp, txdata, tx_buf,
2757 if (!CHIP_IS_E1x(bp))
2758 bnx2x_set_pbd_gso_e2(skb, &pbd_e2_parsing_data,
2761 bnx2x_set_pbd_gso(skb, pbd_e1x, xmit_type);
2764 /* Set the PBD's parsing_data field if not zero
2765 * (for the chips newer than 57711).
2767 if (pbd_e2_parsing_data)
2768 pbd_e2->parsing_data = cpu_to_le32(pbd_e2_parsing_data);
2770 tx_data_bd = (struct eth_tx_bd *)tx_start_bd;
2772 /* Handle fragmented skb */
2773 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2774 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2776 mapping = dma_map_page(&bp->pdev->dev, frag->page,
2777 frag->page_offset, frag->size,
2779 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
2781 DP(NETIF_MSG_TX_QUEUED, "Unable to map page - "
2782 "dropping packet...\n");
2784 /* we need unmap all buffers already mapped
2786 * first_bd->nbd need to be properly updated
2787 * before call to bnx2x_free_tx_pkt
2789 first_bd->nbd = cpu_to_le16(nbd);
2790 bnx2x_free_tx_pkt(bp, txdata,
2791 TX_BD(txdata->tx_pkt_prod));
2792 return NETDEV_TX_OK;
2795 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2796 tx_data_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2797 if (total_pkt_bd == NULL)
2798 total_pkt_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2800 tx_data_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2801 tx_data_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2802 tx_data_bd->nbytes = cpu_to_le16(frag->size);
2803 le16_add_cpu(&pkt_size, frag->size);
2806 DP(NETIF_MSG_TX_QUEUED,
2807 "frag %d bd @%p addr (%x:%x) nbytes %d\n",
2808 i, tx_data_bd, tx_data_bd->addr_hi, tx_data_bd->addr_lo,
2809 le16_to_cpu(tx_data_bd->nbytes));
2812 DP(NETIF_MSG_TX_QUEUED, "last bd @%p\n", tx_data_bd);
2814 /* update with actual num BDs */
2815 first_bd->nbd = cpu_to_le16(nbd);
2817 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2819 /* now send a tx doorbell, counting the next BD
2820 * if the packet contains or ends with it
2822 if (TX_BD_POFF(bd_prod) < nbd)
2825 /* total_pkt_bytes should be set on the first data BD if
2826 * it's not an LSO packet and there is more than one
2827 * data BD. In this case pkt_size is limited by an MTU value.
2828 * However we prefer to set it for an LSO packet (while we don't
2829 * have to) in order to save some CPU cycles in a none-LSO
2830 * case, when we much more care about them.
2832 if (total_pkt_bd != NULL)
2833 total_pkt_bd->total_pkt_bytes = pkt_size;
2836 DP(NETIF_MSG_TX_QUEUED,
2837 "PBD (E1X) @%p ip_data %x ip_hlen %u ip_id %u lso_mss %u"
2838 " tcp_flags %x xsum %x seq %u hlen %u\n",
2839 pbd_e1x, pbd_e1x->global_data, pbd_e1x->ip_hlen_w,
2840 pbd_e1x->ip_id, pbd_e1x->lso_mss, pbd_e1x->tcp_flags,
2841 pbd_e1x->tcp_pseudo_csum, pbd_e1x->tcp_send_seq,
2842 le16_to_cpu(pbd_e1x->total_hlen_w));
2844 DP(NETIF_MSG_TX_QUEUED,
2845 "PBD (E2) @%p dst %x %x %x src %x %x %x parsing_data %x\n",
2846 pbd_e2, pbd_e2->dst_mac_addr_hi, pbd_e2->dst_mac_addr_mid,
2847 pbd_e2->dst_mac_addr_lo, pbd_e2->src_mac_addr_hi,
2848 pbd_e2->src_mac_addr_mid, pbd_e2->src_mac_addr_lo,
2849 pbd_e2->parsing_data);
2850 DP(NETIF_MSG_TX_QUEUED, "doorbell: nbd %d bd %u\n", nbd, bd_prod);
2852 txdata->tx_pkt_prod++;
2854 * Make sure that the BD data is updated before updating the producer
2855 * since FW might read the BD right after the producer is updated.
2856 * This is only applicable for weak-ordered memory model archs such
2857 * as IA-64. The following barrier is also mandatory since FW will
2858 * assumes packets must have BDs.
2862 txdata->tx_db.data.prod += nbd;
2865 DOORBELL(bp, txdata->cid, txdata->tx_db.raw);
2869 txdata->tx_bd_prod += nbd;
2871 if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_SKB_FRAGS + 3)) {
2872 netif_tx_stop_queue(txq);
2874 /* paired memory barrier is in bnx2x_tx_int(), we have to keep
2875 * ordering of set_bit() in netif_tx_stop_queue() and read of
2879 fp->eth_q_stats.driver_xoff++;
2880 if (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 3)
2881 netif_tx_wake_queue(txq);
2885 return NETDEV_TX_OK;
2889 * bnx2x_setup_tc - routine to configure net_device for multi tc
2891 * @netdev: net device to configure
2892 * @tc: number of traffic classes to enable
2894 * callback connected to the ndo_setup_tc function pointer
2896 int bnx2x_setup_tc(struct net_device *dev, u8 num_tc)
2898 int cos, prio, count, offset;
2899 struct bnx2x *bp = netdev_priv(dev);
2901 /* setup tc must be called under rtnl lock */
2904 /* no traffic classes requested. aborting */
2906 netdev_reset_tc(dev);
2910 /* requested to support too many traffic classes */
2911 if (num_tc > bp->max_cos) {
2912 DP(NETIF_MSG_TX_ERR, "support for too many traffic classes"
2913 " requested: %d. max supported is %d",
2914 num_tc, bp->max_cos);
2918 /* declare amount of supported traffic classes */
2919 if (netdev_set_num_tc(dev, num_tc)) {
2920 DP(NETIF_MSG_TX_ERR, "failed to declare %d traffic classes",
2925 /* configure priority to traffic class mapping */
2926 for (prio = 0; prio < BNX2X_MAX_PRIORITY; prio++) {
2927 netdev_set_prio_tc_map(dev, prio, bp->prio_to_cos[prio]);
2928 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d",
2929 prio, bp->prio_to_cos[prio]);
2933 /* Use this configuration to diffrentiate tc0 from other COSes
2934 This can be used for ets or pfc, and save the effort of setting
2935 up a multio class queue disc or negotiating DCBX with a switch
2936 netdev_set_prio_tc_map(dev, 0, 0);
2937 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d", 0, 0);
2938 for (prio = 1; prio < 16; prio++) {
2939 netdev_set_prio_tc_map(dev, prio, 1);
2940 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d", prio, 1);
2943 /* configure traffic class to transmission queue mapping */
2944 for (cos = 0; cos < bp->max_cos; cos++) {
2945 count = BNX2X_NUM_ETH_QUEUES(bp);
2946 offset = cos * MAX_TXQS_PER_COS;
2947 netdev_set_tc_queue(dev, cos, count, offset);
2948 DP(BNX2X_MSG_SP, "mapping tc %d to offset %d count %d",
2949 cos, offset, count);
2955 /* called with rtnl_lock */
2956 int bnx2x_change_mac_addr(struct net_device *dev, void *p)
2958 struct sockaddr *addr = p;
2959 struct bnx2x *bp = netdev_priv(dev);
2962 if (!is_valid_ether_addr((u8 *)(addr->sa_data)))
2965 if (netif_running(dev)) {
2966 rc = bnx2x_set_eth_mac(bp, false);
2971 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
2973 if (netif_running(dev))
2974 rc = bnx2x_set_eth_mac(bp, true);
2979 static void bnx2x_free_fp_mem_at(struct bnx2x *bp, int fp_index)
2981 union host_hc_status_block *sb = &bnx2x_fp(bp, fp_index, status_blk);
2982 struct bnx2x_fastpath *fp = &bp->fp[fp_index];
2987 if (IS_FCOE_IDX(fp_index)) {
2988 memset(sb, 0, sizeof(union host_hc_status_block));
2989 fp->status_blk_mapping = 0;
2994 if (!CHIP_IS_E1x(bp))
2995 BNX2X_PCI_FREE(sb->e2_sb,
2996 bnx2x_fp(bp, fp_index,
2997 status_blk_mapping),
2998 sizeof(struct host_hc_status_block_e2));
3000 BNX2X_PCI_FREE(sb->e1x_sb,
3001 bnx2x_fp(bp, fp_index,
3002 status_blk_mapping),
3003 sizeof(struct host_hc_status_block_e1x));
3008 if (!skip_rx_queue(bp, fp_index)) {
3009 bnx2x_free_rx_bds(fp);
3011 /* fastpath rx rings: rx_buf rx_desc rx_comp */
3012 BNX2X_FREE(bnx2x_fp(bp, fp_index, rx_buf_ring));
3013 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_desc_ring),
3014 bnx2x_fp(bp, fp_index, rx_desc_mapping),
3015 sizeof(struct eth_rx_bd) * NUM_RX_BD);
3017 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_comp_ring),
3018 bnx2x_fp(bp, fp_index, rx_comp_mapping),
3019 sizeof(struct eth_fast_path_rx_cqe) *
3023 BNX2X_FREE(bnx2x_fp(bp, fp_index, rx_page_ring));
3024 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_sge_ring),
3025 bnx2x_fp(bp, fp_index, rx_sge_mapping),
3026 BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
3030 if (!skip_tx_queue(bp, fp_index)) {
3031 /* fastpath tx rings: tx_buf tx_desc */
3032 for_each_cos_in_tx_queue(fp, cos) {
3033 struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
3036 "freeing tx memory of fp %d cos %d cid %d",
3037 fp_index, cos, txdata->cid);
3039 BNX2X_FREE(txdata->tx_buf_ring);
3040 BNX2X_PCI_FREE(txdata->tx_desc_ring,
3041 txdata->tx_desc_mapping,
3042 sizeof(union eth_tx_bd_types) * NUM_TX_BD);
3045 /* end of fastpath */
3048 void bnx2x_free_fp_mem(struct bnx2x *bp)
3051 for_each_queue(bp, i)
3052 bnx2x_free_fp_mem_at(bp, i);
3055 static inline void set_sb_shortcuts(struct bnx2x *bp, int index)
3057 union host_hc_status_block status_blk = bnx2x_fp(bp, index, status_blk);
3058 if (!CHIP_IS_E1x(bp)) {
3059 bnx2x_fp(bp, index, sb_index_values) =
3060 (__le16 *)status_blk.e2_sb->sb.index_values;
3061 bnx2x_fp(bp, index, sb_running_index) =
3062 (__le16 *)status_blk.e2_sb->sb.running_index;
3064 bnx2x_fp(bp, index, sb_index_values) =
3065 (__le16 *)status_blk.e1x_sb->sb.index_values;
3066 bnx2x_fp(bp, index, sb_running_index) =
3067 (__le16 *)status_blk.e1x_sb->sb.running_index;
3071 static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index)
3073 union host_hc_status_block *sb;
3074 struct bnx2x_fastpath *fp = &bp->fp[index];
3078 /* if rx_ring_size specified - use it */
3079 int rx_ring_size = bp->rx_ring_size ? bp->rx_ring_size :
3080 MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp);
3082 /* allocate at least number of buffers required by FW */
3083 rx_ring_size = max_t(int, bp->disable_tpa ? MIN_RX_SIZE_NONTPA :
3088 sb = &bnx2x_fp(bp, index, status_blk);
3090 if (!IS_FCOE_IDX(index)) {
3093 if (!CHIP_IS_E1x(bp))
3094 BNX2X_PCI_ALLOC(sb->e2_sb,
3095 &bnx2x_fp(bp, index, status_blk_mapping),
3096 sizeof(struct host_hc_status_block_e2));
3098 BNX2X_PCI_ALLOC(sb->e1x_sb,
3099 &bnx2x_fp(bp, index, status_blk_mapping),
3100 sizeof(struct host_hc_status_block_e1x));
3105 /* FCoE Queue uses Default SB and doesn't ACK the SB, thus no need to
3106 * set shortcuts for it.
3108 if (!IS_FCOE_IDX(index))
3109 set_sb_shortcuts(bp, index);
3112 if (!skip_tx_queue(bp, index)) {
3113 /* fastpath tx rings: tx_buf tx_desc */
3114 for_each_cos_in_tx_queue(fp, cos) {
3115 struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
3117 DP(BNX2X_MSG_SP, "allocating tx memory of "
3121 BNX2X_ALLOC(txdata->tx_buf_ring,
3122 sizeof(struct sw_tx_bd) * NUM_TX_BD);
3123 BNX2X_PCI_ALLOC(txdata->tx_desc_ring,
3124 &txdata->tx_desc_mapping,
3125 sizeof(union eth_tx_bd_types) * NUM_TX_BD);
3130 if (!skip_rx_queue(bp, index)) {
3131 /* fastpath rx rings: rx_buf rx_desc rx_comp */
3132 BNX2X_ALLOC(bnx2x_fp(bp, index, rx_buf_ring),
3133 sizeof(struct sw_rx_bd) * NUM_RX_BD);
3134 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_desc_ring),
3135 &bnx2x_fp(bp, index, rx_desc_mapping),
3136 sizeof(struct eth_rx_bd) * NUM_RX_BD);
3138 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_comp_ring),
3139 &bnx2x_fp(bp, index, rx_comp_mapping),
3140 sizeof(struct eth_fast_path_rx_cqe) *
3144 BNX2X_ALLOC(bnx2x_fp(bp, index, rx_page_ring),
3145 sizeof(struct sw_rx_page) * NUM_RX_SGE);
3146 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_sge_ring),
3147 &bnx2x_fp(bp, index, rx_sge_mapping),
3148 BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
3150 bnx2x_set_next_page_rx_bd(fp);
3153 bnx2x_set_next_page_rx_cq(fp);
3156 ring_size = bnx2x_alloc_rx_bds(fp, rx_ring_size);
3157 if (ring_size < rx_ring_size)
3163 /* handles low memory cases */
3165 BNX2X_ERR("Unable to allocate full memory for queue %d (size %d)\n",
3167 /* FW will drop all packets if queue is not big enough,
3168 * In these cases we disable the queue
3169 * Min size is different for OOO, TPA and non-TPA queues
3171 if (ring_size < (fp->disable_tpa ?
3172 MIN_RX_SIZE_NONTPA : MIN_RX_SIZE_TPA)) {
3173 /* release memory allocated for this queue */
3174 bnx2x_free_fp_mem_at(bp, index);
3180 int bnx2x_alloc_fp_mem(struct bnx2x *bp)
3185 * 1. Allocate FP for leading - fatal if error
3186 * 2. {CNIC} Allocate FCoE FP - fatal if error
3187 * 3. {CNIC} Allocate OOO + FWD - disable OOO if error
3188 * 4. Allocate RSS - fix number of queues if error
3192 if (bnx2x_alloc_fp_mem_at(bp, 0))
3198 if (bnx2x_alloc_fp_mem_at(bp, FCOE_IDX))
3199 /* we will fail load process instead of mark
3206 for_each_nondefault_eth_queue(bp, i)
3207 if (bnx2x_alloc_fp_mem_at(bp, i))
3210 /* handle memory failures */
3211 if (i != BNX2X_NUM_ETH_QUEUES(bp)) {
3212 int delta = BNX2X_NUM_ETH_QUEUES(bp) - i;
3217 * move non eth FPs next to last eth FP
3218 * must be done in that order
3219 * FCOE_IDX < FWD_IDX < OOO_IDX
3222 /* move FCoE fp even NO_FCOE_FLAG is on */
3223 bnx2x_move_fp(bp, FCOE_IDX, FCOE_IDX - delta);
3225 bp->num_queues -= delta;
3226 BNX2X_ERR("Adjusted num of queues from %d to %d\n",
3227 bp->num_queues + delta, bp->num_queues);
3233 void bnx2x_free_mem_bp(struct bnx2x *bp)
3236 kfree(bp->msix_table);
3240 int __devinit bnx2x_alloc_mem_bp(struct bnx2x *bp)
3242 struct bnx2x_fastpath *fp;
3243 struct msix_entry *tbl;
3244 struct bnx2x_ilt *ilt;
3245 int msix_table_size = 0;
3248 * The biggest MSI-X table we might need is as a maximum number of fast
3249 * path IGU SBs plus default SB (for PF).
3251 msix_table_size = bp->igu_sb_cnt + 1;
3253 /* fp array: RSS plus CNIC related L2 queues */
3254 fp = kzalloc((BNX2X_MAX_RSS_COUNT(bp) + NON_ETH_CONTEXT_USE) *
3255 sizeof(*fp), GFP_KERNEL);
3261 tbl = kzalloc(msix_table_size * sizeof(*tbl), GFP_KERNEL);
3264 bp->msix_table = tbl;
3267 ilt = kzalloc(sizeof(*ilt), GFP_KERNEL);
3274 bnx2x_free_mem_bp(bp);
3279 int bnx2x_reload_if_running(struct net_device *dev)
3281 struct bnx2x *bp = netdev_priv(dev);
3283 if (unlikely(!netif_running(dev)))
3286 bnx2x_nic_unload(bp, UNLOAD_NORMAL);
3287 return bnx2x_nic_load(bp, LOAD_NORMAL);
3290 int bnx2x_get_cur_phy_idx(struct bnx2x *bp)
3292 u32 sel_phy_idx = 0;
3293 if (bp->link_params.num_phys <= 1)
3296 if (bp->link_vars.link_up) {
3297 sel_phy_idx = EXT_PHY1;
3298 /* In case link is SERDES, check if the EXT_PHY2 is the one */
3299 if ((bp->link_vars.link_status & LINK_STATUS_SERDES_LINK) &&
3300 (bp->link_params.phy[EXT_PHY2].supported & SUPPORTED_FIBRE))
3301 sel_phy_idx = EXT_PHY2;
3304 switch (bnx2x_phy_selection(&bp->link_params)) {
3305 case PORT_HW_CFG_PHY_SELECTION_HARDWARE_DEFAULT:
3306 case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY:
3307 case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY:
3308 sel_phy_idx = EXT_PHY1;
3310 case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY:
3311 case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY_PRIORITY:
3312 sel_phy_idx = EXT_PHY2;
3320 int bnx2x_get_link_cfg_idx(struct bnx2x *bp)
3322 u32 sel_phy_idx = bnx2x_get_cur_phy_idx(bp);
3324 * The selected actived PHY is always after swapping (in case PHY
3325 * swapping is enabled). So when swapping is enabled, we need to reverse
3329 if (bp->link_params.multi_phy_config &
3330 PORT_HW_CFG_PHY_SWAPPED_ENABLED) {
3331 if (sel_phy_idx == EXT_PHY1)
3332 sel_phy_idx = EXT_PHY2;
3333 else if (sel_phy_idx == EXT_PHY2)
3334 sel_phy_idx = EXT_PHY1;
3336 return LINK_CONFIG_IDX(sel_phy_idx);
#if defined(NETDEV_FCOE_WWNN) && defined(BCM_CNIC)
/* bnx2x_fcoe_get_wwn - report the FCoE world-wide node/port name taken
 * from the CNIC ethernet device info. Returns -EINVAL on unknown type.
 */
int bnx2x_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
{
	struct bnx2x *bp = netdev_priv(dev);
	struct cnic_eth_dev *cp = &bp->cnic_eth_dev;

	switch (type) {
	case NETDEV_FCOE_WWNN:
		*wwn = HILO_U64(cp->fcoe_wwn_node_name_hi,
				cp->fcoe_wwn_node_name_lo);
		break;
	case NETDEV_FCOE_WWPN:
		*wwn = HILO_U64(cp->fcoe_wwn_port_name_hi,
				cp->fcoe_wwn_port_name_lo);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
#endif
3362 /* called with rtnl_lock */
3363 int bnx2x_change_mtu(struct net_device *dev, int new_mtu)
3365 struct bnx2x *bp = netdev_priv(dev);
3367 if (bp->recovery_state != BNX2X_RECOVERY_DONE) {
3368 printk(KERN_ERR "Handling parity error recovery. Try again later\n");
3372 if ((new_mtu > ETH_MAX_JUMBO_PACKET_SIZE) ||
3373 ((new_mtu + ETH_HLEN) < ETH_MIN_PACKET_SIZE))
3376 /* This does not race with packet allocation
3377 * because the actual alloc size is
3378 * only updated as part of load
3382 return bnx2x_reload_if_running(dev);
3385 u32 bnx2x_fix_features(struct net_device *dev, u32 features)
3387 struct bnx2x *bp = netdev_priv(dev);
3389 /* TPA requires Rx CSUM offloading */
3390 if (!(features & NETIF_F_RXCSUM) || bp->disable_tpa)
3391 features &= ~NETIF_F_LRO;
3396 int bnx2x_set_features(struct net_device *dev, u32 features)
3398 struct bnx2x *bp = netdev_priv(dev);
3399 u32 flags = bp->flags;
3400 bool bnx2x_reload = false;
3402 if (features & NETIF_F_LRO)
3403 flags |= TPA_ENABLE_FLAG;
3405 flags &= ~TPA_ENABLE_FLAG;
3407 if (features & NETIF_F_LOOPBACK) {
3408 if (bp->link_params.loopback_mode != LOOPBACK_BMAC) {
3409 bp->link_params.loopback_mode = LOOPBACK_BMAC;
3410 bnx2x_reload = true;
3413 if (bp->link_params.loopback_mode != LOOPBACK_NONE) {
3414 bp->link_params.loopback_mode = LOOPBACK_NONE;
3415 bnx2x_reload = true;
3419 if (flags ^ bp->flags) {
3421 bnx2x_reload = true;
3425 if (bp->recovery_state == BNX2X_RECOVERY_DONE)
3426 return bnx2x_reload_if_running(dev);
3427 /* else: bnx2x_nic_load() will be called at end of recovery */
3433 void bnx2x_tx_timeout(struct net_device *dev)
3435 struct bnx2x *bp = netdev_priv(dev);
3437 #ifdef BNX2X_STOP_ON_ERROR
3442 smp_mb__before_clear_bit();
3443 set_bit(BNX2X_SP_RTNL_TX_TIMEOUT, &bp->sp_rtnl_state);
3444 smp_mb__after_clear_bit();
3446 /* This allows the netif to be shutdown gracefully before resetting */
3447 schedule_delayed_work(&bp->sp_rtnl_task, 0);
3450 int bnx2x_suspend(struct pci_dev *pdev, pm_message_t state)
3452 struct net_device *dev = pci_get_drvdata(pdev);
3456 dev_err(&pdev->dev, "BAD net device from bnx2x_init_one\n");
3459 bp = netdev_priv(dev);
3463 pci_save_state(pdev);
3465 if (!netif_running(dev)) {
3470 netif_device_detach(dev);
3472 bnx2x_nic_unload(bp, UNLOAD_CLOSE);
3474 bnx2x_set_power_state(bp, pci_choose_state(pdev, state));
3481 int bnx2x_resume(struct pci_dev *pdev)
3483 struct net_device *dev = pci_get_drvdata(pdev);
3488 dev_err(&pdev->dev, "BAD net device from bnx2x_init_one\n");
3491 bp = netdev_priv(dev);
3493 if (bp->recovery_state != BNX2X_RECOVERY_DONE) {
3494 printk(KERN_ERR "Handling parity error recovery. Try again later\n");
3500 pci_restore_state(pdev);
3502 if (!netif_running(dev)) {
3507 bnx2x_set_power_state(bp, PCI_D0);
3508 netif_device_attach(dev);
3510 /* Since the chip was reset, clear the FW sequence number */
3512 rc = bnx2x_nic_load(bp, LOAD_OPEN);
3520 void bnx2x_set_ctx_validation(struct bnx2x *bp, struct eth_context *cxt,
3523 /* ustorm cxt validation */
3524 cxt->ustorm_ag_context.cdu_usage =
3525 CDU_RSRVD_VALUE_TYPE_A(HW_CID(bp, cid),
3526 CDU_REGION_NUMBER_UCM_AG, ETH_CONNECTION_TYPE);
3527 /* xcontext validation */
3528 cxt->xstorm_ag_context.cdu_reserved =
3529 CDU_RSRVD_VALUE_TYPE_A(HW_CID(bp, cid),
3530 CDU_REGION_NUMBER_XCM_AG, ETH_CONNECTION_TYPE);
3533 static inline void storm_memset_hc_timeout(struct bnx2x *bp, u8 port,
3534 u8 fw_sb_id, u8 sb_index,
3538 u32 addr = BAR_CSTRORM_INTMEM +
3539 CSTORM_STATUS_BLOCK_DATA_TIMEOUT_OFFSET(fw_sb_id, sb_index);
3540 REG_WR8(bp, addr, ticks);
3541 DP(NETIF_MSG_HW, "port %x fw_sb_id %d sb_index %d ticks %d\n",
3542 port, fw_sb_id, sb_index, ticks);
3545 static inline void storm_memset_hc_disable(struct bnx2x *bp, u8 port,
3546 u16 fw_sb_id, u8 sb_index,
3549 u32 enable_flag = disable ? 0 : (1 << HC_INDEX_DATA_HC_ENABLED_SHIFT);
3550 u32 addr = BAR_CSTRORM_INTMEM +
3551 CSTORM_STATUS_BLOCK_DATA_FLAGS_OFFSET(fw_sb_id, sb_index);
3552 u16 flags = REG_RD16(bp, addr);
3554 flags &= ~HC_INDEX_DATA_HC_ENABLED;
3555 flags |= enable_flag;
3556 REG_WR16(bp, addr, flags);
3557 DP(NETIF_MSG_HW, "port %x fw_sb_id %d sb_index %d disable %d\n",
3558 port, fw_sb_id, sb_index, disable);
3561 void bnx2x_update_coalesce_sb_index(struct bnx2x *bp, u8 fw_sb_id,
3562 u8 sb_index, u8 disable, u16 usec)
3564 int port = BP_PORT(bp);
3565 u8 ticks = usec / BNX2X_BTR;
3567 storm_memset_hc_timeout(bp, port, fw_sb_id, sb_index, ticks);
3569 disable = disable ? 1 : (usec ? 0 : 1);
3570 storm_memset_hc_disable(bp, port, fw_sb_id, sb_index, disable);