bnx2x: record rx queue for LRO packets
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
1 /* bnx2x_cmn.c: Broadcom Everest network driver.
2  *
3  * Copyright (c) 2007-2011 Broadcom Corporation
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation.
8  *
9  * Maintained by: Eilon Greenstein <eilong@broadcom.com>
10  * Written by: Eliezer Tamir
11  * Based on code from Michael Chan's bnx2 driver
12  * UDP CSUM errata workaround by Arik Gendelman
13  * Slowpath and fastpath rework by Vladislav Zolotarov
14  * Statistics and Link management by Yitchak Gertner
15  *
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/etherdevice.h>
21 #include <linux/if_vlan.h>
22 #include <linux/interrupt.h>
23 #include <linux/ip.h>
24 #include <net/ipv6.h>
25 #include <net/ip6_checksum.h>
26 #include <linux/firmware.h>
27 #include <linux/prefetch.h>
28 #include "bnx2x_cmn.h"
29 #include "bnx2x_init.h"
30 #include "bnx2x_sp.h"
31
32
33
34 /**
35  * bnx2x_bz_fp - zero content of the fastpath structure.
36  *
37  * @bp:         driver handle
38  * @index:      fastpath index to be zeroed
39  *
40  * Makes sure the contents of bp->fp[index].napi are kept
41  * intact.
42  */
43 static inline void bnx2x_bz_fp(struct bnx2x *bp, int index)
44 {
45         struct bnx2x_fastpath *fp = &bp->fp[index];
46         struct napi_struct orig_napi = fp->napi;
47         /* bzero bnx2x_fastpath contents */
48         memset(fp, 0, sizeof(*fp));
49
50         /* Restore the NAPI object as it has been already initialized */
51         fp->napi = orig_napi;
52
53         fp->bp = bp;
54         fp->index = index;
55         if (IS_ETH_FP(fp))
56                 fp->max_cos = bp->max_cos;
57         else
58                 /* Special queues support only one CoS */
59                 fp->max_cos = 1;
60
61         /*
62          * Set the TPA flag for each queue. The TPA flag determines the queue's
63          * minimal size, so it must be set prior to queue memory allocation.
64          */
65         fp->disable_tpa = ((bp->flags & TPA_ENABLE_FLAG) == 0);
66
67 #ifdef BCM_CNIC
68         /* We don't want TPA on an FCoE L2 ring */
69         if (IS_FCOE_FP(fp))
70                 fp->disable_tpa = 1;
71 #endif
72 }
73
74 /**
75  * bnx2x_move_fp - move content of the fastpath structure.
76  *
77  * @bp:         driver handle
78  * @from:       source FP index
79  * @to:         destination FP index
80  *
81  * Makes sure the contents of bp->fp[to].napi are kept
82  * intact.
83  */
84 static inline void bnx2x_move_fp(struct bnx2x *bp, int from, int to)
85 {
86         struct bnx2x_fastpath *from_fp = &bp->fp[from];
87         struct bnx2x_fastpath *to_fp = &bp->fp[to];
88         struct napi_struct orig_napi = to_fp->napi;
89         /* Move bnx2x_fastpath contents */
90         memcpy(to_fp, from_fp, sizeof(*to_fp));
91         to_fp->index = to;
92
93         /* Restore the NAPI object as it has been already initialized */
94         to_fp->napi = orig_napi;
95 }
96
97 int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */
98
99 /* free skb in the packet ring at pos idx
100  * return idx of last bd freed
101  */
102 static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata,
103                              u16 idx)
104 {
105         struct sw_tx_bd *tx_buf = &txdata->tx_buf_ring[idx];
106         struct eth_tx_start_bd *tx_start_bd;
107         struct eth_tx_bd *tx_data_bd;
108         struct sk_buff *skb = tx_buf->skb;
109         u16 bd_idx = TX_BD(tx_buf->first_bd), new_cons;
110         int nbd;
111
112         /* prefetch skb end pointer to speed up dev_kfree_skb() */
113         prefetch(&skb->end);
114
115         DP(BNX2X_MSG_FP, "fp[%d]: pkt_idx %d  buff @(%p)->skb %p\n",
116            txdata->txq_index, idx, tx_buf, skb);
117
118         /* unmap first bd */
119         DP(BNX2X_MSG_OFF, "free bd_idx %d\n", bd_idx);
120         tx_start_bd = &txdata->tx_desc_ring[bd_idx].start_bd;
121         dma_unmap_single(&bp->pdev->dev, BD_UNMAP_ADDR(tx_start_bd),
122                          BD_UNMAP_LEN(tx_start_bd), DMA_TO_DEVICE);
123
124
125         nbd = le16_to_cpu(tx_start_bd->nbd) - 1;
126 #ifdef BNX2X_STOP_ON_ERROR
127         if ((nbd - 1) > (MAX_SKB_FRAGS + 2)) {
128                 BNX2X_ERR("BAD nbd!\n");
129                 bnx2x_panic();
130         }
131 #endif
132         new_cons = nbd + tx_buf->first_bd;
133
134         /* Get the next bd */
135         bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
136
137         /* Skip a parse bd... */
138         --nbd;
139         bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
140
141         /* ...and the TSO split header bd since they have no mapping */
142         if (tx_buf->flags & BNX2X_TSO_SPLIT_BD) {
143                 --nbd;
144                 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
145         }
146
147         /* now free frags */
148         while (nbd > 0) {
149
150                 DP(BNX2X_MSG_OFF, "free frag bd_idx %d\n", bd_idx);
151                 tx_data_bd = &txdata->tx_desc_ring[bd_idx].reg_bd;
152                 dma_unmap_page(&bp->pdev->dev, BD_UNMAP_ADDR(tx_data_bd),
153                                BD_UNMAP_LEN(tx_data_bd), DMA_TO_DEVICE);
154                 if (--nbd)
155                         bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
156         }
157
158         /* release skb */
159         WARN_ON(!skb);
160         dev_kfree_skb_any(skb);
161         tx_buf->first_bd = 0;
162         tx_buf->skb = NULL;
163
164         return new_cons;
165 }
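/*
 * A rough sketch of the descriptor chain that bnx2x_free_tx_pkt() walks
 * above, for a hypothetical TSO skb with two frags (only the start BD and
 * the frag BDs carry DMA mappings; the parse BD and the TSO split-header
 * BD are skipped):
 *
 *   start BD -> parse BD -> split-header BD -> frag BD -> frag BD
 */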
166
167 int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata)
168 {
169         struct netdev_queue *txq;
170         u16 hw_cons, sw_cons, bd_cons = txdata->tx_bd_cons;
171
172 #ifdef BNX2X_STOP_ON_ERROR
173         if (unlikely(bp->panic))
174                 return -1;
175 #endif
176
177         txq = netdev_get_tx_queue(bp->dev, txdata->txq_index);
178         hw_cons = le16_to_cpu(*txdata->tx_cons_sb);
179         sw_cons = txdata->tx_pkt_cons;
180
181         while (sw_cons != hw_cons) {
182                 u16 pkt_cons;
183
184                 pkt_cons = TX_BD(sw_cons);
185
186                 DP(NETIF_MSG_TX_DONE, "queue[%d]: hw_cons %u  sw_cons %u "
187                                       " pkt_cons %u\n",
188                    txdata->txq_index, hw_cons, sw_cons, pkt_cons);
189
190                 bd_cons = bnx2x_free_tx_pkt(bp, txdata, pkt_cons);
191                 sw_cons++;
192         }
193
194         txdata->tx_pkt_cons = sw_cons;
195         txdata->tx_bd_cons = bd_cons;
196
197         /* Need to make the tx_bd_cons update visible to start_xmit()
198          * before checking for netif_tx_queue_stopped().  Without the
199          * memory barrier, there is a small possibility that
200          * start_xmit() will miss it and cause the queue to be stopped
201          * forever.
202          * On the other hand we need an rmb() here to ensure the proper
203          * ordering of bit testing in the following
204          * netif_tx_queue_stopped(txq) call.
205          */
206         smp_mb();
207
208         if (unlikely(netif_tx_queue_stopped(txq))) {
209                 /* Taking tx_lock() is needed to prevent re-enabling the queue
210                  * while it's empty. This could have happened if rx_action() gets
211                  * suspended in bnx2x_tx_int() after the condition before
212                  * netif_tx_wake_queue(), while tx_action (bnx2x_start_xmit()):
213                  *
214                  * stops the queue->sees fresh tx_bd_cons->releases the queue->
215                  * sends some packets consuming the whole queue again->
216                  * stops the queue
217                  */
218
219                 __netif_tx_lock(txq, smp_processor_id());
220
221                 if ((netif_tx_queue_stopped(txq)) &&
222                     (bp->state == BNX2X_STATE_OPEN) &&
223                     (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 4))
224                         netif_tx_wake_queue(txq);
225
226                 __netif_tx_unlock(txq);
227         }
228         return 0;
229 }
230
231 static inline void bnx2x_update_last_max_sge(struct bnx2x_fastpath *fp,
232                                              u16 idx)
233 {
234         u16 last_max = fp->last_max_sge;
235
236         if (SUB_S16(idx, last_max) > 0)
237                 fp->last_max_sge = idx;
238 }
239
240 static void bnx2x_update_sge_prod(struct bnx2x_fastpath *fp,
241                                   struct eth_fast_path_rx_cqe *fp_cqe)
242 {
243         struct bnx2x *bp = fp->bp;
244         u16 sge_len = SGE_PAGE_ALIGN(le16_to_cpu(fp_cqe->pkt_len) -
245                                      le16_to_cpu(fp_cqe->len_on_bd)) >>
246                       SGE_PAGE_SHIFT;
247         u16 last_max, last_elem, first_elem;
248         u16 delta = 0;
249         u16 i;
250
251         if (!sge_len)
252                 return;
253
254         /* First mark all used pages */
255         for (i = 0; i < sge_len; i++)
256                 BIT_VEC64_CLEAR_BIT(fp->sge_mask,
257                         RX_SGE(le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[i])));
258
259         DP(NETIF_MSG_RX_STATUS, "fp_cqe->sgl[%d] = %d\n",
260            sge_len - 1, le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[sge_len - 1]));
261
262         /* Here we assume that the last SGE index is the biggest */
263         prefetch((void *)(fp->sge_mask));
264         bnx2x_update_last_max_sge(fp,
265                 le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[sge_len - 1]));
266
267         last_max = RX_SGE(fp->last_max_sge);
268         last_elem = last_max >> BIT_VEC64_ELEM_SHIFT;
269         first_elem = RX_SGE(fp->rx_sge_prod) >> BIT_VEC64_ELEM_SHIFT;
270
271         /* If ring is not full */
272         if (last_elem + 1 != first_elem)
273                 last_elem++;
274
275         /* Now update the prod */
276         for (i = first_elem; i != last_elem; i = NEXT_SGE_MASK_ELEM(i)) {
277                 if (likely(fp->sge_mask[i]))
278                         break;
279
280                 fp->sge_mask[i] = BIT_VEC64_ELEM_ONE_MASK;
281                 delta += BIT_VEC64_ELEM_SZ;
282         }
283
284         if (delta > 0) {
285                 fp->rx_sge_prod += delta;
286                 /* clear page-end entries */
287                 bnx2x_clear_sge_mask_next_elems(fp);
288         }
289
290         DP(NETIF_MSG_RX_STATUS,
291            "fp->last_max_sge = %d  fp->rx_sge_prod = %d\n",
292            fp->last_max_sge, fp->rx_sge_prod);
293 }
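/*
 * A worked note on the SGE mask bookkeeping above, assuming 64-bit mask
 * elements (BIT_VEC64_ELEM_SZ == 64): the bit of every SGE the FW reports
 * as used is cleared, and rx_sge_prod is advanced only over mask elements
 * whose bits are all cleared (i.e. every SGE they cover has been consumed).
 * Such elements are re-armed to all ones and the producer is bumped by
 * BIT_VEC64_ELEM_SZ per element, which is the "delta" accumulated above.
 */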
294
295 static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
296                             struct sk_buff *skb, u16 cons, u16 prod,
297                             struct eth_fast_path_rx_cqe *cqe)
298 {
299         struct bnx2x *bp = fp->bp;
300         struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
301         struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
302         struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
303         dma_addr_t mapping;
304         struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
305         struct sw_rx_bd *first_buf = &tpa_info->first_buf;
306
307         /* print error if current state != stop */
308         if (tpa_info->tpa_state != BNX2X_TPA_STOP)
309                 BNX2X_ERR("start of bin not in stop [%d]\n", queue);
310
311         /* Try to map an empty skb from the aggregation info  */
312         mapping = dma_map_single(&bp->pdev->dev,
313                                  first_buf->skb->data,
314                                  fp->rx_buf_size, DMA_FROM_DEVICE);
315         /*
316          *  ...if it fails - move the skb from the consumer to the producer
317          *  and set the current aggregation state as ERROR to drop it
318          *  when TPA_STOP arrives.
319          */
320
321         if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
322                 /* Move the BD from the consumer to the producer */
323                 bnx2x_reuse_rx_skb(fp, cons, prod);
324                 tpa_info->tpa_state = BNX2X_TPA_ERROR;
325                 return;
326         }
327
328         /* move empty skb from pool to prod */
329         prod_rx_buf->skb = first_buf->skb;
330         dma_unmap_addr_set(prod_rx_buf, mapping, mapping);
331         /* point prod_bd to new skb */
332         prod_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
333         prod_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
334
335         /* move partial skb from cons to pool (don't unmap yet) */
336         *first_buf = *cons_rx_buf;
337
338         /* mark bin state as START */
339         tpa_info->parsing_flags =
340                 le16_to_cpu(cqe->pars_flags.flags);
341         tpa_info->vlan_tag = le16_to_cpu(cqe->vlan_tag);
342         tpa_info->tpa_state = BNX2X_TPA_START;
343         tpa_info->len_on_bd = le16_to_cpu(cqe->len_on_bd);
344         tpa_info->placement_offset = cqe->placement_offset;
345
346 #ifdef BNX2X_STOP_ON_ERROR
347         fp->tpa_queue_used |= (1 << queue);
348 #ifdef _ASM_GENERIC_INT_L64_H
349         DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%lx\n",
350 #else
351         DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%llx\n",
352 #endif
353            fp->tpa_queue_used);
354 #endif
355 }
356
357 /* Timestamp option length allowed for TPA aggregation:
358  *
359  *              nop nop kind length echo val
360  */
361 #define TPA_TSTAMP_OPT_LEN      12
362 /**
363  * bnx2x_set_lro_mss - calculate the approximate value of the MSS
364  *
365  * @bp:                 driver handle
366  * @parsing_flags:      parsing flags from the START CQE
367  * @len_on_bd:          total length of the first packet for the
368  *                      aggregation.
369  *
370  * Returns the approximate value of the MSS for this aggregation,
371  * calculated using its first packet.
372  */
373 static inline u16 bnx2x_set_lro_mss(struct bnx2x *bp, u16 parsing_flags,
374                                     u16 len_on_bd)
375 {
376         /*
377          * A TPA aggregation won't have IP options, TCP options other than
378          * the timestamp, or IPv6 extension headers.
379          */
380         u16 hdrs_len = ETH_HLEN + sizeof(struct tcphdr);
381
382         if (GET_FLAG(parsing_flags, PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
383             PRS_FLAG_OVERETH_IPV6)
384                 hdrs_len += sizeof(struct ipv6hdr);
385         else /* IPv4 */
386                 hdrs_len += sizeof(struct iphdr);
387
388
389         /* Check if there was a TCP timestamp; if there is one, it will
390          * always be 12 bytes long: nop nop kind length echo val.
391          *
392          * Otherwise the FW would close the aggregation.
393          */
394         if (parsing_flags & PARSING_FLAGS_TIME_STAMP_EXIST_FLAG)
395                 hdrs_len += TPA_TSTAMP_OPT_LEN;
396
397         return len_on_bd - hdrs_len;
398 }
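/*
 * For example, for an IPv4 aggregation with TCP timestamps the header
 * length above works out to ETH_HLEN (14) + sizeof(struct iphdr) (20) +
 * sizeof(struct tcphdr) (20) + TPA_TSTAMP_OPT_LEN (12) = 66 bytes, so a
 * len_on_bd of 1514 yields an approximate MSS of 1448.
 */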
399
400 static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
401                                u16 queue, struct sk_buff *skb,
402                                struct eth_end_agg_rx_cqe *cqe,
403                                u16 cqe_idx)
404 {
405         struct sw_rx_page *rx_pg, old_rx_pg;
406         u32 i, frag_len, frag_size, pages;
407         int err;
408         int j;
409         struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
410         u16 len_on_bd = tpa_info->len_on_bd;
411
412         frag_size = le16_to_cpu(cqe->pkt_len) - len_on_bd;
413         pages = SGE_PAGE_ALIGN(frag_size) >> SGE_PAGE_SHIFT;
414
415         /* This is needed in order to enable forwarding support */
416         if (frag_size)
417                 skb_shinfo(skb)->gso_size = bnx2x_set_lro_mss(bp,
418                                         tpa_info->parsing_flags, len_on_bd);
419
420 #ifdef BNX2X_STOP_ON_ERROR
421         if (pages > min_t(u32, 8, MAX_SKB_FRAGS)*SGE_PAGE_SIZE*PAGES_PER_SGE) {
422                 BNX2X_ERR("SGL length is too long: %d. CQE index is %d\n",
423                           pages, cqe_idx);
424                 BNX2X_ERR("cqe->pkt_len = %d\n", cqe->pkt_len);
425                 bnx2x_panic();
426                 return -EINVAL;
427         }
428 #endif
429
430         /* Run through the SGL and compose the fragmented skb */
431         for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) {
432                 u16 sge_idx = RX_SGE(le16_to_cpu(cqe->sgl_or_raw_data.sgl[j]));
433
434                 /* FW gives the indices of the SGE as if the ring is an array
435                    (meaning that the "next" element will consume 2 indices) */
436                 frag_len = min(frag_size, (u32)(SGE_PAGE_SIZE*PAGES_PER_SGE));
437                 rx_pg = &fp->rx_page_ring[sge_idx];
438                 old_rx_pg = *rx_pg;
439
440                 /* If we fail to allocate a substitute page, we simply stop
441                    where we are and drop the whole packet */
442                 err = bnx2x_alloc_rx_sge(bp, fp, sge_idx);
443                 if (unlikely(err)) {
444                         fp->eth_q_stats.rx_skb_alloc_failed++;
445                         return err;
446                 }
447
448                 /* Unmap the page as we are going to pass it to the stack */
449                 dma_unmap_page(&bp->pdev->dev,
450                                dma_unmap_addr(&old_rx_pg, mapping),
451                                SGE_PAGE_SIZE*PAGES_PER_SGE, DMA_FROM_DEVICE);
452
453                 /* Add one frag and update the appropriate fields in the skb */
454                 skb_fill_page_desc(skb, j, old_rx_pg.page, 0, frag_len);
455
456                 skb->data_len += frag_len;
457                 skb->truesize += SGE_PAGE_SIZE * PAGES_PER_SGE;
458                 skb->len += frag_len;
459
460                 frag_size -= frag_len;
461         }
462
463         return 0;
464 }
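/*
 * Illustrative numbers for the SGL walk above, assuming 4 KiB pages and a
 * single page per SGE (PAGES_PER_SGE == 1): with frag_size = 10000 bytes
 * the loop consumes SGE_PAGE_ALIGN(10000) >> SGE_PAGE_SHIFT = 3 SGEs and
 * attaches frags of 4096, 4096 and 1808 bytes respectively.
 */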
465
466 static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
467                            u16 queue, struct eth_end_agg_rx_cqe *cqe,
468                            u16 cqe_idx)
469 {
470         struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
471         struct sw_rx_bd *rx_buf = &tpa_info->first_buf;
472         u8 pad = tpa_info->placement_offset;
473         u16 len = tpa_info->len_on_bd;
474         struct sk_buff *skb = rx_buf->skb;
475         /* alloc new skb */
476         struct sk_buff *new_skb;
477         u8 old_tpa_state = tpa_info->tpa_state;
478
479         tpa_info->tpa_state = BNX2X_TPA_STOP;
480
481         /* If there was an error during the handling of the TPA_START -
482          * drop this aggregation.
483          */
484         if (old_tpa_state == BNX2X_TPA_ERROR)
485                 goto drop;
486
487         /* Try to allocate the new skb */
488         new_skb = netdev_alloc_skb(bp->dev, fp->rx_buf_size);
489
490         /* Unmap skb in the pool anyway, as we are going to change
491            pool entry status to BNX2X_TPA_STOP even if new skb allocation
492            fails. */
493         dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(rx_buf, mapping),
494                          fp->rx_buf_size, DMA_FROM_DEVICE);
495
496         if (likely(new_skb)) {
497                 prefetch(skb);
498                 prefetch(((char *)(skb)) + L1_CACHE_BYTES);
499
500 #ifdef BNX2X_STOP_ON_ERROR
501                 if (pad + len > fp->rx_buf_size) {
502                         BNX2X_ERR("skb_put is about to fail...  "
503                                   "pad %d  len %d  rx_buf_size %d\n",
504                                   pad, len, fp->rx_buf_size);
505                         bnx2x_panic();
506                         return;
507                 }
508 #endif
509
510                 skb_reserve(skb, pad);
511                 skb_put(skb, len);
512
513                 skb->protocol = eth_type_trans(skb, bp->dev);
514                 skb->ip_summed = CHECKSUM_UNNECESSARY;
515
516                 if (!bnx2x_fill_frag_skb(bp, fp, queue, skb, cqe, cqe_idx)) {
517                         if (tpa_info->parsing_flags & PARSING_FLAGS_VLAN)
518                                 __vlan_hwaccel_put_tag(skb, tpa_info->vlan_tag);
519                         skb_record_rx_queue(skb, fp->index);
520                         napi_gro_receive(&fp->napi, skb);
521                 } else {
522                         DP(NETIF_MSG_RX_STATUS, "Failed to allocate new pages"
523                            " - dropping packet!\n");
524                         dev_kfree_skb_any(skb);
525                 }
526
527
528                 /* put new skb in bin */
529                 rx_buf->skb = new_skb;
530
531                 return;
532         }
533
534 drop:
535         /* drop the packet and keep the buffer in the bin */
536         DP(NETIF_MSG_RX_STATUS,
537            "Failed to allocate or map a new skb - dropping packet!\n");
538         fp->eth_q_stats.rx_skb_alloc_failed++;
539 }
540
541 /* Set Toeplitz hash value in the skb using the value from the
542  * CQE (calculated by HW).
543  */
544 static inline void bnx2x_set_skb_rxhash(struct bnx2x *bp, union eth_rx_cqe *cqe,
545                                         struct sk_buff *skb)
546 {
547         /* Set Toeplitz hash from CQE */
548         if ((bp->dev->features & NETIF_F_RXHASH) &&
549             (cqe->fast_path_cqe.status_flags &
550              ETH_FAST_PATH_RX_CQE_RSS_HASH_FLG))
551                 skb->rxhash =
552                 le32_to_cpu(cqe->fast_path_cqe.rss_hash_result);
553 }
554
555 static void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe,
556                                 struct bnx2x_fastpath *fp)
557 {
558         /* Do nothing if no L4 csum validation was done.
559          * We do not check whether IP csum was validated. For IPv4 we assume
560          * that if the card got as far as validating the L4 csum, it also
561          * validated the IP csum. IPv6 has no IP csum.
562          */
563         if (cqe->fast_path_cqe.status_flags &
564             ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)
565                 return;
566
567         /* If L4 validation was done, check if an error was found. */
568
569         if (cqe->fast_path_cqe.type_error_flags &
570             (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG |
571              ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG))
572                 fp->eth_q_stats.hw_csum_err++;
573         else
574                 skb->ip_summed = CHECKSUM_UNNECESSARY;
575 }
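/*
 * Net effect of the helper above: skb_checksum_none_assert() has already
 * been called by the caller, so a packet whose L4 checksum the HW actually
 * validated is promoted to CHECKSUM_UNNECESSARY, while a detected checksum
 * error only bumps hw_csum_err and leaves the skb at CHECKSUM_NONE for the
 * stack to re-verify.
 */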
576
577 int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
578 {
579         struct bnx2x *bp = fp->bp;
580         u16 bd_cons, bd_prod, bd_prod_fw, comp_ring_cons;
581         u16 hw_comp_cons, sw_comp_cons, sw_comp_prod;
582         int rx_pkt = 0;
583
584 #ifdef BNX2X_STOP_ON_ERROR
585         if (unlikely(bp->panic))
586                 return 0;
587 #endif
588
589         /* The CQ "next element" is the same size as a regular element,
590            that's why it's ok here */
591         hw_comp_cons = le16_to_cpu(*fp->rx_cons_sb);
592         if ((hw_comp_cons & MAX_RCQ_DESC_CNT) == MAX_RCQ_DESC_CNT)
593                 hw_comp_cons++;
594
595         bd_cons = fp->rx_bd_cons;
596         bd_prod = fp->rx_bd_prod;
597         bd_prod_fw = bd_prod;
598         sw_comp_cons = fp->rx_comp_cons;
599         sw_comp_prod = fp->rx_comp_prod;
600
601         /* Memory barrier necessary as speculative reads of the rx
602          * buffer can be ahead of the index in the status block
603          */
604         rmb();
605
606         DP(NETIF_MSG_RX_STATUS,
607            "queue[%d]:  hw_comp_cons %u  sw_comp_cons %u\n",
608            fp->index, hw_comp_cons, sw_comp_cons);
609
610         while (sw_comp_cons != hw_comp_cons) {
611                 struct sw_rx_bd *rx_buf = NULL;
612                 struct sk_buff *skb;
613                 union eth_rx_cqe *cqe;
614                 struct eth_fast_path_rx_cqe *cqe_fp;
615                 u8 cqe_fp_flags;
616                 enum eth_rx_cqe_type cqe_fp_type;
617                 u16 len, pad;
618
619 #ifdef BNX2X_STOP_ON_ERROR
620                 if (unlikely(bp->panic))
621                         return 0;
622 #endif
623
624                 comp_ring_cons = RCQ_BD(sw_comp_cons);
625                 bd_prod = RX_BD(bd_prod);
626                 bd_cons = RX_BD(bd_cons);
627
628                 /* Prefetch the page containing the BD descriptor
629                    at the producer's index. It will be needed when a new skb is
630                    allocated */
631                 prefetch((void *)(PAGE_ALIGN((unsigned long)
632                                              (&fp->rx_desc_ring[bd_prod])) -
633                                   PAGE_SIZE + 1));
634
635                 cqe = &fp->rx_comp_ring[comp_ring_cons];
636                 cqe_fp = &cqe->fast_path_cqe;
637                 cqe_fp_flags = cqe_fp->type_error_flags;
638                 cqe_fp_type = cqe_fp_flags & ETH_FAST_PATH_RX_CQE_TYPE;
639
640                 DP(NETIF_MSG_RX_STATUS, "CQE type %x  err %x  status %x"
641                    "  queue %x  vlan %x  len %u\n", CQE_TYPE(cqe_fp_flags),
642                    cqe_fp_flags, cqe_fp->status_flags,
643                    le32_to_cpu(cqe_fp->rss_hash_result),
644                    le16_to_cpu(cqe_fp->vlan_tag), le16_to_cpu(cqe_fp->pkt_len));
645
646                 /* is this a slowpath msg? */
647                 if (unlikely(CQE_TYPE_SLOW(cqe_fp_type))) {
648                         bnx2x_sp_event(fp, cqe);
649                         goto next_cqe;
650
651                 /* this is an rx packet */
652                 } else {
653                         rx_buf = &fp->rx_buf_ring[bd_cons];
654                         skb = rx_buf->skb;
655                         prefetch(skb);
656
657                         if (!CQE_TYPE_FAST(cqe_fp_type)) {
658 #ifdef BNX2X_STOP_ON_ERROR
659                                 /* sanity check */
660                                 if (fp->disable_tpa &&
661                                     (CQE_TYPE_START(cqe_fp_type) ||
662                                      CQE_TYPE_STOP(cqe_fp_type)))
663                                         BNX2X_ERR("START/STOP packet while "
664                                                   "disable_tpa type %x\n",
665                                                   CQE_TYPE(cqe_fp_type));
666 #endif
667
668                                 if (CQE_TYPE_START(cqe_fp_type)) {
669                                         u16 queue = cqe_fp->queue_index;
670                                         DP(NETIF_MSG_RX_STATUS,
671                                            "calling tpa_start on queue %d\n",
672                                            queue);
673
674                                         bnx2x_tpa_start(fp, queue, skb,
675                                                         bd_cons, bd_prod,
676                                                         cqe_fp);
677
678                                         /* Set Toeplitz hash for LRO skb */
679                                         bnx2x_set_skb_rxhash(bp, cqe, skb);
680
681                                         goto next_rx;
682
683                                 } else {
684                                         u16 queue =
685                                                 cqe->end_agg_cqe.queue_index;
686                                         DP(NETIF_MSG_RX_STATUS,
687                                            "calling tpa_stop on queue %d\n",
688                                            queue);
689
690                                         bnx2x_tpa_stop(bp, fp, queue,
691                                                        &cqe->end_agg_cqe,
692                                                        comp_ring_cons);
693 #ifdef BNX2X_STOP_ON_ERROR
694                                         if (bp->panic)
695                                                 return 0;
696 #endif
697
698                                         bnx2x_update_sge_prod(fp, cqe_fp);
699                                         goto next_cqe;
700                                 }
701                         }
702                         /* non TPA */
703                         len = le16_to_cpu(cqe_fp->pkt_len);
704                         pad = cqe_fp->placement_offset;
705                         dma_sync_single_for_cpu(&bp->pdev->dev,
706                                         dma_unmap_addr(rx_buf, mapping),
707                                                        pad + RX_COPY_THRESH,
708                                                        DMA_FROM_DEVICE);
709                         prefetch(((char *)(skb)) + L1_CACHE_BYTES);
710
711                         /* is this an error packet? */
712                         if (unlikely(cqe_fp_flags & ETH_RX_ERROR_FALGS)) {
713                                 DP(NETIF_MSG_RX_ERR,
714                                    "ERROR  flags %x  rx packet %u\n",
715                                    cqe_fp_flags, sw_comp_cons);
716                                 fp->eth_q_stats.rx_err_discard_pkt++;
717                                 goto reuse_rx;
718                         }
719
720                         /* Since we don't have a jumbo ring,
721                          * copy small packets if the MTU > 1500
722                          */
723                         if ((bp->dev->mtu > ETH_MAX_PACKET_SIZE) &&
724                             (len <= RX_COPY_THRESH)) {
725                                 struct sk_buff *new_skb;
726
727                                 new_skb = netdev_alloc_skb(bp->dev, len + pad);
728                                 if (new_skb == NULL) {
729                                         DP(NETIF_MSG_RX_ERR,
730                                            "ERROR  packet dropped "
731                                            "because of alloc failure\n");
732                                         fp->eth_q_stats.rx_skb_alloc_failed++;
733                                         goto reuse_rx;
734                                 }
735
736                                 /* aligned copy */
737                                 skb_copy_from_linear_data_offset(skb, pad,
738                                                     new_skb->data + pad, len);
739                                 skb_reserve(new_skb, pad);
740                                 skb_put(new_skb, len);
741
742                                 bnx2x_reuse_rx_skb(fp, bd_cons, bd_prod);
743
744                                 skb = new_skb;
745
746                         } else
747                         if (likely(bnx2x_alloc_rx_skb(bp, fp, bd_prod) == 0)) {
748                                 dma_unmap_single(&bp->pdev->dev,
749                                         dma_unmap_addr(rx_buf, mapping),
750                                                  fp->rx_buf_size,
751                                                  DMA_FROM_DEVICE);
752                                 skb_reserve(skb, pad);
753                                 skb_put(skb, len);
754
755                         } else {
756                                 DP(NETIF_MSG_RX_ERR,
757                                    "ERROR  packet dropped because "
758                                    "of alloc failure\n");
759                                 fp->eth_q_stats.rx_skb_alloc_failed++;
760 reuse_rx:
761                                 bnx2x_reuse_rx_skb(fp, bd_cons, bd_prod);
762                                 goto next_rx;
763                         }
764
765                         skb->protocol = eth_type_trans(skb, bp->dev);
766
767                         /* Set Toeplitz hash for a non-LRO skb */
768                         bnx2x_set_skb_rxhash(bp, cqe, skb);
769
770                         skb_checksum_none_assert(skb);
771
772                         if (bp->dev->features & NETIF_F_RXCSUM)
773                                 bnx2x_csum_validate(skb, cqe, fp);
774
775                 }
776
777                 skb_record_rx_queue(skb, fp->index);
778
779                 if (le16_to_cpu(cqe_fp->pars_flags.flags) &
780                     PARSING_FLAGS_VLAN)
781                         __vlan_hwaccel_put_tag(skb,
782                                                le16_to_cpu(cqe_fp->vlan_tag));
783                 napi_gro_receive(&fp->napi, skb);
784
785
786 next_rx:
787                 rx_buf->skb = NULL;
788
789                 bd_cons = NEXT_RX_IDX(bd_cons);
790                 bd_prod = NEXT_RX_IDX(bd_prod);
791                 bd_prod_fw = NEXT_RX_IDX(bd_prod_fw);
792                 rx_pkt++;
793 next_cqe:
794                 sw_comp_prod = NEXT_RCQ_IDX(sw_comp_prod);
795                 sw_comp_cons = NEXT_RCQ_IDX(sw_comp_cons);
796
797                 if (rx_pkt == budget)
798                         break;
799         } /* while */
800
801         fp->rx_bd_cons = bd_cons;
802         fp->rx_bd_prod = bd_prod_fw;
803         fp->rx_comp_cons = sw_comp_cons;
804         fp->rx_comp_prod = sw_comp_prod;
805
806         /* Update producers */
807         bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod,
808                              fp->rx_sge_prod);
809
810         fp->rx_pkt += rx_pkt;
811         fp->rx_calls++;
812
813         return rx_pkt;
814 }
815
816 static irqreturn_t bnx2x_msix_fp_int(int irq, void *fp_cookie)
817 {
818         struct bnx2x_fastpath *fp = fp_cookie;
819         struct bnx2x *bp = fp->bp;
820         u8 cos;
821
822         DP(BNX2X_MSG_FP, "got an MSI-X interrupt on IDX:SB "
823                          "[fp %d fw_sd %d igusb %d]\n",
824            fp->index, fp->fw_sb_id, fp->igu_sb_id);
825         bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0);
826
827 #ifdef BNX2X_STOP_ON_ERROR
828         if (unlikely(bp->panic))
829                 return IRQ_HANDLED;
830 #endif
831
832         /* Handle Rx and Tx according to MSI-X vector */
833         prefetch(fp->rx_cons_sb);
834
835         for_each_cos_in_tx_queue(fp, cos)
836                 prefetch(fp->txdata[cos].tx_cons_sb);
837
838         prefetch(&fp->sb_running_index[SM_RX_ID]);
839         napi_schedule(&bnx2x_fp(bp, fp->index, napi));
840
841         return IRQ_HANDLED;
842 }
843
844 /* HW Lock for shared dual port PHYs */
845 void bnx2x_acquire_phy_lock(struct bnx2x *bp)
846 {
847         mutex_lock(&bp->port.phy_mutex);
848
849         if (bp->port.need_hw_lock)
850                 bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_MDIO);
851 }
852
853 void bnx2x_release_phy_lock(struct bnx2x *bp)
854 {
855         if (bp->port.need_hw_lock)
856                 bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_MDIO);
857
858         mutex_unlock(&bp->port.phy_mutex);
859 }
860
861 /* calculates MF speed according to current line speed and MF configuration */
862 u16 bnx2x_get_mf_speed(struct bnx2x *bp)
863 {
864         u16 line_speed = bp->link_vars.line_speed;
865         if (IS_MF(bp)) {
866                 u16 maxCfg = bnx2x_extract_max_cfg(bp,
867                                                    bp->mf_config[BP_VN(bp)]);
868
869                 /* Calculate the current MAX line speed limit for the MF
870                  * devices
871                  */
872                 if (IS_MF_SI(bp))
873                         line_speed = (line_speed * maxCfg) / 100;
874                 else { /* SD mode */
875                         u16 vn_max_rate = maxCfg * 100;
876
877                         if (vn_max_rate < line_speed)
878                                 line_speed = vn_max_rate;
879                 }
880         }
881
882         return line_speed;
883 }
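/*
 * A quick example of the scaling above, assuming maxCfg reads back as 50
 * on a 10000 Mbps link: in SI mode the reported speed becomes
 * 10000 * 50 / 100 = 5000 Mbps, while in SD mode the limit is
 * 50 * 100 = 5000 Mbps and the lower of that and the physical line speed
 * is reported.
 */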
884
885 /**
886  * bnx2x_fill_report_data - fill link report data to report
887  *
888  * @bp:         driver handle
889  * @data:       link state to update
890  *
891  * It uses non-atomic bit operations because it is called under the mutex.
892  */
893 static inline void bnx2x_fill_report_data(struct bnx2x *bp,
894                                           struct bnx2x_link_report_data *data)
895 {
896         u16 line_speed = bnx2x_get_mf_speed(bp);
897
898         memset(data, 0, sizeof(*data));
899
900         /* Fill the report data: effective line speed */
901         data->line_speed = line_speed;
902
903         /* Link is down */
904         if (!bp->link_vars.link_up || (bp->flags & MF_FUNC_DIS))
905                 __set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
906                           &data->link_report_flags);
907
908         /* Full DUPLEX */
909         if (bp->link_vars.duplex == DUPLEX_FULL)
910                 __set_bit(BNX2X_LINK_REPORT_FD, &data->link_report_flags);
911
912         /* Rx Flow Control is ON */
913         if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_RX)
914                 __set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
915
916         /* Tx Flow Control is ON */
917         if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_TX)
918                 __set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
919 }
920
921 /**
922  * bnx2x_link_report - report link status to OS.
923  *
924  * @bp:         driver handle
925  *
926  * Calls __bnx2x_link_report() under the same locking scheme
927  * as the link/PHY state managing code to ensure consistent link
928  * reporting.
929  */
930
931 void bnx2x_link_report(struct bnx2x *bp)
932 {
933         bnx2x_acquire_phy_lock(bp);
934         __bnx2x_link_report(bp);
935         bnx2x_release_phy_lock(bp);
936 }
937
938 /**
939  * __bnx2x_link_report - report link status to OS.
940  *
941  * @bp:         driver handle
942  *
943  * Non-atomic implementation.
944  * Should be called under the phy_lock.
945  */
946 void __bnx2x_link_report(struct bnx2x *bp)
947 {
948         struct bnx2x_link_report_data cur_data;
949
950         /* reread mf_cfg */
951         if (!CHIP_IS_E1(bp))
952                 bnx2x_read_mf_cfg(bp);
953
954         /* Read the current link report info */
955         bnx2x_fill_report_data(bp, &cur_data);
956
957         /* Don't report link down or exactly the same link status twice */
958         if (!memcmp(&cur_data, &bp->last_reported_link, sizeof(cur_data)) ||
959             (test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
960                       &bp->last_reported_link.link_report_flags) &&
961              test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
962                       &cur_data.link_report_flags)))
963                 return;
964
965         bp->link_cnt++;
966
967         /* We are going to report new link parameters now -
968          * remember the current data for the next time.
969          */
970         memcpy(&bp->last_reported_link, &cur_data, sizeof(cur_data));
971
972         if (test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
973                      &cur_data.link_report_flags)) {
974                 netif_carrier_off(bp->dev);
975                 netdev_err(bp->dev, "NIC Link is Down\n");
976                 return;
977         } else {
978                 const char *duplex;
979                 const char *flow;
980
981                 netif_carrier_on(bp->dev);
982
983                 if (test_and_clear_bit(BNX2X_LINK_REPORT_FD,
984                                        &cur_data.link_report_flags))
985                         duplex = "full";
986                 else
987                         duplex = "half";
988
989                 /* Handle the FC at the end so that only these flags can
990                  * possibly be set. This way we can easily check whether any FC
991                  * is enabled.
992                  */
993                 if (cur_data.link_report_flags) {
994                         if (test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
995                                      &cur_data.link_report_flags)) {
996                                 if (test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
997                                      &cur_data.link_report_flags))
998                                         flow = "ON - receive & transmit";
999                                 else
1000                                         flow = "ON - receive";
1001                         } else {
1002                                 flow = "ON - transmit";
1003                         }
1004                 } else {
1005                         flow = "none";
1006                 }
1007                 netdev_info(bp->dev, "NIC Link is Up, %d Mbps %s duplex, Flow control: %s\n",
1008                             cur_data.line_speed, duplex, flow);
1009         }
1010 }
1011
1012 void bnx2x_init_rx_rings(struct bnx2x *bp)
1013 {
1014         int func = BP_FUNC(bp);
1015         u16 ring_prod;
1016         int i, j;
1017
1018         /* Allocate TPA resources */
1019         for_each_rx_queue(bp, j) {
1020                 struct bnx2x_fastpath *fp = &bp->fp[j];
1021
1022                 DP(NETIF_MSG_IFUP,
1023                    "mtu %d  rx_buf_size %d\n", bp->dev->mtu, fp->rx_buf_size);
1024
1025                 if (!fp->disable_tpa) {
1026                         /* Fill the per-aggregation pool */
1027                         for (i = 0; i < MAX_AGG_QS(bp); i++) {
1028                                 struct bnx2x_agg_info *tpa_info =
1029                                         &fp->tpa_info[i];
1030                                 struct sw_rx_bd *first_buf =
1031                                         &tpa_info->first_buf;
1032
1033                                 first_buf->skb = netdev_alloc_skb(bp->dev,
1034                                                        fp->rx_buf_size);
1035                                 if (!first_buf->skb) {
1036                                         BNX2X_ERR("Failed to allocate TPA "
1037                                                   "skb pool for queue[%d] - "
1038                                                   "disabling TPA on this "
1039                                                   "queue!\n", j);
1040                                         bnx2x_free_tpa_pool(bp, fp, i);
1041                                         fp->disable_tpa = 1;
1042                                         break;
1043                                 }
1044                                 dma_unmap_addr_set(first_buf, mapping, 0);
1045                                 tpa_info->tpa_state = BNX2X_TPA_STOP;
1046                         }
1047
1048                         /* "next page" elements initialization */
1049                         bnx2x_set_next_page_sgl(fp);
1050
1051                         /* set SGEs bit mask */
1052                         bnx2x_init_sge_ring_bit_mask(fp);
1053
1054                         /* Allocate SGEs and initialize the ring elements */
1055                         for (i = 0, ring_prod = 0;
1056                              i < MAX_RX_SGE_CNT*NUM_RX_SGE_PAGES; i++) {
1057
1058                                 if (bnx2x_alloc_rx_sge(bp, fp, ring_prod) < 0) {
1059                                         BNX2X_ERR("was only able to allocate "
1060                                                   "%d rx sges\n", i);
1061                                         BNX2X_ERR("disabling TPA for "
1062                                                   "queue[%d]\n", j);
1063                                         /* Cleanup already allocated elements */
1064                                         bnx2x_free_rx_sge_range(bp, fp,
1065                                                                 ring_prod);
1066                                         bnx2x_free_tpa_pool(bp, fp,
1067                                                             MAX_AGG_QS(bp));
1068                                         fp->disable_tpa = 1;
1069                                         ring_prod = 0;
1070                                         break;
1071                                 }
1072                                 ring_prod = NEXT_SGE_IDX(ring_prod);
1073                         }
1074
1075                         fp->rx_sge_prod = ring_prod;
1076                 }
1077         }
1078
1079         for_each_rx_queue(bp, j) {
1080                 struct bnx2x_fastpath *fp = &bp->fp[j];
1081
1082                 fp->rx_bd_cons = 0;
1083
1084                 /* Activate BD ring */
1085                 /* Warning!
1086                  * This will generate an interrupt (to the TSTORM);
1087                  * it must only be done after the chip is initialized.
1088                  */
1089                 bnx2x_update_rx_prod(bp, fp, fp->rx_bd_prod, fp->rx_comp_prod,
1090                                      fp->rx_sge_prod);
1091
1092                 if (j != 0)
1093                         continue;
1094
1095                 if (CHIP_IS_E1(bp)) {
1096                         REG_WR(bp, BAR_USTRORM_INTMEM +
1097                                USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func),
1098                                U64_LO(fp->rx_comp_mapping));
1099                         REG_WR(bp, BAR_USTRORM_INTMEM +
1100                                USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func) + 4,
1101                                U64_HI(fp->rx_comp_mapping));
1102                 }
1103         }
1104 }
1105
1106 static void bnx2x_free_tx_skbs(struct bnx2x *bp)
1107 {
1108         int i;
1109         u8 cos;
1110
1111         for_each_tx_queue(bp, i) {
1112                 struct bnx2x_fastpath *fp = &bp->fp[i];
1113                 for_each_cos_in_tx_queue(fp, cos) {
1114                         struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
1115
1116                         u16 bd_cons = txdata->tx_bd_cons;
1117                         u16 sw_prod = txdata->tx_pkt_prod;
1118                         u16 sw_cons = txdata->tx_pkt_cons;
1119
1120                         while (sw_cons != sw_prod) {
1121                                 bd_cons = bnx2x_free_tx_pkt(bp, txdata,
1122                                                             TX_BD(sw_cons));
1123                                 sw_cons++;
1124                         }
1125                 }
1126         }
1127 }
1128
1129 static void bnx2x_free_rx_bds(struct bnx2x_fastpath *fp)
1130 {
1131         struct bnx2x *bp = fp->bp;
1132         int i;
1133
1134         /* ring wasn't allocated */
1135         if (fp->rx_buf_ring == NULL)
1136                 return;
1137
1138         for (i = 0; i < NUM_RX_BD; i++) {
1139                 struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[i];
1140                 struct sk_buff *skb = rx_buf->skb;
1141
1142                 if (skb == NULL)
1143                         continue;
1144                 dma_unmap_single(&bp->pdev->dev,
1145                                  dma_unmap_addr(rx_buf, mapping),
1146                                  fp->rx_buf_size, DMA_FROM_DEVICE);
1147
1148                 rx_buf->skb = NULL;
1149                 dev_kfree_skb(skb);
1150         }
1151 }
1152
1153 static void bnx2x_free_rx_skbs(struct bnx2x *bp)
1154 {
1155         int j;
1156
1157         for_each_rx_queue(bp, j) {
1158                 struct bnx2x_fastpath *fp = &bp->fp[j];
1159
1160                 bnx2x_free_rx_bds(fp);
1161
1162                 if (!fp->disable_tpa)
1163                         bnx2x_free_tpa_pool(bp, fp, MAX_AGG_QS(bp));
1164         }
1165 }
1166
1167 void bnx2x_free_skbs(struct bnx2x *bp)
1168 {
1169         bnx2x_free_tx_skbs(bp);
1170         bnx2x_free_rx_skbs(bp);
1171 }
1172
1173 void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value)
1174 {
1175         /* load old values */
1176         u32 mf_cfg = bp->mf_config[BP_VN(bp)];
1177
1178         if (value != bnx2x_extract_max_cfg(bp, mf_cfg)) {
1179                 /* leave all but MAX value */
1180                 mf_cfg &= ~FUNC_MF_CFG_MAX_BW_MASK;
1181
1182                 /* set new MAX value */
1183                 mf_cfg |= (value << FUNC_MF_CFG_MAX_BW_SHIFT)
1184                                 & FUNC_MF_CFG_MAX_BW_MASK;
1185
1186                 bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, mf_cfg);
1187         }
1188 }
1189
1190 /**
1191  * bnx2x_free_msix_irqs - free previously requested MSI-X IRQ vectors
1192  *
1193  * @bp:         driver handle
1194  * @nvecs:      number of vectors to be released
1195  */
1196 static void bnx2x_free_msix_irqs(struct bnx2x *bp, int nvecs)
1197 {
1198         int i, offset = 0;
1199
1200         if (nvecs == offset)
1201                 return;
1202         free_irq(bp->msix_table[offset].vector, bp->dev);
1203         DP(NETIF_MSG_IFDOWN, "released sp irq (%d)\n",
1204            bp->msix_table[offset].vector);
1205         offset++;
1206 #ifdef BCM_CNIC
1207         if (nvecs == offset)
1208                 return;
1209         offset++;
1210 #endif
1211
1212         for_each_eth_queue(bp, i) {
1213                 if (nvecs == offset)
1214                         return;
1215                 DP(NETIF_MSG_IFDOWN, "about to release fp #%d->%d "
1216                    "irq\n", i, bp->msix_table[offset].vector);
1217
1218                 free_irq(bp->msix_table[offset++].vector, &bp->fp[i]);
1219         }
1220 }
1221
1222 void bnx2x_free_irq(struct bnx2x *bp)
1223 {
1224         if (bp->flags & USING_MSIX_FLAG)
1225                 bnx2x_free_msix_irqs(bp, BNX2X_NUM_ETH_QUEUES(bp) +
1226                                      CNIC_PRESENT + 1);
1227         else if (bp->flags & USING_MSI_FLAG)
1228                 free_irq(bp->pdev->irq, bp->dev);
1229         else
1230                 free_irq(bp->pdev->irq, bp->dev);
1231 }
1232
1233 int bnx2x_enable_msix(struct bnx2x *bp)
1234 {
1235         int msix_vec = 0, i, rc, req_cnt;
1236
1237         bp->msix_table[msix_vec].entry = msix_vec;
1238         DP(NETIF_MSG_IFUP, "msix_table[0].entry = %d (slowpath)\n",
1239            bp->msix_table[0].entry);
1240         msix_vec++;
1241
1242 #ifdef BCM_CNIC
1243         bp->msix_table[msix_vec].entry = msix_vec;
1244         DP(NETIF_MSG_IFUP, "msix_table[%d].entry = %d (CNIC)\n",
1245            bp->msix_table[msix_vec].entry, bp->msix_table[msix_vec].entry);
1246         msix_vec++;
1247 #endif
1248         /* We need separate vectors for ETH queues only (not FCoE) */
1249         for_each_eth_queue(bp, i) {
1250                 bp->msix_table[msix_vec].entry = msix_vec;
1251                 DP(NETIF_MSG_IFUP, "msix_table[%d].entry = %d "
1252                    "(fastpath #%u)\n", msix_vec, msix_vec, i);
1253                 msix_vec++;
1254         }
1255
1256         req_cnt = BNX2X_NUM_ETH_QUEUES(bp) + CNIC_PRESENT + 1;
1257
1258         rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], req_cnt);
1259
1260         /*
1261          * reconfigure number of tx/rx queues according to available
1262          * MSI-X vectors
1263          */
1264         if (rc >= BNX2X_MIN_MSIX_VEC_CNT) {
1265                 /* how many fewer vectors than requested did we get? */
1266                 int diff = req_cnt - rc;
1267
1268                 DP(NETIF_MSG_IFUP,
1269                    "Trying to use less MSI-X vectors: %d\n", rc);
1270
1271                 rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], rc);
1272
1273                 if (rc) {
1274                         DP(NETIF_MSG_IFUP,
1275                            "MSI-X is not attainable  rc %d\n", rc);
1276                         return rc;
1277                 }
1278                 /*
1279                  * decrease number of queues by number of unallocated entries
1280                  */
1281                 bp->num_queues -= diff;
1282
1283                 DP(NETIF_MSG_IFUP, "New queue configuration set: %d\n",
1284                                   bp->num_queues);
1285         } else if (rc) {
1286                 /* fall back to INTx if there is not enough memory */
1287                 if (rc == -ENOMEM)
1288                         bp->flags |= DISABLE_MSI_FLAG;
1289                 DP(NETIF_MSG_IFUP, "MSI-X is not attainable  rc %d\n", rc);
1290                 return rc;
1291         }
1292
1293         bp->flags |= USING_MSIX_FLAG;
1294
1295         return 0;
1296 }
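/*
 * The MSI-X table built above is laid out as: entry 0 for the slowpath
 * interrupt, one optional entry for CNIC when BCM_CNIC is defined, then one
 * entry per ETH fastpath queue - hence a req_cnt of
 * BNX2X_NUM_ETH_QUEUES(bp) + CNIC_PRESENT + 1. If pci_enable_msix() grants
 * fewer vectors (but at least BNX2X_MIN_MSIX_VEC_CNT), the request is
 * retried with the granted count and bp->num_queues is shrunk accordingly.
 */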
1297
1298 static int bnx2x_req_msix_irqs(struct bnx2x *bp)
1299 {
1300         int i, rc, offset = 0;
1301
1302         rc = request_irq(bp->msix_table[offset++].vector,
1303                          bnx2x_msix_sp_int, 0,
1304                          bp->dev->name, bp->dev);
1305         if (rc) {
1306                 BNX2X_ERR("request sp irq failed\n");
1307                 return -EBUSY;
1308         }
1309
1310 #ifdef BCM_CNIC
1311         offset++;
1312 #endif
1313         for_each_eth_queue(bp, i) {
1314                 struct bnx2x_fastpath *fp = &bp->fp[i];
1315                 snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
1316                          bp->dev->name, i);
1317
1318                 rc = request_irq(bp->msix_table[offset].vector,
1319                                  bnx2x_msix_fp_int, 0, fp->name, fp);
1320                 if (rc) {
1321                         BNX2X_ERR("request fp #%d irq (%d) failed  rc %d\n", i,
1322                               bp->msix_table[offset].vector, rc);
1323                         bnx2x_free_msix_irqs(bp, offset);
1324                         return -EBUSY;
1325                 }
1326
1327                 offset++;
1328         }
1329
1330         i = BNX2X_NUM_ETH_QUEUES(bp);
1331         offset = 1 + CNIC_PRESENT;
1332         netdev_info(bp->dev, "using MSI-X  IRQs: sp %d  fp[%d] %d"
1333                " ... fp[%d] %d\n",
1334                bp->msix_table[0].vector,
1335                0, bp->msix_table[offset].vector,
1336                i - 1, bp->msix_table[offset + i - 1].vector);
1337
1338         return 0;
1339 }
1340
1341 int bnx2x_enable_msi(struct bnx2x *bp)
1342 {
1343         int rc;
1344
1345         rc = pci_enable_msi(bp->pdev);
1346         if (rc) {
1347                 DP(NETIF_MSG_IFUP, "MSI is not attainable\n");
1348                 return -1;
1349         }
1350         bp->flags |= USING_MSI_FLAG;
1351
1352         return 0;
1353 }
1354
1355 static int bnx2x_req_irq(struct bnx2x *bp)
1356 {
1357         unsigned long flags;
1358         int rc;
1359
1360         if (bp->flags & USING_MSI_FLAG)
1361                 flags = 0;
1362         else
1363                 flags = IRQF_SHARED;
1364
1365         rc = request_irq(bp->pdev->irq, bnx2x_interrupt, flags,
1366                          bp->dev->name, bp->dev);
1367         return rc;
1368 }
1369
1370 static inline int bnx2x_setup_irqs(struct bnx2x *bp)
1371 {
1372         int rc = 0;
1373         if (bp->flags & USING_MSIX_FLAG) {
1374                 rc = bnx2x_req_msix_irqs(bp);
1375                 if (rc)
1376                         return rc;
1377         } else {
1378                 bnx2x_ack_int(bp);
1379                 rc = bnx2x_req_irq(bp);
1380                 if (rc) {
1381                         BNX2X_ERR("IRQ request failed  rc %d, aborting\n", rc);
1382                         return rc;
1383                 }
1384                 if (bp->flags & USING_MSI_FLAG) {
1385                         bp->dev->irq = bp->pdev->irq;
1386                         netdev_info(bp->dev, "using MSI  IRQ %d\n",
1387                                bp->pdev->irq);
1388                 }
1389         }
1390
1391         return 0;
1392 }
1393
1394 static inline void bnx2x_napi_enable(struct bnx2x *bp)
1395 {
1396         int i;
1397
1398         for_each_rx_queue(bp, i)
1399                 napi_enable(&bnx2x_fp(bp, i, napi));
1400 }
1401
1402 static inline void bnx2x_napi_disable(struct bnx2x *bp)
1403 {
1404         int i;
1405
1406         for_each_rx_queue(bp, i)
1407                 napi_disable(&bnx2x_fp(bp, i, napi));
1408 }
1409
1410 void bnx2x_netif_start(struct bnx2x *bp)
1411 {
1412         if (netif_running(bp->dev)) {
1413                 bnx2x_napi_enable(bp);
1414                 bnx2x_int_enable(bp);
1415                 if (bp->state == BNX2X_STATE_OPEN)
1416                         netif_tx_wake_all_queues(bp->dev);
1417         }
1418 }
1419
1420 void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
1421 {
1422         bnx2x_int_disable_sync(bp, disable_hw);
1423         bnx2x_napi_disable(bp);
1424 }
1425
1426 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb)
1427 {
1428         struct bnx2x *bp = netdev_priv(dev);
1429
1430 #ifdef BCM_CNIC
1431         if (!NO_FCOE(bp)) {
1432                 struct ethhdr *hdr = (struct ethhdr *)skb->data;
1433                 u16 ether_type = ntohs(hdr->h_proto);
1434
1435                 /* Skip VLAN tag if present */
1436                 if (ether_type == ETH_P_8021Q) {
1437                         struct vlan_ethhdr *vhdr =
1438                                 (struct vlan_ethhdr *)skb->data;
1439
1440                         ether_type = ntohs(vhdr->h_vlan_encapsulated_proto);
1441                 }
1442
1443                 /* If ethertype is FCoE or FIP - use FCoE ring */
1444                 if ((ether_type == ETH_P_FCOE) || (ether_type == ETH_P_FIP))
1445                         return bnx2x_fcoe_tx(bp, txq_index);
1446         }
1447 #endif
1448         /* select a non-FCoE queue */
1449         return __skb_tx_hash(dev, skb, BNX2X_NUM_ETH_QUEUES(bp));
1450 }
1451
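/* Derive bp->num_queues from the configured RSS mode and then add the
 * special (non-ETH) contexts accounted for by NON_ETH_CONTEXT_USE.
 */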
1452 void bnx2x_set_num_queues(struct bnx2x *bp)
1453 {
1454         switch (bp->multi_mode) {
1455         case ETH_RSS_MODE_DISABLED:
1456                 bp->num_queues = 1;
1457                 break;
1458         case ETH_RSS_MODE_REGULAR:
1459                 bp->num_queues = bnx2x_calc_num_queues(bp);
1460                 break;
1461
1462         default:
1463                 bp->num_queues = 1;
1464                 break;
1465         }
1466
1467         /* Add special queues */
1468         bp->num_queues += NON_ETH_CONTEXT_USE;
1469 }
1470
1471 /**
1472  * bnx2x_set_real_num_queues - configure netdev->real_num_[tx,rx]_queues
1473  *
1474  * @bp:         Driver handle
1475  *
1476  * We currently support at most 16 Tx queues for each CoS, thus we will
1477  * allocate a multiple of 16 for ETH L2 rings according to the value of the
1478  * bp->max_cos.
1479  *
1480  * If there is an FCoE L2 queue the appropriate Tx queue will have the next
1481  * index after all ETH L2 indices.
1482  *
1483  * If the actual number of Tx queues (for each CoS) is less than 16 then there
1484  * will be holes at the end of each group of 16 ETH L2 indices (0..15,
1485  * 16..31,...) with indices that are not coupled with any real Tx queue.
1486  *
1487  * The proper configuration of skb->queue_mapping is handled by
1488  * bnx2x_select_queue() and __skb_tx_hash().
1489  *
1490  * bnx2x_setup_tc() takes care of the proper TC mappings so that __skb_tx_hash()
1491  * will return a proper Tx index if TC is enabled (netdev->num_tc > 0).
1492  */
1493 static inline int bnx2x_set_real_num_queues(struct bnx2x *bp)
1494 {
1495         int rc, tx, rx;
1496
1497         tx = MAX_TXQS_PER_COS * bp->max_cos;
1498         rx = BNX2X_NUM_ETH_QUEUES(bp);
1499
1500 /* account for fcoe queue */
1501 #ifdef BCM_CNIC
1502         if (!NO_FCOE(bp)) {
1503                 rx += FCOE_PRESENT;
1504                 tx += FCOE_PRESENT;
1505         }
1506 #endif
1507
1508         rc = netif_set_real_num_tx_queues(bp->dev, tx);
1509         if (rc) {
1510                 BNX2X_ERR("Failed to set real number of Tx queues: %d\n", rc);
1511                 return rc;
1512         }
1513         rc = netif_set_real_num_rx_queues(bp->dev, rx);
1514         if (rc) {
1515                 BNX2X_ERR("Failed to set real number of Rx queues: %d\n", rc);
1516                 return rc;
1517         }
1518
1519         DP(NETIF_MSG_DRV, "Setting real num queues to (tx, rx) (%d, %d)\n",
1520                           tx, rx);
1521
1522         return rc;
1523 }
1524
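/* Set the Rx buffer size of every queue to
 *   MTU + ETH_OVREHEAD + BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING
 * where the MTU is the netdev MTU, except for the FCoE L2 ring which always
 * uses BNX2X_FCOE_MINI_JUMBO_MTU.
 */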
1525 static inline void bnx2x_set_rx_buf_size(struct bnx2x *bp)
1526 {
1527         int i;
1528
1529         for_each_queue(bp, i) {
1530                 struct bnx2x_fastpath *fp = &bp->fp[i];
1531
1532                 /* Always use a mini-jumbo MTU for the FCoE L2 ring */
1533                 if (IS_FCOE_IDX(i))
1534                         /*
1535                          * Although there are no IP frames expected to arrive on
1536                          * this ring we still want to add an
1537                          * IP_HEADER_ALIGNMENT_PADDING to prevent a buffer
1538                          * overrun attack.
1539                          */
1540                         fp->rx_buf_size =
1541                                 BNX2X_FCOE_MINI_JUMBO_MTU + ETH_OVREHEAD +
1542                                 BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING;
1543                 else
1544                         fp->rx_buf_size =
1545                                 bp->dev->mtu + ETH_OVREHEAD +
1546                                 BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING;
1547         }
1548 }
1549
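/* Build the initial RSS indirection table and push the RSS configuration to
 * the device.  Entry i maps to client id (bp->fp->cl_id + i % num_eth_queues),
 * so, for example, with four ETH queues the table simply cycles through the
 * first four client ids.
 */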
1550 static inline int bnx2x_init_rss_pf(struct bnx2x *bp)
1551 {
1552         int i;
1553         u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0};
1554         u8 num_eth_queues = BNX2X_NUM_ETH_QUEUES(bp);
1555
1556         /*
1557          * Prepare the initial contents of the indirection table if RSS is
1558          * enabled
1559          */
1560         if (bp->multi_mode != ETH_RSS_MODE_DISABLED) {
1561                 for (i = 0; i < sizeof(ind_table); i++)
1562                         ind_table[i] =
1563                                 bp->fp->cl_id + (i % num_eth_queues);
1564         }
1565
1566         /*
1567          * For 57710 and 57711 SEARCHER configuration (rss_keys) is
1568          * per-port, so if explicit configuration is needed, do it only
1569          * for a PMF.
1570          *
1571          * For 57712 and newer, on the other hand, it's a per-function
1572          * configuration.
1573          */
1574         return bnx2x_config_rss_pf(bp, ind_table,
1575                                    bp->port.pmf || !CHIP_IS_E1x(bp));
1576 }
1577
1578 int bnx2x_config_rss_pf(struct bnx2x *bp, u8 *ind_table, bool config_hash)
1579 {
1580         struct bnx2x_config_rss_params params = {0};
1581         int i;
1582
1583         /* Although RSS is meaningless when there is a single HW queue we
1584          * still need it enabled in order to have HW Rx hash generated.
1585          *
1586          * if (!is_eth_multi(bp))
1587          *      bp->multi_mode = ETH_RSS_MODE_DISABLED;
1588          */
1589
1590         params.rss_obj = &bp->rss_conf_obj;
1591
1592         __set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags);
1593
1594         /* RSS mode */
1595         switch (bp->multi_mode) {
1596         case ETH_RSS_MODE_DISABLED:
1597                 __set_bit(BNX2X_RSS_MODE_DISABLED, &params.rss_flags);
1598                 break;
1599         case ETH_RSS_MODE_REGULAR:
1600                 __set_bit(BNX2X_RSS_MODE_REGULAR, &params.rss_flags);
1601                 break;
1602         case ETH_RSS_MODE_VLAN_PRI:
1603                 __set_bit(BNX2X_RSS_MODE_VLAN_PRI, &params.rss_flags);
1604                 break;
1605         case ETH_RSS_MODE_E1HOV_PRI:
1606                 __set_bit(BNX2X_RSS_MODE_E1HOV_PRI, &params.rss_flags);
1607                 break;
1608         case ETH_RSS_MODE_IP_DSCP:
1609                 __set_bit(BNX2X_RSS_MODE_IP_DSCP, &params.rss_flags);
1610                 break;
1611         default:
1612                 BNX2X_ERR("Unknown multi_mode: %d\n", bp->multi_mode);
1613                 return -EINVAL;
1614         }
1615
1616         /* If RSS is enabled */
1617         if (bp->multi_mode != ETH_RSS_MODE_DISABLED) {
1618                 /* RSS configuration */
1619                 __set_bit(BNX2X_RSS_IPV4, &params.rss_flags);
1620                 __set_bit(BNX2X_RSS_IPV4_TCP, &params.rss_flags);
1621                 __set_bit(BNX2X_RSS_IPV6, &params.rss_flags);
1622                 __set_bit(BNX2X_RSS_IPV6_TCP, &params.rss_flags);
1623
1624                 /* Hash bits */
1625                 params.rss_result_mask = MULTI_MASK;
1626
1627                 memcpy(params.ind_table, ind_table, sizeof(params.ind_table));
1628
1629                 if (config_hash) {
1630                         /* RSS keys */
1631                         for (i = 0; i < sizeof(params.rss_key) / 4; i++)
1632                                 params.rss_key[i] = random32();
1633
1634                         __set_bit(BNX2X_RSS_SET_SRCH, &params.rss_flags);
1635                 }
1636         }
1637
1638         return bnx2x_config_rss(bp, &params);
1639 }
1640
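/* Send the HW_INIT function-state ramrod for the given load phase and wait
 * for its completion.
 */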
1641 static inline int bnx2x_init_hw(struct bnx2x *bp, u32 load_code)
1642 {
1643         struct bnx2x_func_state_params func_params = {0};
1644
1645         /* Prepare parameters for function state transitions */
1646         __set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
1647
1648         func_params.f_obj = &bp->func_obj;
1649         func_params.cmd = BNX2X_F_CMD_HW_INIT;
1650
1651         func_params.params.hw_init.load_phase = load_code;
1652
1653         return bnx2x_func_state_change(bp, &func_params);
1654 }
1655
1656 /*
1657  * Cleans the objects that have internal lists without sending
1658  * ramrods. Should be run when interrupts are disabled.
1659  */
1660 static void bnx2x_squeeze_objects(struct bnx2x *bp)
1661 {
1662         int rc;
1663         unsigned long ramrod_flags = 0, vlan_mac_flags = 0;
1664         struct bnx2x_mcast_ramrod_params rparam = {0};
1665         struct bnx2x_vlan_mac_obj *mac_obj = &bp->fp->mac_obj;
1666
1667         /***************** Cleanup MACs' object first *************************/
1668
1669         /* Wait for completion of the requested commands */
1670         __set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
1671         /* Perform a dry cleanup */
1672         __set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
1673
1674         /* Clean ETH primary MAC */
1675         __set_bit(BNX2X_ETH_MAC, &vlan_mac_flags);
1676         rc = mac_obj->delete_all(bp, &bp->fp->mac_obj, &vlan_mac_flags,
1677                                  &ramrod_flags);
1678         if (rc != 0)
1679                 BNX2X_ERR("Failed to clean ETH MACs: %d\n", rc);
1680
1681         /* Cleanup UC list */
1682         vlan_mac_flags = 0;
1683         __set_bit(BNX2X_UC_LIST_MAC, &vlan_mac_flags);
1684         rc = mac_obj->delete_all(bp, mac_obj, &vlan_mac_flags,
1685                                  &ramrod_flags);
1686         if (rc != 0)
1687                 BNX2X_ERR("Failed to clean UC list MACs: %d\n", rc);
1688
1689         /***************** Now clean mcast object *****************************/
1690         rparam.mcast_obj = &bp->mcast_obj;
1691         __set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
1692
1693         /* Add a DEL command... */
1694         rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
1695         if (rc < 0)
1696                 BNX2X_ERR("Failed to add a new DEL command to a multi-cast "
1697                           "object: %d\n", rc);
1698
1699         /* ...and wait until all pending commands are cleared */
1700         rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
1701         while (rc != 0) {
1702                 if (rc < 0) {
1703                         BNX2X_ERR("Failed to clean multi-cast object: %d\n",
1704                                   rc);
1705                         return;
1706                 }
1707
1708                 rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
1709         }
1710 }
1711
1712 #ifndef BNX2X_STOP_ON_ERROR
1713 #define LOAD_ERROR_EXIT(bp, label) \
1714         do { \
1715                 (bp)->state = BNX2X_STATE_ERROR; \
1716                 goto label; \
1717         } while (0)
1718 #else
1719 #define LOAD_ERROR_EXIT(bp, label) \
1720         do { \
1721                 (bp)->state = BNX2X_STATE_ERROR; \
1722                 (bp)->panic = 1; \
1723                 return -EBUSY; \
1724         } while (0)
1725 #endif
1726
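/* Bring the NIC up: allocate resources, negotiate the type of LOAD with the
 * MCP (or emulate it via load_count[] when there is no MCP), init the HW and
 * the queues, configure the MAC and the Rx mode and finally start the Tx path
 * according to the requested load_mode.
 */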
1727 /* must be called with rtnl_lock */
1728 int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
1729 {
1730         int port = BP_PORT(bp);
1731         u32 load_code;
1732         int i, rc;
1733
1734 #ifdef BNX2X_STOP_ON_ERROR
1735         if (unlikely(bp->panic))
1736                 return -EPERM;
1737 #endif
1738
1739         bp->state = BNX2X_STATE_OPENING_WAIT4_LOAD;
1740
1741         /* Set the initial link reported state to link down */
1742         bnx2x_acquire_phy_lock(bp);
1743         memset(&bp->last_reported_link, 0, sizeof(bp->last_reported_link));
1744         __set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
1745                 &bp->last_reported_link.link_report_flags);
1746         bnx2x_release_phy_lock(bp);
1747
1748         /* must be called before memory allocation and HW init */
1749         bnx2x_ilt_set_info(bp);
1750
1751         /*
1752          * Zero the fastpath structures, preserving invariants like the napi
1753          * object (allocated only once), the fp index, max_cos and the bp pointer.
1754          * Also set fp->disable_tpa.
1755          */
1756         for_each_queue(bp, i)
1757                 bnx2x_bz_fp(bp, i);
1758
1759
1760         /* Set the receive queues buffer size */
1761         bnx2x_set_rx_buf_size(bp);
1762
1763         if (bnx2x_alloc_mem(bp))
1764                 return -ENOMEM;
1765
1766         /* Since bnx2x_alloc_mem() may possibly update
1767          * bp->num_queues, bnx2x_set_real_num_queues() should always
1768          * come after it.
1769          */
1770         rc = bnx2x_set_real_num_queues(bp);
1771         if (rc) {
1772                 BNX2X_ERR("Unable to set real_num_queues\n");
1773                 LOAD_ERROR_EXIT(bp, load_error0);
1774         }
1775
1776         /* Configure multi-CoS mappings in the kernel.
1777          * This configuration may be overridden by a multi-class queue
1778          * discipline or by a DCBX negotiation result.
1779          */
1780         bnx2x_setup_tc(bp->dev, bp->max_cos);
1781
1782         bnx2x_napi_enable(bp);
1783
1784         /* Send the LOAD_REQUEST command to the MCP.
1785          * The returned code tells us which type of LOAD to perform:
1786          * if this is the first port to be initialized, the common
1787          * blocks should be initialized as well, otherwise not.
1788          */
1789         if (!BP_NOMCP(bp)) {
1790                 load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, 0);
1791                 if (!load_code) {
1792                         BNX2X_ERR("MCP response failure, aborting\n");
1793                         rc = -EBUSY;
1794                         LOAD_ERROR_EXIT(bp, load_error1);
1795                 }
1796                 if (load_code == FW_MSG_CODE_DRV_LOAD_REFUSED) {
1797                         rc = -EBUSY; /* other port in diagnostic mode */
1798                         LOAD_ERROR_EXIT(bp, load_error1);
1799                 }
1800
1801         } else {
1802                 int path = BP_PATH(bp);
1803
1804                 DP(NETIF_MSG_IFUP, "NO MCP - load counts[%d]      %d, %d, %d\n",
1805                    path, load_count[path][0], load_count[path][1],
1806                    load_count[path][2]);
1807                 load_count[path][0]++;
1808                 load_count[path][1 + port]++;
1809                 DP(NETIF_MSG_IFUP, "NO MCP - new load counts[%d]  %d, %d, %d\n",
1810                    path, load_count[path][0], load_count[path][1],
1811                    load_count[path][2]);
1812                 if (load_count[path][0] == 1)
1813                         load_code = FW_MSG_CODE_DRV_LOAD_COMMON;
1814                 else if (load_count[path][1 + port] == 1)
1815                         load_code = FW_MSG_CODE_DRV_LOAD_PORT;
1816                 else
1817                         load_code = FW_MSG_CODE_DRV_LOAD_FUNCTION;
1818         }
1819
1820         if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
1821             (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) ||
1822             (load_code == FW_MSG_CODE_DRV_LOAD_PORT)) {
1823                 bp->port.pmf = 1;
1824                 /*
1825                  * We need the barrier to ensure the ordering between the
1826                  * writing to bp->port.pmf here and reading it from the
1827                  * bnx2x_periodic_task().
1828                  */
1829                 smp_mb();
1830                 queue_delayed_work(bnx2x_wq, &bp->period_task, 0);
1831         } else
1832                 bp->port.pmf = 0;
1833
1834         DP(NETIF_MSG_LINK, "pmf %d\n", bp->port.pmf);
1835
1836         /* Init Function state controlling object */
1837         bnx2x__init_func_obj(bp);
1838
1839         /* Initialize HW */
1840         rc = bnx2x_init_hw(bp, load_code);
1841         if (rc) {
1842                 BNX2X_ERR("HW init failed, aborting\n");
1843                 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1844                 LOAD_ERROR_EXIT(bp, load_error2);
1845         }
1846
1847         /* Connect to IRQs */
1848         rc = bnx2x_setup_irqs(bp);
1849         if (rc) {
1850                 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1851                 LOAD_ERROR_EXIT(bp, load_error2);
1852         }
1853
1854         /* Setup NIC internals and enable interrupts */
1855         bnx2x_nic_init(bp, load_code);
1856
1857         /* Init per-function objects */
1858         bnx2x_init_bp_objs(bp);
1859
1860         if (((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
1861             (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP)) &&
1862             (bp->common.shmem2_base)) {
1863                 if (SHMEM2_HAS(bp, dcc_support))
1864                         SHMEM2_WR(bp, dcc_support,
1865                                   (SHMEM_DCC_SUPPORT_DISABLE_ENABLE_PF_TLV |
1866                                    SHMEM_DCC_SUPPORT_BANDWIDTH_ALLOCATION_TLV));
1867         }
1868
1869         bp->state = BNX2X_STATE_OPENING_WAIT4_PORT;
1870         rc = bnx2x_func_start(bp);
1871         if (rc) {
1872                 BNX2X_ERR("Function start failed!\n");
1873                 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1874                 LOAD_ERROR_EXIT(bp, load_error3);
1875         }
1876
1877         /* Send LOAD_DONE command to MCP */
1878         if (!BP_NOMCP(bp)) {
1879                 load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1880                 if (!load_code) {
1881                         BNX2X_ERR("MCP response failure, aborting\n");
1882                         rc = -EBUSY;
1883                         LOAD_ERROR_EXIT(bp, load_error3);
1884                 }
1885         }
1886
1887         rc = bnx2x_setup_leading(bp);
1888         if (rc) {
1889                 BNX2X_ERR("Setup leading failed!\n");
1890                 LOAD_ERROR_EXIT(bp, load_error3);
1891         }
1892
1893 #ifdef BCM_CNIC
1894         /* Enable Timer scan */
1895         REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 1);
1896 #endif
1897
1898         for_each_nondefault_queue(bp, i) {
1899                 rc = bnx2x_setup_queue(bp, &bp->fp[i], 0);
1900                 if (rc)
1901                         LOAD_ERROR_EXIT(bp, load_error4);
1902         }
1903
1904         rc = bnx2x_init_rss_pf(bp);
1905         if (rc)
1906                 LOAD_ERROR_EXIT(bp, load_error4);
1907
1908         /* Now that the clients are configured we are ready to work */
1909         bp->state = BNX2X_STATE_OPEN;
1910
1911         /* Configure a ucast MAC */
1912         rc = bnx2x_set_eth_mac(bp, true);
1913         if (rc)
1914                 LOAD_ERROR_EXIT(bp, load_error4);
1915
1916         if (bp->pending_max) {
1917                 bnx2x_update_max_mf_config(bp, bp->pending_max);
1918                 bp->pending_max = 0;
1919         }
1920
1921         if (bp->port.pmf)
1922                 bnx2x_initial_phy_init(bp, load_mode);
1923
1924         /* Start fast path */
1925
1926         /* Initialize Rx filter. */
1927         netif_addr_lock_bh(bp->dev);
1928         bnx2x_set_rx_mode(bp->dev);
1929         netif_addr_unlock_bh(bp->dev);
1930
1931         /* Start the Tx */
1932         switch (load_mode) {
1933         case LOAD_NORMAL:
1934                 /* Tx queues should only be re-enabled */
1935                 netif_tx_wake_all_queues(bp->dev);
1936                 break;
1937
1938         case LOAD_OPEN:
1939                 netif_tx_start_all_queues(bp->dev);
1940                 smp_mb__after_clear_bit();
1941                 break;
1942
1943         case LOAD_DIAG:
1944                 bp->state = BNX2X_STATE_DIAG;
1945                 break;
1946
1947         default:
1948                 break;
1949         }
1950
1951         if (!bp->port.pmf)
1952                 bnx2x__link_status_update(bp);
1953
1954         /* start the timer */
1955         mod_timer(&bp->timer, jiffies + bp->current_interval);
1956
1957 #ifdef BCM_CNIC
1958         bnx2x_setup_cnic_irq_info(bp);
1959         if (bp->state == BNX2X_STATE_OPEN)
1960                 bnx2x_cnic_notify(bp, CNIC_CTL_START_CMD);
1961 #endif
1962         bnx2x_inc_load_cnt(bp);
1963
1964         /* Wait for all pending SP commands to complete */
1965         if (!bnx2x_wait_sp_comp(bp, ~0x0UL)) {
1966                 BNX2X_ERR("Timeout waiting for SP elements to complete\n");
1967                 bnx2x_nic_unload(bp, UNLOAD_CLOSE);
1968                 return -EBUSY;
1969         }
1970
1971         bnx2x_dcbx_init(bp);
1972         return 0;
1973
1974 #ifndef BNX2X_STOP_ON_ERROR
1975 load_error4:
1976 #ifdef BCM_CNIC
1977         /* Disable Timer scan */
1978         REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 0);
1979 #endif
1980 load_error3:
1981         bnx2x_int_disable_sync(bp, 1);
1982
1983         /* Clean queueable objects */
1984         bnx2x_squeeze_objects(bp);
1985
1986         /* Free SKBs, SGEs, TPA pool and driver internals */
1987         bnx2x_free_skbs(bp);
1988         for_each_rx_queue(bp, i)
1989                 bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
1990
1991         /* Release IRQs */
1992         bnx2x_free_irq(bp);
1993 load_error2:
1994         if (!BP_NOMCP(bp)) {
1995                 bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0);
1996                 bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, 0);
1997         }
1998
1999         bp->port.pmf = 0;
2000 load_error1:
2001         bnx2x_napi_disable(bp);
2002 load_error0:
2003         bnx2x_free_mem(bp);
2004
2005         return rc;
2006 #endif /* ! BNX2X_STOP_ON_ERROR */
2007 }
2008
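/* Tear the NIC down: stop the Tx path and the timer, clean up the chip (or,
 * in recovery mode, only notify the MCP and quiesce the function), release
 * IRQs and memory and update the parity-recovery bookkeeping.
 */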
2009 /* must be called with rtnl_lock */
2010 int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
2011 {
2012         int i;
2013         bool global = false;
2014
2015         if ((bp->state == BNX2X_STATE_CLOSED) ||
2016             (bp->state == BNX2X_STATE_ERROR)) {
2017                 /* We can get here if the driver has been unloaded
2018                  * during parity error recovery and is either waiting for a
2019                  * leader to complete or for other functions to unload and
2020                  * then ifdown has been issued. In this case we want to
2021                  * unload and let the other functions complete the recovery
2022                  * process.
2023                  */
2024                 bp->recovery_state = BNX2X_RECOVERY_DONE;
2025                 bp->is_leader = 0;
2026                 bnx2x_release_leader_lock(bp);
2027                 smp_mb();
2028
2029                 DP(NETIF_MSG_HW, "Releasing a leadership...\n");
2030
2031                 return -EINVAL;
2032         }
2033
2034         /*
2035          * It's important to set bp->state to a value different from
2036          * BNX2X_STATE_OPEN and only then stop the Tx. Otherwise bnx2x_tx_int()
2037          * may restart the Tx from the NAPI context (see bnx2x_tx_int()).
2038          */
2039         bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT;
2040         smp_mb();
2041
2042         /* Stop Tx */
2043         bnx2x_tx_disable(bp);
2044
2045 #ifdef BCM_CNIC
2046         bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD);
2047 #endif
2048
2049         bp->rx_mode = BNX2X_RX_MODE_NONE;
2050
2051         del_timer_sync(&bp->timer);
2052
2053         /* Set ALWAYS_ALIVE bit in shmem */
2054         bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;
2055
2056         bnx2x_drv_pulse(bp);
2057
2058         bnx2x_stats_handle(bp, STATS_EVENT_STOP);
2059
2060         /* Cleanup the chip if needed */
2061         if (unload_mode != UNLOAD_RECOVERY)
2062                 bnx2x_chip_cleanup(bp, unload_mode);
2063         else {
2064                 /* Send the UNLOAD_REQUEST to the MCP */
2065                 bnx2x_send_unload_req(bp, unload_mode);
2066
2067                 /*
2068                  * Prevent transactions to the host from the functions on the
2069                  * engine that doesn't reset global blocks in case of a global
2070                  * attention once the global blocks are reset and the gates are
2071                  * opened (the engine whose leader will perform the recovery
2072                  * last).
2073                  */
2074                 if (!CHIP_IS_E1x(bp))
2075                         bnx2x_pf_disable(bp);
2076
2077                 /* Disable HW interrupts, NAPI */
2078                 bnx2x_netif_stop(bp, 1);
2079
2080                 /* Release IRQs */
2081                 bnx2x_free_irq(bp);
2082
2083                 /* Report UNLOAD_DONE to MCP */
2084                 bnx2x_send_unload_done(bp);
2085         }
2086
2087         /*
2088          * At this stage no more interrupts will arrive so we may safely clean
2089          * the queueable objects here in case they failed to get cleaned so far.
2090          */
2091         bnx2x_squeeze_objects(bp);
2092
2093         /* There should be no more pending SP commands at this stage */
2094         bp->sp_state = 0;
2095
2096         bp->port.pmf = 0;
2097
2098         /* Free SKBs, SGEs, TPA pool and driver internals */
2099         bnx2x_free_skbs(bp);
2100         for_each_rx_queue(bp, i)
2101                 bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
2102
2103         bnx2x_free_mem(bp);
2104
2105         bp->state = BNX2X_STATE_CLOSED;
2106
2107         /* Check if there are pending parity attentions. If there are - set
2108          * RECOVERY_IN_PROGRESS.
2109          */
2110         if (bnx2x_chk_parity_attn(bp, &global, false)) {
2111                 bnx2x_set_reset_in_progress(bp);
2112
2113                 /* Set RESET_IS_GLOBAL if needed */
2114                 if (global)
2115                         bnx2x_set_reset_global(bp);
2116         }
2117
2118
2119         /* The last driver must disable the "close the gate" functionality if
2120          * there is no parity attention or "process kill" pending.
2121          */
2122         if (!bnx2x_dec_load_cnt(bp) && bnx2x_reset_is_done(bp, BP_PATH(bp)))
2123                 bnx2x_disable_close_the_gate(bp);
2124
2125         return 0;
2126 }
2127
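/* Program the PCI PM control register to move the device between D0 and
 * D3hot, honouring WoL and skipping the transition on emulation/FPGA or when
 * other clients still hold the device enabled.
 */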
2128 int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state)
2129 {
2130         u16 pmcsr;
2131
2132         /* If there is no power capability, silently succeed */
2133         if (!bp->pm_cap) {
2134                 DP(NETIF_MSG_HW, "No power capability. Breaking.\n");
2135                 return 0;
2136         }
2137
2138         pci_read_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL, &pmcsr);
2139
2140         switch (state) {
2141         case PCI_D0:
2142                 pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL,
2143                                       ((pmcsr & ~PCI_PM_CTRL_STATE_MASK) |
2144                                        PCI_PM_CTRL_PME_STATUS));
2145
2146                 if (pmcsr & PCI_PM_CTRL_STATE_MASK)
2147                         /* delay required during transition out of D3hot */
2148                         msleep(20);
2149                 break;
2150
2151         case PCI_D3hot:
2152                 /* If there are other clients above, don't
2153                    shut down the power */
2154                 if (atomic_read(&bp->pdev->enable_cnt) != 1)
2155                         return 0;
2156                 /* Don't shut down the power for emulation and FPGA */
2157                 if (CHIP_REV_IS_SLOW(bp))
2158                         return 0;
2159
2160                 pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
2161                 pmcsr |= 3;
2162
2163                 if (bp->wol)
2164                         pmcsr |= PCI_PM_CTRL_PME_ENABLE;
2165
2166                 pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL,
2167                                       pmcsr);
2168
2169                 /* No more memory access after this point until
2170                  * device is brought back to D0.
2171                  */
2172                 break;
2173
2174         default:
2175                 return -EINVAL;
2176         }
2177         return 0;
2178 }
2179
2180 /*
2181  * net_device service functions
2182  */
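/* NAPI poll handler: service Tx completions for every CoS, process Rx within
 * the given budget and, once no work is left, re-arm the status block / IGU
 * interrupt for this fastpath.
 */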
2183 int bnx2x_poll(struct napi_struct *napi, int budget)
2184 {
2185         int work_done = 0;
2186         u8 cos;
2187         struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath,
2188                                                  napi);
2189         struct bnx2x *bp = fp->bp;
2190
2191         while (1) {
2192 #ifdef BNX2X_STOP_ON_ERROR
2193                 if (unlikely(bp->panic)) {
2194                         napi_complete(napi);
2195                         return 0;
2196                 }
2197 #endif
2198
2199                 for_each_cos_in_tx_queue(fp, cos)
2200                         if (bnx2x_tx_queue_has_work(&fp->txdata[cos]))
2201                                 bnx2x_tx_int(bp, &fp->txdata[cos]);
2202
2203
2204                 if (bnx2x_has_rx_work(fp)) {
2205                         work_done += bnx2x_rx_int(fp, budget - work_done);
2206
2207                         /* must not complete if we consumed full budget */
2208                         if (work_done >= budget)
2209                                 break;
2210                 }
2211
2212                 /* Fall out from the NAPI loop if needed */
2213                 if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
2214 #ifdef BCM_CNIC
2215                         /* No need to update SB for FCoE L2 ring as long as
2216                          * it's connected to the default SB and the SB
2217                          * has been updated when NAPI was scheduled.
2218                          */
2219                         if (IS_FCOE_FP(fp)) {
2220                                 napi_complete(napi);
2221                                 break;
2222                         }
2223 #endif
2224
2225                         bnx2x_update_fpsb_idx(fp);
2226                         /* bnx2x_has_rx_work() reads the status block,
2227                          * thus we need to ensure that status block indices
2228                          * have been actually read (bnx2x_update_fpsb_idx)
2229                          * prior to this check (bnx2x_has_rx_work) so that
2230                          * we won't write the "newer" value of the status block
2231                          * to IGU (if there was a DMA right after
2232                          * bnx2x_has_rx_work and if there is no rmb, the memory
2233                          * reading (bnx2x_update_fpsb_idx) may be postponed
2234                          * to right before bnx2x_ack_sb). In this case there
2235                          * will never be another interrupt until there is
2236                          * another update of the status block, while there
2237                          * is still unhandled work.
2238                          */
2239                         rmb();
2240
2241                         if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
2242                                 napi_complete(napi);
2243                                 /* Re-enable interrupts */
2244                                 DP(NETIF_MSG_HW,
2245                                    "Update index to %d\n", fp->fp_hc_idx);
2246                                 bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID,
2247                                              le16_to_cpu(fp->fp_hc_idx),
2248                                              IGU_INT_ENABLE, 1);
2249                                 break;
2250                         }
2251                 }
2252         }
2253
2254         return work_done;
2255 }
2256
2257 /* We split the first BD into a headers BD and a data BD
2258  * to ease the pain of our fellow microcode engineers;
2259  * we use one DMA mapping for both BDs.
2260  */
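/* In other words: the start BD keeps only the first hlen (header) bytes of
 * the original buffer, and a new data BD placed after the PBD points at
 * (mapping + hlen) with the remaining (old_len - hlen) bytes.
 */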
2261 static noinline u16 bnx2x_tx_split(struct bnx2x *bp,
2262                                    struct bnx2x_fp_txdata *txdata,
2263                                    struct sw_tx_bd *tx_buf,
2264                                    struct eth_tx_start_bd **tx_bd, u16 hlen,
2265                                    u16 bd_prod, int nbd)
2266 {
2267         struct eth_tx_start_bd *h_tx_bd = *tx_bd;
2268         struct eth_tx_bd *d_tx_bd;
2269         dma_addr_t mapping;
2270         int old_len = le16_to_cpu(h_tx_bd->nbytes);
2271
2272         /* first fix first BD */
2273         h_tx_bd->nbd = cpu_to_le16(nbd);
2274         h_tx_bd->nbytes = cpu_to_le16(hlen);
2275
2276         DP(NETIF_MSG_TX_QUEUED, "TSO split header size is %d "
2277            "(%x:%x) nbd %d\n", h_tx_bd->nbytes, h_tx_bd->addr_hi,
2278            h_tx_bd->addr_lo, h_tx_bd->nbd);
2279
2280         /* now get a new data BD
2281          * (after the pbd) and fill it */
2282         bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2283         d_tx_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2284
2285         mapping = HILO_U64(le32_to_cpu(h_tx_bd->addr_hi),
2286                            le32_to_cpu(h_tx_bd->addr_lo)) + hlen;
2287
2288         d_tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2289         d_tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2290         d_tx_bd->nbytes = cpu_to_le16(old_len - hlen);
2291
2292         /* this marks the BD as one that has no individual mapping */
2293         tx_buf->flags |= BNX2X_TSO_SPLIT_BD;
2294
2295         DP(NETIF_MSG_TX_QUEUED,
2296            "TSO split data size is %d (%x:%x)\n",
2297            d_tx_bd->nbytes, d_tx_bd->addr_hi, d_tx_bd->addr_lo);
2298
2299         /* update tx_bd */
2300         *tx_bd = (struct eth_tx_start_bd *)d_tx_bd;
2301
2302         return bd_prod;
2303 }
2304
2305 static inline u16 bnx2x_csum_fix(unsigned char *t_header, u16 csum, s8 fix)
2306 {
2307         if (fix > 0)
2308                 csum = (u16) ~csum_fold(csum_sub(csum,
2309                                 csum_partial(t_header - fix, fix, 0)));
2310
2311         else if (fix < 0)
2312                 csum = (u16) ~csum_fold(csum_add(csum,
2313                                 csum_partial(t_header, -fix, 0)));
2314
2315         return swab16(csum);
2316 }
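/* Classify an outgoing skb into a bitmask of XMIT_* flags (plain, IPv4/IPv6
 * checksum, TCP checksum, GSO) that the rest of the Tx path uses to decide
 * how to fill the parsing BDs.
 */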
2317
2318 static inline u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb)
2319 {
2320         u32 rc;
2321
2322         if (skb->ip_summed != CHECKSUM_PARTIAL)
2323                 rc = XMIT_PLAIN;
2324
2325         else {
2326                 if (vlan_get_protocol(skb) == htons(ETH_P_IPV6)) {
2327                         rc = XMIT_CSUM_V6;
2328                         if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
2329                                 rc |= XMIT_CSUM_TCP;
2330
2331                 } else {
2332                         rc = XMIT_CSUM_V4;
2333                         if (ip_hdr(skb)->protocol == IPPROTO_TCP)
2334                                 rc |= XMIT_CSUM_TCP;
2335                 }
2336         }
2337
2338         if (skb_is_gso_v6(skb))
2339                 rc |= XMIT_GSO_V6 | XMIT_CSUM_TCP | XMIT_CSUM_V6;
2340         else if (skb_is_gso(skb))
2341                 rc |= XMIT_GSO_V4 | XMIT_CSUM_V4 | XMIT_CSUM_TCP;
2342
2343         return rc;
2344 }
2345
2346 #if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
2347 /* check if packet requires linearization (packet is too fragmented)
2348    no need to check fragmentation if page size > 8K (there will be no
2349    violation of FW restrictions) */
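/* The check below slides a window of (MAX_FETCH_BD - 3) consecutive BDs over
 * the packet; if the bytes in any such window sum to less than the LSO MSS,
 * the FW restriction would be violated and the skb must be linearized.
 */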
2350 static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb,
2351                              u32 xmit_type)
2352 {
2353         int to_copy = 0;
2354         int hlen = 0;
2355         int first_bd_sz = 0;
2356
2357         /* 3 = 1 (for linear data BD) + 2 (for PBD and last BD) */
2358         if (skb_shinfo(skb)->nr_frags >= (MAX_FETCH_BD - 3)) {
2359
2360                 if (xmit_type & XMIT_GSO) {
2361                         unsigned short lso_mss = skb_shinfo(skb)->gso_size;
2362                         /* Check if LSO packet needs to be copied:
2363                            3 = 1 (for headers BD) + 2 (for PBD and last BD) */
2364                         int wnd_size = MAX_FETCH_BD - 3;
2365                         /* Number of windows to check */
2366                         int num_wnds = skb_shinfo(skb)->nr_frags - wnd_size;
2367                         int wnd_idx = 0;
2368                         int frag_idx = 0;
2369                         u32 wnd_sum = 0;
2370
2371                         /* Headers length */
2372                         hlen = (int)(skb_transport_header(skb) - skb->data) +
2373                                 tcp_hdrlen(skb);
2374
2375                         /* Amount of data (w/o headers) on linear part of SKB */
2376                         first_bd_sz = skb_headlen(skb) - hlen;
2377
2378                         wnd_sum  = first_bd_sz;
2379
2380                         /* Calculate the first sum - it's special */
2381                         for (frag_idx = 0; frag_idx < wnd_size - 1; frag_idx++)
2382                                 wnd_sum +=
2383                                         skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]);
2384
2385                         /* If there was data in the linear part of the skb - check it */
2386                         if (first_bd_sz > 0) {
2387                                 if (unlikely(wnd_sum < lso_mss)) {
2388                                         to_copy = 1;
2389                                         goto exit_lbl;
2390                                 }
2391
2392                                 wnd_sum -= first_bd_sz;
2393                         }
2394
2395                         /* Others are easier: run through the frag list and
2396                            check all windows */
2397                         for (wnd_idx = 0; wnd_idx <= num_wnds; wnd_idx++) {
2398                                 wnd_sum +=
2399                           skb_frag_size(&skb_shinfo(skb)->frags[wnd_idx + wnd_size - 1]);
2400
2401                                 if (unlikely(wnd_sum < lso_mss)) {
2402                                         to_copy = 1;
2403                                         break;
2404                                 }
2405                                 wnd_sum -=
2406                                         skb_frag_size(&skb_shinfo(skb)->frags[wnd_idx]);
2407                         }
2408                 } else {
2409                         /* in non-LSO too fragmented packet should always
2410                         /* in the non-LSO case a too fragmented packet
2411                            should always be linearized */
2412                 }
2413         }
2414
2415 exit_lbl:
2416         if (unlikely(to_copy))
2417                 DP(NETIF_MSG_TX_QUEUED,
2418                    "Linearization IS REQUIRED for %s packet. "
2419                    "num_frags %d  hlen %d  first_bd_sz %d\n",
2420                    (xmit_type & XMIT_GSO) ? "LSO" : "non-LSO",
2421                    skb_shinfo(skb)->nr_frags, hlen, first_bd_sz);
2422
2423         return to_copy;
2424 }
2425 #endif
2426
2427 static inline void bnx2x_set_pbd_gso_e2(struct sk_buff *skb, u32 *parsing_data,
2428                                         u32 xmit_type)
2429 {
2430         *parsing_data |= (skb_shinfo(skb)->gso_size <<
2431                               ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT) &
2432                               ETH_TX_PARSE_BD_E2_LSO_MSS;
2433         if ((xmit_type & XMIT_GSO_V6) &&
2434             (ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6))
2435                 *parsing_data |= ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR;
2436 }
2437
2438 /**
2439  * bnx2x_set_pbd_gso - update PBD in GSO case.
2440  *
2441  * @skb:        packet skb
2442  * @pbd:        parse BD
2443  * @xmit_type:  xmit flags
2444  */
2445 static inline void bnx2x_set_pbd_gso(struct sk_buff *skb,
2446                                      struct eth_tx_parse_bd_e1x *pbd,
2447                                      u32 xmit_type)
2448 {
2449         pbd->lso_mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
2450         pbd->tcp_send_seq = swab32(tcp_hdr(skb)->seq);
2451         pbd->tcp_flags = pbd_tcp_flags(skb);
2452
2453         if (xmit_type & XMIT_GSO_V4) {
2454                 pbd->ip_id = swab16(ip_hdr(skb)->id);
2455                 pbd->tcp_pseudo_csum =
2456                         swab16(~csum_tcpudp_magic(ip_hdr(skb)->saddr,
2457                                                   ip_hdr(skb)->daddr,
2458                                                   0, IPPROTO_TCP, 0));
2459
2460         } else
2461                 pbd->tcp_pseudo_csum =
2462                         swab16(~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
2463                                                 &ipv6_hdr(skb)->daddr,
2464                                                 0, IPPROTO_TCP, 0));
2465
2466         pbd->global_data |= ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN;
2467 }
2468
2469 /**
2470  * bnx2x_set_pbd_csum_e2 - update PBD with checksum and return header length
2471  *
2472  * @bp:                 driver handle
2473  * @skb:                packet skb
2474  * @parsing_data:       data to be updated
2475  * @xmit_type:          xmit flags
2476  *
2477  * 57712 related
2478  */
2479 static inline  u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb,
2480         u32 *parsing_data, u32 xmit_type)
2481 {
2482         *parsing_data |=
2483                         ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) <<
2484                         ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W_SHIFT) &
2485                         ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W;
2486
2487         if (xmit_type & XMIT_CSUM_TCP) {
2488                 *parsing_data |= ((tcp_hdrlen(skb) / 4) <<
2489                         ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT) &
2490                         ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW;
2491
2492                 return skb_transport_header(skb) + tcp_hdrlen(skb) - skb->data;
2493         } else
2494                 /* We support checksum offload for TCP and UDP only.
2495                  * No need to pass the UDP header length - it's a constant.
2496                  */
2497                 return skb_transport_header(skb) +
2498                                 sizeof(struct udphdr) - skb->data;
2499 }
2500
2501 static inline void bnx2x_set_sbd_csum(struct bnx2x *bp, struct sk_buff *skb,
2502         struct eth_tx_start_bd *tx_start_bd, u32 xmit_type)
2503 {
2504         tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_L4_CSUM;
2505
2506         if (xmit_type & XMIT_CSUM_V4)
2507                 tx_start_bd->bd_flags.as_bitfield |=
2508                                         ETH_TX_BD_FLAGS_IP_CSUM;
2509         else
2510                 tx_start_bd->bd_flags.as_bitfield |=
2511                                         ETH_TX_BD_FLAGS_IPV6;
2512
2513         if (!(xmit_type & XMIT_CSUM_TCP))
2514                 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IS_UDP;
2515 }
2516
2517 /**
2518  * bnx2x_set_pbd_csum - update PBD with checksum and return header length
2519  *
2520  * @bp:         driver handle
2521  * @skb:        packet skb
2522  * @pbd:        parse BD to be updated
2523  * @xmit_type:  xmit flags
2524  */
2525 static inline u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb,
2526         struct eth_tx_parse_bd_e1x *pbd,
2527         u32 xmit_type)
2528 {
2529         u8 hlen = (skb_network_header(skb) - skb->data) >> 1;
2530
2531         /* for now NS flag is not used in Linux */
2532         pbd->global_data =
2533                 (hlen | ((skb->protocol == cpu_to_be16(ETH_P_8021Q)) <<
2534                          ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT));
2535
2536         pbd->ip_hlen_w = (skb_transport_header(skb) -
2537                         skb_network_header(skb)) >> 1;
2538
2539         hlen += pbd->ip_hlen_w;
2540
2541         /* We support checksum offload for TCP and UDP only */
2542         if (xmit_type & XMIT_CSUM_TCP)
2543                 hlen += tcp_hdrlen(skb) / 2;
2544         else
2545                 hlen += sizeof(struct udphdr) / 2;
2546
2547         pbd->total_hlen_w = cpu_to_le16(hlen);
2548         hlen = hlen*2;
2549
2550         if (xmit_type & XMIT_CSUM_TCP) {
2551                 pbd->tcp_pseudo_csum = swab16(tcp_hdr(skb)->check);
2552
2553         } else {
2554                 s8 fix = SKB_CS_OFF(skb); /* signed! */
2555
2556                 DP(NETIF_MSG_TX_QUEUED,
2557                    "hlen %d  fix %d  csum before fix %x\n",
2558                    le16_to_cpu(pbd->total_hlen_w), fix, SKB_CS(skb));
2559
2560                 /* HW bug: fixup the CSUM */
2561                 pbd->tcp_pseudo_csum =
2562                         bnx2x_csum_fix(skb_transport_header(skb),
2563                                        SKB_CS(skb), fix);
2564
2565                 DP(NETIF_MSG_TX_QUEUED, "csum after fix %x\n",
2566                    pbd->tcp_pseudo_csum);
2567         }
2568
2569         return hlen;
2570 }
2571
2572 /* called with netif_tx_lock
2573  * bnx2x_tx_int() runs without netif_tx_lock unless it needs to call
2574  * netif_wake_queue()
2575  */
2576 netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
2577 {
2578         struct bnx2x *bp = netdev_priv(dev);
2579
2580         struct bnx2x_fastpath *fp;
2581         struct netdev_queue *txq;
2582         struct bnx2x_fp_txdata *txdata;
2583         struct sw_tx_bd *tx_buf;
2584         struct eth_tx_start_bd *tx_start_bd, *first_bd;
2585         struct eth_tx_bd *tx_data_bd, *total_pkt_bd = NULL;
2586         struct eth_tx_parse_bd_e1x *pbd_e1x = NULL;
2587         struct eth_tx_parse_bd_e2 *pbd_e2 = NULL;
2588         u32 pbd_e2_parsing_data = 0;
2589         u16 pkt_prod, bd_prod;
2590         int nbd, txq_index, fp_index, txdata_index;
2591         dma_addr_t mapping;
2592         u32 xmit_type = bnx2x_xmit_type(bp, skb);
2593         int i;
2594         u8 hlen = 0;
2595         __le16 pkt_size = 0;
2596         struct ethhdr *eth;
2597         u8 mac_type = UNICAST_ADDRESS;
2598
2599 #ifdef BNX2X_STOP_ON_ERROR
2600         if (unlikely(bp->panic))
2601                 return NETDEV_TX_BUSY;
2602 #endif
2603
2604         txq_index = skb_get_queue_mapping(skb);
2605         txq = netdev_get_tx_queue(dev, txq_index);
2606
2607         BUG_ON(txq_index >= MAX_ETH_TXQ_IDX(bp) + FCOE_PRESENT);
2608
2609         /* decode the fastpath index and the cos index from the txq */
2610         fp_index = TXQ_TO_FP(txq_index);
2611         txdata_index = TXQ_TO_COS(txq_index);
2612
2613 #ifdef BCM_CNIC
2614         /*
2615          * Override the above for the FCoE queue:
2616          *   - FCoE fp entry is right after the ETH entries.
2617          *   - FCoE L2 queue uses bp->txdata[0] only.
2618          */
2619         if (unlikely(!NO_FCOE(bp) && (txq_index ==
2620                                       bnx2x_fcoe_tx(bp, txq_index)))) {
2621                 fp_index = FCOE_IDX;
2622                 txdata_index = 0;
2623         }
2624 #endif
2625
2626         /* enable this debug print to view the transmission queue being used
2627         DP(BNX2X_MSG_FP, "indices: txq %d, fp %d, txdata %d\n",
2628            txq_index, fp_index, txdata_index); */
2629
2630         /* locate the fastpath and the txdata */
2631         fp = &bp->fp[fp_index];
2632         txdata = &fp->txdata[txdata_index];
2633
2634         /* enable this debug print to view the transmission details
2635         DP(BNX2X_MSG_FP,"transmitting packet cid %d fp index %d txdata_index %d"
2636                         " tx_data ptr %p fp pointer %p\n",
2637            txdata->cid, fp_index, txdata_index, txdata, fp); */
2638
2639         if (unlikely(bnx2x_tx_avail(bp, txdata) <
2640                      (skb_shinfo(skb)->nr_frags + 3))) {
2641                 fp->eth_q_stats.driver_xoff++;
2642                 netif_tx_stop_queue(txq);
2643                 BNX2X_ERR("BUG! Tx ring full when queue awake!\n");
2644                 return NETDEV_TX_BUSY;
2645         }
2646
2647         DP(NETIF_MSG_TX_QUEUED, "queue[%d]: SKB: summed %x  protocol %x  "
2648                                 "protocol(%x,%x) gso type %x  xmit_type %x\n",
2649            txq_index, skb->ip_summed, skb->protocol, ipv6_hdr(skb)->nexthdr,
2650            ip_hdr(skb)->protocol, skb_shinfo(skb)->gso_type, xmit_type);
2651
2652         eth = (struct ethhdr *)skb->data;
2653
2654         /* set flag according to packet type (UNICAST_ADDRESS is default) */
2655         if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
2656                 if (is_broadcast_ether_addr(eth->h_dest))
2657                         mac_type = BROADCAST_ADDRESS;
2658                 else
2659                         mac_type = MULTICAST_ADDRESS;
2660         }
2661
2662 #if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
2663         /* First, check if we need to linearize the skb (due to FW
2664            restrictions). No need to check fragmentation if page size > 8K
2665            (there will be no violation of FW restrictions) */
2666         if (bnx2x_pkt_req_lin(bp, skb, xmit_type)) {
2667                 /* Statistics of linearization */
2668                 bp->lin_cnt++;
2669                 if (skb_linearize(skb) != 0) {
2670                         DP(NETIF_MSG_TX_QUEUED, "SKB linearization failed - "
2671                            "silently dropping this SKB\n");
2672                         dev_kfree_skb_any(skb);
2673                         return NETDEV_TX_OK;
2674                 }
2675         }
2676 #endif
2677         /* Map skb linear data for DMA */
2678         mapping = dma_map_single(&bp->pdev->dev, skb->data,
2679                                  skb_headlen(skb), DMA_TO_DEVICE);
2680         if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
2681                 DP(NETIF_MSG_TX_QUEUED, "SKB mapping failed - "
2682                    "silently dropping this SKB\n");
2683                 dev_kfree_skb_any(skb);
2684                 return NETDEV_TX_OK;
2685         }
2686         /*
2687         Please read carefully. First we use one BD which we mark as start,
2688         then we have a parsing info BD (used for TSO or xsum),
2689         and only then we have the rest of the TSO BDs.
2690         (don't forget to mark the last one as last,
2691         and to unmap only AFTER you write to the BD ...)
2692         And above all, all pbd sizes are in words - NOT DWORDS!
2693         */
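        /*
         * So a typical packet ends up as:
         *   start BD -> parse BD (e1x or e2) -> [split data BD] -> frag BDs
         * with nbd counting all of them, plus the next-page BD when the
         * packet crosses a BD page boundary (see the TX_BD_POFF() check
         * below).
         */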
2694
2695         /* get current pkt produced now - advance it just before sending packet
2696          * since mapping of pages may fail and cause packet to be dropped
2697          */
2698         pkt_prod = txdata->tx_pkt_prod;
2699         bd_prod = TX_BD(txdata->tx_bd_prod);
2700
2701         /* get a tx_buf and first BD
2702          * tx_start_bd may be changed during SPLIT,
2703          * but first_bd will always stay first
2704          */
2705         tx_buf = &txdata->tx_buf_ring[TX_BD(pkt_prod)];
2706         tx_start_bd = &txdata->tx_desc_ring[bd_prod].start_bd;
2707         first_bd = tx_start_bd;
2708
2709         tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
2710         SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_ETH_ADDR_TYPE,
2711                  mac_type);
2712
2713         /* header nbd */
2714         SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_HDR_NBDS, 1);
2715
2716         /* remember the first BD of the packet */
2717         tx_buf->first_bd = txdata->tx_bd_prod;
2718         tx_buf->skb = skb;
2719         tx_buf->flags = 0;
2720
2721         DP(NETIF_MSG_TX_QUEUED,
2722            "sending pkt %u @%p  next_idx %u  bd %u @%p\n",
2723            pkt_prod, tx_buf, txdata->tx_pkt_prod, bd_prod, tx_start_bd);
2724
2725         if (vlan_tx_tag_present(skb)) {
2726                 tx_start_bd->vlan_or_ethertype =
2727                     cpu_to_le16(vlan_tx_tag_get(skb));
2728                 tx_start_bd->bd_flags.as_bitfield |=
2729                     (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
2730         } else
2731                 tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
2732
2733         /* turn on parsing and get a BD */
2734         bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2735
2736         if (xmit_type & XMIT_CSUM)
2737                 bnx2x_set_sbd_csum(bp, skb, tx_start_bd, xmit_type);
2738
2739         if (!CHIP_IS_E1x(bp)) {
2740                 pbd_e2 = &txdata->tx_desc_ring[bd_prod].parse_bd_e2;
2741                 memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2));
2742                 /* Set PBD in checksum offload case */
2743                 if (xmit_type & XMIT_CSUM)
2744                         hlen = bnx2x_set_pbd_csum_e2(bp, skb,
2745                                                      &pbd_e2_parsing_data,
2746                                                      xmit_type);
2747                 if (IS_MF_SI(bp)) {
2748                         /*
2749                          * fill in the MAC addresses in the PBD - for local
2750                          * switching
2751                          */
2752                         bnx2x_set_fw_mac_addr(&pbd_e2->src_mac_addr_hi,
2753                                               &pbd_e2->src_mac_addr_mid,
2754                                               &pbd_e2->src_mac_addr_lo,
2755                                               eth->h_source);
2756                         bnx2x_set_fw_mac_addr(&pbd_e2->dst_mac_addr_hi,
2757                                               &pbd_e2->dst_mac_addr_mid,
2758                                               &pbd_e2->dst_mac_addr_lo,
2759                                               eth->h_dest);
2760                 }
2761         } else {
2762                 pbd_e1x = &txdata->tx_desc_ring[bd_prod].parse_bd_e1x;
2763                 memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x));
2764                 /* Set PBD in checksum offload case */
2765                 if (xmit_type & XMIT_CSUM)
2766                         hlen = bnx2x_set_pbd_csum(bp, skb, pbd_e1x, xmit_type);
2767
2768         }
2769
2770         /* Setup the data pointer of the first BD of the packet */
2771         tx_start_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2772         tx_start_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2773         nbd = 2; /* start_bd + pbd + frags (updated when pages are mapped) */
2774         tx_start_bd->nbytes = cpu_to_le16(skb_headlen(skb));
2775         pkt_size = tx_start_bd->nbytes;
2776
2777         DP(NETIF_MSG_TX_QUEUED, "first bd @%p  addr (%x:%x)  nbd %d"
2778            "  nbytes %d  flags %x  vlan %x\n",
2779            tx_start_bd, tx_start_bd->addr_hi, tx_start_bd->addr_lo,
2780            le16_to_cpu(tx_start_bd->nbd), le16_to_cpu(tx_start_bd->nbytes),
2781            tx_start_bd->bd_flags.as_bitfield,
2782            le16_to_cpu(tx_start_bd->vlan_or_ethertype));
2783
2784         if (xmit_type & XMIT_GSO) {
2785
2786                 DP(NETIF_MSG_TX_QUEUED,
2787                    "TSO packet len %d  hlen %d  total len %d  tso size %d\n",
2788                    skb->len, hlen, skb_headlen(skb),
2789                    skb_shinfo(skb)->gso_size);
2790
2791                 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO;
2792
2793                 if (unlikely(skb_headlen(skb) > hlen))
2794                         bd_prod = bnx2x_tx_split(bp, txdata, tx_buf,
2795                                                  &tx_start_bd, hlen,
2796                                                  bd_prod, ++nbd);
2797                 if (!CHIP_IS_E1x(bp))
2798                         bnx2x_set_pbd_gso_e2(skb, &pbd_e2_parsing_data,
2799                                              xmit_type);
2800                 else
2801                         bnx2x_set_pbd_gso(skb, pbd_e1x, xmit_type);
2802         }
2803
2804         /* Set the PBD's parsing_data field if not zero
2805          * (for the chips newer than 57711).
2806          */
2807         if (pbd_e2_parsing_data)
2808                 pbd_e2->parsing_data = cpu_to_le32(pbd_e2_parsing_data);
2809
2810         tx_data_bd = (struct eth_tx_bd *)tx_start_bd;
2811
2812         /* Handle fragmented skb */
2813         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2814                 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2815
2816                 mapping = skb_frag_dma_map(&bp->pdev->dev, frag, 0,
2817                                            skb_frag_size(frag), DMA_TO_DEVICE);
2818                 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
2819
2820                         DP(NETIF_MSG_TX_QUEUED, "Unable to map page - "
2821                                                 "dropping packet...\n");
2822
2823                         /* we need to unmap all buffers already mapped
2824                          * for this SKB;
2825                          * first_bd->nbd needs to be properly updated
2826                          * before the call to bnx2x_free_tx_pkt
2827                          */
2828                         first_bd->nbd = cpu_to_le16(nbd);
2829                         bnx2x_free_tx_pkt(bp, txdata,
2830                                           TX_BD(txdata->tx_pkt_prod));
2831                         return NETDEV_TX_OK;
2832                 }
2833
2834                 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2835                 tx_data_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2836                 if (total_pkt_bd == NULL)
2837                         total_pkt_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2838
2839                 tx_data_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2840                 tx_data_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2841                 tx_data_bd->nbytes = cpu_to_le16(skb_frag_size(frag));
2842                 le16_add_cpu(&pkt_size, skb_frag_size(frag));
2843                 nbd++;
2844
2845                 DP(NETIF_MSG_TX_QUEUED,
2846                    "frag %d  bd @%p  addr (%x:%x)  nbytes %d\n",
2847                    i, tx_data_bd, tx_data_bd->addr_hi, tx_data_bd->addr_lo,
2848                    le16_to_cpu(tx_data_bd->nbytes));
2849         }
2850
2851         DP(NETIF_MSG_TX_QUEUED, "last bd @%p\n", tx_data_bd);
2852
2853         /* update with actual num BDs */
2854         first_bd->nbd = cpu_to_le16(nbd);
2855
2856         bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2857
2858         /* now send a tx doorbell, counting the next BD
2859          * if the packet contains or ends with it
2860          */
2861         if (TX_BD_POFF(bd_prod) < nbd)
2862                 nbd++;
2863
2864         /* total_pkt_bytes should be set on the first data BD if
2865          * it's not an LSO packet and there is more than one
2866          * data BD. In this case pkt_size is limited by an MTU value.
2867          * However we prefer to set it for an LSO packet (while we don't
2868          * have to) in order to save some CPU cycles in the non-LSO
2869          * case, where we care about them much more.
2870          */
2871         if (total_pkt_bd != NULL)
2872                 total_pkt_bd->total_pkt_bytes = pkt_size;
2873
2874         if (pbd_e1x)