vmxnet3: fix netpoll race condition
drivers/net/vmxnet3/vmxnet3_drv.c (pandora-kernel.git)
1 /*
2  * Linux driver for VMware's vmxnet3 ethernet NIC.
3  *
4  * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation; version 2 of the License and no later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
13  * NON INFRINGEMENT. See the GNU General Public License for more
14  * details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19  *
20  * The full GNU General Public License is included in this distribution in
21  * the file called "COPYING".
22  *
23  * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com>
24  *
25  */
26
27 #include <linux/module.h>
28 #include <net/ip6_checksum.h>
29
30 #include "vmxnet3_int.h"
31
32 char vmxnet3_driver_name[] = "vmxnet3";
33 #define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"
34
35 /*
36  * PCI Device ID Table
37  * Last entry must be all 0s
38  */
39 static DEFINE_PCI_DEVICE_TABLE(vmxnet3_pciid_table) = {
40         {PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
41         {0}
42 };
43
44 MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
45
46 static int enable_mq = 1;
47
48 static void
49 vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);
50
51 /*
52  *    Enable/Disable the given intr
53  */
54 static void
55 vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
56 {
57         VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
58 }
59
60
61 static void
62 vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
63 {
64         VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
65 }
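/* Note: each interrupt vector has its own IMR register in BAR0, spaced 8 bytes
 * apart (VMXNET3_REG_IMR + intr_idx * 8); writing 0 unmasks the vector and
 * writing 1 masks it, which is the only difference between the two helpers
 * above.
 */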
66
67
68 /*
69  *    Enable/Disable all intrs used by the device
70  */
71 static void
72 vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
73 {
74         int i;
75
76         for (i = 0; i < adapter->intr.num_intrs; i++)
77                 vmxnet3_enable_intr(adapter, i);
78         adapter->shared->devRead.intrConf.intrCtrl &=
79                                         cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
80 }
81
82
83 static void
84 vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
85 {
86         int i;
87
88         adapter->shared->devRead.intrConf.intrCtrl |=
89                                         cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
90         for (i = 0; i < adapter->intr.num_intrs; i++)
91                 vmxnet3_disable_intr(adapter, i);
92 }
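/* Besides masking or unmasking each vector, the two helpers above also update
 * the VMXNET3_IC_DISABLE_ALL flag in the shared devRead.intrConf area, which
 * is how the device is told whether interrupts are globally disabled.
 */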
93
94
95 static void
96 vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
97 {
98         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
99 }
100
101
102 static bool
103 vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
104 {
105         return tq->stopped;
106 }
107
108
109 static void
110 vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
111 {
112         tq->stopped = false;
113         netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
114 }
115
116
117 static void
118 vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
119 {
120         tq->stopped = false;
121         netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
122 }
123
124
125 static void
126 vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
127 {
128         tq->stopped = true;
129         tq->num_stop++;
130         netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
131 }
132
133
134 /*
135  * Check the link state. This may start or stop the tx queue.
136  */
137 static void
138 vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
139 {
140         u32 ret;
141         int i;
142         unsigned long flags;
143
144         spin_lock_irqsave(&adapter->cmd_lock, flags);
145         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
146         ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
147         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
148
149         adapter->link_speed = ret >> 16;
150         if (ret & 1) { /* Link is up. */
151                 netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n",
152                             adapter->link_speed);
153                 netif_carrier_on(adapter->netdev);
154
155                 if (affectTxQueue) {
156                         for (i = 0; i < adapter->num_tx_queues; i++)
157                                 vmxnet3_tq_start(&adapter->tx_queue[i],
158                                                  adapter);
159                 }
160         } else {
161                 netdev_info(adapter->netdev, "NIC Link is Down\n");
162                 netif_carrier_off(adapter->netdev);
163
164                 if (affectTxQueue) {
165                         for (i = 0; i < adapter->num_tx_queues; i++)
166                                 vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
167                 }
168         }
169 }
170
171 static void
172 vmxnet3_process_events(struct vmxnet3_adapter *adapter)
173 {
174         int i;
175         unsigned long flags;
176         u32 events = le32_to_cpu(adapter->shared->ecr);
177         if (!events)
178                 return;
179
180         vmxnet3_ack_events(adapter, events);
181
182         /* Check if link state has changed */
183         if (events & VMXNET3_ECR_LINK)
184                 vmxnet3_check_link(adapter, true);
185
186         /* Check if there is an error on xmit/recv queues */
187         if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
188                 spin_lock_irqsave(&adapter->cmd_lock, flags);
189                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
190                                        VMXNET3_CMD_GET_QUEUE_STATUS);
191                 spin_unlock_irqrestore(&adapter->cmd_lock, flags);
192
193                 for (i = 0; i < adapter->num_tx_queues; i++)
194                         if (adapter->tqd_start[i].status.stopped)
195                                 dev_err(&adapter->netdev->dev,
196                                         "%s: tq[%d] error 0x%x\n",
197                                         adapter->netdev->name, i, le32_to_cpu(
198                                         adapter->tqd_start[i].status.error));
199                 for (i = 0; i < adapter->num_rx_queues; i++)
200                         if (adapter->rqd_start[i].status.stopped)
201                                 dev_err(&adapter->netdev->dev,
202                                         "%s: rq[%d] error 0x%x\n",
203                                         adapter->netdev->name, i,
204                                         adapter->rqd_start[i].status.error);
205
206                 schedule_work(&adapter->work);
207         }
208 }
209
210 #ifdef __BIG_ENDIAN_BITFIELD
211 /*
212  * The device expects the bitfields in shared structures to be written in
213  * little-endian order. When the CPU is big endian, the following routines are
214  * used to read from and write to the shared ABI correctly.
215  * The general technique used here is: double-word bitfields are defined in
216  * the opposite order for big-endian architectures. Before the driver reads
217  * them, the complete double word is translated using le32_to_cpu. Similarly,
218  * after the driver writes into the bitfields, cpu_to_le32 is used to translate
219  * the double words back into the required format.
220  * To avoid touching bits in the shared structures more than once, temporary
221  * descriptors are used. These are passed as srcDesc to the following functions.
222  */
223 static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
224                                 struct Vmxnet3_RxDesc *dstDesc)
225 {
226         u32 *src = (u32 *)srcDesc + 2;
227         u32 *dst = (u32 *)dstDesc + 2;
228         dstDesc->addr = le64_to_cpu(srcDesc->addr);
229         *dst = le32_to_cpu(*src);
230         dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
231 }
232
233 static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
234                                struct Vmxnet3_TxDesc *dstDesc)
235 {
236         int i;
237         u32 *src = (u32 *)(srcDesc + 1);
238         u32 *dst = (u32 *)(dstDesc + 1);
239
240         /* Working backwards so that the gen bit is set at the end. */
241         for (i = 2; i > 0; i--) {
242                 src--;
243                 dst--;
244                 *dst = cpu_to_le32(*src);
245         }
246 }
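/* Note: srcDesc + 1 points just past the 16-byte Tx descriptor, so the loop
 * above converts dword[3] first and then dword[2] (the word carrying the gen
 * bit); the 64-bit addr field is expected to have been written separately.
 */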
247
248
249 static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
250                                 struct Vmxnet3_RxCompDesc *dstDesc)
251 {
252         int i = 0;
253         u32 *src = (u32 *)srcDesc;
254         u32 *dst = (u32 *)dstDesc;
255         for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
256                 *dst = le32_to_cpu(*src);
257                 src++;
258                 dst++;
259         }
260 }
261
262
263 /* Used to read bitfield values from double words. */
264 static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
265 {
266         u32 temp = le32_to_cpu(*bitfield);
267         u32 mask = ((1 << size) - 1) << pos;
268         temp &= mask;
269         temp >>= pos;
270         return temp;
271 }
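/* Illustrative use: get_bitfield32(dword_ptr, 14, 1) masks out the single bit
 * at position 14 of the little-endian double word and shifts it down, giving
 * the raw bitfield value independent of host byte order.
 */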
272
273
274
275 #endif  /* __BIG_ENDIAN_BITFIELD */
276
277 #ifdef __BIG_ENDIAN_BITFIELD
278
279 #   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
280                         txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
281                         VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
282 #   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
283                         txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
284                         VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
285 #   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
286                         VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
287                         VMXNET3_TCD_GEN_SIZE)
288 #   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
289                         VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
290 #   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
291                         (dstrcd) = (tmp); \
292                         vmxnet3_RxCompToCPU((rcd), (tmp)); \
293                 } while (0)
294 #   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
295                         (dstrxd) = (tmp); \
296                         vmxnet3_RxDescToCPU((rxd), (tmp)); \
297                 } while (0)
298
299 #else
300
301 #   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
302 #   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
303 #   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
304 #   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
305 #   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
306 #   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)
307
308 #endif /* __BIG_ENDIAN_BITFIELD  */
309
310
311 static void
312 vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
313                      struct pci_dev *pdev)
314 {
315         if (tbi->map_type == VMXNET3_MAP_SINGLE)
316                 dma_unmap_single(&pdev->dev, tbi->dma_addr, tbi->len,
317                                  PCI_DMA_TODEVICE);
318         else if (tbi->map_type == VMXNET3_MAP_PAGE)
319                 dma_unmap_page(&pdev->dev, tbi->dma_addr, tbi->len,
320                                PCI_DMA_TODEVICE);
321         else
322                 BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
323
324         tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
325 }
326
327
328 static int
329 vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
330                   struct pci_dev *pdev, struct vmxnet3_adapter *adapter)
331 {
332         struct sk_buff *skb;
333         int entries = 0;
334
335         /* no out of order completion */
336         BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
337         BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
338
339         skb = tq->buf_info[eop_idx].skb;
340         BUG_ON(skb == NULL);
341         tq->buf_info[eop_idx].skb = NULL;
342
343         VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
344
345         while (tq->tx_ring.next2comp != eop_idx) {
346                 vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
347                                      pdev);
348
349                 /* update next2comp w/o tx_lock. Since we are marking more,
350                  * not fewer, tx ring entries as available, the worst case is
351                  * that the tx routine incorrectly re-queues a pkt because it
352                  * sees too few free tx ring entries.
353                  */
354                 vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
355                 entries++;
356         }
357
358         dev_kfree_skb_any(skb);
359         return entries;
360 }
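/* Returns the number of tx ring entries released for the completed packet;
 * vmxnet3_tq_tx_complete() below sums these and, if anything was released,
 * checks whether a stopped queue can be woken up again.
 */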
361
362
363 static int
364 vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
365                         struct vmxnet3_adapter *adapter)
366 {
367         int completed = 0;
368         union Vmxnet3_GenericDesc *gdesc;
369
370         gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
371         while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
372                 completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
373                                                &gdesc->tcd), tq, adapter->pdev,
374                                                adapter);
375
376                 vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
377                 gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
378         }
379
380         if (completed) {
381                 spin_lock(&tq->tx_lock);
382                 if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
383                              vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
384                              VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
385                              netif_carrier_ok(adapter->netdev))) {
386                         vmxnet3_tq_wake(tq, adapter);
387                 }
388                 spin_unlock(&tq->tx_lock);
389         }
390         return completed;
391 }
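/* The queue is only woken when more than VMXNET3_WAKE_QUEUE_THRESHOLD(tq)
 * descriptors are free and the carrier is up, which adds some hysteresis
 * against rapid stop/wake cycles.
 */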
392
393
394 static void
395 vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
396                    struct vmxnet3_adapter *adapter)
397 {
398         int i;
399
400         while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
401                 struct vmxnet3_tx_buf_info *tbi;
402
403                 tbi = tq->buf_info + tq->tx_ring.next2comp;
404
405                 vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
406                 if (tbi->skb) {
407                         dev_kfree_skb_any(tbi->skb);
408                         tbi->skb = NULL;
409                 }
410                 vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
411         }
412
413         /* sanity check, verify all buffers are indeed unmapped and freed */
414         for (i = 0; i < tq->tx_ring.size; i++) {
415                 BUG_ON(tq->buf_info[i].skb != NULL ||
416                        tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
417         }
418
419         tq->tx_ring.gen = VMXNET3_INIT_GEN;
420         tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
421
422         tq->comp_ring.gen = VMXNET3_INIT_GEN;
423         tq->comp_ring.next2proc = 0;
424 }
425
426
427 static void
428 vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
429                    struct vmxnet3_adapter *adapter)
430 {
431         if (tq->tx_ring.base) {
432                 dma_free_coherent(&adapter->pdev->dev, tq->tx_ring.size *
433                                   sizeof(struct Vmxnet3_TxDesc),
434                                   tq->tx_ring.base, tq->tx_ring.basePA);
435                 tq->tx_ring.base = NULL;
436         }
437         if (tq->data_ring.base) {
438                 dma_free_coherent(&adapter->pdev->dev, tq->data_ring.size *
439                                   sizeof(struct Vmxnet3_TxDataDesc),
440                                   tq->data_ring.base, tq->data_ring.basePA);
441                 tq->data_ring.base = NULL;
442         }
443         if (tq->comp_ring.base) {
444                 dma_free_coherent(&adapter->pdev->dev, tq->comp_ring.size *
445                                   sizeof(struct Vmxnet3_TxCompDesc),
446                                   tq->comp_ring.base, tq->comp_ring.basePA);
447                 tq->comp_ring.base = NULL;
448         }
449         if (tq->buf_info) {
450                 dma_free_coherent(&adapter->pdev->dev,
451                                   tq->tx_ring.size * sizeof(tq->buf_info[0]),
452                                   tq->buf_info, tq->buf_info_pa);
453                 tq->buf_info = NULL;
454         }
455 }
456
457
458 /* Destroy all tx queues */
459 void
460 vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
461 {
462         int i;
463
464         for (i = 0; i < adapter->num_tx_queues; i++)
465                 vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
466 }
467
468
469 static void
470 vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
471                 struct vmxnet3_adapter *adapter)
472 {
473         int i;
474
475         /* reset the tx ring contents to 0 and reset the tx ring states */
476         memset(tq->tx_ring.base, 0, tq->tx_ring.size *
477                sizeof(struct Vmxnet3_TxDesc));
478         tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
479         tq->tx_ring.gen = VMXNET3_INIT_GEN;
480
481         memset(tq->data_ring.base, 0, tq->data_ring.size *
482                sizeof(struct Vmxnet3_TxDataDesc));
483
484         /* reset the tx comp ring contents to 0 and reset comp ring states */
485         memset(tq->comp_ring.base, 0, tq->comp_ring.size *
486                sizeof(struct Vmxnet3_TxCompDesc));
487         tq->comp_ring.next2proc = 0;
488         tq->comp_ring.gen = VMXNET3_INIT_GEN;
489
490         /* reset the bookkeeping data */
491         memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
492         for (i = 0; i < tq->tx_ring.size; i++)
493                 tq->buf_info[i].map_type = VMXNET3_MAP_NONE;
494
495         /* stats are not reset */
496 }
497
498
499 static int
500 vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
501                   struct vmxnet3_adapter *adapter)
502 {
503         size_t sz;
504
505         BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
506                tq->comp_ring.base || tq->buf_info);
507
508         tq->tx_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
509                         tq->tx_ring.size * sizeof(struct Vmxnet3_TxDesc),
510                         &tq->tx_ring.basePA, GFP_KERNEL);
511         if (!tq->tx_ring.base) {
512                 netdev_err(adapter->netdev, "failed to allocate tx ring\n");
513                 goto err;
514         }
515
516         tq->data_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
517                         tq->data_ring.size * sizeof(struct Vmxnet3_TxDataDesc),
518                         &tq->data_ring.basePA, GFP_KERNEL);
519         if (!tq->data_ring.base) {
520                 netdev_err(adapter->netdev, "failed to allocate data ring\n");
521                 goto err;
522         }
523
524         tq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
525                         tq->comp_ring.size * sizeof(struct Vmxnet3_TxCompDesc),
526                         &tq->comp_ring.basePA, GFP_KERNEL);
527         if (!tq->comp_ring.base) {
528                 netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
529                 goto err;
530         }
531
532         sz = tq->tx_ring.size * sizeof(tq->buf_info[0]);
533         tq->buf_info = dma_zalloc_coherent(&adapter->pdev->dev, sz,
534                                            &tq->buf_info_pa, GFP_KERNEL);
535         if (!tq->buf_info)
536                 goto err;
537
538         return 0;
539
540 err:
541         vmxnet3_tq_destroy(tq, adapter);
542         return -ENOMEM;
543 }
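/* All four per-queue areas (tx ring, data ring, comp ring and buf_info) are
 * DMA-coherent allocations; on any failure the partially created queue is
 * torn down again via vmxnet3_tq_destroy().
 */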
544
545 static void
546 vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
547 {
548         int i;
549
550         for (i = 0; i < adapter->num_tx_queues; i++)
551                 vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
552 }
553
554 /*
555  *    Starting from ring->next2fill, allocate rx buffers for the given ring
556  *    of the rx queue and update the rx desc. Stop after @num_to_alloc buffers
557  *    are allocated or an allocation fails.
558  */
559
560 static int
561 vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
562                         int num_to_alloc, struct vmxnet3_adapter *adapter)
563 {
564         int num_allocated = 0;
565         struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
566         struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
567         u32 val;
568
569         while (num_allocated <= num_to_alloc) {
570                 struct vmxnet3_rx_buf_info *rbi;
571                 union Vmxnet3_GenericDesc *gd;
572
573                 rbi = rbi_base + ring->next2fill;
574                 gd = ring->base + ring->next2fill;
575
576                 if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
577                         if (rbi->skb == NULL) {
578                                 rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
579                                                                        rbi->len,
580                                                                        GFP_KERNEL);
581                                 if (unlikely(rbi->skb == NULL)) {
582                                         rq->stats.rx_buf_alloc_failure++;
583                                         break;
584                                 }
585
586                                 rbi->dma_addr = dma_map_single(
587                                                 &adapter->pdev->dev,
588                                                 rbi->skb->data, rbi->len,
589                                                 PCI_DMA_FROMDEVICE);
590                         } else {
591                                 /* rx buffer skipped by the device */
592                         }
593                         val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
594                 } else {
595                         BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
596                                rbi->len  != PAGE_SIZE);
597
598                         if (rbi->page == NULL) {
599                                 rbi->page = alloc_page(GFP_ATOMIC);
600                                 if (unlikely(rbi->page == NULL)) {
601                                         rq->stats.rx_buf_alloc_failure++;
602                                         break;
603                                 }
604                                 rbi->dma_addr = dma_map_page(
605                                                 &adapter->pdev->dev,
606                                                 rbi->page, 0, PAGE_SIZE,
607                                                 PCI_DMA_FROMDEVICE);
608                         } else {
609                                 /* rx buffers skipped by the device */
610                         }
611                         val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
612                 }
613
614                 BUG_ON(rbi->dma_addr == 0);
615                 gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
616                 gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
617                                            | val | rbi->len);
618
619                 /* Fill the last buffer but don't mark it ready, or else the
620                  * device will think that the queue is full */
621                 if (num_allocated == num_to_alloc)
622                         break;
623
624                 gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
625                 num_allocated++;
626                 vmxnet3_cmd_ring_adv_next2fill(ring);
627         }
628
629         netdev_dbg(adapter->netdev,
630                 "alloc_rx_buf: %d allocated, next2fill %u, next2comp %u\n",
631                 num_allocated, ring->next2fill, ring->next2comp);
632
633         /* so that the device can distinguish a full ring from an empty ring */
634         BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);
635
636         return num_allocated;
637 }
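/* Note: when num_allocated reaches num_to_alloc, the descriptor is still
 * filled in above, but its gen bit is not flipped and next2fill is not
 * advanced, so the device does not yet treat it as ready.
 */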
638
639
640 static void
641 vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
642                     struct vmxnet3_rx_buf_info *rbi)
643 {
644         struct skb_frag_struct *frag = skb_shinfo(skb)->frags +
645                 skb_shinfo(skb)->nr_frags;
646
647         BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
648
649         __skb_frag_set_page(frag, rbi->page);
650         frag->page_offset = 0;
651         skb_frag_size_set(frag, rcd->len);
652         skb->data_len += rcd->len;
653         skb->truesize += PAGE_SIZE;
654         skb_shinfo(skb)->nr_frags++;
655 }
656
657
658 static void
659 vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
660                 struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
661                 struct vmxnet3_adapter *adapter)
662 {
663         u32 dw2, len;
664         unsigned long buf_offset;
665         int i;
666         union Vmxnet3_GenericDesc *gdesc;
667         struct vmxnet3_tx_buf_info *tbi = NULL;
668
669         BUG_ON(ctx->copy_size > skb_headlen(skb));
670
671         /* use the previous gen bit for the SOP desc */
672         dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
673
674         ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
675         gdesc = ctx->sop_txd; /* both loops below can be skipped */
676
677         /* no need to map the buffer if headers are copied */
678         if (ctx->copy_size) {
679                 ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
680                                         tq->tx_ring.next2fill *
681                                         sizeof(struct Vmxnet3_TxDataDesc));
682                 ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
683                 ctx->sop_txd->dword[3] = 0;
684
685                 tbi = tq->buf_info + tq->tx_ring.next2fill;
686                 tbi->map_type = VMXNET3_MAP_NONE;
687
688                 netdev_dbg(adapter->netdev,
689                         "txd[%u]: 0x%Lx 0x%x 0x%x\n",
690                         tq->tx_ring.next2fill,
691                         le64_to_cpu(ctx->sop_txd->txd.addr),
692                         ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
693                 vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
694
695                 /* use the right gen for non-SOP desc */
696                 dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
697         }
698
699         /* linear part can use multiple tx desc if it's big */
700         len = skb_headlen(skb) - ctx->copy_size;
701         buf_offset = ctx->copy_size;
702         while (len) {
703                 u32 buf_size;
704
705                 if (len < VMXNET3_MAX_TX_BUF_SIZE) {
706                         buf_size = len;
707                         dw2 |= len;
708                 } else {
709                         buf_size = VMXNET3_MAX_TX_BUF_SIZE;
710                         /* spec says that for TxDesc.len, 0 == 2^14 */
711                 }
712
713                 tbi = tq->buf_info + tq->tx_ring.next2fill;
714                 tbi->map_type = VMXNET3_MAP_SINGLE;
715                 tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
716                                 skb->data + buf_offset, buf_size,
717                                 PCI_DMA_TODEVICE);
718
719                 tbi->len = buf_size;
720
721                 gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
722                 BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
723
724                 gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
725                 gdesc->dword[2] = cpu_to_le32(dw2);
726                 gdesc->dword[3] = 0;
727
728                 netdev_dbg(adapter->netdev,
729                         "txd[%u]: 0x%Lx 0x%x 0x%x\n",
730                         tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
731                         le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
732                 vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
733                 dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
734
735                 len -= buf_size;
736                 buf_offset += buf_size;
737         }
738
739         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
740                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
741                 u32 buf_size;
742
743                 buf_offset = 0;
744                 len = skb_frag_size(frag);
745                 while (len) {
746                         tbi = tq->buf_info + tq->tx_ring.next2fill;
747                         if (len < VMXNET3_MAX_TX_BUF_SIZE) {
748                                 buf_size = len;
749                                 dw2 |= len;
750                         } else {
751                                 buf_size = VMXNET3_MAX_TX_BUF_SIZE;
752                                 /* spec says that for TxDesc.len, 0 == 2^14 */
753                         }
754                         tbi->map_type = VMXNET3_MAP_PAGE;
755                         tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
756                                                          buf_offset, buf_size,
757                                                          DMA_TO_DEVICE);
758
759                         tbi->len = buf_size;
760
761                         gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
762                         BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
763
764                         gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
765                         gdesc->dword[2] = cpu_to_le32(dw2);
766                         gdesc->dword[3] = 0;
767
768                         netdev_dbg(adapter->netdev,
769                                 "txd[%u]: 0x%llu %u %u\n",
770                                 tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
771                                 le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
772                         vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
773                         dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
774
775                         len -= buf_size;
776                         buf_offset += buf_size;
777                 }
778         }
779
780         ctx->eop_txd = gdesc;
781
782         /* set the last buf_info for the pkt */
783         tbi->skb = skb;
784         tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
785 }
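/* Rough summary of vmxnet3_map_pkt(): when headers were copied, the SOP
 * descriptor points at them in the tx data ring; further descriptors map the
 * remaining linear data and then each page fragment. The SOP descriptor keeps
 * the previous gen value so the caller can flip it last to publish the packet.
 */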
786
787
788 /* Init all tx queues */
789 static void
790 vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
791 {
792         int i;
793
794         for (i = 0; i < adapter->num_tx_queues; i++)
795                 vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
796 }
797
798
799 /*
800  *    Parse and copy the relevant protocol headers:
801  *      For a TSO pkt, the relevant headers are L2/3/4 including options.
802  *      For a pkt requesting csum offload, they are L2/3 and may include L4
803  *      if it is a TCP/UDP pkt.
804  *
805  * Returns:
806  *    -1:  an error occurred during parsing
807  *     0:  protocol headers parsed, but too big to be copied
808  *     1:  protocol headers parsed and copied
809  *
810  * Other effects:
811  *    1. related *ctx fields are updated.
812  *    2. ctx->copy_size is the # of bytes copied
813  *    3. the portion copied is guaranteed to be in the linear part
814  *
815  */
816 static int
817 vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
818                            struct vmxnet3_tx_ctx *ctx,
819                            struct vmxnet3_adapter *adapter)
820 {
821         struct Vmxnet3_TxDataDesc *tdd;
822
823         if (ctx->mss) { /* TSO */
824                 ctx->eth_ip_hdr_size = skb_transport_offset(skb);
825                 ctx->l4_hdr_size = tcp_hdrlen(skb);
826                 ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
827         } else {
828                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
829                         ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
830
831                         if (ctx->ipv4) {
832                                 const struct iphdr *iph = ip_hdr(skb);
833
834                                 if (iph->protocol == IPPROTO_TCP)
835                                         ctx->l4_hdr_size = tcp_hdrlen(skb);
836                                 else if (iph->protocol == IPPROTO_UDP)
837                                         ctx->l4_hdr_size = sizeof(struct udphdr);
838                                 else
839                                         ctx->l4_hdr_size = 0;
840                         } else {
841                                 /* for simplicity, don't copy L4 headers */
842                                 ctx->l4_hdr_size = 0;
843                         }
844                         ctx->copy_size = min(ctx->eth_ip_hdr_size +
845                                          ctx->l4_hdr_size, skb->len);
846                 } else {
847                         ctx->eth_ip_hdr_size = 0;
848                         ctx->l4_hdr_size = 0;
849                         /* copy as much as allowed */
850                         ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE
851                                              , skb_headlen(skb));
852                 }
853
854                 /* make sure headers are accessible directly */
855                 if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
856                         goto err;
857         }
858
859         if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) {
860                 tq->stats.oversized_hdr++;
861                 ctx->copy_size = 0;
862                 return 0;
863         }
864
865         tdd = tq->data_ring.base + tq->tx_ring.next2fill;
866
867         memcpy(tdd->data, skb->data, ctx->copy_size);
868         netdev_dbg(adapter->netdev,
869                 "copy %u bytes to dataRing[%u]\n",
870                 ctx->copy_size, tq->tx_ring.next2fill);
871         return 1;
872
873 err:
874         return -1;
875 }
876
877
878 static void
879 vmxnet3_prepare_tso(struct sk_buff *skb,
880                     struct vmxnet3_tx_ctx *ctx)
881 {
882         struct tcphdr *tcph = tcp_hdr(skb);
883
884         if (ctx->ipv4) {
885                 struct iphdr *iph = ip_hdr(skb);
886
887                 iph->check = 0;
888                 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
889                                                  IPPROTO_TCP, 0);
890         } else {
891                 struct ipv6hdr *iph = ipv6_hdr(skb);
892
893                 tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
894                                                IPPROTO_TCP, 0);
895         }
896 }
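/* For TSO the device computes the final TCP checksum, so the helper above
 * clears the IPv4 header checksum and seeds tcph->check with the pseudo-header
 * checksum, as is customary for TSO-capable NICs.
 */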
897
898 static int txd_estimate(const struct sk_buff *skb)
899 {
900         int count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
901         int i;
902
903         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
904                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
905
906                 count += VMXNET3_TXD_NEEDED(skb_frag_size(frag));
907         }
908         return count;
909 }
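/* txd_estimate() gives an upper bound: roughly one descriptor per
 * VMXNET3_MAX_TX_BUF_SIZE worth of the linear area and of each fragment, plus
 * one extra, presumably for the headers copied into the tx data ring.
 */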
910
911 /*
912  * Transmits a pkt through the given tq
913  * Returns:
914  *    NETDEV_TX_OK:      descriptors are set up successfully
915  *    NETDEV_TX_OK:      an error occurred, the pkt is dropped
916  *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
917  *
918  * Side-effects:
919  *    1. tx ring may be changed
920  *    2. tq stats may be updated accordingly
921  *    3. shared->txNumDeferred may be updated
922  */
923
924 static int
925 vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
926                 struct vmxnet3_adapter *adapter, struct net_device *netdev)
927 {
928         int ret;
929         u32 count;
930         unsigned long flags;
931         struct vmxnet3_tx_ctx ctx;
932         union Vmxnet3_GenericDesc *gdesc;
933 #ifdef __BIG_ENDIAN_BITFIELD
934         /* Use temporary descriptor to avoid touching bits multiple times */
935         union Vmxnet3_GenericDesc tempTxDesc;
936 #endif
937
938         count = txd_estimate(skb);
939
940         ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));
941
942         ctx.mss = skb_shinfo(skb)->gso_size;
943         if (ctx.mss) {
944                 if (skb_header_cloned(skb)) {
945                         if (unlikely(pskb_expand_head(skb, 0, 0,
946                                                       GFP_ATOMIC) != 0)) {
947                                 tq->stats.drop_tso++;
948                                 goto drop_pkt;
949                         }
950                         tq->stats.copy_skb_header++;
951                 }
952                 vmxnet3_prepare_tso(skb, &ctx);
953         } else {
954                 if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
955
956                         /* non-tso pkts must not use more than
957                          * VMXNET3_MAX_TXD_PER_PKT entries
958                          */
959                         if (skb_linearize(skb) != 0) {
960                                 tq->stats.drop_too_many_frags++;
961                                 goto drop_pkt;
962                         }
963                         tq->stats.linearized++;
964
965                         /* recalculate the # of descriptors to use */
966                         count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
967                 }
968         }
969
970         spin_lock_irqsave(&tq->tx_lock, flags);
971
972         if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
973                 tq->stats.tx_ring_full++;
974                 netdev_dbg(adapter->netdev,
975                         "tx queue stopped on %s, next2comp %u"
976                         " next2fill %u\n", adapter->netdev->name,
977                         tq->tx_ring.next2comp, tq->tx_ring.next2fill);
978
979                 vmxnet3_tq_stop(tq, adapter);
980                 spin_unlock_irqrestore(&tq->tx_lock, flags);
981                 return NETDEV_TX_BUSY;
982         }
983
984
985         ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
986         if (ret >= 0) {
987                 BUG_ON(ret <= 0 && ctx.copy_size != 0);
988                 /* hdrs parsed, check against other limits */
989                 if (ctx.mss) {
990                         if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
991                                      VMXNET3_MAX_TX_BUF_SIZE)) {
992                                 goto hdr_too_big;
993                         }
994                 } else {
995                         if (skb->ip_summed == CHECKSUM_PARTIAL) {
996                                 if (unlikely(ctx.eth_ip_hdr_size +
997                                              skb->csum_offset >
998                                              VMXNET3_MAX_CSUM_OFFSET)) {
999                                         goto hdr_too_big;
1000                                 }
1001                         }
1002                 }
1003         } else {
1004                 tq->stats.drop_hdr_inspect_err++;
1005                 goto unlock_drop_pkt;
1006         }
1007
1008         /* fill tx descs related to addr & len */
1009         vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);
1010
1011         /* setup the EOP desc */
1012         ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
1013
1014         /* setup the SOP desc */
1015 #ifdef __BIG_ENDIAN_BITFIELD
1016         gdesc = &tempTxDesc;
1017         gdesc->dword[2] = ctx.sop_txd->dword[2];
1018         gdesc->dword[3] = ctx.sop_txd->dword[3];
1019 #else
1020         gdesc = ctx.sop_txd;
1021 #endif
1022         if (ctx.mss) {
1023                 gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
1024                 gdesc->txd.om = VMXNET3_OM_TSO;
1025                 gdesc->txd.msscof = ctx.mss;
1026                 le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
1027                              gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
1028         } else {
1029                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1030                         gdesc->txd.hlen = ctx.eth_ip_hdr_size;
1031                         gdesc->txd.om = VMXNET3_OM_CSUM;
1032                         gdesc->txd.msscof = ctx.eth_ip_hdr_size +
1033                                             skb->csum_offset;
1034                 } else {
1035                         gdesc->txd.om = 0;
1036                         gdesc->txd.msscof = 0;
1037                 }
1038                 le32_add_cpu(&tq->shared->txNumDeferred, 1);
1039         }
1040
1041         if (vlan_tx_tag_present(skb)) {
1042                 gdesc->txd.ti = 1;
1043                 gdesc->txd.tci = vlan_tx_tag_get(skb);
1044         }
1045
1046         /* finally flips the GEN bit of the SOP desc. */
1047         gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
1048                                                   VMXNET3_TXD_GEN);
1049 #ifdef __BIG_ENDIAN_BITFIELD
1050         /* Finished updating the bitfields of the Tx Desc, so write them back to
1051          * their original place.
1052          */
1053         vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
1054                            (struct Vmxnet3_TxDesc *)ctx.sop_txd);
1055         gdesc = ctx.sop_txd;
1056 #endif
1057         netdev_dbg(adapter->netdev,
1058                 "txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
1059                 (u32)(ctx.sop_txd -
1060                 tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
1061                 le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));
1062
1063         spin_unlock_irqrestore(&tq->tx_lock, flags);
1064
1065         if (le32_to_cpu(tq->shared->txNumDeferred) >=
1066                                         le32_to_cpu(tq->shared->txThreshold)) {
1067                 tq->shared->txNumDeferred = 0;
1068                 VMXNET3_WRITE_BAR0_REG(adapter,
1069                                        VMXNET3_REG_TXPROD + tq->qid * 8,
1070                                        tq->tx_ring.next2fill);
1071         }
1072
1073         return NETDEV_TX_OK;
1074
1075 hdr_too_big:
1076         tq->stats.drop_oversized_hdr++;
1077 unlock_drop_pkt:
1078         spin_unlock_irqrestore(&tq->tx_lock, flags);
1079 drop_pkt:
1080         tq->stats.drop_total++;
1081         dev_kfree_skb(skb);
1082         return NETDEV_TX_OK;
1083 }
1084
1085
1086 static netdev_tx_t
1087 vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1088 {
1089         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1090
1091         BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
1092         return vmxnet3_tq_xmit(skb,
1093                                &adapter->tx_queue[skb->queue_mapping],
1094                                adapter, netdev);
1095 }
1096
1097
1098 static void
1099 vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
1100                 struct sk_buff *skb,
1101                 union Vmxnet3_GenericDesc *gdesc)
1102 {
1103         if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
1104                 /* typical case: TCP/UDP over IP and both csums are correct */
1105                 if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) ==
1106                                                         VMXNET3_RCD_CSUM_OK) {
1107                         skb->ip_summed = CHECKSUM_UNNECESSARY;
1108                         BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
1109                         BUG_ON(!(gdesc->rcd.v4  || gdesc->rcd.v6));
1110                         BUG_ON(gdesc->rcd.frg);
1111                 } else {
1112                         if (gdesc->rcd.csum) {
1113                                 skb->csum = htons(gdesc->rcd.csum);
1114                                 skb->ip_summed = CHECKSUM_PARTIAL;
1115                         } else {
1116                                 skb_checksum_none_assert(skb);
1117                         }
1118                 }
1119         } else {
1120                 skb_checksum_none_assert(skb);
1121         }
1122 }
1123
1124
1125 static void
1126 vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
1127                  struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
1128 {
1129         rq->stats.drop_err++;
1130         if (!rcd->fcs)
1131                 rq->stats.drop_fcs++;
1132
1133         rq->stats.drop_total++;
1134
1135         /*
1136          * We do not unmap and chain the rx buffer to the skb.
1137          * We basically pretend this buffer is not used and will be recycled
1138          * by vmxnet3_rq_alloc_rx_buf()
1139          */
1140
1141         /*
1142          * ctx->skb may be NULL if this is the first and the only one
1143          * desc for the pkt
1144          */
1145         if (ctx->skb)
1146                 dev_kfree_skb_irq(ctx->skb);
1147
1148         ctx->skb = NULL;
1149 }
1150
1151
1152 static int
1153 vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1154                        struct vmxnet3_adapter *adapter, int quota)
1155 {
1156         static const u32 rxprod_reg[2] = {
1157                 VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
1158         };
1159         u32 num_rxd = 0;
1160         bool skip_page_frags = false;
1161         struct Vmxnet3_RxCompDesc *rcd;
1162         struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
1163 #ifdef __BIG_ENDIAN_BITFIELD
1164         struct Vmxnet3_RxDesc rxCmdDesc;
1165         struct Vmxnet3_RxCompDesc rxComp;
1166 #endif
1167         vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
1168                           &rxComp);
1169         while (rcd->gen == rq->comp_ring.gen) {
1170                 struct vmxnet3_rx_buf_info *rbi;
1171                 struct sk_buff *skb, *new_skb = NULL;
1172                 struct page *new_page = NULL;
1173                 int num_to_alloc;
1174                 struct Vmxnet3_RxDesc *rxd;
1175                 u32 idx, ring_idx;
1176                 struct vmxnet3_cmd_ring *ring = NULL;
1177                 if (num_rxd >= quota) {
1178                         /* we may stop even before we see the EOP desc of
1179                          * the current pkt
1180                          */
1181                         break;
1182                 }
1183                 num_rxd++;
1184                 BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
1185                 idx = rcd->rxdIdx;
1186                 ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
1187                 ring = rq->rx_ring + ring_idx;
1188                 vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
1189                                   &rxCmdDesc);
1190                 rbi = rq->buf_info[ring_idx] + idx;
1191
1192                 BUG_ON(rxd->addr != rbi->dma_addr ||
1193                        rxd->len != rbi->len);
1194
1195                 if (unlikely(rcd->eop && rcd->err)) {
1196                         vmxnet3_rx_error(rq, rcd, ctx, adapter);
1197                         goto rcd_done;
1198                 }
1199
1200                 if (rcd->sop) { /* first buf of the pkt */
1201                         BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
1202                                rcd->rqID != rq->qid);
1203
1204                         BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
1205                         BUG_ON(ctx->skb != NULL || rbi->skb == NULL);
1206
1207                         if (unlikely(rcd->len == 0)) {
1208                                 /* Pretend the rx buffer is skipped. */
1209                                 BUG_ON(!(rcd->sop && rcd->eop));
1210                                 netdev_dbg(adapter->netdev,
1211                                         "rxRing[%u][%u] 0 length\n",
1212                                         ring_idx, idx);
1213                                 goto rcd_done;
1214                         }
1215
1216                         skip_page_frags = false;
1217                         ctx->skb = rbi->skb;
1218                         new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
1219                                                             rbi->len);
1220                         if (new_skb == NULL) {
1221                                 /* Skb allocation failed, do not hand this
1222                                  * skb over to the stack. Reuse it. Drop the existing pkt.
1223                                  */
1224                                 rq->stats.rx_buf_alloc_failure++;
1225                                 ctx->skb = NULL;
1226                                 rq->stats.drop_total++;
1227                                 skip_page_frags = true;
1228                                 goto rcd_done;
1229                         }
1230
1231                         dma_unmap_single(&adapter->pdev->dev, rbi->dma_addr,
1232                                          rbi->len,
1233                                          PCI_DMA_FROMDEVICE);
1234
1235 #ifdef VMXNET3_RSS
1236                         if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
1237                             (adapter->netdev->features & NETIF_F_RXHASH))
1238                                 skb_set_hash(ctx->skb,
1239                                              le32_to_cpu(rcd->rssHash),
1240                                              PKT_HASH_TYPE_L3);
1241 #endif
1242                         skb_put(ctx->skb, rcd->len);
1243
1244                         /* Immediate refill */
1245                         rbi->skb = new_skb;
1246                         rbi->dma_addr = dma_map_single(&adapter->pdev->dev,
1247                                                        rbi->skb->data, rbi->len,
1248                                                        PCI_DMA_FROMDEVICE);
1249                         rxd->addr = cpu_to_le64(rbi->dma_addr);
1250                         rxd->len = rbi->len;
1251
1252                 } else {
1253                         BUG_ON(ctx->skb == NULL && !skip_page_frags);
1254
1255                         /* non SOP buffer must be type 1 in most cases */
1256                         BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
1257                         BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1258
1259                         /* If an sop buffer was dropped, skip all
1260                          * following non-sop fragments. They will be reused.
1261                          */
1262                         if (skip_page_frags)
1263                                 goto rcd_done;
1264
1265                         new_page = alloc_page(GFP_ATOMIC);
1266                         if (unlikely(new_page == NULL)) {
1267                                 /* Replacement page frag could not be allocated.
1268                                  * Reuse this page. Drop the pkt and free the
1269                                  * skb which contained this page as a frag. Skip
1270                                  * processing all the following non-sop frags.
1271                                  */
1272                                 rq->stats.rx_buf_alloc_failure++;
1273                                 dev_kfree_skb(ctx->skb);
1274                                 ctx->skb = NULL;
1275                                 skip_page_frags = true;
1276                                 goto rcd_done;
1277                         }
1278
1279                         if (rcd->len) {
1280                                 dma_unmap_page(&adapter->pdev->dev,
1281                                                rbi->dma_addr, rbi->len,
1282                                                PCI_DMA_FROMDEVICE);
1283
1284                                 vmxnet3_append_frag(ctx->skb, rcd, rbi);
1285                         }
1286
1287                         /* Immediate refill */
1288                         rbi->page = new_page;
1289                         rbi->dma_addr = dma_map_page(&adapter->pdev->dev,
1290                                                      rbi->page,
1291                                                      0, PAGE_SIZE,
1292                                                      PCI_DMA_FROMDEVICE);
1293                         rxd->addr = cpu_to_le64(rbi->dma_addr);
1294                         rxd->len = rbi->len;
1295                 }
1296
1297
1298                 skb = ctx->skb;
1299                 if (rcd->eop) {
1300                         skb->len += skb->data_len;
1301
1302                         vmxnet3_rx_csum(adapter, skb,
1303                                         (union Vmxnet3_GenericDesc *)rcd);
1304                         skb->protocol = eth_type_trans(skb, adapter->netdev);
1305
1306                         if (unlikely(rcd->ts))
1307                                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci);
1308
1309                         if (adapter->netdev->features & NETIF_F_LRO)
1310                                 netif_receive_skb(skb);
1311                         else
1312                                 napi_gro_receive(&rq->napi, skb);
1313
1314                         ctx->skb = NULL;
1315                 }
1316
1317 rcd_done:
1318                 /* device may have skipped some rx descs */
1319                 ring->next2comp = idx;
1320                 num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
1321                 ring = rq->rx_ring + ring_idx;
1322                 while (num_to_alloc) {
1323                         vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
1324                                           &rxCmdDesc);
1325                         BUG_ON(!rxd->addr);
1326
1327                         /* Recv desc is ready to be used by the device */
1328                         rxd->gen = ring->gen;
1329                         vmxnet3_cmd_ring_adv_next2fill(ring);
1330                         num_to_alloc--;
1331                 }
1332
1333                 /* if needed, update the register */
1334                 if (unlikely(rq->shared->updateRxProd)) {
1335                         VMXNET3_WRITE_BAR0_REG(adapter,
1336                                                rxprod_reg[ring_idx] + rq->qid * 8,
1337                                                ring->next2fill);
1338                 }
1339
1340                 vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
1341                 vmxnet3_getRxComp(rcd,
1342                                   &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
1343         }
1344
1345         return num_rxd;
1346 }
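/* vmxnet3_rq_rx_complete() does the rx side of NAPI polling for one queue:
 * it walks the completion ring while the gen bit matches, reassembles packets
 * from head (skb) and body (page) buffers, refills the rx rings in place and
 * returns the number of completion descriptors consumed, bounded by @quota.
 */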
1347
1348
1349 static void
1350 vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
1351                    struct vmxnet3_adapter *adapter)
1352 {
1353         u32 i, ring_idx;
1354         struct Vmxnet3_RxDesc *rxd;
1355
1356         for (ring_idx = 0; ring_idx < 2; ring_idx++) {
1357                 for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
1358 #ifdef __BIG_ENDIAN_BITFIELD
1359                         struct Vmxnet3_RxDesc rxDesc;
1360 #endif
1361                         vmxnet3_getRxDesc(rxd,
1362                                 &rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
1363
1364                         if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
1365                                         rq->buf_info[ring_idx][i].skb) {
1366                                 dma_unmap_single(&adapter->pdev->dev, rxd->addr,
1367                                                  rxd->len, PCI_DMA_FROMDEVICE);
1368                                 dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
1369                                 rq->buf_info[ring_idx][i].skb = NULL;
1370                         } else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
1371                                         rq->buf_info[ring_idx][i].page) {
1372                                 dma_unmap_page(&adapter->pdev->dev, rxd->addr,
1373                                                rxd->len, PCI_DMA_FROMDEVICE);
1374                                 put_page(rq->buf_info[ring_idx][i].page);
1375                                 rq->buf_info[ring_idx][i].page = NULL;
1376                         }
1377                 }
1378
1379                 rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
1380                 rq->rx_ring[ring_idx].next2fill =
1381                                         rq->rx_ring[ring_idx].next2comp = 0;
1382         }
1383
1384         rq->comp_ring.gen = VMXNET3_INIT_GEN;
1385         rq->comp_ring.next2proc = 0;
1386 }
1387
1388
1389 static void
1390 vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
1391 {
1392         int i;
1393
1394         for (i = 0; i < adapter->num_rx_queues; i++)
1395                 vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
1396 }
1397
1398
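/*
 * Free the DMA-coherent memory backing an rx queue: both descriptor rings,
 * the completion ring and the buf_info array.
 */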
1399 static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1400                                struct vmxnet3_adapter *adapter)
1401 {
1402         int i;
1403         int j;
1404
1405         /* all rx buffers must have already been freed */
1406         for (i = 0; i < 2; i++) {
1407                 if (rq->buf_info[i]) {
1408                         for (j = 0; j < rq->rx_ring[i].size; j++)
1409                                 BUG_ON(rq->buf_info[i][j].page != NULL);
1410                 }
1411         }
1412
1413
1414         for (i = 0; i < 2; i++) {
1415                 if (rq->rx_ring[i].base) {
1416                         dma_free_coherent(&adapter->pdev->dev,
1417                                           rq->rx_ring[i].size
1418                                           * sizeof(struct Vmxnet3_RxDesc),
1419                                           rq->rx_ring[i].base,
1420                                           rq->rx_ring[i].basePA);
1421                         rq->rx_ring[i].base = NULL;
1422                 }
1423                 rq->buf_info[i] = NULL;
1424         }
1425
1426         if (rq->comp_ring.base) {
1427                 dma_free_coherent(&adapter->pdev->dev, rq->comp_ring.size
1428                                   * sizeof(struct Vmxnet3_RxCompDesc),
1429                                   rq->comp_ring.base, rq->comp_ring.basePA);
1430                 rq->comp_ring.base = NULL;
1431         }
1432
1433         if (rq->buf_info[0]) {
1434                 size_t sz = sizeof(struct vmxnet3_rx_buf_info) *
1435                         (rq->rx_ring[0].size + rq->rx_ring[1].size);
1436                 dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
1437                                   rq->buf_info_pa);
1438         }
1439 }
1440
1441
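/*
 * Prepare an rx queue for use: set the buffer type and length for each
 * buf_info entry, zero the descriptor and completion rings, and pre-fill
 * both rings with receive buffers. Fails if ring 0 gets no buffer at all.
 */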
1442 static int
1443 vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1444                 struct vmxnet3_adapter  *adapter)
1445 {
1446         int i;
1447
1448         /* initialize buf_info */
1449         for (i = 0; i < rq->rx_ring[0].size; i++) {
1450
1451                 /* 1st buf for a pkt is skbuff */
1452                 if (i % adapter->rx_buf_per_pkt == 0) {
1453                         rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1454                         rq->buf_info[0][i].len = adapter->skb_buf_size;
1455                 } else { /* subsequent bufs for a pkt are frags */
1456                         rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1457                         rq->buf_info[0][i].len = PAGE_SIZE;
1458                 }
1459         }
1460         for (i = 0; i < rq->rx_ring[1].size; i++) {
1461                 rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1462                 rq->buf_info[1][i].len = PAGE_SIZE;
1463         }
1464
1465         /* reset internal state and allocate buffers for both rings */
1466         for (i = 0; i < 2; i++) {
1467                 rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1468
1469                 memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1470                        sizeof(struct Vmxnet3_RxDesc));
1471                 rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1472         }
1473         if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1474                                     adapter) == 0) {
1475                 /* ring 0 must have at least one rx buffer */
1476                 return -ENOMEM;
1477         }
1478         vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1479
1480         /* reset the comp ring */
1481         rq->comp_ring.next2proc = 0;
1482         memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1483                sizeof(struct Vmxnet3_RxCompDesc));
1484         rq->comp_ring.gen = VMXNET3_INIT_GEN;
1485
1486         /* reset rxctx */
1487         rq->rx_ctx.skb = NULL;
1488
1489         /* stats are not reset */
1490         return 0;
1491 }
1492
1493
1494 static int
1495 vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1496 {
1497         int i, err = 0;
1498
1499         for (i = 0; i < adapter->num_rx_queues; i++) {
1500                 err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1501                 if (unlikely(err)) {
1502                         dev_err(&adapter->netdev->dev, "%s: failed to "
1503                                 "initialize rx queue %i\n",
1504                                 adapter->netdev->name, i);
1505                         break;
1506                 }
1507         }
1508         return err;
1509
1510 }
1511
1512
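/*
 * Allocate the DMA-coherent descriptor rings, completion ring and buf_info
 * array for an rx queue; on any failure everything allocated so far is
 * released through vmxnet3_rq_destroy().
 */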
1513 static int
1514 vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1515 {
1516         int i;
1517         size_t sz;
1518         struct vmxnet3_rx_buf_info *bi;
1519
1520         for (i = 0; i < 2; i++) {
1521
1522                 sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1523                 rq->rx_ring[i].base = dma_alloc_coherent(
1524                                                 &adapter->pdev->dev, sz,
1525                                                 &rq->rx_ring[i].basePA,
1526                                                 GFP_KERNEL);
1527                 if (!rq->rx_ring[i].base) {
1528                         netdev_err(adapter->netdev,
1529                                    "failed to allocate rx ring %d\n", i);
1530                         goto err;
1531                 }
1532         }
1533
1534         sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1535         rq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev, sz,
1536                                                 &rq->comp_ring.basePA,
1537                                                 GFP_KERNEL);
1538         if (!rq->comp_ring.base) {
1539                 netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
1540                 goto err;
1541         }
1542
1543         sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1544                                                    rq->rx_ring[1].size);
1545         bi = dma_zalloc_coherent(&adapter->pdev->dev, sz, &rq->buf_info_pa,
1546                                  GFP_KERNEL);
1547         if (!bi)
1548                 goto err;
1549
1550         rq->buf_info[0] = bi;
1551         rq->buf_info[1] = bi + rq->rx_ring[0].size;
1552
1553         return 0;
1554
1555 err:
1556         vmxnet3_rq_destroy(rq, adapter);
1557         return -ENOMEM;
1558 }
1559
1560
1561 static int
1562 vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1563 {
1564         int i, err = 0;
1565
1566         for (i = 0; i < adapter->num_rx_queues; i++) {
1567                 err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1568                 if (unlikely(err)) {
1569                         dev_err(&adapter->netdev->dev,
1570                                 "%s: failed to create rx queue %i\n",
1571                                 adapter->netdev->name, i);
1572                         goto err_out;
1573                 }
1574         }
1575         return err;
1576 err_out:
1577         vmxnet3_rq_destroy_all(adapter);
1578         return err;
1579
1580 }
1581
1582 /* Multiple queue aware polling function for tx and rx */
1583
1584 static int
1585 vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1586 {
1587         int rcd_done = 0, i;
1588         if (unlikely(adapter->shared->ecr))
1589                 vmxnet3_process_events(adapter);
1590         for (i = 0; i < adapter->num_tx_queues; i++)
1591                 vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1592
1593         for (i = 0; i < adapter->num_rx_queues; i++)
1594                 rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1595                                                    adapter, budget);
1596         return rcd_done;
1597 }
1598
1599
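/*
 * NAPI poll handler that services all tx and rx queues from one context;
 * interrupts are re-enabled once less than the full budget is consumed.
 */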
1600 static int
1601 vmxnet3_poll(struct napi_struct *napi, int budget)
1602 {
1603         struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1604                                           struct vmxnet3_rx_queue, napi);
1605         int rxd_done;
1606
1607         rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1608
1609         if (rxd_done < budget) {
1610                 napi_complete(napi);
1611                 vmxnet3_enable_all_intrs(rx_queue->adapter);
1612         }
1613         return rxd_done;
1614 }
1615
1616 /*
1617  * NAPI polling function for MSI-X mode with multiple Rx queues
1618  * Returns the number of NAPI credits consumed (# of rx descriptors processed)
1619  */
1620
1621 static int
1622 vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1623 {
1624         struct vmxnet3_rx_queue *rq = container_of(napi,
1625                                                 struct vmxnet3_rx_queue, napi);
1626         struct vmxnet3_adapter *adapter = rq->adapter;
1627         int rxd_done;
1628
1629         /* When sharing interrupt with corresponding tx queue, process
1630          * tx completions in that queue as well
1631          */
1632         if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1633                 struct vmxnet3_tx_queue *tq =
1634                                 &adapter->tx_queue[rq - adapter->rx_queue];
1635                 vmxnet3_tq_tx_complete(tq, adapter);
1636         }
1637
1638         rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1639
1640         if (rxd_done < budget) {
1641                 napi_complete(napi);
1642                 vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1643         }
1644         return rxd_done;
1645 }
1646
1647
1648 #ifdef CONFIG_PCI_MSI
1649
1650 /*
1651  * Handle completion interrupts on tx queues
1652  * Returns whether or not the intr is handled
1653  */
1654
1655 static irqreturn_t
1656 vmxnet3_msix_tx(int irq, void *data)
1657 {
1658         struct vmxnet3_tx_queue *tq = data;
1659         struct vmxnet3_adapter *adapter = tq->adapter;
1660
1661         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1662                 vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
1663
1664         /* Handle the case where only one irq is allocated for all tx queues */
1665         if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1666                 int i;
1667                 for (i = 0; i < adapter->num_tx_queues; i++) {
1668                         struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
1669                         vmxnet3_tq_tx_complete(txq, adapter);
1670                 }
1671         } else {
1672                 vmxnet3_tq_tx_complete(tq, adapter);
1673         }
1674         vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
1675
1676         return IRQ_HANDLED;
1677 }
1678
1679
1680 /*
1681  * Handle completion interrupts on rx queues. Returns whether or not the
1682  * intr is handled
1683  */
1684
1685 static irqreturn_t
1686 vmxnet3_msix_rx(int irq, void *data)
1687 {
1688         struct vmxnet3_rx_queue *rq = data;
1689         struct vmxnet3_adapter *adapter = rq->adapter;
1690
1691         /* disable intr if needed */
1692         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1693                 vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
1694         napi_schedule(&rq->napi);
1695
1696         return IRQ_HANDLED;
1697 }
1698
1699 /*
1700  *----------------------------------------------------------------------------
1701  *
1702  * vmxnet3_msix_event --
1703  *
1704  *    vmxnet3 msix event intr handler
1705  *
1706  * Result:
1707  *    whether or not the intr is handled
1708  *
1709  *----------------------------------------------------------------------------
1710  */
1711
1712 static irqreturn_t
1713 vmxnet3_msix_event(int irq, void *data)
1714 {
1715         struct net_device *dev = data;
1716         struct vmxnet3_adapter *adapter = netdev_priv(dev);
1717
1718         /* disable intr if needed */
1719         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1720                 vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
1721
1722         if (adapter->shared->ecr)
1723                 vmxnet3_process_events(adapter);
1724
1725         vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
1726
1727         return IRQ_HANDLED;
1728 }
1729
1730 #endif /* CONFIG_PCI_MSI  */
1731
1732
1733 /* Interrupt handler for vmxnet3  */
1734 static irqreturn_t
1735 vmxnet3_intr(int irq, void *dev_id)
1736 {
1737         struct net_device *dev = dev_id;
1738         struct vmxnet3_adapter *adapter = netdev_priv(dev);
1739
1740         if (adapter->intr.type == VMXNET3_IT_INTX) {
1741                 u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
1742                 if (unlikely(icr == 0))
1743                         /* not ours */
1744                         return IRQ_NONE;
1745         }
1746
1747
1748         /* disable intr if needed */
1749         if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1750                 vmxnet3_disable_all_intrs(adapter);
1751
1752         napi_schedule(&adapter->rx_queue[0].napi);
1753
1754         return IRQ_HANDLED;
1755 }
1756
1757 #ifdef CONFIG_NET_POLL_CONTROLLER
1758
1759 /* netpoll callback. */
1760 static void
1761 vmxnet3_netpoll(struct net_device *netdev)
1762 {
1763         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
        switch (adapter->intr.type) {
#ifdef CONFIG_PCI_MSI
        case VMXNET3_IT_MSIX: {
                int i;

                /* vmxnet3_msix_rx() exists only when CONFIG_PCI_MSI is set */
                for (i = 0; i < adapter->num_rx_queues; i++)
                        vmxnet3_msix_rx(0, &adapter->rx_queue[i]);
                break;
        }
#endif
1771         case VMXNET3_IT_MSI:
1772         default:
1773                 vmxnet3_intr(0, adapter->netdev);
1774                 break;
1775         }
1776
1777 }
1778 #endif  /* CONFIG_NET_POLL_CONTROLLER */
1779
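/*
 * Request irqs for the configured interrupt type (MSI-X, MSI or INTx),
 * honouring the tx/rx sharing mode, and record the interrupt index used by
 * each completion ring and by device events.
 */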
1780 static int
1781 vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
1782 {
1783         struct vmxnet3_intr *intr = &adapter->intr;
1784         int err = 0, i;
1785         int vector = 0;
1786
1787 #ifdef CONFIG_PCI_MSI
1788         if (adapter->intr.type == VMXNET3_IT_MSIX) {
1789                 for (i = 0; i < adapter->num_tx_queues; i++) {
1790                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1791                                 sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
1792                                         adapter->netdev->name, vector);
1793                                 err = request_irq(
1794                                               intr->msix_entries[vector].vector,
1795                                               vmxnet3_msix_tx, 0,
1796                                               adapter->tx_queue[i].name,
1797                                               &adapter->tx_queue[i]);
1798                         } else {
1799                                 sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
1800                                         adapter->netdev->name, vector);
1801                         }
1802                         if (err) {
1803                                 dev_err(&adapter->netdev->dev,
1804                                         "Failed to request irq for MSIX, %s, "
1805                                         "error %d\n",
1806                                         adapter->tx_queue[i].name, err);
1807                                 return err;
1808                         }
1809
1810                         /* Handle the case where only 1 MSIx was allocated for
1811                          * all tx queues */
1812                         if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1813                                 for (; i < adapter->num_tx_queues; i++)
1814                                         adapter->tx_queue[i].comp_ring.intr_idx
1815                                                                 = vector;
1816                                 vector++;
1817                                 break;
1818                         } else {
1819                                 adapter->tx_queue[i].comp_ring.intr_idx
1820                                                                 = vector++;
1821                         }
1822                 }
1823                 if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
1824                         vector = 0;
1825
1826                 for (i = 0; i < adapter->num_rx_queues; i++) {
1827                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
1828                                 sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
1829                                         adapter->netdev->name, vector);
1830                         else
1831                                 sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
1832                                         adapter->netdev->name, vector);
1833                         err = request_irq(intr->msix_entries[vector].vector,
1834                                           vmxnet3_msix_rx, 0,
1835                                           adapter->rx_queue[i].name,
1836                                           &(adapter->rx_queue[i]));
1837                         if (err) {
1838                                 netdev_err(adapter->netdev,
1839                                            "Failed to request irq for MSIX, "
1840                                            "%s, error %d\n",
1841                                            adapter->rx_queue[i].name, err);
1842                                 return err;
1843                         }
1844
1845                         adapter->rx_queue[i].comp_ring.intr_idx = vector++;
1846                 }
1847
1848                 sprintf(intr->event_msi_vector_name, "%s-event-%d",
1849                         adapter->netdev->name, vector);
1850                 err = request_irq(intr->msix_entries[vector].vector,
1851                                   vmxnet3_msix_event, 0,
1852                                   intr->event_msi_vector_name, adapter->netdev);
1853                 intr->event_intr_idx = vector;
1854
1855         } else if (intr->type == VMXNET3_IT_MSI) {
1856                 adapter->num_rx_queues = 1;
1857                 err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
1858                                   adapter->netdev->name, adapter->netdev);
1859         } else {
1860 #endif
1861                 adapter->num_rx_queues = 1;
1862                 err = request_irq(adapter->pdev->irq, vmxnet3_intr,
1863                                   IRQF_SHARED, adapter->netdev->name,
1864                                   adapter->netdev);
1865 #ifdef CONFIG_PCI_MSI
1866         }
1867 #endif
1868         intr->num_intrs = vector + 1;
1869         if (err) {
1870                 netdev_err(adapter->netdev,
1871                            "Failed to request irq (intr type:%d), error %d\n",
1872                            intr->type, err);
1873         } else {
1874                 /* Number of rx queues will not change after this */
1875                 for (i = 0; i < adapter->num_rx_queues; i++) {
1876                         struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
1877                         rq->qid = i;
1878                         rq->qid2 = i + adapter->num_rx_queues;
1879                 }
1880
1883                 /* init our intr settings */
1884                 for (i = 0; i < intr->num_intrs; i++)
1885                         intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
1886                 if (adapter->intr.type != VMXNET3_IT_MSIX) {
1887                         adapter->intr.event_intr_idx = 0;
1888                         for (i = 0; i < adapter->num_tx_queues; i++)
1889                                 adapter->tx_queue[i].comp_ring.intr_idx = 0;
1890                         adapter->rx_queue[0].comp_ring.intr_idx = 0;
1891                 }
1892
1893                 netdev_info(adapter->netdev,
1894                             "intr type %u, mode %u, %u vectors allocated\n",
1895                             intr->type, intr->mask_mode, intr->num_intrs);
1896         }
1897
1898         return err;
1899 }
1900
1901
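/* Release the irqs acquired by vmxnet3_request_irqs(). */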
1902 static void
1903 vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
1904 {
1905         struct vmxnet3_intr *intr = &adapter->intr;
1906         BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
1907
1908         switch (intr->type) {
1909 #ifdef CONFIG_PCI_MSI
1910         case VMXNET3_IT_MSIX:
1911         {
1912                 int i, vector = 0;
1913
1914                 if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1915                         for (i = 0; i < adapter->num_tx_queues; i++) {
1916                                 free_irq(intr->msix_entries[vector++].vector,
1917                                          &(adapter->tx_queue[i]));
1918                                 if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
1919                                         break;
1920                         }
1921                 }
1922
1923                 for (i = 0; i < adapter->num_rx_queues; i++) {
1924                         free_irq(intr->msix_entries[vector++].vector,
1925                                  &(adapter->rx_queue[i]));
1926                 }
1927
1928                 free_irq(intr->msix_entries[vector].vector,
1929                          adapter->netdev);
1930                 BUG_ON(vector >= intr->num_intrs);
1931                 break;
1932         }
1933 #endif
1934         case VMXNET3_IT_MSI:
1935                 free_irq(adapter->pdev->irq, adapter->netdev);
1936                 break;
1937         case VMXNET3_IT_INTX:
1938                 free_irq(adapter->pdev->irq, adapter->netdev);
1939                 break;
1940         default:
1941                 BUG();
1942         }
1943 }
1944
1945
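/*
 * Rebuild the VLAN filter table from the active_vlans bitmap; entry 0 is
 * always set so that untagged packets are accepted.
 */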
1946 static void
1947 vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
1948 {
1949         u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1950         u16 vid;
1951
1952         /* allow untagged pkts */
1953         VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
1954
1955         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
1956                 VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1957 }
1958
1959
1960 static int
1961 vmxnet3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1962 {
1963         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1964
1965         if (!(netdev->flags & IFF_PROMISC)) {
1966                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1967                 unsigned long flags;
1968
1969                 VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1970                 spin_lock_irqsave(&adapter->cmd_lock, flags);
1971                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1972                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1973                 spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1974         }
1975
1976         set_bit(vid, adapter->active_vlans);
1977
1978         return 0;
1979 }
1980
1981
1982 static int
1983 vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
1984 {
1985         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1986
1987         if (!(netdev->flags & IFF_PROMISC)) {
1988                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1989                 unsigned long flags;
1990
1991                 VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
1992                 spin_lock_irqsave(&adapter->cmd_lock, flags);
1993                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1994                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1995                 spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1996         }
1997
1998         clear_bit(vid, adapter->active_vlans);
1999
2000         return 0;
2001 }
2002
2003
2004 static u8 *
2005 vmxnet3_copy_mc(struct net_device *netdev)
2006 {
2007         u8 *buf = NULL;
2008         u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
2009
2010         /* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
2011         if (sz <= 0xffff) {
2012                 /* We may be called with BH disabled */
2013                 buf = kmalloc(sz, GFP_ATOMIC);
2014                 if (buf) {
2015                         struct netdev_hw_addr *ha;
2016                         int i = 0;
2017
2018                         netdev_for_each_mc_addr(ha, netdev)
2019                                 memcpy(buf + i++ * ETH_ALEN, ha->addr,
2020                                        ETH_ALEN);
2021                 }
2022         }
2023         return buf;
2024 }
2025
2026
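/*
 * Program the device rx mode (unicast/broadcast/all-multi/promiscuous) and,
 * when a multicast list is present, DMA-map a copy of it for the device to
 * filter on; falls back to ALL_MULTI if the list cannot be copied.
 */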
2027 static void
2028 vmxnet3_set_mc(struct net_device *netdev)
2029 {
2030         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2031         unsigned long flags;
2032         struct Vmxnet3_RxFilterConf *rxConf =
2033                                         &adapter->shared->devRead.rxFilterConf;
2034         u8 *new_table = NULL;
2035         dma_addr_t new_table_pa = 0;
2036         u32 new_mode = VMXNET3_RXM_UCAST;
2037
2038         if (netdev->flags & IFF_PROMISC) {
2039                 u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2040                 memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
2041
2042                 new_mode |= VMXNET3_RXM_PROMISC;
2043         } else {
2044                 vmxnet3_restore_vlan(adapter);
2045         }
2046
2047         if (netdev->flags & IFF_BROADCAST)
2048                 new_mode |= VMXNET3_RXM_BCAST;
2049
2050         if (netdev->flags & IFF_ALLMULTI)
2051                 new_mode |= VMXNET3_RXM_ALL_MULTI;
2052         else
2053                 if (!netdev_mc_empty(netdev)) {
2054                         new_table = vmxnet3_copy_mc(netdev);
2055                         if (new_table) {
2056                                 new_mode |= VMXNET3_RXM_MCAST;
2057                                 rxConf->mfTableLen = cpu_to_le16(
2058                                         netdev_mc_count(netdev) * ETH_ALEN);
2059                                 new_table_pa = dma_map_single(
2060                                                         &adapter->pdev->dev,
2061                                                         new_table,
2062                                                         rxConf->mfTableLen,
2063                                                         PCI_DMA_TODEVICE);
2064                                 rxConf->mfTablePA = cpu_to_le64(new_table_pa);
2065                         } else {
2066                                 netdev_info(netdev, "failed to copy mcast list"
2067                                             ", setting ALL_MULTI\n");
2068                                 new_mode |= VMXNET3_RXM_ALL_MULTI;
2069                         }
2070                 }
2071
2072
2073         if (!(new_mode & VMXNET3_RXM_MCAST)) {
2074                 rxConf->mfTableLen = 0;
2075                 rxConf->mfTablePA = 0;
2076         }
2077
2078         spin_lock_irqsave(&adapter->cmd_lock, flags);
2079         if (new_mode != rxConf->rxMode) {
2080                 rxConf->rxMode = cpu_to_le32(new_mode);
2081                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2082                                        VMXNET3_CMD_UPDATE_RX_MODE);
2083                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2084                                        VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2085         }
2086
2087         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2088                                VMXNET3_CMD_UPDATE_MAC_FILTERS);
2089         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2090
2091         if (new_table) {
2092                 dma_unmap_single(&adapter->pdev->dev, new_table_pa,
2093                                  rxConf->mfTableLen, PCI_DMA_TODEVICE);
2094                 kfree(new_table);
2095         }
2096 }
2097
2098 void
2099 vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
2100 {
2101         int i;
2102
2103         for (i = 0; i < adapter->num_rx_queues; i++)
2104                 vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
2105 }
2106
2107
2108 /*
2109  *   Set up driver_shared based on settings in adapter.
2110  */
2111
2112 static void
2113 vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2114 {
2115         struct Vmxnet3_DriverShared *shared = adapter->shared;
2116         struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2117         struct Vmxnet3_TxQueueConf *tqc;
2118         struct Vmxnet3_RxQueueConf *rqc;
2119         int i;
2120
2121         memset(shared, 0, sizeof(*shared));
2122
2123         /* driver settings */
2124         shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2125         devRead->misc.driverInfo.version = cpu_to_le32(
2126                                                 VMXNET3_DRIVER_VERSION_NUM);
2127         devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2128                                 VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2129         devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
2130         *((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2131                                 *((u32 *)&devRead->misc.driverInfo.gos));
2132         devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2133         devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2134
2135         devRead->misc.ddPA = cpu_to_le64(adapter->adapter_pa);
2136         devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2137
2138         /* set up feature flags */
2139         if (adapter->netdev->features & NETIF_F_RXCSUM)
2140                 devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2141
2142         if (adapter->netdev->features & NETIF_F_LRO) {
2143                 devRead->misc.uptFeatures |= UPT1_F_LRO;
2144                 devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2145         }
2146         if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
2147                 devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2148
2149         devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2150         devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2151         devRead->misc.queueDescLen = cpu_to_le32(
2152                 adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2153                 adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2154
2155         /* tx queue settings */
2156         devRead->misc.numTxQueues =  adapter->num_tx_queues;
2157         for (i = 0; i < adapter->num_tx_queues; i++) {
2158                 struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2159                 BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2160                 tqc = &adapter->tqd_start[i].conf;
2161                 tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2162                 tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2163                 tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2164                 tqc->ddPA           = cpu_to_le64(tq->buf_info_pa);
2165                 tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2166                 tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2167                 tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2168                 tqc->ddLen          = cpu_to_le32(
2169                                         sizeof(struct vmxnet3_tx_buf_info) *
2170                                         tqc->txRingSize);
2171                 tqc->intrIdx        = tq->comp_ring.intr_idx;
2172         }
2173
2174         /* rx queue settings */
2175         devRead->misc.numRxQueues = adapter->num_rx_queues;
2176         for (i = 0; i < adapter->num_rx_queues; i++) {
2177                 struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2178                 rqc = &adapter->rqd_start[i].conf;
2179                 rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2180                 rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2181                 rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2182                 rqc->ddPA            = cpu_to_le64(rq->buf_info_pa);
2183                 rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2184                 rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2185                 rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2186                 rqc->ddLen           = cpu_to_le32(
2187                                         sizeof(struct vmxnet3_rx_buf_info) *
2188                                         (rqc->rxRingSize[0] +
2189                                          rqc->rxRingSize[1]));
2190                 rqc->intrIdx         = rq->comp_ring.intr_idx;
2191         }
2192
2193 #ifdef VMXNET3_RSS
2194         memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2195
2196         if (adapter->rss) {
2197                 struct UPT1_RSSConf *rssConf = adapter->rss_conf;
2198                 static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
2199                         0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
2200                         0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
2201                         0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
2202                         0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
2203                         0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
2204                 };
2205
2206                 devRead->misc.uptFeatures |= UPT1_F_RSS;
2207                 devRead->misc.numRxQueues = adapter->num_rx_queues;
2208                 rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2209                                     UPT1_RSS_HASH_TYPE_IPV4 |
2210                                     UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2211                                     UPT1_RSS_HASH_TYPE_IPV6;
2212                 rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2213                 rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2214                 rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2215                 memcpy(rssConf->hashKey, rss_key, sizeof(rss_key));
2216
2217                 for (i = 0; i < rssConf->indTableSize; i++)
2218                         rssConf->indTable[i] = ethtool_rxfh_indir_default(
2219                                 i, adapter->num_rx_queues);
2220
2221                 devRead->rssConfDesc.confVer = 1;
2222                 devRead->rssConfDesc.confLen = cpu_to_le32(sizeof(*rssConf));
2223                 devRead->rssConfDesc.confPA =
2224                         cpu_to_le64(adapter->rss_conf_pa);
2225         }
2226
2227 #endif /* VMXNET3_RSS */
2228
2229         /* intr settings */
2230         devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2231                                      VMXNET3_IMM_AUTO;
2232         devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2233         for (i = 0; i < adapter->intr.num_intrs; i++)
2234                 devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2235
2236         devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
2237         devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2238
2239         /* rx filter settings */
2240         devRead->rxFilterConf.rxMode = 0;
2241         vmxnet3_restore_vlan(adapter);
2242         vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2243
2244         /* the rest are already zeroed */
2245 }
2246
2247
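/*
 * Bring the device up: initialize the tx/rx queues, request irqs, hand the
 * shared area to the device and issue ACTIVATE_DEV, then write the rx
 * producer registers and enable NAPI and interrupts.
 */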
2248 int
2249 vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2250 {
2251         int err, i;
2252         u32 ret;
2253         unsigned long flags;
2254
2255         netdev_dbg(adapter->netdev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2256                 " ring sizes %u %u %u\n", adapter->netdev->name,
2257                 adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2258                 adapter->tx_queue[0].tx_ring.size,
2259                 adapter->rx_queue[0].rx_ring[0].size,
2260                 adapter->rx_queue[0].rx_ring[1].size);
2261
2262         vmxnet3_tq_init_all(adapter);
2263         err = vmxnet3_rq_init_all(adapter);
2264         if (err) {
2265                 netdev_err(adapter->netdev,
2266                            "Failed to init rx queue error %d\n", err);
2267                 goto rq_err;
2268         }
2269
2270         err = vmxnet3_request_irqs(adapter);
2271         if (err) {
2272                 netdev_err(adapter->netdev,
2273                            "Failed to set up irqs, error %d\n", err);
2274                 goto irq_err;
2275         }
2276
2277         vmxnet3_setup_driver_shared(adapter);
2278
2279         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2280                                adapter->shared_pa));
2281         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2282                                adapter->shared_pa));
2283         spin_lock_irqsave(&adapter->cmd_lock, flags);
2284         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2285                                VMXNET3_CMD_ACTIVATE_DEV);
2286         ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2287         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2288
2289         if (ret != 0) {
2290                 netdev_err(adapter->netdev,
2291                            "Failed to activate dev: error %u\n", ret);
2292                 err = -EINVAL;
2293                 goto activate_err;
2294         }
2295
2296         for (i = 0; i < adapter->num_rx_queues; i++) {
2297                 VMXNET3_WRITE_BAR0_REG(adapter,
2298                                 VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2299                                 adapter->rx_queue[i].rx_ring[0].next2fill);
2300                 VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2301                                 (i * VMXNET3_REG_ALIGN)),
2302                                 adapter->rx_queue[i].rx_ring[1].next2fill);
2303         }
2304
2305         /* Apply the rx filter settings last. */
2306         vmxnet3_set_mc(adapter->netdev);
2307
2308         /*
2309          * Check link state when first activating device. It will start the
2310          * tx queue if the link is up.
2311          */
2312         vmxnet3_check_link(adapter, true);
2313         for (i = 0; i < adapter->num_rx_queues; i++)
2314                 napi_enable(&adapter->rx_queue[i].napi);
2315         vmxnet3_enable_all_intrs(adapter);
2316         clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2317         return 0;
2318
2319 activate_err:
2320         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2321         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2322         vmxnet3_free_irqs(adapter);
2323 irq_err:
2324 rq_err:
2325         /* free up buffers we allocated */
2326         vmxnet3_rq_cleanup_all(adapter);
2327         return err;
2328 }
2329
2330
2331 void
2332 vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2333 {
2334         unsigned long flags;
2335         spin_lock_irqsave(&adapter->cmd_lock, flags);
2336         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2337         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2338 }
2339
2340
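/*
 * Quiesce the device: issue QUIESCE_DEV, disable interrupts and NAPI, stop
 * the tx queues and release queue buffers and irqs. The QUIESCED state bit
 * makes repeated calls harmless.
 */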
2341 int
2342 vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2343 {
2344         int i;
2345         unsigned long flags;
2346         if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
2347                 return 0;
2348
2349
2350         spin_lock_irqsave(&adapter->cmd_lock, flags);
2351         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2352                                VMXNET3_CMD_QUIESCE_DEV);
2353         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2354         vmxnet3_disable_all_intrs(adapter);
2355
2356         for (i = 0; i < adapter->num_rx_queues; i++)
2357                 napi_disable(&adapter->rx_queue[i].napi);
2358         netif_tx_disable(adapter->netdev);
2359         adapter->link_speed = 0;
2360         netif_carrier_off(adapter->netdev);
2361
2362         vmxnet3_tq_cleanup_all(adapter);
2363         vmxnet3_rq_cleanup_all(adapter);
2364         vmxnet3_free_irqs(adapter);
2365         return 0;
2366 }
2367
2368
2369 static void
2370 vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2371 {
2372         u32 tmp;
2373
2374         tmp = *(u32 *)mac;
2375         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2376
2377         tmp = (mac[5] << 8) | mac[4];
2378         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2379 }
2380
2381
2382 static int
2383 vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2384 {
2385         struct sockaddr *addr = p;
2386         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2387
2388         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2389         vmxnet3_write_mac_addr(adapter, addr->sa_data);
2390
2391         return 0;
2392 }
2393
2394
2395 /* ==================== initialization and cleanup routines ============ */
2396
2397 static int
2398 vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
2399 {
2400         int err;
2401         unsigned long mmio_start, mmio_len;
2402         struct pci_dev *pdev = adapter->pdev;
2403
2404         err = pci_enable_device(pdev);
2405         if (err) {
2406                 dev_err(&pdev->dev, "Failed to enable adapter: error %d\n", err);
2407                 return err;
2408         }
2409
2410         if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
2411                 if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
2412                         dev_err(&pdev->dev,
2413                                 "pci_set_consistent_dma_mask failed\n");
2414                         err = -EIO;
2415                         goto err_set_mask;
2416                 }
2417                 *dma64 = true;
2418         } else {
2419                 if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
2420                         dev_err(&pdev->dev,
2421                                 "pci_set_dma_mask failed\n");
2422                         err = -EIO;
2423                         goto err_set_mask;
2424                 }
2425                 *dma64 = false;
2426         }
2427
2428         err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2429                                            vmxnet3_driver_name);
2430         if (err) {
2431                 dev_err(&pdev->dev,
2432                         "Failed to request region for adapter: error %d\n", err);
2433                 goto err_set_mask;
2434         }
2435
2436         pci_set_master(pdev);
2437
2438         mmio_start = pci_resource_start(pdev, 0);
2439         mmio_len = pci_resource_len(pdev, 0);
2440         adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2441         if (!adapter->hw_addr0) {
2442                 dev_err(&pdev->dev, "Failed to map bar0\n");
2443                 err = -EIO;
2444                 goto err_ioremap;
2445         }
2446
2447         mmio_start = pci_resource_start(pdev, 1);
2448         mmio_len = pci_resource_len(pdev, 1);
2449         adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2450         if (!adapter->hw_addr1) {
2451                 dev_err(&pdev->dev, "Failed to map bar1\n");
2452                 err = -EIO;
2453                 goto err_bar1;
2454         }
2455         return 0;
2456
2457 err_bar1:
2458         iounmap(adapter->hw_addr0);
2459 err_ioremap:
2460         pci_release_selected_regions(pdev, (1 << 2) - 1);
2461 err_set_mask:
2462         pci_disable_device(pdev);
2463         return err;
2464 }
2465
2466
2467 static void
2468 vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2469 {
2470         BUG_ON(!adapter->pdev);
2471
2472         iounmap(adapter->hw_addr0);
2473         iounmap(adapter->hw_addr1);
2474         pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2475         pci_disable_device(adapter->pdev);
2476 }
2477
2478
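/*
 * Derive skb_buf_size and rx_buf_per_pkt from the current MTU, round the
 * ring 0 size up to a multiple of rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
 * and apply the resulting sizes to every rx queue.
 */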
2479 static void
2480 vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2481 {
2482         size_t sz, i, ring0_size, ring1_size, comp_size;
2483         struct vmxnet3_rx_queue *rq = &adapter->rx_queue[0];
2484
2485
2486         if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2487                                     VMXNET3_MAX_ETH_HDR_SIZE) {
2488                 adapter->skb_buf_size = adapter->netdev->mtu +
2489                                         VMXNET3_MAX_ETH_HDR_SIZE;
2490                 if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2491                         adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2492
2493                 adapter->rx_buf_per_pkt = 1;
2494         } else {
2495                 adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2496                 sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2497                                             VMXNET3_MAX_ETH_HDR_SIZE;
2498                 adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2499         }
2500
2501         /*
2502          * for simplicity, force the ring0 size to be a multiple of
2503          * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2504          */
2505         sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2506         ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2507         ring0_size = (ring0_size + sz - 1) / sz * sz;
2508         ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2509                            sz * sz);
2510         ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2511         comp_size = ring0_size + ring1_size;
2512
2513         for (i = 0; i < adapter->num_rx_queues; i++) {
2514                 rq = &adapter->rx_queue[i];
2515                 rq->rx_ring[0].size = ring0_size;
2516                 rq->rx_ring[1].size = ring1_size;
2517                 rq->comp_ring.size = comp_size;
2518         }
2519 }
2520
2521
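/*
 * Create all tx and rx queues with the requested ring sizes. Failing to
 * create a tx queue or rx queue 0 is fatal; failing on a later rx queue
 * only reduces num_rx_queues.
 */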
2522 int
2523 vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2524                       u32 rx_ring_size, u32 rx_ring2_size)
2525 {
2526         int err = 0, i;
2527
2528         for (i = 0; i < adapter->num_tx_queues; i++) {
2529                 struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2530                 tq->tx_ring.size   = tx_ring_size;
2531                 tq->data_ring.size = tx_ring_size;
2532                 tq->comp_ring.size = tx_ring_size;
2533                 tq->shared = &adapter->tqd_start[i].ctrl;
2534                 tq->stopped = true;
2535                 tq->adapter = adapter;
2536                 tq->qid = i;
2537                 err = vmxnet3_tq_create(tq, adapter);
2538                 /*
2539                  * Too late to change num_tx_queues. We cannot make do with
2540                  * fewer tx queues than we asked for.
2541                  */
2542                 if (err)
2543                         goto queue_err;
2544         }
2545
2546         adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2547         adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2548         vmxnet3_adjust_rx_ring_size(adapter);
2549         for (i = 0; i < adapter->num_rx_queues; i++) {
2550                 struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2551                 /* qid and qid2 for rx queues will be assigned later when num
2552                  * of rx queues is finalized after allocating intrs */
2553                 rq->shared = &adapter->rqd_start[i].ctrl;
2554                 rq->adapter = adapter;
2555                 err = vmxnet3_rq_create(rq, adapter);
2556                 if (err) {
2557                         if (i == 0) {
2558                                 netdev_err(adapter->netdev,
2559                                            "Could not allocate any rx queues. "
2560                                            "Aborting.\n");
2561                                 goto queue_err;
2562                         } else {
2563                                 netdev_info(adapter->netdev,
2564                                             "Number of rx queues changed "
2565                                             "to : %d.\n", i);
2566                                 adapter->num_rx_queues = i;
2567                                 err = 0;
2568                                 break;
2569                         }
2570                 }
2571         }
2572         return err;
2573 queue_err:
2574         vmxnet3_tq_destroy_all(adapter);
2575         return err;
2576 }
2577
2578 static int
2579 vmxnet3_open(struct net_device *netdev)
2580 {
2581         struct vmxnet3_adapter *adapter;
2582         int err, i;
2583
2584         adapter = netdev_priv(netdev);
2585
2586         for (i = 0; i < adapter->num_tx_queues; i++)
2587                 spin_lock_init(&adapter->tx_queue[i].tx_lock);
2588
2589         err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE,
2590                                     VMXNET3_DEF_RX_RING_SIZE,
2591                                     VMXNET3_DEF_RX_RING_SIZE);
2592         if (err)
2593                 goto queue_err;
2594
2595         err = vmxnet3_activate_dev(adapter);
2596         if (err)
2597                 goto activate_err;
2598
2599         return 0;
2600
2601 activate_err:
2602         vmxnet3_rq_destroy_all(adapter);
2603         vmxnet3_tq_destroy_all(adapter);
2604 queue_err:
2605         return err;
2606 }
2607
2608
2609 static int
2610 vmxnet3_close(struct net_device *netdev)
2611 {
2612         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2613
2614         /*
2615          * Reset_work may be in the middle of resetting the device, wait for its
2616          * completion.
2617          */
2618         while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2619                 msleep(1);
2620
2621         vmxnet3_quiesce_dev(adapter);
2622
2623         vmxnet3_rq_destroy_all(adapter);
2624         vmxnet3_tq_destroy_all(adapter);
2625
2626         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2627
2628
2629         return 0;
2630 }
2631
2632
2633 void
2634 vmxnet3_force_close(struct vmxnet3_adapter *adapter)
2635 {
2636         int i;
2637
2638         /*
2639          * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
2640          * vmxnet3_close() will deadlock.
2641          */
2642         BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
2643
2644         /* we need to enable NAPI, otherwise dev_close will deadlock */
2645         for (i = 0; i < adapter->num_rx_queues; i++)
2646                 napi_enable(&adapter->rx_queue[i].napi);
2647         dev_close(adapter->netdev);
2648 }
2649
2650
2651 static int
2652 vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
2653 {
2654         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2655         int err = 0;
2656
2657         if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU)
2658                 return -EINVAL;
2659
2660         netdev->mtu = new_mtu;
2661
2662         /*
2663          * Reset_work may be in the middle of resetting the device, wait for its
2664          * completion.
2665          */
2666         while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2667                 msleep(1);
2668
2669         if (netif_running(netdev)) {
2670                 vmxnet3_quiesce_dev(adapter);
2671                 vmxnet3_reset_dev(adapter);
2672
2673                 /* we need to re-create the rx queue based on the new mtu */
2674                 vmxnet3_rq_destroy_all(adapter);
2675                 vmxnet3_adjust_rx_ring_size(adapter);
2676                 err = vmxnet3_rq_create_all(adapter);
2677                 if (err) {
2678                         netdev_err(netdev,
2679                                    "failed to re-create rx queues, "
2680                                    "error %d. Closing it.\n", err);
2681                         goto out;
2682                 }
2683
2684                 err = vmxnet3_activate_dev(adapter);
2685                 if (err) {
2686                         netdev_err(netdev,
2687                                    "failed to re-activate, error %d. "
2688                                    "Closing it\n", err);
2689                         goto out;
2690                 }
2691         }
2692
2693 out:
2694         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2695         if (err)
2696                 vmxnet3_force_close(adapter);
2697
2698         return err;
2699 }
2700
2701
2702 static void
2703 vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
2704 {
2705         struct net_device *netdev = adapter->netdev;
2706
2707         netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
2708                 NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2709                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
2710                 NETIF_F_LRO;
2711         if (dma64)
2712                 netdev->hw_features |= NETIF_F_HIGHDMA;
2713         netdev->vlan_features = netdev->hw_features &
2714                                 ~(NETIF_F_HW_VLAN_CTAG_TX |
2715                                   NETIF_F_HW_VLAN_CTAG_RX);
2716         netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
2717 }
2718
2719
2720 static void
2721 vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2722 {
2723         u32 tmp;
2724
2725         tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
2726         *(u32 *)mac = tmp;
2727
2728         tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
2729         mac[4] = tmp & 0xff;
2730         mac[5] = (tmp >> 8) & 0xff;
2731 }
2732
2733 #ifdef CONFIG_PCI_MSI
2734
2735 /*
2736  * Enable MSIx vectors.
2737  * Returns :
2738  *      0 on successful enabling of required vectors,
2739  *      VMXNET3_LINUX_MIN_MSIX_VECT when only minimum number of vectors required
2740  *       could be enabled.
2741  *      number of vectors which can be enabled otherwise (this number is smaller
2742  *       than VMXNET3_LINUX_MIN_MSIX_VECT)
2743  */
2744
2745 static int
2746 vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter,
2747                              int vectors)
2748 {
2749         int err = 0, vector_threshold;
2750         vector_threshold = VMXNET3_LINUX_MIN_MSIX_VECT;
2751
2752         while (vectors >= vector_threshold) {
2753                 err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
2754                                       vectors);
2755                 if (!err) {
2756                         adapter->intr.num_intrs = vectors;
2757                         return 0;
2758                 } else if (err < 0) {
2759                         dev_err(&adapter->netdev->dev,
2760                                    "Failed to enable MSI-X, error: %d\n", err);
2761                         vectors = 0;
2762                 } else if (err < vector_threshold) {
2763                         break;
2764                 } else {
2765                         /* If fails to enable required number of MSI-x vectors
2766                          * try enabling minimum number of vectors required.
2767                          */
2768                         dev_err(&adapter->netdev->dev,
2769                                 "Failed to enable %d MSI-X, trying %d instead\n",
2770                                     vectors, vector_threshold);
2771                         vectors = vector_threshold;
2772                 }
2773         }
2774
2775         dev_info(&adapter->pdev->dev,
2776                  "Number of MSI-X interrupts that can be allocated "
2777                  "is below the minimum required threshold.\n");
2778         return err;
2779 }
2780
2781
2782 #endif /* CONFIG_PCI_MSI */
2783
2784 static void
2785 vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
2786 {
2787         u32 cfg;
2788         unsigned long flags;
2789
2790         /* query the interrupt configuration (type and mask mode) from the device */
2791         spin_lock_irqsave(&adapter->cmd_lock, flags);
2792         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2793                                VMXNET3_CMD_GET_CONF_INTR);
2794         cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2795         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2796         adapter->intr.type = cfg & 0x3;
2797         adapter->intr.mask_mode = (cfg >> 2) & 0x3;
2798
2799         if (adapter->intr.type == VMXNET3_IT_AUTO)
2800                 adapter->intr.type = VMXNET3_IT_MSIX;
2802
2803 #ifdef CONFIG_PCI_MSI
2804         if (adapter->intr.type == VMXNET3_IT_MSIX) {
2805                 int vector, err = 0;
2806
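                /* One vector per tx queue (or a single vector shared by all
                 * tx queues), one per rx queue unless tx/rx pairs share a
                 * vector ("buddy" sharing), plus one vector for link events.
                 */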
2807                 adapter->intr.num_intrs = (adapter->share_intr ==
2808                                            VMXNET3_INTR_TXSHARE) ? 1 :
2809                                            adapter->num_tx_queues;
2810                 adapter->intr.num_intrs += (adapter->share_intr ==
2811                                            VMXNET3_INTR_BUDDYSHARE) ? 0 :
2812                                            adapter->num_rx_queues;
2813                 adapter->intr.num_intrs += 1;           /* for link event */
2814
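                /* Never request fewer than VMXNET3_LINUX_MIN_MSIX_VECT vectors. */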
2815                 adapter->intr.num_intrs = (adapter->intr.num_intrs >
2816                                            VMXNET3_LINUX_MIN_MSIX_VECT
2817                                            ? adapter->intr.num_intrs :
2818                                            VMXNET3_LINUX_MIN_MSIX_VECT);
2819
2820                 for (vector = 0; vector < adapter->intr.num_intrs; vector++)
2821                         adapter->intr.msix_entries[vector].entry = vector;
2822
2823                 err = vmxnet3_acquire_msix_vectors(adapter,
2824                                                    adapter->intr.num_intrs);
2825                 /* If we cannot allocate one MSI-X vector per queue,
2826                  * limit the number of rx queues to 1.
2827                  */
2828                 if (err == VMXNET3_LINUX_MIN_MSIX_VECT) {
2829                         if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
2830                             || adapter->num_rx_queues != 1) {
2831                                 adapter->share_intr = VMXNET3_INTR_TXSHARE;
2832                                 netdev_err(adapter->netdev,
2833                                            "Limiting #rx queues to 1\n");
2834                                 adapter->num_rx_queues = 1;
2835                                 adapter->intr.num_intrs =
2836                                                 VMXNET3_LINUX_MIN_MSIX_VECT;
2837                         }
2838                         return;
2839                 }
2840                 if (!err)
2841                         return;
2842
2843                 /* If we cannot allocate MSIx vectors use only one rx queue */
2844                 dev_info(&adapter->pdev->dev,
2845                          "Failed to enable MSI-X, error %d. "
2846                          "Limiting #rx queues to 1, trying MSI.\n", err);
2847
2848                 adapter->intr.type = VMXNET3_IT_MSI;
2849         }
2850
2851         if (adapter->intr.type == VMXNET3_IT_MSI) {
2852                 int err;
2853                 err = pci_enable_msi(adapter->pdev);
2854                 if (!err) {
2855                         adapter->num_rx_queues = 1;
2856                         adapter->intr.num_intrs = 1;
2857                         return;
2858                 }
2859         }
2860 #endif /* CONFIG_PCI_MSI */
2861
2862         adapter->num_rx_queues = 1;
2863         dev_info(&adapter->netdev->dev,
2864                  "Using INTx interrupt, #Rx queues: 1.\n");
2865         adapter->intr.type = VMXNET3_IT_INTX;
2866
2867         /* INTx-related settings */
2868         adapter->intr.num_intrs = 1;
2869 }
2870
2871
2872 static void
2873 vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
2874 {
2875         if (adapter->intr.type == VMXNET3_IT_MSIX)
2876                 pci_disable_msix(adapter->pdev);
2877         else if (adapter->intr.type == VMXNET3_IT_MSI)
2878                 pci_disable_msi(adapter->pdev);
2879         else
2880                 BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
2881 }
2882
2883
2884 static void
2885 vmxnet3_tx_timeout(struct net_device *netdev)
2886 {
2887         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2888         adapter->tx_timeout_count++;
2889
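        /* Defer the actual reset to vmxnet3_reset_work(), which runs in
         * process context and can take the rtnl lock.
         */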
2890         netdev_err(adapter->netdev, "tx hang\n");
2891         schedule_work(&adapter->work);
2892         netif_wake_queue(adapter->netdev);
2893 }
2894
2895
2896 static void
2897 vmxnet3_reset_work(struct work_struct *data)
2898 {
2899         struct vmxnet3_adapter *adapter;
2900
2901         adapter = container_of(data, struct vmxnet3_adapter, work);
2902
2903         /* if another thread is resetting the device, no need to proceed */
2904         if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2905                 return;
2906
2907         /* if the device is closed, we must leave it alone */
2908         rtnl_lock();
2909         if (netif_running(adapter->netdev)) {
2910                 netdev_notice(adapter->netdev, "resetting\n");
2911                 vmxnet3_quiesce_dev(adapter);
2912                 vmxnet3_reset_dev(adapter);
2913                 vmxnet3_activate_dev(adapter);
2914         } else {
2915                 netdev_info(adapter->netdev, "already closed\n");
2916         }
2917         rtnl_unlock();
2918
2919         clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2920 }
2921
2922
2923 static int
2924 vmxnet3_probe_device(struct pci_dev *pdev,
2925                      const struct pci_device_id *id)
2926 {
2927         static const struct net_device_ops vmxnet3_netdev_ops = {
2928                 .ndo_open = vmxnet3_open,
2929                 .ndo_stop = vmxnet3_close,
2930                 .ndo_start_xmit = vmxnet3_xmit_frame,
2931                 .ndo_set_mac_address = vmxnet3_set_mac_addr,
2932                 .ndo_change_mtu = vmxnet3_change_mtu,
2933                 .ndo_set_features = vmxnet3_set_features,
2934                 .ndo_get_stats64 = vmxnet3_get_stats64,
2935                 .ndo_tx_timeout = vmxnet3_tx_timeout,
2936                 .ndo_set_rx_mode = vmxnet3_set_mc,
2937                 .ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
2938                 .ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
2939 #ifdef CONFIG_NET_POLL_CONTROLLER
2940                 .ndo_poll_controller = vmxnet3_netpoll,
2941 #endif
2942         };
2943         int err;
2944         bool dma64 = false; /* initialized only to quiet a spurious gcc warning */
2945         u32 ver;
2946         struct net_device *netdev;
2947         struct vmxnet3_adapter *adapter;
2948         u8 mac[ETH_ALEN];
2949         int size;
2950         int num_tx_queues;
2951         int num_rx_queues;
2952
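        /* Multiple queues require MSI/MSI-X; without it fall back to a
         * single tx/rx queue pair.
         */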
2953         if (!pci_msi_enabled())
2954                 enable_mq = 0;
2955
2956 #ifdef VMXNET3_RSS
2957         if (enable_mq)
2958                 num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
2959                                     (int)num_online_cpus());
2960         else
2961 #endif
2962                 num_rx_queues = 1;
2963         num_rx_queues = rounddown_pow_of_two(num_rx_queues);
2964
2965         if (enable_mq)
2966                 num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
2967                                     (int)num_online_cpus());
2968         else
2969                 num_tx_queues = 1;
2970
2971         num_tx_queues = rounddown_pow_of_two(num_tx_queues);
2972         netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
2973                                    max(num_tx_queues, num_rx_queues));
2974         dev_info(&pdev->dev,
2975                  "# of Tx queues : %d, # of Rx queues : %d\n",
2976                  num_tx_queues, num_rx_queues);
2977
2978         if (!netdev)
2979                 return -ENOMEM;
2980
2981         pci_set_drvdata(pdev, netdev);
2982         adapter = netdev_priv(netdev);
2983         adapter->netdev = netdev;
2984         adapter->pdev = pdev;
2985
2986         spin_lock_init(&adapter->cmd_lock);
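        /* Map the adapter structure itself so the device can be given its
         * bus address (used as the driver-data area).
         */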
2987         adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
2988                                              sizeof(struct vmxnet3_adapter),
2989                                              PCI_DMA_TODEVICE);
2990         adapter->shared = dma_alloc_coherent(
2991                                 &adapter->pdev->dev,
2992                                 sizeof(struct Vmxnet3_DriverShared),
2993                                 &adapter->shared_pa, GFP_KERNEL);
2994         if (!adapter->shared) {
2995                 dev_err(&pdev->dev, "Failed to allocate memory\n");
2996                 err = -ENOMEM;
2997                 goto err_alloc_shared;
2998         }
2999
3000         adapter->num_rx_queues = num_rx_queues;
3001         adapter->num_tx_queues = num_tx_queues;
3002         adapter->rx_buf_per_pkt = 1;
3003
3004         size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3005         size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
3006         adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size,
3007                                                 &adapter->queue_desc_pa,
3008                                                 GFP_KERNEL);
3009
3010         if (!adapter->tqd_start) {
3011                 dev_err(&pdev->dev, "Failed to allocate memory\n");
3012                 err = -ENOMEM;
3013                 goto err_alloc_queue_desc;
3014         }
3015         adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
3016                                                             adapter->num_tx_queues);
3017
3018         adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev,
3019                                               sizeof(struct Vmxnet3_PMConf),
3020                                               &adapter->pm_conf_pa,
3021                                               GFP_KERNEL);
3022         if (adapter->pm_conf == NULL) {
3023                 err = -ENOMEM;
3024                 goto err_alloc_pm;
3025         }
3026
3027 #ifdef VMXNET3_RSS
3028
3029         adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev,
3030                                                sizeof(struct UPT1_RSSConf),
3031                                                &adapter->rss_conf_pa,
3032                                                GFP_KERNEL);
3033         if (adapter->rss_conf == NULL) {
3034                 err = -ENOMEM;
3035                 goto err_alloc_rss;
3036         }
3037 #endif /* VMXNET3_RSS */
3038
3039         err = vmxnet3_alloc_pci_resources(adapter, &dma64);
3040         if (err < 0)
3041                 goto err_alloc_pci;
3042
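        /* Version handshake: read the bitmap of revisions the device
         * supports and select revision 1 of the device and UPT APIs.
         */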
3043         ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
3044         if (ver & 1) {
3045                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1);
3046         } else {
3047                 dev_err(&pdev->dev,
3048                         "Incompatible h/w version (0x%x) for adapter\n", ver);
3049                 err = -EBUSY;
3050                 goto err_ver;
3051         }
3052
3053         ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
3054         if (ver & 1) {
3055                 VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
3056         } else {
3057                 dev_err(&pdev->dev,
3058                         "Incompatible UPT version (0x%x) for adapter\n", ver);
3059                 err = -EBUSY;
3060                 goto err_ver;
3061         }
3062
3063         SET_NETDEV_DEV(netdev, &pdev->dev);
3064         vmxnet3_declare_features(adapter, dma64);
3065
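        /* When tx and rx queue counts match, tx queue i can share its
         * interrupt vector with rx queue i ("buddy" sharing); otherwise
         * tx and rx use separate vectors.
         */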
3066         if (adapter->num_tx_queues == adapter->num_rx_queues)
3067                 adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
3068         else
3069                 adapter->share_intr = VMXNET3_INTR_DONTSHARE;
3070
3071         vmxnet3_alloc_intr_resources(adapter);
3072
3073 #ifdef VMXNET3_RSS
3074         if (adapter->num_rx_queues > 1 &&
3075             adapter->intr.type == VMXNET3_IT_MSIX) {
3076                 adapter->rss = true;
3077                 netdev->hw_features |= NETIF_F_RXHASH;
3078                 netdev->features |= NETIF_F_RXHASH;
3079                 dev_dbg(&pdev->dev, "RSS is enabled.\n");
3080         } else {
3081                 adapter->rss = false;
3082         }
3083 #endif
3084
3085         vmxnet3_read_mac_addr(adapter, mac);
3086         memcpy(netdev->dev_addr,  mac, netdev->addr_len);
3087
3088         netdev->netdev_ops = &vmxnet3_netdev_ops;
3089         vmxnet3_set_ethtool_ops(netdev);
3090         netdev->watchdog_timeo = 5 * HZ;
3091
3092         INIT_WORK(&adapter->work, vmxnet3_reset_work);
3093         set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3094
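        /* With MSI-X, each rx queue gets its own NAPI context that polls rx
         * completions only; otherwise a single NAPI context services the
         * whole device.
         */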
3095         if (adapter->intr.type == VMXNET3_IT_MSIX) {
3096                 int i;
3097                 for (i = 0; i < adapter->num_rx_queues; i++) {
3098                         netif_napi_add(adapter->netdev,
3099                                        &adapter->rx_queue[i].napi,
3100                                        vmxnet3_poll_rx_only, 64);
3101                 }
3102         } else {
3103                 netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
3104                                vmxnet3_poll, 64);
3105         }
3106
3107         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
3108         netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
3109
3110         netif_carrier_off(netdev);
3111         err = register_netdev(netdev);
3112
3113         if (err) {
3114                 dev_err(&pdev->dev, "Failed to register adapter\n");
3115                 goto err_register;
3116         }
3117
3118         vmxnet3_check_link(adapter, false);
3119         return 0;
3120
3121 err_register:
3122         vmxnet3_free_intr_resources(adapter);
3123 err_ver:
3124         vmxnet3_free_pci_resources(adapter);
3125 err_alloc_pci:
3126 #ifdef VMXNET3_RSS
3127         dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3128                           adapter->rss_conf, adapter->rss_conf_pa);
3129 err_alloc_rss:
3130 #endif
3131         dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3132                           adapter->pm_conf, adapter->pm_conf_pa);
3133 err_alloc_pm:
3134         dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3135                           adapter->queue_desc_pa);
3136 err_alloc_queue_desc:
3137         dma_free_coherent(&adapter->pdev->dev,
3138                           sizeof(struct Vmxnet3_DriverShared),
3139                           adapter->shared, adapter->shared_pa);
3140 err_alloc_shared:
3141         dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3142                          sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3143         free_netdev(netdev);
3144         return err;
3145 }
3146
3147
3148 static void
3149 vmxnet3_remove_device(struct pci_dev *pdev)
3150 {
3151         struct net_device *netdev = pci_get_drvdata(pdev);
3152         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3153         int size = 0;
3154         int num_rx_queues;
3155
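        /* Recompute the rx queue count that sized the queue descriptor
         * allocation at probe time; adapter->num_rx_queues may have been
         * reduced afterwards if MSI-X vectors were unavailable.
         */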
3156 #ifdef VMXNET3_RSS
3157         if (enable_mq)
3158                 num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3159                                     (int)num_online_cpus());
3160         else
3161 #endif
3162                 num_rx_queues = 1;
3163         num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3164
3165         cancel_work_sync(&adapter->work);
3166
3167         unregister_netdev(netdev);
3168
3169         vmxnet3_free_intr_resources(adapter);
3170         vmxnet3_free_pci_resources(adapter);
3171 #ifdef VMXNET3_RSS
3172         dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3173                           adapter->rss_conf, adapter->rss_conf_pa);
3174 #endif
3175         dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3176                           adapter->pm_conf, adapter->pm_conf_pa);
3177
3178         size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3179         size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3180         dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3181                           adapter->queue_desc_pa);
3182         dma_free_coherent(&adapter->pdev->dev,
3183                           sizeof(struct Vmxnet3_DriverShared),
3184                           adapter->shared, adapter->shared_pa);
3185         dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3186                          sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3187         free_netdev(netdev);
3188 }
3189
3190
3191 #ifdef CONFIG_PM
3192
3193 static int
3194 vmxnet3_suspend(struct device *device)
3195 {
3196         struct pci_dev *pdev = to_pci_dev(device);
3197         struct net_device *netdev = pci_get_drvdata(pdev);
3198         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3199         struct Vmxnet3_PMConf *pmConf;
3200         struct ethhdr *ehdr;
3201         struct arphdr *ahdr;
3202         u8 *arpreq;
3203         struct in_device *in_dev;
3204         struct in_ifaddr *ifa;
3205         unsigned long flags;
3206         int i = 0;
3207
3208         if (!netif_running(netdev))
3209                 return 0;
3210
3211         for (i = 0; i < adapter->num_rx_queues; i++)
3212                 napi_disable(&adapter->rx_queue[i].napi);
3213
3214         vmxnet3_disable_all_intrs(adapter);
3215         vmxnet3_free_irqs(adapter);
3216         vmxnet3_free_intr_resources(adapter);
3217
3218         netif_device_detach(netdev);
3219         netif_tx_stop_all_queues(netdev);
3220
3221         /* Create wake-up filters. */
3222         pmConf = adapter->pm_conf;
3223         memset(pmConf, 0, sizeof(*pmConf));
3224
3225         if (adapter->wol & WAKE_UCAST) {
3226                 pmConf->filters[i].patternSize = ETH_ALEN;
3227                 pmConf->filters[i].maskSize = 1;
3228                 memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3229                 pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
3230
3231                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3232                 i++;
3233         }
3234
3235         if (adapter->wol & WAKE_ARP) {
3236                 in_dev = in_dev_get(netdev);
3237                 if (!in_dev)
3238                         goto skip_arp;
3239
3240                 ifa = (struct in_ifaddr *)in_dev->ifa_list;
3241                 if (!ifa)
3242                         goto skip_arp;
3243
3244                 pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header */
3245                         sizeof(struct arphdr) +         /* ARP header */
3246                         2 * ETH_ALEN +                  /* 2 Ethernet addresses */
3247                         2 * sizeof(u32);                /* 2 IPv4 addresses */
3248                 pmConf->filters[i].maskSize =
3249                         (pmConf->filters[i].patternSize - 1) / 8 + 1;
3250
3251                 /* ETH_P_ARP in Ethernet header. */
3252                 ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3253                 ehdr->h_proto = htons(ETH_P_ARP);
3254
3255                 /* ARPOP_REQUEST in ARP header. */
3256                 ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3257                 ahdr->ar_op = htons(ARPOP_REQUEST);
3258                 arpreq = (u8 *)(ahdr + 1);
3259
3260                 /* The Unicast IPv4 address in 'tip' field. */
3261                 arpreq += 2 * ETH_ALEN + sizeof(u32);
3262                 *(u32 *)arpreq = ifa->ifa_address;
3263
3264                 /* The mask for the relevant bits. */
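                /* Each mask byte covers eight pattern bytes, one bit per
                 * byte: match the EtherType (bytes 12-13), the ARP opcode
                 * (bytes 20-21) and the target IP (bytes 38-41).
                 */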
3265                 pmConf->filters[i].mask[0] = 0x00;
3266                 pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3267                 pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3268                 pmConf->filters[i].mask[3] = 0x00;
3269                 pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3270                 pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3271                 in_dev_put(in_dev);
3272
3273                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3274                 i++;
3275         }
3276
3277 skip_arp:
3278         if (adapter->wol & WAKE_MAGIC)
3279                 pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3280
3281         pmConf->numFilters = i;
3282
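        /* Publish the PM configuration through the shared area and ask the
         * device to reload it.
         */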
3283         adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3284         adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3285                                                                   *pmConf));
3286         adapter->shared->devRead.pmConfDesc.confPA =
3287                 cpu_to_le64(adapter->pm_conf_pa);
3288
3289         spin_lock_irqsave(&adapter->cmd_lock, flags);
3290         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3291                                VMXNET3_CMD_UPDATE_PMCFG);
3292         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3293
3294         pci_save_state(pdev);
3295         pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3296                         adapter->wol);
3297         pci_disable_device(pdev);
3298         pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3299
3300         return 0;
3301 }
3302
3303
3304 static int
3305 vmxnet3_resume(struct device *device)
3306 {
3307         int err, i = 0;
3308         unsigned long flags;
3309         struct pci_dev *pdev = to_pci_dev(device);
3310         struct net_device *netdev = pci_get_drvdata(pdev);
3311         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3312         struct Vmxnet3_PMConf *pmConf;
3313
3314         if (!netif_running(netdev))
3315                 return 0;
3316
3317         /* Destroy wake-up filters. */
3318         pmConf = adapter->pm_conf;
3319         memset(pmConf, 0, sizeof(*pmConf));
3320
3321         adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3322         adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3323                                                                   *pmConf));
3324         adapter->shared->devRead.pmConfDesc.confPA =
3325                 cpu_to_le64(adapter->pm_conf_pa);
3326
3327         netif_device_attach(netdev);
3328         pci_set_power_state(pdev, PCI_D0);
3329         pci_restore_state(pdev);
3330         err = pci_enable_device_mem(pdev);
3331         if (err != 0)
3332                 return err;
3333
3334         pci_enable_wake(pdev, PCI_D0, 0);
3335
3336         spin_lock_irqsave(&adapter->cmd_lock, flags);
3337         VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3338                                VMXNET3_CMD_UPDATE_PMCFG);
3339         spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3340         vmxnet3_alloc_intr_resources(adapter);
3341         vmxnet3_request_irqs(adapter);
3342         for (i = 0; i < adapter->num_rx_queues; i++)
3343                 napi_enable(&adapter->rx_queue[i].napi);
3344         vmxnet3_enable_all_intrs(adapter);
3345
3346         return 0;
3347 }
3348
3349 static const struct dev_pm_ops vmxnet3_pm_ops = {
3350         .suspend = vmxnet3_suspend,
3351         .resume = vmxnet3_resume,
3352 };
3353 #endif
3354
3355 static struct pci_driver vmxnet3_driver = {
3356         .name           = vmxnet3_driver_name,
3357         .id_table       = vmxnet3_pciid_table,
3358         .probe          = vmxnet3_probe_device,
3359         .remove         = vmxnet3_remove_device,
3360 #ifdef CONFIG_PM
3361         .driver.pm      = &vmxnet3_pm_ops,
3362 #endif
3363 };
3364
3365
3366 static int __init
3367 vmxnet3_init_module(void)
3368 {
3369         pr_info("%s - version %s\n", VMXNET3_DRIVER_DESC,
3370                 VMXNET3_DRIVER_VERSION_REPORT);
3371         return pci_register_driver(&vmxnet3_driver);
3372 }
3373
3374 module_init(vmxnet3_init_module);
3375
3376
3377 static void
3378 vmxnet3_exit_module(void)
3379 {
3380         pci_unregister_driver(&vmxnet3_driver);
3381 }
3382
3383 module_exit(vmxnet3_exit_module);
3384
3385 MODULE_AUTHOR("VMware, Inc.");
3386 MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3387 MODULE_LICENSE("GPL v2");
3388 MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);