drivers/net/cxgb3/sge.c

   1 /*
   2  * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
   3  *
   4  * This software is available to you under a choice of one of two
   5  * licenses.  You may choose to be licensed under the terms of the GNU
   6  * General Public License (GPL) Version 2, available from the file
   7  * COPYING in the main directory of this source tree, or the
   8  * OpenIB.org BSD license below:
   9  *
  10  *     Redistribution and use in source and binary forms, with or
  11  *     without modification, are permitted provided that the following
  12  *     conditions are met:
  13  *
  14  *      - Redistributions of source code must retain the above
  15  *        copyright notice, this list of conditions and the following
  16  *        disclaimer.
  17  *
  18  *      - Redistributions in binary form must reproduce the above
  19  *        copyright notice, this list of conditions and the following
  20  *        disclaimer in the documentation and/or other materials
  21  *        provided with the distribution.
  22  *
  23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30  * SOFTWARE.
  31  */
  32 #include <linux/skbuff.h>
  33 #include <linux/netdevice.h>
  34 #include <linux/etherdevice.h>
  35 #include <linux/if_vlan.h>
  36 #include <linux/ip.h>
  37 #include <linux/tcp.h>
  38 #include <linux/dma-mapping.h>
  39 #include <net/arp.h>
  40 #include "common.h"
  41 #include "regs.h"
  42 #include "sge_defs.h"
  43 #include "t3_cpl.h"
  44 #include "firmware_exports.h"
  45
/* NOTE(review): not referenced in the visible part of this file; purpose to be confirmed. */
#define USE_GTS 0

/* NOTE(review): presumably the size of the small Rx buffers; not used in the visible code. */
#define SGE_RX_SM_BUF_SIZE 1536

/*
 * get_packet() copies packets of at most SGE_RX_COPY_THRES bytes into a
 * freshly allocated sk_buff and recycles the original Rx buffer.
 */
#define SGE_RX_COPY_THRES  256
#define SGE_RX_PULL_LEN    128

/*
 * Bytes set aside at the end of every free-list page allocation;
 * alloc_pg_chunk() points the page's chunk counter (p_cnt) there.
 */
#define SGE_PG_RSVD SMP_CACHE_BYTES
/*
 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
 * It must be a divisor of PAGE_SIZE.  If set to 0 FL0 will use sk_buffs
 * directly.
 */
#define FL0_PG_CHUNK_SIZE  2048
#define FL0_PG_ORDER 0
#define FL0_PG_ALLOC_SIZE (PAGE_SIZE << FL0_PG_ORDER)
/*
 * FL1 geometry depends on PAGE_SIZE: 16KB chunks carved from order-0
 * allocations when pages are larger than 8KB, otherwise 8KB chunks carved
 * from order-1 allocations.
 */
#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
#define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER)

/* NOTE(review): drop threshold and Rx reclaim period; their users are outside the visible code. */
#define SGE_RX_DROP_THRES 16
#define RX_RECLAIM_PERIOD (HZ/4)

/*
 * Max number of Rx buffers we replenish at a time.
 */
#define MAX_RX_REFILL 16U
/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD (HZ / 4)
#define TX_RECLAIM_TIMER_CHUNK 64U
#define TX_RECLAIM_CHUNK 16U

/* WR size in bytes (a flit is 8 bytes, see struct tx_desc) */
#define WR_LEN (WR_FLITS * 8)
  83
/*
 * Types of Tx queues in each queue set.  Order here matters, do not change.
 * The values are used as indices into a queue set's txq[] array (see
 * init_qset_cntxt()).
 */
enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
  88
/* Values for sge_txq.flags (bit flags, may be OR-ed together) */
enum {
        TXQ_RUNNING = 1 << 0,   /* fetch engine is running */
        TXQ_LAST_PKT_DB = 1 << 1,       /* last packet rang the doorbell */
};
  94
/* HW Tx descriptor: an array of TX_DESC_FLITS big-endian 64-bit flits. */
struct tx_desc {
        __be64 flit[TX_DESC_FLITS];
};
  98
/*
 * HW free-list (Rx buffer) descriptor.  All fields are big endian and are
 * written by add_one_rx_buf(), add_one_rx_chunk() and recycle_rx_buf().
 */
struct rx_desc {
        __be32 addr_lo;         /* low 32 bits of the buffer DMA address */
        __be32 len_gen;         /* written with V_FLD_GEN1(generation) */
        __be32 gen2;            /* written with V_FLD_GEN2(generation) */
        __be32 addr_hi;         /* high 32 bits of the buffer DMA address */
};
 105
/*
 * The index fields below are consumed by unmap_skb(), which also fills
 * them in for the following descriptor when an SGL spills over.
 */
struct tx_sw_desc {             /* SW state per Tx descriptor */
        struct sk_buff *skb;    /* non-NULL when an SGL is present (see free_tx_desc()) */
        u8 eop;       /* set if last descriptor for packet */
        u8 addr_idx;  /* buffer index of first SGL entry in descriptor */
        u8 fragidx;   /* first page fragment associated with descriptor */
        s8 sflit;     /* start flit of first SGL entry in descriptor */
};
 113
/*
 * Which union member is valid depends on the owning free list's use_pages
 * setting (see clear_rx_desc() and refill_fl()).
 */
struct rx_sw_desc {                /* SW state per Rx descriptor */
        union {
                struct sk_buff *skb;            /* buffer when the list holds sk_buffs */
                struct fl_pg_chunk pg_chunk;    /* buffer when the list holds page chunks */
        };
        DECLARE_PCI_UNMAP_ADDR(dma_addr);       /* DMA address used for unmap/sync */
};
 121
/*
 * NOTE(review): none of these fields is consumed in the visible part of the
 * file; the per-field meanings are not documented here and should be taken
 * from the code that parses responses.
 */
struct rsp_desc {               /* response queue descriptor */
        struct rss_header rss_hdr;
        __be32 flags;
        __be32 len_cq;
        u8 imm_data[47];
        u8 intr_gen;
};
 129
/*
 * Holds unmapping information for Tx packets that need deferred unmapping.
 * This structure lives at skb->head and must be allocated by callers.
 */
struct deferred_unmap_info {
        struct pci_dev *pdev;   /* device the addresses were mapped for */
        /* one slot per page fragment plus one (presumably the linear part - confirm) */
        dma_addr_t addr[MAX_SKB_FRAGS + 1];
};
 138
/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 *
 * The table is indexed by the flit count (see flits_to_desc()); entry 0
 * is 0.  The run lengths differ between the two layouts, which are
 * selected at compile time by SGE_NUM_GENBITS.
 */
static u8 flit_desc_map[] = {
        0,
#if SGE_NUM_GENBITS == 1
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};
 163
 164 static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
 165 {
 166         return container_of(q, struct sge_qset, fl[qidx]);
 167 }
 168
 169 static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
 170 {
 171         return container_of(q, struct sge_qset, rspq);
 172 }
 173
 174 static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
 175 {
 176         return container_of(q, struct sge_qset, txq[qidx]);
 177 }
 178
 179 /**
 180  *      refill_rspq - replenish an SGE response queue
 181  *      @adapter: the adapter
 182  *      @q: the response queue to replenish
 183  *      @credits: how many new responses to make available
 184  *
 185  *      Replenishes a response queue by making the supplied number of responses
 186  *      available to HW.
 187  */
 188 static inline void refill_rspq(struct adapter *adapter,
 189                                const struct sge_rspq *q, unsigned int credits)
 190 {
 191         rmb();
 192         t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
 193                      V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 194 }
 195
 196 /**
 197  *      need_skb_unmap - does the platform need unmapping of sk_buffs?
 198  *
 199  *      Returns true if the platfrom needs sk_buff unmapping.  The compiler
 200  *      optimizes away unecessary code if this returns true.
 201  */
 202 static inline int need_skb_unmap(void)
 203 {
 204         /*
 205          * This structure is used to tell if the platfrom needs buffer
 206          * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
 207          */
 208         struct dummy {
 209                 DECLARE_PCI_UNMAP_ADDR(addr);
 210         };
 211
 212         return sizeof(struct dummy) != 0;
 213 }
 214
 215 /**
 216  *      unmap_skb - unmap a packet main body and its page fragments
 217  *      @skb: the packet
 218  *      @q: the Tx queue containing Tx descriptors for the packet
 219  *      @cidx: index of Tx descriptor
 220  *      @pdev: the PCI device
 221  *
 222  *      Unmap the main body of an sk_buff and its page fragments, if any.
 223  *      Because of the fairly complicated structure of our SGLs and the desire
 224  *      to conserve space for metadata, the information necessary to unmap an
 225  *      sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
 226  *      descriptors (the physical addresses of the various data buffers), and
 227  *      the SW descriptor state (assorted indices).  The send functions
 228  *      initialize the indices for the first packet descriptor so we can unmap
 229  *      the buffers held in the first Tx descriptor here, and we have enough
 230  *      information at this point to set the state for the next Tx descriptor.
 231  *
 232  *      Note that it is possible to clean up the first descriptor of a packet
 233  *      before the send routines have written the next descriptors, but this
 234  *      race does not cause any problem.  We just end up writing the unmapping
 235  *      info for the descriptor first.
 236  */
 237 static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
 238                              unsigned int cidx, struct pci_dev *pdev)
 239 {
 240         const struct sg_ent *sgp;
 241         struct tx_sw_desc *d = &q->sdesc[cidx];
 242         int nfrags, frag_idx, curflit, j = d->addr_idx;
 243
 244         sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
 245         frag_idx = d->fragidx;
 246
 247         if (frag_idx == 0 && skb_headlen(skb)) {
 248                 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
 249                                  skb_headlen(skb), PCI_DMA_TODEVICE);
 250                 j = 1;
 251         }
 252
 253         curflit = d->sflit + 1 + j;
 254         nfrags = skb_shinfo(skb)->nr_frags;
 255
 256         while (frag_idx < nfrags && curflit < WR_FLITS) {
 257                 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
 258                                skb_shinfo(skb)->frags[frag_idx].size,
 259                                PCI_DMA_TODEVICE);
 260                 j ^= 1;
 261                 if (j == 0) {
 262                         sgp++;
 263                         curflit++;
 264                 }
 265                 curflit++;
 266                 frag_idx++;
 267         }
 268
 269         if (frag_idx < nfrags) {   /* SGL continues into next Tx descriptor */
 270                 d = cidx + 1 == q->size ? q->sdesc : d + 1;
 271                 d->fragidx = frag_idx;
 272                 d->addr_idx = j;
 273                 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
 274         }
 275 }
 276
 277 /**
 278  *      free_tx_desc - reclaims Tx descriptors and their buffers
 279  *      @adapter: the adapter
 280  *      @q: the Tx queue to reclaim descriptors from
 281  *      @n: the number of descriptors to reclaim
 282  *
 283  *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 284  *      Tx buffers.  Called with the Tx queue lock held.
 285  */
 286 static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
 287                          unsigned int n)
 288 {
 289         struct tx_sw_desc *d;
 290         struct pci_dev *pdev = adapter->pdev;
 291         unsigned int cidx = q->cidx;
 292
 293         const int need_unmap = need_skb_unmap() &&
 294                                q->cntxt_id >= FW_TUNNEL_SGEEC_START;
 295
 296         d = &q->sdesc[cidx];
 297         while (n--) {
 298                 if (d->skb) {   /* an SGL is present */
 299                         if (need_unmap)
 300                                 unmap_skb(d->skb, q, cidx, pdev);
 301                         if (d->eop)
 302                                 kfree_skb(d->skb);
 303                 }
 304                 ++d;
 305                 if (++cidx == q->size) {
 306                         cidx = 0;
 307                         d = q->sdesc;
 308                 }
 309         }
 310         q->cidx = cidx;
 311 }
 312
 313 /**
 314  *      reclaim_completed_tx - reclaims completed Tx descriptors
 315  *      @adapter: the adapter
 316  *      @q: the Tx queue to reclaim completed descriptors from
 317  *      @chunk: maximum number of descriptors to reclaim
 318  *
 319  *      Reclaims Tx descriptors that the SGE has indicated it has processed,
 320  *      and frees the associated buffers if possible.  Called with the Tx
 321  *      queue's lock held.
 322  */
 323 static inline unsigned int reclaim_completed_tx(struct adapter *adapter,
 324                                                 struct sge_txq *q,
 325                                                 unsigned int chunk)
 326 {
 327         unsigned int reclaim = q->processed - q->cleaned;
 328
 329         reclaim = min(chunk, reclaim);
 330         if (reclaim) {
 331                 free_tx_desc(adapter, q, reclaim);
 332                 q->cleaned += reclaim;
 333                 q->in_use -= reclaim;
 334         }
 335         return q->processed - q->cleaned;
 336 }
 337
 338 /**
 339  *      should_restart_tx - are there enough resources to restart a Tx queue?
 340  *      @q: the Tx queue
 341  *
 342  *      Checks if there are enough descriptors to restart a suspended Tx queue.
 343  */
 344 static inline int should_restart_tx(const struct sge_txq *q)
 345 {
 346         unsigned int r = q->processed - q->cleaned;
 347
 348         return q->in_use - r < (q->size >> 1);
 349 }
 350
 351 static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
 352                           struct rx_sw_desc *d)
 353 {
 354         if (q->use_pages && d->pg_chunk.page) {
 355                 (*d->pg_chunk.p_cnt)--;
 356                 if (!*d->pg_chunk.p_cnt)
 357                         pci_unmap_page(pdev,
 358                                        d->pg_chunk.mapping,
 359                                        q->alloc_size, PCI_DMA_FROMDEVICE);
 360
 361                 put_page(d->pg_chunk.page);
 362                 d->pg_chunk.page = NULL;
 363         } else {
 364                 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
 365                                  q->buf_size, PCI_DMA_FROMDEVICE);
 366                 kfree_skb(d->skb);
 367                 d->skb = NULL;
 368         }
 369 }
 370
 371 /**
 372  *      free_rx_bufs - free the Rx buffers on an SGE free list
 373  *      @pdev: the PCI device associated with the adapter
 374  *      @rxq: the SGE free list to clean up
 375  *
 376  *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 377  *      this queue should be stopped before calling this function.
 378  */
 379 static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
 380 {
 381         unsigned int cidx = q->cidx;
 382
 383         while (q->credits--) {
 384                 struct rx_sw_desc *d = &q->sdesc[cidx];
 385
 386
 387                 clear_rx_desc(pdev, q, d);
 388                 if (++cidx == q->size)
 389                         cidx = 0;
 390         }
 391
 392         if (q->pg_chunk.page) {
 393                 __free_pages(q->pg_chunk.page, q->order);
 394                 q->pg_chunk.page = NULL;
 395         }
 396 }
 397
 398 /**
 399  *      add_one_rx_buf - add a packet buffer to a free-buffer list
 400  *      @va:  buffer start VA
 401  *      @len: the buffer length
 402  *      @d: the HW Rx descriptor to write
 403  *      @sd: the SW Rx descriptor to write
 404  *      @gen: the generation bit value
 405  *      @pdev: the PCI device associated with the adapter
 406  *
 407  *      Add a buffer of the given length to the supplied HW and SW Rx
 408  *      descriptors.
 409  */
 410 static inline int add_one_rx_buf(void *va, unsigned int len,
 411                                  struct rx_desc *d, struct rx_sw_desc *sd,
 412                                  unsigned int gen, struct pci_dev *pdev)
 413 {
 414         dma_addr_t mapping;
 415
 416         mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
 417         if (unlikely(pci_dma_mapping_error(pdev, mapping)))
 418                 return -ENOMEM;
 419
 420         pci_unmap_addr_set(sd, dma_addr, mapping);
 421
 422         d->addr_lo = cpu_to_be32(mapping);
 423         d->addr_hi = cpu_to_be32((u64) mapping >> 32);
 424         wmb();
 425         d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
 426         d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
 427         return 0;
 428 }
 429
 430 static inline int add_one_rx_chunk(dma_addr_t mapping, struct rx_desc *d,
 431                                    unsigned int gen)
 432 {
 433         d->addr_lo = cpu_to_be32(mapping);
 434         d->addr_hi = cpu_to_be32((u64) mapping >> 32);
 435         wmb();
 436         d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
 437         d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
 438         return 0;
 439 }
 440
 441 static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
 442                           struct rx_sw_desc *sd, gfp_t gfp,
 443                           unsigned int order)
 444 {
 445         if (!q->pg_chunk.page) {
 446                 dma_addr_t mapping;
 447
 448                 q->pg_chunk.page = alloc_pages(gfp, order);
 449                 if (unlikely(!q->pg_chunk.page))
 450                         return -ENOMEM;
 451                 q->pg_chunk.va = page_address(q->pg_chunk.page);
 452                 q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
 453                                     SGE_PG_RSVD;
 454                 q->pg_chunk.offset = 0;
 455                 mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
 456                                        0, q->alloc_size, PCI_DMA_FROMDEVICE);
 457                 q->pg_chunk.mapping = mapping;
 458         }
 459         sd->pg_chunk = q->pg_chunk;
 460
 461         prefetch(sd->pg_chunk.p_cnt);
 462
 463         q->pg_chunk.offset += q->buf_size;
 464         if (q->pg_chunk.offset == (PAGE_SIZE << order))
 465                 q->pg_chunk.page = NULL;
 466         else {
 467                 q->pg_chunk.va += q->buf_size;
 468                 get_page(q->pg_chunk.page);
 469         }
 470
 471         if (sd->pg_chunk.offset == 0)
 472                 *sd->pg_chunk.p_cnt = 1;
 473         else
 474                 *sd->pg_chunk.p_cnt += 1;
 475
 476         return 0;
 477 }
 478
 479 static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 480 {
 481         if (q->pend_cred >= q->credits / 4) {
 482                 q->pend_cred = 0;
 483                 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
 484         }
 485 }
 486
 487 /**
 488  *      refill_fl - refill an SGE free-buffer list
 489  *      @adapter: the adapter
 490  *      @q: the free-list to refill
 491  *      @n: the number of new buffers to allocate
 492  *      @gfp: the gfp flags for allocating new buffers
 493  *
 494  *      (Re)populate an SGE free-buffer list with up to @n new packet buffers,
 495  *      allocated with the supplied gfp flags.  The caller must assure that
 496  *      @n does not exceed the queue's capacity.
 497  */
 498 static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
 499 {
 500         struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 501         struct rx_desc *d = &q->desc[q->pidx];
 502         unsigned int count = 0;
 503
 504         while (n--) {
 505                 dma_addr_t mapping;
 506                 int err;
 507
 508                 if (q->use_pages) {
 509                         if (unlikely(alloc_pg_chunk(adap, q, sd, gfp,
 510                                                     q->order))) {
 511 nomem:                          q->alloc_failed++;
 512                                 break;
 513                         }
 514                         mapping = sd->pg_chunk.mapping + sd->pg_chunk.offset;
 515                         pci_unmap_addr_set(sd, dma_addr, mapping);
 516
 517                         add_one_rx_chunk(mapping, d, q->gen);
 518                         pci_dma_sync_single_for_device(adap->pdev, mapping,
 519                                                 q->buf_size - SGE_PG_RSVD,
 520                                                 PCI_DMA_FROMDEVICE);
 521                 } else {
 522                         void *buf_start;
 523
 524                         struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
 525                         if (!skb)
 526                                 goto nomem;
 527
 528                         sd->skb = skb;
 529                         buf_start = skb->data;
 530                         err = add_one_rx_buf(buf_start, q->buf_size, d, sd,
 531                                              q->gen, adap->pdev);
 532                         if (unlikely(err)) {
 533                                 clear_rx_desc(adap->pdev, q, sd);
 534                                 break;
 535                         }
 536                 }
 537
 538                 d++;
 539                 sd++;
 540                 if (++q->pidx == q->size) {
 541                         q->pidx = 0;
 542                         q->gen ^= 1;
 543                         sd = q->sdesc;
 544                         d = q->desc;
 545                 }
 546                 count++;
 547         }
 548
 549         q->credits += count;
 550         q->pend_cred += count;
 551         ring_fl_db(adap, q);
 552
 553         return count;
 554 }
 555
 556 static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
 557 {
 558         refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits),
 559                   GFP_ATOMIC | __GFP_COMP);
 560 }
 561
 562 /**
 563  *      recycle_rx_buf - recycle a receive buffer
 564  *      @adapter: the adapter
 565  *      @q: the SGE free list
 566  *      @idx: index of buffer to recycle
 567  *
 568  *      Recycles the specified buffer on the given free list by adding it at
 569  *      the next available slot on the list.
 570  */
 571 static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
 572                            unsigned int idx)
 573 {
 574         struct rx_desc *from = &q->desc[idx];
 575         struct rx_desc *to = &q->desc[q->pidx];
 576
 577         q->sdesc[q->pidx] = q->sdesc[idx];
 578         to->addr_lo = from->addr_lo;    /* already big endian */
 579         to->addr_hi = from->addr_hi;    /* likewise */
 580         wmb();
 581         to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
 582         to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
 583
 584         if (++q->pidx == q->size) {
 585                 q->pidx = 0;
 586                 q->gen ^= 1;
 587         }
 588
 589         q->credits++;
 590         q->pend_cred++;
 591         ring_fl_db(adap, q);
 592 }
 593
 594 /**
 595  *      alloc_ring - allocate resources for an SGE descriptor ring
 596  *      @pdev: the PCI device
 597  *      @nelem: the number of descriptors
 598  *      @elem_size: the size of each descriptor
 599  *      @sw_size: the size of the SW state associated with each ring element
 600  *      @phys: the physical address of the allocated ring
 601  *      @metadata: address of the array holding the SW state for the ring
 602  *
 603  *      Allocates resources for an SGE descriptor ring, such as Tx queues,
 604  *      free buffer lists, or response queues.  Each SGE ring requires
 605  *      space for its HW descriptors plus, optionally, space for the SW state
 606  *      associated with each HW entry (the metadata).  The function returns
 607  *      three values: the virtual address for the HW ring (the return value
 608  *      of the function), the physical address of the HW ring, and the address
 609  *      of the SW ring.
 610  */
 611 static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
 612                         size_t sw_size, dma_addr_t * phys, void *metadata)
 613 {
 614         size_t len = nelem * elem_size;
 615         void *s = NULL;
 616         void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
 617
 618         if (!p)
 619                 return NULL;
 620         if (sw_size && metadata) {
 621                 s = kcalloc(nelem, sw_size, GFP_KERNEL);
 622
 623                 if (!s) {
 624                         dma_free_coherent(&pdev->dev, len, p, *phys);
 625                         return NULL;
 626                 }
 627                 *(void **)metadata = s;
 628         }
 629         memset(p, 0, len);
 630         return p;
 631 }
 632
 633 /**
 634  *      t3_reset_qset - reset a sge qset
 635  *      @q: the queue set
 636  *
 637  *      Reset the qset structure.
 638  *      the NAPI structure is preserved in the event of
 639  *      the qset's reincarnation, for example during EEH recovery.
 640  */
 641 static void t3_reset_qset(struct sge_qset *q)
 642 {
 643         if (q->adap &&
 644             !(q->adap->flags & NAPI_INIT)) {
 645                 memset(q, 0, sizeof(*q));
 646                 return;
 647         }
 648
 649         q->adap = NULL;
 650         memset(&q->rspq, 0, sizeof(q->rspq));
 651         memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
 652         memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
 653         q->txq_stopped = 0;
 654         q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
 655         q->rx_reclaim_timer.function = NULL;
 656         q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0;
 657 }
 658
 659
 660 /**
 661  *      free_qset - free the resources of an SGE queue set
 662  *      @adapter: the adapter owning the queue set
 663  *      @q: the queue set
 664  *
 665  *      Release the HW and SW resources associated with an SGE queue set, such
 666  *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 667  *      queue set must be quiesced prior to calling this.
 668  */
 669 static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
 670 {
 671         int i;
 672         struct pci_dev *pdev = adapter->pdev;
 673
 674         for (i = 0; i < SGE_RXQ_PER_SET; ++i)
 675                 if (q->fl[i].desc) {
 676                         spin_lock_irq(&adapter->sge.reg_lock);
 677                         t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
 678                         spin_unlock_irq(&adapter->sge.reg_lock);
 679                         free_rx_bufs(pdev, &q->fl[i]);
 680                         kfree(q->fl[i].sdesc);
 681                         dma_free_coherent(&pdev->dev,
 682                                           q->fl[i].size *
 683                                           sizeof(struct rx_desc), q->fl[i].desc,
 684                                           q->fl[i].phys_addr);
 685                 }
 686
 687         for (i = 0; i < SGE_TXQ_PER_SET; ++i)
 688                 if (q->txq[i].desc) {
 689                         spin_lock_irq(&adapter->sge.reg_lock);
 690                         t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
 691                         spin_unlock_irq(&adapter->sge.reg_lock);
 692                         if (q->txq[i].sdesc) {
 693                                 free_tx_desc(adapter, &q->txq[i],
 694                                              q->txq[i].in_use);
 695                                 kfree(q->txq[i].sdesc);
 696                         }
 697                         dma_free_coherent(&pdev->dev,
 698                                           q->txq[i].size *
 699                                           sizeof(struct tx_desc),
 700                                           q->txq[i].desc, q->txq[i].phys_addr);
 701                         __skb_queue_purge(&q->txq[i].sendq);
 702                 }
 703
 704         if (q->rspq.desc) {
 705                 spin_lock_irq(&adapter->sge.reg_lock);
 706                 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
 707                 spin_unlock_irq(&adapter->sge.reg_lock);
 708                 dma_free_coherent(&pdev->dev,
 709                                   q->rspq.size * sizeof(struct rsp_desc),
 710                                   q->rspq.desc, q->rspq.phys_addr);
 711         }
 712
 713         t3_reset_qset(q);
 714 }
 715
 716 /**
 717  *      init_qset_cntxt - initialize an SGE queue set context info
 718  *      @qs: the queue set
 719  *      @id: the queue set id
 720  *
 721  *      Initializes the TIDs and context ids for the queues of a queue set.
 722  */
 723 static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
 724 {
 725         qs->rspq.cntxt_id = id;
 726         qs->fl[0].cntxt_id = 2 * id;
 727         qs->fl[1].cntxt_id = 2 * id + 1;
 728         qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 729         qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 730         qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 731         qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 732         qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 733 }
 734
 735 /**
 736  *      sgl_len - calculates the size of an SGL of the given capacity
 737  *      @n: the number of SGL entries
 738  *
 739  *      Calculates the number of flits needed for a scatter/gather list that
 740  *      can hold the given number of entries.
 741  */
 742 static inline unsigned int sgl_len(unsigned int n)
 743 {
 744         /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
 745         return (3 * n) / 2 + (n & 1);
 746 }
 747
 748 /**
 749  *      flits_to_desc - returns the num of Tx descriptors for the given flits
 750  *      @n: the number of flits
 751  *
 752  *      Calculates the number of Tx descriptors needed for the supplied number
 753  *      of flits.
 754  */
 755 static inline unsigned int flits_to_desc(unsigned int n)
 756 {
 757         BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
 758         return flit_desc_map[n];
 759 }
 760
 761 /**
 762  *      get_packet - return the next ingress packet buffer from a free list
 763  *      @adap: the adapter that received the packet
 764  *      @fl: the SGE free list holding the packet
 765  *      @len: the packet length including any SGE padding
 766  *      @drop_thres: # of remaining buffers before we start dropping packets
 767  *
 768  *      Get the next packet from a free list and complete setup of the
 769  *      sk_buff.  If the packet is small we make a copy and recycle the
 770  *      original buffer, otherwise we use the original buffer itself.  If a
 771  *      positive drop threshold is supplied packets are dropped and their
 772  *      buffers recycled if (a) the number of remaining buffers is under the
 773  *      threshold and the packet is too big to copy, or (b) the packet should
 774  *      be copied but there is no memory for the copy.
 775  */
 776 static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
 777                                   unsigned int len, unsigned int drop_thres)
 778 {
 779         struct sk_buff *skb = NULL;
 780         struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 781
 782         prefetch(sd->skb->data);
 783         fl->credits--;
 784
 785         if (len <= SGE_RX_COPY_THRES) {
 786                 skb = alloc_skb(len, GFP_ATOMIC);
 787                 if (likely(skb != NULL)) {
 788                         __skb_put(skb, len);
 789                         pci_dma_sync_single_for_cpu(adap->pdev,
 790                                             pci_unmap_addr(sd, dma_addr), len,
 791                                             PCI_DMA_FROMDEVICE);
 792                         memcpy(skb->data, sd->skb->data, len);
 793                         pci_dma_sync_single_for_device(adap->pdev,
 794                                             pci_unmap_addr(sd, dma_addr), len,
 795                                             PCI_DMA_FROMDEVICE);
 796                 } else if (!drop_thres)
 797                         goto use_orig_buf;
 798 recycle:
 799                 recycle_rx_buf(adap, fl, fl->cidx);
 800                 return skb;
 801         }
 802
 803         if (unlikely(fl->credits < drop_thres) &&
 804             refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1),
 805                       GFP_ATOMIC | __GFP_COMP) == 0)
 806                 goto recycle;
 807
 808 use_orig_buf:
 809         pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
 810                          fl->buf_size, PCI_DMA_FROMDEVICE);
 811         skb = sd->skb;
 812         skb_put(skb, len);
 813         __refill_fl(adap, fl);
 814         return skb;
 815 }
 816
 817 /**
 818  *      get_packet_pg - return the next ingress packet buffer from a free list
 819  *      @adap: the adapter that received the packet
 820  *      @fl: the SGE free list holding the packet
 821  *      @len: the packet length including any SGE padding
 822  *      @drop_thres: # of remaining buffers before we start dropping packets
 823  *
 824  *      Get the next packet from a free list populated with page chunks.
 825  *      If the packet is small we make a copy and recycle the original buffer,
 826  *      otherwise we attach the original buffer as a page fragment to a fresh
 827  *      sk_buff.  If a positive drop threshold is supplied packets are dropped
 828  *      and their buffers recycled if (a) the number of remaining buffers is
 829  *      under the threshold and the packet is too big to copy, or (b) there's
 830  *      no system memory.
 831  *
 832  *      Note: this function is similar to @get_packet but deals with Rx buffers
 833  *      that are page chunks rather than sk_buffs.
 834  */
 835 static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
 836                                      struct sge_rspq *q, unsigned int len,
 837                                      unsigned int drop_thres)
 838 {
 839         struct sk_buff *newskb, *skb;
 840         struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 841
 842         dma_addr_t dma_addr = pci_unmap_addr(sd, dma_addr);
 843
 844         newskb = skb = q->pg_skb;
 845         if (!skb && (len <= SGE_RX_COPY_THRES)) {
 846                 newskb = alloc_skb(len, GFP_ATOMIC);
 847                 if (likely(newskb != NULL)) {
 848                         __skb_put(newskb, len);
 849                         pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
 850                                             PCI_DMA_FROMDEVICE);
 851                         memcpy(newskb->data, sd->pg_chunk.va, len);
 852                         pci_dma_sync_single_for_device(adap->pdev, dma_addr,
 853                                                        len,
 854                                                        PCI_DMA_FROMDEVICE);
 855                 } else if (!drop_thres)
 856                         return NULL;
 857 recycle:
 858                 fl->credits--;
 859                 recycle_rx_buf(adap, fl, fl->cidx);
 860                 q->rx_recycle_buf++;
 861                 return newskb;
 862         }
 863
 864         if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
 865                 goto recycle;
 866
 867         prefetch(sd->pg_chunk.p_cnt);
 868
 869         if (!skb)
 870                 newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
 871
 872         if (unlikely(!newskb)) {
 873                 if (!drop_thres)
 874                         return NULL;
 875                 goto recycle;
 876         }
 877
 878         pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
 879                                     PCI_DMA_FROMDEVICE);
 880         (*sd->pg_chunk.p_cnt)--;
 881         if (!*sd->pg_chunk.p_cnt)
 882                 pci_unmap_page(adap->pdev,
 883                                sd->pg_chunk.mapping,
 884                                fl->alloc_size,
 885                                PCI_DMA_FROMDEVICE);
 886         if (!skb) {
 887                 __skb_put(newskb, SGE_RX_PULL_LEN);
 888                 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
 889                 skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
 890                                    sd->pg_chunk.offset + SGE_RX_PULL_LEN,
 891                                    len - SGE_RX_PULL_LEN);
 892                 newskb->len = len;
 893                 newskb->data_len = len - SGE_RX_PULL_LEN;
 894                 newskb->truesize += newskb->data_len;
 895         } else {
 896                 skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
 897                                    sd->pg_chunk.page,
 898                                    sd->pg_chunk.offset, len);
 899                 newskb->len += len;
 900                 newskb->data_len += len;
 901                 newskb->truesize += len;
 902         }
 903
 904         fl->credits--;
 905         /*
 906          * We do not refill FLs here, we let the caller do it to overlap a
 907          * prefetch.
 908          */
 909         return newskb;
 910 }
 911
 912 /**
 913  *      get_imm_packet - return the next ingress packet buffer from a response
 914  *      @resp: the response descriptor containing the packet data
 915  *
 916  *      Return a packet containing the immediate data of the given response.
 917  */
 918 static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
 919 {
 920         struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
 921
 922         if (skb) {
 923                 __skb_put(skb, IMMED_PKT_SIZE);
 924                 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
 925         }
 926         return skb;
 927 }
 928
 929 /**
 930  *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 931  *      @skb: the packet
 932  *
 933  *      Returns the number of Tx descriptors needed for the given Ethernet
 934  *      packet.  Ethernet packets require addition of WR and CPL headers.
 935  */
 936 static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
 937 {
 938         unsigned int flits;
 939
 940         if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
 941                 return 1;
 942
 943         flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
 944         if (skb_shinfo(skb)->gso_size)
 945                 flits++;
 946         return flits_to_desc(flits);
 947 }
 948
 949 /**
 950  *      make_sgl - populate a scatter/gather list for a packet
 951  *      @skb: the packet
 952  *      @sgp: the SGL to populate
 953  *      @start: start address of skb main body data to include in the SGL
 954  *      @len: length of skb main body data to include in the SGL
 955  *      @pdev: the PCI device
 956  *
 957  *      Generates a scatter/gather list for the buffers that make up a packet
 958  *      and returns the SGL size in 8-byte words.  The caller must size the SGL
 959  *      appropriately.
 960  */
 961 static inline unsigned int make_sgl(const struct sk_buff *skb,
 962                                     struct sg_ent *sgp, unsigned char *start,
 963                                     unsigned int len, struct pci_dev *pdev)
 964 {
 965         dma_addr_t mapping;
 966         unsigned int i, j = 0, nfrags;
 967
 968         if (len) {
 969                 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
 970                 sgp->len[0] = cpu_to_be32(len);
 971                 sgp->addr[0] = cpu_to_be64(mapping);
 972                 j = 1;
 973         }
 974
 975         nfrags = skb_shinfo(skb)->nr_frags;
 976         for (i = 0; i < nfrags; i++) {
 977                 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 978
 979                 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
 980                                        frag->size, PCI_DMA_TODEVICE);
 981                 sgp->len[j] = cpu_to_be32(frag->size);
 982                 sgp->addr[j] = cpu_to_be64(mapping);
 983                 j ^= 1;
 984                 if (j == 0)
 985                         ++sgp;
 986         }
 987         if (j)
 988                 sgp->len[j] = 0;
 989         return ((nfrags + (len != 0)) * 3) / 2 + j;
 990 }
 991
 992 /**
 993  *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 994  *      @adap: the adapter
 995  *      @q: the Tx queue
 996  *
 997  *      Ring the doorbel if a Tx queue is asleep.  There is a natural race,
 998  *      where the HW is going to sleep just after we checked, however,
 999  *      then the interrupt handler will detect the outstanding TX packet
1000  *      and ring the doorbell for us.
1001  *
1002  *      When GTS is disabled we unconditionally ring the doorbell.
1003  */
1004 static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
1005 {
1006 #if USE_GTS
1007         clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1008         if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1009                 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1010                 t3_write_reg(adap, A_SG_KDOORBELL,
1011                              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1012         }
1013 #else
1014         wmb();                  /* write descriptors before telling HW */
1015         t3_write_reg(adap, A_SG_KDOORBELL,
1016                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1017 #endif
1018 }
1019
/**
 *	wr_gen2 - write the trailing generation value of a Tx descriptor
 *	@d: the Tx descriptor
 *	@gen: the generation value
 *
 *	When the SGE is built with two generation bits (SGE_NUM_GENBITS == 2)
 *	the last flit of the descriptor is set to @gen in big-endian form.
 *	With any other setting this function does nothing.
 */
static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	__be64 *last_flit = &d->flit[TX_DESC_FLITS - 1];

	*last_flit = cpu_to_be64(gen);
#endif
}
1026
1027 /**
1028  *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
1029  *      @ndesc: number of Tx descriptors spanned by the SGL
1030  *      @skb: the packet corresponding to the WR
1031  *      @d: first Tx descriptor to be written
1032  *      @pidx: index of above descriptors
1033  *      @q: the SGE Tx queue
1034  *      @sgl: the SGL
1035  *      @flits: number of flits to the start of the SGL in the first descriptor
1036  *      @sgl_flits: the SGL size in flits
1037  *      @gen: the Tx descriptor generation
1038  *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
1039  *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
1040  *
1041  *      Write a work request header and an associated SGL.  If the SGL is
1042  *      small enough to fit into one Tx descriptor it has already been written
1043  *      and we just need to write the WR header.  Otherwise we distribute the
1044  *      SGL across the number of descriptors it spans.
1045  */
1046 static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
1047                              struct tx_desc *d, unsigned int pidx,
1048                              const struct sge_txq *q,
1049                              const struct sg_ent *sgl,
1050                              unsigned int flits, unsigned int sgl_flits,
1051                              unsigned int gen, __be32 wr_hi,
1052                              __be32 wr_lo)
1053 {
1054         struct work_request_hdr *wrp = (struct work_request_hdr *)d;
1055         struct tx_sw_desc *sd = &q->sdesc[pidx];
1056
1057         sd->skb = skb;
1058         if (need_skb_unmap()) {
1059                 sd->fragidx = 0;
1060                 sd->addr_idx = 0;
1061                 sd->sflit = flits;
1062         }
1063
1064         if (likely(ndesc == 1)) {
1065                 sd->eop = 1;
1066                 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1067                                    V_WR_SGLSFLT(flits)) | wr_hi;
1068                 wmb();
1069                 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1070                                    V_WR_GEN(gen)) | wr_lo;
1071                 wr_gen2(d, gen);
1072         } else {
1073                 unsigned int ogen = gen;
1074                 const u64 *fp = (const u64 *)sgl;
1075                 struct work_request_hdr *wp = wrp;
1076
1077                 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1078                                    V_WR_SGLSFLT(flits)) | wr_hi;
1079
1080                 while (sgl_flits) {
1081                         unsigned int avail = WR_FLITS - flits;
1082
1083                         if (avail > sgl_flits)
1084                                 avail = sgl_flits;
1085                         memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1086                         sgl_flits -= avail;
1087                         ndesc--;
1088                         if (!sgl_flits)
1089                                 break;
1090
1091                         fp += avail;
1092                         d++;
1093                         sd->eop = 0;
1094                         sd++;
1095                         if (++pidx == q->size) {
1096                                 pidx = 0;
1097                                 gen ^= 1;
1098                                 d = q->desc;
1099                                 sd = q->sdesc;
1100                         }
1101
1102                         sd->skb = skb;
1103                         wrp = (struct work_request_hdr *)d;
1104                         wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1105                                            V_WR_SGLSFLT(1)) | wr_hi;
1106                         wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1107                                                         sgl_flits + 1)) |
1108                                            V_WR_GEN(gen)) | wr_lo;
1109                         wr_gen2(d, gen);
1110                         flits = 1;
1111                 }
1112                 sd->eop = 1;
1113                 wrp->wr_hi |= htonl(F_WR_EOP);
1114                 wmb();
1115                 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1116                 wr_gen2((struct tx_desc *)wp, ogen);
1117                 WARN_ON(ndesc != 0);
1118         }
1119 }
1120
1121 /**
1122  *      write_tx_pkt_wr - write a TX_PKT work request
1123  *      @adap: the adapter
1124  *      @skb: the packet to send
1125  *      @pi: the egress interface
1126  *      @pidx: index of the first Tx descriptor to write
1127  *      @gen: the generation value to use
1128  *      @q: the Tx queue
1129  *      @ndesc: number of descriptors the packet will occupy
1130  *      @compl: the value of the COMPL bit to use
1131  *
1132  *      Generate a TX_PKT work request to send the supplied packet.
1133  */
1134 static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1135                             const struct port_info *pi,
1136                             unsigned int pidx, unsigned int gen,
1137                             struct sge_txq *q, unsigned int ndesc,
1138                             unsigned int compl)
1139 {
1140         unsigned int flits, sgl_flits, cntrl, tso_info;
1141         struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1142         struct tx_desc *d = &q->desc[pidx];
1143         struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1144
1145         cpl->len = htonl(skb->len);
1146         cntrl = V_TXPKT_INTF(pi->port_id);
1147
1148         if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1149                 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1150
1151         tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1152         if (tso_info) {
1153                 int eth_type;
1154                 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1155
1156                 d->flit[2] = 0;
1157                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1158                 hdr->cntrl = htonl(cntrl);
1159                 eth_type = skb_network_offset(skb) == ETH_HLEN ?
1160                     CPL_ETH_II : CPL_ETH_II_VLAN;
1161                 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1162                     V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1163                     V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1164                 hdr->lso_info = htonl(tso_info);
1165                 flits = 3;
1166         } else {
1167                 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1168                 cntrl |= F_TXPKT_IPCSUM_DIS;    /* SW calculates IP csum */
1169                 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1170                 cpl->cntrl = htonl(cntrl);
1171
1172                 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1173                         q->sdesc[pidx].skb = NULL;
1174                         if (!skb->data_len)
1175                                 skb_copy_from_linear_data(skb, &d->flit[2],
1176                                                           skb->len);
1177                         else
1178                                 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1179
1180                         flits = (skb->len + 7) / 8 + 2;
1181                         cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1182                                               V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1183                                               | F_WR_SOP | F_WR_EOP | compl);
1184                         wmb();
1185                         cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1186                                               V_WR_TID(q->token));
1187                         wr_gen2(d, gen);
1188                         kfree_skb(skb);
1189                         return;
1190                 }
1191
1192                 flits = 2;
1193         }
1194
1195         sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1196         sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1197
1198         write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1199                          htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1200                          htonl(V_WR_TID(q->token)));
1201 }
1202
1203 static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1204                                     struct sge_qset *qs, struct sge_txq *q)
1205 {
1206         netif_tx_stop_queue(txq);
1207         set_bit(TXQ_ETH, &qs->txq_stopped);
1208         q->stops++;
1209 }
1210
1211 /**
1212  *      eth_xmit - add a packet to the Ethernet Tx queue
1213  *      @skb: the packet
1214  *      @dev: the egress net device
1215  *
1216  *      Add a packet to an SGE Tx queue.  Runs with softirqs disabled.
1217  */
1218 int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1219 {
1220         int qidx;
1221         unsigned int ndesc, pidx, credits, gen, compl;
1222         const struct port_info *pi = netdev_priv(dev);
1223         struct adapter *adap = pi->adapter;
1224         struct netdev_queue *txq;
1225         struct sge_qset *qs;
1226         struct sge_txq *q;
1227
1228         /*
1229          * The chip min packet length is 9 octets but play safe and reject
1230          * anything shorter than an Ethernet header.
1231          */
1232         if (unlikely(skb->len < ETH_HLEN)) {
1233                 dev_kfree_skb(skb);
1234                 return NETDEV_TX_OK;
1235         }
1236
1237         qidx = skb_get_queue_mapping(skb);
1238         qs = &pi->qs[qidx];
1239         q = &qs->txq[TXQ_ETH];
1240         txq = netdev_get_tx_queue(dev, qidx);
1241
1242         spin_lock(&q->lock);
1243         reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1244
1245         credits = q->size - q->in_use;
1246         ndesc = calc_tx_descs(skb);
1247
1248         if (unlikely(credits < ndesc)) {
1249                 t3_stop_tx_queue(txq, qs, q);
1250                 dev_err(&adap->pdev->dev,
1251                         "%s: Tx ring %u full while queue awake!\n",
1252                         dev->name, q->cntxt_id & 7);
1253                 spin_unlock(&q->lock);
1254                 return NETDEV_TX_BUSY;
1255         }
1256
1257         q->in_use += ndesc;
1258         if (unlikely(credits - ndesc < q->stop_thres)) {
1259                 t3_stop_tx_queue(txq, qs, q);
1260
1261                 if (should_restart_tx(q) &&
1262                     test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1263                         q->restarts++;
1264                         netif_tx_wake_queue(txq);
1265                 }
1266         }
1267
1268         gen = q->gen;
1269         q->unacked += ndesc;
1270         compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1271         q->unacked &= 7;
1272         pidx = q->pidx;
1273         q->pidx += ndesc;
1274         if (q->pidx >= q->size) {
1275                 q->pidx -= q->size;
1276                 q->gen ^= 1;
1277         }
1278
1279         /* update port statistics */
1280         if (skb->ip_summed == CHECKSUM_COMPLETE)
1281                 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1282         if (skb_shinfo(skb)->gso_size)
1283                 qs->port_stats[SGE_PSTAT_TSO]++;
1284         if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1285                 qs->port_stats[SGE_PSTAT_VLANINS]++;
1286
1287         dev->trans_start = jiffies;
1288         spin_unlock(&q->lock);
1289
1290         /*
1291          * We do not use Tx completion interrupts to free DMAd Tx packets.
1292          * This is good for performamce but means that we rely on new Tx
1293          * packets arriving to run the destructors of completed packets,
1294          * which open up space in their sockets' send queues.  Sometimes
1295          * we do not get such new packets causing Tx to stall.  A single
1296          * UDP transmitter is a good example of this situation.  We have
1297          * a clean up timer that periodically reclaims completed packets
1298          * but it doesn't run often enough (nor do we want it to) to prevent
1299          * lengthy stalls.  A solution to this problem is to run the
1300          * destructor early, after the packet is queued but before it's DMAd.
1301          * A cons is that we lie to socket memory accounting, but the amount
1302          * of extra memory is reasonable (limited by the number of Tx
1303          * descriptors), the packets do actually get freed quickly by new
1304          * packets almost always, and for protocols like TCP that wait for
1305          * acks to really free up the data the extra memory is even less.
1306          * On the positive side we run the destructors on the sending CPU
1307          * rather than on a potentially different completing CPU, usually a
1308          * good thing.  We also run them without holding our Tx queue lock,
1309          * unlike what reclaim_completed_tx() would otherwise do.
1310          *
1311          * Run the destructor before telling the DMA engine about the packet
1312          * to make sure it doesn't complete and get freed prematurely.
1313          */
1314         if (likely(!skb_shared(skb)))
1315                 skb_orphan(skb);
1316
1317         write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1318         check_ring_tx_db(adap, q);
1319         return NETDEV_TX_OK;
1320 }
1321
1322 /**
1323  *      write_imm - write a packet into a Tx descriptor as immediate data
1324  *      @d: the Tx descriptor to write
1325  *      @skb: the packet
1326  *      @len: the length of packet data to write as immediate data
1327  *      @gen: the generation bit value to write
1328  *
1329  *      Writes a packet as immediate data into a Tx descriptor.  The packet
1330  *      contains a work request at its beginning.  We must write the packet
1331  *      carefully so the SGE doesn't read it accidentally before it's written
1332  *      in its entirety.
1333  */
1334 static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1335                              unsigned int len, unsigned int gen)
1336 {
1337         struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1338         struct work_request_hdr *to = (struct work_request_hdr *)d;
1339
1340         if (likely(!skb->data_len))
1341                 memcpy(&to[1], &from[1], len - sizeof(*from));
1342         else
1343                 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1344
1345         to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1346                                         V_WR_BCNTLFLT(len & 7));
1347         wmb();
1348         to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1349                                         V_WR_LEN((len + 7) / 8));
1350         wr_gen2(d, gen);
1351         kfree_skb(skb);
1352 }
1353
1354 /**
1355  *      check_desc_avail - check descriptor availability on a send queue
1356  *      @adap: the adapter
1357  *      @q: the send queue
1358  *      @skb: the packet needing the descriptors
1359  *      @ndesc: the number of Tx descriptors needed
1360  *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1361  *
1362  *      Checks if the requested number of Tx descriptors is available on an
1363  *      SGE send queue.  If the queue is already suspended or not enough
1364  *      descriptors are available the packet is queued for later transmission.
1365  *      Must be called with the Tx queue locked.
1366  *
1367  *      Returns 0 if enough descriptors are available, 1 if there aren't
1368  *      enough descriptors and the packet has been queued, and 2 if the caller
1369  *      needs to retry because there weren't enough descriptors at the
1370  *      beginning of the call but some freed up in the mean time.
1371  */
1372 static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1373                                    struct sk_buff *skb, unsigned int ndesc,
1374                                    unsigned int qid)
1375 {
1376         if (unlikely(!skb_queue_empty(&q->sendq))) {
1377               addq_exit:__skb_queue_tail(&q->sendq, skb);
1378                 return 1;
1379         }
1380         if (unlikely(q->size - q->in_use < ndesc)) {
1381                 struct sge_qset *qs = txq_to_qset(q, qid);
1382
1383                 set_bit(qid, &qs->txq_stopped);
1384                 smp_mb__after_clear_bit();
1385
1386                 if (should_restart_tx(q) &&
1387                     test_and_clear_bit(qid, &qs->txq_stopped))
1388                         return 2;
1389
1390                 q->stops++;
1391                 goto addq_exit;
1392         }
1393         return 0;
1394 }
1395
1396 /**
1397  *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1398  *      @q: the SGE control Tx queue
1399  *
1400  *      This is a variant of reclaim_completed_tx() that is used for Tx queues
1401  *      that send only immediate data (presently just the control queues) and
1402  *      thus do not have any sk_buffs to release.
1403  */
1404 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1405 {
1406         unsigned int reclaim = q->processed - q->cleaned;
1407
1408         q->in_use -= reclaim;
1409         q->cleaned += reclaim;
1410 }
1411
1412 static inline int immediate(const struct sk_buff *skb)
1413 {
1414         return skb->len <= WR_LEN;
1415 }
1416
1417 /**
1418  *      ctrl_xmit - send a packet through an SGE control Tx queue
1419  *      @adap: the adapter
1420  *      @q: the control queue
1421  *      @skb: the packet
1422  *
1423  *      Send a packet through an SGE control Tx queue.  Packets sent through
1424  *      a control queue must fit entirely as immediate data in a single Tx
1425  *      descriptor and have no page fragments.
1426  */
1427 static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1428                      struct sk_buff *skb)
1429 {
1430         int ret;
1431         struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1432
1433         if (unlikely(!immediate(skb))) {
1434                 WARN_ON(1);
1435                 dev_kfree_skb(skb);
1436                 return NET_XMIT_SUCCESS;
1437         }
1438
1439         wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1440         wrp->wr_lo = htonl(V_WR_TID(q->token));
1441
1442         spin_lock(&q->lock);
1443       again:reclaim_completed_tx_imm(q);
1444
1445         ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1446         if (unlikely(ret)) {
1447                 if (ret == 1) {
1448                         spin_unlock(&q->lock);
1449                         return NET_XMIT_CN;
1450                 }
1451                 goto again;
1452         }
1453
1454         write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1455
1456         q->in_use++;
1457         if (++q->pidx >= q->size) {
1458                 q->pidx = 0;
1459                 q->gen ^= 1;
1460         }
1461         spin_unlock(&q->lock);
1462         wmb();
1463         t3_write_reg(adap, A_SG_KDOORBELL,
1464                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1465         return NET_XMIT_SUCCESS;
1466 }
1467
1468 /**
1469  *      restart_ctrlq - restart a suspended control queue
1470  *      @qs: the queue set cotaining the control queue
1471  *
1472  *      Resumes transmission on a suspended Tx control queue.
1473  */
1474 static void restart_ctrlq(unsigned long data)
1475 {
1476         struct sk_buff *skb;
1477         struct sge_qset *qs = (struct sge_qset *)data;
1478         struct sge_txq *q = &qs->txq[TXQ_CTRL];
1479
1480         spin_lock(&q->lock);
1481       again:reclaim_completed_tx_imm(q);
1482
1483         while (q->in_use < q->size &&
1484                (skb = __skb_dequeue(&q->sendq)) != NULL) {
1485
1486                 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1487
1488                 if (++q->pidx >= q->size) {
1489                         q->pidx = 0;
1490                         q->gen ^= 1;
1491                 }
1492                 q->in_use++;
1493         }
1494
1495         if (!skb_queue_empty(&q->sendq)) {
1496                 set_bit(TXQ_CTRL, &qs->txq_stopped);
1497                 smp_mb__after_clear_bit();
1498
1499                 if (should_restart_tx(q) &&
1500                     test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1501                         goto again;
1502                 q->stops++;
1503         }
1504
1505         spin_unlock(&q->lock);
1506         wmb();
1507         t3_write_reg(qs->adap, A_SG_KDOORBELL,
1508                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1509 }
1510
1511 /*
1512  * Send a management message through control queue 0
1513  */
1514 int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1515 {
1516         int ret;
1517         local_bh_disable();
1518         ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1519         local_bh_enable();
1520
1521         return ret;
1522 }
1523
1524 /**
1525  *      deferred_unmap_destructor - unmap a packet when it is freed
1526  *      @skb: the packet
1527  *
1528  *      This is the packet destructor used for Tx packets that need to remain
1529  *      mapped until they are freed rather than until their Tx descriptors are
1530  *      freed.
1531  */
1532 static void deferred_unmap_destructor(struct sk_buff *skb)
1533 {
1534         int i;
1535         const dma_addr_t *p;
1536         const struct skb_shared_info *si;
1537         const struct deferred_unmap_info *dui;
1538
1539         dui = (struct deferred_unmap_info *)skb->head;
1540         p = dui->addr;
1541
1542         if (skb->tail - skb->transport_header)
1543                 pci_unmap_single(dui->pdev, *p++,
1544                                  skb->tail - skb->transport_header,
1545                                  PCI_DMA_TODEVICE);
1546
1547         si = skb_shinfo(skb);
1548         for (i = 0; i < si->nr_frags; i++)
1549                 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1550                                PCI_DMA_TODEVICE);
1551 }
1552
1553 static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1554                                      const struct sg_ent *sgl, int sgl_flits)
1555 {
1556         dma_addr_t *p;
1557         struct deferred_unmap_info *dui;
1558
1559         dui = (struct deferred_unmap_info *)skb->head;
1560         dui->pdev = pdev;
1561         for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1562                 *p++ = be64_to_cpu(sgl->addr[0]);
1563                 *p++ = be64_to_cpu(sgl->addr[1]);
1564         }
1565         if (sgl_flits)
1566                 *p = be64_to_cpu(sgl->addr[0]);
1567 }
1568
1569 /**
1570  *      write_ofld_wr - write an offload work request
1571  *      @adap: the adapter
1572  *      @skb: the packet to send
1573  *      @q: the Tx queue
1574  *      @pidx: index of the first Tx descriptor to write
1575  *      @gen: the generation value to use
1576  *      @ndesc: number of descriptors the packet will occupy
1577  *
1578  *      Write an offload work request to send the supplied packet.  The packet
1579  *      data already carry the work request with most fields populated.
1580  */
1581 static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1582                           struct sge_txq *q, unsigned int pidx,
1583                           unsigned int gen, unsigned int ndesc)
1584 {
1585         unsigned int sgl_flits, flits;
1586         struct work_request_hdr *from;
1587         struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1588         struct tx_desc *d = &q->desc[pidx];
1589
1590         if (immediate(skb)) {
1591                 q->sdesc[pidx].skb = NULL;
1592                 write_imm(d, skb, skb->len, gen);
1593                 return;
1594         }
1595
1596         /* Only TX_DATA builds SGLs */
1597
1598         from = (struct work_request_hdr *)skb->data;
1599         memcpy(&d->flit[1], &from[1],
1600                skb_transport_offset(skb) - sizeof(*from));
1601
1602         flits = skb_transport_offset(skb) / 8;
1603         sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1604         sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
1605                              skb->tail - skb->transport_header,
1606                              adap->pdev);
1607         if (need_skb_unmap()) {
1608                 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1609                 skb->destructor = deferred_unmap_destructor;
1610         }
1611
1612         write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1613                          gen, from->wr_hi, from->wr_lo);
1614 }
1615
1616 /**
1617  *      calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1618  *      @skb: the packet
1619  *
1620  *      Returns the number of Tx descriptors needed for the given offload
1621  *      packet.  These packets are already fully constructed.
1622  */
1623 static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1624 {
1625         unsigned int flits, cnt;
1626
1627         if (skb->len <= WR_LEN)
1628                 return 1;       /* packet fits as immediate data */
1629
1630         flits = skb_transport_offset(skb) / 8;  /* headers */
1631         cnt = skb_shinfo(skb)->nr_frags;
1632         if (skb->tail != skb->transport_header)
1633                 cnt++;
1634         return flits_to_desc(flits + sgl_len(cnt));
1635 }
1636
1637 /**
1638  *      ofld_xmit - send a packet through an offload queue
1639  *      @adap: the adapter
1640  *      @q: the Tx offload queue
1641  *      @skb: the packet
1642  *
1643  *      Send an offload packet through an SGE offload queue.
1644  */
1645 static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1646                      struct sk_buff *skb)
1647 {
1648         int ret;
1649         unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1650
1651         spin_lock(&q->lock);
1652 again:  reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1653
1654         ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1655         if (unlikely(ret)) {
1656                 if (ret == 1) {
1657                         skb->priority = ndesc;  /* save for restart */
1658                         spin_unlock(&q->lock);
1659                         return NET_XMIT_CN;
1660                 }
1661                 goto again;
1662         }
1663
1664         gen = q->gen;
1665         q->in_use += ndesc;
1666         pidx = q->pidx;
1667         q->pidx += ndesc;
1668         if (q->pidx >= q->size) {
1669                 q->pidx -= q->size;
1670                 q->gen ^= 1;
1671         }
1672         spin_unlock(&q->lock);
1673
1674         write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1675         check_ring_tx_db(adap, q);
1676         return NET_XMIT_SUCCESS;
1677 }
1678
1679 /**
1680  *      restart_offloadq - restart a suspended offload queue
1681  *      @qs: the queue set cotaining the offload queue
1682  *
1683  *      Resumes transmission on a suspended Tx offload queue.
1684  */
1685 static void restart_offloadq(unsigned long data)
1686 {
1687         struct sk_buff *skb;
1688         struct sge_qset *qs = (struct sge_qset *)data;
1689         struct sge_txq *q = &qs->txq[TXQ_OFLD];
1690         const struct port_info *pi = netdev_priv(qs->netdev);
1691         struct adapter *adap = pi->adapter;
1692
1693         spin_lock(&q->lock);
1694 again:  reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1695
1696         while ((skb = skb_peek(&q->sendq)) != NULL) {
1697                 unsigned int gen, pidx;
1698                 unsigned int ndesc = skb->priority;
1699
1700                 if (unlikely(q->size - q->in_use < ndesc)) {
1701                         set_bit(TXQ_OFLD, &qs->txq_stopped);
1702                         smp_mb__after_clear_bit();
1703
1704                         if (should_restart_tx(q) &&
1705                             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1706                                 goto again;
1707                         q->stops++;
1708                         break;
1709                 }
1710
1711                 gen = q->gen;
1712                 q->in_use += ndesc;
1713                 pidx = q->pidx;
1714                 q->pidx += ndesc;
1715                 if (q->pidx >= q->size) {
1716                         q->pidx -= q->size;
1717                         q->gen ^= 1;
1718                 }
1719                 __skb_unlink(skb, &q->sendq);
1720                 spin_unlock(&q->lock);
1721
1722                 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1723                 spin_lock(&q->lock);
1724         }
1725         spin_unlock(&q->lock);
1726
1727 #if USE_GTS
1728         set_bit(TXQ_RUNNING, &q->flags);
1729         set_bit(TXQ_LAST_PKT_DB, &q->flags);
1730 #endif
1731         wmb();
1732         t3_write_reg(adap, A_SG_KDOORBELL,
1733                      F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1734 }
1735
1736 /**
1737  *      queue_set - return the queue set a packet should use
1738  *      @skb: the packet
1739  *
1740  *      Maps a packet to the SGE queue set it should use.  The desired queue
1741  *      set is carried in bits 1-3 in the packet's priority.
1742  */
1743 static inline int queue_set(const struct sk_buff *skb)
1744 {
1745         return skb->priority >> 1;
1746 }
1747
1748 /**
1749  *      is_ctrl_pkt - return whether an offload packet is a control packet
1750  *      @skb: the packet
1751  *
1752  *      Determines whether an offload packet should use an OFLD or a CTRL
1753  *      Tx queue.  This is indicated by bit 0 in the packet's priority.
1754  */
1755 static inline int is_ctrl_pkt(const struct sk_buff *skb)
1756 {
1757         return skb->priority & 1;
1758 }
1759
1760 /**
1761  *      t3_offload_tx - send an offload packet
1762  *      @tdev: the offload device to send to
1763  *      @skb: the packet
1764  *
1765  *      Sends an offload packet.  We use the packet priority to select the
1766  *      appropriate Tx queue as follows: bit 0 indicates whether the packet
1767  *      should be sent as regular or control, bits 1-3 select the queue set.
1768  */
1769 int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1770 {
1771         struct adapter *adap = tdev2adap(tdev);
1772         struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1773
1774         if (unlikely(is_ctrl_pkt(skb)))
1775                 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1776
1777         return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1778 }
1779
1780 /**
1781  *      offload_enqueue - add an offload packet to an SGE offload receive queue
1782  *      @q: the SGE response queue
1783  *      @skb: the packet
1784  *
1785  *      Add a new offload packet to an SGE response queue's offload packet
1786  *      queue.  If the packet is the first on the queue it schedules the RX
1787  *      softirq to process the queue.
1788  */
1789 static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1790 {
1791         int was_empty = skb_queue_empty(&q->rx_queue);
1792
1793         __skb_queue_tail(&q->rx_queue, skb);
1794
1795         if (was_empty) {
1796                 struct sge_qset *qs = rspq_to_qset(q);
1797
1798                 napi_schedule(&qs->napi);
1799         }
1800 }
1801
1802 /**
1803  *      deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1804  *      @tdev: the offload device that will be receiving the packets
1805  *      @q: the SGE response queue that assembled the bundle
1806  *      @skbs: the partial bundle
1807  *      @n: the number of packets in the bundle
1808  *
1809  *      Delivers a (partial) bundle of Rx offload packets to an offload device.
1810  */
1811 static inline void deliver_partial_bundle(struct t3cdev *tdev,
1812                                           struct sge_rspq *q,
1813                                           struct sk_buff *skbs[], int n)
1814 {
1815         if (n) {
1816                 q->offload_bundles++;
1817                 tdev->recv(tdev, skbs, n);
1818         }
1819 }
1820
1821 /**
1822  *      ofld_poll - NAPI handler for offload packets in interrupt mode
1823  *      @dev: the network device doing the polling
1824  *      @budget: polling budget
1825  *
1826  *      The NAPI handler for offload packets when a response queue is serviced
1827  *      by the hard interrupt handler, i.e., when it's operating in non-polling
1828  *      mode.  Creates small packet batches and sends them through the offload
1829  *      receive handler.  Batches need to be of modest size as we do prefetches
1830  *      on the packets in each.
1831  */
1832 static int ofld_poll(struct napi_struct *napi, int budget)
1833 {
1834         struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1835         struct sge_rspq *q = &qs->rspq;
1836         struct adapter *adapter = qs->adap;
1837         int work_done = 0;
1838
1839         while (work_done < budget) {
1840                 struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1841                 struct sk_buff_head queue;
1842                 int ngathered;
1843
1844                 spin_lock_irq(&q->lock);
1845                 __skb_queue_head_init(&queue);
1846                 skb_queue_splice_init(&q->rx_queue, &queue);
1847                 if (skb_queue_empty(&queue)) {
1848                         napi_complete(napi);
1849                         spin_unlock_irq(&q->lock);
1850                         return work_done;
1851                 }
1852                 spin_unlock_irq(&q->lock);
1853
1854                 ngathered = 0;
1855                 skb_queue_walk_safe(&queue, skb, tmp) {
1856                         if (work_done >= budget)
1857                                 break;
1858                         work_done++;
1859
1860                         __skb_unlink(skb, &queue);
1861                         prefetch(skb->data);
1862                         skbs[ngathered] = skb;
1863                         if (++ngathered == RX_BUNDLE_SIZE) {
1864                                 q->offload_bundles++;
1865                                 adapter->tdev.recv(&adapter->tdev, skbs,
1866                                                    ngathered);
1867                                 ngathered = 0;
1868                         }
1869                 }
1870                 if (!skb_queue_empty(&queue)) {
1871                         /* splice remaining packets back onto Rx queue */
1872                         spin_lock_irq(&q->lock);
1873                         skb_queue_splice(&queue, &q->rx_queue);
1874                         spin_unlock_irq(&q->lock);
1875                 }
1876                 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1877         }
1878
1879         return work_done;
1880 }
1881
1882 /**
1883  *      rx_offload - process a received offload packet
1884  *      @tdev: the offload device receiving the packet
1885  *      @rq: the response queue that received the packet
1886  *      @skb: the packet
1887  *      @rx_gather: a gather list of packets if we are building a bundle
1888  *      @gather_idx: index of the next available slot in the bundle
1889  *
1890  *      Process an ingress offload pakcet and add it to the offload ingress
1891  *      queue.  Returns the index of the next available slot in the bundle.
1892  */
1893 static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1894                              struct sk_buff *skb, struct sk_buff *rx_gather[],
1895                              unsigned int gather_idx)
1896 {
1897         skb_reset_mac_header(skb);
1898         skb_reset_network_header(skb);
1899         skb_reset_transport_header(skb);
1900
1901         if (rq->polling) {
1902                 rx_gather[gather_idx++] = skb;
1903                 if (gather_idx == RX_BUNDLE_SIZE) {
1904                         tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1905                         gather_idx = 0;
1906                         rq->offload_bundles++;
1907                 }
1908         } else
1909                 offload_enqueue(rq, skb);
1910
1911         return gather_idx;
1912 }
1913
1914 /**
1915  *      restart_tx - check whether to restart suspended Tx queues
1916  *      @qs: the queue set to resume
1917  *
1918  *      Restarts suspended Tx queues of an SGE queue set if they have enough
1919  *      free resources to resume operation.
1920  */
1921 static void restart_tx(struct sge_qset *qs)
1922 {
1923         if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1924             should_restart_tx(&qs->txq[TXQ_ETH]) &&
1925             test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1926                 qs->txq[TXQ_ETH].restarts++;
1927                 if (netif_running(qs->netdev))
1928                         netif_tx_wake_queue(qs->tx_q);
1929         }
1930
1931         if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1932             should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1933             test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1934                 qs->txq[TXQ_OFLD].restarts++;
1935                 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1936         }
1937         if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1938             should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1939             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1940                 qs->txq[TXQ_CTRL].restarts++;
1941                 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1942         }
1943 }
1944
1945 /**
1946  *      cxgb3_arp_process - process an ARP request probing a private IP address
1947  *      @adapter: the adapter
1948  *      @skb: the skbuff containing the ARP request
1949  *
1950  *      Check if the ARP request is probing the private IP address
1951  *      dedicated to iSCSI, generate an ARP reply if so.
1952  */
1953 static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
1954 {
1955         struct net_device *dev = skb->dev;
1956         struct port_info *pi;
1957         struct arphdr *arp;
1958         unsigned char *arp_ptr;
1959         unsigned char *sha;
1960         __be32 sip, tip;
1961
1962         if (!dev)
1963                 return;
1964
1965         skb_reset_network_header(skb);
1966         arp = arp_hdr(skb);
1967
1968         if (arp->ar_op != htons(ARPOP_REQUEST))
1969                 return;
1970
1971         arp_ptr = (unsigned char *)(arp + 1);
1972         sha = arp_ptr;
1973         arp_ptr += dev->addr_len;
1974         memcpy(&sip, arp_ptr, sizeof(sip));
1975         arp_ptr += sizeof(sip);
1976         arp_ptr += dev->addr_len;
1977         memcpy(&tip, arp_ptr, sizeof(tip));
1978
1979         pi = netdev_priv(dev);
1980         if (tip != pi->iscsi_ipv4addr)
1981                 return;
1982
1983         arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1984                  dev->dev_addr, sha);
1985
1986 }
1987
1988 static inline int is_arp(struct sk_buff *skb)
1989 {
1990         return skb->protocol == htons(ETH_P_ARP);
1991 }
1992
1993 /**
1994  *      rx_eth - process an ingress ethernet packet
1995  *      @adap: the adapter
1996  *      @rq: the response queue that received the packet
1997  *      @skb: the packet
1998  *      @pad: amount of padding at the start of the buffer
1999  *
2000  *      Process an ingress ethernet pakcet and deliver it to the stack.
2001  *      The padding is 2 if the packet was delivered in an Rx buffer and 0
2002  *      if it was immediate data in a response.
2003  */
2004 static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
2005                    struct sk_buff *skb, int pad, int lro)
2006 {
2007         struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
2008         struct sge_qset *qs = rspq_to_qset(rq);
2009         struct port_info *pi;
2010
2011         skb_pull(skb, sizeof(*p) + pad);
2012         skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
2013         pi = netdev_priv(skb->dev);
2014         if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid &&
2015             p->csum == htons(0xffff) && !p->fragment) {
2016                 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2017                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2018         } else
2019                 skb->ip_summed = CHECKSUM_NONE;
2020         skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
2021
2022         if (unlikely(p->vlan_valid)) {
2023                 struct vlan_group *grp = pi->vlan_grp;
2024
2025                 qs->port_stats[SGE_PSTAT_VLANEX]++;
2026                 if (likely(grp))
2027                         if (lro)
2028                                 vlan_gro_receive(&qs->napi, grp,
2029                                                  ntohs(p->vlan), skb);
2030                         else {
2031                                 if (unlikely(pi->iscsi_ipv4addr &&
2032                                     is_arp(skb))) {
2033                                         unsigned short vtag = ntohs(p->vlan) &
2034                                                                 VLAN_VID_MASK;
2035                                         skb->dev = vlan_group_get_device(grp,
2036                                                                          vtag);
2037                                         cxgb3_arp_process(adap, skb);
2038                                 }
2039                                 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
2040                                                   rq->polling);
2041                         }
2042                 else
2043                         dev_kfree_skb_any(skb);
2044         } else if (rq->polling) {
2045                 if (lro)
2046                         napi_gro_receive(&qs->napi, skb);
2047                 else {
2048                         if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
2049                                 cxgb3_arp_process(adap, skb);
2050                         netif_receive_skb(skb);
2051                 }
2052         } else
2053                 netif_rx(skb);
2054 }
2055
2056 static inline int is_eth_tcp(u32 rss)
2057 {
2058         return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
2059 }
2060
2061 /**
2062  *      lro_add_page - add a page chunk to an LRO session
2063  *      @adap: the adapter
2064  *      @qs: the associated queue set
2065  *      @fl: the free list containing the page chunk to add
2066  *      @len: packet length
2067  *      @complete: Indicates the last fragment of a frame
2068  *
2069  *      Add a received packet contained in a page chunk to an existing LRO
2070  *      session.
2071  */
2072 static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2073                          struct sge_fl *fl, int len, int complete)
2074 {
2075         struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2076         struct cpl_rx_pkt *cpl;
2077         struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags;
2078         int nr_frags = qs->lro_frag_tbl.nr_frags;
2079         int frag_len = qs->lro_frag_tbl.len;
2080         int offset = 0;
2081
2082         if (!nr_frags) {
2083                 offset = 2 + sizeof(struct cpl_rx_pkt);
2084                 qs->lro_va = cpl = sd->pg_chunk.va + 2;
2085         }
2086
2087         fl->credits--;
2088
2089         len -= offset;
2090         pci_dma_sync_single_for_cpu(adap->pdev,
2091                                     pci_unmap_addr(sd, dma_addr),
2092                                     fl->buf_size - SGE_PG_RSVD,
2093                                     PCI_DMA_FROMDEVICE);
2094
2095         (*sd->pg_chunk.p_cnt)--;
2096         if (!*sd->pg_chunk.p_cnt)
2097                 pci_unmap_page(adap->pdev,
2098                                sd->pg_chunk.mapping,
2099                                fl->alloc_size,
2100                                PCI_DMA_FROMDEVICE);
2101
2102         prefetch(qs->lro_va);
2103
2104         rx_frag += nr_frags;
2105         rx_frag->page = sd->pg_chunk.page;
2106         rx_frag->page_offset = sd->pg_chunk.offset + offset;
2107         rx_frag->size = len;
2108         frag_len += len;
2109         qs->lro_frag_tbl.nr_frags++;
2110         qs->lro_frag_tbl.len = frag_len;
2111
2112
2113         if (!complete)
2114                 return;
2115
2116         qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY;
2117         cpl = qs->lro_va;
2118
2119         if (unlikely(cpl->vlan_valid)) {
2120                 struct net_device *dev = qs->netdev;
2121                 struct port_info *pi = netdev_priv(dev);
2122                 struct vlan_group *grp = pi->vlan_grp;
2123
2124                 if (likely(grp != NULL)) {
2125                         vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan),
2126                                        &qs->lro_frag_tbl);
2127                         goto out;
2128                 }
2129         }
2130         napi_gro_frags(&qs->napi, &qs->lro_frag_tbl);
2131
2132 out:
2133         qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0;
2134 }
2135
2136 /**
2137  *      handle_rsp_cntrl_info - handles control information in a response
2138  *      @qs: the queue set corresponding to the response
2139  *      @flags: the response control flags
2140  *
2141  *      Handles the control information of an SGE response, such as GTS
2142  *      indications and completion credits for the queue set's Tx queues.
2143  *      HW coalesces credits, we don't do any extra SW coalescing.
2144  */
2145 static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
2146 {
2147         unsigned int credits;
2148
2149 #if USE_GTS
2150         if (flags & F_RSPD_TXQ0_GTS)
2151                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2152 #endif
2153
2154         credits = G_RSPD_TXQ0_CR(flags);
2155         if (credits)
2156                 qs->txq[TXQ_ETH].processed += credits;
2157
2158         credits = G_RSPD_TXQ2_CR(flags);
2159         if (credits)
2160                 qs->txq[TXQ_CTRL].processed += credits;
2161
2162 # if USE_GTS
2163         if (flags & F_RSPD_TXQ1_GTS)
2164                 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2165 # endif
2166         credits = G_RSPD_TXQ1_CR(flags);
2167         if (credits)
2168                 qs->txq[TXQ_OFLD].processed += credits;
2169 }
2170
2171 /**
2172  *      check_ring_db - check if we need to ring any doorbells
2173  *      @adapter: the adapter
2174  *      @qs: the queue set whose Tx queues are to be examined
2175  *      @sleeping: indicates which Tx queue sent GTS
2176  *
2177  *      Checks if some of a queue set's Tx queues need to ring their doorbells
2178  *      to resume transmission after idling while they still have unprocessed
2179  *      descriptors.
2180  */
2181 static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2182                           unsigned int sleeping)
2183 {
2184         if (sleeping & F_RSPD_TXQ0_GTS) {
2185                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
2186
2187                 if (txq->cleaned + txq->in_use != txq->processed &&
2188                     !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2189                         set_bit(TXQ_RUNNING, &txq->flags);
2190                         t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2191                                      V_EGRCNTX(txq->cntxt_id));
2192                 }
2193         }
2194
2195         if (sleeping & F_RSPD_TXQ1_GTS) {
2196                 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2197
2198                 if (txq->cleaned + txq->in_use != txq->processed &&
2199                     !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2200                         set_bit(TXQ_RUNNING, &txq->flags);
2201                         t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2202                                      V_EGRCNTX(txq->cntxt_id));
2203                 }
2204         }
2205 }
2206
2207 /**
2208  *      is_new_response - check if a response is newly written
2209  *      @r: the response descriptor
2210  *      @q: the response queue
2211  *
2212  *      Returns true if a response descriptor contains a yet unprocessed
2213  *      response.
2214  */
2215 static inline int is_new_response(const struct rsp_desc *r,
2216                                   const struct sge_rspq *q)
2217 {
2218         return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2219 }
2220
2221 static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2222 {
2223         q->pg_skb = NULL;
2224         q->rx_recycle_buf = 0;
2225 }
2226
/* Response flag bits carrying the GTS indications of Tx queues 0 and 1. */
#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
/*
 * All response flag bits that carry Tx control information: the GTS
 * indications plus the completion-credit fields of Tx queues 0, 1 and 2.
 */
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
                        V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
                        V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
                        V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500
2235
2236 /**
2237  *      process_responses - process responses from an SGE response queue
2238  *      @adap: the adapter
2239  *      @qs: the queue set to which the response queue belongs
2240  *      @budget: how many responses can be processed in this round
2241  *
2242  *      Process responses from an SGE response queue up to the supplied budget.
2243  *      Responses include received packets as well as credits and other events
2244  *      for the queues that belong to the response queue's queue set.
2245  *      A negative budget is effectively unlimited.
2246  *
2247  *      Additionally choose the interrupt holdoff time for the next interrupt
2248  *      on this queue.  If the system is under memory shortage use a fairly
2249  *      long delay to help recovery.
2250  */
2251 static int process_responses(struct adapter *adap, struct sge_qset *qs,
2252                              int budget)
2253 {
2254         struct sge_rspq *q = &qs->rspq;
2255         struct rsp_desc *r = &q->desc[q->cidx];
2256         int budget_left = budget;
2257         unsigned int sleeping = 0;
2258         struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2259         int ngathered = 0;
2260
2261         q->next_holdoff = q->holdoff_tmr;
2262
2263         while (likely(budget_left && is_new_response(r, q))) {
2264                 int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled;
2265                 struct sk_buff *skb = NULL;
2266                 u32 len, flags = ntohl(r->flags);
2267                 __be32 rss_hi = *(const __be32 *)r,
2268                        rss_lo = r->rss_hdr.rss_hash_val;
2269
2270                 eth = r->rss_hdr.opcode == CPL_RX_PKT;
2271
2272                 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2273                         skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2274                         if (!skb)
2275                                 goto no_mem;
2276
2277                         memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2278                         skb->data[0] = CPL_ASYNC_NOTIF;
2279                         rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2280                         q->async_notif++;
2281                 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2282                         skb = get_imm_packet(r);
2283                         if (unlikely(!skb)) {
2284 no_mem:
2285                                 q->next_holdoff = NOMEM_INTR_DELAY;
2286                                 q->nomem++;
2287                                 /* consume one credit since we tried */
2288                                 budget_left--;
2289                                 break;
2290                         }
2291                         q->imm_data++;
2292                         ethpad = 0;
2293                 } else if ((len = ntohl(r->len_cq)) != 0) {
2294                         struct sge_fl *fl;
2295
2296                         lro &= eth && is_eth_tcp(rss_hi);
2297
2298                         fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2299                         if (fl->use_pages) {
2300                                 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2301
2302                                 prefetch(&qs->lro_frag_tbl);
2303
2304                                 prefetch(addr);
2305 #if L1_CACHE_BYTES < 128
2306                                 prefetch(addr + L1_CACHE_BYTES);
2307 #endif
2308                                 __refill_fl(adap, fl);
2309                                 if (lro > 0) {
2310                                         lro_add_page(adap, qs, fl,
2311                                                      G_RSPD_LEN(len),
2312                                                      flags & F_RSPD_EOP);
2313                                          goto next_fl;
2314                                 }
2315
2316                                 skb = get_packet_pg(adap, fl, q,
2317                                                     G_RSPD_LEN(len),
2318                                                     eth ?
2319                                                     SGE_RX_DROP_THRES : 0);
2320                                 q->pg_skb = skb;
2321                         } else
2322                                 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2323                                                  eth ? SGE_RX_DROP_THRES : 0);
2324                         if (unlikely(!skb)) {
2325                                 if (!eth)
2326                                         goto no_mem;
2327                                 q->rx_drops++;
2328                         } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2329                                 __skb_pull(skb, 2);
2330 next_fl:
2331                         if (++fl->cidx == fl->size)
2332                                 fl->cidx = 0;
2333                 } else
2334                         q->pure_rsps++;
2335
2336                 if (flags & RSPD_CTRL_MASK) {
2337                         sleeping |= flags & RSPD_GTS_MASK;
2338                         handle_rsp_cntrl_info(qs, flags);
2339                 }
2340
2341                 r++;
2342                 if (unlikely(++q->cidx == q->size)) {
2343                         q->cidx = 0;
2344                         q->gen ^= 1;
2345                         r = q->desc;
2346                 }
2347                 prefetch(r);
2348
2349                 if (++q->credits >= (q->size / 4)) {
2350                         refill_rspq(adap, q, q->credits);
2351                         q->credits = 0;
2352                 }
2353
2354                 packet_complete = flags &
2355                                   (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2356                                    F_RSPD_ASYNC_NOTIF);
2357
2358                 if (skb != NULL && packet_complete) {
2359                         if (eth)
2360                                 rx_eth(adap, q, skb, ethpad, lro);
2361                         else {
2362                                 q->offload_pkts++;
2363                                 /* Preserve the RSS info in csum & priority */
2364                                 skb->csum = rss_hi;
2365                                 skb->priority = rss_lo;
2366                                 ngathered = rx_offload(&adap->tdev, q, skb,
2367                                                        offload_skbs,
2368                                                        ngathered);
2369                         }
2370
2371                         if (flags & F_RSPD_EOP)
2372                                 clear_rspq_bufstate(q);
2373                 }
2374                 --budget_left;
2375         }
2376
2377         deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2378
2379         if (sleeping)
2380                 check_ring_db(adap, qs, sleeping);
2381
2382         smp_mb();               /* commit Tx queue .processed updates */
2383         if (unlikely(qs->txq_stopped != 0))
2384                 restart_tx(qs);
2385
2386         budget -= budget_left;
2387         return budget;
2388 }
2389
2390 static inline int is_pure_response(const struct rsp_desc *r)
2391 {
2392         __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2393
2394         return (n | r->len_cq) == 0;
2395 }
2396
2397 /**
2398  *      napi_rx_handler - the NAPI handler for Rx processing
2399  *      @napi: the napi instance
2400  *      @budget: how many packets we can process in this round
2401  *
2402  *      Handler for new data events when using NAPI.
2403  */
2404 static int napi_rx_handler(struct napi_struct *napi, int budget)
2405 {
2406         struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2407         struct adapter *adap = qs->adap;
2408         int work_done = process_responses(adap, qs, budget);
2409
2410         if (likely(work_done < budget)) {
2411                 napi_complete(napi);
2412
2413                 /*
2414                  * Because we don't atomically flush the following
2415                  * write it is possible that in very rare cases it can
2416                  * reach the device in a way that races with a new
2417                  * response being written plus an error interrupt
2418                  * causing the NAPI interrupt handler below to return
2419                  * unhandled status to the OS.  To protect against
2420                  * this would require flushing the write and doing
2421                  * both the write and the flush with interrupts off.
2422                  * Way too expensive and unjustifiable given the
2423                  * rarity of the race.
2424                  *
2425                  * The race cannot happen at all with MSI-X.
2426                  */
2427                 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2428                              V_NEWTIMER(qs->rspq.next_holdoff) |
2429                              V_NEWINDEX(qs->rspq.cidx));
2430         }
2431         return work_done;
2432 }
2433
2434 /*
2435  * Returns true if the device is already scheduled for polling.
2436  */
2437 static inline int napi_is_scheduled(struct napi_struct *napi)
2438 {
2439         return test_bit(NAPI_STATE_SCHED, &napi->state);
2440 }
2441
2442 /**
2443  *      process_pure_responses - process pure responses from a response queue
2444  *      @adap: the adapter
2445  *      @qs: the queue set owning the response queue
2446  *      @r: the first pure response to process
2447  *
2448  *      A simpler version of process_responses() that handles only pure (i.e.,
2449  *      non data-carrying) responses.  Such respones are too light-weight to
2450  *      justify calling a softirq under NAPI, so we handle them specially in
2451  *      the interrupt handler.  The function is called with a pointer to a
2452  *      response, which the caller must ensure is a valid pure response.
2453  *
2454  *      Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2455  */
2456 static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2457                                   struct rsp_desc *r)
2458 {
2459         struct sge_rspq *q = &qs->rspq;
2460         unsigned int sleeping = 0;
2461
2462         do {
2463                 u32 flags = ntohl(r->flags);
2464
2465                 r++;
2466                 if (unlikely(++q->cidx == q->size)) {
2467                         q->cidx = 0;
2468                         q->gen ^= 1;
2469                         r = q->desc;
2470                 }
2471                 prefetch(r);
2472
2473                 if (flags & RSPD_CTRL_MASK) {
2474                         sleeping |= flags & RSPD_GTS_MASK;
2475                         handle_rsp_cntrl_info(qs, flags);
2476                 }
2477
2478                 q->pure_rsps++;
2479                 if (++q->credits >= (q->size / 4)) {
2480                         refill_rspq(adap, q, q->credits);
2481                         q->credits = 0;
2482                 }
2483         } while (is_new_response(r, q) && is_pure_response(r));
2484
2485         if (sleeping)
2486                 check_ring_db(adap, qs, sleeping);
2487
2488         smp_mb();               /* commit Tx queue .processed updates */
2489         if (unlikely(qs->txq_stopped != 0))
2490                 restart_tx(qs);
2491
2492         return is_new_response(r, q);
2493 }
2494
2495 /**
2496  *      handle_responses - decide what to do with new responses in NAPI mode
2497  *      @adap: the adapter
2498  *      @q: the response queue
2499  *
2500  *      This is used by the NAPI interrupt handlers to decide what to do with
2501  *      new SGE responses.  If there are no new responses it returns -1.  If
2502  *      there are new responses and they are pure (i.e., non-data carrying)
2503  *      it handles them straight in hard interrupt context as they are very
2504  *      cheap and don't deliver any packets.  Finally, if there are any data
2505  *      signaling responses it schedules the NAPI handler.  Returns 1 if it
2506  *      schedules NAPI, 0 if all new responses were pure.
2507  *
2508  *      The caller must ascertain NAPI is not already running.
2509  */
2510 static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2511 {
2512         struct sge_qset *qs = rspq_to_qset(q);
2513         struct rsp_desc *r = &q->desc[q->cidx];
2514
2515         if (!is_new_response(r, q))
2516                 return -1;
2517         if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2518                 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2519                              V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2520                 return 0;
2521         }
2522         napi_schedule(&qs->napi);
2523         return 1;
2524 }
2525
2526 /*
2527  * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2528  * (i.e., response queue serviced in hard interrupt).
2529  */
2530 irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2531 {
2532         struct sge_qset *qs = cookie;
2533         struct adapter *adap = qs->adap;
2534         struct sge_rspq *q = &qs->rspq;
2535
2536         spin_lock(&q->lock);
2537         if (process_responses(adap, qs, -1) == 0)
2538                 q->unhandled_irqs++;
2539         t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2540                      V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2541         spin_unlock(&q->lock);
2542         return IRQ_HANDLED;
2543 }
2544
2545 /*
2546  * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2547  * (i.e., response queue serviced by NAPI polling).
2548  */
2549 static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2550 {
2551         struct sge_qset *qs = cookie;
2552         struct sge_rspq *q = &qs->rspq;
2553
2554         spin_lock(&q->lock);
2555
2556         if (handle_responses(qs->adap, q) < 0)
2557                 q->unhandled_irqs++;
2558         spin_unlock(&q->lock);
2559         return IRQ_HANDLED;
2560 }
2561
2562 /*
2563  * The non-NAPI MSI interrupt handler.  This needs to handle data events from
2564  * SGE response queues as well as error and other async events as they all use
2565  * the same MSI vector.  We use one SGE response queue per port in this mode
2566  * and protect all response queues with queue 0's lock.
2567  */
2568 static irqreturn_t t3_intr_msi(int irq, void *cookie)
2569 {
2570         int new_packets = 0;
2571         struct adapter *adap = cookie;
2572         struct sge_rspq *q = &adap->sge.qs[0].rspq;
2573
2574         spin_lock(&q->lock);
2575
2576         if (process_responses(adap, &adap->sge.qs[0], -1)) {
2577                 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2578                              V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2579                 new_packets = 1;
2580         }
2581
2582         if (adap->params.nports == 2 &&
2583             process_responses(adap, &adap->sge.qs[1], -1)) {
2584                 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2585
2586                 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2587                              V_NEWTIMER(q1->next_holdoff) |
2588                              V_NEWINDEX(q1->cidx));
2589                 new_packets = 1;
2590         }
2591
2592         if (!new_packets && t3_slow_intr_handler(adap) == 0)
2593                 q->unhandled_irqs++;
2594
2595         spin_unlock(&q->lock);
2596         return IRQ_HANDLED;
2597 }
2598
2599 static int rspq_check_napi(struct sge_qset *qs)
2600 {
2601         struct sge_rspq *q = &qs->rspq;
2602
2603         if (!napi_is_scheduled(&qs->napi) &&
2604             is_new_response(&q->desc[q->cidx], q)) {
2605                 napi_schedule(&qs->napi);
2606                 return 1;
2607         }
2608         return 0;
2609 }
2610
2611 /*
2612  * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2613  * by NAPI polling).  Handles data events from SGE response queues as well as
2614  * error and other async events as they all use the same MSI vector.  We use
2615  * one SGE response queue per port in this mode and protect all response
2616  * queues with queue 0's lock.
2617  */
2618 static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2619 {
2620         int new_packets;
2621         struct adapter *adap = cookie;
2622         struct sge_rspq *q = &adap->sge.qs[0].rspq;
2623
2624         spin_lock(&q->lock);
2625
2626         new_packets = rspq_check_napi(&adap->sge.qs[0]);
2627         if (adap->params.nports == 2)
2628                 new_packets += rspq_check_napi(&adap->sge.qs[1]);
2629         if (!new_packets && t3_slow_intr_handler(adap) == 0)
2630                 q->unhandled_irqs++;
2631
2632         spin_unlock(&q->lock);
2633         return IRQ_HANDLED;
2634 }
2635
2636 /*
2637  * A helper function that processes responses and issues GTS.
2638  */
2639 static inline int process_responses_gts(struct adapter *adap,
2640                                         struct sge_rspq *rq)
2641 {
2642         int work;
2643
2644         work = process_responses(adap, rspq_to_qset(rq), -1);
2645         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2646                      V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2647         return work;
2648 }
2649
2650 /*
2651  * The legacy INTx interrupt handler.  This needs to handle data events from
2652  * SGE response queues as well as error and other async events as they all use
2653  * the same interrupt pin.  We use one SGE response queue per port in this mode
2654  * and protect all response queues with queue 0's lock.
2655  */
2656 static irqreturn_t t3_intr(int irq, void *cookie)
2657 {
2658         int work_done, w0, w1;
2659         struct adapter *adap = cookie;
2660         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2661         struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2662
2663         spin_lock(&q0->lock);
2664
2665         w0 = is_new_response(&q0->desc[q0->cidx], q0);
2666         w1 = adap->params.nports == 2 &&
2667             is_new_response(&q1->desc[q1->cidx], q1);
2668
2669         if (likely(w0 | w1)) {
2670                 t3_write_reg(adap, A_PL_CLI, 0);
2671                 t3_read_reg(adap, A_PL_CLI);    /* flush */
2672
2673                 if (likely(w0))
2674                         process_responses_gts(adap, q0);
2675
2676                 if (w1)
2677                         process_responses_gts(adap, q1);
2678
2679                 work_done = w0 | w1;
2680         } else
2681                 work_done = t3_slow_intr_handler(adap);
2682
2683         spin_unlock(&q0->lock);
2684         return IRQ_RETVAL(work_done != 0);
2685 }
2686
2687 /*
2688  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2689  * Handles data events from SGE response queues as well as error and other
2690  * async events as they all use the same interrupt pin.  We use one SGE
2691  * response queue per port in this mode and protect all response queues with
2692  * queue 0's lock.
2693  */
2694 static irqreturn_t t3b_intr(int irq, void *cookie)
2695 {
2696         u32 map;
2697         struct adapter *adap = cookie;
2698         struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2699
2700         t3_write_reg(adap, A_PL_CLI, 0);
2701         map = t3_read_reg(adap, A_SG_DATA_INTR);
2702
2703         if (unlikely(!map))     /* shared interrupt, most likely */
2704                 return IRQ_NONE;
2705
2706         spin_lock(&q0->lock);
2707
2708         if (unlikely(map & F_ERRINTR))
2709                 t3_slow_intr_handler(adap);
2710
2711         if (likely(map & 1))
2712                 process_responses_gts(adap, q0);
2713
2714         if (map & 2)
2715                 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2716
2717         spin_unlock(&q0->lock);
2718         return IRQ_HANDLED;
2719 }
2720
2721 /*
2722  * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2723  * Handles data events from SGE response queues as well as error and other
2724  * async events as they all use the same interrupt pin.  We use one SGE
2725  * response queue per port in this mode and protect all response queues with
2726  * queue 0's lock.
2727  */
2728 static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2729 {
2730         u32 map;
2731         struct adapter *adap = cookie;
2732         struct sge_qset *qs0 = &adap->sge.qs[0];
2733         struct sge_rspq *q0 = &qs0->rspq;
2734
2735         t3_write_reg(adap, A_PL_CLI, 0);
2736         map = t3_read_reg(adap, A_SG_DATA_INTR);
2737
2738         if (unlikely(!map))     /* shared interrupt, most likely */
2739                 return IRQ_NONE;
2740
2741         spin_lock(&q0->lock);
2742
2743         if (unlikely(map & F_ERRINTR))
2744                 t3_slow_intr_handler(adap);
2745
2746         if (likely(map & 1))
2747                 napi_schedule(&qs0->napi);
2748
2749         if (map & 2)
2750                 napi_schedule(&adap->sge.qs[1].napi);
2751
2752         spin_unlock(&q0->lock);
2753         return IRQ_HANDLED;
2754 }
2755
2756 /**
2757  *      t3_intr_handler - select the top-level interrupt handler
2758  *      @adap: the adapter
2759  *      @polling: whether using NAPI to service response queues
2760  *
2761  *      Selects the top-level interrupt handler based on the type of interrupts
2762  *      (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2763  *      response queues.
2764  */
2765 irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
2766 {
2767         if (adap->flags & USING_MSIX)
2768                 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2769         if (adap->flags & USING_MSI)
2770                 return polling ? t3_intr_msi_napi : t3_intr_msi;
2771         if (adap->params.rev > 0)
2772                 return polling ? t3b_intr_napi : t3b_intr;
2773         return t3_intr;
2774 }
2775
2776 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
2777                     F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
2778                     V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
2779                     F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
2780                     F_HIRCQPARITYERROR)
2781 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
2782 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
2783                       F_RSPQDISABLED)
2784
2785 /**
2786  *      t3_sge_err_intr_handler - SGE async event interrupt handler
2787  *      @adapter: the adapter
2788  *
2789  *      Interrupt handler for SGE asynchronous (non-data) events.
2790  */
2791 void t3_sge_err_intr_handler(struct adapter *adapter)
2792 {
2793         unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE) &
2794                                  ~F_FLEMPTY;
2795
2796         if (status & SGE_PARERR)
2797                 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2798                          status & SGE_PARERR);
2799         if (status & SGE_FRAMINGERR)
2800                 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2801                          status & SGE_FRAMINGERR);
2802
2803         if (status & F_RSPQCREDITOVERFOW)
2804                 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2805
2806         if (status & F_RSPQDISABLED) {
2807                 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2808
2809                 CH_ALERT(adapter,
2810                          "packet delivered to disabled response queue "
2811                          "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2812         }
2813
2814         if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2815                 CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
2816                          status & F_HIPIODRBDROPERR ? "high" : "lo");
2817
2818         t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2819         if (status &  SGE_FATALERR)
2820                 t3_fatal_err(adapter);
2821 }
2822
2823 /**
2824  *      sge_timer_tx - perform periodic maintenance of an SGE qset
2825  *      @data: the SGE queue set to maintain
2826  *
2827  *      Runs periodically from a timer to perform maintenance of an SGE queue
2828  *      set.  It performs two tasks:
2829  *
2830  *      Cleans up any completed Tx descriptors that may still be pending.
2831  *      Normal descriptor cleanup happens when new packets are added to a Tx
2832  *      queue so this timer is relatively infrequent and does any cleanup only
2833  *      if the Tx queue has not seen any new packets in a while.  We make a
2834  *      best effort attempt to reclaim descriptors, in that we don't wait
2835  *      around if we cannot get a queue's lock (which most likely is because
2836  *      someone else is queueing new packets and so will also handle the clean
2837  *      up).  Since control queues use immediate data exclusively we don't
2838  *      bother cleaning them up here.
2839  *
2840  */
2841 static void sge_timer_tx(unsigned long data)
2842 {
2843         struct sge_qset *qs = (struct sge_qset *)data;
2844         struct port_info *pi = netdev_priv(qs->netdev);
2845         struct adapter *adap = pi->adapter;
2846         unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0};
2847         unsigned long next_period;
2848
2849         if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2850                 tbd[TXQ_ETH] = reclaim_completed_tx(adap, &qs->txq[TXQ_ETH],
2851                                                     TX_RECLAIM_TIMER_CHUNK);
2852                 spin_unlock(&qs->txq[TXQ_ETH].lock);
2853         }
2854         if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2855                 tbd[TXQ_OFLD] = reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD],
2856                                                      TX_RECLAIM_TIMER_CHUNK);
2857                 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2858         }
2859
2860         next_period = TX_RECLAIM_PERIOD >>
2861                       (max(tbd[TXQ_ETH], tbd[TXQ_OFLD]) /
2862                        TX_RECLAIM_TIMER_CHUNK);
2863         mod_timer(&qs->tx_reclaim_timer, jiffies + next_period);
2864 }
2865
2866 /*
2867  *      sge_timer_rx - perform periodic maintenance of an SGE qset
2868  *      @data: the SGE queue set to maintain
2869  *
2870  *      a) Replenishes Rx queues that have run out due to memory shortage.
2871  *      Normally new Rx buffers are added when existing ones are consumed but
2872  *      when out of memory a queue can become empty.  We try to add only a few
2873  *      buffers here, the queue will be replenished fully as these new buffers
2874  *      are used up if memory shortage has subsided.
2875  *
2876  *      b) Return coalesced response queue credits in case a response queue is
2877  *      starved.
2878  *
2879  */
2880 static void sge_timer_rx(unsigned long data)
2881 {
2882         spinlock_t *lock;
2883         struct sge_qset *qs = (struct sge_qset *)data;
2884         struct port_info *pi = netdev_priv(qs->netdev);
2885         struct adapter *adap = pi->adapter;
2886         u32 status;
2887
2888         lock = adap->params.rev > 0 ?
2889                &qs->rspq.lock : &adap->sge.qs[0].rspq.lock;
2890
2891         if (!spin_trylock_irq(lock))
2892                 goto out;
2893
2894         if (napi_is_scheduled(&qs->napi))
2895                 goto unlock;
2896
2897         if (adap->params.rev < 4) {
2898                 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2899
2900                 if (status & (1 << qs->rspq.cntxt_id)) {
2901                         qs->rspq.starved++;
2902                         if (qs->rspq.credits) {
2903                                 qs->rspq.credits--;
2904                                 refill_rspq(adap, &qs->rspq, 1);
2905                                 qs->rspq.restarted++;
2906                                 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2907                                              1 << qs->rspq.cntxt_id);
2908                         }
2909                 }
2910         }
2911
2912         if (qs->fl[0].credits < qs->fl[0].size)
2913                 __refill_fl(adap, &qs->fl[0]);
2914         if (qs->fl[1].credits < qs->fl[1].size)
2915                 __refill_fl(adap, &qs->fl[1]);
2916
2917 unlock:
2918         spin_unlock_irq(lock);
2919 out:
2920         mod_timer(&qs->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
2921 }
2922
2923 /**
2924  *      t3_update_qset_coalesce - update coalescing settings for a queue set
2925  *      @qs: the SGE queue set
2926  *      @p: new queue set parameters
2927  *
2928  *      Update the coalescing settings for an SGE queue set.  Nothing is done
2929  *      if the queue set is not initialized yet.
2930  */
2931 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2932 {
2933         qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2934         qs->rspq.polling = p->polling;
2935         qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
2936 }
2937
2938 /**
2939  *      t3_sge_alloc_qset - initialize an SGE queue set
2940  *      @adapter: the adapter
2941  *      @id: the queue set id
2942  *      @nports: how many Ethernet ports will be using this queue set
2943  *      @irq_vec_idx: the IRQ vector index for response queue interrupts
2944  *      @p: configuration parameters for this queue set
2945  *      @ntxq: number of Tx queues for the queue set
2946  *      @netdev: net device associated with this queue set
2947  *      @netdevq: net device TX queue associated with this queue set
2948  *
2949  *      Allocate resources and initialize an SGE queue set.  A queue set
2950  *      comprises a response queue, two Rx free-buffer queues, and up to 3
2951  *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
2952  *      queue, offload queue, and control queue.
2953  */
2954 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2955                       int irq_vec_idx, const struct qset_params *p,
2956                       int ntxq, struct net_device *dev,
2957                       struct netdev_queue *netdevq)
2958 {
2959         int i, avail, ret = -ENOMEM;
2960         struct sge_qset *q = &adapter->sge.qs[id];
2961
2962         init_qset_cntxt(q, id);
2963         setup_timer(&q->tx_reclaim_timer, sge_timer_tx, (unsigned long)q);
2964         setup_timer(&q->rx_reclaim_timer, sge_timer_rx, (unsigned long)q);
2965
2966         q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2967                                    sizeof(struct rx_desc),
2968                                    sizeof(struct rx_sw_desc),
2969                                    &q->fl[0].phys_addr, &q->fl[0].sdesc);
2970         if (!q->fl[0].desc)
2971                 goto err;
2972
2973         q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2974                                    sizeof(struct rx_desc),
2975                                    sizeof(struct rx_sw_desc),
2976                                    &q->fl[1].phys_addr, &q->fl[1].sdesc);
2977         if (!q->fl[1].desc)
2978                 goto err;
2979
2980         q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2981                                   sizeof(struct rsp_desc), 0,
2982                                   &q->rspq.phys_addr, NULL);
2983         if (!q->rspq.desc)
2984                 goto err;
2985
2986         for (i = 0; i < ntxq; ++i) {
2987                 /*
2988                  * The control queue always uses immediate data so does not
2989                  * need to keep track of any sk_buffs.
2990                  */
2991                 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2992
2993                 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2994                                             sizeof(struct tx_desc), sz,
2995                                             &q->txq[i].phys_addr,
2996                                             &q->txq[i].sdesc);
2997                 if (!q->txq[i].desc)
2998                         goto err;
2999
3000                 q->txq[i].gen = 1;
3001                 q->txq[i].size = p->txq_size[i];
3002                 spin_lock_init(&q->txq[i].lock);
3003                 skb_queue_head_init(&q->txq[i].sendq);
3004         }
3005
3006         tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
3007                      (unsigned long)q);
3008         tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
3009                      (unsigned long)q);
3010
3011         q->fl[0].gen = q->fl[1].gen = 1;
3012         q->fl[0].size = p->fl_size;
3013         q->fl[1].size = p->jumbo_size;
3014
3015         q->rspq.gen = 1;
3016         q->rspq.size = p->rspq_size;
3017         spin_lock_init(&q->rspq.lock);
3018         skb_queue_head_init(&q->rspq.rx_queue);
3019
3020         q->txq[TXQ_ETH].stop_thres = nports *
3021             flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
3022
3023 #if FL0_PG_CHUNK_SIZE > 0
3024         q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
3025 #else
3026         q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
3027 #endif
3028 #if FL1_PG_CHUNK_SIZE > 0
3029         q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
3030 #else
3031         q->fl[1].buf_size = is_offload(adapter) ?
3032                 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
3033                 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
3034 #endif
3035
3036         q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
3037         q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
3038         q->fl[0].order = FL0_PG_ORDER;
3039         q->fl[1].order = FL1_PG_ORDER;
3040         q->fl[0].alloc_size = FL0_PG_ALLOC_SIZE;
3041         q->fl[1].alloc_size = FL1_PG_ALLOC_SIZE;
3042
3043         spin_lock_irq(&adapter->sge.reg_lock);
3044
3045         /* FL threshold comparison uses < */
3046         ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
3047                                    q->rspq.phys_addr, q->rspq.size,
3048                                    q->fl[0].buf_size - SGE_PG_RSVD, 1, 0);
3049         if (ret)
3050                 goto err_unlock;
3051
3052         for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
3053                 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
3054                                           q->fl[i].phys_addr, q->fl[i].size,
3055                                           q->fl[i].buf_size - SGE_PG_RSVD,
3056                                           p->cong_thres, 1, 0);
3057                 if (ret)
3058                         goto err_unlock;
3059         }
3060
3061         ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
3062                                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
3063                                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
3064                                  1, 0);
3065         if (ret)
3066                 goto err_unlock;
3067
3068         if (ntxq > 1) {
3069                 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
3070                                          USE_GTS, SGE_CNTXT_OFLD, id,
3071                                          q->txq[TXQ_OFLD].phys_addr,
3072                                          q->txq[TXQ_OFLD].size, 0, 1, 0);
3073                 if (ret)
3074                         goto err_unlock;
3075         }
3076
3077         if (ntxq > 2) {
3078                 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
3079                                          SGE_CNTXT_CTRL, id,
3080                                          q->txq[TXQ_CTRL].phys_addr,
3081                                          q->txq[TXQ_CTRL].size,
3082                                          q->txq[TXQ_CTRL].token, 1, 0);
3083                 if (ret)
3084                         goto err_unlock;
3085         }
3086
3087         spin_unlock_irq(&adapter->sge.reg_lock);
3088
3089         q->adap = adapter;
3090         q->netdev = dev;
3091         q->tx_q = netdevq;
3092         t3_update_qset_coalesce(q, p);
3093
3094         avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
3095                           GFP_KERNEL | __GFP_COMP);
3096         if (!avail) {
3097                 CH_ALERT(adapter, "free list queue 0 initialization failed\n");
3098                 goto err;
3099         }
3100         if (avail < q->fl[0].size)
3101                 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
3102                         avail);
3103
3104         avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
3105                           GFP_KERNEL | __GFP_COMP);
3106         if (avail < q->fl[1].size)
3107                 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
3108                         avail);
3109         refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
3110
3111         t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
3112                      V_NEWTIMER(q->rspq.holdoff_tmr));
3113
3114         return 0;
3115
3116 err_unlock:
3117         spin_unlock_irq(&adapter->sge.reg_lock);
3118 err:
3119         t3_free_qset(adapter, q);
3120         return ret;
3121 }
3122
3123 /**
3124  *      t3_start_sge_timers - start SGE timer call backs
3125  *      @adap: the adapter
3126  *
3127  *      Starts each SGE queue set's timer call back
3128  */
3129 void t3_start_sge_timers(struct adapter *adap)
3130 {
3131         int i;
3132
3133         for (i = 0; i < SGE_QSETS; ++i) {
3134                 struct sge_qset *q = &adap->sge.qs[i];
3135
3136         if (q->tx_reclaim_timer.function)
3137                 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
3138
3139         if (q->rx_reclaim_timer.function)
3140                 mod_timer(&q->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
3141         }
3142 }
3143
3144 /**
3145  *      t3_stop_sge_timers - stop SGE timer call backs
3146  *      @adap: the adapter
3147  *
3148  *      Stops each SGE queue set's timer call back
3149  */
3150 void t3_stop_sge_timers(struct adapter *adap)
3151 {
3152         int i;
3153
3154         for (i = 0; i < SGE_QSETS; ++i) {
3155                 struct sge_qset *q = &adap->sge.qs[i];
3156
3157                 if (q->tx_reclaim_timer.function)
3158                         del_timer_sync(&q->tx_reclaim_timer);
3159                 if (q->rx_reclaim_timer.function)
3160                         del_timer_sync(&q->rx_reclaim_timer);
3161         }
3162 }
3163
3164 /**
3165  *      t3_free_sge_resources - free SGE resources
3166  *      @adap: the adapter
3167  *
3168  *      Frees resources used by the SGE queue sets.
3169  */
3170 void t3_free_sge_resources(struct adapter *adap)
3171 {
3172         int i;
3173
3174         for (i = 0; i < SGE_QSETS; ++i)
3175                 t3_free_qset(adap, &adap->sge.qs[i]);
3176 }
3177
3178 /**
3179  *      t3_sge_start - enable SGE
3180  *      @adap: the adapter
3181  *
3182  *      Enables the SGE for DMAs.  This is the last step in starting packet
3183  *      transfers.
3184  */
3185 void t3_sge_start(struct adapter *adap)
3186 {
3187         t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3188 }
3189
3190 /**
3191  *      t3_sge_stop - disable SGE operation
3192  *      @adap: the adapter
3193  *
3194  *      Disables the DMA engine.  This can be called in emeregencies (e.g.,
3195  *      from error interrupts) or from normal process context.  In the latter
3196  *      case it also disables any pending queue restart tasklets.  Note that
3197  *      if it is called in interrupt context it cannot disable the restart
3198  *      tasklets as it cannot wait, however the tasklets will have no effect
3199  *      since the doorbells are disabled and the driver will call this again
3200  *      later from process context, at which time the tasklets will be stopped
3201  *      if they are still running.
3202  */
3203 void t3_sge_stop(struct adapter *adap)
3204 {
3205         t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3206         if (!in_interrupt()) {
3207                 int i;
3208
3209                 for (i = 0; i < SGE_QSETS; ++i) {
3210                         struct sge_qset *qs = &adap->sge.qs[i];
3211
3212                         tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
3213                         tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
3214                 }
3215         }
3216 }
3217
3218 /**
3219  *      t3_sge_init - initialize SGE
3220  *      @adap: the adapter
3221  *      @p: the SGE parameters
3222  *
3223  *      Performs SGE initialization needed every time after a chip reset.
3224  *      We do not initialize any of the queue sets here, instead the driver
3225  *      top-level must request those individually.  We also do not enable DMA
3226  *      here, that should be done after the queues have been set up.
3227  */
3228 void t3_sge_init(struct adapter *adap, struct sge_params *p)
3229 {
3230         unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
3231
3232         ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
3233             F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
3234             V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3235             V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
3236 #if SGE_NUM_GENBITS == 1
3237         ctrl |= F_EGRGENCTRL;
3238 #endif
3239         if (adap->params.rev > 0) {
3240                 if (!(adap->flags & (USING_MSIX | USING_MSI)))
3241                         ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
3242         }
3243         t3_write_reg(adap, A_SG_CONTROL, ctrl);
3244         t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3245                      V_LORCQDRBTHRSH(512));
3246         t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3247         t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
3248                      V_TIMEOUT(200 * core_ticks_per_usec(adap)));
3249         t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3250                      adap->params.rev < T3_REV_C ? 1000 : 500);
3251         t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3252         t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3253         t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3254         t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3255         t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3256 }
3257
3258 /**
3259  *      t3_sge_prep - one-time SGE initialization
3260  *      @adap: the associated adapter
3261  *      @p: SGE parameters
3262  *
3263  *      Performs one-time initialization of SGE SW state.  Includes determining
3264  *      defaults for the assorted SGE parameters, which admins can change until
3265  *      they are used to initialize the SGE.
3266  */
3267 void t3_sge_prep(struct adapter *adap, struct sge_params *p)
3268 {
3269         int i;
3270
3271         p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3272             SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3273
3274         for (i = 0; i < SGE_QSETS; ++i) {
3275                 struct qset_params *q = p->qset + i;
3276
3277                 q->polling = adap->params.rev > 0;
3278                 q->coalesce_usecs = 5;
3279                 q->rspq_size = 1024;
3280                 q->fl_size = 1024;
3281                 q->jumbo_size = 512;
3282                 q->txq_size[TXQ_ETH] = 1024;
3283                 q->txq_size[TXQ_OFLD] = 1024;
3284                 q->txq_size[TXQ_CTRL] = 256;
3285                 q->cong_thres = 0;
3286         }
3287
3288         spin_lock_init(&adap->sge.reg_lock);
3289 }
3290
3291 /**
3292  *      t3_get_desc - dump an SGE descriptor for debugging purposes
3293  *      @qs: the queue set
3294  *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3295  *      @idx: the descriptor index in the queue
3296  *      @data: where to dump the descriptor contents
3297  *
3298  *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3299  *      size of the descriptor.
3300  */
3301 int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3302                 unsigned char *data)
3303 {
3304         if (qnum >= 6)
3305                 return -EINVAL;
3306
3307         if (qnum < 3) {
3308                 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3309                         return -EINVAL;
3310                 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3311                 return sizeof(struct tx_desc);
3312         }
3313
3314         if (qnum == 3) {
3315                 if (!qs->rspq.desc || idx >= qs->rspq.size)
3316                         return -EINVAL;
3317                 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3318                 return sizeof(struct rsp_desc);
3319         }
3320
3321         qnum -= 4;
3322         if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3323                 return -EINVAL;
3324         memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3325         return sizeof(struct rx_desc);
3326 }