new helper: file_inode(file)
[pandora-kernel.git] / drivers / net / ethernet / chelsio / cxgb4 / cxgb4_main.c
index 0df1284..8fb1ccf 100644 (file)
@@ -175,6 +175,30 @@ enum {
        MIN_FL_ENTRIES       = 16
 };
 
+/* Host shadow copy of ingress filter entry.  This is in host native format
+ * and doesn't match the ordering or bit order, etc. of the hardware or the
+ * firmware command.  The use of bit-field structure elements is purely to
+ * remind ourselves of the field size limitations and save memory in the case
+ * where the filter table is large.
+ */
+struct filter_entry {
+       /* Administrative fields for filter.
+        */
+       u32 valid:1;            /* filter allocated and valid */
+       u32 locked:1;           /* filter is administratively locked */
+
+       u32 pending:1;          /* filter action is pending firmware reply */
+       u32 smtidx:8;           /* Source MAC Table index for smac */
+       struct l2t_entry *l2t;  /* Layer Two Table entry for dmac */
+
+       /* The filter itself.  Most of this is a straight copy of information
+        * provided by the extended ioctl().  Some fields are translated to
+        * internal forms -- for instance the Ingress Queue ID passed in from
+        * the ioctl() is translated into the Absolute Ingress Queue ID.
+        */
+       struct ch_filter_specification fs;
+};
+
 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
                         NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
                         NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
@@ -325,6 +349,9 @@ enum {
 
 static unsigned int tp_vlan_pri_map = TP_VLAN_PRI_MAP_DEFAULT;
 
+module_param(tp_vlan_pri_map, uint, 0644);
+MODULE_PARM_DESC(tp_vlan_pri_map, "global compressed filter configuration");
+
 static struct dentry *cxgb4_debugfs_root;
 
 static LIST_HEAD(adapter_list);
@@ -506,8 +533,67 @@ static int link_start(struct net_device *dev)
        return ret;
 }
 
-/*
- * Response queue handler for the FW event queue.
+/* Clear a filter and release any of its resources that we own.  This also
+ * clears the filter's "pending" status.
+ */
+static void clear_filter(struct adapter *adap, struct filter_entry *f)
+{
+       /* If the new or old filter has loopback rewriting rules then we'll
+        * need to free any existing Layer Two Table (L2T) entries of the old
+        * filter rule.  The firmware will handle freeing up any Source MAC
+        * Table (SMT) entries used for rewriting Source MAC Addresses in
+        * loopback rules.
+        */
+       if (f->l2t)
+               cxgb4_l2t_release(f->l2t);
+
+       /* The zeroing of the filter rule below clears the filter valid,
+        * pending, locked flags, l2t pointer, etc. so it's all we need for
+        * this operation.
+        */
+       memset(f, 0, sizeof(*f));
+}
+
+/* Handle a filter write/deletion reply.
+ */
+static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
+{
+       unsigned int idx = GET_TID(rpl);
+       unsigned int nidx = idx - adap->tids.ftid_base;
+       unsigned int ret;
+       struct filter_entry *f;
+
+       if (idx >= adap->tids.ftid_base && nidx <
+          (adap->tids.nftids + adap->tids.nsftids)) {
+               idx = nidx;
+               ret = GET_TCB_COOKIE(rpl->cookie);
+               f = &adap->tids.ftid_tab[idx];
+
+               if (ret == FW_FILTER_WR_FLT_DELETED) {
+                       /* Clear the filter when we get confirmation from the
+                        * hardware that the filter has been deleted.
+                        */
+                       clear_filter(adap, f);
+               } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) {
+                       dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n",
+                               idx);
+                       clear_filter(adap, f);
+               } else if (ret == FW_FILTER_WR_FLT_ADDED) {
+                       f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff;
+                       f->pending = 0;  /* asynchronous setup completed */
+                       f->valid = 1;
+               } else {
+                       /* Something went wrong.  Issue a warning about the
+                        * problem and clear everything out.
+                        */
+                       dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n",
+                               idx, ret);
+                       clear_filter(adap, f);
+               }
+       }
+}
+
+/* Response queue handler for the FW event queue.
  */
 static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
                          const struct pkt_gl *gl)
@@ -542,6 +628,10 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
                const struct cpl_l2t_write_rpl *p = (void *)rsp;
 
                do_l2t_write_rpl(q->adap, p);
+       } else if (opcode == CPL_SET_TCB_RPL) {
+               const struct cpl_set_tcb_rpl *p = (void *)rsp;
+
+               filter_rpl(q->adap, p);
        } else
                dev_err(q->adap->pdev_dev,
                        "unexpected CPL %#x on FW event queue\n", opcode);
@@ -983,6 +1073,148 @@ static void t4_free_mem(void *addr)
                kfree(addr);
 }
 
+/* Send a Work Request to write the filter at a specified index.  We construct
+ * a Firmware Filter Work Request to have the work done and put the indicated
+ * filter into "pending" mode which will prevent any further actions against
+ * it till we get a reply from the firmware on the completion status of the
+ * request.
+ */
+static int set_filter_wr(struct adapter *adapter, int fidx)
+{
+       struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
+       struct sk_buff *skb;
+       struct fw_filter_wr *fwr;
+       unsigned int ftid;
+
+       /* If the new filter requires loopback Destination MAC and/or VLAN
+        * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
+        * the filter.
+        */
+       if (f->fs.newdmac || f->fs.newvlan) {
+               /* allocate L2T entry for new filter */
+               f->l2t = t4_l2t_alloc_switching(adapter->l2t);
+               if (f->l2t == NULL)
+                       return -EAGAIN;
+               if (t4_l2t_set_switching(adapter, f->l2t, f->fs.vlan,
+                                       f->fs.eport, f->fs.dmac)) {
+                       cxgb4_l2t_release(f->l2t);
+                       f->l2t = NULL;
+                       return -ENOMEM;
+               }
+       }
+
+       ftid = adapter->tids.ftid_base + fidx;
+
+       skb = alloc_skb(sizeof(*fwr), GFP_KERNEL | __GFP_NOFAIL);
+       fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr));
+       memset(fwr, 0, sizeof(*fwr));
+
+       /* It would be nice to put most of the following in t4_hw.c but most
+        * of the work is translating the cxgbtool ch_filter_specification
+        * into the Work Request and the definition of that structure is
+        * currently in cxgbtool.h which isn't appropriate to pull into the
+        * common code.  We may eventually try to come up with a more neutral
+        * filter specification structure but for now it's easiest to simply
+        * put this fairly direct code in line ...
+        */
+       fwr->op_pkd = htonl(FW_WR_OP(FW_FILTER_WR));
+       fwr->len16_pkd = htonl(FW_WR_LEN16(sizeof(*fwr)/16));
+       fwr->tid_to_iq =
+               htonl(V_FW_FILTER_WR_TID(ftid) |
+                     V_FW_FILTER_WR_RQTYPE(f->fs.type) |
+                     V_FW_FILTER_WR_NOREPLY(0) |
+                     V_FW_FILTER_WR_IQ(f->fs.iq));
+       fwr->del_filter_to_l2tix =
+               htonl(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) |
+                     V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) |
+                     V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) |
+                     V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) |
+                     V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) |
+                     V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) |
+                     V_FW_FILTER_WR_DMAC(f->fs.newdmac) |
+                     V_FW_FILTER_WR_SMAC(f->fs.newsmac) |
+                     V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT ||
+                                            f->fs.newvlan == VLAN_REWRITE) |
+                     V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE ||
+                                           f->fs.newvlan == VLAN_REWRITE) |
+                     V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) |
+                     V_FW_FILTER_WR_TXCHAN(f->fs.eport) |
+                     V_FW_FILTER_WR_PRIO(f->fs.prio) |
+                     V_FW_FILTER_WR_L2TIX(f->l2t ? f->l2t->idx : 0));
+       fwr->ethtype = htons(f->fs.val.ethtype);
+       fwr->ethtypem = htons(f->fs.mask.ethtype);
+       fwr->frag_to_ovlan_vldm =
+               (V_FW_FILTER_WR_FRAG(f->fs.val.frag) |
+                V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) |
+                V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.ivlan_vld) |
+                V_FW_FILTER_WR_OVLAN_VLD(f->fs.val.ovlan_vld) |
+                V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.ivlan_vld) |
+                V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.ovlan_vld));
+       fwr->smac_sel = 0;
+       fwr->rx_chan_rx_rpl_iq =
+               htons(V_FW_FILTER_WR_RX_CHAN(0) |
+                     V_FW_FILTER_WR_RX_RPL_IQ(adapter->sge.fw_evtq.abs_id));
+       fwr->maci_to_matchtypem =
+               htonl(V_FW_FILTER_WR_MACI(f->fs.val.macidx) |
+                     V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) |
+                     V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) |
+                     V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) |
+                     V_FW_FILTER_WR_PORT(f->fs.val.iport) |
+                     V_FW_FILTER_WR_PORTM(f->fs.mask.iport) |
+                     V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) |
+                     V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype));
+       fwr->ptcl = f->fs.val.proto;
+       fwr->ptclm = f->fs.mask.proto;
+       fwr->ttyp = f->fs.val.tos;
+       fwr->ttypm = f->fs.mask.tos;
+       fwr->ivlan = htons(f->fs.val.ivlan);
+       fwr->ivlanm = htons(f->fs.mask.ivlan);
+       fwr->ovlan = htons(f->fs.val.ovlan);
+       fwr->ovlanm = htons(f->fs.mask.ovlan);
+       memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip));
+       memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm));
+       memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip));
+       memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm));
+       fwr->lp = htons(f->fs.val.lport);
+       fwr->lpm = htons(f->fs.mask.lport);
+       fwr->fp = htons(f->fs.val.fport);
+       fwr->fpm = htons(f->fs.mask.fport);
+       if (f->fs.newsmac)
+               memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma));
+
+       /* Mark the filter as "pending" and ship off the Filter Work Request.
+        * When we get the Work Request Reply we'll clear the pending status.
+        */
+       f->pending = 1;
+       set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
+       t4_ofld_send(adapter, skb);
+       return 0;
+}
+
+/* Delete the filter at a specified index.
+ */
+static int del_filter_wr(struct adapter *adapter, int fidx)
+{
+       struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
+       struct sk_buff *skb;
+       struct fw_filter_wr *fwr;
+       unsigned int len, ftid;
+
+       len = sizeof(*fwr);
+       ftid = adapter->tids.ftid_base + fidx;
+
+       skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL);
+       fwr = (struct fw_filter_wr *)__skb_put(skb, len);
+       t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id);
+
+       /* Mark the filter as "pending" and ship off the Filter Work Request.
+        * When we get the Work Request Reply we'll clear the pending status.
+        */
+       f->pending = 1;
+       t4_mgmt_tx(adapter, skb);
+       return 0;
+}
+
 static inline int is_offload(const struct adapter *adap)
 {
        return adap->params.offload;
@@ -2104,7 +2336,7 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count,
                        loff_t *ppos)
 {
        loff_t pos = *ppos;
-       loff_t avail = file->f_path.dentry->d_inode->i_size;
+       loff_t avail = file_inode(file)->i_size;
        unsigned int mem = (uintptr_t)file->private_data & 3;
        struct adapter *adap = file->private_data - mem;
 
@@ -2148,8 +2380,8 @@ static const struct file_operations mem_debugfs_fops = {
        .llseek  = default_llseek,
 };
 
-static void __devinit add_debugfs_mem(struct adapter *adap, const char *name,
-                                     unsigned int idx, unsigned int size_mb)
+static void add_debugfs_mem(struct adapter *adap, const char *name,
+                           unsigned int idx, unsigned int size_mb)
 {
        struct dentry *de;
 
@@ -2159,7 +2391,7 @@ static void __devinit add_debugfs_mem(struct adapter *adap, const char *name,
                de->d_inode->i_size = size_mb << 20;
 }
 
-static int __devinit setup_debugfs(struct adapter *adap)
+static int setup_debugfs(struct adapter *adap)
 {
        int i;
 
@@ -2195,7 +2427,7 @@ int cxgb4_alloc_atid(struct tid_info *t, void *data)
        if (t->afree) {
                union aopen_entry *p = t->afree;
 
-               atid = p - t->atid_tab;
+               atid = (p - t->atid_tab) + t->atid_base;
                t->afree = p->next;
                p->data = data;
                t->atids_in_use++;
@@ -2210,7 +2442,7 @@ EXPORT_SYMBOL(cxgb4_alloc_atid);
  */
 void cxgb4_free_atid(struct tid_info *t, unsigned int atid)
 {
-       union aopen_entry *p = &t->atid_tab[atid];
+       union aopen_entry *p = &t->atid_tab[atid - t->atid_base];
 
        spin_lock_bh(&t->atid_lock);
        p->next = t->afree;
@@ -2249,8 +2481,34 @@ int cxgb4_alloc_stid(struct tid_info *t, int family, void *data)
 }
 EXPORT_SYMBOL(cxgb4_alloc_stid);
 
-/*
- * Release a server TID.
+/* Allocate a server filter TID and set it to the supplied value.
+ */
+int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data)
+{
+       int stid;
+
+       spin_lock_bh(&t->stid_lock);
+       if (family == PF_INET) {
+               stid = find_next_zero_bit(t->stid_bmap,
+                               t->nstids + t->nsftids, t->nstids);
+               if (stid < (t->nstids + t->nsftids))
+                       __set_bit(stid, t->stid_bmap);
+               else
+                       stid = -1;
+       } else {
+               stid = -1;
+       }
+       if (stid >= 0) {
+               t->stid_tab[stid].data = data;
+               stid += t->stid_base;
+               t->stids_in_use++;
+       }
+       spin_unlock_bh(&t->stid_lock);
+       return stid;
+}
+EXPORT_SYMBOL(cxgb4_alloc_sftid);
+
+/* Release a server TID.
  */
 void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family)
 {
@@ -2362,18 +2620,26 @@ EXPORT_SYMBOL(cxgb4_remove_tid);
 static int tid_init(struct tid_info *t)
 {
        size_t size;
+       unsigned int stid_bmap_size;
        unsigned int natids = t->natids;
 
-       size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) +
+       stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids);
+       size = t->ntids * sizeof(*t->tid_tab) +
+              natids * sizeof(*t->atid_tab) +
               t->nstids * sizeof(*t->stid_tab) +
-              BITS_TO_LONGS(t->nstids) * sizeof(long);
+              t->nsftids * sizeof(*t->stid_tab) +
+              stid_bmap_size * sizeof(long) +
+              t->nftids * sizeof(*t->ftid_tab) +
+              t->nsftids * sizeof(*t->ftid_tab);
+
        t->tid_tab = t4_alloc_mem(size);
        if (!t->tid_tab)
                return -ENOMEM;
 
        t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
        t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
-       t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids];
+       t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids];
+       t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size];
        spin_lock_init(&t->stid_lock);
        spin_lock_init(&t->atid_lock);
 
@@ -2388,7 +2654,7 @@ static int tid_init(struct tid_info *t)
                        t->atid_tab[natids - 1].next = &t->atid_tab[natids];
                t->afree = t->atid_tab;
        }
-       bitmap_zero(t->stid_bmap, t->nstids);
+       bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
        return 0;
 }
 
@@ -2404,7 +2670,8 @@ static int tid_init(struct tid_info *t)
  *     Returns <0 on error and one of the %NET_XMIT_* values on success.
  */
 int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
-                       __be32 sip, __be16 sport, unsigned int queue)
+                       __be32 sip, __be16 sport, __be16 vlan,
+                       unsigned int queue)
 {
        unsigned int chan;
        struct sk_buff *skb;
@@ -2750,6 +3017,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld)
 {
        void *handle;
        struct cxgb4_lld_info lli;
+       unsigned short i;
 
        lli.pdev = adap->pdev;
        lli.l2t = adap->l2t;
@@ -2776,10 +3044,16 @@ static void uld_attach(struct adapter *adap, unsigned int uld)
        lli.ucq_density = 1 << QUEUESPERPAGEPF0_GET(
                        t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF) >>
                        (adap->fn * 4));
+       lli.filt_mode = adap->filter_mode;
+       /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */
+       for (i = 0; i < NCHAN; i++)
+               lli.tx_modq[i] = i;
        lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS);
        lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL);
        lli.fw_vers = adap->params.fw_vers;
        lli.dbfifo_int_thresh = dbfifo_int_thresh;
+       lli.sge_pktshift = adap->sge.pktshift;
+       lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN;
 
        handle = ulds[uld].add(&lli);
        if (IS_ERR(handle)) {
@@ -2999,6 +3273,126 @@ static int cxgb_close(struct net_device *dev)
        return t4_enable_vi(adapter, adapter->fn, pi->viid, false, false);
 }
 
+/* Return an error number if the indicated filter isn't writable ...
+ */
+static int writable_filter(struct filter_entry *f)
+{
+       if (f->locked)
+               return -EPERM;
+       if (f->pending)
+               return -EBUSY;
+
+       return 0;
+}
+
+/* Delete the filter at the specified index (if valid).  This checks for all
+ * the common problems with doing this, such as the filter being locked,
+ * currently pending in another operation, etc.
+ */
+static int delete_filter(struct adapter *adapter, unsigned int fidx)
+{
+       struct filter_entry *f;
+       int ret;
+
+       if (fidx >= adapter->tids.nftids + adapter->tids.nsftids)
+               return -EINVAL;
+
+       f = &adapter->tids.ftid_tab[fidx];
+       ret = writable_filter(f);
+       if (ret)
+               return ret;
+       if (f->valid)
+               return del_filter_wr(adapter, fidx);
+
+       return 0;
+}
+
+int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
+               __be32 sip, __be16 sport, __be16 vlan,
+               unsigned int queue, unsigned char port, unsigned char mask)
+{
+       int ret;
+       struct filter_entry *f;
+       struct adapter *adap;
+       int i;
+       u8 *val;
+
+       adap = netdev2adap(dev);
+
+       /* Adjust stid to correct filter index */
+       stid -= adap->tids.nstids;
+       stid += adap->tids.nftids;
+
+       /* Check to make sure the filter requested is writable ...
+        */
+       f = &adap->tids.ftid_tab[stid];
+       ret = writable_filter(f);
+       if (ret)
+               return ret;
+
+       /* Clear out any old resources being used by the filter before
+        * we start constructing the new filter.
+        */
+       if (f->valid)
+               clear_filter(adap, f);
+
+       /* Clear out filter specifications */
+       memset(&f->fs, 0, sizeof(struct ch_filter_specification));
+       f->fs.val.lport = cpu_to_be16(sport);
+       f->fs.mask.lport  = ~0;
+       val = (u8 *)&sip;
+       if ((val[0] | val[1] | val[2] | val[3]) != 0) {
+               for (i = 0; i < 4; i++) {
+                       f->fs.val.lip[i] = val[i];
+                       f->fs.mask.lip[i] = ~0;
+               }
+               if (adap->filter_mode & F_PORT) {
+                       f->fs.val.iport = port;
+                       f->fs.mask.iport = mask;
+               }
+       }
+
+       f->fs.dirsteer = 1;
+       f->fs.iq = queue;
+       /* Mark filter as locked */
+       f->locked = 1;
+       f->fs.rpttid = 1;
+
+       ret = set_filter_wr(adap, stid);
+       if (ret) {
+               clear_filter(adap, f);
+               return ret;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(cxgb4_create_server_filter);
+
+int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid,
+               unsigned int queue, bool ipv6)
+{
+       int ret;
+       struct filter_entry *f;
+       struct adapter *adap;
+
+       adap = netdev2adap(dev);
+
+       /* Adjust stid to correct filter index */
+       stid -= adap->tids.nstids;
+       stid += adap->tids.nftids;
+
+       f = &adap->tids.ftid_tab[stid];
+       /* Unlock the filter */
+       f->locked = 0;
+
+       ret = delete_filter(adap, stid);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+EXPORT_SYMBOL(cxgb4_remove_server_filter);
+
 static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev,
                                                struct rtnl_link_stats64 *ns)
 {
@@ -3203,7 +3597,7 @@ static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
        memset(c, 0, sizeof(*c));
        c->op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
                               FW_CMD_REQUEST | FW_CMD_READ);
-       c->retval_len16 = htonl(FW_LEN16(*c));
+       c->cfvalid_to_len16 = htonl(FW_LEN16(*c));
        ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), c);
        if (ret < 0)
                return ret;
@@ -3245,6 +3639,34 @@ static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
        v = t4_read_reg(adap, TP_PIO_DATA);
        t4_write_reg(adap, TP_PIO_DATA, v & ~CSUM_HAS_PSEUDO_HDR);
 
+       /* first 4 Tx modulation queues point to consecutive Tx channels */
+       adap->params.tp.tx_modq_map = 0xE4;
+       t4_write_reg(adap, A_TP_TX_MOD_QUEUE_REQ_MAP,
+                    V_TX_MOD_QUEUE_REQ_MAP(adap->params.tp.tx_modq_map));
+
+       /* associate each Tx modulation queue with consecutive Tx channels */
+       v = 0x84218421;
+       t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA,
+                         &v, 1, A_TP_TX_SCHED_HDR);
+       t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA,
+                         &v, 1, A_TP_TX_SCHED_FIFO);
+       t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA,
+                         &v, 1, A_TP_TX_SCHED_PCMD);
+
+#define T4_TX_MODQ_10G_WEIGHT_DEFAULT 16 /* in KB units */
+       if (is_offload(adap)) {
+               t4_write_reg(adap, A_TP_TX_MOD_QUEUE_WEIGHT0,
+                            V_TX_MODQ_WEIGHT0(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
+                            V_TX_MODQ_WEIGHT1(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
+                            V_TX_MODQ_WEIGHT2(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
+                            V_TX_MODQ_WEIGHT3(T4_TX_MODQ_10G_WEIGHT_DEFAULT));
+               t4_write_reg(adap, A_TP_TX_MOD_CHANNEL_WEIGHT,
+                            V_TX_MODQ_WEIGHT0(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
+                            V_TX_MODQ_WEIGHT1(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
+                            V_TX_MODQ_WEIGHT2(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
+                            V_TX_MODQ_WEIGHT3(T4_TX_MODQ_10G_WEIGHT_DEFAULT));
+       }
+
        /* get basic stuff going */
        return t4_early_init(adap, adap->fn);
 }
@@ -3397,7 +3819,7 @@ static int adap_init0_config(struct adapter *adapter, int reset)
                htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
                      FW_CMD_REQUEST |
                      FW_CMD_READ);
-       caps_cmd.retval_len16 =
+       caps_cmd.cfvalid_to_len16 =
                htonl(FW_CAPS_CONFIG_CMD_CFVALID |
                      FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
                      FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(maddr >> 16) |
@@ -3422,7 +3844,7 @@ static int adap_init0_config(struct adapter *adapter, int reset)
                htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
                      FW_CMD_REQUEST |
                      FW_CMD_WRITE);
-       caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd));
+       caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
        ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
                         NULL);
        if (ret < 0)
@@ -3497,7 +3919,7 @@ static int adap_init0_no_config(struct adapter *adapter, int reset)
        memset(&caps_cmd, 0, sizeof(caps_cmd));
        caps_cmd.op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
                                     FW_CMD_REQUEST | FW_CMD_READ);
-       caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd));
+       caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
        ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
                         &caps_cmd);
        if (ret < 0)
@@ -3929,7 +4351,7 @@ static int adap_init0(struct adapter *adap)
        memset(&caps_cmd, 0, sizeof(caps_cmd));
        caps_cmd.op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
                                     FW_CMD_REQUEST | FW_CMD_READ);
-       caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd));
+       caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
        ret = t4_wr_mbox(adap, adap->mbox, &caps_cmd, sizeof(caps_cmd),
                         &caps_cmd);
        if (ret < 0)
@@ -4035,6 +4457,10 @@ static int adap_init0(struct adapter *adap)
        for (j = 0; j < NCHAN; j++)
                adap->params.tp.tx_modq[j] = j;
 
+       t4_read_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA,
+                        &adap->filter_mode, 1,
+                        TP_VLAN_PRI_MAP);
+
        adap->flags |= FW_OK;
        return 0;
 
@@ -4173,7 +4599,7 @@ static inline void init_rspq(struct sge_rspq *q, u8 timer_idx, u8 pkt_cnt_idx,
  * of ports we found and the number of available CPUs.  Most settings can be
  * modified by the admin prior to actual use.
  */
-static void __devinit cfg_queues(struct adapter *adap)
+static void cfg_queues(struct adapter *adap)
 {
        struct sge *s = &adap->sge;
        int i, q10g = 0, n10g = 0, qidx = 0;
@@ -4257,7 +4683,7 @@ static void __devinit cfg_queues(struct adapter *adap)
  * Reduce the number of Ethernet queues across all ports to at most n.
  * n provides at least one queue per port.
  */
-static void __devinit reduce_ethqs(struct adapter *adap, int n)
+static void reduce_ethqs(struct adapter *adap, int n)
 {
        int i;
        struct port_info *pi;
@@ -4284,7 +4710,7 @@ static void __devinit reduce_ethqs(struct adapter *adap, int n)
 /* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
 #define EXTRA_VECS 2
 
-static int __devinit enable_msix(struct adapter *adap)
+static int enable_msix(struct adapter *adap)
 {
        int ofld_need = 0;
        int i, err, want, need;
@@ -4333,7 +4759,7 @@ static int __devinit enable_msix(struct adapter *adap)
 
 #undef EXTRA_VECS
 
-static int __devinit init_rss(struct adapter *adap)
+static int init_rss(struct adapter *adap)
 {
        unsigned int i, j;
 
@@ -4349,7 +4775,7 @@ static int __devinit init_rss(struct adapter *adap)
        return 0;
 }
 
-static void __devinit print_port_info(const struct net_device *dev)
+static void print_port_info(const struct net_device *dev)
 {
        static const char *base[] = {
                "R XFI", "R XAUI", "T SGMII", "T XFI", "T XAUI", "KX4", "CX4",
@@ -4386,7 +4812,7 @@ static void __devinit print_port_info(const struct net_device *dev)
                    adap->params.vpd.sn, adap->params.vpd.ec);
 }
 
-static void __devinit enable_pcie_relaxed_ordering(struct pci_dev *dev)
+static void enable_pcie_relaxed_ordering(struct pci_dev *dev)
 {
        pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN);
 }
@@ -4419,8 +4845,7 @@ static void free_some_resources(struct adapter *adapter)
 #define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
                   NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
 
-static int __devinit init_one(struct pci_dev *pdev,
-                             const struct pci_device_id *ent)
+static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        int func, i, err;
        struct port_info *pi;
@@ -4640,7 +5065,7 @@ sriov:
        return err;
 }
 
-static void __devexit remove_one(struct pci_dev *pdev)
+static void remove_one(struct pci_dev *pdev)
 {
        struct adapter *adapter = pci_get_drvdata(pdev);
 
@@ -4662,6 +5087,17 @@ static void __devexit remove_one(struct pci_dev *pdev)
                if (adapter->debugfs_root)
                        debugfs_remove_recursive(adapter->debugfs_root);
 
+               /* If we allocated filters, free up state associated with any
+                * valid filters ...
+                */
+               if (adapter->tids.ftid_tab) {
+                       struct filter_entry *f = &adapter->tids.ftid_tab[0];
+                       for (i = 0; i < (adapter->tids.nftids +
+                                       adapter->tids.nsftids); i++, f++)
+                               if (f->valid)
+                                       clear_filter(adapter, f);
+               }
+
                if (adapter->flags & FULL_INIT_DONE)
                        cxgb_down(adapter);
 
@@ -4680,7 +5116,7 @@ static struct pci_driver cxgb4_driver = {
        .name     = KBUILD_MODNAME,
        .id_table = cxgb4_pci_tbl,
        .probe    = init_one,
-       .remove   = __devexit_p(remove_one),
+       .remove   = remove_one,
        .err_handler = &cxgb4_eeh,
 };