pasemi_mac: performance tweaks
[pandora-kernel.git] / drivers / net / pasemi_mac.c
index e63cc33..c6e24a8 100644 (file)
 #include <net/checksum.h>
 
 #include <asm/irq.h>
+#include <asm/firmware.h>
+#include <asm/pasemi_dma.h>
 
 #include "pasemi_mac.h"
 
+/* We have our own align, since ppc64 in general has it at 0 because
+ * of design flaws in some of the server bridge chips. However, for
+ * PWRficient doing the unaligned copies is more expensive than doing
+ * unaligned DMA, so make sure the data is aligned instead.
+ */
+#define LOCAL_SKB_ALIGN        2
 
 /* TODO list
  *
- * - Get rid of pci_{read,write}_config(), map registers with ioremap
- *   for performance
- * - PHY support
  * - Multicast support
  * - Large MTU support
- * - Other performance improvements
+ * - SW LRO
+ * - Multiqueue RX/TX
  */
 
 
 /* Must be a power of two */
-#define RX_RING_SIZE 512
-#define TX_RING_SIZE 512
+#define RX_RING_SIZE 1024
+#define TX_RING_SIZE 4096
 
 #define DEFAULT_MSG_ENABLE       \
        (NETIF_MSG_DRV          | \
         NETIF_MSG_RX_ERR       | \
         NETIF_MSG_TX_ERR)
 
-#define TX_DESC(mac, num)      ((mac)->tx->desc[(num) & (TX_RING_SIZE-1)])
-#define TX_DESC_INFO(mac, num) ((mac)->tx->desc_info[(num) & (TX_RING_SIZE-1)])
-#define RX_DESC(mac, num)      ((mac)->rx->desc[(num) & (RX_RING_SIZE-1)])
-#define RX_DESC_INFO(mac, num) ((mac)->rx->desc_info[(num) & (RX_RING_SIZE-1)])
-#define RX_BUFF(mac, num)      ((mac)->rx->buffers[(num) & (RX_RING_SIZE-1)])
+#define TX_DESC(tx, num)       ((tx)->chan.ring_virt[(num) & (TX_RING_SIZE-1)])
+#define TX_DESC_INFO(tx, num)  ((tx)->ring_info[(num) & (TX_RING_SIZE-1)])
+#define RX_DESC(rx, num)       ((rx)->chan.ring_virt[(num) & (RX_RING_SIZE-1)])
+#define RX_DESC_INFO(rx, num)  ((rx)->ring_info[(num) & (RX_RING_SIZE-1)])
+#define RX_BUFF(rx, num)       ((rx)->buffers[(num) & (RX_RING_SIZE-1)])
+
+#define RING_USED(ring)                (((ring)->next_to_fill - (ring)->next_to_clean) \
+                                & ((ring)->size - 1))
+#define RING_AVAIL(ring)       ((ring)->size - RING_USED(ring))
 
 #define BUF_SIZE 1646 /* 1500 MTU + ETH_HLEN + VLAN_HLEN + 2 64B cachelines */
 
@@ -79,7 +89,88 @@ static int debug = -1;       /* -1 == use DEFAULT_MSG_ENABLE as value */
 module_param(debug, int, 0);
 MODULE_PARM_DESC(debug, "PA Semi MAC bitmapped debugging message enable value");
 
-static struct pasdma_status *dma_status;
+static int translation_enabled(void)
+{
+#if defined(CONFIG_PPC_PASEMI_IOMMU_DMA_FORCE)
+       return 1;
+#else
+       return firmware_has_feature(FW_FEATURE_LPAR);
+#endif
+}
+
+static void write_iob_reg(unsigned int reg, unsigned int val)
+{
+       pasemi_write_iob_reg(reg, val);
+}
+
+static unsigned int read_mac_reg(const struct pasemi_mac *mac, unsigned int reg)
+{
+       return pasemi_read_mac_reg(mac->dma_if, reg);
+}
+
+static void write_mac_reg(const struct pasemi_mac *mac, unsigned int reg,
+                         unsigned int val)
+{
+       pasemi_write_mac_reg(mac->dma_if, reg, val);
+}
+
+static unsigned int read_dma_reg(unsigned int reg)
+{
+       return pasemi_read_dma_reg(reg);
+}
+
+static void write_dma_reg(unsigned int reg, unsigned int val)
+{
+       pasemi_write_dma_reg(reg, val);
+}
+
+static struct pasemi_mac_rxring *rx_ring(const struct pasemi_mac *mac)
+{
+       return mac->rx;
+}
+
+static struct pasemi_mac_txring *tx_ring(const struct pasemi_mac *mac)
+{
+       return mac->tx;
+}
+
+static inline void prefetch_skb(const struct sk_buff *skb)
+{
+       const void *d = skb;
+
+       prefetch(d);
+       prefetch(d+64);
+       prefetch(d+128);
+       prefetch(d+192);
+}
+
+static int mac_to_intf(struct pasemi_mac *mac)
+{
+       struct pci_dev *pdev = mac->pdev;
+       u32 tmp;
+       int nintf, off, i, j;
+       int devfn = pdev->devfn;
+
+       tmp = read_dma_reg(PAS_DMA_CAP_IFI);
+       nintf = (tmp & PAS_DMA_CAP_IFI_NIN_M) >> PAS_DMA_CAP_IFI_NIN_S;
+       off = (tmp & PAS_DMA_CAP_IFI_IOFF_M) >> PAS_DMA_CAP_IFI_IOFF_S;
+
+       /* IOFF contains the offset to the registers containing the
+        * DMA interface-to-MAC-pci-id mappings, and NIN contains number
+        * of total interfaces. Each register contains 4 devfns.
+        * Just do a linear search until we find the devfn of the MAC
+        * we're trying to look up.
+        */
+
+       for (i = 0; i < (nintf+3)/4; i++) {
+               tmp = read_dma_reg(off+4*i);
+               for (j = 0; j < 4; j++) {
+                       if (((tmp >> (8*j)) & 0xff) == devfn)
+                               return i*4 + j;
+               }
+       }
+       return -1;      /* devfn not present in any mapping register */
+}
 
 static int pasemi_get_mac_addr(struct pasemi_mac *mac)
 {
@@ -128,246 +219,268 @@ static int pasemi_get_mac_addr(struct pasemi_mac *mac)
        return 0;
 }
 
-static int pasemi_mac_setup_rx_resources(struct net_device *dev)
+static int pasemi_mac_unmap_tx_skb(struct pasemi_mac *mac,
+                                   struct sk_buff *skb,
+                                   const dma_addr_t *dmas)
+{
+       int f;
+       int nfrags = skb_shinfo(skb)->nr_frags;
+       struct pci_dev *pdev = mac->dma_pdev;
+
+       pci_unmap_single(pdev, dmas[0], skb_headlen(skb), PCI_DMA_TODEVICE);
+
+       for (f = 0; f < nfrags; f++) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
+
+               pci_unmap_page(pdev, dmas[f+1], frag->size, PCI_DMA_TODEVICE);
+       }
+       dev_kfree_skb_irq(skb);
+
+       /* Freed descriptor slot + main SKB ptr + nfrags additional ptrs,
+        * rounded up to the next even count (a multiple of 2)
+        */
+       return (nfrags + 3) & ~1;
+}
+
+static int pasemi_mac_setup_rx_resources(const struct net_device *dev)
 {
        struct pasemi_mac_rxring *ring;
        struct pasemi_mac *mac = netdev_priv(dev);
-       int chan_id = mac->dma_rxch;
+       int chno;
+       unsigned int cfg;
 
-       ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+       ring = pasemi_dma_alloc_chan(RXCHAN, sizeof(struct pasemi_mac_rxring),
+                                    offsetof(struct pasemi_mac_rxring, chan));
 
-       if (!ring)
-               goto out_ring;
+       if (!ring) {
+               dev_err(&mac->pdev->dev, "Can't allocate RX channel\n");
+               goto out_chan;
+       }
+       chno = ring->chan.chno;
 
        spin_lock_init(&ring->lock);
 
-       ring->desc_info = kzalloc(sizeof(struct pasemi_mac_buffer) *
+       ring->size = RX_RING_SIZE;
+       ring->ring_info = kzalloc(sizeof(struct pasemi_mac_buffer) *
                                  RX_RING_SIZE, GFP_KERNEL);
 
-       if (!ring->desc_info)
-               goto out_desc_info;
+       if (!ring->ring_info)
+               goto out_ring_info;
 
        /* Allocate descriptors */
-       ring->desc = dma_alloc_coherent(&mac->dma_pdev->dev,
-                                       RX_RING_SIZE *
-                                       sizeof(struct pas_dma_xct_descr),
-                                       &ring->dma, GFP_KERNEL);
-
-       if (!ring->desc)
-               goto out_desc;
-
-       memset(ring->desc, 0, RX_RING_SIZE * sizeof(struct pas_dma_xct_descr));
+       if (pasemi_dma_alloc_ring(&ring->chan, RX_RING_SIZE))
+               goto out_ring_desc;
 
        ring->buffers = dma_alloc_coherent(&mac->dma_pdev->dev,
                                           RX_RING_SIZE * sizeof(u64),
                                           &ring->buf_dma, GFP_KERNEL);
        if (!ring->buffers)
-               goto out_buffers;
+               goto out_ring_desc;
 
        memset(ring->buffers, 0, RX_RING_SIZE * sizeof(u64));
 
-       pci_write_config_dword(mac->dma_pdev, PAS_DMA_RXCHAN_BASEL(chan_id),
-                              PAS_DMA_RXCHAN_BASEL_BRBL(ring->dma));
+       write_dma_reg(PAS_DMA_RXCHAN_BASEL(chno),
+                     PAS_DMA_RXCHAN_BASEL_BRBL(ring->chan.ring_dma));
+
+       write_dma_reg(PAS_DMA_RXCHAN_BASEU(chno),
+                     PAS_DMA_RXCHAN_BASEU_BRBH(ring->chan.ring_dma >> 32) |
+                     PAS_DMA_RXCHAN_BASEU_SIZ(RX_RING_SIZE >> 3));
+
+       cfg = PAS_DMA_RXCHAN_CFG_HBU(2);
+
+       if (translation_enabled())
+               cfg |= PAS_DMA_RXCHAN_CFG_CTR;
 
-       pci_write_config_dword(mac->dma_pdev, PAS_DMA_RXCHAN_BASEU(chan_id),
-                              PAS_DMA_RXCHAN_BASEU_BRBH(ring->dma >> 32) |
-                              PAS_DMA_RXCHAN_BASEU_SIZ(RX_RING_SIZE >> 2));
+       write_dma_reg(PAS_DMA_RXCHAN_CFG(chno), cfg);
 
-       pci_write_config_dword(mac->dma_pdev, PAS_DMA_RXCHAN_CFG(chan_id),
-                              PAS_DMA_RXCHAN_CFG_HBU(1));
+       write_dma_reg(PAS_DMA_RXINT_BASEL(mac->dma_if),
+                     PAS_DMA_RXINT_BASEL_BRBL(ring->buf_dma));
 
-       pci_write_config_dword(mac->dma_pdev, PAS_DMA_RXINT_BASEL(mac->dma_if),
-                              PAS_DMA_RXINT_BASEL_BRBL(__pa(ring->buffers)));
+       write_dma_reg(PAS_DMA_RXINT_BASEU(mac->dma_if),
+                     PAS_DMA_RXINT_BASEU_BRBH(ring->buf_dma >> 32) |
+                     PAS_DMA_RXINT_BASEU_SIZ(RX_RING_SIZE >> 3));
 
-       pci_write_config_dword(mac->dma_pdev, PAS_DMA_RXINT_BASEU(mac->dma_if),
-                              PAS_DMA_RXINT_BASEU_BRBH(__pa(ring->buffers) >> 32) |
-                              PAS_DMA_RXINT_BASEU_SIZ(RX_RING_SIZE >> 3));
+       cfg = PAS_DMA_RXINT_CFG_DHL(2) | PAS_DMA_RXINT_CFG_L2 |
+             PAS_DMA_RXINT_CFG_LW | PAS_DMA_RXINT_CFG_RBP |
+             PAS_DMA_RXINT_CFG_HEN;
+
+       if (translation_enabled())
+               cfg |= PAS_DMA_RXINT_CFG_ITRR | PAS_DMA_RXINT_CFG_ITR;
+
+       write_dma_reg(PAS_DMA_RXINT_CFG(mac->dma_if), cfg);
 
        ring->next_to_fill = 0;
        ring->next_to_clean = 0;
-
-       snprintf(ring->irq_name, sizeof(ring->irq_name),
-                "%s rx", dev->name);
+       ring->mac = mac;
        mac->rx = ring;
 
        return 0;
 
-out_buffers:
-       dma_free_coherent(&mac->dma_pdev->dev,
-                         RX_RING_SIZE * sizeof(struct pas_dma_xct_descr),
-                         mac->rx->desc, mac->rx->dma);
-out_desc:
-       kfree(ring->desc_info);
-out_desc_info:
-       kfree(ring);
-out_ring:
+out_ring_desc:
+       kfree(ring->ring_info);
+out_ring_info:
+       pasemi_dma_free_chan(&ring->chan);
+out_chan:
        return -ENOMEM;
 }
 
-
-static int pasemi_mac_setup_tx_resources(struct net_device *dev)
+static struct pasemi_mac_txring *
+pasemi_mac_setup_tx_resources(const struct net_device *dev)
 {
        struct pasemi_mac *mac = netdev_priv(dev);
        u32 val;
-       int chan_id = mac->dma_txch;
        struct pasemi_mac_txring *ring;
+       unsigned int cfg;
+       int chno;
+
+       ring = pasemi_dma_alloc_chan(TXCHAN, sizeof(struct pasemi_mac_txring),
+                                    offsetof(struct pasemi_mac_txring, chan));
+
+       if (!ring) {
+               dev_err(&mac->pdev->dev, "Can't allocate TX channel\n");
+               goto out_chan;
+       }
 
-       ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-       if (!ring)
-               goto out_ring;
+       chno = ring->chan.chno;
 
        spin_lock_init(&ring->lock);
 
-       ring->desc_info = kzalloc(sizeof(struct pasemi_mac_buffer) *
+       ring->size = TX_RING_SIZE;
+       ring->ring_info = kzalloc(sizeof(struct pasemi_mac_buffer) *
                                  TX_RING_SIZE, GFP_KERNEL);
-       if (!ring->desc_info)
-               goto out_desc_info;
+       if (!ring->ring_info)
+               goto out_ring_info;
 
        /* Allocate descriptors */
-       ring->desc = dma_alloc_coherent(&mac->dma_pdev->dev,
-                                       TX_RING_SIZE *
-                                       sizeof(struct pas_dma_xct_descr),
-                                       &ring->dma, GFP_KERNEL);
-       if (!ring->desc)
-               goto out_desc;
+       if (pasemi_dma_alloc_ring(&ring->chan, TX_RING_SIZE))
+               goto out_ring_desc;
 
-       memset(ring->desc, 0, TX_RING_SIZE * sizeof(struct pas_dma_xct_descr));
+       write_dma_reg(PAS_DMA_TXCHAN_BASEL(chno),
+                     PAS_DMA_TXCHAN_BASEL_BRBL(ring->chan.ring_dma));
+       val = PAS_DMA_TXCHAN_BASEU_BRBH(ring->chan.ring_dma >> 32);
+       val |= PAS_DMA_TXCHAN_BASEU_SIZ(TX_RING_SIZE >> 3);
 
-       pci_write_config_dword(mac->dma_pdev, PAS_DMA_TXCHAN_BASEL(chan_id),
-                              PAS_DMA_TXCHAN_BASEL_BRBL(ring->dma));
-       val = PAS_DMA_TXCHAN_BASEU_BRBH(ring->dma >> 32);
-       val |= PAS_DMA_TXCHAN_BASEU_SIZ(TX_RING_SIZE >> 2);
+       write_dma_reg(PAS_DMA_TXCHAN_BASEU(chno), val);
 
-       pci_write_config_dword(mac->dma_pdev, PAS_DMA_TXCHAN_BASEU(chan_id), val);
+       cfg = PAS_DMA_TXCHAN_CFG_TY_IFACE |
+             PAS_DMA_TXCHAN_CFG_TATTR(mac->dma_if) |
+             PAS_DMA_TXCHAN_CFG_UP |
+             PAS_DMA_TXCHAN_CFG_WT(2);
 
-       pci_write_config_dword(mac->dma_pdev, PAS_DMA_TXCHAN_CFG(chan_id),
-                              PAS_DMA_TXCHAN_CFG_TY_IFACE |
-                              PAS_DMA_TXCHAN_CFG_TATTR(mac->dma_if) |
-                              PAS_DMA_TXCHAN_CFG_UP |
-                              PAS_DMA_TXCHAN_CFG_WT(2));
+       if (translation_enabled())
+               cfg |= PAS_DMA_TXCHAN_CFG_TRD | PAS_DMA_TXCHAN_CFG_TRR;
 
-       ring->next_to_use = 0;
-       ring->next_to_clean = 0;
+       write_dma_reg(PAS_DMA_TXCHAN_CFG(chno), cfg);
 
-       snprintf(ring->irq_name, sizeof(ring->irq_name),
-                "%s tx", dev->name);
-       mac->tx = ring;
+       ring->next_to_fill = 0;
+       ring->next_to_clean = 0;
+       ring->mac = mac;
 
-       return 0;
+       return ring;
 
-out_desc:
-       kfree(ring->desc_info);
-out_desc_info:
-       kfree(ring);
-out_ring:
-       return -ENOMEM;
+out_ring_desc:
+       kfree(ring->ring_info);
+out_ring_info:
+       pasemi_dma_free_chan(&ring->chan);
+out_chan:
+       return NULL;
 }
 
-static void pasemi_mac_free_tx_resources(struct net_device *dev)
+static void pasemi_mac_free_tx_resources(struct pasemi_mac *mac)
 {
-       struct pasemi_mac *mac = netdev_priv(dev);
-       unsigned int i;
+       struct pasemi_mac_txring *txring = tx_ring(mac);
+       unsigned int i, j;
        struct pasemi_mac_buffer *info;
-       struct pas_dma_xct_descr *dp;
-
-       for (i = 0; i < TX_RING_SIZE; i++) {
-               info = &TX_DESC_INFO(mac, i);
-               dp = &TX_DESC(mac, i);
-               if (info->dma) {
-                       if (info->skb) {
-                               pci_unmap_single(mac->dma_pdev,
-                                                info->dma,
-                                                info->skb->len,
-                                                PCI_DMA_TODEVICE);
-                               dev_kfree_skb_any(info->skb);
-                       }
-                       info->dma = 0;
-                       info->skb = NULL;
-                       dp->mactx = 0;
-                       dp->ptr = 0;
-               }
+       dma_addr_t dmas[MAX_SKB_FRAGS+1];
+       int freed;
+       int start, limit;
+
+       start = txring->next_to_clean;
+       limit = txring->next_to_fill;
+
+       /* Compensate for when fill has wrapped and clean has not */
+       if (start > limit)
+               limit += TX_RING_SIZE;
+
+       for (i = start; i < limit; i += freed) {
+               info = &txring->ring_info[(i+1) & (TX_RING_SIZE-1)];
+               if (info->dma && info->skb) {
+                       for (j = 0; j <= skb_shinfo(info->skb)->nr_frags; j++)
+                               dmas[j] = txring->ring_info[(i+1+j) &
+                                               (TX_RING_SIZE-1)].dma;
+                       freed = pasemi_mac_unmap_tx_skb(mac, info->skb, dmas);
+               } else
+                       freed = 2;
        }
 
-       dma_free_coherent(&mac->dma_pdev->dev,
-                         TX_RING_SIZE * sizeof(struct pas_dma_xct_descr),
-                         mac->tx->desc, mac->tx->dma);
+       kfree(txring->ring_info);
+       pasemi_dma_free_chan(&txring->chan);
 
-       kfree(mac->tx->desc_info);
-       kfree(mac->tx);
-       mac->tx = NULL;
 }
 
-static void pasemi_mac_free_rx_resources(struct net_device *dev)
+static void pasemi_mac_free_rx_resources(struct pasemi_mac *mac)
 {
-       struct pasemi_mac *mac = netdev_priv(dev);
+       struct pasemi_mac_rxring *rx = rx_ring(mac);
         unsigned int i;
         struct pasemi_mac_buffer *info;
-       struct pas_dma_xct_descr *dp;
 
         for (i = 0; i < RX_RING_SIZE; i++) {
-               info = &RX_DESC_INFO(mac, i);
-               dp = &RX_DESC(mac, i);
-               if (info->skb) {
-                       if (info->dma) {
-                               pci_unmap_single(mac->dma_pdev,
-                                                info->dma,
-                                                info->skb->len,
-                                                PCI_DMA_FROMDEVICE);
-                               dev_kfree_skb_any(info->skb);
-                       }
-                       info->dma = 0;
-                       info->skb = NULL;
-                       dp->macrx = 0;
-                       dp->ptr = 0;
+               info = &RX_DESC_INFO(rx, i);
+               if (info->skb && info->dma) {
+                       pci_unmap_single(mac->dma_pdev,
+                                        info->dma,
+                                        BUF_SIZE - LOCAL_SKB_ALIGN,
+                                        PCI_DMA_FROMDEVICE);
+                       dev_kfree_skb_any(info->skb);
                 }
+               info->dma = 0;
+               info->skb = NULL;
         }
 
-       dma_free_coherent(&mac->dma_pdev->dev,
-                         RX_RING_SIZE * sizeof(struct pas_dma_xct_descr),
-                         mac->rx->desc, mac->rx->dma);
+       for (i = 0; i < RX_RING_SIZE; i++)
+               RX_DESC(rx, i) = 0;
 
         dma_free_coherent(&mac->dma_pdev->dev, RX_RING_SIZE * sizeof(u64),
-                         mac->rx->buffers, mac->rx->buf_dma);
+                         rx_ring(mac)->buffers, rx_ring(mac)->buf_dma);
 
-       kfree(mac->rx->desc_info);
-       kfree(mac->rx);
+       kfree(rx_ring(mac)->ring_info);
+       pasemi_dma_free_chan(&rx_ring(mac)->chan);
         mac->rx = NULL;
 }
 
-static void pasemi_mac_replenish_rx_ring(struct net_device *dev)
+static void pasemi_mac_replenish_rx_ring(const struct net_device *dev,
+                                        const int limit)
 {
-       struct pasemi_mac *mac = netdev_priv(dev);
-       unsigned int i;
-       int start = mac->rx->next_to_fill;
-       unsigned int limit, count;
-
-       limit = (mac->rx->next_to_clean + RX_RING_SIZE -
-                mac->rx->next_to_fill) & (RX_RING_SIZE - 1);
-
-       /* Check to see if we're doing first-time setup */
-       if (unlikely(mac->rx->next_to_clean == 0 && mac->rx->next_to_fill == 0))
-               limit = RX_RING_SIZE;
+       const struct pasemi_mac *mac = netdev_priv(dev);
+       struct pasemi_mac_rxring *rx = rx_ring(mac);
+       int fill, count;
 
         if (limit <= 0)
                 return;
 
-       i = start;
-       for (count = limit; count; count--) {
-               struct pasemi_mac_buffer *info = &RX_DESC_INFO(mac, i);
-               u64 *buff = &RX_BUFF(mac, i);
+       fill = rx_ring(mac)->next_to_fill;
+       for (count = 0; count < limit; count++) {
+               struct pasemi_mac_buffer *info = &RX_DESC_INFO(rx, fill);
+               u64 *buff = &RX_BUFF(rx, fill);
                 struct sk_buff *skb;
                 dma_addr_t dma;
 
+               /* Entry in use? */
+               WARN_ON(*buff);
+
                 /* skb might still be in there for recycle on short receives */
                 if (info->skb)
                         skb = info->skb;
                 else
                         skb = dev_alloc_skb(BUF_SIZE);
 
                 if (unlikely(!skb))
                         break;
+               if (!info->skb) /* freshly allocated, not recycled */
+                       skb_reserve(skb, LOCAL_SKB_ALIGN);
 
-               dma = pci_map_single(mac->dma_pdev, skb->data, skb->len,
+               dma = pci_map_single(mac->dma_pdev, skb->data,
+                                    BUF_SIZE - LOCAL_SKB_ALIGN,
                                      PCI_DMA_FROMDEVICE);
 
                 if (unlikely(dma_mapping_error(dma))) {
@@ -378,109 +491,150 @@ static void pasemi_mac_replenish_rx_ring(struct net_device *dev)
                 info->skb = skb;
                 info->dma = dma;
                 *buff = XCT_RXB_LEN(BUF_SIZE) | XCT_RXB_ADDR(dma);
-               i++;
+               fill++;
         }
 
         wmb();
 
-       pci_write_config_dword(mac->dma_pdev,
-                              PAS_DMA_RXCHAN_INCR(mac->dma_rxch),
-                              limit - count);
-       pci_write_config_dword(mac->dma_pdev,
-                              PAS_DMA_RXINT_INCR(mac->dma_if),
-                              limit - count);
+       write_dma_reg(PAS_DMA_RXINT_INCR(mac->dma_if), count);
 
-       mac->rx->next_to_fill += limit - count;
+       rx_ring(mac)->next_to_fill = (rx_ring(mac)->next_to_fill + count) &
+                               (RX_RING_SIZE - 1);
 }
 
-static void pasemi_mac_restart_rx_intr(struct pasemi_mac *mac)
+static void pasemi_mac_restart_rx_intr(const struct pasemi_mac *mac)
 {
        unsigned int reg, pcnt;
        /* Re-enable packet count interrupts: finally
         * ack the packet count interrupt we got in rx_intr.
         */
 
-       pcnt = *mac->rx_status & PAS_STATUS_PCNT_M;
+       pcnt = *rx_ring(mac)->chan.status & PAS_STATUS_PCNT_M;
 
        reg = PAS_IOB_DMA_RXCH_RESET_PCNT(pcnt) | PAS_IOB_DMA_RXCH_RESET_PINTC;
 
-       pci_write_config_dword(mac->iob_pdev,
-                              PAS_IOB_DMA_RXCH_RESET(mac->dma_rxch),
-                              reg);
+       write_iob_reg(PAS_IOB_DMA_RXCH_RESET(mac->rx->chan.chno), reg);
 }
 
-static void pasemi_mac_restart_tx_intr(struct pasemi_mac *mac)
+static void pasemi_mac_restart_tx_intr(const struct pasemi_mac *mac)
 {
        unsigned int reg, pcnt;
 
        /* Re-enable packet count interrupts */
-       pcnt = *mac->tx_status & PAS_STATUS_PCNT_M;
+       pcnt = *tx_ring(mac)->chan.status & PAS_STATUS_PCNT_M;
 
        reg = PAS_IOB_DMA_TXCH_RESET_PCNT(pcnt) | PAS_IOB_DMA_TXCH_RESET_PINTC;
 
-       pci_write_config_dword(mac->iob_pdev,
-                              PAS_IOB_DMA_TXCH_RESET(mac->dma_txch), reg);
+       write_iob_reg(PAS_IOB_DMA_TXCH_RESET(tx_ring(mac)->chan.chno), reg);
+}
+
+
+static inline void pasemi_mac_rx_error(const struct pasemi_mac *mac,
+                                      const u64 macrx)
+{
+       unsigned int rcmdsta, ccmdsta;
+       struct pasemi_dmachan *chan = &rx_ring(mac)->chan;
+
+       if (!netif_msg_rx_err(mac))
+               return;
+
+       rcmdsta = read_dma_reg(PAS_DMA_RXINT_RCMDSTA(mac->dma_if));
+       ccmdsta = read_dma_reg(PAS_DMA_RXCHAN_CCMDSTA(chan->chno));
+
+       printk(KERN_ERR "pasemi_mac: rx error. macrx %016lx, rx status %lx\n",
+               macrx, *chan->status);
+
+       printk(KERN_ERR "pasemi_mac: rcmdsta %08x ccmdsta %08x\n",
+               rcmdsta, ccmdsta);
 }
 
+static inline void pasemi_mac_tx_error(const struct pasemi_mac *mac,
+                                      const u64 mactx)
+{
+       unsigned int cmdsta;
+       struct pasemi_dmachan *chan = &tx_ring(mac)->chan;
+
+       if (!netif_msg_tx_err(mac))
+               return;
+
+       cmdsta = read_dma_reg(PAS_DMA_TXCHAN_TCMDSTA(chan->chno));
 
-static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
+       printk(KERN_ERR "pasemi_mac: tx error. mactx 0x%016lx, "\
+               "tx status 0x%016lx\n", mactx, *chan->status);
+
+       printk(KERN_ERR "pasemi_mac: tcmdsta 0x%08x\n", cmdsta);
+}
+
+static int pasemi_mac_clean_rx(struct pasemi_mac_rxring *rx,
+                              const int limit)
 {
+       const struct pasemi_dmachan *chan = &rx->chan;
+       struct pasemi_mac *mac = rx->mac;
+       struct pci_dev *pdev = mac->dma_pdev;
        unsigned int n;
-       int count;
-       struct pas_dma_xct_descr *dp;
+       int count, buf_index, tot_bytes, packets;
        struct pasemi_mac_buffer *info;
        struct sk_buff *skb;
-       unsigned int i, len;
-       u64 macrx;
+       unsigned int len;
+       u64 macrx, eval;
        dma_addr_t dma;
 
-       spin_lock(&mac->rx->lock);
+       tot_bytes = 0;
+       packets = 0;
+
+       spin_lock(&rx->lock);
 
-       n = mac->rx->next_to_clean;
+       n = rx->next_to_clean;
 
-       for (count = limit; count; count--) {
+       prefetch(&RX_DESC(rx, n));
 
-               rmb();
+       for (count = 0; count < limit; count++) {
+               macrx = RX_DESC(rx, n);
+               prefetch(&RX_DESC(rx, n+4));
 
-               dp = &RX_DESC(mac, n);
-               macrx = dp->macrx;
+               if ((macrx & XCT_MACRX_E) ||
+                   (*chan->status & PAS_STATUS_ERROR))
+                       pasemi_mac_rx_error(mac, macrx);
 
                if (!(macrx & XCT_MACRX_O))
                        break;
 
-
                info = NULL;
 
-               /* We have to scan for our skb since there's no way
-                * to back-map them from the descriptor, and if we
-                * have several receive channels then they might not
-                * show up in the same order as they were put on the
-                * interface ring.
-                */
+               BUG_ON(!(macrx & XCT_MACRX_RR_8BRES));
 
-               dma = (dp->ptr & XCT_PTR_ADDR_M);
-               for (i = n; i < (n + RX_RING_SIZE); i++) {
-                       info = &RX_DESC_INFO(mac, i);
-                       if (info->dma == dma)
-                               break;
-               }
+               eval = (RX_DESC(rx, n+1) & XCT_RXRES_8B_EVAL_M) >>
+                       XCT_RXRES_8B_EVAL_S;
+               buf_index = eval-1;
+
+               dma = (RX_DESC(rx, n+2) & XCT_PTR_ADDR_M);
+               info = &RX_DESC_INFO(rx, buf_index);
 
                skb = info->skb;
-               info->dma = 0;
 
-               pci_unmap_single(mac->dma_pdev, dma, skb->len,
-                                PCI_DMA_FROMDEVICE);
+               prefetch_skb(skb);
 
                len = (macrx & XCT_MACRX_LLEN_M) >> XCT_MACRX_LLEN_S;
 
+               pci_unmap_single(pdev, dma, BUF_SIZE-LOCAL_SKB_ALIGN,
+                                PCI_DMA_FROMDEVICE);
+
+               if (macrx & XCT_MACRX_CRC) {
+                       /* CRC error flagged */
+                       mac->netdev->stats.rx_errors++;
+                       mac->netdev->stats.rx_crc_errors++;
+                       /* No need to free skb, it'll be reused */
+                       goto next;
+               }
+
                if (len < 256) {
-                       struct sk_buff *new_skb =
-                           netdev_alloc_skb(mac->netdev, len + NET_IP_ALIGN);
+                       struct sk_buff *new_skb;
+
+                       new_skb = netdev_alloc_skb(mac->netdev,
+                                                  len + LOCAL_SKB_ALIGN);
                        if (new_skb) {
-                               skb_reserve(new_skb, NET_IP_ALIGN);
-                               memcpy(new_skb->data - NET_IP_ALIGN,
-                                       skb->data - NET_IP_ALIGN,
-                                       len + NET_IP_ALIGN);
+                               skb_reserve(new_skb, LOCAL_SKB_ALIGN);
+                               memcpy(new_skb->data, skb->data, len);
                                /* save the skb in buffer_info as good */
                                skb = new_skb;
                        }
@@ -488,134 +642,190 @@ static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
                } else
                        info->skb = NULL;
 
-               skb_put(skb, len);
-
-               skb->protocol = eth_type_trans(skb, mac->netdev);
+               info->dma = 0;
 
-               if ((macrx & XCT_MACRX_HTY_M) == XCT_MACRX_HTY_IPV4_OK) {
-                       skb->ip_summed = CHECKSUM_COMPLETE;
+               if (likely((macrx & XCT_MACRX_HTY_M) == XCT_MACRX_HTY_IPV4_OK)) {
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
                        skb->csum = (macrx & XCT_MACRX_CSUM_M) >>
                                           XCT_MACRX_CSUM_S;
                } else
                        skb->ip_summed = CHECKSUM_NONE;
 
-               mac->stats.rx_bytes += len;
-               mac->stats.rx_packets++;
+               packets++;
+               tot_bytes += len;
 
+               /* Don't include CRC */
+               skb_put(skb, len-4);
+
+               skb->protocol = eth_type_trans(skb, mac->netdev);
                netif_receive_skb(skb);
 
-               dp->ptr = 0;
-               dp->macrx = 0;
+next:
+               RX_DESC(rx, n) = 0;
+               RX_DESC(rx, n+1) = 0;
 
-               n++;
+               /* Need to zero it out since hardware doesn't, since the
+                * replenish loop uses it to tell when it's done.
+                */
+               RX_BUFF(rx, buf_index) = 0;
+
+               n += 4;
        }
 
-       mac->rx->next_to_clean += limit - count;
-       pasemi_mac_replenish_rx_ring(mac->netdev);
+       if (n > RX_RING_SIZE) {
+               /* Errata 5971 workaround: L2 target of headers */
+               write_iob_reg(PAS_IOB_COM_PKTHDRCNT, 0);
+               n &= (RX_RING_SIZE-1);
+       }
 
-       spin_unlock(&mac->rx->lock);
+       rx_ring(mac)->next_to_clean = n;
+
+       /* Increase is in number of 16-byte entries, and since each descriptor
+        * with an 8BRES takes up 3x8 bytes (padded to 4x8), increase with
+        * count*2.
+        */
+       write_dma_reg(PAS_DMA_RXCHAN_INCR(mac->rx->chan.chno), count << 1);
+
+       pasemi_mac_replenish_rx_ring(mac->netdev, count);
+
+       mac->netdev->stats.rx_bytes += tot_bytes;
+       mac->netdev->stats.rx_packets += packets;
+
+       spin_unlock(&rx_ring(mac)->lock);
 
        return count;
 }
 
-static int pasemi_mac_clean_tx(struct pasemi_mac *mac)
+/* Sizes on-stack arrays in pasemi_mac_clean_tx(); too large blows the stack */
+#define TX_CLEAN_BATCHSIZE (128/MAX_SKB_FRAGS)
+
+static int pasemi_mac_clean_tx(struct pasemi_mac_txring *txring)
 {
-       int i;
-       struct pasemi_mac_buffer *info;
-       struct pas_dma_xct_descr *dp;
-       int start, count;
-       int flags;
+       struct pasemi_dmachan *chan = &txring->chan;
+       struct pasemi_mac *mac = txring->mac;
+       int i, j;
+       unsigned int start, descr_count, buf_count, batch_limit;
+       unsigned int ring_limit;
+       unsigned int total_count;
+       unsigned long flags;
+       struct sk_buff *skbs[TX_CLEAN_BATCHSIZE];
+       dma_addr_t dmas[TX_CLEAN_BATCHSIZE][MAX_SKB_FRAGS+1];
+
+       total_count = 0;
+       batch_limit = TX_CLEAN_BATCHSIZE;
+restart:
+       spin_lock_irqsave(&txring->lock, flags);
 
-       spin_lock_irqsave(&mac->tx->lock, flags);
+       start = txring->next_to_clean;
+       ring_limit = txring->next_to_fill;
 
-       start = mac->tx->next_to_clean;
-       count = 0;
+       /* Fill index has wrapped but clean has not: unwrap it for the compare */
+       if (start > ring_limit)
+               ring_limit += TX_RING_SIZE;
 
-       for (i = start; i < mac->tx->next_to_use; i++) {
-               dp = &TX_DESC(mac, i);
-               if (!dp || (dp->mactx & XCT_MACTX_O))
+       buf_count = 0;
+       descr_count = 0;
+
+       for (i = start;
+            descr_count < batch_limit && i < ring_limit;
+            i += buf_count) {
+               u64 mactx = TX_DESC(txring, i);
+               struct sk_buff *skb;
+
+               if ((mactx  & XCT_MACTX_E) ||
+                   (*chan->status & PAS_STATUS_ERROR))
+                       pasemi_mac_tx_error(mac, mactx);
+
+               if (unlikely(mactx & XCT_MACTX_O))
+                       /* Not yet transmitted: stop, i stays on this entry */
                        break;
 
-               count++;
+               skb = TX_DESC_INFO(txring, i+1).skb;
+               skbs[descr_count] = skb; /* handed to unmap after unlock */
 
-               info = &TX_DESC_INFO(mac, i);
+               buf_count = 2 + skb_shinfo(skb)->nr_frags;
+               for (j = 0; j <= skb_shinfo(skb)->nr_frags; j++)
+                       dmas[descr_count][j] = TX_DESC_INFO(txring, i+1+j).dma;
 
-               pci_unmap_single(mac->dma_pdev, info->dma,
-                                info->skb->len, PCI_DMA_TODEVICE);
-               dev_kfree_skb_irq(info->skb);
+               TX_DESC(txring, i) = 0;
+               TX_DESC(txring, i+1) = 0;
 
-               info->skb = NULL;
-               info->dma = 0;
-               dp->mactx = 0;
-               dp->ptr = 0;
+               /* buf_count is mactx + head + frags; the ring is always filled
+                * with an even number of entries, so skip the unused one too.
+                */
+               if (buf_count & 1)
+                       buf_count++;
+               descr_count++;
        }
-       mac->tx->next_to_clean += count;
-       spin_unlock_irqrestore(&mac->tx->lock, flags);
+       txring->next_to_clean = i & (TX_RING_SIZE-1);
 
+       spin_unlock_irqrestore(&txring->lock, flags);
        netif_wake_queue(mac->netdev);
 
-       return count;
+       for (i = 0; i < descr_count; i++) /* ring lock already dropped */
+               pasemi_mac_unmap_tx_skb(mac, skbs[i], dmas[i]);
+
+       total_count += descr_count;
+
+       /* A full batch may have left entries behind: go around again */
+       if (descr_count == batch_limit)
+               goto restart;
+
+       return total_count; /* descriptors reclaimed over all batches */
 }
 
 
 static irqreturn_t pasemi_mac_rx_intr(int irq, void *data)
 {
-       struct net_device *dev = data;
-       struct pasemi_mac *mac = netdev_priv(dev);
+       const struct pasemi_mac_rxring *rxring = data; /* request_irq cookie */
+       struct pasemi_mac *mac = rxring->mac;
+       struct net_device *dev = mac->netdev;
+       const struct pasemi_dmachan *chan = &rxring->chan;
        unsigned int reg;
 
-       if (!(*mac->rx_status & PAS_STATUS_CAUSE_M))
+       if (!(*chan->status & PAS_STATUS_CAUSE_M)) /* no cause bit set */
                return IRQ_NONE;
 
-       if (*mac->rx_status & PAS_STATUS_ERROR)
-               printk("rx_status reported error\n");
-
        /* Don't reset packet count so it won't fire again but clear
         * all others.
         */
 
-       pci_read_config_dword(mac->dma_pdev, PAS_DMA_RXINT_RCMDSTA(mac->dma_if), &reg);
-
        reg = 0;
-       if (*mac->rx_status & PAS_STATUS_SOFT)
+       if (*chan->status & PAS_STATUS_SOFT)
                reg |= PAS_IOB_DMA_RXCH_RESET_SINTC;
-       if (*mac->rx_status & PAS_STATUS_ERROR)
+       if (*chan->status & PAS_STATUS_ERROR)
                reg |= PAS_IOB_DMA_RXCH_RESET_DINTC;
-       if (*mac->rx_status & PAS_STATUS_TIMER)
+       if (*chan->status & PAS_STATUS_TIMER)
                reg |= PAS_IOB_DMA_RXCH_RESET_TINTC;
 
        netif_rx_schedule(dev, &mac->napi);
 
-       pci_write_config_dword(mac->iob_pdev,
-                              PAS_IOB_DMA_RXCH_RESET(mac->dma_rxch), reg);
-
+       write_iob_reg(PAS_IOB_DMA_RXCH_RESET(chan->chno), reg); /* clear them */
 
        return IRQ_HANDLED;
 }
 
 static irqreturn_t pasemi_mac_tx_intr(int irq, void *data)
 {
-       struct net_device *dev = data;
-       struct pasemi_mac *mac = netdev_priv(dev);
+       struct pasemi_mac_txring *txring = data; /* request_irq cookie */
+       const struct pasemi_dmachan *chan = &txring->chan;
        unsigned int reg, pcnt;
 
-       if (!(*mac->tx_status & PAS_STATUS_CAUSE_M))
+       if (!(*chan->status & PAS_STATUS_CAUSE_M)) /* no cause bit set */
                return IRQ_NONE;
 
-       pasemi_mac_clean_tx(mac);
+       pasemi_mac_clean_tx(txring);
 
-       pcnt = *mac->tx_status & PAS_STATUS_PCNT_M;
+       pcnt = *chan->status & PAS_STATUS_PCNT_M; /* read after the clean */
 
        reg = PAS_IOB_DMA_TXCH_RESET_PCNT(pcnt) | PAS_IOB_DMA_TXCH_RESET_PINTC;
 
-       if (*mac->tx_status & PAS_STATUS_SOFT)
+       if (*chan->status & PAS_STATUS_SOFT)
                reg |= PAS_IOB_DMA_TXCH_RESET_SINTC;
-       if (*mac->tx_status & PAS_STATUS_ERROR)
+       if (*chan->status & PAS_STATUS_ERROR)
                reg |= PAS_IOB_DMA_TXCH_RESET_DINTC;
 
-       pci_write_config_dword(mac->iob_pdev,
-                              PAS_IOB_DMA_TXCH_RESET(mac->dma_txch),
-                              reg);
+       write_iob_reg(PAS_IOB_DMA_TXCH_RESET(chan->chno), reg);
 
        return IRQ_HANDLED;
 }
@@ -641,7 +851,7 @@ static void pasemi_adjust_link(struct net_device *dev)
        } else
                netif_carrier_on(dev);
 
-       pci_read_config_dword(mac->pdev, PAS_MAC_CFG_PCFG, &flags);
+       flags = read_mac_reg(mac, PAS_MAC_CFG_PCFG);
        new_flags = flags & ~(PAS_MAC_CFG_PCFG_HD | PAS_MAC_CFG_PCFG_SPD_M |
                              PAS_MAC_CFG_PCFG_TSR_M);
 
@@ -673,7 +883,7 @@ static void pasemi_adjust_link(struct net_device *dev)
        mac->link = mac->phydev->link;
 
        if (new_flags != flags)
-               pci_write_config_dword(mac->pdev, PAS_MAC_CFG_PCFG, new_flags);
+               write_mac_reg(mac, PAS_MAC_CFG_PCFG, new_flags);
 
        if (msg && netif_msg_link(mac))
                printk(KERN_INFO "%s: Link is up at %d Mbps, %s duplex.\n",
@@ -731,74 +941,83 @@ err:
 static int pasemi_mac_open(struct net_device *dev)
 {
        struct pasemi_mac *mac = netdev_priv(dev);
-       int base_irq;
        unsigned int flags;
        int ret;
 
        /* enable rx section */
-       pci_write_config_dword(mac->dma_pdev, PAS_DMA_COM_RXCMD,
-                              PAS_DMA_COM_RXCMD_EN);
+       write_dma_reg(PAS_DMA_COM_RXCMD, PAS_DMA_COM_RXCMD_EN);
 
        /* enable tx section */
-       pci_write_config_dword(mac->dma_pdev, PAS_DMA_COM_TXCMD,
-                              PAS_DMA_COM_TXCMD_EN);
+       write_dma_reg(PAS_DMA_COM_TXCMD, PAS_DMA_COM_TXCMD_EN);
 
        flags = PAS_MAC_CFG_TXP_FCE | PAS_MAC_CFG_TXP_FPC(3) |
                PAS_MAC_CFG_TXP_SL(3) | PAS_MAC_CFG_TXP_COB(0xf) |
                PAS_MAC_CFG_TXP_TIFT(8) | PAS_MAC_CFG_TXP_TIFG(12);
 
-       pci_write_config_dword(mac->pdev, PAS_MAC_CFG_TXP, flags);
-
-       flags = PAS_MAC_CFG_PCFG_S1 | PAS_MAC_CFG_PCFG_PE |
-               PAS_MAC_CFG_PCFG_PR | PAS_MAC_CFG_PCFG_CE;
-
-       flags |= PAS_MAC_CFG_PCFG_TSR_1G | PAS_MAC_CFG_PCFG_SPD_1G;
-
-       pci_write_config_dword(mac->iob_pdev, PAS_IOB_DMA_RXCH_CFG(mac->dma_rxch),
-                              PAS_IOB_DMA_RXCH_CFG_CNTTH(0));
-
-       pci_write_config_dword(mac->iob_pdev, PAS_IOB_DMA_TXCH_CFG(mac->dma_txch),
-                              PAS_IOB_DMA_TXCH_CFG_CNTTH(32));
-
-       /* Clear out any residual packet count state from firmware */
-       pasemi_mac_restart_rx_intr(mac);
-       pasemi_mac_restart_tx_intr(mac);
+       write_mac_reg(mac, PAS_MAC_CFG_TXP, flags);
 
        /* 0xffffff is max value, about 16ms */
-       pci_write_config_dword(mac->iob_pdev, PAS_IOB_DMA_COM_TIMEOUTCFG,
-                              PAS_IOB_DMA_COM_TIMEOUTCFG_TCNT(0xffffff));
-
-       pci_write_config_dword(mac->pdev, PAS_MAC_CFG_PCFG, flags);
+       write_iob_reg(PAS_IOB_DMA_COM_TIMEOUTCFG,
+                     PAS_IOB_DMA_COM_TIMEOUTCFG_TCNT(0xffffff));
 
        ret = pasemi_mac_setup_rx_resources(dev);
        if (ret)
                goto out_rx_resources;
 
-       ret = pasemi_mac_setup_tx_resources(dev);
-       if (ret)
-               goto out_tx_resources;
+       ret = -ENOMEM;  /* open() must not return 0 if tx setup fails */
+       mac->tx = pasemi_mac_setup_tx_resources(dev);
+       if (!mac->tx)
+               goto out_tx_ring;
+
+       write_iob_reg(PAS_IOB_DMA_RXCH_CFG(mac->rx->chan.chno),
+                     PAS_IOB_DMA_RXCH_CFG_CNTTH(0));
+
+       write_iob_reg(PAS_IOB_DMA_TXCH_CFG(mac->tx->chan.chno),
+                     PAS_IOB_DMA_TXCH_CFG_CNTTH(128));
 
-       pci_write_config_dword(mac->pdev, PAS_MAC_IPC_CHNL,
-                              PAS_MAC_IPC_CHNL_DCHNO(mac->dma_rxch) |
-                              PAS_MAC_IPC_CHNL_BCH(mac->dma_rxch));
+       write_mac_reg(mac, PAS_MAC_IPC_CHNL,
+                     PAS_MAC_IPC_CHNL_DCHNO(mac->rx->chan.chno) |
+                     PAS_MAC_IPC_CHNL_BCH(mac->rx->chan.chno));
 
        /* enable rx if */
-       pci_write_config_dword(mac->dma_pdev,
-                              PAS_DMA_RXINT_RCMDSTA(mac->dma_if),
-                              PAS_DMA_RXINT_RCMDSTA_EN);
+       write_dma_reg(PAS_DMA_RXINT_RCMDSTA(mac->dma_if),
+                     PAS_DMA_RXINT_RCMDSTA_EN |
+                     PAS_DMA_RXINT_RCMDSTA_DROPS_M |
+                     PAS_DMA_RXINT_RCMDSTA_BP |
+                     PAS_DMA_RXINT_RCMDSTA_OO |
+                     PAS_DMA_RXINT_RCMDSTA_BT);
 
        /* enable rx channel */
-       pci_write_config_dword(mac->dma_pdev,
-                              PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch),
-                              PAS_DMA_RXCHAN_CCMDSTA_EN |
-                              PAS_DMA_RXCHAN_CCMDSTA_DU);
+       pasemi_dma_start_chan(&rx_ring(mac)->chan, PAS_DMA_RXCHAN_CCMDSTA_DU |
+                                                  PAS_DMA_RXCHAN_CCMDSTA_OD |
+                                                  PAS_DMA_RXCHAN_CCMDSTA_FD |
+                                                  PAS_DMA_RXCHAN_CCMDSTA_DT);
 
        /* enable tx channel */
-       pci_write_config_dword(mac->dma_pdev,
-                              PAS_DMA_TXCHAN_TCMDSTA(mac->dma_txch),
-                              PAS_DMA_TXCHAN_TCMDSTA_EN);
+       pasemi_dma_start_chan(&tx_ring(mac)->chan, PAS_DMA_TXCHAN_TCMDSTA_SZ |
+                                                  PAS_DMA_TXCHAN_TCMDSTA_DB |
+                                                  PAS_DMA_TXCHAN_TCMDSTA_DE |
+                                                  PAS_DMA_TXCHAN_TCMDSTA_DA);
 
-       pasemi_mac_replenish_rx_ring(dev);
+       pasemi_mac_replenish_rx_ring(dev, RX_RING_SIZE);
+
+       write_dma_reg(PAS_DMA_RXCHAN_INCR(rx_ring(mac)->chan.chno),
+                     RX_RING_SIZE>>1);
+
+       /* Clear out any residual packet count state from firmware */
+       pasemi_mac_restart_rx_intr(mac);
+       pasemi_mac_restart_tx_intr(mac);
+
+       flags = PAS_MAC_CFG_PCFG_S1 | PAS_MAC_CFG_PCFG_PE |
+               PAS_MAC_CFG_PCFG_PR | PAS_MAC_CFG_PCFG_CE;
+
+       if (mac->type == MAC_TYPE_GMAC)
+               flags |= PAS_MAC_CFG_PCFG_TSR_1G | PAS_MAC_CFG_PCFG_SPD_1G;
+       else
+               flags |= PAS_MAC_CFG_PCFG_TSR_10G | PAS_MAC_CFG_PCFG_SPD_10G;
+
+       /* Enable interface in MAC */
+       write_mac_reg(mac, PAS_MAC_CFG_PCFG, flags);
 
        ret = pasemi_mac_phy_init(dev);
        /* Some configs don't have PHYs (XAUI etc), so don't complain about
@@ -810,30 +1029,25 @@ static int pasemi_mac_open(struct net_device *dev)
        netif_start_queue(dev);
        napi_enable(&mac->napi);
 
-       /* Interrupts are a bit different for our DMA controller: While
-        * it's got one a regular PCI device header, the interrupt there
-        * is really the base of the range it's using. Each tx and rx
-        * channel has it's own interrupt source.
-        */
-
-       base_irq = virq_to_hw(mac->dma_pdev->irq);
-
-       mac->tx_irq = irq_create_mapping(NULL, base_irq + mac->dma_txch);
-       mac->rx_irq = irq_create_mapping(NULL, base_irq + 20 + mac->dma_txch);
+       snprintf(mac->tx_irq_name, sizeof(mac->tx_irq_name), "%s tx",
+                dev->name);
 
-       ret = request_irq(mac->tx_irq, &pasemi_mac_tx_intr, IRQF_DISABLED,
-                         mac->tx->irq_name, dev);
+       ret = request_irq(mac->tx->chan.irq, &pasemi_mac_tx_intr, IRQF_DISABLED,
+                         mac->tx_irq_name, mac->tx);
        if (ret) {
                dev_err(&mac->pdev->dev, "request_irq of irq %d failed: %d\n",
-                       base_irq + mac->dma_txch, ret);
+                       mac->tx->chan.irq, ret);
                goto out_tx_int;
        }
 
-       ret = request_irq(mac->rx_irq, &pasemi_mac_rx_intr, IRQF_DISABLED,
-                         mac->rx->irq_name, dev);
+       snprintf(mac->rx_irq_name, sizeof(mac->rx_irq_name), "%s rx",
+                dev->name);
+
+       ret = request_irq(mac->rx->chan.irq, &pasemi_mac_rx_intr, IRQF_DISABLED,
+                         mac->rx_irq_name, mac->rx);
        if (ret) {
                dev_err(&mac->pdev->dev, "request_irq of irq %d failed: %d\n",
-                       base_irq + 20 + mac->dma_rxch, ret);
+                       mac->rx->chan.irq, ret);
                goto out_rx_int;
        }
 
@@ -843,13 +1057,14 @@ static int pasemi_mac_open(struct net_device *dev)
        return 0;
 
 out_rx_int:
-       free_irq(mac->tx_irq, dev);
+       free_irq(mac->tx->chan.irq, mac->tx);
 out_tx_int:
        napi_disable(&mac->napi);
        netif_stop_queue(dev);
-       pasemi_mac_free_tx_resources(dev);
-out_tx_resources:
-       pasemi_mac_free_rx_resources(dev);
+out_tx_ring:
+       if (mac->tx)
+               pasemi_mac_free_tx_resources(mac);
+       pasemi_mac_free_rx_resources(mac);
 out_rx_resources:
 
        return ret;
@@ -860,8 +1075,12 @@ out_rx_resources:
 static int pasemi_mac_close(struct net_device *dev)
 {
        struct pasemi_mac *mac = netdev_priv(dev);
-       unsigned int stat;
+       unsigned int sta;
        int retries;
+       int rxch, txch;
+
+       rxch = rx_ring(mac)->chan.chno;
+       txch = tx_ring(mac)->chan.chno;
 
        if (mac->phydev) {
                phy_stop(mac->phydev);
@@ -871,74 +1090,80 @@ static int pasemi_mac_close(struct net_device *dev)
        netif_stop_queue(dev);
        napi_disable(&mac->napi);
 
+       sta = read_dma_reg(PAS_DMA_RXINT_RCMDSTA(mac->dma_if));
+       if (sta & (PAS_DMA_RXINT_RCMDSTA_BP |
+                     PAS_DMA_RXINT_RCMDSTA_OO |
+                     PAS_DMA_RXINT_RCMDSTA_BT))
+               printk(KERN_DEBUG "pasemi_mac: rcmdsta error: 0x%08x\n", sta);
+
+       sta = read_dma_reg(PAS_DMA_RXCHAN_CCMDSTA(rxch));
+       if (sta & (PAS_DMA_RXCHAN_CCMDSTA_DU |
+                    PAS_DMA_RXCHAN_CCMDSTA_OD |
+                    PAS_DMA_RXCHAN_CCMDSTA_FD |
+                    PAS_DMA_RXCHAN_CCMDSTA_DT))
+               printk(KERN_DEBUG "pasemi_mac: ccmdsta error: 0x%08x\n", sta);
+
+       sta = read_dma_reg(PAS_DMA_TXCHAN_TCMDSTA(txch));
+       if (sta & (PAS_DMA_TXCHAN_TCMDSTA_SZ | PAS_DMA_TXCHAN_TCMDSTA_DB |
+                     PAS_DMA_TXCHAN_TCMDSTA_DE | PAS_DMA_TXCHAN_TCMDSTA_DA))
+               printk(KERN_DEBUG "pasemi_mac: tcmdsta error: 0x%08x\n", sta);
+
        /* Clean out any pending buffers */
-       pasemi_mac_clean_tx(mac);
-       pasemi_mac_clean_rx(mac, RX_RING_SIZE);
+       pasemi_mac_clean_tx(tx_ring(mac));
+       pasemi_mac_clean_rx(rx_ring(mac), RX_RING_SIZE);
 
        /* Disable interface */
-       pci_write_config_dword(mac->dma_pdev,
-                              PAS_DMA_TXCHAN_TCMDSTA(mac->dma_txch),
-                              PAS_DMA_TXCHAN_TCMDSTA_ST);
-       pci_write_config_dword(mac->dma_pdev,
-                     PAS_DMA_RXINT_RCMDSTA(mac->dma_if),
+       write_dma_reg(PAS_DMA_TXCHAN_TCMDSTA(txch),
+                     PAS_DMA_TXCHAN_TCMDSTA_ST);
+       write_dma_reg( PAS_DMA_RXINT_RCMDSTA(mac->dma_if),
                      PAS_DMA_RXINT_RCMDSTA_ST);
-       pci_write_config_dword(mac->dma_pdev,
-                     PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch),
+       write_dma_reg(PAS_DMA_RXCHAN_CCMDSTA(rxch),
                      PAS_DMA_RXCHAN_CCMDSTA_ST);
 
        for (retries = 0; retries < MAX_RETRIES; retries++) {
-               pci_read_config_dword(mac->dma_pdev,
-                                     PAS_DMA_TXCHAN_TCMDSTA(mac->dma_txch),
-                                     &stat);
-               if (!(stat & PAS_DMA_TXCHAN_TCMDSTA_ACT))
+               sta = read_dma_reg(PAS_DMA_TXCHAN_TCMDSTA(txch));
+               if (!(sta & PAS_DMA_TXCHAN_TCMDSTA_ACT)) /* tx channel idle */
                        break;
                cond_resched();
        }
 
-       if (stat & PAS_DMA_TXCHAN_TCMDSTA_ACT)
+       if (sta & PAS_DMA_TXCHAN_TCMDSTA_ACT)
                dev_err(&mac->dma_pdev->dev, "Failed to stop tx channel\n");
 
        for (retries = 0; retries < MAX_RETRIES; retries++) {
-               pci_read_config_dword(mac->dma_pdev,
-                                     PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch),
-                                     &stat);
-               if (!(stat & PAS_DMA_RXCHAN_CCMDSTA_ACT))
+               sta = read_dma_reg(PAS_DMA_RXCHAN_CCMDSTA(rxch));
+               if (!(sta & PAS_DMA_RXCHAN_CCMDSTA_ACT))
                        break;
                cond_resched();
        }
 
-       if (stat & PAS_DMA_RXCHAN_CCMDSTA_ACT)
+       if (sta & PAS_DMA_RXCHAN_CCMDSTA_ACT)
                dev_err(&mac->dma_pdev->dev, "Failed to stop rx channel\n");
 
        for (retries = 0; retries < MAX_RETRIES; retries++) {
-               pci_read_config_dword(mac->dma_pdev,
-                                     PAS_DMA_RXINT_RCMDSTA(mac->dma_if),
-                                     &stat);
-               if (!(stat & PAS_DMA_RXINT_RCMDSTA_ACT))
+               sta = read_dma_reg(PAS_DMA_RXINT_RCMDSTA(mac->dma_if));
+               if (!(sta & PAS_DMA_RXINT_RCMDSTA_ACT))
                        break;
                cond_resched();
        }
 
-       if (stat & PAS_DMA_RXINT_RCMDSTA_ACT)
+       if (sta & PAS_DMA_RXINT_RCMDSTA_ACT)
                dev_err(&mac->dma_pdev->dev, "Failed to stop rx interface\n");
 
        /* Then, disable the channel. This must be done separately from
         * stopping, since you can't disable when active.
         */
 
-       pci_write_config_dword(mac->dma_pdev,
-                              PAS_DMA_TXCHAN_TCMDSTA(mac->dma_txch), 0);
-       pci_write_config_dword(mac->dma_pdev,
-                              PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch), 0);
-       pci_write_config_dword(mac->dma_pdev,
-                              PAS_DMA_RXINT_RCMDSTA(mac->dma_if), 0);
+       write_dma_reg(PAS_DMA_TXCHAN_TCMDSTA(txch), 0);
+       write_dma_reg(PAS_DMA_RXCHAN_CCMDSTA(rxch), 0);
+       write_dma_reg(PAS_DMA_RXINT_RCMDSTA(mac->dma_if), 0);
 
-       free_irq(mac->tx_irq, dev);
-       free_irq(mac->rx_irq, dev);
+       free_irq(mac->tx->chan.irq, mac->tx);
+       free_irq(mac->rx->chan.irq, mac->rx);
 
        /* Free resources */
-       pasemi_mac_free_rx_resources(dev);
-       pasemi_mac_free_tx_resources(dev);
+       pasemi_mac_free_rx_resources(mac);
+       pasemi_mac_free_tx_resources(mac);
 
        return 0;
 }
@@ -947,13 +1172,14 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
 {
        struct pasemi_mac *mac = netdev_priv(dev);
        struct pasemi_mac_txring *txring;
-       struct pasemi_mac_buffer *info;
-       struct pas_dma_xct_descr *dp;
-       u64 dflags;
-       dma_addr_t map;
-       int flags;
+       u64 dflags, mactx;
+       dma_addr_t map[MAX_SKB_FRAGS+1];
+       unsigned int map_size[MAX_SKB_FRAGS+1];
+       unsigned long flags;
+       int i, nfrags;
+       int fill;
 
-       dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_SS | XCT_MACTX_CRC_PAD;
+       dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_CRC_PAD;
 
        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                const unsigned char *nh = skb_network_header(skb);
@@ -972,71 +1198,88 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
                }
        }
 
-       map = pci_map_single(mac->dma_pdev, skb->data, skb->len, PCI_DMA_TODEVICE);
+       nfrags = skb_shinfo(skb)->nr_frags;
+
+       map[0] = pci_map_single(mac->dma_pdev, skb->data, skb_headlen(skb),
+                               PCI_DMA_TODEVICE);
+       map_size[0] = skb_headlen(skb);
+       if (dma_mapping_error(map[0]))
+               goto out_err_nolock;
+
+       for (i = 0; i < nfrags; i++) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+               map[i+1] = pci_map_page(mac->dma_pdev, frag->page,
+                                       frag->page_offset, frag->size,
+                                       PCI_DMA_TODEVICE);
+               map_size[i+1] = frag->size;
+               if (dma_mapping_error(map[i+1])) {
+                       nfrags = i;
+                       goto out_err_nolock;
+               }
+       }
 
-       if (dma_mapping_error(map))
-               return NETDEV_TX_BUSY;
+       mactx = dflags | XCT_MACTX_LLEN(skb->len);
 
-       txring = mac->tx;
+       txring = tx_ring(mac);
 
        spin_lock_irqsave(&txring->lock, flags);
 
-       if (txring->next_to_clean - txring->next_to_use == TX_RING_SIZE) {
-               spin_unlock_irqrestore(&txring->lock, flags);
-               pasemi_mac_clean_tx(mac);
-               pasemi_mac_restart_tx_intr(mac);
-               spin_lock_irqsave(&txring->lock, flags);
-
-               if (txring->next_to_clean - txring->next_to_use ==
-                   TX_RING_SIZE) {
-                       /* Still no room -- stop the queue and wait for tx
-                        * intr when there's room.
-                        */
-                       netif_stop_queue(dev);
-                       goto out_err;
-               }
+       fill = txring->next_to_fill;
+
+       /* Avoid stepping on the same cache line that the DMA controller
+        * is currently about to send, so leave at least 8 words available.
+        * Total free space needed is mactx + fragments + 8
+        */
+       if (RING_AVAIL(txring) < nfrags + 10) {
+               /* no room -- stop the queue and wait for tx intr */
+               netif_stop_queue(dev);
+               goto out_err;
        }
 
+       TX_DESC(txring, fill) = mactx;
+       fill++;
+       TX_DESC_INFO(txring, fill).skb = skb;
+       for (i = 0; i <= nfrags; i++) {
+               TX_DESC(txring, fill+i) =
+                       XCT_PTR_LEN(map_size[i]) | XCT_PTR_ADDR(map[i]);
+               TX_DESC_INFO(txring, fill+i).dma = map[i];
+       }
 
-       dp = &TX_DESC(mac, txring->next_to_use);
-       info = &TX_DESC_INFO(mac, txring->next_to_use);
+       /* We have to add an even number of 8-byte entries to the ring
+        * even if the last one is unused. That means always an odd number
+        * of pointers + one mactx descriptor.
+        */
+       if (nfrags & 1)
+               nfrags++;
 
-       dp->mactx = dflags | XCT_MACTX_LLEN(skb->len);
-       dp->ptr   = XCT_PTR_LEN(skb->len) | XCT_PTR_ADDR(map);
-       info->dma = map;
-       info->skb = skb;
+       txring->next_to_fill = (fill + nfrags + 1) & (TX_RING_SIZE-1);
 
-       txring->next_to_use++;
-       mac->stats.tx_packets++;
-       mac->stats.tx_bytes += skb->len;
+       dev->stats.tx_packets++;
+       dev->stats.tx_bytes += skb->len;
 
        spin_unlock_irqrestore(&txring->lock, flags);
 
-       pci_write_config_dword(mac->dma_pdev,
-                              PAS_DMA_TXCHAN_INCR(mac->dma_txch), 1);
+       write_dma_reg(PAS_DMA_TXCHAN_INCR(txring->chan.chno), (nfrags+2) >> 1);
 
        return NETDEV_TX_OK;
 
 out_err:
        spin_unlock_irqrestore(&txring->lock, flags);
-       pci_unmap_single(mac->dma_pdev, map, skb->len, PCI_DMA_TODEVICE);
-       return NETDEV_TX_BUSY;
-}
+out_err_nolock:
+       while (nfrags--)
+               pci_unmap_single(mac->dma_pdev, map[nfrags], map_size[nfrags],
+                                PCI_DMA_TODEVICE);
 
-static struct net_device_stats *pasemi_mac_get_stats(struct net_device *dev)
-{
-       struct pasemi_mac *mac = netdev_priv(dev);
-
-       return &mac->stats;
+       return NETDEV_TX_BUSY;
 }
 
-
 static void pasemi_mac_set_rx_mode(struct net_device *dev)
 {
-       struct pasemi_mac *mac = netdev_priv(dev);
+       const struct pasemi_mac *mac = netdev_priv(dev);
        unsigned int flags;
 
-       pci_read_config_dword(mac->pdev, PAS_MAC_CFG_PCFG, &flags);
+       flags = read_mac_reg(mac, PAS_MAC_CFG_PCFG);
 
        /* Set promiscuous */
        if (dev->flags & IFF_PROMISC)
@@ -1044,7 +1287,7 @@ static void pasemi_mac_set_rx_mode(struct net_device *dev)
        else
                flags &= ~PAS_MAC_CFG_PCFG_PR;
 
-       pci_write_config_dword(mac->pdev, PAS_MAC_CFG_PCFG, flags);
+       write_mac_reg(mac, PAS_MAC_CFG_PCFG, flags);
 }
 
 
@@ -1054,7 +1297,8 @@ static int pasemi_mac_poll(struct napi_struct *napi, int budget)
        struct net_device *dev = mac->netdev;
        int pkts;
 
-       pkts = pasemi_mac_clean_rx(mac, budget);
+       pasemi_mac_clean_tx(tx_ring(mac));
+       pkts = pasemi_mac_clean_rx(rx_ring(mac), budget);
        if (pkts < budget) {
                /* all done, no more packets present */
                netif_rx_complete(dev, napi);
@@ -1067,10 +1311,10 @@ static int pasemi_mac_poll(struct napi_struct *napi, int budget)
 static int __devinit
 pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-       static int index = 0;
        struct net_device *dev;
        struct pasemi_mac *mac;
        int err;
+       DECLARE_MAC_BUF(mac_buf);
 
        err = pci_enable_device(pdev);
        if (err)
@@ -1084,7 +1328,6 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto out_disable_device;
        }
 
-       SET_MODULE_OWNER(dev);
        pci_set_drvdata(pdev, dev);
        SET_NETDEV_DEV(dev, &pdev->dev);
 
@@ -1092,38 +1335,39 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        mac->pdev = pdev;
        mac->netdev = dev;
-       mac->dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL);
 
        netif_napi_add(dev, &mac->napi, pasemi_mac_poll, 64);
 
-       dev->features = NETIF_F_HW_CSUM;
+       dev->features = NETIF_F_IP_CSUM | NETIF_F_LLTX | NETIF_F_SG |
+                       NETIF_F_HIGHDMA;
 
+       mac->dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL);
        if (!mac->dma_pdev) {
-               dev_err(&pdev->dev, "Can't find DMA Controller\n");
+               dev_err(&mac->pdev->dev, "Can't find DMA Controller\n");
                err = -ENODEV;
-               goto out_free_netdev;
+               goto out;
        }
 
        mac->iob_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa001, NULL);
-
        if (!mac->iob_pdev) {
-               dev_err(&pdev->dev, "Can't find I/O Bridge\n");
+               dev_err(&mac->pdev->dev, "Can't find I/O Bridge\n");
                err = -ENODEV;
-               goto out_put_dma_pdev;
+               goto out;
        }
 
-       /* These should come out of the device tree eventually */
-       mac->dma_txch = index;
-       mac->dma_rxch = index;
+       /* get mac addr from device tree */
+       if (pasemi_get_mac_addr(mac) || !is_valid_ether_addr(mac->mac_addr)) {
+               err = -ENODEV;
+               goto out;
+       }
+       memcpy(dev->dev_addr, mac->mac_addr, sizeof(mac->mac_addr));
 
-       /* We probe GMAC before XAUI, but the DMA interfaces are
-        * in XAUI, GMAC order.
-        */
-       if (index < 4)
-               mac->dma_if = index + 2;
-       else
-               mac->dma_if = index - 4;
-       index++;
+       mac->dma_if = mac_to_intf(mac);
+       if (mac->dma_if < 0) {
+               dev_err(&mac->pdev->dev, "Can't map DMA interface\n");
+               err = -ENODEV;
+               goto out;
+       }
 
        switch (pdev->device) {
        case 0xa005:
@@ -1137,28 +1381,13 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto out;
        }
 
-       /* get mac addr from device tree */
-       if (pasemi_get_mac_addr(mac) || !is_valid_ether_addr(mac->mac_addr)) {
-               err = -ENODEV;
-               goto out;
-       }
-       memcpy(dev->dev_addr, mac->mac_addr, sizeof(mac->mac_addr));
-
        dev->open = pasemi_mac_open;
        dev->stop = pasemi_mac_close;
        dev->hard_start_xmit = pasemi_mac_start_tx;
-       dev->get_stats = pasemi_mac_get_stats;
        dev->set_multicast_list = pasemi_mac_set_rx_mode;
 
-       /* The dma status structure is located in the I/O bridge, and
-        * is cache coherent.
-        */
-       if (!dma_status)
-               /* XXXOJN This should come from the device tree */
-               dma_status = __ioremap(0xfd800000, 0x1000, 0);
-
-       mac->rx_status = &dma_status->rx_sta[mac->dma_rxch];
-       mac->tx_status = &dma_status->tx_sta[mac->dma_txch];
+       if (err) /* NOTE(review): nothing visible in this hunk assigns err; confirm what can make it nonzero here */
+               goto out;
 
        mac->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
 
@@ -1171,21 +1400,19 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                dev_err(&mac->pdev->dev, "register_netdev failed with error %d\n",
                        err);
                goto out;
-       } else
-               printk(KERN_INFO "%s: PA Semi %s: intf %d, txch %d, rxch %d, "
-                      "hw addr %02x:%02x:%02x:%02x:%02x:%02x\n",
+       } else if netif_msg_probe(mac)
+               printk(KERN_INFO "%s: PA Semi %s: intf %d, hw addr %s\n",
                       dev->name, mac->type == MAC_TYPE_GMAC ? "GMAC" : "XAUI",
-                      mac->dma_if, mac->dma_txch, mac->dma_rxch,
-                      dev->dev_addr[0], dev->dev_addr[1], dev->dev_addr[2],
-                      dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5]);
+                      mac->dma_if, print_mac(mac_buf, dev->dev_addr));
 
        return err;
 
 out:
-       pci_dev_put(mac->iob_pdev);
-out_put_dma_pdev:
-       pci_dev_put(mac->dma_pdev);
-out_free_netdev:
+       if (mac->iob_pdev)
+               pci_dev_put(mac->iob_pdev);
+       if (mac->dma_pdev)
+               pci_dev_put(mac->dma_pdev);
+
        free_netdev(dev);
 out_disable_device:
        pci_disable_device(pdev);
@@ -1209,6 +1436,9 @@ static void __devexit pasemi_mac_remove(struct pci_dev *pdev)
        pci_dev_put(mac->dma_pdev);
        pci_dev_put(mac->iob_pdev);
 
+       pasemi_dma_free_chan(&mac->tx->chan); /* NOTE(review): runs after the pci_dev_put() calls above - confirm free_chan needs neither device */
+       pasemi_dma_free_chan(&mac->rx->chan); /* rx channel is handed back the same way as tx */
+
        pci_set_drvdata(pdev, NULL);
        free_netdev(netdev);
 }
@@ -1231,12 +1461,16 @@ static struct pci_driver pasemi_mac_driver = {
 static void __exit pasemi_mac_cleanup_module(void)
 {
        pci_unregister_driver(&pasemi_mac_driver);
-       __iounmap(dma_status);
-       dma_status = NULL;
 }
 
 int pasemi_mac_init_module(void)
 {
+       int err;
+
+       err = pasemi_dma_init(); /* runs before the PCI driver is registered */
+       if (err) /* NOTE(review): no matching teardown is visible in pasemi_mac_cleanup_module - confirm none is needed */
+               return err; /* nonzero: report the failure without calling pci_register_driver() */
+
        return pci_register_driver(&pasemi_mac_driver);
 }