Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
author David S. Miller <davem@davemloft.net>
Tue, 10 Feb 2009 07:22:21 +0000 (23:22 -0800)
committer David S. Miller <davem@davemloft.net>
Tue, 10 Feb 2009 07:22:21 +0000 (23:22 -0800)
Conflicts:
drivers/net/gianfar.c

1  2 
drivers/net/gianfar.c
drivers/net/netxen/netxen_nic_main.c
drivers/net/tun.c

diff --combined drivers/net/gianfar.c
@@@ -93,7 -93,7 +93,7 @@@
  #include <linux/of.h>
  
  #include "gianfar.h"
 -#include "gianfar_mii.h"
 +#include "fsl_pq_mdio.h"
  
  #define TX_TIMEOUT      (1*HZ)
  #undef BRIEF_GFAR_ERRORS
@@@ -141,6 -141,8 +141,6 @@@ void gfar_start(struct net_device *dev)
  static void gfar_clear_exact_match(struct net_device *dev);
  static void gfar_set_mac_for_addr(struct net_device *dev, int num, u8 *addr);
  
 -extern const struct ethtool_ops gfar_ethtool_ops;
 -
  MODULE_AUTHOR("Freescale Semiconductor, Inc");
  MODULE_DESCRIPTION("Gianfar Ethernet Driver");
  MODULE_LICENSE("GPL");
@@@ -164,9 -166,6 +164,9 @@@ static int gfar_of_init(struct net_devi
        struct gfar_private *priv = netdev_priv(dev);
        struct device_node *np = priv->node;
        char bus_name[MII_BUS_ID_SIZE];
 +      const u32 *stash;
 +      const u32 *stash_len;
 +      const u32 *stash_idx;
  
        if (!np || !of_device_is_available(np))
                return -ENODEV;
                }
        }
  
 +      stash = of_get_property(np, "bd-stash", NULL);
 +
 +      if(stash) {
 +              priv->device_flags |= FSL_GIANFAR_DEV_HAS_BD_STASHING;
 +              priv->bd_stash_en = 1;
 +      }
 +
 +      stash_len = of_get_property(np, "rx-stash-len", NULL);
 +
 +      if (stash_len)
 +              priv->rx_stash_size = *stash_len;
 +
 +      stash_idx = of_get_property(np, "rx-stash-idx", NULL);
 +
 +      if (stash_idx)
 +              priv->rx_stash_index = *stash_idx;
 +
 +      if (stash_len || stash_idx)
 +              priv->device_flags |= FSL_GIANFAR_DEV_HAS_BUF_STASHING;
 +
        mac_addr = of_get_mac_address(np);
        if (mac_addr)
                memcpy(dev->dev_addr, mac_addr, MAC_ADDR_LEN);
                of_node_put(phy);
                of_node_put(mdio);
  
 -              gfar_mdio_bus_name(bus_name, mdio);
 +              fsl_pq_mdio_bus_name(bus_name, mdio);
                snprintf(priv->phy_bus_id, sizeof(priv->phy_bus_id), "%s:%02x",
                                bus_name, *id);
        }
@@@ -446,7 -425,7 +446,7 @@@ static int gfar_probe(struct of_device 
                priv->hash_width = 8;
  
                priv->hash_regs[0] = &priv->regs->gaddr0;
 -                priv->hash_regs[1] = &priv->regs->gaddr1;
 +              priv->hash_regs[1] = &priv->regs->gaddr1;
                priv->hash_regs[2] = &priv->regs->gaddr2;
                priv->hash_regs[3] = &priv->regs->gaddr3;
                priv->hash_regs[4] = &priv->regs->gaddr4;
                goto register_fail;
        }
  
 +      device_init_wakeup(&dev->dev,
 +              priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET);
 +
        /* fill out IRQ number and name fields */
        len_devname = strlen(dev->name);
        strncpy(&priv->int_name_tx[0], dev->name, len_devname);
@@@ -862,7 -838,7 +862,7 @@@ void stop_gfar(struct net_device *dev
                free_irq(priv->interruptTransmit, dev);
                free_irq(priv->interruptReceive, dev);
        } else {
 -              free_irq(priv->interruptTransmit, dev);
 +              free_irq(priv->interruptTransmit, dev);
        }
  
        free_skb_resources(priv);
@@@ -1207,8 -1183,6 +1207,8 @@@ static int gfar_enet_open(struct net_de
  
        napi_enable(&priv->napi);
  
 +      skb_queue_head_init(&priv->rx_recycle);
 +
        /* Initialize a bunch of registers */
        init_registers(dev);
  
  
        netif_start_queue(dev);
  
 +      device_set_wakeup_enable(&dev->dev, priv->wol_en);
 +
        return err;
  }
  
@@@ -1427,7 -1399,6 +1427,7 @@@ static int gfar_close(struct net_devic
  
        napi_disable(&priv->napi);
  
 +      skb_queue_purge(&priv->rx_recycle);
        cancel_work_sync(&priv->reset_task);
        stop_gfar(dev);
  
@@@ -1624,17 -1595,7 +1624,17 @@@ static int gfar_clean_tx_ring(struct ne
                        bdp = next_txbd(bdp, base, tx_ring_size);
                }
  
 -              dev_kfree_skb_any(skb);
 +              /*
 +               * If there's room in the queue (limit it to rx_ring_size)
 +               * we add this skb back into the pool, if it's the right size
 +               */
 +              if (skb_queue_len(&priv->rx_recycle) < priv->rx_ring_size &&
 +                              skb_recycle_check(skb, priv->rx_buffer_size +
 +                                      RXBUF_ALIGNMENT))
 +                      __skb_queue_head(&priv->rx_recycle, skb);
 +              else
 +                      dev_kfree_skb_any(skb);
 +
                priv->tx_skbuff[skb_dirtytx] = NULL;
  
                skb_dirtytx = (skb_dirtytx + 1) &
@@@ -1665,9 -1626,15 +1665,15 @@@ static void gfar_schedule_cleanup(struc
        spin_lock_irqsave(&priv->txlock, flags);
        spin_lock(&priv->rxlock);
  
 -      if (netif_rx_schedule_prep(&priv->napi)) {
 +      if (napi_schedule_prep(&priv->napi)) {
                gfar_write(&priv->regs->imask, IMASK_RTX_DISABLED);
 -              __netif_rx_schedule(&priv->napi);
 +              __napi_schedule(&priv->napi);
+       } else {
+               /*
+                * Clear IEVENT, so interrupts aren't called again
+                * because of the packets that have already arrived.
+                */
+               gfar_write(&priv->regs->ievent, IEVENT_RTX_MASK);
        }
  
        spin_unlock(&priv->rxlock);
@@@ -1707,10 -1674,8 +1713,10 @@@ struct sk_buff * gfar_new_skb(struct ne
        struct gfar_private *priv = netdev_priv(dev);
        struct sk_buff *skb = NULL;
  
 -      /* We have to allocate the skb, so keep trying till we succeed */
 -      skb = netdev_alloc_skb(dev, priv->rx_buffer_size + RXBUF_ALIGNMENT);
 +      skb = __skb_dequeue(&priv->rx_recycle);
 +      if (!skb)
 +              skb = netdev_alloc_skb(dev,
 +                              priv->rx_buffer_size + RXBUF_ALIGNMENT);
  
        if (!skb)
                return NULL;
@@@ -1858,7 -1823,7 +1864,7 @@@ int gfar_clean_rx_ring(struct net_devic
                        if (unlikely(!newskb))
                                newskb = skb;
                        else if (skb)
 -                              dev_kfree_skb_any(skb);
 +                              __skb_queue_head(&priv->rx_recycle, skb);
                } else {
                        /* Increment the number of packets */
                        dev->stats.rx_packets++;
                                skb_put(skb, pkt_len);
                                dev->stats.rx_bytes += pkt_len;
  
 +                              if (in_irq() || irqs_disabled())
 +                                      printk("Interrupt problem!\n");
                                gfar_process_frame(dev, skb, amount_pull);
  
                        } else {
@@@ -1928,7 -1891,7 +1934,7 @@@ static int gfar_poll(struct napi_struc
                return budget;
  
        if (rx_cleaned < budget) {
 -              netif_rx_complete(napi);
 +              napi_complete(napi);
  
                /* Clear the halt bit in RSTAT */
                gfar_write(&priv->regs->rstat, RSTAT_CLEAR_RHALT);
@@@ -2345,12 -2308,23 +2351,12 @@@ static struct of_platform_driver gfar_d
  
  static int __init gfar_init(void)
  {
 -      int err = gfar_mdio_init();
 -
 -      if (err)
 -              return err;
 -
 -      err = of_register_platform_driver(&gfar_driver);
 -
 -      if (err)
 -              gfar_mdio_exit();
 -
 -      return err;
 +      return of_register_platform_driver(&gfar_driver);
  }
  
  static void __exit gfar_exit(void)
  {
        of_unregister_platform_driver(&gfar_driver);
 -      gfar_mdio_exit();
  }
  
  module_init(gfar_init);
@@@ -76,6 -76,7 +76,7 @@@ static void netxen_nic_poll_controller(
  #endif
  static irqreturn_t netxen_intr(int irq, void *data);
  static irqreturn_t netxen_msi_intr(int irq, void *data);
+ static irqreturn_t netxen_msix_intr(int irq, void *data);
  
  /*  PCI Device ID Table  */
  #define ENTRY(device) \
@@@ -1084,7 -1085,9 +1085,9 @@@ static int netxen_nic_open(struct net_d
                        for (ring = 0; ring < adapter->max_rds_rings; ring++)
                                netxen_post_rx_buffers(adapter, ctx, ring);
                }
-               if (NETXEN_IS_MSI_FAMILY(adapter))
+               if (adapter->flags & NETXEN_NIC_MSIX_ENABLED)
+                       handler = netxen_msix_intr;
+               else if (adapter->flags & NETXEN_NIC_MSI_ENABLED)
                        handler = netxen_msi_intr;
                else {
                        flags |= IRQF_SHARED;
@@@ -1170,7 -1173,7 +1173,7 @@@ static bool netxen_tso_check(struct net
        __be16 protocol = skb->protocol;
        u16 flags = 0;
  
 -      if (protocol == __constant_htons(ETH_P_8021Q)) {
 +      if (protocol == cpu_to_be16(ETH_P_8021Q)) {
                struct vlan_ethhdr *vh = (struct vlan_ethhdr *)skb->data;
                protocol = vh->h_vlan_encapsulated_proto;
                flags = FLAGS_VLAN_TAGGED;
                desc->total_hdr_length =
                        skb_transport_offset(skb) + tcp_hdrlen(skb);
  
 -              opcode = (protocol == __constant_htons(ETH_P_IPV6)) ?
 +              opcode = (protocol == cpu_to_be16(ETH_P_IPV6)) ?
                                TX_TCP_LSO6 : TX_TCP_LSO;
                tso = true;
  
        } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
                u8 l4proto;
  
 -              if (protocol == __constant_htons(ETH_P_IP)) {
 +              if (protocol == cpu_to_be16(ETH_P_IP)) {
                        l4proto = ip_hdr(skb)->protocol;
  
                        if (l4proto == IPPROTO_TCP)
                                opcode = TX_TCP_PKT;
                        else if(l4proto == IPPROTO_UDP)
                                opcode = TX_UDP_PKT;
 -              } else if (protocol == __constant_htons(ETH_P_IPV6)) {
 +              } else if (protocol == cpu_to_be16(ETH_P_IPV6)) {
                        l4proto = ipv6_hdr(skb)->nexthdr;
  
                        if (l4proto == IPPROTO_TCP)
@@@ -1612,6 -1615,14 +1615,14 @@@ static irqreturn_t netxen_msi_intr(int 
        return IRQ_HANDLED;
  }
  
+ static irqreturn_t netxen_msix_intr(int irq, void *data)
+ {
+       struct netxen_adapter *adapter = data;
+       napi_schedule(&adapter->napi);
+       return IRQ_HANDLED;
+ }
  static int netxen_nic_poll(struct napi_struct *napi, int budget)
  {
        struct netxen_adapter *adapter = container_of(napi, struct netxen_adapter, napi);
        }
  
        if ((work_done < budget) && tx_complete) {
 -              netif_rx_complete(&adapter->napi);
 +              napi_complete(&adapter->napi);
                netxen_nic_enable_int(adapter);
        }
  
diff --combined drivers/net/tun.c
@@@ -63,8 -63,6 +63,8 @@@
  #include <linux/virtio_net.h>
  #include <net/net_namespace.h>
  #include <net/netns/generic.h>
 +#include <net/rtnetlink.h>
 +#include <net/sock.h>
  
  #include <asm/system.h>
  #include <asm/uaccess.h>
@@@ -89,127 -87,26 +89,127 @@@ struct tap_filter 
        unsigned char   addr[FLT_EXACT_COUNT][ETH_ALEN];
  };
  
 +struct tun_file {
 +      atomic_t count;
 +      struct tun_struct *tun;
 +      struct net *net;
 +      wait_queue_head_t       read_wait;
 +};
 +
 +struct tun_sock;
 +
  struct tun_struct {
 -      struct list_head        list;
 +      struct tun_file         *tfile;
        unsigned int            flags;
 -      int                     attached;
        uid_t                   owner;
        gid_t                   group;
  
 -      wait_queue_head_t       read_wait;
        struct sk_buff_head     readq;
  
        struct net_device       *dev;
        struct fasync_struct    *fasync;
  
        struct tap_filter       txflt;
 +      struct sock             *sk;
 +      struct socket           socket;
  
  #ifdef TUN_DEBUG
        int debug;
  #endif
  };
  
 +struct tun_sock {
 +      struct sock             sk;
 +      struct tun_struct       *tun;
 +};
 +
 +static inline struct tun_sock *tun_sk(struct sock *sk)
 +{
 +      return container_of(sk, struct tun_sock, sk);
 +}
 +
 +static int tun_attach(struct tun_struct *tun, struct file *file)
 +{
 +      struct tun_file *tfile = file->private_data;
 +      const struct cred *cred = current_cred();
 +      int err;
 +
 +      ASSERT_RTNL();
 +
 +      /* Check permissions */
 +      if (((tun->owner != -1 && cred->euid != tun->owner) ||
 +           (tun->group != -1 && !in_egroup_p(tun->group))) &&
 +              !capable(CAP_NET_ADMIN))
 +              return -EPERM;
 +
 +      netif_tx_lock_bh(tun->dev);
 +
 +      err = -EINVAL;
 +      if (tfile->tun)
 +              goto out;
 +
 +      err = -EBUSY;
 +      if (tun->tfile)
 +              goto out;
 +
 +      err = 0;
 +      tfile->tun = tun;
 +      tun->tfile = tfile;
 +      dev_hold(tun->dev);
 +      atomic_inc(&tfile->count);
 +
 +out:
 +      netif_tx_unlock_bh(tun->dev);
 +      return err;
 +}
 +
 +static void __tun_detach(struct tun_struct *tun)
 +{
 +      struct tun_file *tfile = tun->tfile;
 +
 +      /* Detach from net device */
 +      netif_tx_lock_bh(tun->dev);
 +      tfile->tun = NULL;
 +      tun->tfile = NULL;
 +      netif_tx_unlock_bh(tun->dev);
 +
 +      /* Drop read queue */
 +      skb_queue_purge(&tun->readq);
 +
 +      /* Drop the extra count on the net device */
 +      dev_put(tun->dev);
 +}
 +
 +static void tun_detach(struct tun_struct *tun)
 +{
 +      rtnl_lock();
 +      __tun_detach(tun);
 +      rtnl_unlock();
 +}
 +
 +static struct tun_struct *__tun_get(struct tun_file *tfile)
 +{
 +      struct tun_struct *tun = NULL;
 +
 +      if (atomic_inc_not_zero(&tfile->count))
 +              tun = tfile->tun;
 +
 +      return tun;
 +}
 +
 +static struct tun_struct *tun_get(struct file *file)
 +{
 +      return __tun_get(file->private_data);
 +}
 +
 +static void tun_put(struct tun_struct *tun)
 +{
 +      struct tun_file *tfile = tun->tfile;
 +
 +      if (atomic_dec_and_test(&tfile->count))
 +              tun_detach(tfile->tun);
 +}
 +
  /* TAP filtering */
  static void addr_hash_set(u32 *mask, const u8 *addr)
  {
@@@ -260,10 -157,16 +260,16 @@@ static int update_filter(struct tap_fil
  
        nexact = n;
  
-       /* The rest is hashed */
+       /* Remaining multicast addresses are hashed,
+        * unicast will leave the filter disabled. */
        memset(filter->mask, 0, sizeof(filter->mask));
-       for (; n < uf.count; n++)
+       for (; n < uf.count; n++) {
+               if (!is_multicast_ether_addr(addr[n].u)) {
+                       err = 0; /* no filter */
+                       goto done;
+               }
                addr_hash_set(filter->mask, addr[n].u);
+       }
  
        /* For ALLMULTI just set the mask to all ones.
         * This overrides the mask populated above. */
@@@ -316,23 -219,13 +322,23 @@@ static int check_filter(struct tap_filt
  
  /* Network device part of the driver */
  
 -static int tun_net_id;
 -struct tun_net {
 -      struct list_head dev_list;
 -};
 -
  static const struct ethtool_ops tun_ethtool_ops;
  
 +/* Net device detach from fd. */
 +static void tun_net_uninit(struct net_device *dev)
 +{
 +      struct tun_struct *tun = netdev_priv(dev);
 +      struct tun_file *tfile = tun->tfile;
 +
 +      /* Inform the methods they need to stop using the dev.
 +       */
 +      if (tfile) {
 +              wake_up_all(&tfile->read_wait);
 +              if (atomic_dec_and_test(&tfile->count))
 +                      __tun_detach(tun);
 +      }
 +}
 +
  /* Net device open. */
  static int tun_net_open(struct net_device *dev)
  {
@@@ -355,7 -248,7 +361,7 @@@ static int tun_net_xmit(struct sk_buff 
        DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len);
  
        /* Drop packet if interface is not attached */
 -      if (!tun->attached)
 +      if (!tun->tfile)
                goto drop;
  
        /* Drop if the filter does not like it.
        /* Notify and wake up reader process */
        if (tun->flags & TUN_FASYNC)
                kill_fasync(&tun->fasync, SIGIO, POLL_IN);
 -      wake_up_interruptible(&tun->read_wait);
 +      wake_up_interruptible(&tun->tfile->read_wait);
        return 0;
  
  drop:
@@@ -419,7 -312,6 +425,7 @@@ tun_net_change_mtu(struct net_device *d
  }
  
  static const struct net_device_ops tun_netdev_ops = {
 +      .ndo_uninit             = tun_net_uninit,
        .ndo_open               = tun_net_open,
        .ndo_stop               = tun_net_close,
        .ndo_start_xmit         = tun_net_xmit,
  };
  
  static const struct net_device_ops tap_netdev_ops = {
 +      .ndo_uninit             = tun_net_uninit,
        .ndo_open               = tun_net_open,
        .ndo_stop               = tun_net_close,
        .ndo_start_xmit         = tun_net_xmit,
@@@ -474,66 -365,86 +480,66 @@@ static void tun_net_init(struct net_dev
  /* Poll */
  static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
  {
 -      struct tun_struct *tun = file->private_data;
 -      unsigned int mask = POLLOUT | POLLWRNORM;
 +      struct tun_file *tfile = file->private_data;
 +      struct tun_struct *tun = __tun_get(tfile);
 +      struct sock *sk = tun->sk;
 +      unsigned int mask = 0;
  
        if (!tun)
 -              return -EBADFD;
 +              return POLLERR;
  
        DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name);
  
 -      poll_wait(file, &tun->read_wait, wait);
 +      poll_wait(file, &tfile->read_wait, wait);
  
        if (!skb_queue_empty(&tun->readq))
                mask |= POLLIN | POLLRDNORM;
  
 +      if (sock_writeable(sk) ||
 +          (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
 +           sock_writeable(sk)))
 +              mask |= POLLOUT | POLLWRNORM;
 +
 +      if (tun->dev->reg_state != NETREG_REGISTERED)
 +              mask = POLLERR;
 +
 +      tun_put(tun);
        return mask;
  }
  
  /* prepad is the amount to reserve at front.  len is length after that.
   * linear is a hint as to how much to copy (usually headers). */
 -static struct sk_buff *tun_alloc_skb(size_t prepad, size_t len, size_t linear,
 -                                   gfp_t gfp)
 +static inline struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
 +                                          size_t prepad, size_t len,
 +                                          size_t linear, int noblock)
  {
 +      struct sock *sk = tun->sk;
        struct sk_buff *skb;
 -      unsigned int i;
 -
 -      skb = alloc_skb(prepad + len, gfp|__GFP_NOWARN);
 -      if (skb) {
 -              skb_reserve(skb, prepad);
 -              skb_put(skb, len);
 -              return skb;
 -      }
 +      int err;
  
        /* Under a page?  Don't bother with paged skb. */
        if (prepad + len < PAGE_SIZE)
 -              return NULL;
 +              linear = len;
  
 -      /* Start with a normal skb, and add pages. */
 -      skb = alloc_skb(prepad + linear, gfp);
 +      skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
 +                                 &err);
        if (!skb)
 -              return NULL;
 +              return ERR_PTR(err);
  
        skb_reserve(skb, prepad);
        skb_put(skb, linear);
 -
 -      len -= linear;
 -
 -      for (i = 0; i < MAX_SKB_FRAGS; i++) {
 -              skb_frag_t *f = &skb_shinfo(skb)->frags[i];
 -
 -              f->page = alloc_page(gfp|__GFP_ZERO);
 -              if (!f->page)
 -                      break;
 -
 -              f->page_offset = 0;
 -              f->size = PAGE_SIZE;
 -
 -              skb->data_len += PAGE_SIZE;
 -              skb->len += PAGE_SIZE;
 -              skb->truesize += PAGE_SIZE;
 -              skb_shinfo(skb)->nr_frags++;
 -
 -              if (len < PAGE_SIZE) {
 -                      len = 0;
 -                      break;
 -              }
 -              len -= PAGE_SIZE;
 -      }
 -
 -      /* Too large, or alloc fail? */
 -      if (unlikely(len)) {
 -              kfree_skb(skb);
 -              skb = NULL;
 -      }
 +      skb->data_len = len - linear;
 +      skb->len += len - linear;
  
        return skb;
  }
  
  /* Get packet from user space buffer */
 -static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count)
 +static __inline__ ssize_t tun_get_user(struct tun_struct *tun,
 +                                     struct iovec *iv, size_t count,
 +                                     int noblock)
  {
 -      struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) };
 +      struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
        struct sk_buff *skb;
        size_t len = count, align = 0;
        struct virtio_net_hdr gso = { 0 };
                        return -EINVAL;
        }
  
 -      if (!(skb = tun_alloc_skb(align, len, gso.hdr_len, GFP_KERNEL))) {
 -              tun->dev->stats.rx_dropped++;
 -              return -ENOMEM;
 +      skb = tun_alloc_skb(tun, align, len, gso.hdr_len, noblock);
 +      if (IS_ERR(skb)) {
 +              if (PTR_ERR(skb) != -EAGAIN)
 +                      tun->dev->stats.rx_dropped++;
 +              return PTR_ERR(skb);
        }
  
        if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) {
  static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
                              unsigned long count, loff_t pos)
  {
 -      struct tun_struct *tun = iocb->ki_filp->private_data;
 +      struct file *file = iocb->ki_filp;
 +      struct tun_struct *tun = file->private_data;
 +      ssize_t result;
  
        if (!tun)
                return -EBADFD;
  
        DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count);
  
 -      return tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count));
 +      result = tun_get_user(tun, (struct iovec *)iv, iov_length(iv, count),
 +                            file->f_flags & O_NONBLOCK);
 +
 +      tun_put(tun);
 +      return result;
  }
  
  /* Put packet to the user space buffer */
@@@ -739,8 -642,7 +745,8 @@@ static ssize_t tun_chr_aio_read(struct 
                            unsigned long count, loff_t pos)
  {
        struct file *file = iocb->ki_filp;
 -      struct tun_struct *tun = file->private_data;
 +      struct tun_file *tfile = file->private_data;
 +      struct tun_struct *tun = __tun_get(tfile);
        DECLARE_WAITQUEUE(wait, current);
        struct sk_buff *skb;
        ssize_t len, ret = 0;
        DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);
  
        len = iov_length(iv, count);
 -      if (len < 0)
 -              return -EINVAL;
 +      if (len < 0) {
 +              ret = -EINVAL;
 +              goto out;
 +      }
  
 -      add_wait_queue(&tun->read_wait, &wait);
 +      add_wait_queue(&tfile->read_wait, &wait);
        while (len) {
                current->state = TASK_INTERRUPTIBLE;
  
                                ret = -ERESTARTSYS;
                                break;
                        }
 +                      if (tun->dev->reg_state != NETREG_REGISTERED) {
 +                              ret = -EIO;
 +                              break;
 +                      }
  
                        /* Nothing to read, let's sleep */
                        schedule();
        }
  
        current->state = TASK_RUNNING;
 -      remove_wait_queue(&tun->read_wait, &wait);
 +      remove_wait_queue(&tfile->read_wait, &wait);
  
 +out:
 +      tun_put(tun);
        return ret;
  }
  
@@@ -799,78 -693,54 +805,78 @@@ static void tun_setup(struct net_devic
        struct tun_struct *tun = netdev_priv(dev);
  
        skb_queue_head_init(&tun->readq);
 -      init_waitqueue_head(&tun->read_wait);
  
        tun->owner = -1;
        tun->group = -1;
  
        dev->ethtool_ops = &tun_ethtool_ops;
        dev->destructor = free_netdev;
 -      dev->features |= NETIF_F_NETNS_LOCAL;
  }
  
 -static struct tun_struct *tun_get_by_name(struct tun_net *tn, const char *name)
 +/* Trivial set of netlink ops to allow deleting tun or tap
 + * device with netlink.
 + */
 +static int tun_validate(struct nlattr *tb[], struct nlattr *data[])
 +{
 +      return -EINVAL;
 +}
 +
 +static struct rtnl_link_ops tun_link_ops __read_mostly = {
 +      .kind           = DRV_NAME,
 +      .priv_size      = sizeof(struct tun_struct),
 +      .setup          = tun_setup,
 +      .validate       = tun_validate,
 +};
 +
 +static void tun_sock_write_space(struct sock *sk)
  {
        struct tun_struct *tun;
  
 -      ASSERT_RTNL();
 -      list_for_each_entry(tun, &tn->dev_list, list) {
 -              if (!strncmp(tun->dev->name, name, IFNAMSIZ))
 -                  return tun;
 -      }
 +      if (!sock_writeable(sk))
 +              return;
 +
 +      if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 +              wake_up_interruptible_sync(sk->sk_sleep);
 +
 +      if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
 +              return;
  
 -      return NULL;
 +      tun = container_of(sk, struct tun_sock, sk)->tun;
 +      kill_fasync(&tun->fasync, SIGIO, POLL_OUT);
  }
  
 +static void tun_sock_destruct(struct sock *sk)
 +{
 +      dev_put(container_of(sk, struct tun_sock, sk)->tun->dev);
 +}
 +
 +static struct proto tun_proto = {
 +      .name           = "tun",
 +      .owner          = THIS_MODULE,
 +      .obj_size       = sizeof(struct tun_sock),
 +};
 +
  static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
  {
 -      struct tun_net *tn;
 +      struct sock *sk;
        struct tun_struct *tun;
        struct net_device *dev;
 -      const struct cred *cred = current_cred();
 +      struct tun_file *tfile = file->private_data;
        int err;
  
 -      tn = net_generic(net, tun_net_id);
 -      tun = tun_get_by_name(tn, ifr->ifr_name);
 -      if (tun) {
 -              if (tun->attached)
 -                      return -EBUSY;
 -
 -              /* Check permissions */
 -              if (((tun->owner != -1 &&
 -                    cred->euid != tun->owner) ||
 -                   (tun->group != -1 &&
 -                    cred->egid != tun->group)) &&
 -                  !capable(CAP_NET_ADMIN)) {
 -                      return -EPERM;
 -              }
 +      dev = __dev_get_by_name(net, ifr->ifr_name);
 +      if (dev) {
 +              if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
 +                      tun = netdev_priv(dev);
 +              else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops)
 +                      tun = netdev_priv(dev);
 +              else
 +                      return -EINVAL;
 +
 +              err = tun_attach(tun, file);
 +              if (err < 0)
 +                      return err;
        }
 -      else if (__dev_get_by_name(net, ifr->ifr_name))
 -              return -EINVAL;
        else {
                char *name;
                unsigned long flags = 0;
                        return -ENOMEM;
  
                dev_net_set(dev, net);
 +              dev->rtnl_link_ops = &tun_link_ops;
  
                tun = netdev_priv(dev);
                tun->dev = dev;
                tun->flags = flags;
                tun->txflt.count = 0;
  
 +              err = -ENOMEM;
 +              sk = sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
 +              if (!sk)
 +                      goto err_free_dev;
 +
 +              /* This ref count is for tun->sk. */
 +              dev_hold(dev);
 +              sock_init_data(&tun->socket, sk);
 +              sk->sk_write_space = tun_sock_write_space;
 +              sk->sk_destruct = tun_sock_destruct;
 +              sk->sk_sndbuf = INT_MAX;
 +              sk->sk_sleep = &tfile->read_wait;
 +
 +              tun->sk = sk;
 +              container_of(sk, struct tun_sock, sk)->tun = tun;
 +
                tun_net_init(dev);
  
                if (strchr(dev->name, '%')) {
                        err = dev_alloc_name(dev, dev->name);
                        if (err < 0)
 -                              goto err_free_dev;
 +                              goto err_free_sk;
                }
  
 +              err = -EINVAL;
                err = register_netdevice(tun->dev);
                if (err < 0)
                        goto err_free_dev;
  
 -              list_add(&tun->list, &tn->dev_list);
 +              err = tun_attach(tun, file);
 +              if (err < 0)
 +                      goto err_free_dev;
        }
  
        DBG(KERN_INFO "%s: tun_set_iff\n", tun->dev->name);
        else
                tun->flags &= ~TUN_VNET_HDR;
  
 -      file->private_data = tun;
 -      tun->attached = 1;
 -      get_net(dev_net(tun->dev));
 -
        /* Make sure persistent devices do not get stuck in
         * xoff state.
         */
        strcpy(ifr->ifr_name, tun->dev->name);
        return 0;
  
 + err_free_sk:
 +      sock_put(sk);
   err_free_dev:
        free_netdev(dev);
   failed:
  
  static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr)
  {
 -      struct tun_struct *tun = file->private_data;
 +      struct tun_struct *tun = tun_get(file);
  
        if (!tun)
                return -EBADFD;
        if (tun->flags & TUN_VNET_HDR)
                ifr->ifr_flags |= IFF_VNET_HDR;
  
 +      tun_put(tun);
        return 0;
  }
  
@@@ -1050,34 -901,22 +1056,34 @@@ static int set_offload(struct net_devic
  static int tun_chr_ioctl(struct inode *inode, struct file *file,
                         unsigned int cmd, unsigned long arg)
  {
 -      struct tun_struct *tun = file->private_data;
 +      struct tun_file *tfile = file->private_data;
 +      struct tun_struct *tun;
        void __user* argp = (void __user*)arg;
        struct ifreq ifr;
 +      int sndbuf;
        int ret;
  
        if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
                if (copy_from_user(&ifr, argp, sizeof ifr))
                        return -EFAULT;
  
 +      if (cmd == TUNGETFEATURES) {
 +              /* Currently this just means: "what IFF flags are valid?".
 +               * This is needed because we never checked for invalid flags on
 +               * TUNSETIFF. */
 +              return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
 +                              IFF_VNET_HDR,
 +                              (unsigned int __user*)argp);
 +      }
 +
 +      tun = __tun_get(tfile);
        if (cmd == TUNSETIFF && !tun) {
                int err;
  
                ifr.ifr_name[IFNAMSIZ-1] = '\0';
  
                rtnl_lock();
 -              err = tun_set_iff(current->nsproxy->net_ns, file, &ifr);
 +              err = tun_set_iff(tfile->net, file, &ifr);
                rtnl_unlock();
  
                if (err)
                return 0;
        }
  
 -      if (cmd == TUNGETFEATURES) {
 -              /* Currently this just means: "what IFF flags are valid?".
 -               * This is needed because we never checked for invalid flags on
 -               * TUNSETIFF. */
 -              return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
 -                              IFF_VNET_HDR,
 -                              (unsigned int __user*)argp);
 -      }
  
        if (!tun)
                return -EBADFD;
  
        DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d\n", tun->dev->name, cmd);
  
 +      ret = 0;
        switch (cmd) {
        case TUNGETIFF:
                ret = tun_get_iff(current->nsproxy->net_ns, file, &ifr);
                if (ret)
 -                      return ret;
 +                      break;
  
                if (copy_to_user(argp, &ifr, sizeof(ifr)))
 -                      return -EFAULT;
 +                      ret = -EFAULT;
                break;
  
        case TUNSETNOCSUM:
                        ret = 0;
                }
                rtnl_unlock();
 -              return ret;
 +              break;
  
  #ifdef TUN_DEBUG
        case TUNSETDEBUG:
                rtnl_lock();
                ret = set_offload(tun->dev, arg);
                rtnl_unlock();
 -              return ret;
 +              break;
  
        case TUNSETTXFILTER:
                /* Can be set only for TAPs */
 +              ret = -EINVAL;
                if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
 -                      return -EINVAL;
 +                      break;
                rtnl_lock();
                ret = update_filter(&tun->txflt, (void __user *)arg);
                rtnl_unlock();
 -              return ret;
 +              break;
  
        case SIOCGIFHWADDR:
                /* Get hw addres */
                memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
                ifr.ifr_hwaddr.sa_family = tun->dev->type;
                if (copy_to_user(argp, &ifr, sizeof ifr))
 -                      return -EFAULT;
 -              return 0;
 +                      ret = -EFAULT;
 +              break;
  
        case SIOCSIFHWADDR:
                /* Set hw address */
                rtnl_lock();
                ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
                rtnl_unlock();
 -              return ret;
 +              break;
 +
 +      case TUNGETSNDBUF:
 +              sndbuf = tun->sk->sk_sndbuf;
 +              if (copy_to_user(argp, &sndbuf, sizeof(sndbuf)))
 +                      ret = -EFAULT;
 +              break;
 +
 +      case TUNSETSNDBUF:
 +              if (copy_from_user(&sndbuf, argp, sizeof(sndbuf))) {
 +                      ret = -EFAULT;
 +                      break;
 +              }
 +
 +              tun->sk->sk_sndbuf = sndbuf;
 +              break;
  
        default:
 -              return -EINVAL;
 +              ret = -EINVAL;
 +              break;
        };
  
 -      return 0;
 +      tun_put(tun);
 +      return ret;
  }
  
  static int tun_chr_fasync(int fd, struct file *file, int on)
  {
 -      struct tun_struct *tun = file->private_data;
 +      struct tun_struct *tun = tun_get(file);
        int ret;
  
        if (!tun)
        ret = 0;
  out:
        unlock_kernel();
 +      tun_put(tun);
        return ret;
  }
  
  static int tun_chr_open(struct inode *inode, struct file * file)
  {
 +      struct tun_file *tfile;
        cycle_kernel_lock();
        DBG1(KERN_INFO "tunX: tun_chr_open\n");
 -      file->private_data = NULL;
 +
 +      tfile = kmalloc(sizeof(*tfile), GFP_KERNEL);
 +      if (!tfile)
 +              return -ENOMEM;
 +      atomic_set(&tfile->count, 0);
 +      tfile->tun = NULL;
 +      tfile->net = get_net(current->nsproxy->net_ns);
 +      init_waitqueue_head(&tfile->read_wait);
 +      file->private_data = tfile;
        return 0;
  }
  
  static int tun_chr_close(struct inode *inode, struct file *file)
  {
 -      struct tun_struct *tun = file->private_data;
 -
 -      if (!tun)
 -              return 0;
 +      struct tun_file *tfile = file->private_data;
 +      struct tun_struct *tun = __tun_get(tfile);
  
 -      DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name);
  
 -      rtnl_lock();
 +      if (tun) {
 +              DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name);
  
 -      /* Detach from net device */
 -      file->private_data = NULL;
 -      tun->attached = 0;
 -      put_net(dev_net(tun->dev));
 +              rtnl_lock();
 +              __tun_detach(tun);
  
 -      /* Drop read queue */
 -      skb_queue_purge(&tun->readq);
 +              /* If desirable, unregister the netdevice. */
 +              if (!(tun->flags & TUN_PERSIST)) {
 +                      sock_put(tun->sk);
 +                      unregister_netdevice(tun->dev);
 +              }
  
 -      if (!(tun->flags & TUN_PERSIST)) {
 -              list_del(&tun->list);
 -              unregister_netdevice(tun->dev);
 +              rtnl_unlock();
        }
  
 -      rtnl_unlock();
 +      put_net(tfile->net);
 +      kfree(tfile);
  
        return 0;
  }
@@@ -1367,7 -1187,7 +1373,7 @@@ static void tun_set_msglevel(struct net
  static u32 tun_get_link(struct net_device *dev)
  {
        struct tun_struct *tun = netdev_priv(dev);
 -      return tun->attached;
 +      return !!tun->tfile;
  }
  
  static u32 tun_get_rx_csum(struct net_device *dev)
@@@ -1396,6 -1216,45 +1402,6 @@@ static const struct ethtool_ops tun_eth
        .set_rx_csum    = tun_set_rx_csum
  };
  
 -static int tun_init_net(struct net *net)
 -{
 -      struct tun_net *tn;
 -
 -      tn = kmalloc(sizeof(*tn), GFP_KERNEL);
 -      if (tn == NULL)
 -              return -ENOMEM;
 -
 -      INIT_LIST_HEAD(&tn->dev_list);
 -
 -      if (net_assign_generic(net, tun_net_id, tn)) {
 -              kfree(tn);
 -              return -ENOMEM;
 -      }
 -
 -      return 0;
 -}
 -
 -static void tun_exit_net(struct net *net)
 -{
 -      struct tun_net *tn;
 -      struct tun_struct *tun, *nxt;
 -
 -      tn = net_generic(net, tun_net_id);
 -
 -      rtnl_lock();
 -      list_for_each_entry_safe(tun, nxt, &tn->dev_list, list) {
 -              DBG(KERN_INFO "%s cleaned up\n", tun->dev->name);
 -              unregister_netdevice(tun->dev);
 -      }
 -      rtnl_unlock();
 -
 -      kfree(tn);
 -}
 -
 -static struct pernet_operations tun_net_ops = {
 -      .init = tun_init_net,
 -      .exit = tun_exit_net,
 -};
  
  static int __init tun_init(void)
  {
        printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
        printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT);
  
 -      ret = register_pernet_gen_device(&tun_net_id, &tun_net_ops);
 +      ret = rtnl_link_register(&tun_link_ops);
        if (ret) {
 -              printk(KERN_ERR "tun: Can't register pernet ops\n");
 -              goto err_pernet;
 +              printk(KERN_ERR "tun: Can't register link_ops\n");
 +              goto err_linkops;
        }
  
        ret = misc_register(&tun_miscdev);
                printk(KERN_ERR "tun: Can't register misc device %d\n", TUN_MINOR);
                goto err_misc;
        }
 -      return 0;
 -
 +      return  0;
  err_misc:
 -      unregister_pernet_gen_device(tun_net_id, &tun_net_ops);
 -err_pernet:
 +      rtnl_link_unregister(&tun_link_ops);
 +err_linkops:
        return ret;
  }
  
  static void tun_cleanup(void)
  {
        misc_deregister(&tun_miscdev);
 -      unregister_pernet_gen_device(tun_net_id, &tun_net_ops);
 +      rtnl_link_unregister(&tun_link_ops);
  }
  
  module_init(tun_init);