net: fix __netdev_update_features return on ndo_set_features failure
diff --git a/net/core/dev.c b/net/core/dev.c
index b10ff0a..9a95205 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
+#include <linux/if_tunnel.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
+#include <linux/net_tstamp.h>
 
 #include "net-sysfs.h"
 
@@ -855,7 +859,7 @@ int dev_valid_name(const char *name)
                return 0;
 
        while (*name) {
-               if (*name == '/' || isspace(*name))
+               if (*name == '/' || *name == ':' || isspace(*name))
                        return 0;
                name++;
        }
@@ -1055,6 +1059,8 @@ rollback:
  */
 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 {
+       char *new_ifalias;
+
        ASSERT_RTNL();
 
        if (len >= IFALIASZ)
@@ -1068,9 +1074,10 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
                return 0;
        }
 
-       dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
-       if (!dev->ifalias)
+       new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
+       if (!new_ifalias)
                return -ENOMEM;
+       dev->ifalias = new_ifalias;
 
        strlcpy(dev->ifalias, alias, len+1);
        return len;
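
The ifalias change above is the canonical krealloc() fix: on failure krealloc() returns NULL but leaves the original allocation intact, so writing its result straight into dev->ifalias would leak the old buffer and wipe out the existing alias. A minimal sketch of the safe idiom (generic, not specific to this file):

    char *tmp;

    tmp = krealloc(ptr, new_len, GFP_KERNEL);
    if (!tmp)
            return -ENOMEM;   /* ptr is untouched and still owned */
    ptr = tmp;                /* commit only on success */
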
@@ -1173,6 +1180,7 @@ static int __dev_open(struct net_device *dev)
                net_dmaengine_get();
                dev_set_rx_mode(dev);
                dev_activate(dev);
+               add_device_randomness(dev->dev_addr, dev->addr_len);
        }
 
        return ret;
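
The same add_device_randomness() call is added below at MAC-address change and device registration; it mixes the (often unique) hardware address into the entropy input pool so otherwise-identical machines diverge early at boot. For reference, the interface as it appears in drivers/char/random.c of this era (it credits no entropy):

    void add_device_randomness(const void *buf, unsigned int size);
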
@@ -1392,7 +1400,7 @@ rollback:
        for_each_net(net) {
                for_each_netdev(net, dev) {
                        if (dev == last)
-                               break;
+                               goto outroll;
 
                        if (dev->flags & IFF_UP) {
                                nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
@@ -1403,6 +1411,7 @@ rollback:
                }
        }
 
+outroll:
        raw_notifier_chain_unregister(&netdev_chain, nb);
        goto unlock;
 }
@@ -1416,14 +1425,34 @@ EXPORT_SYMBOL(register_netdevice_notifier);
 *     register_netdevice_notifier(). The notifier is unlinked from the
 *     kernel structures and may then be reused. A negative errno code
  *     is returned on a failure.
+ *
+ *     After unregistering, unregister and down device events are
+ *     synthesized for every device on the device list and sent to the
+ *     removed notifier, removing the need for special-case cleanup code.
  */
 
 int unregister_netdevice_notifier(struct notifier_block *nb)
 {
+       struct net_device *dev;
+       struct net *net;
        int err;
 
        rtnl_lock();
        err = raw_notifier_chain_unregister(&netdev_chain, nb);
+       if (err)
+               goto unlock;
+
+       for_each_net(net) {
+               for_each_netdev(net, dev) {
+                       if (dev->flags & IFF_UP) {
+                               nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
+                               nb->notifier_call(nb, NETDEV_DOWN, dev);
+                       }
+                       nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+                       nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
+               }
+       }
+unlock:
        rtnl_unlock();
        return err;
 }
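
With the synthesized NETDEV_GOING_DOWN/NETDEV_DOWN/NETDEV_UNREGISTER events above, a subsystem can unregister its notifier and rely on the replayed events for per-device teardown instead of walking the device list itself. A sketch of such a notifier (the foo_* helpers are hypothetical; note that in this kernel the third argument is the net_device itself):

    static int foo_netdev_event(struct notifier_block *nb,
                                unsigned long event, void *ptr)
    {
            struct net_device *dev = ptr;

            switch (event) {
            case NETDEV_DOWN:
                    foo_flush_per_dev_cache(dev);   /* hypothetical helper */
                    break;
            case NETDEV_UNREGISTER:
                    foo_free_per_dev_state(dev);    /* hypothetical helper */
                    break;
            }
            return NOTIFY_DONE;
    }
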
@@ -1474,6 +1503,57 @@ static inline void net_timestamp_check(struct sk_buff *skb)
                __net_timestamp(skb);
 }
 
+static int net_hwtstamp_validate(struct ifreq *ifr)
+{
+       struct hwtstamp_config cfg;
+       enum hwtstamp_tx_types tx_type;
+       enum hwtstamp_rx_filters rx_filter;
+       int tx_type_valid = 0;
+       int rx_filter_valid = 0;
+
+       if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+               return -EFAULT;
+
+       if (cfg.flags) /* reserved for future extensions */
+               return -EINVAL;
+
+       tx_type = cfg.tx_type;
+       rx_filter = cfg.rx_filter;
+
+       switch (tx_type) {
+       case HWTSTAMP_TX_OFF:
+       case HWTSTAMP_TX_ON:
+       case HWTSTAMP_TX_ONESTEP_SYNC:
+               tx_type_valid = 1;
+               break;
+       }
+
+       switch (rx_filter) {
+       case HWTSTAMP_FILTER_NONE:
+       case HWTSTAMP_FILTER_ALL:
+       case HWTSTAMP_FILTER_SOME:
+       case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+       case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+       case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+               rx_filter_valid = 1;
+               break;
+       }
+
+       if (!tx_type_valid || !rx_filter_valid)
+               return -ERANGE;
+
+       return 0;
+}
+
 static inline bool is_skb_forwardable(struct net_device *dev,
                                      struct sk_buff *skb)
 {
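
net_hwtstamp_validate() above rejects malformed SIOCSHWTSTAMP requests in one place before dev_ifsioc() forwards them to the driver (see the SIOCSHWTSTAMP hunk further down). From userspace the request looks roughly like this (a sketch; "eth0" and the PTP filter choice are arbitrary):

    /* userspace; needs <net/if.h>, <sys/ioctl.h>, <linux/sockios.h>,
     * <linux/net_tstamp.h>; fd is any open AF_INET datagram socket */
    struct hwtstamp_config cfg;
    struct ifreq ifr;

    memset(&cfg, 0, sizeof(cfg));          /* cfg.flags must stay zero */
    cfg.tx_type   = HWTSTAMP_TX_ON;
    cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;

    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
    ifr.ifr_data = (char *)&cfg;

    if (ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0)
            perror("SIOCSHWTSTAMP");
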
@@ -1531,10 +1611,16 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
                kfree_skb(skb);
                return NET_RX_DROP;
        }
-       skb_set_dev(skb, dev);
+       skb->dev = dev;
+       skb_dst_drop(skb);
        skb->tstamp.tv64 = 0;
        skb->pkt_type = PACKET_HOST;
        skb->protocol = eth_type_trans(skb, dev);
+       skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+       skb->mark = 0;
+       secpath_reset(skb);
+       nf_reset(skb);
+       nf_reset_trace(skb);
        return netif_rx(skb);
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
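
dev_forward_skb() is the receive hook used by pair devices such as veth, and the peer may live in a different network namespace; hence the explicit scrubbing above of dst, mark, secpath and netfilter state, replacing the skb_set_dev() helper removed later in this patch. A sketch of the caller side, simplified from the veth model (get_peer() is a hypothetical lookup):

    static netdev_tx_t pair_xmit(struct sk_buff *skb, struct net_device *dev)
    {
            struct net_device *rcv = get_peer(dev);   /* hypothetical */

            if (dev_forward_skb(rcv, skb) == NET_RX_SUCCESS) {
                    /* account tx on dev / rx on rcv here */
            }
            return NETDEV_TX_OK;
    }
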
@@ -1547,6 +1633,19 @@ static inline int deliver_skb(struct sk_buff *skb,
        return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
+static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
+{
+       if (!ptype->af_packet_priv || !skb->sk)
+               return false;
+
+       if (ptype->id_match)
+               return ptype->id_match(ptype, skb->sk);
+       else if ((struct sock *)ptype->af_packet_priv == skb->sk)
+               return true;
+
+       return false;
+}
+
 /*
  *     Support routine. Sends outgoing frames to any network
  *     taps currently in use.
@@ -1564,8 +1663,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
                 * they originated from - MvS (miquels@drinkel.ow.org)
                 */
                if ((ptype->dev == dev || !ptype->dev) &&
-                   (ptype->af_packet_priv == NULL ||
-                    (struct sock *)ptype->af_packet_priv != skb->sk)) {
+                   (!skb_loop_sk(ptype, skb))) {
                        if (pt_prev) {
                                deliver_skb(skb2, pt_prev, skb->dev);
                                pt_prev = ptype;
@@ -1789,36 +1887,6 @@ void netif_device_attach(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_device_attach);
 
-/**
- * skb_dev_set -- assign a new device to a buffer
- * @skb: buffer for the new device
- * @dev: network device
- *
- * If an skb is owned by a device already, we have to reset
- * all data private to the namespace a device belongs to
- * before assigning it a new device.
- */
-#ifdef CONFIG_NET_NS
-void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
-{
-       skb_dst_drop(skb);
-       if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
-               secpath_reset(skb);
-               nf_reset(skb);
-               skb_init_secmark(skb);
-               skb->mark = 0;
-               skb->priority = 0;
-               skb->nf_trace = 0;
-               skb->ipvs_property = 0;
-#ifdef CONFIG_NET_SCHED
-               skb->tc_index = 0;
-#endif
-       }
-       skb->dev = dev;
-}
-EXPORT_SYMBOL(skb_set_dev);
-#endif /* CONFIG_NET_NS */
-
 /*
  * Invalidate hardware checksum when packet is to be mangled, and
  * complete checksum manually on outgoing path.
@@ -1955,9 +2023,11 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 #ifdef CONFIG_HIGHMEM
        int i;
        if (!(dev->features & NETIF_F_HIGHDMA)) {
-               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-                       if (PageHighMem(skb_shinfo(skb)->frags[i].page))
+               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+                       if (PageHighMem(skb_frag_page(frag)))
                                return 1;
+               }
        }
 
        if (PCI_DMA_BUS_IS_PHYS) {
@@ -1966,7 +2036,8 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
                if (!pdev)
                        return 0;
                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-                       dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
+                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+                       dma_addr_t addr = page_to_phys(skb_frag_page(frag));
                        if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
                                return 1;
                }
@@ -2026,25 +2097,6 @@ static int dev_gso_segment(struct sk_buff *skb, int features)
        return 0;
 }
 
-/*
- * Try to orphan skb early, right before transmission by the device.
- * We cannot orphan skb if tx timestamp is requested or the sk-reference
- * is needed on driver level for other reasons, e.g. see net/can/raw.c
- */
-static inline void skb_orphan_try(struct sk_buff *skb)
-{
-       struct sock *sk = skb->sk;
-
-       if (sk && !skb_shinfo(skb)->tx_flags) {
-               /* skb_tx_hash() wont be able to get sk.
-                * We copy sk_hash into skb->rxhash
-                */
-               if (!skb->rxhash)
-                       skb->rxhash = sk->sk_hash;
-               skb_orphan(skb);
-       }
-}
-
 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 {
        return ((features & NETIF_F_GEN_CSUM) ||
@@ -2058,7 +2110,8 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 
 static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
 {
-       if (!can_checksum_protocol(features, protocol)) {
+       if (skb->ip_summed != CHECKSUM_NONE &&
+           !can_checksum_protocol(features, protocol)) {
                features &= ~NETIF_F_ALL_CSUM;
                features &= ~NETIF_F_SG;
        } else if (illegal_highdma(skb->dev, skb)) {
@@ -2073,11 +2126,16 @@ u32 netif_skb_features(struct sk_buff *skb)
        __be16 protocol = skb->protocol;
        u32 features = skb->dev->features;
 
-       if (protocol == htons(ETH_P_8021Q)) {
-               struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-               protocol = veh->h_vlan_encapsulated_proto;
-       } else if (!vlan_tx_tag_present(skb)) {
-               return harmonize_features(skb, protocol, features);
+       if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
+               features &= ~NETIF_F_GSO_MASK;
+
+       if (!vlan_tx_tag_present(skb)) {
+               if (unlikely(protocol == htons(ETH_P_8021Q))) {
+                       struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+                       protocol = veh->h_vlan_encapsulated_proto;
+               } else {
+                       return harmonize_features(skb, protocol, features);
+               }
        }
 
        features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
@@ -2129,8 +2187,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                if (!list_empty(&ptype_all))
                        dev_queue_xmit_nit(skb, dev);
 
-               skb_orphan_try(skb);
-
                features = netif_skb_features(skb);
 
                if (vlan_tx_tag_present(skb) &&
@@ -2240,7 +2296,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
        if (skb->sk && skb->sk->sk_hash)
                hash = skb->sk->sk_hash;
        else
-               hash = (__force u16) skb->protocol ^ skb->rxhash;
+               hash = (__force u16) skb->protocol;
        hash = jhash_1word(hash, hashrnd);
 
        return (u16) (((u64) hash * qcount) >> 32) + qoffset;
@@ -2527,38 +2583,47 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 
 /*
  * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
- * and src/dst port numbers. Returns a non-zero hash number on success
- * and 0 on failure.
+ * and src/dst port numbers.  Sets rxhash in skb to non-zero hash value
+ * on success; zero indicates no valid hash.  Also sets l4_rxhash in skb
+ * if hash is a canonical 4-tuple hash over transport ports.
  */
-__u32 __skb_get_rxhash(struct sk_buff *skb)
+void __skb_get_rxhash(struct sk_buff *skb)
 {
        int nhoff, hash = 0, poff;
        const struct ipv6hdr *ip6;
        const struct iphdr *ip;
+       const struct vlan_hdr *vlan;
        u8 ip_proto;
-       u32 addr1, addr2, ihl;
+       u32 addr1, addr2;
+       u16 proto;
        union {
                u32 v32;
                u16 v16[2];
        } ports;
 
        nhoff = skb_network_offset(skb);
+       proto = skb->protocol;
 
-       switch (skb->protocol) {
+again:
+       switch (proto) {
        case __constant_htons(ETH_P_IP):
+ip:
                if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
                        goto done;
 
                ip = (const struct iphdr *) (skb->data + nhoff);
+               if (ip->ihl < 5)
+                       goto done;
                if (ip_is_fragment(ip))
                        ip_proto = 0;
                else
                        ip_proto = ip->protocol;
                addr1 = (__force u32) ip->saddr;
                addr2 = (__force u32) ip->daddr;
-               ihl = ip->ihl;
+               nhoff += ip->ihl * 4;
                break;
        case __constant_htons(ETH_P_IPV6):
+ipv6:
                if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
                        goto done;
 
@@ -2566,33 +2631,85 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
                ip_proto = ip6->nexthdr;
                addr1 = (__force u32) ip6->saddr.s6_addr32[3];
                addr2 = (__force u32) ip6->daddr.s6_addr32[3];
-               ihl = (40 >> 2);
+               nhoff += 40;
                break;
+       case __constant_htons(ETH_P_8021Q):
+               if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff))
+                       goto done;
+               vlan = (const struct vlan_hdr *) (skb->data + nhoff);
+               proto = vlan->h_vlan_encapsulated_proto;
+               nhoff += sizeof(*vlan);
+               goto again;
+       case __constant_htons(ETH_P_PPP_SES):
+               if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff))
+                       goto done;
+               proto = *((__be16 *) (skb->data + nhoff +
+                                     sizeof(struct pppoe_hdr)));
+               nhoff += PPPOE_SES_HLEN;
+               switch (proto) {
+               case __constant_htons(PPP_IP):
+                       goto ip;
+               case __constant_htons(PPP_IPV6):
+                       goto ipv6;
+               default:
+                       goto done;
+               }
        default:
                goto done;
        }
 
+       switch (ip_proto) {
+       case IPPROTO_GRE:
+               if (pskb_may_pull(skb, nhoff + 16)) {
+                       u8 *h = skb->data + nhoff;
+                       __be16 flags = *(__be16 *)h;
+
+                       /*
+                        * Only look inside GRE if version zero and no
+                        * routing
+                        */
+                       if (!(flags & (GRE_VERSION|GRE_ROUTING))) {
+                               proto = *(__be16 *)(h + 2);
+                               nhoff += 4;
+                               if (flags & GRE_CSUM)
+                                       nhoff += 4;
+                               if (flags & GRE_KEY)
+                                       nhoff += 4;
+                               if (flags & GRE_SEQ)
+                                       nhoff += 4;
+                               goto again;
+                       }
+               }
+               break;
+       case IPPROTO_IPIP:
+               goto again;
+       default:
+               break;
+       }
+
        ports.v32 = 0;
        poff = proto_ports_offset(ip_proto);
        if (poff >= 0) {
-               nhoff += ihl * 4 + poff;
+               nhoff += poff;
                if (pskb_may_pull(skb, nhoff + 4)) {
                        ports.v32 = * (__force u32 *) (skb->data + nhoff);
-                       if (ports.v16[1] < ports.v16[0])
-                               swap(ports.v16[0], ports.v16[1]);
+                       skb->l4_rxhash = 1;
                }
        }
 
        /* get a consistent hash (same value on both flow directions) */
-       if (addr2 < addr1)
+       if (addr2 < addr1 ||
+           (addr2 == addr1 &&
+            ports.v16[1] < ports.v16[0])) {
                swap(addr1, addr2);
-
+               swap(ports.v16[0], ports.v16[1]);
+       }
        hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
        if (!hash)
                hash = 1;
 
 done:
-       return hash;
+       skb->rxhash = hash;
 }
 EXPORT_SYMBOL(__skb_get_rxhash);
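
The reworked tail keeps the hash symmetric: addresses and ports are now swapped into canonical order together, so both directions of a flow still hash identically even though the unconditional port swap was removed. Worked through with illustrative numbers:

    /* A->B: addr1 = 10.0.0.1, addr2 = 10.0.0.2, ports = {5000, 80} -> kept
     * B->A: addr1 = 10.0.0.2, addr2 = 10.0.0.1, ports = {80, 5000} -> swapped
     * Both directions reach jhash_3words(10.0.0.1, 10.0.0.2, {5000, 80},
     * hashrnd) and therefore compute the same rxhash.
     */
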
 
@@ -2606,10 +2723,7 @@ static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
            struct rps_dev_flow *rflow, u16 next_cpu)
 {
-       u16 tcpu;
-
-       tcpu = rflow->cpu = next_cpu;
-       if (tcpu != RPS_NO_CPU) {
+       if (next_cpu != RPS_NO_CPU) {
 #ifdef CONFIG_RFS_ACCEL
                struct netdev_rx_queue *rxqueue;
                struct rps_dev_flow_table *flow_table;
@@ -2637,16 +2751,16 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                        goto out;
                old_rflow = rflow;
                rflow = &flow_table->flows[flow_id];
-               rflow->cpu = next_cpu;
                rflow->filter = rc;
                if (old_rflow->filter == rflow->filter)
                        old_rflow->filter = RPS_NO_FILTER;
        out:
 #endif
                rflow->last_qtail =
-                       per_cpu(softnet_data, tcpu).input_queue_head;
+                       per_cpu(softnet_data, next_cpu).input_queue_head;
        }
 
+       rflow->cpu = next_cpu;
        return rflow;
 }
 
@@ -2681,13 +2795,13 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
        map = rcu_dereference(rxqueue->rps_map);
        if (map) {
                if (map->len == 1 &&
-                   !rcu_dereference_raw(rxqueue->rps_flow_table)) {
+                   !rcu_access_pointer(rxqueue->rps_flow_table)) {
                        tcpu = map->cpus[0];
                        if (cpu_online(tcpu))
                                cpu = tcpu;
                        goto done;
                }
-       } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
+       } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
                goto done;
        }
 
@@ -2721,8 +2835,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                if (unlikely(tcpu != next_cpu) &&
                    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
                     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
-                     rflow->last_qtail)) >= 0))
+                     rflow->last_qtail)) >= 0)) {
+                       tcpu = next_cpu;
                        rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+               }
 
                if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
                        *rflowp = rflow;
@@ -2831,6 +2947,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
        local_irq_save(flags);
 
        rps_lock(sd);
+       if (!netif_running(skb->dev))
+               goto drop;
        if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
                if (skb_queue_len(&sd->input_pkt_queue)) {
 enqueue:
@@ -2851,6 +2969,7 @@ enqueue:
                goto enqueue;
        }
 
+drop:
        sd->dropped++;
        rps_unlock(sd);
 
@@ -3083,6 +3202,7 @@ int netdev_rx_handler_register(struct net_device *dev,
        if (dev->rx_handler)
                return -EBUSY;
 
+       /* Note: rx_handler_data must be set before rx_handler */
        rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
        rcu_assign_pointer(dev->rx_handler, rx_handler);
 
@@ -3102,8 +3222,13 @@ void netdev_rx_handler_unregister(struct net_device *dev)
 {
 
        ASSERT_RTNL();
-       rcu_assign_pointer(dev->rx_handler, NULL);
-       rcu_assign_pointer(dev->rx_handler_data, NULL);
+       RCU_INIT_POINTER(dev->rx_handler, NULL);
+       /* A reader seeing a non-NULL rx_handler in an rcu_read_lock()
+        * section is guaranteed to also see a non-NULL
+        * rx_handler_data.
+        */
+       synchronize_net();
+       RCU_INIT_POINTER(dev->rx_handler_data, NULL);
 }
 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
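
Register and unregister now form a matched publication protocol: rx_handler_data is published before rx_handler (see the note at the register site above), and rx_handler is cleared a full grace period before rx_handler_data. A sketch of the reader-side pattern this protects (handle() is a hypothetical consumer):

    rx_handler_func_t *rx_handler;

    rcu_read_lock();
    rx_handler = rcu_dereference(dev->rx_handler);
    if (rx_handler) {
            /* guaranteed non-NULL here: assigned before rx_handler was
             * published, cleared only after synchronize_net() */
            void *data = rcu_dereference(dev->rx_handler_data);

            handle(&skb, data);   /* hypothetical */
    }
    rcu_read_unlock();
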
 
@@ -3136,8 +3261,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
 
        pt_prev = NULL;
 
-       rcu_read_lock();
-
 another_round:
 
        __this_cpu_inc(softnet_data.processed);
@@ -3170,6 +3293,17 @@ another_round:
 ncls:
 #endif
 
+       if (vlan_tx_tag_present(skb)) {
+               if (pt_prev) {
+                       ret = deliver_skb(skb, pt_prev, orig_dev);
+                       pt_prev = NULL;
+               }
+               if (vlan_do_receive(&skb))
+                       goto another_round;
+               else if (unlikely(!skb))
+                       goto out;
+       }
+
        rx_handler = rcu_dereference(skb->dev->rx_handler);
        if (rx_handler) {
                if (pt_prev) {
@@ -3178,6 +3312,7 @@ ncls:
                }
                switch (rx_handler(&skb)) {
                case RX_HANDLER_CONSUMED:
+                       ret = NET_RX_SUCCESS;
                        goto out;
                case RX_HANDLER_ANOTHER:
                        goto another_round;
@@ -3190,17 +3325,8 @@ ncls:
                }
        }
 
-       if (vlan_tx_tag_present(skb)) {
-               if (pt_prev) {
-                       ret = deliver_skb(skb, pt_prev, orig_dev);
-                       pt_prev = NULL;
-               }
-               if (vlan_do_receive(&skb)) {
-                       ret = __netif_receive_skb(skb);
-                       goto out;
-               } else if (unlikely(!skb))
-                       goto out;
-       }
+       if (vlan_tx_nonzero_tag_present(skb))
+               skb->pkt_type = PACKET_OTHERHOST;
 
        /* deliver only exact match when indicated */
        null_or_dev = deliver_exact ? skb->dev : NULL;
@@ -3229,7 +3355,6 @@ ncls:
        }
 
 out:
-       rcu_read_unlock();
        return ret;
 }
 
@@ -3250,34 +3375,31 @@ out:
  */
 int netif_receive_skb(struct sk_buff *skb)
 {
+       int ret;
+
        if (netdev_tstamp_prequeue)
                net_timestamp_check(skb);
 
        if (skb_defer_rx_timestamp(skb))
                return NET_RX_SUCCESS;
 
+       rcu_read_lock();
+
 #ifdef CONFIG_RPS
        {
                struct rps_dev_flow voidflow, *rflow = &voidflow;
-               int cpu, ret;
-
-               rcu_read_lock();
-
-               cpu = get_rps_cpu(skb->dev, skb, &rflow);
+               int cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
                if (cpu >= 0) {
                        ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
                        rcu_read_unlock();
-               } else {
-                       rcu_read_unlock();
-                       ret = __netif_receive_skb(skb);
+                       return ret;
                }
-
-               return ret;
        }
-#else
-       return __netif_receive_skb(skb);
 #endif
+       ret = __netif_receive_skb(skb);
+       rcu_read_unlock();
+       return ret;
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
@@ -3429,10 +3551,10 @@ pull:
                skb->data_len -= grow;
 
                skb_shinfo(skb)->frags[0].page_offset += grow;
-               skb_shinfo(skb)->frags[0].size -= grow;
+               skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
 
-               if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
-                       put_page(skb_shinfo(skb)->frags[0].page);
+               if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
+                       skb_frag_unref(skb, 0);
                        memmove(skb_shinfo(skb)->frags,
                                skb_shinfo(skb)->frags + 1,
                                --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
@@ -3452,14 +3574,20 @@ static inline gro_result_t
 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
        struct sk_buff *p;
+       unsigned int maclen = skb->dev->hard_header_len;
 
        for (p = napi->gro_list; p; p = p->next) {
                unsigned long diffs;
 
                diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
                diffs |= p->vlan_tci ^ skb->vlan_tci;
-               diffs |= compare_ether_header(skb_mac_header(p),
-                                             skb_gro_mac_header(skb));
+               if (maclen == ETH_HLEN)
+                       diffs |= compare_ether_header(skb_mac_header(p),
+                                                     skb_gro_mac_header(skb));
+               else if (!diffs)
+                       diffs = memcmp(skb_mac_header(p),
+                                      skb_gro_mac_header(skb),
+                                      maclen);
                NAPI_GRO_CB(p)->same_flow = !diffs;
                NAPI_GRO_CB(p)->flush = 0;
        }
@@ -3496,11 +3624,10 @@ void skb_gro_reset_offset(struct sk_buff *skb)
        NAPI_GRO_CB(skb)->frag0_len = 0;
 
        if (skb->mac_header == skb->tail &&
-           !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
+           !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
                NAPI_GRO_CB(skb)->frag0 =
-                       page_address(skb_shinfo(skb)->frags[0].page) +
-                       skb_shinfo(skb)->frags[0].page_offset;
-               NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
+                       skb_frag_address(&skb_shinfo(skb)->frags[0]);
+               NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
        }
 }
 EXPORT_SYMBOL(skb_gro_reset_offset);
@@ -3516,10 +3643,12 @@ EXPORT_SYMBOL(napi_gro_receive);
 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 {
        __skb_pull(skb, skb_headlen(skb));
-       skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+       /* restore the reserve we had after netdev_alloc_skb_ip_align() */
+       skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
        skb->vlan_tci = 0;
        skb->dev = napi->dev;
        skb->skb_iif = 0;
+       skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
 
        napi->skb = skb;
 }
@@ -3661,8 +3790,10 @@ static int process_backlog(struct napi_struct *napi, int quota)
                unsigned int qlen;
 
                while ((skb = __skb_dequeue(&sd->process_queue))) {
+                       rcu_read_lock();
                        local_irq_enable();
                        __netif_receive_skb(skb);
+                       rcu_read_unlock();
                        local_irq_disable();
                        input_queue_head_incr(sd);
                        if (++work >= quota) {
@@ -3982,6 +4113,47 @@ static int dev_ifconf(struct net *net, char __user *arg)
 }
 
 #ifdef CONFIG_PROC_FS
+
+#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
+
+#define get_bucket(x) ((x) >> BUCKET_SPACE)
+#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
+#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
+
+static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
+{
+       struct net *net = seq_file_net(seq);
+       struct net_device *dev;
+       struct hlist_node *p;
+       struct hlist_head *h;
+       unsigned int count = 0, offset = get_offset(*pos);
+
+       h = &net->dev_name_head[get_bucket(*pos)];
+       hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
+               if (++count == offset)
+                       return dev;
+       }
+
+       return NULL;
+}
+
+static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
+{
+       struct net_device *dev;
+       unsigned int bucket;
+
+       do {
+               dev = dev_from_same_bucket(seq, pos);
+               if (dev)
+                       return dev;
+
+               bucket = get_bucket(*pos) + 1;
+               *pos = set_bucket_offset(bucket, 1);
+       } while (bucket < NETDEV_HASHENTRIES);
+
+       return NULL;
+}
+
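
The /proc iterator now packs its position into a single loff_t: the upper bits select a name-hash bucket and the lower BUCKET_SPACE bits hold a 1-based offset within that bucket, so dev_seq_next() can resume from a bucket rather than rescanning the whole device list. A round-trip sketch (assuming the in-tree NETDEV_HASHBITS of 8, so BUCKET_SPACE is 23):

    loff_t pos = set_bucket_offset(3, 42);
    /* get_bucket(pos) == 3, get_offset(pos) == 42:
     * the 42nd device on hash chain dev_name_head[3] */
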
 /*
  *     This is invoked by the /proc filesystem handler to display a device
  *     in detail.
@@ -3989,33 +4161,20 @@ static int dev_ifconf(struct net *net, char __user *arg)
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(RCU)
 {
-       struct net *net = seq_file_net(seq);
-       loff_t off;
-       struct net_device *dev;
-
        rcu_read_lock();
        if (!*pos)
                return SEQ_START_TOKEN;
 
-       off = 1;
-       for_each_netdev_rcu(net, dev)
-               if (off++ == *pos)
-                       return dev;
+       if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
+               return NULL;
 
-       return NULL;
+       return dev_from_bucket(seq, pos);
 }
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-       struct net_device *dev = v;
-
-       if (v == SEQ_START_TOKEN)
-               dev = first_net_device_rcu(seq_file_net(seq));
-       else
-               dev = next_net_device_rcu(dev);
-
        ++*pos;
-       return dev;
+       return dev_from_bucket(seq, pos);
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4361,7 +4520,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
        const struct net_device_ops *ops = dev->netdev_ops;
 
-       if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
+       if (ops->ndo_change_rx_flags)
                ops->ndo_change_rx_flags(dev, flags);
 }
 
@@ -4497,9 +4656,7 @@ void __dev_set_rx_mode(struct net_device *dev)
        if (!netif_device_present(dev))
                return;
 
-       if (ops->ndo_set_rx_mode)
-               ops->ndo_set_rx_mode(dev);
-       else {
+       if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
                /* Unicast address changes may only happen under the rtnl,
                 * therefore calling __dev_set_promiscuity here is safe.
                 */
@@ -4510,10 +4667,10 @@ void __dev_set_rx_mode(struct net_device *dev)
                        __dev_set_promiscuity(dev, -1);
                        dev->uc_promisc = false;
                }
-
-               if (ops->ndo_set_multicast_list)
-                       ops->ndo_set_multicast_list(dev);
        }
+
+       if (ops->ndo_set_rx_mode)
+               ops->ndo_set_rx_mode(dev);
 }
 
 void dev_set_rx_mode(struct net_device *dev)
@@ -4523,30 +4680,6 @@ void dev_set_rx_mode(struct net_device *dev)
        netif_addr_unlock_bh(dev);
 }
 
-/**
- *     dev_ethtool_get_settings - call device's ethtool_ops::get_settings()
- *     @dev: device
- *     @cmd: memory area for ethtool_ops::get_settings() result
- *
- *      The cmd arg is initialized properly (cleared and
- *      ethtool_cmd::cmd field set to ETHTOOL_GSET).
- *
- *     Return device's ethtool_ops::get_settings() result value or
- *     -EOPNOTSUPP when device doesn't expose
- *     ethtool_ops::get_settings() operation.
- */
-int dev_ethtool_get_settings(struct net_device *dev,
-                            struct ethtool_cmd *cmd)
-{
-       if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
-               return -EOPNOTSUPP;
-
-       memset(cmd, 0, sizeof(struct ethtool_cmd));
-       cmd->cmd = ETHTOOL_GSET;
-       return dev->ethtool_ops->get_settings(dev, cmd);
-}
-EXPORT_SYMBOL(dev_ethtool_get_settings);
-
 /**
  *     dev_get_flags - get flags reported to userspace
  *     @dev: device
@@ -4747,6 +4880,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
        err = ops->ndo_set_mac_address(dev, sa);
        if (!err)
                call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+       add_device_randomness(dev->dev_addr, dev->addr_len);
        return err;
 }
 EXPORT_SYMBOL(dev_set_mac_address);
@@ -4863,7 +4997,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                return -EOPNOTSUPP;
 
        case SIOCADDMULTI:
-               if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
+               if (!ops->ndo_set_rx_mode ||
                    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
                        return -EINVAL;
                if (!netif_device_present(dev))
@@ -4871,7 +5005,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
 
        case SIOCDELMULTI:
-               if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
+               if (!ops->ndo_set_rx_mode ||
                    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
                        return -EINVAL;
                if (!netif_device_present(dev))
@@ -4888,6 +5022,12 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                ifr->ifr_newname[IFNAMSIZ-1] = '\0';
                return dev_change_name(dev, ifr->ifr_newname);
 
+       case SIOCSHWTSTAMP:
+               err = net_hwtstamp_validate(ifr);
+               if (err)
+                       return err;
+               /* fall through */
+
        /*
         *      Unknown or private ioctl
         */
@@ -5164,6 +5304,7 @@ static void rollback_registered_many(struct list_head *head)
                unlist_netdevice(dev);
 
                dev->reg_state = NETREG_UNREGISTERING;
+               on_each_cpu(flush_backlog, dev, 1);
        }
 
        synchronize_net();
@@ -5202,7 +5343,7 @@ static void rollback_registered_many(struct list_head *head)
        dev = list_first_entry(head, struct net_device, unreg_list);
        call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
-       rcu_barrier();
+       synchronize_net();
 
        list_for_each_entry(dev, head, unreg_list)
                dev_put(dev);
@@ -5305,7 +5446,7 @@ int __netdev_update_features(struct net_device *dev)
                netdev_err(dev,
                        "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
                        err, features, dev->features);
-               return -1;
+               return 0;
        }
 
        if (!err)
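
This is the headline fix: a nonzero return from __netdev_update_features() means "dev->features changed", and the caller uses it to decide whether to emit NETDEV_FEAT_CHANGE. When ndo_set_features() fails, dev->features is left untouched, so returning -1 fired a spurious notification. The caller in question, as it reads in net/core/dev.c of this era:

    void netdev_update_features(struct net_device *dev)
    {
            if (__netdev_update_features(dev))
                    netdev_features_change(dev);   /* NETDEV_FEAT_CHANGE */
    }
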
@@ -5521,6 +5662,7 @@ int register_netdevice(struct net_device *dev)
        dev_init_scheduler(dev);
        dev_hold(dev);
        list_netdevice(dev);
+       add_device_randomness(dev->dev_addr, dev->addr_len);
 
        /* Notify protocols, that a new device appeared. */
        ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
@@ -5715,6 +5857,12 @@ void netdev_run_todo(void)
 
        __rtnl_unlock();
 
+       /* Wait for rcu callbacks to finish before attempting to drain
+        * the device list.  This usually avoids a 250ms wait.
+        */
+       if (!list_empty(&list))
+               rcu_barrier();
+
        while (!list_empty(&list)) {
                struct net_device *dev
                        = list_first_entry(&list, struct net_device, todo_list);
@@ -5729,14 +5877,12 @@ void netdev_run_todo(void)
 
                dev->reg_state = NETREG_UNREGISTERED;
 
-               on_each_cpu(flush_backlog, dev, 1);
-
                netdev_wait_allrefs(dev);
 
                /* paranoia */
                BUG_ON(netdev_refcnt_read(dev));
-               WARN_ON(rcu_dereference_raw(dev->ip_ptr));
-               WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
+               WARN_ON(rcu_access_pointer(dev->ip_ptr));
+               WARN_ON(rcu_access_pointer(dev->ip6_ptr));
                WARN_ON(dev->dn_ptr);
 
                if (dev->destructor)
@@ -5881,6 +6027,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        dev_net_set(dev, &init_net);
 
        dev->gso_max_size = GSO_MAX_SIZE;
+       dev->gso_max_segs = GSO_MAX_SEGS;
 
        INIT_LIST_HEAD(&dev->napi_list);
        INIT_LIST_HEAD(&dev->unreg_list);
@@ -5940,7 +6087,7 @@ void free_netdev(struct net_device *dev)
        kfree(dev->_rx);
 #endif
 
-       kfree(rcu_dereference_raw(dev->ingress_queue));
+       kfree(rcu_dereference_protected(dev->ingress_queue, 1));
 
        /* Flush device addresses */
        dev_addr_flush(dev);
@@ -6115,6 +6262,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
        */
        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
        call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
+       rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
 
        /*
         *      Flush the unicast and multicast chains
@@ -6188,10 +6336,20 @@ static int dev_cpu_callback(struct notifier_block *nfb,
                oldsd->output_queue = NULL;
                oldsd->output_queue_tailp = &oldsd->output_queue;
        }
-       /* Append NAPI poll list from offline CPU. */
-       if (!list_empty(&oldsd->poll_list)) {
-               list_splice_init(&oldsd->poll_list, &sd->poll_list);
-               raise_softirq_irqoff(NET_RX_SOFTIRQ);
+       /* Append NAPI poll list from offline CPU, with one exception:
+        * process_backlog() must be called by the cpu owning the percpu
+        * backlog.  We properly handle process_queue & input_pkt_queue later.
+        */
+       while (!list_empty(&oldsd->poll_list)) {
+               struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
+                                                           struct napi_struct,
+                                                           poll_list);
+
+               list_del_init(&napi->poll_list);
+               if (napi->poll == process_backlog)
+                       napi->state = 0;
+               else
+                       ____napi_schedule(sd, napi);
        }
 
        raise_softirq_irqoff(NET_TX_SOFTIRQ);
@@ -6202,7 +6360,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
                netif_rx(skb);
                input_queue_head_incr(oldsd);
        }
-       while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
+       while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
                netif_rx(skb);
                input_queue_head_incr(oldsd);
        }
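
The switch to skb_dequeue() picks the self-locking variant: __skb_dequeue() assumes the caller already holds the queue lock, whereas skb_dequeue() takes list->lock with interrupts disabled itself, the safe choice if anything can still reach the dead CPU's input_pkt_queue concurrently. For reference, the two flavors from skbuff.h:

    struct sk_buff *skb_dequeue(struct sk_buff_head *list);   /* takes list->lock */
    struct sk_buff *__skb_dequeue(struct sk_buff_head *list); /* lock held by caller */
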
@@ -6258,7 +6416,8 @@ static struct hlist_head *netdev_create_hash(void)
 /* Initialize per network namespace state */
 static int __net_init netdev_init(struct net *net)
 {
-       INIT_LIST_HEAD(&net->dev_base_head);
+       if (net != &init_net)
+               INIT_LIST_HEAD(&net->dev_base_head);
 
        net->dev_name_head = netdev_create_hash();
        if (net->dev_name_head == NULL)
@@ -6298,7 +6457,7 @@ const char *netdev_drivername(const struct net_device *dev)
        return empty;
 }
 
-static int __netdev_printk(const char *level, const struct net_device *dev,
+int __netdev_printk(const char *level, const struct net_device *dev,
                           struct va_format *vaf)
 {
        int r;
@@ -6313,6 +6472,7 @@ static int __netdev_printk(const char *level, const struct net_device *dev,
 
        return r;
 }
+EXPORT_SYMBOL(__netdev_printk);
 
 int netdev_printk(const char *level, const struct net_device *dev,
                  const char *format, ...)