Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
[pandora-kernel.git] / net / core / dev.c
index 231d312..edcf019 100644 (file)
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
+#include <linux/if_tunnel.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
+#include <linux/net_tstamp.h>
 
 #include "net-sysfs.h"
 
@@ -1474,6 +1478,57 @@ static inline void net_timestamp_check(struct sk_buff *skb)
                __net_timestamp(skb);
 }
 
+static int net_hwtstamp_validate(struct ifreq *ifr)
+{
+       struct hwtstamp_config cfg; /* kernel-side copy of the user request */
+       enum hwtstamp_tx_types tx_type;
+       enum hwtstamp_rx_filters rx_filter;
+       int tx_type_valid = 0; /* set only if tx_type hits a listed case */
+       int rx_filter_valid = 0; /* set only if rx_filter hits a listed case */
+
+       if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+               return -EFAULT; /* user buffer could not be read */
+
+       if (cfg.flags) /* reserved for future extensions */
+               return -EINVAL;
+
+       tx_type = cfg.tx_type;
+       rx_filter = cfg.rx_filter;
+
+       switch (tx_type) { /* no default: unlisted values stay invalid */
+       case HWTSTAMP_TX_OFF:
+       case HWTSTAMP_TX_ON:
+       case HWTSTAMP_TX_ONESTEP_SYNC:
+               tx_type_valid = 1;
+               break;
+       }
+
+       switch (rx_filter) { /* no default: unlisted values stay invalid */
+       case HWTSTAMP_FILTER_NONE:
+       case HWTSTAMP_FILTER_ALL:
+       case HWTSTAMP_FILTER_SOME:
+       case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+       case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+       case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+               rx_filter_valid = 1;
+               break;
+       }
+
+       if (!tx_type_valid || !rx_filter_valid)
+               return -ERANGE; /* tx_type or rx_filter not in the lists above */
+
+       return 0; /* flags, tx_type and rx_filter all passed */
+}
+
 static inline bool is_skb_forwardable(struct net_device *dev,
                                      struct sk_buff *skb)
 {
@@ -1955,9 +2010,11 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 #ifdef CONFIG_HIGHMEM
        int i;
        if (!(dev->features & NETIF_F_HIGHDMA)) {
-               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-                       if (PageHighMem(skb_shinfo(skb)->frags[i].page))
+               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+                       if (PageHighMem(skb_frag_page(frag)))
                                return 1;
+               }
        }
 
        if (PCI_DMA_BUS_IS_PHYS) {
@@ -1966,7 +2023,8 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
                if (!pdev)
                        return 0;
                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-                       dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
+                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+                       dma_addr_t addr = page_to_phys(skb_frag_page(frag));
                        if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
                                return 1;
                }
@@ -2527,25 +2585,31 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 
 /*
  * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
- * and src/dst port numbers. Returns a non-zero hash number on success
- * and 0 on failure.
+ * and src/dst port numbers.  Sets rxhash in skb to non-zero hash value
+ * on success, zero indicates no valid hash.  Also, sets l4_rxhash in skb
+ * if hash is a canonical 4-tuple hash over transport ports.
  */
-__u32 __skb_get_rxhash(struct sk_buff *skb)
+void __skb_get_rxhash(struct sk_buff *skb)
 {
        int nhoff, hash = 0, poff;
        const struct ipv6hdr *ip6;
        const struct iphdr *ip;
+       const struct vlan_hdr *vlan;
        u8 ip_proto;
-       u32 addr1, addr2, ihl;
+       u32 addr1, addr2;
+       u16 proto;
        union {
                u32 v32;
                u16 v16[2];
        } ports;
 
        nhoff = skb_network_offset(skb);
+       proto = skb->protocol;
 
-       switch (skb->protocol) {
+again:
+       switch (proto) {
        case __constant_htons(ETH_P_IP):
+ip:
                if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
                        goto done;
 
@@ -2556,9 +2620,10 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
                        ip_proto = ip->protocol;
                addr1 = (__force u32) ip->saddr;
                addr2 = (__force u32) ip->daddr;
-               ihl = ip->ihl;
+               nhoff += ip->ihl * 4;
                break;
        case __constant_htons(ETH_P_IPV6):
+ipv6:
                if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
                        goto done;
 
@@ -2566,20 +2631,71 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
                ip_proto = ip6->nexthdr;
                addr1 = (__force u32) ip6->saddr.s6_addr32[3];
                addr2 = (__force u32) ip6->daddr.s6_addr32[3];
-               ihl = (40 >> 2);
+               nhoff += 40;
                break;
+       case __constant_htons(ETH_P_8021Q):
+               if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff))
+                       goto done;
+               vlan = (const struct vlan_hdr *) (skb->data + nhoff);
+               proto = vlan->h_vlan_encapsulated_proto;
+               nhoff += sizeof(*vlan);
+               goto again;
+       case __constant_htons(ETH_P_PPP_SES):
+               if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff))
+                       goto done;
+               proto = *((__be16 *) (skb->data + nhoff +
+                                     sizeof(struct pppoe_hdr)));
+               nhoff += PPPOE_SES_HLEN;
+               switch (proto) {
+               case __constant_htons(PPP_IP):
+                       goto ip;
+               case __constant_htons(PPP_IPV6):
+                       goto ipv6;
+               default:
+                       goto done;
+               }
        default:
                goto done;
        }
 
+       switch (ip_proto) {
+       case IPPROTO_GRE:
+               if (pskb_may_pull(skb, nhoff + 16)) {
+                       u8 *h = skb->data + nhoff;
+                       __be16 flags = *(__be16 *)h;
+
+                       /*
+                        * Only look inside GRE if version zero and no
+                        * routing
+                        */
+                       if (!(flags & (GRE_VERSION|GRE_ROUTING))) {
+                               proto = *(__be16 *)(h + 2);
+                               nhoff += 4;
+                               if (flags & GRE_CSUM)
+                                       nhoff += 4;
+                               if (flags & GRE_KEY)
+                                       nhoff += 4;
+                               if (flags & GRE_SEQ)
+                                       nhoff += 4;
+                               goto again;
+                       }
+               }
+               break;
+       case IPPROTO_IPIP:
+               goto again;
+       default:
+               break;
+       }
+
        ports.v32 = 0;
        poff = proto_ports_offset(ip_proto);
        if (poff >= 0) {
-               nhoff += ihl * 4 + poff;
+               nhoff += poff;
                if (pskb_may_pull(skb, nhoff + 4)) {
                        ports.v32 = * (__force u32 *) (skb->data + nhoff);
                        if (ports.v16[1] < ports.v16[0])
                                swap(ports.v16[0], ports.v16[1]);
+                       skb->l4_rxhash = 1;
                }
        }
 
@@ -2592,7 +2708,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
                hash = 1;
 
 done:
-       return hash;
+       skb->rxhash = hash;
 }
 EXPORT_SYMBOL(__skb_get_rxhash);
 
@@ -2606,10 +2722,7 @@ static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
            struct rps_dev_flow *rflow, u16 next_cpu)
 {
-       u16 tcpu;
-
-       tcpu = rflow->cpu = next_cpu;
-       if (tcpu != RPS_NO_CPU) {
+       if (next_cpu != RPS_NO_CPU) {
 #ifdef CONFIG_RFS_ACCEL
                struct netdev_rx_queue *rxqueue;
                struct rps_dev_flow_table *flow_table;
@@ -2637,16 +2750,16 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                        goto out;
                old_rflow = rflow;
                rflow = &flow_table->flows[flow_id];
-               rflow->cpu = next_cpu;
                rflow->filter = rc;
                if (old_rflow->filter == rflow->filter)
                        old_rflow->filter = RPS_NO_FILTER;
        out:
 #endif
                rflow->last_qtail =
-                       per_cpu(softnet_data, tcpu).input_queue_head;
+                       per_cpu(softnet_data, next_cpu).input_queue_head;
        }
 
+       rflow->cpu = next_cpu;
        return rflow;
 }
 
@@ -2681,13 +2794,13 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
        map = rcu_dereference(rxqueue->rps_map);
        if (map) {
                if (map->len == 1 &&
-                   !rcu_dereference_raw(rxqueue->rps_flow_table)) {
+                   !rcu_access_pointer(rxqueue->rps_flow_table)) {
                        tcpu = map->cpus[0];
                        if (cpu_online(tcpu))
                                cpu = tcpu;
                        goto done;
                }
-       } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
+       } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
                goto done;
        }
 
@@ -3102,8 +3215,8 @@ void netdev_rx_handler_unregister(struct net_device *dev)
 {
 
        ASSERT_RTNL();
-       rcu_assign_pointer(dev->rx_handler, NULL);
-       rcu_assign_pointer(dev->rx_handler_data, NULL);
+       RCU_INIT_POINTER(dev->rx_handler, NULL);
+       RCU_INIT_POINTER(dev->rx_handler_data, NULL);
 }
 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 
@@ -3170,6 +3283,17 @@ another_round:
 ncls:
 #endif
 
+       if (vlan_tx_tag_present(skb)) {
+               if (pt_prev) {
+                       ret = deliver_skb(skb, pt_prev, orig_dev);
+                       pt_prev = NULL;
+               }
+               if (vlan_do_receive(&skb))
+                       goto another_round;
+               else if (unlikely(!skb))
+                       goto out;
+       }
+
        rx_handler = rcu_dereference(skb->dev->rx_handler);
        if (rx_handler) {
                if (pt_prev) {
@@ -3190,18 +3314,6 @@ ncls:
                }
        }
 
-       if (vlan_tx_tag_present(skb)) {
-               if (pt_prev) {
-                       ret = deliver_skb(skb, pt_prev, orig_dev);
-                       pt_prev = NULL;
-               }
-               if (vlan_do_receive(&skb)) {
-                       ret = __netif_receive_skb(skb);
-                       goto out;
-               } else if (unlikely(!skb))
-                       goto out;
-       }
-
        /* deliver only exact match when indicated */
        null_or_dev = deliver_exact ? skb->dev : NULL;
 
@@ -3429,10 +3541,10 @@ pull:
                skb->data_len -= grow;
 
                skb_shinfo(skb)->frags[0].page_offset += grow;
-               skb_shinfo(skb)->frags[0].size -= grow;
+               skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
 
-               if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
-                       put_page(skb_shinfo(skb)->frags[0].page);
+               if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
+                       skb_frag_unref(skb, 0);
                        memmove(skb_shinfo(skb)->frags,
                                skb_shinfo(skb)->frags + 1,
                                --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
@@ -3496,11 +3608,10 @@ void skb_gro_reset_offset(struct sk_buff *skb)
        NAPI_GRO_CB(skb)->frag0_len = 0;
 
        if (skb->mac_header == skb->tail &&
-           !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
+           !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
                NAPI_GRO_CB(skb)->frag0 =
-                       page_address(skb_shinfo(skb)->frags[0].page) +
-                       skb_shinfo(skb)->frags[0].page_offset;
-               NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
+                       skb_frag_address(&skb_shinfo(skb)->frags[0]);
+               NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
        }
 }
 EXPORT_SYMBOL(skb_gro_reset_offset);
@@ -3982,6 +4093,60 @@ static int dev_ifconf(struct net *net, char __user *arg)
 }
 
 #ifdef CONFIG_PROC_FS
+
+#define BUCKET_SPACE (32 - NETDEV_HASHBITS) /* width of the offset field in pos */
+
+struct dev_iter_state {
+       struct seq_net_private p;
+       unsigned int pos; /* (bucket << BUCKET_SPACE) | offset */
+};
+
+#define get_bucket(x) ((x) >> BUCKET_SPACE) /* high bits: hash bucket index */
+#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) /* low bits: entry index */
+#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) /* pack both into pos */
+
+static inline struct net_device *dev_from_same_bucket(struct seq_file *seq)
+{
+       struct dev_iter_state *state = seq->private;
+       struct net *net = seq_file_net(seq);
+       struct net_device *dev;
+       struct hlist_node *p;
+       struct hlist_head *h;
+       unsigned int count, bucket, offset;
+
+       bucket = get_bucket(state->pos);
+       offset = get_offset(state->pos);
+       h = &net->dev_name_head[bucket]; /* name-hash chain selected by pos */
+       count = 0;
+       hlist_for_each_entry_rcu(dev, p, h, name_hlist) { /* find entry #offset */
+               if (count++ == offset) { /* count is now offset + 1 */
+                       state->pos = set_bucket_offset(bucket, count);
+                       return dev;
+               }
+       }
+
+       return NULL; /* chain has no entry at this offset; pos is unchanged */
+}
+
+static inline struct net_device *dev_from_new_bucket(struct seq_file *seq)
+{
+       struct dev_iter_state *state = seq->private;
+       struct net_device *dev;
+       unsigned int bucket;
+
+       bucket = get_bucket(state->pos);
+       do {
+               dev = dev_from_same_bucket(seq); /* try the position held in pos */
+               if (dev)
+                       return dev;
+
+               bucket++;
+               state->pos = set_bucket_offset(bucket, 0); /* first slot of next bucket */
+       } while (bucket < NETDEV_HASHENTRIES);
+
+       return NULL; /* no device found up to the last bucket */
+}
+
 /*
  *     This is invoked by the /proc filesystem handler to display a device
  *     in detail.
@@ -3989,33 +4154,33 @@ static int dev_ifconf(struct net *net, char __user *arg)
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(RCU)
 {
-       struct net *net = seq_file_net(seq);
-       loff_t off;
-       struct net_device *dev;
+       struct dev_iter_state *state = seq->private;
 
        rcu_read_lock();
        if (!*pos)
                return SEQ_START_TOKEN;
 
-       off = 1;
-       for_each_netdev_rcu(net, dev)
-               if (off++ == *pos)
-                       return dev;
+       /* check for end of the hash */
+       if (state->pos == 0 && *pos > 1)
+               return NULL;
 
-       return NULL;
+       return dev_from_new_bucket(seq);
 }
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-       struct net_device *dev = v;
+       struct net_device *dev;
+
+       ++*pos;
 
        if (v == SEQ_START_TOKEN)
-               dev = first_net_device_rcu(seq_file_net(seq));
-       else
-               dev = next_net_device_rcu(dev);
+               return dev_from_new_bucket(seq);
 
-       ++*pos;
-       return dev;
+       dev = dev_from_same_bucket(seq);
+       if (dev)
+               return dev;
+
+       return dev_from_new_bucket(seq);
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4114,7 +4279,7 @@ static const struct seq_operations dev_seq_ops = {
 static int dev_seq_open(struct inode *inode, struct file *file)
 {
        return seq_open_net(inode, file, &dev_seq_ops,
-                           sizeof(struct seq_net_private));
+                           sizeof(struct dev_iter_state));
 }
 
 static const struct file_operations dev_seq_fops = {
@@ -4497,9 +4662,7 @@ void __dev_set_rx_mode(struct net_device *dev)
        if (!netif_device_present(dev))
                return;
 
-       if (ops->ndo_set_rx_mode)
-               ops->ndo_set_rx_mode(dev);
-       else {
+       if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
                /* Unicast addresses changes may only happen under the rtnl,
                 * therefore calling __dev_set_promiscuity here is safe.
                 */
@@ -4510,10 +4673,10 @@ void __dev_set_rx_mode(struct net_device *dev)
                        __dev_set_promiscuity(dev, -1);
                        dev->uc_promisc = false;
                }
-
-               if (ops->ndo_set_multicast_list)
-                       ops->ndo_set_multicast_list(dev);
        }
+
+       if (ops->ndo_set_rx_mode)
+               ops->ndo_set_rx_mode(dev);
 }
 
 void dev_set_rx_mode(struct net_device *dev)
@@ -4523,30 +4686,6 @@ void dev_set_rx_mode(struct net_device *dev)
        netif_addr_unlock_bh(dev);
 }
 
-/**
- *     dev_ethtool_get_settings - call device's ethtool_ops::get_settings()
- *     @dev: device
- *     @cmd: memory area for ethtool_ops::get_settings() result
- *
- *      The cmd arg is initialized properly (cleared and
- *      ethtool_cmd::cmd field set to ETHTOOL_GSET).
- *
- *     Return device's ethtool_ops::get_settings() result value or
- *     -EOPNOTSUPP when device doesn't expose
- *     ethtool_ops::get_settings() operation.
- */
-int dev_ethtool_get_settings(struct net_device *dev,
-                            struct ethtool_cmd *cmd)
-{
-       if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
-               return -EOPNOTSUPP;
-
-       memset(cmd, 0, sizeof(struct ethtool_cmd));
-       cmd->cmd = ETHTOOL_GSET;
-       return dev->ethtool_ops->get_settings(dev, cmd);
-}
-EXPORT_SYMBOL(dev_ethtool_get_settings);
-
 /**
  *     dev_get_flags - get flags reported to userspace
  *     @dev: device
@@ -4863,7 +5002,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                return -EOPNOTSUPP;
 
        case SIOCADDMULTI:
-               if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
+               if (!ops->ndo_set_rx_mode ||
                    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
                        return -EINVAL;
                if (!netif_device_present(dev))
@@ -4871,7 +5010,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
 
        case SIOCDELMULTI:
-               if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
+               if (!ops->ndo_set_rx_mode ||
                    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
                        return -EINVAL;
                if (!netif_device_present(dev))
@@ -4888,6 +5027,12 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                ifr->ifr_newname[IFNAMSIZ-1] = '\0';
                return dev_change_name(dev, ifr->ifr_newname);
 
+       case SIOCSHWTSTAMP:
+               err = net_hwtstamp_validate(ifr);
+               if (err)
+                       return err;
+               /* fall through */
+
        /*
         *      Unknown or private ioctl
         */
@@ -5202,7 +5347,7 @@ static void rollback_registered_many(struct list_head *head)
        dev = list_first_entry(head, struct net_device, unreg_list);
        call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
-       rcu_barrier();
+       synchronize_net();
 
        list_for_each_entry(dev, head, unreg_list)
                dev_put(dev);
@@ -5715,6 +5860,12 @@ void netdev_run_todo(void)
 
        __rtnl_unlock();
 
+       /* Wait for rcu callbacks to finish before attempting to drain
+        * the device list.  This usually avoids a 250ms wait.
+        */
+       if (!list_empty(&list))
+               rcu_barrier();
+
        while (!list_empty(&list)) {
                struct net_device *dev
                        = list_first_entry(&list, struct net_device, todo_list);
@@ -5735,8 +5886,8 @@ void netdev_run_todo(void)
 
                /* paranoia */
                BUG_ON(netdev_refcnt_read(dev));
-               WARN_ON(rcu_dereference_raw(dev->ip_ptr));
-               WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
+               WARN_ON(rcu_access_pointer(dev->ip_ptr));
+               WARN_ON(rcu_access_pointer(dev->ip6_ptr));
                WARN_ON(dev->dn_ptr);
 
                if (dev->destructor)
@@ -5940,7 +6091,7 @@ void free_netdev(struct net_device *dev)
        kfree(dev->_rx);
 #endif
 
-       kfree(rcu_dereference_raw(dev->ingress_queue));
+       kfree(rcu_dereference_protected(dev->ingress_queue, 1));
 
        /* Flush device addresses */
        dev_addr_flush(dev);
@@ -6115,6 +6266,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
        */
        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
        call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
+       rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
 
        /*
         *      Flush the unicast and multicast chains