Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 1 Aug 2012 01:43:13 +0000 (18:43 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 1 Aug 2012 01:43:13 +0000 (18:43 -0700)
Pull networking update from David S. Miller:
 "I think Eric Dumazet and I have dealt with all of the known routing
  cache removal fallout.  Some other minor fixes all around.

  1) Fix RCU of cached routes, particularly of output routes which require
     liberation via call_rcu() instead of call_rcu_bh().  From Eric
     Dumazet.

  2) Make sure we purge net device references in cached routes properly.

  3) TG3 driver bug fixes from Michael Chan.

  4) Fix reported 'expires' value in ipv6 routes, from Li Wei.

  5) TUN driver ioctl leaks kernel bytes to userspace, from Mathias
     Krause."

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (22 commits)
  ipv4: Properly purge netdev references on uncached routes.
  ipv4: Cache routes in nexthop exception entries.
  ipv4: percpu nh_rth_output cache
  ipv4: Restore old dst_free() behavior.
  bridge: make port attributes const
  ipv4: remove rt_cache_rebuild_count
  net: ipv4: fix RCU races on dst refcounts
  net: TCP early demux cleanup
  tun: Fix formatting.
  net/tun: fix ioctl() based info leaks
  tg3: Update version to 3.124
  tg3: Fix race condition in tg3_get_stats64()
  tg3: Add New 5719 Read DMA workaround
  tg3: Fix Read DMA workaround for 5719 A0.
  tg3: Request APE_LOCK_PHY before PHY access
  ipv6: fix incorrect route 'expires' value passed to userspace
  mISDN: Bugfix only few bytes are transfered on a connection
  seeq: use PTR_RET at init_module of driver
  bnx2x: remove cast around the kmalloc in bnx2x_prev_mark_path
  ipv4: clean up put_child
  ...

26 files changed:
Documentation/networking/ip-sysctl.txt
drivers/isdn/hardware/mISDN/avmfritz.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/broadcom/tg3.h
drivers/net/ethernet/qlogic/qlge/qlge_main.c
drivers/net/ethernet/seeq/seeq8005.c
drivers/net/tun.c
include/net/inet_sock.h
include/net/ip_fib.h
include/net/netns/ipv4.h
include/net/route.h
net/bridge/br_sysfs_if.c
net/core/rtnetlink.c
net/ipv4/fib_frontend.c
net/ipv4/fib_semantics.c
net/ipv4/fib_trie.c
net/ipv4/ip_input.c
net/ipv4/route.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv4/xfrm4_policy.c
net/ipv6/ip6_input.c
net/ipv6/route.c

index 406a522..ca447b3 100644 (file)
@@ -48,12 +48,6 @@ min_adv_mss - INTEGER
        The advertised MSS depends on the first hop route MTU, but will
        never be lower than this setting.
 
-rt_cache_rebuild_count - INTEGER
-       The per net-namespace route cache emergency rebuild threshold.
-       Any net-namespace having its route cache rebuilt due to
-       a hash bucket chain being too long more than this many times
-       will have its route caching disabled
-
 IP Fragmentation:
 
 ipfrag_high_thresh - INTEGER
index c08fc60..fa6ca47 100644 (file)
@@ -449,7 +449,8 @@ hdlc_fill_fifo(struct bchannel *bch)
 {
        struct fritzcard *fc = bch->hw;
        struct hdlc_hw *hdlc;
-       int count, fs, cnt = 0, idx, fillempty = 0;
+       int count, fs, cnt = 0, idx;
+       bool fillempty = false;
        u8 *p;
        u32 *ptr, val, addr;
 
@@ -462,7 +463,7 @@ hdlc_fill_fifo(struct bchannel *bch)
                        return;
                count = fs;
                p = bch->fill;
-               fillempty = 1;
+               fillempty = true;
        } else {
                count = bch->tx_skb->len - bch->tx_idx;
                if (count <= 0)
@@ -477,7 +478,7 @@ hdlc_fill_fifo(struct bchannel *bch)
                        hdlc->ctrl.sr.cmd |= HDLC_CMD_XME;
        }
        ptr = (u32 *)p;
-       if (fillempty) {
+       if (!fillempty) {
                pr_debug("%s.B%d: %d/%d/%d", fc->name, bch->nr, count,
                         bch->tx_idx, bch->tx_skb->len);
                bch->tx_idx += count;
index 9aaf863..dd451c3 100644 (file)
@@ -9360,8 +9360,7 @@ static int __devinit bnx2x_prev_mark_path(struct bnx2x *bp)
        struct bnx2x_prev_path_list *tmp_list;
        int rc;
 
-       tmp_list = (struct bnx2x_prev_path_list *)
-                   kmalloc(sizeof(struct bnx2x_prev_path_list), GFP_KERNEL);
+       tmp_list = kmalloc(sizeof(struct bnx2x_prev_path_list), GFP_KERNEL);
        if (!tmp_list) {
                BNX2X_ERR("Failed to allocate 'bnx2x_prev_path_list'\n");
                return -ENOMEM;
index 9a009fd..bf906c5 100644 (file)
@@ -92,7 +92,7 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits)
 
 #define DRV_MODULE_NAME                "tg3"
 #define TG3_MAJ_NUM                    3
-#define TG3_MIN_NUM                    123
+#define TG3_MIN_NUM                    124
 #define DRV_MODULE_VERSION     \
        __stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM)
 #define DRV_MODULE_RELDATE     "March 21, 2012"
@@ -672,6 +672,12 @@ static int tg3_ape_lock(struct tg3 *tp, int locknum)
                else
                        bit = 1 << tp->pci_fn;
                break;
+       case TG3_APE_LOCK_PHY0:
+       case TG3_APE_LOCK_PHY1:
+       case TG3_APE_LOCK_PHY2:
+       case TG3_APE_LOCK_PHY3:
+               bit = APE_LOCK_REQ_DRIVER;
+               break;
        default:
                return -EINVAL;
        }
@@ -723,6 +729,12 @@ static void tg3_ape_unlock(struct tg3 *tp, int locknum)
                else
                        bit = 1 << tp->pci_fn;
                break;
+       case TG3_APE_LOCK_PHY0:
+       case TG3_APE_LOCK_PHY1:
+       case TG3_APE_LOCK_PHY2:
+       case TG3_APE_LOCK_PHY3:
+               bit = APE_LOCK_GRANT_DRIVER;
+               break;
        default:
                return;
        }
@@ -1052,6 +1064,8 @@ static int tg3_readphy(struct tg3 *tp, int reg, u32 *val)
                udelay(80);
        }
 
+       tg3_ape_lock(tp, tp->phy_ape_lock);
+
        *val = 0x0;
 
        frame_val  = ((tp->phy_addr << MI_COM_PHY_ADDR_SHIFT) &
@@ -1086,6 +1100,8 @@ static int tg3_readphy(struct tg3 *tp, int reg, u32 *val)
                udelay(80);
        }
 
+       tg3_ape_unlock(tp, tp->phy_ape_lock);
+
        return ret;
 }
 
@@ -1105,6 +1121,8 @@ static int tg3_writephy(struct tg3 *tp, int reg, u32 val)
                udelay(80);
        }
 
+       tg3_ape_lock(tp, tp->phy_ape_lock);
+
        frame_val  = ((tp->phy_addr << MI_COM_PHY_ADDR_SHIFT) &
                      MI_COM_PHY_ADDR_MASK);
        frame_val |= ((reg << MI_COM_REG_ADDR_SHIFT) &
@@ -1135,6 +1153,8 @@ static int tg3_writephy(struct tg3 *tp, int reg, u32 val)
                udelay(80);
        }
 
+       tg3_ape_unlock(tp, tp->phy_ape_lock);
+
        return ret;
 }
 
@@ -9066,8 +9086,7 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
            GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57780 ||
            tg3_flag(tp, 57765_PLUS)) {
                val = tr32(TG3_RDMA_RSRVCTRL_REG);
-               if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 ||
-                   GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5720) {
+               if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0) {
                        val &= ~(TG3_RDMA_RSRVCTRL_TXMRGN_MASK |
                                 TG3_RDMA_RSRVCTRL_FIFO_LWM_MASK |
                                 TG3_RDMA_RSRVCTRL_FIFO_HWM_MASK);
@@ -9257,6 +9276,19 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
        tw32_f(RDMAC_MODE, rdmac_mode);
        udelay(40);
 
+       if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) {
+               for (i = 0; i < TG3_NUM_RDMA_CHANNELS; i++) {
+                       if (tr32(TG3_RDMA_LENGTH + (i << 2)) > TG3_MAX_MTU(tp))
+                               break;
+               }
+               if (i < TG3_NUM_RDMA_CHANNELS) {
+                       val = tr32(TG3_LSO_RD_DMA_CRPTEN_CTRL);
+                       val |= TG3_LSO_RD_DMA_TX_LENGTH_WA;
+                       tw32(TG3_LSO_RD_DMA_CRPTEN_CTRL, val);
+                       tg3_flag_set(tp, 5719_RDMA_BUG);
+               }
+       }
+
        tw32(RCVDCC_MODE, RCVDCC_MODE_ENABLE | RCVDCC_MODE_ATTN_ENABLE);
        if (!tg3_flag(tp, 5705_PLUS))
                tw32(MBFREE_MODE, MBFREE_MODE_ENABLE);
@@ -9616,6 +9648,16 @@ static void tg3_periodic_fetch_stats(struct tg3 *tp)
        TG3_STAT_ADD32(&sp->tx_ucast_packets, MAC_TX_STATS_UCAST);
        TG3_STAT_ADD32(&sp->tx_mcast_packets, MAC_TX_STATS_MCAST);
        TG3_STAT_ADD32(&sp->tx_bcast_packets, MAC_TX_STATS_BCAST);
+       if (unlikely(tg3_flag(tp, 5719_RDMA_BUG) &&
+                    (sp->tx_ucast_packets.low + sp->tx_mcast_packets.low +
+                     sp->tx_bcast_packets.low) > TG3_NUM_RDMA_CHANNELS)) {
+               u32 val;
+
+               val = tr32(TG3_LSO_RD_DMA_CRPTEN_CTRL);
+               val &= ~TG3_LSO_RD_DMA_TX_LENGTH_WA;
+               tw32(TG3_LSO_RD_DMA_CRPTEN_CTRL, val);
+               tg3_flag_clear(tp, 5719_RDMA_BUG);
+       }
 
        TG3_STAT_ADD32(&sp->rx_octets, MAC_RX_STATS_OCTETS);
        TG3_STAT_ADD32(&sp->rx_fragments, MAC_RX_STATS_FRAGMENTS);
@@ -12482,10 +12524,12 @@ static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev,
 {
        struct tg3 *tp = netdev_priv(dev);
 
-       if (!tp->hw_stats)
+       spin_lock_bh(&tp->lock);
+       if (!tp->hw_stats) {
+               spin_unlock_bh(&tp->lock);
                return &tp->net_stats_prev;
+       }
 
-       spin_lock_bh(&tp->lock);
        tg3_get_nstats(tp, stats);
        spin_unlock_bh(&tp->lock);
 
@@ -13648,6 +13692,23 @@ static int __devinit tg3_phy_probe(struct tg3 *tp)
        tg3_flag_set(tp, PAUSE_AUTONEG);
        tp->link_config.flowctrl = FLOW_CTRL_TX | FLOW_CTRL_RX;
 
+       if (tg3_flag(tp, ENABLE_APE)) {
+               switch (tp->pci_fn) {
+               case 0:
+                       tp->phy_ape_lock = TG3_APE_LOCK_PHY0;
+                       break;
+               case 1:
+                       tp->phy_ape_lock = TG3_APE_LOCK_PHY1;
+                       break;
+               case 2:
+                       tp->phy_ape_lock = TG3_APE_LOCK_PHY2;
+                       break;
+               case 3:
+                       tp->phy_ape_lock = TG3_APE_LOCK_PHY3;
+                       break;
+               }
+       }
+
        if (tg3_flag(tp, USE_PHYLIB))
                return tg3_phy_init(tp);
 
index a1b75cd..6d52cb2 100644 (file)
 #define TG3_LSO_RD_DMA_CRPTEN_CTRL     0x00004910
 #define TG3_LSO_RD_DMA_CRPTEN_CTRL_BLEN_BD_4K   0x00030000
 #define TG3_LSO_RD_DMA_CRPTEN_CTRL_BLEN_LSO_4K  0x000c0000
-/* 0x4914 --> 0x4c00 unused */
+#define TG3_LSO_RD_DMA_TX_LENGTH_WA     0x02000000
+/* 0x4914 --> 0x4be0 unused */
+
+#define TG3_NUM_RDMA_CHANNELS          4
+#define TG3_RDMA_LENGTH                        0x00004be0
 
 /* Write DMA control registers */
 #define WDMAC_MODE                     0x00004c00
@@ -2959,6 +2963,7 @@ enum TG3_FLAGS {
        TG3_FLAG_L1PLLPD_EN,
        TG3_FLAG_APE_HAS_NCSI,
        TG3_FLAG_4K_FIFO_LIMIT,
+       TG3_FLAG_5719_RDMA_BUG,
        TG3_FLAG_RESET_TASK_PENDING,
        TG3_FLAG_5705_PLUS,
        TG3_FLAG_IS_5788,
@@ -3107,6 +3112,7 @@ struct tg3 {
        int                             old_link;
 
        u8                              phy_addr;
+       u8                              phy_ape_lock;
 
        /* PHY info */
        u32                             phy_id;
index 3769f57..b53a3b6 100644 (file)
@@ -4682,6 +4682,7 @@ static int __devinit qlge_probe(struct pci_dev *pdev,
                NETIF_F_HW_VLAN_TX | NETIF_F_RXCSUM;
        ndev->features = ndev->hw_features |
                NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER;
+       ndev->vlan_features = ndev->hw_features;
 
        if (test_bit(QL_DMA64, &qdev->flags))
                ndev->features |= NETIF_F_HIGHDMA;
index 698edbb..d6e50de 100644 (file)
@@ -736,9 +736,7 @@ MODULE_PARM_DESC(irq, "SEEQ 8005 IRQ number");
 int __init init_module(void)
 {
        dev_seeq = seeq8005_probe(-1);
-       if (IS_ERR(dev_seeq))
-               return PTR_ERR(dev_seeq);
-       return 0;
+       return PTR_RET(dev_seeq);
 }
 
 void __exit cleanup_module(void)
index c62163e..926d4db 100644 (file)
@@ -1379,10 +1379,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
        int vnet_hdr_sz;
        int ret;
 
-       if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
+       if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) {
                if (copy_from_user(&ifr, argp, ifreq_len))
                        return -EFAULT;
-
+       } else {
+               memset(&ifr, 0, sizeof(ifr));
+       }
        if (cmd == TUNGETFEATURES) {
                /* Currently this just means: "what IFF flags are valid?".
                 * This is needed because we never checked for invalid flags on
index 613cfa4..83b567f 100644 (file)
@@ -249,4 +249,13 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
        return flags;
 }
 
+static inline void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+       struct dst_entry *dst = skb_dst(skb);
+
+       dst_hold(dst);
+       sk->sk_rx_dst = dst;
+       inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+}
+
 #endif /* _INET_SOCK_H */
index e69c3a4..926142e 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/rcupdate.h>
 #include <net/fib_rules.h>
 #include <net/inetpeer.h>
+#include <linux/percpu.h>
 
 struct fib_config {
        u8                      fc_dst_len;
@@ -54,6 +55,7 @@ struct fib_nh_exception {
        u32                             fnhe_pmtu;
        __be32                          fnhe_gw;
        unsigned long                   fnhe_expires;
+       struct rtable __rcu             *fnhe_rth;
        unsigned long                   fnhe_stamp;
 };
 
@@ -81,8 +83,8 @@ struct fib_nh {
        __be32                  nh_gw;
        __be32                  nh_saddr;
        int                     nh_saddr_genid;
-       struct rtable           *nh_rth_output;
-       struct rtable           *nh_rth_input;
+       struct rtable __rcu * __percpu *nh_pcpu_rth_output;
+       struct rtable __rcu     *nh_rth_input;
        struct fnhe_hash_bucket *nh_exceptions;
 };
 
index 0ffb8e3..1474dd6 100644 (file)
@@ -61,8 +61,6 @@ struct netns_ipv4 {
        int sysctl_icmp_ratelimit;
        int sysctl_icmp_ratemask;
        int sysctl_icmp_errors_use_inbound_ifaddr;
-       int sysctl_rt_cache_rebuild_count;
-       int current_rt_cache_rebuild_count;
 
        unsigned int sysctl_ping_group_range[2];
        long sysctl_tcp_mem[3];
index 8c52bc6..776a27f 100644 (file)
@@ -57,6 +57,8 @@ struct rtable {
 
        /* Miscellaneous cached information */
        u32                     rt_pmtu;
+
+       struct list_head        rt_uncached;
 };
 
 static inline bool rt_is_input_route(const struct rtable *rt)
@@ -107,6 +109,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct;
 struct in_device;
 extern int             ip_rt_init(void);
 extern void            rt_cache_flush(struct net *net, int how);
+extern void            rt_flush_dev(struct net_device *dev);
 extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
 extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
                                           struct sock *sk);
index 6229b62..13b36bd 100644 (file)
@@ -27,7 +27,7 @@ struct brport_attribute {
 };
 
 #define BRPORT_ATTR(_name,_mode,_show,_store)                  \
-struct brport_attribute brport_attr_##_name = {                \
+const struct brport_attribute brport_attr_##_name = {          \
        .attr = {.name = __stringify(_name),                    \
                 .mode = _mode },                               \
        .show   = _show,                                        \
@@ -164,7 +164,7 @@ static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router,
                   store_multicast_router);
 #endif
 
-static struct brport_attribute *brport_attrs[] = {
+static const struct brport_attribute *brport_attrs[] = {
        &brport_attr_path_cost,
        &brport_attr_priority,
        &brport_attr_port_id,
@@ -241,7 +241,7 @@ const struct sysfs_ops brport_sysfs_ops = {
 int br_sysfs_addif(struct net_bridge_port *p)
 {
        struct net_bridge *br = p->br;
-       struct brport_attribute **a;
+       const struct brport_attribute **a;
        int err;
 
        err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj,
index bc9e380..5ff949d 100644 (file)
@@ -625,9 +625,13 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
                .rta_id =  id,
        };
 
-       if (expires)
-               ci.rta_expires = jiffies_to_clock_t(expires);
+       if (expires) {
+               unsigned long clock;
 
+               clock = jiffies_to_clock_t(abs(expires));
+               clock = min_t(unsigned long, clock, INT_MAX);
+               ci.rta_expires = (expires > 0) ? clock : -clock;
+       }
        return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
 }
 EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
index 8732cc7..c43ae3f 100644 (file)
@@ -1046,6 +1046,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 
        if (event == NETDEV_UNREGISTER) {
                fib_disable_ip(dev, 2, -1);
+               rt_flush_dev(dev);
                return NOTIFY_DONE;
        }
 
index da0cc2e..da80dc1 100644 (file)
@@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
        },
 };
 
+static void rt_fibinfo_free(struct rtable __rcu **rtp)
+{
+       struct rtable *rt = rcu_dereference_protected(*rtp, 1);
+
+       if (!rt)
+               return;
+
+       /* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
+        * because we waited an RCU grace period before calling
+        * free_fib_info_rcu()
+        */
+
+       dst_free(&rt->dst);
+}
+
 static void free_nh_exceptions(struct fib_nh *nh)
 {
        struct fnhe_hash_bucket *hash = nh->nh_exceptions;
@@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh)
                        struct fib_nh_exception *next;
                        
                        next = rcu_dereference_protected(fnhe->fnhe_next, 1);
+
+                       rt_fibinfo_free(&fnhe->fnhe_rth);
+
                        kfree(fnhe);
 
                        fnhe = next;
@@ -161,6 +179,23 @@ static void free_nh_exceptions(struct fib_nh *nh)
        kfree(hash);
 }
 
+static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
+{
+       int cpu;
+
+       if (!rtp)
+               return;
+
+       for_each_possible_cpu(cpu) {
+               struct rtable *rt;
+
+               rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
+               if (rt)
+                       dst_free(&rt->dst);
+       }
+       free_percpu(rtp);
+}
+
 /* Release a nexthop info record */
 static void free_fib_info_rcu(struct rcu_head *head)
 {
@@ -171,10 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
                        dev_put(nexthop_nh->nh_dev);
                if (nexthop_nh->nh_exceptions)
                        free_nh_exceptions(nexthop_nh);
-               if (nexthop_nh->nh_rth_output)
-                       dst_free(&nexthop_nh->nh_rth_output->dst);
-               if (nexthop_nh->nh_rth_input)
-                       dst_free(&nexthop_nh->nh_rth_input->dst);
+               rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
+               rt_fibinfo_free(&nexthop_nh->nh_rth_input);
        } endfor_nexthops(fi);
 
        release_net(fi->fib_net);
@@ -804,6 +837,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
        fi->fib_nhs = nhs;
        change_nexthops(fi) {
                nexthop_nh->nh_parent = fi;
+               nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
        } endfor_nexthops(fi)
 
        if (cfg->fc_mx) {
index 18cbc15..f0cdb30 100644 (file)
@@ -159,7 +159,6 @@ struct trie {
 #endif
 };
 
-static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n);
 static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
                                  int wasfull);
 static struct rt_trie_node *resize(struct trie *t, struct tnode *tn);
@@ -473,7 +472,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
        }
 
        pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
-                sizeof(struct rt_trie_node) << bits);
+                sizeof(struct rt_trie_node *) << bits);
        return tn;
 }
 
@@ -490,7 +489,7 @@ static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *
        return ((struct tnode *) n)->pos == tn->pos + tn->bits;
 }
 
-static inline void put_child(struct trie *t, struct tnode *tn, int i,
+static inline void put_child(struct tnode *tn, int i,
                             struct rt_trie_node *n)
 {
        tnode_put_child_reorg(tn, i, n, -1);
@@ -754,8 +753,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
                                goto nomem;
                        }
 
-                       put_child(t, tn, 2*i, (struct rt_trie_node *) left);
-                       put_child(t, tn, 2*i+1, (struct rt_trie_node *) right);
+                       put_child(tn, 2*i, (struct rt_trie_node *) left);
+                       put_child(tn, 2*i+1, (struct rt_trie_node *) right);
                }
        }
 
@@ -776,9 +775,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
                        if (tkey_extract_bits(node->key,
                                              oldtnode->pos + oldtnode->bits,
                                              1) == 0)
-                               put_child(t, tn, 2*i, node);
+                               put_child(tn, 2*i, node);
                        else
-                               put_child(t, tn, 2*i+1, node);
+                               put_child(tn, 2*i+1, node);
                        continue;
                }
 
@@ -786,8 +785,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
                inode = (struct tnode *) node;
 
                if (inode->bits == 1) {
-                       put_child(t, tn, 2*i, rtnl_dereference(inode->child[0]));
-                       put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1]));
+                       put_child(tn, 2*i, rtnl_dereference(inode->child[0]));
+                       put_child(tn, 2*i+1, rtnl_dereference(inode->child[1]));
 
                        tnode_free_safe(inode);
                        continue;
@@ -817,22 +816,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
                 */
 
                left = (struct tnode *) tnode_get_child(tn, 2*i);
-               put_child(t, tn, 2*i, NULL);
+               put_child(tn, 2*i, NULL);
 
                BUG_ON(!left);
 
                right = (struct tnode *) tnode_get_child(tn, 2*i+1);
-               put_child(t, tn, 2*i+1, NULL);
+               put_child(tn, 2*i+1, NULL);
 
                BUG_ON(!right);
 
                size = tnode_child_length(left);
                for (j = 0; j < size; j++) {
-                       put_child(t, left, j, rtnl_dereference(inode->child[j]));
-                       put_child(t, right, j, rtnl_dereference(inode->child[j + size]));
+                       put_child(left, j, rtnl_dereference(inode->child[j]));
+                       put_child(right, j, rtnl_dereference(inode->child[j + size]));
                }
-               put_child(t, tn, 2*i, resize(t, left));
-               put_child(t, tn, 2*i+1, resize(t, right));
+               put_child(tn, 2*i, resize(t, left));
+               put_child(tn, 2*i+1, resize(t, right));
 
                tnode_free_safe(inode);
        }
@@ -877,7 +876,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
                        if (!newn)
                                goto nomem;
 
-                       put_child(t, tn, i/2, (struct rt_trie_node *)newn);
+                       put_child(tn, i/2, (struct rt_trie_node *)newn);
                }
 
        }
@@ -892,21 +891,21 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
                if (left == NULL) {
                        if (right == NULL)    /* Both are empty */
                                continue;
-                       put_child(t, tn, i/2, right);
+                       put_child(tn, i/2, right);
                        continue;
                }
 
                if (right == NULL) {
-                       put_child(t, tn, i/2, left);
+                       put_child(tn, i/2, left);
                        continue;
                }
 
                /* Two nonempty children */
                newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
-               put_child(t, tn, i/2, NULL);
-               put_child(t, newBinNode, 0, left);
-               put_child(t, newBinNode, 1, right);
-               put_child(t, tn, i/2, resize(t, newBinNode));
+               put_child(tn, i/2, NULL);
+               put_child(newBinNode, 0, left);
+               put_child(newBinNode, 1, right);
+               put_child(tn, i/2, resize(t, newBinNode));
        }
        tnode_free_safe(oldtnode);
        return tn;
@@ -1125,7 +1124,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
                node_set_parent((struct rt_trie_node *)l, tp);
 
                cindex = tkey_extract_bits(key, tp->pos, tp->bits);
-               put_child(t, tp, cindex, (struct rt_trie_node *)l);
+               put_child(tp, cindex, (struct rt_trie_node *)l);
        } else {
                /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
                /*
@@ -1155,12 +1154,12 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
                node_set_parent((struct rt_trie_node *)tn, tp);
 
                missbit = tkey_extract_bits(key, newpos, 1);
-               put_child(t, tn, missbit, (struct rt_trie_node *)l);
-               put_child(t, tn, 1-missbit, n);
+               put_child(tn, missbit, (struct rt_trie_node *)l);
+               put_child(tn, 1-missbit, n);
 
                if (tp) {
                        cindex = tkey_extract_bits(key, tp->pos, tp->bits);
-                       put_child(t, tp, cindex, (struct rt_trie_node *)tn);
+                       put_child(tp, cindex, (struct rt_trie_node *)tn);
                } else {
                        rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
                        tp = tn;
@@ -1619,7 +1618,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
 
        if (tp) {
                t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
-               put_child(t, tp, cindex, NULL);
+               put_child(tp, cindex, NULL);
                trie_rebalance(t, tp);
        } else
                RCU_INIT_POINTER(t->trie, NULL);
index 981ff1e..f1395a6 100644 (file)
@@ -325,14 +325,12 @@ static int ip_rcv_finish(struct sk_buff *skb)
                const struct net_protocol *ipprot;
                int protocol = iph->protocol;
 
-               rcu_read_lock();
                ipprot = rcu_dereference(inet_protos[protocol]);
                if (ipprot && ipprot->early_demux) {
                        ipprot->early_demux(skb);
                        /* must reload iph, skb->head might have changed */
                        iph = ip_hdr(skb);
                }
-               rcu_read_unlock();
        }
 
        /*
index fc1a81c..c035251 100644 (file)
@@ -147,6 +147,7 @@ static void          ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
                                           struct sk_buff *skb, u32 mtu);
 static void             ip_do_redirect(struct dst_entry *dst, struct sock *sk,
                                        struct sk_buff *skb);
+static void            ipv4_dst_destroy(struct dst_entry *dst);
 
 static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
                            int how)
@@ -170,6 +171,7 @@ static struct dst_ops ipv4_dst_ops = {
        .default_advmss =       ipv4_default_advmss,
        .mtu =                  ipv4_mtu,
        .cow_metrics =          ipv4_cow_metrics,
+       .destroy =              ipv4_dst_destroy,
        .ifdown =               ipv4_dst_ifdown,
        .negative_advice =      ipv4_negative_advice,
        .link_failure =         ipv4_link_failure,
@@ -587,11 +589,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
                build_sk_flow_key(fl4, sk);
 }
 
-static DEFINE_SEQLOCK(fnhe_seqlock);
+static inline void rt_free(struct rtable *rt)
+{
+       call_rcu(&rt->dst.rcu_head, dst_rcu_free);
+}
+
+static DEFINE_SPINLOCK(fnhe_lock);
 
 static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
 {
        struct fib_nh_exception *fnhe, *oldest;
+       struct rtable *orig;
 
        oldest = rcu_dereference(hash->chain);
        for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
@@ -599,6 +607,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
                if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
                        oldest = fnhe;
        }
+       orig = rcu_dereference(oldest->fnhe_rth);
+       if (orig) {
+               RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
+               rt_free(orig);
+       }
        return oldest;
 }
 
@@ -620,7 +633,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
        int depth;
        u32 hval = fnhe_hashfun(daddr);
 
-       write_seqlock_bh(&fnhe_seqlock);
+       spin_lock_bh(&fnhe_lock);
 
        hash = nh->nh_exceptions;
        if (!hash) {
@@ -667,7 +680,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
        fnhe->fnhe_stamp = jiffies;
 
 out_unlock:
-       write_sequnlock_bh(&fnhe_seqlock);
+       spin_unlock_bh(&fnhe_lock);
        return;
 }
 
@@ -1164,53 +1177,62 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
        return NULL;
 }
 
-static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
+static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
                              __be32 daddr)
 {
-       __be32 fnhe_daddr, gw;
-       unsigned long expires;
-       unsigned int seq;
-       u32 pmtu;
-
-restart:
-       seq = read_seqbegin(&fnhe_seqlock);
-       fnhe_daddr = fnhe->fnhe_daddr;
-       gw = fnhe->fnhe_gw;
-       pmtu = fnhe->fnhe_pmtu;
-       expires = fnhe->fnhe_expires;
-       if (read_seqretry(&fnhe_seqlock, seq))
-               goto restart;
-
-       if (daddr != fnhe_daddr)
-               return;
+       bool ret = false;
+
+       spin_lock_bh(&fnhe_lock);
 
-       if (pmtu) {
-               unsigned long diff = expires - jiffies;
+       if (daddr == fnhe->fnhe_daddr) {
+               struct rtable *orig;
 
-               if (time_before(jiffies, expires)) {
-                       rt->rt_pmtu = pmtu;
-                       dst_set_expires(&rt->dst, diff);
+               if (fnhe->fnhe_pmtu) {
+                       unsigned long expires = fnhe->fnhe_expires;
+                       unsigned long diff = expires - jiffies;
+
+                       if (time_before(jiffies, expires)) {
+                               rt->rt_pmtu = fnhe->fnhe_pmtu;
+                               dst_set_expires(&rt->dst, diff);
+                       }
                }
+               if (fnhe->fnhe_gw) {
+                       rt->rt_flags |= RTCF_REDIRECTED;
+                       rt->rt_gateway = fnhe->fnhe_gw;
+               }
+
+               orig = rcu_dereference(fnhe->fnhe_rth);
+               rcu_assign_pointer(fnhe->fnhe_rth, rt);
+               if (orig)
+                       rt_free(orig);
+
+               fnhe->fnhe_stamp = jiffies;
+               ret = true;
+       } else {
+               /* Routes we intend to cache in nexthop exception have
+                * the DST_NOCACHE bit clear.  However, if we are
+                * unsuccessful at storing this route into the cache
+                * we really need to set it.
+                */
+               rt->dst.flags |= DST_NOCACHE;
        }
-       if (gw) {
-               rt->rt_flags |= RTCF_REDIRECTED;
-               rt->rt_gateway = gw;
-       }
-       fnhe->fnhe_stamp = jiffies;
-}
+       spin_unlock_bh(&fnhe_lock);
 
-static inline void rt_free(struct rtable *rt)
-{
-       call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
+       return ret;
 }
 
-static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
+static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
 {
-       struct rtable *orig, *prev, **p = &nh->nh_rth_output;
-
-       if (rt_is_input_route(rt))
-               p = &nh->nh_rth_input;
+       struct rtable *orig, *prev, **p;
+       bool ret = true;
 
+       if (rt_is_input_route(rt)) {
+               p = (struct rtable **)&nh->nh_rth_input;
+       } else {
+               if (!nh->nh_pcpu_rth_output)
+                       goto nocache;
+               p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
+       }
        orig = *p;
 
        prev = cmpxchg(p, orig, rt);
@@ -1223,7 +1245,50 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
                 * unsuccessful at storing this route into the cache
                 * we really need to set it.
                 */
+nocache:
                rt->dst.flags |= DST_NOCACHE;
+               ret = false;
+       }
+
+       return ret;
+}
+
+static DEFINE_SPINLOCK(rt_uncached_lock);
+static LIST_HEAD(rt_uncached_list);
+
+static void rt_add_uncached_list(struct rtable *rt)
+{
+       spin_lock_bh(&rt_uncached_lock);
+       list_add_tail(&rt->rt_uncached, &rt_uncached_list);
+       spin_unlock_bh(&rt_uncached_lock);
+}
+
+static void ipv4_dst_destroy(struct dst_entry *dst)
+{
+       struct rtable *rt = (struct rtable *) dst;
+
+       if (dst->flags & DST_NOCACHE) {
+               spin_lock_bh(&rt_uncached_lock);
+               list_del(&rt->rt_uncached);
+               spin_unlock_bh(&rt_uncached_lock);
+       }
+}
+
+void rt_flush_dev(struct net_device *dev)
+{
+       if (!list_empty(&rt_uncached_list)) {
+               struct net *net = dev_net(dev);
+               struct rtable *rt;
+
+               spin_lock_bh(&rt_uncached_lock);
+               list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
+                       if (rt->dst.dev != dev)
+                               continue;
+                       rt->dst.dev = net->loopback_dev;
+                       dev_hold(rt->dst.dev);
+                       dev_put(dev);
+               }
+               spin_unlock_bh(&rt_uncached_lock);
        }
 }
 
@@ -1239,20 +1304,24 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
                           struct fib_nh_exception *fnhe,
                           struct fib_info *fi, u16 type, u32 itag)
 {
+       bool cached = false;
+
        if (fi) {
                struct fib_nh *nh = &FIB_RES_NH(*res);
 
                if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
                        rt->rt_gateway = nh->nh_gw;
-               if (unlikely(fnhe))
-                       rt_bind_exception(rt, fnhe, daddr);
                dst_init_metrics(&rt->dst, fi->fib_metrics, true);
 #ifdef CONFIG_IP_ROUTE_CLASSID
                rt->dst.tclassid = nh->nh_tclassid;
 #endif
-               if (!(rt->dst.flags & DST_NOCACHE))
-                       rt_cache_route(nh, rt);
+               if (unlikely(fnhe))
+                       cached = rt_bind_exception(rt, fnhe, daddr);
+               else if (!(rt->dst.flags & DST_NOCACHE))
+                       cached = rt_cache_route(nh, rt);
        }
+       if (unlikely(!cached))
+               rt_add_uncached_list(rt);
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
 #ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1319,6 +1388,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        rth->rt_iif     = 0;
        rth->rt_pmtu    = 0;
        rth->rt_gateway = 0;
+       INIT_LIST_HEAD(&rth->rt_uncached);
        if (our) {
                rth->dst.input= ip_local_deliver;
                rth->rt_flags |= RTCF_LOCAL;
@@ -1420,7 +1490,7 @@ static int __mkroute_input(struct sk_buff *skb,
        do_cache = false;
        if (res->fi) {
                if (!itag) {
-                       rth = FIB_RES_NH(*res).nh_rth_input;
+                       rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
                        if (rt_cache_valid(rth)) {
                                skb_dst_set_noref(skb, &rth->dst);
                                goto out;
@@ -1444,6 +1514,7 @@ static int __mkroute_input(struct sk_buff *skb,
        rth->rt_iif     = 0;
        rth->rt_pmtu    = 0;
        rth->rt_gateway = 0;
+       INIT_LIST_HEAD(&rth->rt_uncached);
 
        rth->dst.input = ip_forward;
        rth->dst.output = ip_output;
@@ -1582,7 +1653,7 @@ local_input:
        do_cache = false;
        if (res.fi) {
                if (!itag) {
-                       rth = FIB_RES_NH(res).nh_rth_input;
+                       rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
                        if (rt_cache_valid(rth)) {
                                skb_dst_set_noref(skb, &rth->dst);
                                err = 0;
@@ -1610,6 +1681,7 @@ local_input:
        rth->rt_iif     = 0;
        rth->rt_pmtu    = 0;
        rth->rt_gateway = 0;
+       INIT_LIST_HEAD(&rth->rt_uncached);
        if (res.type == RTN_UNREACHABLE) {
                rth->dst.input= ip_error;
                rth->dst.error= -err;
@@ -1748,19 +1820,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 
        fnhe = NULL;
        if (fi) {
+               struct rtable __rcu **prth;
+
                fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
-               if (!fnhe) {
-                       rth = FIB_RES_NH(*res).nh_rth_output;
-                       if (rt_cache_valid(rth)) {
-                               dst_hold(&rth->dst);
-                               return rth;
-                       }
+               if (fnhe)
+                       prth = &fnhe->fnhe_rth;
+               else
+                       prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
+               rth = rcu_dereference(*prth);
+               if (rt_cache_valid(rth)) {
+                       dst_hold(&rth->dst);
+                       return rth;
                }
        }
        rth = rt_dst_alloc(dev_out,
                           IN_DEV_CONF_GET(in_dev, NOPOLICY),
                           IN_DEV_CONF_GET(in_dev, NOXFRM),
-                          fi && !fnhe);
+                          fi);
        if (!rth)
                return ERR_PTR(-ENOBUFS);
 
@@ -1773,6 +1849,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
        rth->rt_iif     = orig_oif ? : 0;
        rth->rt_pmtu    = 0;
        rth->rt_gateway = 0;
+       INIT_LIST_HEAD(&rth->rt_uncached);
 
        RT_CACHE_STAT_INC(out_slow_tot);
 
@@ -2052,6 +2129,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
                rt->rt_type = ort->rt_type;
                rt->rt_gateway = ort->rt_gateway;
 
+               INIT_LIST_HEAD(&rt->rt_uncached);
+
                dst_free(new);
        }
 
index 5840c32..4b6487a 100644 (file)
@@ -783,13 +783,6 @@ static struct ctl_table ipv4_net_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec
        },
-       {
-               .procname       = "rt_cache_rebuild_count",
-               .data           = &init_net.ipv4.sysctl_rt_cache_rebuild_count,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec
-       },
        {
                .procname       = "ping_group_range",
                .data           = &init_net.ipv4.sysctl_ping_group_range,
@@ -829,8 +822,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
                table[5].data =
                        &net->ipv4.sysctl_icmp_ratemask;
                table[6].data =
-                       &net->ipv4.sysctl_rt_cache_rebuild_count;
-               table[7].data =
                        &net->ipv4.sysctl_ping_group_range;
 
        }
@@ -842,8 +833,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
        net->ipv4.sysctl_ping_group_range[0] = 1;
        net->ipv4.sysctl_ping_group_range[1] = 0;
 
-       net->ipv4.sysctl_rt_cache_rebuild_count = 4;
-
        tcp_init_mem(net);
 
        net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
index a356e1f..9be30b0 100644 (file)
@@ -5604,8 +5604,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
        tcp_set_state(sk, TCP_ESTABLISHED);
 
        if (skb != NULL) {
-               sk->sk_rx_dst = dst_clone(skb_dst(skb));
-               inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+               inet_sk_rx_dst_set(sk, skb);
                security_inet_conn_established(sk, skb);
        }
 
index 2fbd992..7f91e5a 100644 (file)
@@ -1617,19 +1617,19 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 #endif
 
        if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+               struct dst_entry *dst = sk->sk_rx_dst;
+
                sock_rps_save_rxhash(sk, skb);
-               if (sk->sk_rx_dst) {
-                       struct dst_entry *dst = sk->sk_rx_dst;
+               if (dst) {
                        if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
                            dst->ops->check(dst, 0) == NULL) {
                                dst_release(dst);
                                sk->sk_rx_dst = NULL;
                        }
                }
-               if (unlikely(sk->sk_rx_dst == NULL)) {
-                       sk->sk_rx_dst = dst_clone(skb_dst(skb));
-                       inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
-               }
+               if (unlikely(sk->sk_rx_dst == NULL))
+                       inet_sk_rx_dst_set(sk, skb);
+
                if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
                        rsk = sk;
                        goto reset;
index 3f1cc20..232a90c 100644 (file)
@@ -387,8 +387,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
                struct tcp_sock *oldtp = tcp_sk(sk);
                struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
 
-               newsk->sk_rx_dst = dst_clone(skb_dst(skb));
-               inet_sk(newsk)->rx_dst_ifindex = skb->skb_iif;
+               inet_sk_rx_dst_set(newsk, skb);
 
                /* TCP Cookie Transactions require space for the cookie pair,
                 * as it differs for each connection.  There is no need to
index c628184..681ea2f 100644 (file)
@@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
        xdst->u.rt.rt_type = rt->rt_type;
        xdst->u.rt.rt_gateway = rt->rt_gateway;
        xdst->u.rt.rt_pmtu = rt->rt_pmtu;
+       INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
 
        return 0;
 }
index 47975e3..a52d864 100644 (file)
@@ -52,11 +52,9 @@ int ip6_rcv_finish(struct sk_buff *skb)
        if (sysctl_ip_early_demux && !skb_dst(skb)) {
                const struct inet6_protocol *ipprot;
 
-               rcu_read_lock();
                ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
                if (ipprot && ipprot->early_demux)
                        ipprot->early_demux(skb);
-               rcu_read_unlock();
        }
        if (!skb_dst(skb))
                ip6_route_input(skb);
index cf02cb9..8e80fd2 100644 (file)
@@ -2480,12 +2480,8 @@ static int rt6_fill_node(struct net *net,
                goto nla_put_failure;
        if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
                goto nla_put_failure;
-       if (!(rt->rt6i_flags & RTF_EXPIRES))
-               expires = 0;
-       else if (rt->dst.expires - jiffies < INT_MAX)
-               expires = rt->dst.expires - jiffies;
-       else
-               expires = INT_MAX;
+
+       expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
 
        if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
                goto nla_put_failure;