Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6
[pandora-kernel.git] / net / ipv6 / route.c
index e7db701..fd0eec6 100644 (file)
@@ -97,6 +97,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
                                           struct in6_addr *gwaddr, int ifindex);
 #endif
 
+static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
+{      /* installed as .cow_metrics in ip6_dst_ops_template */
+       struct rt6_info *rt = (struct rt6_info *) dst;
+       struct inet_peer *peer;
+       u32 *p = NULL;  /* stays NULL when no peer ends up bound */
+
+       if (!rt->rt6i_peer)
+               rt6_bind_peer(rt, 1);   /* second argument is 'create' */
+
+       peer = rt->rt6i_peer;   /* re-read: the bind above may have set it */
+       if (peer) {
+               u32 *old_p = __DST_METRICS_PTR(old);
+               unsigned long prev, new;
+
+               p = peer->metrics;      /* metrics storage reached via the peer */
+               if (inet_metrics_new(peer))     /* then seed it from 'old' */
+                       memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+               new = (unsigned long) p;        /* value to put in _metrics */
+               prev = cmpxchg(&dst->_metrics, old, new);
+
+               if (prev != old) {      /* _metrics no longer held 'old' */
+                       p = __DST_METRICS_PTR(prev);    /* use what is there */
+                       if (prev & DST_METRICS_READ_ONLY)
+                               p = NULL;       /* flagged read-only: return NULL */
+               }
+       }
+       return p;
+}
+
 static struct dst_ops ip6_dst_ops_template = {
        .family                 =       AF_INET6,
        .protocol               =       cpu_to_be16(ETH_P_IPV6),
@@ -105,6 +135,7 @@ static struct dst_ops ip6_dst_ops_template = {
        .check                  =       ip6_dst_check,
        .default_advmss         =       ip6_default_advmss,
        .default_mtu            =       ip6_default_mtu,
+       .cow_metrics            =       ipv6_cow_metrics,
        .destroy                =       ip6_dst_destroy,
        .ifdown                 =       ip6_dst_ifdown,
        .negative_advice        =       ip6_negative_advice,
@@ -122,6 +153,12 @@ static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 }
 
+static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
+                                        unsigned long old)
+{      /* installed as .cow_metrics in ip6_dst_blackhole_ops */
+       return NULL;    /* both arguments are ignored */
+}
+
 static struct dst_ops ip6_dst_blackhole_ops = {
        .family                 =       AF_INET6,
        .protocol               =       cpu_to_be16(ETH_P_IPV6),
@@ -130,6 +167,11 @@ static struct dst_ops ip6_dst_blackhole_ops = {
        .default_mtu            =       ip6_blackhole_default_mtu,
        .default_advmss         =       ip6_default_advmss,
        .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
+       .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
+};
+
+static const u32 ip6_template_metrics[RTAX_MAX] = {
+       [RTAX_HOPLIMIT - 1] = 255,      /* only slot set; the rest are 0 */
 };
 
 static struct rt6_info ip6_null_entry_template = {
@@ -187,7 +229,7 @@ static struct rt6_info ip6_blk_hole_entry_template = {
 /* allocate dst with ip6_dst_ops */
 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
 {
-       return (struct rt6_info *)dst_alloc(ops);
+       return (struct rt6_info *)dst_alloc(ops, 0);
 }
 
 static void ip6_dst_destroy(struct dst_entry *dst)
@@ -206,6 +248,13 @@ static void ip6_dst_destroy(struct dst_entry *dst)
        }
 }
 
+static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);     /* starts at 0; no writer visible in this diff */
+
+static u32 rt6_peer_genid(void)
+{      /* value is cached in rt->rt6i_peer_genid by the callers */
+       return atomic_read(&__rt6_peer_genid);
+}
+
 void rt6_bind_peer(struct rt6_info *rt, int create)
 {
        struct inet_peer *peer;
@@ -213,6 +262,8 @@ void rt6_bind_peer(struct rt6_info *rt, int create)
        peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
        if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
                inet_putpeer(peer);
+       else
+               rt->rt6i_peer_genid = rt6_peer_genid();
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -555,17 +606,17 @@ do { \
 
 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
                                             struct fib6_table *table,
-                                            struct flowi *fl, int flags)
+                                            struct flowi6 *fl6, int flags)
 {
        struct fib6_node *fn;
        struct rt6_info *rt;
 
        read_lock_bh(&table->tb6_lock);
-       fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
+       fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
        rt = fn->leaf;
-       rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
-       BACKTRACK(net, &fl->fl6_src);
+       rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
+       BACKTRACK(net, &fl6->saddr);
 out:
        dst_use(&rt->dst, jiffies);
        read_unlock_bh(&table->tb6_lock);
@@ -576,19 +627,19 @@ out:
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
                            const struct in6_addr *saddr, int oif, int strict)
 {
-       struct flowi fl = {
-               .oif = oif,
-               .fl6_dst = *daddr,
+       struct flowi6 fl6 = {
+               .flowi6_oif = oif,
+               .daddr = *daddr,
        };
        struct dst_entry *dst;
        int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 
        if (saddr) {
-               memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
+               memcpy(&fl6.saddr, saddr, sizeof(*saddr));
                flags |= RT6_LOOKUP_F_HAS_SADDR;
        }
 
-       dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
+       dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
        if (dst->error == 0)
                return (struct rt6_info *) dst;
 
@@ -709,7 +760,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
 }
 
 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
-                                     struct flowi *fl, int flags)
+                                     struct flowi6 *fl6, int flags)
 {
        struct fib6_node *fn;
        struct rt6_info *rt, *nrt;
@@ -724,12 +775,12 @@ relookup:
        read_lock_bh(&table->tb6_lock);
 
 restart_2:
-       fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
+       fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 
 restart:
        rt = rt6_select(fn, oif, strict | reachable);
 
-       BACKTRACK(net, &fl->fl6_src);
+       BACKTRACK(net, &fl6->saddr);
        if (rt == net->ipv6.ip6_null_entry ||
            rt->rt6i_flags & RTF_CACHE)
                goto out;
@@ -738,9 +789,9 @@ restart:
        read_unlock_bh(&table->tb6_lock);
 
        if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
-               nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
+               nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
        else if (!(rt->dst.flags & DST_HOST))
-               nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
+               nrt = rt6_alloc_clone(rt, &fl6->daddr);
        else
                goto out2;
 
@@ -779,9 +830,9 @@ out2:
 }
 
 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
-                                           struct flowi *fl, int flags)
+                                           struct flowi6 *fl6, int flags)
 {
-       return ip6_pol_route(net, table, fl->iif, fl, flags);
+       return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 }
 
 void ip6_route_input(struct sk_buff *skb)
@@ -789,56 +840,54 @@ void ip6_route_input(struct sk_buff *skb)
        struct ipv6hdr *iph = ipv6_hdr(skb);
        struct net *net = dev_net(skb->dev);
        int flags = RT6_LOOKUP_F_HAS_SADDR;
-       struct flowi fl = {
-               .iif = skb->dev->ifindex,
-               .fl6_dst = iph->daddr,
-               .fl6_src = iph->saddr,
-               .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
-               .mark = skb->mark,
-               .proto = iph->nexthdr,
+       struct flowi6 fl6 = {
+               .flowi6_iif = skb->dev->ifindex,
+               .daddr = iph->daddr,
+               .saddr = iph->saddr,
+               .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
+               .flowi6_mark = skb->mark,
+               .flowi6_proto = iph->nexthdr,
        };
 
        if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
                flags |= RT6_LOOKUP_F_IFACE;
 
-       skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
+       skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
 }
 
 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
-                                            struct flowi *fl, int flags)
+                                            struct flowi6 *fl6, int flags)
 {
-       return ip6_pol_route(net, table, fl->oif, fl, flags);
+       return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 }
 
-struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
-                                   struct flowi *fl)
+struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
+                                   struct flowi6 *fl6)
 {
        int flags = 0;
 
-       if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
+       if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
                flags |= RT6_LOOKUP_F_IFACE;
 
-       if (!ipv6_addr_any(&fl->fl6_src))
+       if (!ipv6_addr_any(&fl6->saddr))
                flags |= RT6_LOOKUP_F_HAS_SADDR;
        else if (sk)
                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 
-       return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
+       return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 }
 
 EXPORT_SYMBOL(ip6_route_output);
 
-int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
+struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 {
-       struct rt6_info *ort = (struct rt6_info *) *dstp;
-       struct rt6_info *rt = (struct rt6_info *)
-               dst_alloc(&ip6_dst_blackhole_ops);
+       struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1);
+       struct rt6_info *ort = (struct rt6_info *) dst_orig;
        struct dst_entry *new = NULL;
 
        if (rt) {
                new = &rt->dst;
 
-               atomic_set(&new->__refcnt, 1);
                new->__use = 1;
                new->input = dst_discard;
                new->output = dst_discard;
@@ -864,11 +913,9 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
                dst_free(new);
        }
 
-       dst_release(*dstp);
-       *dstp = new;
-       return new ? 0 : -ENOMEM;
+       dst_release(dst_orig);
+       return new ? new : ERR_PTR(-ENOMEM);
 }
-EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
 
 /*
  *     Destination cache support functions
@@ -880,9 +927,14 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 
        rt = (struct rt6_info *) dst;
 
-       if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
+       if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
+               if (rt->rt6i_peer_genid != rt6_peer_genid()) {
+                       if (!rt->rt6i_peer)
+                               rt6_bind_peer(rt, 0);
+                       rt->rt6i_peer_genid = rt6_peer_genid();
+               }
                return dst;
-
+       }
        return NULL;
 }
 
@@ -933,7 +985,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
                        dst_metric_set(dst, RTAX_FEATURES, features);
                }
                dst_metric_set(dst, RTAX_MTU, mtu);
-               call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
        }
 }
 
@@ -1030,11 +1081,9 @@ out:
 
 int icmp6_dst_gc(void)
 {
-       struct dst_entry *dst, *next, **pprev;
+       struct dst_entry *dst, **pprev;
        int more = 0;
 
-       next = NULL;
-
        spin_lock_bh(&icmp6_dst_lock);
        pprev = &icmp6_dst_gc_list;
 
@@ -1402,16 +1451,16 @@ static int ip6_route_del(struct fib6_config *cfg)
  *     Handle redirects
  */
 struct ip6rd_flowi {
-       struct flowi fl;
+       struct flowi6 fl6;
        struct in6_addr gateway;
 };
 
 static struct rt6_info *__ip6_route_redirect(struct net *net,
                                             struct fib6_table *table,
-                                            struct flowi *fl,
+                                            struct flowi6 *fl6,
                                             int flags)
 {
-       struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
+       struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
        struct rt6_info *rt;
        struct fib6_node *fn;
 
@@ -1427,7 +1476,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
         */
 
        read_lock_bh(&table->tb6_lock);
-       fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
+       fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
                /*
@@ -1442,7 +1491,7 @@ restart:
                        continue;
                if (!(rt->rt6i_flags & RTF_GATEWAY))
                        continue;
-               if (fl->oif != rt->rt6i_dev->ifindex)
+               if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
                        continue;
                if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
                        continue;
@@ -1451,7 +1500,7 @@ restart:
 
        if (!rt)
                rt = net->ipv6.ip6_null_entry;
-       BACKTRACK(net, &fl->fl6_src);
+       BACKTRACK(net, &fl6->saddr);
 out:
        dst_hold(&rt->dst);
 
@@ -1468,10 +1517,10 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
        int flags = RT6_LOOKUP_F_HAS_SADDR;
        struct net *net = dev_net(dev);
        struct ip6rd_flowi rdfl = {
-               .fl = {
-                       .oif = dev->ifindex,
-                       .fl6_dst = *dest,
-                       .fl6_src = *src,
+               .fl6 = {
+                       .flowi6_oif = dev->ifindex,
+                       .daddr = *dest,
+                       .saddr = *src,
                },
        };
 
@@ -1480,7 +1529,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
        if (rt6_need_strict(dest))
                flags |= RT6_LOOKUP_F_IFACE;
 
-       return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
+       return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
                                                   flags, __ip6_route_redirect);
 }
 
@@ -1970,7 +2019,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
        rt->dst.output = ip6_output;
        rt->rt6i_dev = net->loopback_dev;
        rt->rt6i_idev = idev;
-       dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
        rt->dst.obsolete = -1;
 
        rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
@@ -1982,12 +2030,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
        if (IS_ERR(neigh)) {
                dst_free(&rt->dst);
 
-               /* We are casting this because that is the return
-                * value type.  But an errno encoded pointer is the
-                * same regardless of the underlying pointer type,
-                * and that's what we are returning.  So this is OK.
-                */
-               return (struct rt6_info *) neigh;
+               return ERR_CAST(neigh);
        }
        rt->rt6i_nexthop = neigh;
 
@@ -2348,7 +2391,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
        struct rt6_info *rt;
        struct sk_buff *skb;
        struct rtmsg *rtm;
-       struct flowi fl;
+       struct flowi6 fl6;
        int err, iif = 0;
 
        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
@@ -2356,27 +2399,27 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
                goto errout;
 
        err = -EINVAL;
-       memset(&fl, 0, sizeof(fl));
+       memset(&fl6, 0, sizeof(fl6));
 
        if (tb[RTA_SRC]) {
                if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
                        goto errout;
 
-               ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
+               ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
        }
 
        if (tb[RTA_DST]) {
                if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
                        goto errout;
 
-               ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
+               ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
        }
 
        if (tb[RTA_IIF])
                iif = nla_get_u32(tb[RTA_IIF]);
 
        if (tb[RTA_OIF])
-               fl.oif = nla_get_u32(tb[RTA_OIF]);
+               fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
 
        if (iif) {
                struct net_device *dev;
@@ -2399,10 +2442,10 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
        skb_reset_mac_header(skb);
        skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
 
-       rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
+       rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
        skb_dst_set(skb, &rt->dst);
 
-       err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
+       err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
                            RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
                            nlh->nlmsg_seq, 0, 0, 0);
        if (err < 0) {
@@ -2689,7 +2732,8 @@ static int __net_init ip6_route_net_init(struct net *net)
        net->ipv6.ip6_null_entry->dst.path =
                (struct dst_entry *)net->ipv6.ip6_null_entry;
        net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
-       dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
+       dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
+                        ip6_template_metrics, true);
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
        net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2700,7 +2744,8 @@ static int __net_init ip6_route_net_init(struct net *net)
        net->ipv6.ip6_prohibit_entry->dst.path =
                (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
        net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
-       dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
+       dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
+                        ip6_template_metrics, true);
 
        net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
                                               sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -2710,7 +2755,8 @@ static int __net_init ip6_route_net_init(struct net *net)
        net->ipv6.ip6_blk_hole_entry->dst.path =
                (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
        net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
-       dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
+       dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
+                        ip6_template_metrics, true);
 #endif
 
        net->ipv6.sysctl.flush_delay = 0;