Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6
[pandora-kernel.git] / net / ipv6 / route.c
index 1a314bc..7907874 100644 (file)
@@ -98,6 +98,14 @@ static int           ip6_pkt_discard_out(struct sk_buff *skb);
 static void            ip6_link_failure(struct sk_buff *skb);
 static void            ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 
+#ifdef CONFIG_IPV6_ROUTE_INFO
+static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
+                                          struct in6_addr *gwaddr, int ifindex,
+                                          unsigned pref);
+static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
+                                          struct in6_addr *gwaddr, int ifindex);
+#endif
+
 static struct dst_ops ip6_dst_ops = {
        .family                 =       AF_INET6,
        .protocol               =       __constant_htons(ETH_P_IPV6),
@@ -218,6 +226,42 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
        return rt;
 }
 
+#ifdef CONFIG_IPV6_ROUTER_PREF
+static void rt6_probe(struct rt6_info *rt)
+{
+       struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
+       /*
+        * Okay, this does not seem to be appropriate
+        * for now, however, we need to check if it
+        * is really so; aka Router Reachability Probing.
+        *
+        * Router Reachability Probe MUST be rate-limited
+        * to no more than one per minute.
+        */
+       if (!neigh || (neigh->nud_state & NUD_VALID))
+               return;
+       read_lock_bh(&neigh->lock);
+       if (!(neigh->nud_state & NUD_VALID) &&
+           time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
+               struct in6_addr mcaddr;
+               struct in6_addr *target;
+
+               neigh->updated = jiffies;
+               read_unlock_bh(&neigh->lock);
+
+               target = (struct in6_addr *)&neigh->primary_key;
+               addrconf_addr_solict_mult(target, &mcaddr);
+               ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
+       } else
+               read_unlock_bh(&neigh->lock);
+}
+#else
+static inline void rt6_probe(struct rt6_info *rt)
+{
+       return;
+}
+#endif
+
 /*
  * Default Router Selection (RFC 2461 6.3.6)
  */
@@ -251,8 +295,11 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
        int m = rt6_check_dev(rt, oif);
        if (!m && (strict & RT6_SELECT_F_IFACE))
                return -1;
+#ifdef CONFIG_IPV6_ROUTER_PREF
+       m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
+#endif
        if (rt6_check_neigh(rt))
-               m |= 4;
+               m |= 16;
        else if (strict & RT6_SELECT_F_REACHABLE)
                return -1;
        return m;
@@ -284,8 +331,11 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
                        continue;
 
                if (m > mpri) {
+                       rt6_probe(match);
                        match = rt;
                        mpri = m;
+               } else {
+                       rt6_probe(rt);
                }
        }
 
@@ -304,6 +354,84 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
        return (match ? match : &ip6_null_entry);
 }
 
+#ifdef CONFIG_IPV6_ROUTE_INFO
+int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
+                 struct in6_addr *gwaddr)
+{
+       struct route_info *rinfo = (struct route_info *) opt;
+       struct in6_addr prefix_buf, *prefix;
+       unsigned int pref;
+       u32 lifetime;
+       struct rt6_info *rt;
+
+       if (len < sizeof(struct route_info)) {
+               return -EINVAL;
+       }
+
+       /* Sanity check for prefix_len and length */
+       if (rinfo->length > 3) {
+               return -EINVAL;
+       } else if (rinfo->prefix_len > 128) {
+               return -EINVAL;
+       } else if (rinfo->prefix_len > 64) {
+               if (rinfo->length < 2) {
+                       return -EINVAL;
+               }
+       } else if (rinfo->prefix_len > 0) {
+               if (rinfo->length < 1) {
+                       return -EINVAL;
+               }
+       }
+
+       pref = rinfo->route_pref;
+       if (pref == ICMPV6_ROUTER_PREF_INVALID)
+               pref = ICMPV6_ROUTER_PREF_MEDIUM;
+
+       lifetime = htonl(rinfo->lifetime);
+       if (lifetime == 0xffffffff) {
+               /* infinity */
+       } else if (lifetime > 0x7fffffff/HZ) {
+               /* Avoid arithmetic overflow */
+               lifetime = 0x7fffffff/HZ - 1;
+       }
+
+       if (rinfo->length == 3)
+               prefix = (struct in6_addr *)rinfo->prefix;
+       else {
+               /* this function is safe */
+               ipv6_addr_prefix(&prefix_buf,
+                                (struct in6_addr *)rinfo->prefix,
+                                rinfo->prefix_len);
+               prefix = &prefix_buf;
+       }
+
+       rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
+
+       if (rt && !lifetime) {
+               ip6_del_rt(rt, NULL, NULL, NULL);
+               rt = NULL;
+       }
+
+       if (!rt && lifetime)
+               rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
+                                       pref);
+       else if (rt)
+               rt->rt6i_flags = RTF_ROUTEINFO |
+                                (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
+
+       if (rt) {
+               if (lifetime == 0xffffffff) {
+                       rt->rt6i_flags &= ~RTF_EXPIRES;
+               } else {
+                       rt->rt6i_expires = jiffies + HZ * lifetime;
+                       rt->rt6i_flags |= RTF_EXPIRES;
+               }
+               dst_release(&rt->u.dst);
+       }
+       return 0;
+}
+#endif
+
 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
                            int oif, int strict)
 {
@@ -396,7 +524,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
 }
 
 #define BACKTRACK() \
-if (rt == &ip6_null_entry && strict) { \
+if (rt == &ip6_null_entry) { \
        while ((fn = fn->parent) != NULL) { \
                if (fn->fn_flags & RTN_ROOT) { \
                        goto out; \
@@ -414,23 +542,22 @@ void ip6_route_input(struct sk_buff *skb)
        int strict;
        int attempts = 3;
        int err;
+       int reachable = RT6_SELECT_F_REACHABLE;
 
        strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
 
 relookup:
        read_lock_bh(&rt6_lock);
 
+restart_2:
        fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
                         &skb->nh.ipv6h->saddr);
 
 restart:
-       rt = fn->leaf;
-
-       rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | RT6_SELECT_F_REACHABLE);
-       if (rt == &ip6_null_entry)
-               rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict);
+       rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
        BACKTRACK();
-       if ((rt->rt6i_flags & RTF_CACHE))
+       if (rt == &ip6_null_entry ||
+           rt->rt6i_flags & RTF_CACHE)
                goto out;
 
        dst_hold(&rt->u.dst);
@@ -467,6 +594,10 @@ restart:
        goto relookup;
 
 out:
+       if (reachable) {
+               reachable = 0;
+               goto restart_2;
+       }
        dst_hold(&rt->u.dst);
        read_unlock_bh(&rt6_lock);
 out2:
@@ -483,20 +614,21 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
        int strict;
        int attempts = 3;
        int err;
+       int reachable = RT6_SELECT_F_REACHABLE;
 
        strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
 
 relookup:
        read_lock_bh(&rt6_lock);
 
+restart_2:
        fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
-       rt = rt6_select(&fn->leaf, fl->oif, strict | RT6_SELECT_F_REACHABLE);
-       if (rt == &ip6_null_entry)
-               rt = rt6_select(&fn->leaf, fl->oif, strict);
+       rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
        BACKTRACK();
-       if ((rt->rt6i_flags & RTF_CACHE))
+       if (rt == &ip6_null_entry ||
+           rt->rt6i_flags & RTF_CACHE)
                goto out;
 
        dst_hold(&rt->u.dst);
@@ -533,6 +665,10 @@ restart:
        goto relookup;
 
 out:
+       if (reachable) {
+               reachable = 0;
+               goto restart_2;
+       }
        dst_hold(&rt->u.dst);
        read_unlock_bh(&rt6_lock);
 out2:
@@ -1008,59 +1144,63 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r
 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
                  struct neighbour *neigh, u8 *lladdr, int on_link)
 {
-       struct rt6_info *rt, *nrt;
-
-       /* Locate old route to this destination. */
-       rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
-
-       if (rt == NULL)
-               return;
-
-       if (neigh->dev != rt->rt6i_dev)
-               goto out;
+       struct rt6_info *rt, *nrt = NULL;
+       int strict;
+       struct fib6_node *fn;
 
        /*
-        * Current route is on-link; redirect is always invalid.
-        * 
-        * Seems, previous statement is not true. It could
-        * be node, which looks for us as on-link (f.e. proxy ndisc)
-        * But then router serving it might decide, that we should
-        * know truth 8)8) --ANK (980726).
+        * Get the "current" route for this destination and
+        * check if the redirect has come from approriate router.
+        *
+        * RFC 2461 specifies that redirects should only be
+        * accepted if they come from the nexthop to the target.
+        * Due to the way the routes are chosen, this notion
+        * is a bit fuzzy and one might need to check all possible
+        * routes.
         */
-       if (!(rt->rt6i_flags&RTF_GATEWAY))
-               goto out;
+       strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
 
-       /*
-        *      RFC 2461 specifies that redirects should only be
-        *      accepted if they come from the nexthop to the target.
-        *      Due to the way default routers are chosen, this notion
-        *      is a bit fuzzy and one might need to check all default
-        *      routers.
-        */
-       if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
-               if (rt->rt6i_flags & RTF_DEFAULT) {
-                       struct rt6_info *rt1;
-
-                       read_lock(&rt6_lock);
-                       for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
-                               if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
-                                       dst_hold(&rt1->u.dst);
-                                       dst_release(&rt->u.dst);
-                                       read_unlock(&rt6_lock);
-                                       rt = rt1;
-                                       goto source_ok;
-                               }
-                       }
-                       read_unlock(&rt6_lock);
+       read_lock_bh(&rt6_lock);
+       fn = fib6_lookup(&ip6_routing_table, dest, NULL);
+restart:
+       for (rt = fn->leaf; rt; rt = rt->u.next) {
+               /*
+                * Current route is on-link; redirect is always invalid.
+                *
+                * Seems, previous statement is not true. It could
+                * be node, which looks for us as on-link (f.e. proxy ndisc)
+                * But then router serving it might decide, that we should
+                * know truth 8)8) --ANK (980726).
+                */
+               if (rt6_check_expired(rt))
+                       continue;
+               if (!(rt->rt6i_flags & RTF_GATEWAY))
+                       continue;
+               if (neigh->dev != rt->rt6i_dev)
+                       continue;
+               if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
+                       continue;
+               break;
+       }
+       if (rt)
+               dst_hold(&rt->u.dst);
+       else if (strict) {
+               while ((fn = fn->parent) != NULL) {
+                       if (fn->fn_flags & RTN_ROOT)
+                               break;
+                       if (fn->fn_flags & RTN_RTINFO)
+                               goto restart;
                }
+       }
+       read_unlock_bh(&rt6_lock);
+
+       if (!rt) {
                if (net_ratelimit())
                        printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
                               "for redirect target\n");
-               goto out;
+               return;
        }
 
-source_ok:
-
        /*
         *      We have finally decided to accept it.
         */
@@ -1227,6 +1367,57 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
        return rt;
 }
 
+#ifdef CONFIG_IPV6_ROUTE_INFO
+static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
+                                          struct in6_addr *gwaddr, int ifindex)
+{
+       struct fib6_node *fn;
+       struct rt6_info *rt = NULL;
+
+       write_lock_bh(&rt6_lock);
+       fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
+       if (!fn)
+               goto out;
+
+       for (rt = fn->leaf; rt; rt = rt->u.next) {
+               if (rt->rt6i_dev->ifindex != ifindex)
+                       continue;
+               if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
+                       continue;
+               if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
+                       continue;
+               dst_hold(&rt->u.dst);
+               break;
+       }
+out:
+       write_unlock_bh(&rt6_lock);
+       return rt;
+}
+
+static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
+                                          struct in6_addr *gwaddr, int ifindex,
+                                          unsigned pref)
+{
+       struct in6_rtmsg rtmsg;
+
+       memset(&rtmsg, 0, sizeof(rtmsg));
+       rtmsg.rtmsg_type = RTMSG_NEWROUTE;
+       ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
+       rtmsg.rtmsg_dst_len = prefixlen;
+       ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
+       rtmsg.rtmsg_metric = 1024;
+       rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
+       /* We should treat it as a default route if prefix length is 0. */
+       if (!prefixlen)
+               rtmsg.rtmsg_flags |= RTF_DEFAULT;
+       rtmsg.rtmsg_ifindex = ifindex;
+
+       ip6_route_add(&rtmsg, NULL, NULL, NULL);
+
+       return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
+}
+#endif
+
 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
 {      
        struct rt6_info *rt;
@@ -1248,7 +1439,8 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
 }
 
 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
-                                    struct net_device *dev)
+                                    struct net_device *dev,
+                                    unsigned int pref)
 {
        struct in6_rtmsg rtmsg;
 
@@ -1256,7 +1448,8 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
        rtmsg.rtmsg_type = RTMSG_NEWROUTE;
        ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
        rtmsg.rtmsg_metric = 1024;
-       rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
+       rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
+                           RTF_PREF(pref);
 
        rtmsg.rtmsg_ifindex = dev->ifindex;
 
@@ -1684,11 +1877,10 @@ int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
                /*
                 * 2. allocate and initialize walker.
                 */
-               w = kmalloc(sizeof(*w), GFP_ATOMIC);
+               w = kzalloc(sizeof(*w), GFP_ATOMIC);
                if (w == NULL)
                        return -ENOMEM;
                RT6_TRACE("dump<%p", w);
-               memset(w, 0, sizeof(*w));
                w->root = &ip6_routing_table;
                w->func = fib6_dump_node;
                w->args = &arg;