2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
54 #include <linux/rtnetlink.h>
58 #include <asm/uaccess.h>
61 #include <linux/sysctl.h>
64 /* Set to 3 to get tracing. */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #define RT6_TRACE(x...) do { ; } while (0)
/* Route-cache garbage-collection and PMTU tunables.  All time values are
 * expressed in jiffies (HZ = one second). */
76 static int ip6_rt_max_size = 4096;
77 static int ip6_rt_gc_min_interval = HZ / 2;
78 static int ip6_rt_gc_timeout = 60*HZ;
79 int ip6_rt_gc_interval = 30*HZ;
80 static int ip6_rt_gc_elasticity = 9;
81 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Smallest advertised MSS: minimum IPv6 link MTU minus TCP (20) and IPv6 (40)
 * header sizes. */
82 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
84 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87 static void ip6_dst_destroy(struct dst_entry *);
88 static void ip6_dst_ifdown(struct dst_entry *,
89 struct net_device *dev, int how);
90 static int ip6_dst_gc(void);
92 static int ip6_pkt_discard(struct sk_buff *skb);
93 static int ip6_pkt_discard_out(struct sk_buff *skb);
94 static void ip6_link_failure(struct sk_buff *skb);
95 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/* dst_ops vtable wiring the generic dst cache back into this file's IPv6
 * handlers.  NOTE(review): several initializers (and the closing brace) are
 * elided in this excerpt. */
97 static struct dst_ops ip6_dst_ops = {
99 .protocol = __constant_htons(ETH_P_IPV6),
102 .check = ip6_dst_check,
103 .destroy = ip6_dst_destroy,
104 .ifdown = ip6_dst_ifdown,
105 .negative_advice = ip6_negative_advice,
106 .link_failure = ip6_link_failure,
107 .update_pmtu = ip6_rt_update_pmtu,
108 .entry_size = sizeof(struct rt6_info),
/* The permanent "no route" entry returned when lookups fail: a reject route
 * (RTF_REJECT|RTF_NONEXTHOP) whose handlers discard traffic with
 * -ENETUNREACH.  Held with a static refcount so it is never freed.
 * NOTE(review): some initializers are elided in this excerpt. */
111 struct rt6_info ip6_null_entry = {
114 .__refcnt = ATOMIC_INIT(1),
116 .dev = &loopback_dev,
118 .error = -ENETUNREACH,
119 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
120 .input = ip6_pkt_discard,
121 .output = ip6_pkt_discard_out,
123 .path = (struct dst_entry*)&ip6_null_entry,
126 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
127 .rt6i_metric = ~(u32) 0,
128 .rt6i_ref = ATOMIC_INIT(1),
/* Root node of the single IPv6 FIB; its leaf starts out pointing at the
 * null (reject) entry. */
131 struct fib6_node ip6_routing_table = {
132 .leaf = &ip6_null_entry,
133 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
136 /* Protects all the ip6 fib */
138 DEFINE_RWLOCK(rt6_lock);
141 /* allocate dst with ip6_dst_ops */
/* Returns a new rt6_info from the generic dst allocator, or NULL. */
142 static __inline__ struct rt6_info *ip6_dst_alloc(void)
144 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/* dst_ops.destroy: detach the dying route from its inet6_dev.
 * NOTE(review): the matching reference drop on idev appears elided in this
 * excerpt — confirm against the full file. */
147 static void ip6_dst_destroy(struct dst_entry *dst)
149 struct rt6_info *rt = (struct rt6_info *)dst;
150 struct inet6_dev *idev = rt->rt6i_idev;
153 rt->rt6i_idev = NULL;
/* dst_ops.ifdown: @dev is going away.  Re-point the route's idev at the
 * loopback device so the dst can safely outlive the vanished interface.
 * NOTE(review): cleanup lines appear elided in this excerpt. */
158 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
164 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166 if (loopback_idev != NULL) {
167 rt->rt6i_idev = loopback_idev;
/* True if @rt carries RTF_EXPIRES and its rt6i_expires deadline has passed. */
173 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
175 return (rt->rt6i_flags & RTF_EXPIRES &&
176 time_after(jiffies, rt->rt6i_expires));
180 * Route lookup. Any rt6_lock is implied.
/* Walk the sibling list of a leaf route and select the entry whose device
 * matches the requested output interface (@oif); loopback routes are matched
 * through their idev's ifindex.  In strict mode a miss yields
 * &ip6_null_entry.  Caller holds rt6_lock.
 * NOTE(review): several branch/return lines are elided in this excerpt. */
183 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
187 struct rt6_info *local = NULL;
188 struct rt6_info *sprt;
191 for (sprt = rt; sprt; sprt = sprt->u.next) {
192 struct net_device *dev = sprt->rt6i_dev;
193 if (dev->ifindex == oif)
195 if (dev->flags & IFF_LOOPBACK) {
196 if (sprt->rt6i_idev == NULL ||
197 sprt->rt6i_idev->dev->ifindex != oif) {
200 if (local && (!oif ||
201 local->rt6i_idev->dev->ifindex == oif))
212 return &ip6_null_entry;
218 * pointer to the last default router chosen. BH is disabled locally.
/* Cached last-chosen default router and the spinlock guarding it. */
220 static struct rt6_info *rt6_dflt_pointer;
221 static DEFINE_SPINLOCK(rt6_dflt_lock);
/* Forget the cached default-router choice.  With @rt == NULL the cache is
 * cleared unconditionally; otherwise only if it currently points at @rt. */
223 void rt6_reset_dflt_pointer(struct rt6_info *rt)
225 spin_lock_bh(&rt6_dflt_lock);
226 if (rt == NULL || rt == rt6_dflt_pointer) {
227 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228 rt6_dflt_pointer = NULL;
230 spin_unlock_bh(&rt6_dflt_lock);
233 /* Default Router Selection (RFC 2461 6.3.6) */
/* Pick the best default router from the sibling list headed by @rt for
 * output interface @oif: prefer (probably) reachable neighbours, stick with
 * the previously chosen router while it remains reachable, otherwise
 * round-robin starting after rt6_dflt_pointer; as a last resort fall back to
 * an addrconf default route, and finally to &ip6_null_entry.
 * NOTE(review): large parts of the scoring logic (the nud_state switch and
 * the 'm'/'mpri' bookkeeping) are elided in this excerpt. */
234 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
236 struct rt6_info *match = NULL;
237 struct rt6_info *sprt;
240 for (sprt = rt; sprt; sprt = sprt->u.next) {
241 struct neighbour *neigh;
246 sprt->rt6i_dev->ifindex == oif))
249 if (rt6_check_expired(sprt))
252 if (sprt == rt6_dflt_pointer)
255 if ((neigh = sprt->rt6i_nexthop) != NULL) {
256 read_lock_bh(&neigh->lock);
257 switch (neigh->nud_state) {
275 read_unlock_bh(&neigh->lock);
278 read_unlock_bh(&neigh->lock);
283 if (m > mpri || m >= 12) {
287 /* we choose the last default router if it
288 * is in (probably) reachable state.
289 * If route changed, we should do pmtu
290 * discovery. --yoshfuji
297 spin_lock(&rt6_dflt_lock);
300 * No default routers are known to be reachable.
/* Round-robin: resume scanning after the previously chosen router. */
303 if (rt6_dflt_pointer) {
304 for (sprt = rt6_dflt_pointer->u.next;
305 sprt; sprt = sprt->u.next) {
306 if (sprt->u.dst.obsolete <= 0 &&
307 sprt->u.dst.error == 0 &&
308 !rt6_check_expired(sprt)) {
315 sprt = sprt->u.next) {
316 if (sprt->u.dst.obsolete <= 0 &&
317 sprt->u.dst.error == 0 &&
318 !rt6_check_expired(sprt)) {
322 if (sprt == rt6_dflt_pointer)
329 if (rt6_dflt_pointer != match)
330 RT6_TRACE("changed default router: %p->%p\n",
331 rt6_dflt_pointer, match);
332 rt6_dflt_pointer = match;
334 spin_unlock(&rt6_dflt_lock);
338 * Last Resort: if no default routers found,
339 * use addrconf default route.
340 * We don't record this route.
342 for (sprt = ip6_routing_table.leaf;
343 sprt; sprt = sprt->u.next) {
344 if (!rt6_check_expired(sprt) &&
345 (sprt->rt6i_flags & RTF_DEFAULT) &&
348 sprt->rt6i_dev->ifindex == oif))) {
354 /* no default route. give up. */
355 match = &ip6_null_entry;
/* Public route lookup: find the fib node for (daddr, saddr), match on @oif
 * under rt6_lock, and return the route with its refcount raised.  On error
 * the reference is dropped again (see the dst_release below).
 * NOTE(review): the return statements are elided in this excerpt. */
362 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
365 struct fib6_node *fn;
368 read_lock_bh(&rt6_lock);
369 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
370 rt = rt6_device_match(fn->leaf, oif, strict);
371 dst_hold(&rt->u.dst);
373 read_unlock_bh(&rt6_lock);
375 rt->u.dst.lastuse = jiffies;
376 if (rt->u.dst.error == 0)
378 dst_release(&rt->u.dst);
382 /* ip6_ins_rt is called with FREE rt6_lock.
383 It takes new route entry, the addition fails by any reason the
384 route is freed. In any case, if caller does not hold it, it may
/* Insert @rt into the FIB under the write side of rt6_lock; returns the
 * fib6_add() result. */
388 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
389 void *_rtattr, struct netlink_skb_parms *req)
393 write_lock_bh(&rt6_lock);
394 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
395 write_unlock_bh(&rt6_lock);
/* Copy-on-write clone of @ort: build a host (/128) RTF_CACHE route for
 * @daddr.  For non-gateway routes the destination itself becomes the
 * gateway (and RTF_ANYCAST is set when @daddr equals a non-host prefix
 * address).  Binds a neighbour entry for the nexthop.
 * NOTE(review): NULL-check and return lines are elided in this excerpt. */
400 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
401 struct in6_addr *saddr)
409 rt = ip6_rt_copy(ort);
412 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
413 if (rt->rt6i_dst.plen != 128 &&
414 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
415 rt->rt6i_flags |= RTF_ANYCAST;
416 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
419 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
420 rt->rt6i_dst.plen = 128;
421 rt->rt6i_flags |= RTF_CACHE;
422 rt->u.dst.flags |= DST_HOST;
424 #ifdef CONFIG_IPV6_SUBTREES
425 if (rt->rt6i_src.plen && saddr) {
426 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
427 rt->rt6i_src.plen = 128;
431 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
/* Clone @ort into a host (/128) RTF_CACHE route for @daddr, keeping the
 * original nexthop (neigh_clone) — used for gatewayed/NONEXTHOP routes where
 * no COW is needed.  Reject clones inherit the original dst error. */
438 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
440 struct rt6_info *rt = ip6_rt_copy(ort);
442 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
443 rt->rt6i_dst.plen = 128;
444 rt->rt6i_flags |= RTF_CACHE;
445 if (rt->rt6i_flags & RTF_REJECT)
446 rt->u.dst.error = ort->u.dst.error;
447 rt->u.dst.flags |= DST_HOST;
448 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
/* Used by the lookup fast paths: when a strict lookup landed on the null
 * entry, walk back up the tree toward the root looking for a node carrying
 * route info.  NOTE(review): the macro body is partially elided in this
 * excerpt — it references 'fn', 'rt' and 'strict' from the caller's scope. */
453 #define BACKTRACK() \
454 if (rt == &ip6_null_entry && strict) { \
455 while ((fn = fn->parent) != NULL) { \
456 if (fn->fn_flags & RTN_ROOT) { \
459 if (fn->fn_flags & RTN_RTINFO) \
465 void ip6_route_input(struct sk_buff *skb)
467 struct fib6_node *fn;
472 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
475 read_lock_bh(&rt6_lock);
477 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
478 &skb->nh.ipv6h->saddr);
483 if ((rt->rt6i_flags & RTF_CACHE)) {
484 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
489 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
492 dst_hold(&rt->u.dst);
493 read_unlock_bh(&rt6_lock);
495 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
496 struct rt6_info *nrt;
499 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr,
500 &skb->nh.ipv6h->saddr);
502 dst_release(&rt->u.dst);
503 rt = nrt ? : &ip6_null_entry;
505 dst_hold(&rt->u.dst);
507 err = ip6_ins_rt(nrt, NULL, NULL,
516 /* Race condition! In the gap, when rt6_lock was
517 released someone could insert this route. Relookup.
519 dst_release(&rt->u.dst);
524 rt->u.dst.lastuse = jiffies;
526 skb->dst = (struct dst_entry *) rt;
529 dst_hold(&rt->u.dst);
530 read_unlock_bh(&rt6_lock);
/* Output-path route lookup for a flow: like ip6_route_input() but keyed by
 * the flowi, with default-router selection (rt6_best_dflt) applied to
 * RTF_DEFAULT routes at addrconf priority.  Returns a held dst.
 * NOTE(review): labels, BACKTRACK() invocations and return statements are
 * elided in this excerpt. */
534 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
536 struct fib6_node *fn;
541 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
544 read_lock_bh(&rt6_lock);
546 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
551 if ((rt->rt6i_flags & RTF_CACHE)) {
552 rt = rt6_device_match(rt, fl->oif, strict);
556 if (rt->rt6i_flags & RTF_DEFAULT) {
557 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
558 rt = rt6_best_dflt(rt, fl->oif);
560 rt = rt6_device_match(rt, fl->oif, strict);
564 dst_hold(&rt->u.dst);
565 read_unlock_bh(&rt6_lock);
/* No bound nexthop yet: clone-on-write a host route and insert it. */
567 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
568 struct rt6_info *nrt;
571 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
573 dst_release(&rt->u.dst);
574 rt = nrt ? : &ip6_null_entry;
576 dst_hold(&rt->u.dst);
578 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
586 /* Race condition! In the gap, when rt6_lock was
587 released someone could insert this route. Relookup.
589 dst_release(&rt->u.dst);
594 rt->u.dst.lastuse = jiffies;
598 dst_hold(&rt->u.dst);
599 read_unlock_bh(&rt6_lock);
605 * Destination cache support functions
/* dst_ops.check: the cached dst is still valid while its fib node's serial
 * number matches the cookie the user saved at lookup time. */
608 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
612 rt = (struct rt6_info *) dst;
614 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/* dst_ops.negative_advice: a cache (cloned) route that misbehaved is simply
 * deleted from the FIB.  NOTE(review): the return path is elided here. */
620 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
622 struct rt6_info *rt = (struct rt6_info *) dst;
625 if (rt->rt6i_flags & RTF_CACHE)
626 ip6_del_rt(rt, NULL, NULL, NULL);
/* dst_ops.link_failure: report address-unreachable to the sender, then
 * expire the offending cache route immediately, or invalidate the fib node
 * serial for default routes so cached dsts fail ip6_dst_check(). */
633 static void ip6_link_failure(struct sk_buff *skb)
637 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
639 rt = (struct rt6_info *) skb->dst;
641 if (rt->rt6i_flags&RTF_CACHE) {
642 dst_set_expires(&rt->u.dst, 0);
643 rt->rt6i_flags |= RTF_EXPIRES;
644 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
645 rt->rt6i_node->fn_sernum = -1;
/* dst_ops.update_pmtu: lower a host route's cached MTU.  MTUs below the
 * IPv6 minimum clamp the metric path via the ALLFRAG feature bit (fragment
 * header on every packet) rather than going under IPV6_MIN_MTU. */
649 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
651 struct rt6_info *rt6 = (struct rt6_info*)dst;
653 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
654 rt6->rt6i_flags |= RTF_MODIFIED;
655 if (mtu < IPV6_MIN_MTU) {
657 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
659 dst->metrics[RTAX_MTU-1] = mtu;
663 /* Protected by rt6_lock. */
664 static struct dst_entry *ndisc_dst_gc_list;
665 static int ipv6_get_mtu(struct net_device *dev);
/* Derive the advertised MSS from a link MTU: subtract IPv6 + TCP headers,
 * clamp below at ip6_rt_min_advmss and above at the non-jumbo maximum. */
667 static inline unsigned int ipv6_advmss(unsigned int mtu)
669 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
671 if (mtu < ip6_rt_min_advmss)
672 mtu = ip6_rt_min_advmss;
675 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
676 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
677 * IPV6_MAXPLEN is also valid and means: "any MSS,
678 * rely only on pmtu discovery"
680 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* Allocate a standalone host dst for neighbour-discovery traffic to @addr
 * on @dev (not inserted into the FIB); it is chained on ndisc_dst_gc_list
 * for later garbage collection by ndisc_dst_gc().
 * NOTE(review): error-path lines are elided in this excerpt. */
685 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
686 struct neighbour *neigh,
687 struct in6_addr *addr,
688 int (*output)(struct sk_buff *))
691 struct inet6_dev *idev = in6_dev_get(dev);
693 if (unlikely(idev == NULL))
696 rt = ip6_dst_alloc();
697 if (unlikely(rt == NULL)) {
706 neigh = ndisc_get_neigh(dev, addr);
709 rt->rt6i_idev = idev;
710 rt->rt6i_nexthop = neigh;
711 atomic_set(&rt->u.dst.__refcnt, 1);
712 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
713 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
714 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
715 rt->u.dst.output = output;
717 #if 0 /* there's no chance to use these for ndisc */
718 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
721 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
722 rt->rt6i_dst.plen = 128;
/* Chain onto the ndisc GC list under the fib write lock. */
725 write_lock_bh(&rt6_lock);
726 rt->u.dst.next = ndisc_dst_gc_list;
727 ndisc_dst_gc_list = &rt->u.dst;
728 write_unlock_bh(&rt6_lock);
730 fib6_force_start_gc();
733 return (struct dst_entry *)rt;
/* Garbage-collect unreferenced dsts on ndisc_dst_gc_list.
 * NOTE(review): most of the body (unlink/free and the *more accounting) is
 * elided in this excerpt. */
736 int ndisc_dst_gc(int *more)
738 struct dst_entry *dst, *next, **pprev;
742 pprev = &ndisc_dst_gc_list;
744 while ((dst = *pprev) != NULL) {
745 if (!atomic_read(&dst->__refcnt)) {
/* dst_ops.gc: adaptive route-cache garbage collection.  The working 'expire'
 * horizon shrinks geometrically (by 1/2^elasticity) each time GC runs under
 * pressure and resets when the cache drops below gc_thresh.  Returns nonzero
 * while the cache is still over ip6_rt_max_size.
 * NOTE(review): the fib6_run_gc()/last_gc update lines are elided here. */
758 static int ip6_dst_gc(void)
760 static unsigned expire = 30*HZ;
761 static unsigned long last_gc;
762 unsigned long now = jiffies;
764 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
765 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
771 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
772 expire = ip6_rt_gc_timeout>>1;
775 expire -= expire>>ip6_rt_gc_elasticity;
776 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
779 /* Clean host part of a prefix. Not necessary in radix tree,
780 but results in cleaner routing tables.
782 Remove it only when all the things will work!
/* Device MTU as seen by IPv6: the inet6_dev's configured mtu6, defaulting to
 * IPV6_MIN_MTU when the device has no inet6_dev.
 * NOTE(review): the in6_dev_put()/return lines are elided in this excerpt. */
785 static int ipv6_get_mtu(struct net_device *dev)
787 int mtu = IPV6_MIN_MTU;
788 struct inet6_dev *idev;
790 idev = in6_dev_get(dev);
792 mtu = idev->cnf.mtu6;
/* Per-device hop limit, falling back to the global ipv6_devconf default.
 * NOTE(review): the in6_dev_put()/return lines are elided in this excerpt. */
798 int ipv6_get_hoplimit(struct net_device *dev)
800 int hoplimit = ipv6_devconf.hop_limit;
801 struct inet6_dev *idev;
803 idev = in6_dev_get(dev);
805 hoplimit = idev->cnf.hop_limit;
/* Build and insert a route described by an in6_rtmsg (ioctl or rtnetlink
 * path).  Validates prefix lengths, resolves the output device and gateway,
 * promotes loopback routes to reject routes, applies RTA_METRICS attributes,
 * fills default metrics, and hands the finished rt6_info to ip6_ins_rt().
 * On any failure the partially built dst is freed (dst_free at the end).
 * NOTE(review): many error-checking and 'goto out' lines are elided in this
 * excerpt. */
815 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
816 void *_rtattr, struct netlink_skb_parms *req)
821 struct rt6_info *rt = NULL;
822 struct net_device *dev = NULL;
823 struct inet6_dev *idev = NULL;
826 rta = (struct rtattr **) _rtattr;
828 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
830 #ifndef CONFIG_IPV6_SUBTREES
831 if (rtmsg->rtmsg_src_len)
834 if (rtmsg->rtmsg_ifindex) {
836 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
839 idev = in6_dev_get(dev);
844 if (rtmsg->rtmsg_metric == 0)
845 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
847 rt = ip6_dst_alloc();
854 rt->u.dst.obsolete = -1;
855 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
856 if (nlh && (r = NLMSG_DATA(nlh))) {
857 rt->rt6i_protocol = r->rtm_protocol;
859 rt->rt6i_protocol = RTPROT_BOOT;
862 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
864 if (addr_type & IPV6_ADDR_MULTICAST)
865 rt->u.dst.input = ip6_mc_input;
867 rt->u.dst.input = ip6_forward;
869 rt->u.dst.output = ip6_output;
871 ipv6_addr_prefix(&rt->rt6i_dst.addr,
872 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
873 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
874 if (rt->rt6i_dst.plen == 128)
875 rt->u.dst.flags = DST_HOST;
877 #ifdef CONFIG_IPV6_SUBTREES
878 ipv6_addr_prefix(&rt->rt6i_src.addr,
879 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
880 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
883 rt->rt6i_metric = rtmsg->rtmsg_metric;
885 /* We cannot add true routes via loopback here,
886 they would result in kernel looping; promote them to reject routes
888 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
889 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
890 /* hold loopback dev/idev if we haven't done so. */
891 if (dev != &loopback_dev) {
898 idev = in6_dev_get(dev);
904 rt->u.dst.output = ip6_pkt_discard_out;
905 rt->u.dst.input = ip6_pkt_discard;
906 rt->u.dst.error = -ENETUNREACH;
907 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
/* Gateway route: validate the nexthop address type; non-link-local
 * gateways are only accepted if a recursive lookup resolves them. */
911 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
912 struct in6_addr *gw_addr;
915 gw_addr = &rtmsg->rtmsg_gateway;
916 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
917 gwa_type = ipv6_addr_type(gw_addr);
919 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
920 struct rt6_info *grt;
922 /* IPv6 strictly inhibits using not link-local
923 addresses as nexthop address.
924 Otherwise, router will not able to send redirects.
925 It is very good, but in some (rare!) circumstances
926 (SIT, PtP, NBMA NOARP links) it is handy to allow
927 some exceptions. --ANK
930 if (!(gwa_type&IPV6_ADDR_UNICAST))
933 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
939 if (dev != grt->rt6i_dev) {
940 dst_release(&grt->u.dst);
945 idev = grt->rt6i_idev;
947 in6_dev_hold(grt->rt6i_idev);
949 if (!(grt->rt6i_flags&RTF_GATEWAY))
951 dst_release(&grt->u.dst);
957 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
965 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
966 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
967 if (IS_ERR(rt->rt6i_nexthop)) {
968 err = PTR_ERR(rt->rt6i_nexthop);
969 rt->rt6i_nexthop = NULL;
974 rt->rt6i_flags = rtmsg->rtmsg_flags;
/* Apply user-supplied per-route metrics from the RTA_METRICS attribute. */
977 if (rta && rta[RTA_METRICS-1]) {
978 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
979 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
981 while (RTA_OK(attr, attrlen)) {
982 unsigned flavor = attr->rta_type;
984 if (flavor > RTAX_MAX) {
988 rt->u.dst.metrics[flavor-1] =
989 *(u32 *)RTA_DATA(attr);
991 attr = RTA_NEXT(attr, attrlen);
/* Fill defaults for any metrics still unset. */
995 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
996 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
997 if (!rt->u.dst.metrics[RTAX_MTU-1])
998 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
999 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1000 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1001 rt->u.dst.dev = dev;
1002 rt->rt6i_idev = idev;
1003 return ip6_ins_rt(rt, nlh, _rtattr, req);
1011 dst_free((struct dst_entry *) rt);
/* Remove @rt from the FIB under the write lock, invalidating the cached
 * default-router pointer first, and drop the caller's reference. */
1015 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1019 write_lock_bh(&rt6_lock);
1021 rt6_reset_dflt_pointer(NULL);
1023 err = fib6_del(rt, nlh, _rtattr, req);
1024 dst_release(&rt->u.dst);
1026 write_unlock_bh(&rt6_lock);
/* Delete the route matching an in6_rtmsg: locate the exact fib node for the
 * (dst, src) prefixes, then scan its leaf chain for an entry matching the
 * optional ifindex, gateway and metric filters, and hand it to ip6_del_rt().
 * NOTE(review): the -ESRCH not-found return is elided in this excerpt. */
1031 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1033 struct fib6_node *fn;
1034 struct rt6_info *rt;
1037 read_lock_bh(&rt6_lock);
1039 fn = fib6_locate(&ip6_routing_table,
1040 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1041 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1044 for (rt = fn->leaf; rt; rt = rt->u.next) {
1045 if (rtmsg->rtmsg_ifindex &&
1046 (rt->rt6i_dev == NULL ||
1047 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1049 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1050 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1052 if (rtmsg->rtmsg_metric &&
1053 rtmsg->rtmsg_metric != rt->rt6i_metric)
1055 dst_hold(&rt->u.dst);
1056 read_unlock_bh(&rt6_lock);
1058 return ip6_del_rt(rt, nlh, _rtattr, req);
1061 read_unlock_bh(&rt6_lock);
/* Process an ICMPv6 redirect for @dest arriving from @saddr via @neigh:
 * validate that the redirect comes from the current nexthop (checking all
 * default routers if needed), update the neighbour cache, then install a
 * cloned host route through the new gateway and delete any stale cache
 * route.  NOTE(review): several 'goto out'/label lines are elided in this
 * excerpt. */
1069 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1070 struct neighbour *neigh, u8 *lladdr, int on_link)
1072 struct rt6_info *rt, *nrt;
1074 /* Locate old route to this destination. */
1075 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1080 if (neigh->dev != rt->rt6i_dev)
1084 * Current route is on-link; redirect is always invalid.
1086 * Seems, previous statement is not true. It could
1087 * be node, which looks for us as on-link (f.e. proxy ndisc)
1088 * But then router serving it might decide, that we should
1089 * know truth 8)8) --ANK (980726).
1091 if (!(rt->rt6i_flags&RTF_GATEWAY))
1095 * RFC 2461 specifies that redirects should only be
1096 * accepted if they come from the nexthop to the target.
1097 * Due to the way default routers are chosen, this notion
1098 * is a bit fuzzy and one might need to check all default
1101 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1102 if (rt->rt6i_flags & RTF_DEFAULT) {
1103 struct rt6_info *rt1;
1105 read_lock(&rt6_lock);
1106 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1107 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1108 dst_hold(&rt1->u.dst);
1109 dst_release(&rt->u.dst);
1110 read_unlock(&rt6_lock);
1115 read_unlock(&rt6_lock);
1117 if (net_ratelimit())
1118 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1119 "for redirect target\n");
1126 * We have finally decided to accept it.
/* Refresh the neighbour entry with the redirect's link-layer address. */
1129 neigh_update(neigh, lladdr, NUD_STALE,
1130 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1131 NEIGH_UPDATE_F_OVERRIDE|
1132 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1133 NEIGH_UPDATE_F_ISROUTER))
1137 * Redirect received -> path was valid.
1138 * Look, redirects are sent only in response to data packets,
1139 * so that this nexthop apparently is reachable. --ANK
1141 dst_confirm(&rt->u.dst);
1143 /* Duplicate redirect: silently ignore. */
1144 if (neigh == rt->u.dst.neighbour)
/* Install the new host route through the redirected gateway. */
1147 nrt = ip6_rt_copy(rt);
1151 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1153 nrt->rt6i_flags &= ~RTF_GATEWAY;
1155 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1156 nrt->rt6i_dst.plen = 128;
1157 nrt->u.dst.flags |= DST_HOST;
1159 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1160 nrt->rt6i_nexthop = neigh_clone(neigh);
1161 /* Reset pmtu, it may be better */
1162 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1163 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1165 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1168 if (rt->rt6i_flags&RTF_CACHE) {
1169 ip6_del_rt(rt, NULL, NULL, NULL);
1174 dst_release(&rt->u.dst);
1179 * Handle ICMP "packet too big" messages
1180 * i.e. Path MTU discovery
/* Handle an ICMPv6 Packet Too Big for (daddr, saddr) on @dev: clamp the new
 * PMTU at IPV6_MIN_MTU (smaller values imply ALLFRAG), update an existing
 * cache route in place with a 10-minute expiry, or create a new host route
 * (COW for connected routes, clone for gatewayed/NONEXTHOP ones) carrying
 * the reduced MTU.  NOTE(review): some guard/label lines are elided in this
 * excerpt. */
1183 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1184 struct net_device *dev, u32 pmtu)
1186 struct rt6_info *rt, *nrt;
1189 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1193 if (pmtu >= dst_mtu(&rt->u.dst))
1196 if (pmtu < IPV6_MIN_MTU) {
1198 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1199 * MTU (1280) and a fragment header should always be included
1200 * after a node receiving Too Big message reporting PMTU is
1201 * less than the IPv6 Minimum Link MTU.
1203 pmtu = IPV6_MIN_MTU;
1207 /* New mtu received -> path was valid.
1208 They are sent only in response to data packets,
1209 so that this nexthop apparently is reachable. --ANK
1211 dst_confirm(&rt->u.dst);
1213 /* Host route. If it is static, it would be better
1214 not to override it, but add new one, so that
1215 when cache entry will expire old pmtu
1216 would return automatically.
1218 if (rt->rt6i_flags & RTF_CACHE) {
1219 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1221 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1222 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1223 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1228 Two cases are possible:
1229 1. It is connected route. Action: COW
1230 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1232 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1233 nrt = rt6_alloc_cow(rt, daddr, saddr);
1235 nrt = rt6_alloc_clone(rt, daddr);
1238 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1240 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1242 /* According to RFC 1981, detecting PMTU increase shouldn't be
1243 * happened within 5 mins, the recommended timer is 10 mins.
1244 * Here this route expiration time is set to ip6_rt_mtu_expires
1245 * which is 10 mins. After 10 mins the decreased pmtu is expired
1246 * and detecting PMTU increase will be automatically happened.
1248 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1249 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1251 ip6_ins_rt(nrt, NULL, NULL, NULL);
1254 dst_release(&rt->u.dst);
1258 * Misc support functions
/* Shallow-copy @ort into a fresh rt6_info: handlers, metrics, device and
 * idev references, gateway and prefix keys.  The copy is never expiring
 * (RTF_EXPIRES stripped, rt6i_expires = 0) and starts with metric 0.
 * NOTE(review): the NULL-check and return lines are elided in this excerpt. */
1261 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1263 struct rt6_info *rt = ip6_dst_alloc();
1266 rt->u.dst.input = ort->u.dst.input;
1267 rt->u.dst.output = ort->u.dst.output;
1269 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1270 rt->u.dst.dev = ort->u.dst.dev;
1272 dev_hold(rt->u.dst.dev);
1273 rt->rt6i_idev = ort->rt6i_idev;
1275 in6_dev_hold(rt->rt6i_idev);
1276 rt->u.dst.lastuse = jiffies;
1277 rt->rt6i_expires = 0;
1279 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1280 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1281 rt->rt6i_metric = 0;
1283 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1284 #ifdef CONFIG_IPV6_SUBTREES
1285 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/* Find the default route via gateway @addr on @dev by scanning the root
 * node's leaf chain; returns it with a reference held, or NULL.
 * NOTE(review): filter conditions and the return are elided in this excerpt. */
1292 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1293 struct rt6_info *rt;
1294 struct fib6_node *fn;
1296 fn = &ip6_routing_table;
1298 write_lock_bh(&rt6_lock);
1299 for (rt = fn->leaf; rt; rt=rt->u.next) {
1300 if (dev == rt->rt6i_dev &&
1301 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1305 dst_hold(&rt->u.dst);
1306 write_unlock_bh(&rt6_lock);
/* Install a router-advertised default route via @gwaddr on @dev (metric
 * 1024, RTF_ADDRCONF|RTF_DEFAULT|RTF_EXPIRES), then look it up again to
 * return a referenced pointer to the inserted entry. */
1310 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1311 struct net_device *dev)
1313 struct in6_rtmsg rtmsg;
1315 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1316 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1317 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1318 rtmsg.rtmsg_metric = 1024;
1319 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1321 rtmsg.rtmsg_ifindex = dev->ifindex;
1323 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1324 return rt6_get_dflt_router(gwaddr, dev);
/* Delete every addrconf/RA default route.  The lock must be released around
 * each ip6_del_rt() (which takes the write lock), so the scan restarts from
 * the head after each deletion.  NOTE(review): the restart goto is elided in
 * this excerpt. */
1327 void rt6_purge_dflt_routers(void)
1329 struct rt6_info *rt;
1332 read_lock_bh(&rt6_lock);
1333 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1334 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1335 dst_hold(&rt->u.dst);
1337 rt6_reset_dflt_pointer(NULL);
1339 read_unlock_bh(&rt6_lock);
1341 ip6_del_rt(rt, NULL, NULL, NULL);
1346 read_unlock_bh(&rt6_lock);
/* SIOCADDRT/SIOCDELRT ioctl entry: requires CAP_NET_ADMIN, copies the
 * in6_rtmsg from userspace, and dispatches to ip6_route_add/del.
 * NOTE(review): the switch plumbing and error returns are elided in this
 * excerpt. */
1349 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1351 struct in6_rtmsg rtmsg;
1355 case SIOCADDRT: /* Add a route */
1356 case SIOCDELRT: /* Delete a route */
1357 if (!capable(CAP_NET_ADMIN))
1359 err = copy_from_user(&rtmsg, arg,
1360 sizeof(struct in6_rtmsg));
1367 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1370 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1384 * Drop the packet on the floor
/* Input handler for reject routes: count a no-route event, send an ICMPv6
 * destination-unreachable, and drop the packet. */
1387 static int ip6_pkt_discard(struct sk_buff *skb)
1389 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1390 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/* Output-side discard: set skb->dev from the dst and reuse the input-side
 * discard path. */
1395 static int ip6_pkt_discard_out(struct sk_buff *skb)
1397 skb->dev = skb->dst->dev;
1398 return ip6_pkt_discard(skb);
1402 * Allocate a dst for local (unicast / anycast) address.
1402 * Allocate a dst for local (unicast / anycast) address.
/* Build a non-FIB host route delivering a local address via the loopback
 * device: RTF_UP|RTF_NONEXTHOP plus RTF_ANYCAST or RTF_LOCAL, with input
 * handled by ip6_input.  Returns the route or ERR_PTR(-ENOMEM).
 * NOTE(review): the anycast/local condition lines are elided in this
 * excerpt. */
1405 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1406 const struct in6_addr *addr,
1409 struct rt6_info *rt = ip6_dst_alloc();
1412 return ERR_PTR(-ENOMEM);
1414 dev_hold(&loopback_dev);
1417 rt->u.dst.flags = DST_HOST;
1418 rt->u.dst.input = ip6_input;
1419 rt->u.dst.output = ip6_output;
1420 rt->rt6i_dev = &loopback_dev;
1421 rt->rt6i_idev = idev;
1422 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1423 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1424 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1425 rt->u.dst.obsolete = -1;
1427 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1429 rt->rt6i_flags |= RTF_ANYCAST;
1431 rt->rt6i_flags |= RTF_LOCAL;
1432 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1433 if (rt->rt6i_nexthop == NULL) {
1434 dst_free((struct dst_entry *) rt);
1435 return ERR_PTR(-ENOMEM);
1438 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1439 rt->rt6i_dst.plen = 128;
1441 atomic_set(&rt->u.dst.__refcnt, 1);
/* fib6_clean_tree callback: select for deletion every route on the given
 * device (or all routes when arg == NULL), sparing the null entry.
 * NOTE(review): the return values are elided in this excerpt. */
1446 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1448 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1449 rt != &ip6_null_entry) {
1450 RT6_TRACE("deleted by ifdown %p\n", rt);
/* Purge all routes referencing @dev from the FIB (device going down). */
1456 void rt6_ifdown(struct net_device *dev)
1458 write_lock_bh(&rt6_lock);
1459 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1460 write_unlock_bh(&rt6_lock);
/* Argument bundle passed through fib6_clean_tree to rt6_mtu_change_route().
 * NOTE(review): the mtu field and closing brace are elided in this excerpt. */
1463 struct rt6_mtu_change_arg
1465 struct net_device *dev;
/* fib6_clean_tree callback for a device MTU change: update RTAX_MTU (and
 * ADVMSS) on routes over @arg->dev whose metric is not locked, both for MTU
 * decreases and for increases where the old device MTU was the path
 * bottleneck.  NOTE(review): the return statement is elided in this
 * excerpt. */
1469 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1471 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1472 struct inet6_dev *idev;
1474 /* In IPv6 pmtu discovery is not optional,
1475 so that RTAX_MTU lock cannot disable it.
1476 We still use this lock to block changes
1477 caused by addrconf/ndisc.
1480 idev = __in6_dev_get(arg->dev);
1484 /* For administrative MTU increase, there is no way to discover
1485 IPv6 PMTU increase, so PMTU increase should be updated here.
1486 Since RFC 1981 doesn't include administrative MTU increase
1487 update PMTU increase is a MUST. (i.e. jumbo frame)
1490 If new MTU is less than route PMTU, this new MTU will be the
1491 lowest MTU in the path, update the route PMTU to reflect PMTU
1492 decreases; if new MTU is greater than route PMTU, and the
1493 old MTU is the lowest MTU in the path, update the route PMTU
1494 to reflect the increase. In this case if the other nodes' MTU
1495 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1498 if (rt->rt6i_dev == arg->dev &&
1499 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1500 (dst_mtu(&rt->u.dst) > arg->mtu ||
1501 (dst_mtu(&rt->u.dst) < arg->mtu &&
1502 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1503 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1504 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/* Propagate a device MTU change to all affected routes in the FIB. */
1508 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1510 struct rt6_mtu_change_arg arg;
1514 read_lock_bh(&rt6_lock);
1515 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1516 read_unlock_bh(&rt6_lock);
/* Translate an rtnetlink (rtmsg + rtattr array) request into the legacy
 * in6_rtmsg used by ip6_route_add/del.  Each attribute is length-checked
 * before copying.  NOTE(review): the error returns (-EINVAL) and final
 * return 0 are elided in this excerpt. */
1519 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1520 struct in6_rtmsg *rtmsg)
1522 memset(rtmsg, 0, sizeof(*rtmsg));
1524 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1525 rtmsg->rtmsg_src_len = r->rtm_src_len;
1526 rtmsg->rtmsg_flags = RTF_UP;
1527 if (r->rtm_type == RTN_UNREACHABLE)
1528 rtmsg->rtmsg_flags |= RTF_REJECT;
1530 if (rta[RTA_GATEWAY-1]) {
1531 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1533 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1534 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1536 if (rta[RTA_DST-1]) {
1537 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1539 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1541 if (rta[RTA_SRC-1]) {
1542 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1544 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1546 if (rta[RTA_OIF-1]) {
1547 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1549 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1551 if (rta[RTA_PRIORITY-1]) {
1552 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1554 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/* RTM_DELROUTE netlink handler: convert the request and delete the route. */
1559 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1561 struct rtmsg *r = NLMSG_DATA(nlh);
1562 struct in6_rtmsg rtmsg;
1564 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1566 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
/*
 * inet6_rtm_newroute - netlink RTM_NEWROUTE handler: convert the request
 * and delegate to ip6_route_add(). Mirrors inet6_rtm_delroute above.
 */
1569 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1571 struct rtmsg *r = NLMSG_DATA(nlh);
1572 struct in6_rtmsg rtmsg;
1574 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1576 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
/*
 * Bundle passed through the fib6 walker to rt6_dump_route(): the skb being
 * filled and the netlink dump callback driving the iteration.
 */
1579 struct rt6_rtnl_dump_arg
1581 struct sk_buff *skb;
1582 struct netlink_callback *cb;
/*
 * rt6_fill_node - append one RTM_* route message describing @rt to @skb.
 *
 * @dst/@src: when non-NULL, the concrete addresses from a route lookup
 * (getroute); the message then advertises a /128 for that field instead of
 * the stored prefix.
 * @prefix: when set, emit only RTF_PREFIX_RT routes and silently skip others.
 *
 * On truncation the NLMSG_NEW/RTA_PUT macros jump to nlmsg_failure /
 * rtattr_failure (labels elided from this listing) and the skb is trimmed
 * back to its original tail. NOTE(review): several lines are elided
 * (e.g. 1598-1601, 1657-1658, 1662-1664); verify against the full source
 * before modifying control flow.
 */
1585 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1586 struct in6_addr *dst, struct in6_addr *src,
1587 int iif, int type, u32 pid, u32 seq,
1588 int prefix, unsigned int flags)
1591 struct nlmsghdr *nlh;
/* Remember the starting tail so a failed fill can be rolled back. */
1592 unsigned char *b = skb->tail;
1593 struct rta_cacheinfo ci;
1595 if (prefix) { /* user wants prefix routes only */
1596 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1597 /* success since this is not a prefix route */
1602 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1603 rtm = NLMSG_DATA(nlh);
1604 rtm->rtm_family = AF_INET6;
1605 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1606 rtm->rtm_src_len = rt->rt6i_src.plen;
1608 rtm->rtm_table = RT_TABLE_MAIN;
/* Classify the route type from its flags / device. */
1609 if (rt->rt6i_flags&RTF_REJECT)
1610 rtm->rtm_type = RTN_UNREACHABLE;
1611 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1612 rtm->rtm_type = RTN_LOCAL;
1614 rtm->rtm_type = RTN_UNICAST;
1616 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
/* Derive the originating protocol: redirect > addrconf > RA > stored. */
1617 rtm->rtm_protocol = rt->rt6i_protocol;
1618 if (rt->rt6i_flags&RTF_DYNAMIC)
1619 rtm->rtm_protocol = RTPROT_REDIRECT;
1620 else if (rt->rt6i_flags & RTF_ADDRCONF)
1621 rtm->rtm_protocol = RTPROT_KERNEL;
1622 else if (rt->rt6i_flags&RTF_DEFAULT)
1623 rtm->rtm_protocol = RTPROT_RA;
1625 if (rt->rt6i_flags&RTF_CACHE)
1626 rtm->rtm_flags |= RTM_F_CLONED;
/* Destination: exact address (/128) for lookups, stored prefix otherwise. */
1629 RTA_PUT(skb, RTA_DST, 16, dst);
1630 rtm->rtm_dst_len = 128;
1631 } else if (rtm->rtm_dst_len)
1632 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1633 #ifdef CONFIG_IPV6_SUBTREES
1635 RTA_PUT(skb, RTA_SRC, 16, src);
1636 rtm->rtm_src_len = 128;
1637 } else if (rtm->rtm_src_len)
1638 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1641 RTA_PUT(skb, RTA_IIF, 4, &iif);
/* Report the preferred source address the stack would pick for @dst. */
1643 struct in6_addr saddr_buf;
1644 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1645 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1647 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1648 goto rtattr_failure;
1649 if (rt->u.dst.neighbour)
1650 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1652 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1653 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
/* Cache bookkeeping, converted from jiffies to userspace clock ticks. */
1654 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1655 if (rt->rt6i_expires)
1656 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1659 ci.rta_used = rt->u.dst.__use;
1660 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1661 ci.rta_error = rt->u.dst.error;
1665 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
/* Patch the final message length now that all attributes are in. */
1666 nlh->nlmsg_len = skb->tail - b;
/* Failure path: discard the partially-built message. */
1671 skb_trim(skb, b - skb->data);
/*
 * rt6_dump_route - per-route callback for a netlink dump: decide whether the
 * requester asked for prefix routes only (RTM_F_PREFIX in the request header)
 * and emit the route via rt6_fill_node with NLM_F_MULTI.
 */
1675 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1677 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
/* Only trust rtm_flags if the request actually carried a full rtmsg. */
1680 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1681 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1682 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1686 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1687 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1688 prefix, NLM_F_MULTI);
/*
 * fib6_dump_node - walker callback: dump every route hanging off a fib6
 * leaf. A negative result from rt6_dump_route means the skb is full and
 * the walk is suspended (resume logic elided from this listing).
 */
1691 static int fib6_dump_node(struct fib6_walker_t *w)
1694 struct rt6_info *rt;
1696 for (rt = w->leaf; rt; rt = rt->u.next) {
1697 res = rt6_dump_route(rt, w->args);
1699 /* Frame is full, suspend walking */
/*
 * fib6_dump_end - tear down dump state stashed in the netlink callback:
 * unlink the walker (args[0]) and restore the original done handler
 * saved in args[1] by inet6_dump_fib().
 */
1709 static void fib6_dump_end(struct netlink_callback *cb)
1711 struct fib6_walker_t *w = (void*)cb->args[0];
1715 fib6_walker_unlink(w);
1718 cb->done = (void*)cb->args[1];
/*
 * fib6_dump_done - netlink done hook; chains to the destructor that was
 * in place before inet6_dump_fib() hooked itself in (via fib6_dump_end,
 * elided call presumably precedes this return).
 */
1722 static int fib6_dump_done(struct netlink_callback *cb)
1725 return cb->done ? cb->done(cb) : 0;
/*
 * inet6_dump_fib - netlink dump entry point for the IPv6 routing table.
 * First call allocates a fib6 walker, hooks fib6_dump_done as destructor,
 * and starts the walk; subsequent calls resume the suspended walker.
 * State lives in cb->args[0] (walker) and cb->args[1] (saved done hook).
 * NOTE(review): many lines are elided (allocation-failure path, the
 * first-call walk start around 1758-1760, cleanup around 1776-1782);
 * consult the full source before changing this flow.
 */
1728 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1730 struct rt6_rtnl_dump_arg arg;
1731 struct fib6_walker_t *w;
1737 w = (void*)cb->args[0];
1741 * 1. hook callback destructor.
1743 cb->args[1] = (long)cb->done;
1744 cb->done = fib6_dump_done;
1747 * 2. allocate and initialize walker.
1749 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1752 RT6_TRACE("dump<%p", w);
1753 memset(w, 0, sizeof(*w));
1754 w->root = &ip6_routing_table;
1755 w->func = fib6_dump_node;
1757 cb->args[0] = (long)w;
1758 read_lock_bh(&rt6_lock);
1760 read_unlock_bh(&rt6_lock);
/* Resume path: continue a previously suspended walk. */
1763 read_lock_bh(&rt6_lock);
1764 res = fib6_walk_continue(w);
1765 read_unlock_bh(&rt6_lock);
1768 if (res <= 0 && skb->len == 0)
1769 RT6_TRACE("%p>dump end\n", w);
1771 res = res < 0 ? res : skb->len;
1772 /* res < 0 is an error. (really, impossible)
1773 res == 0 means that dump is complete, but skb still can contain data.
1774 res > 0 dump is not complete, but frame is full.
1776 /* Destroy walker, if dump of this table is complete. */
/*
 * inet6_rtm_getroute - netlink RTM_GETROUTE handler: build a flow from the
 * request attributes, perform a route lookup, and unicast the resulting
 * route description back to the requester.
 * NOTE(review): error handling (allocation failure, attribute length
 * checks, iif validation, error unwinding) is elided from this listing --
 * confirm against the full source.
 */
1782 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1784 struct rtattr **rta = arg;
1787 struct sk_buff *skb;
1789 struct rt6_info *rt;
1791 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1795 /* Reserve room for dummy headers, this skb can pass
1796 through good chunk of routing engine.
1798 skb->mac.raw = skb->data;
1799 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
/* Assemble the lookup key from request attributes. */
1801 memset(&fl, 0, sizeof(fl));
1803 ipv6_addr_copy(&fl.fl6_src,
1804 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1806 ipv6_addr_copy(&fl.fl6_dst,
1807 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1810 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
/* An input interface, if given, must exist. */
1813 struct net_device *dev;
1814 dev = __dev_get_by_index(iif);
1823 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1825 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
/* Attach the dst so the reply skb owns a reference to the route. */
1827 skb->dst = &rt->u.dst;
1829 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1830 err = rt6_fill_node(skb, rt,
1831 &fl.fl6_dst, &fl.fl6_src,
1833 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1834 nlh->nlmsg_seq, 0, 0);
1840 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
/*
 * inet6_rt_notify - broadcast a route change (@event, e.g. RTM_NEWROUTE /
 * RTM_DELROUTE) to RTNLGRP_IPV6_ROUTE listeners. On allocation or fill
 * failure, listeners are told via netlink_set_err() instead.
 * @nlh/@req: originating request, if any, used for seq/pid echoing.
 */
1850 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1851 struct netlink_skb_parms *req)
1853 struct sk_buff *skb;
/* Headroom estimate: rtmsg plus a generous 256 bytes of attributes. */
1854 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1855 u32 pid = current->pid;
1861 seq = nlh->nlmsg_seq;
1863 skb = alloc_skb(size, gfp_any());
1865 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1868 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1870 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1873 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1874 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1881 #ifdef CONFIG_PROC_FS
/*
 * Fixed length of one /proc/net/ipv6_route record: dst addr + plen,
 * src addr + plen, gateway, metrics/flags columns, device name, newline.
 * rt6_proc_info relies on every record having exactly this length.
 */
1883 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * rt6_info_route - format one route as a fixed-width /proc/net/ipv6_route
 * line into arg->buffer. Records before the requested offset are skipped;
 * output stops once the caller's buffer length is reached.
 * NOTE(review): several arg->len updates and skip/return lines are elided
 * from this listing (e.g. 1900-1903, 1910-1914) -- confirm against the
 * full source.
 */
1894 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1896 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
/* Skip whole records until we reach the requested file offset. */
1899 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1904 if (arg->len >= arg->length)
/* Destination prefix: 32 hex chars followed by the prefix length. */
1907 for (i=0; i<16; i++) {
1908 sprintf(arg->buffer + arg->len, "%02x",
1909 rt->rt6i_dst.addr.s6_addr[i]);
1912 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1915 #ifdef CONFIG_IPV6_SUBTREES
1916 for (i=0; i<16; i++) {
1917 sprintf(arg->buffer + arg->len, "%02x",
1918 rt->rt6i_src.addr.s6_addr[i]);
1921 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
/* Without subtrees the source column is all zeros. */
1924 sprintf(arg->buffer + arg->len,
1925 "00000000000000000000000000000000 00 ");
/* Next hop, or zeros when the route has no neighbour entry. */
1929 if (rt->rt6i_nexthop) {
1930 for (i=0; i<16; i++) {
1931 sprintf(arg->buffer + arg->len, "%02x",
1932 rt->rt6i_nexthop->primary_key[i]);
1936 sprintf(arg->buffer + arg->len,
1937 "00000000000000000000000000000000");
1940 arg->len += sprintf(arg->buffer + arg->len,
1941 " %08x %08x %08x %08x %8s\n",
1942 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1943 rt->u.dst.__use, rt->rt6i_flags,
1944 rt->rt6i_dev ? rt->rt6i_dev->name : "");
/*
 * rt6_proc_info - legacy procfs read handler for /proc/net/ipv6_route.
 * Walks the routing tree under the read lock, formatting records via
 * rt6_info_route, then adjusts start/len for the sub-record offset since
 * every record is exactly RT6_INFO_LEN bytes.
 */
1948 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1950 struct rt6_proc_arg arg;
1951 arg.buffer = buffer;
1952 arg.offset = offset;
1953 arg.length = length;
1957 read_lock_bh(&rt6_lock);
1958 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1959 read_unlock_bh(&rt6_lock);
/* Account for an offset landing mid-record. */
1963 *start += offset % RT6_INFO_LEN;
1965 arg.len -= offset % RT6_INFO_LEN;
1967 if (arg.len > length)
/*
 * rt6_stats_seq_show - emit the single /proc/net/rt6_stats line: fib node
 * and route counters plus the live dst-entry count.
 */
1975 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1977 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1978 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1979 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1980 rt6_stats.fib_rt_cache,
1981 atomic_read(&ip6_dst_ops.entries),
1982 rt6_stats.fib_discarded_routes);
/* Open handler: single-record seq_file showing rt6_stats_seq_show. */
1987 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1989 return single_open(file, rt6_stats_seq_show, NULL);
/*
 * File operations for /proc/net/rt6_stats, wired up in ip6_route_init().
 * (The .read = seq_read entry is elided from this listing.)
 */
1992 static struct file_operations rt6_stats_seq_fops = {
1993 .owner = THIS_MODULE,
1994 .open = rt6_stats_seq_open,
1996 .llseek = seq_lseek,
1997 .release = single_release,
1999 #endif /* CONFIG_PROC_FS */
2001 #ifdef CONFIG_SYSCTL
/* Seconds written to net.ipv6.route.flush; consumed by the handler below. */
2003 static int flush_delay;
/*
 * ipv6_sysctl_rtcache_flush - sysctl handler for "flush": read the delay
 * from userspace, then trigger garbage collection. A non-positive delay
 * (~0UL) means flush everything immediately.
 * NOTE(review): the write-only guard (returning -EINVAL on reads) is
 * elided from this listing -- confirm against the full source.
 */
2006 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2007 void __user *buffer, size_t *lenp, loff_t *ppos)
2010 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2011 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
/*
 * net.ipv6.route.* sysctl table. Interval/timeout knobs are stored in
 * jiffies and converted via proc_dointvec_jiffies; gc_min_interval is
 * additionally exposed in milliseconds as gc_min_interval_ms (same
 * backing variable, ms conversion). The .mode fields are elided from
 * this listing.
 */
2017 ctl_table ipv6_route_table[] = {
2019 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2020 .procname = "flush",
2021 .data = &flush_delay,
2022 .maxlen = sizeof(int),
2024 .proc_handler = &ipv6_sysctl_rtcache_flush
2027 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2028 .procname = "gc_thresh",
2029 .data = &ip6_dst_ops.gc_thresh,
2030 .maxlen = sizeof(int),
2032 .proc_handler = &proc_dointvec,
2035 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2036 .procname = "max_size",
2037 .data = &ip6_rt_max_size,
2038 .maxlen = sizeof(int),
2040 .proc_handler = &proc_dointvec,
2043 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2044 .procname = "gc_min_interval",
2045 .data = &ip6_rt_gc_min_interval,
2046 .maxlen = sizeof(int),
2048 .proc_handler = &proc_dointvec_jiffies,
2049 .strategy = &sysctl_jiffies,
2052 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2053 .procname = "gc_timeout",
2054 .data = &ip6_rt_gc_timeout,
2055 .maxlen = sizeof(int),
2057 .proc_handler = &proc_dointvec_jiffies,
2058 .strategy = &sysctl_jiffies,
2061 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2062 .procname = "gc_interval",
2063 .data = &ip6_rt_gc_interval,
2064 .maxlen = sizeof(int),
2066 .proc_handler = &proc_dointvec_jiffies,
2067 .strategy = &sysctl_jiffies,
2070 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2071 .procname = "gc_elasticity",
2072 .data = &ip6_rt_gc_elasticity,
2073 .maxlen = sizeof(int),
2075 .proc_handler = &proc_dointvec_jiffies,
2076 .strategy = &sysctl_jiffies,
2079 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2080 .procname = "mtu_expires",
2081 .data = &ip6_rt_mtu_expires,
2082 .maxlen = sizeof(int),
2084 .proc_handler = &proc_dointvec_jiffies,
2085 .strategy = &sysctl_jiffies,
2088 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2089 .procname = "min_adv_mss",
2090 .data = &ip6_rt_min_advmss,
2091 .maxlen = sizeof(int),
2093 .proc_handler = &proc_dointvec_jiffies,
2094 .strategy = &sysctl_jiffies,
2097 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2098 .procname = "gc_min_interval_ms",
2099 .data = &ip6_rt_gc_min_interval,
2100 .maxlen = sizeof(int),
2102 .proc_handler = &proc_dointvec_ms_jiffies,
2103 .strategy = &sysctl_ms_jiffies,
/*
 * ip6_route_init - boot-time setup for the IPv6 routing subsystem:
 * create the rt6_info slab cache (panics on failure, as routing is
 * essential) and register the procfs entries.
 */
2110 void __init ip6_route_init(void)
2112 struct proc_dir_entry *p;
2114 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2115 sizeof(struct rt6_info),
2116 0, SLAB_HWCACHE_ALIGN,
2118 if (!ip6_dst_ops.kmem_cachep)
2119 panic("cannot create ip6_dst_cache");
2122 #ifdef CONFIG_PROC_FS
2123 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2125 p->owner = THIS_MODULE;
2127 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
/*
 * ip6_route_cleanup - teardown counterpart of ip6_route_init: remove the
 * procfs entries and destroy the rt6_info slab cache. (Intermediate
 * cleanup steps at listing lines 2139-2144 are elided here.)
 */
2134 void ip6_route_cleanup(void)
2136 #ifdef CONFIG_PROC_FS
2137 proc_net_remove("ipv6_route");
2138 proc_net_remove("rt6_stats");
2145 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);