[IPV6]: ROUTE: Convert rt6_cow() to rt6_alloc_cow().
[pandora-kernel.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug tracing switch.  With RT6_DEBUG at 3 or above, RDBG() and
 * RT6_TRACE() expand to printk() calls; at any lower level both compile
 * away to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
75
76 static int ip6_rt_max_size = 4096;
77 static int ip6_rt_gc_min_interval = HZ / 2;
78 static int ip6_rt_gc_timeout = 60*HZ;
79 int ip6_rt_gc_interval = 30*HZ;
80 static int ip6_rt_gc_elasticity = 9;
81 static int ip6_rt_mtu_expires = 10*60*HZ;
82 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83
84 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87 static void             ip6_dst_destroy(struct dst_entry *);
88 static void             ip6_dst_ifdown(struct dst_entry *,
89                                        struct net_device *dev, int how);
90 static int               ip6_dst_gc(void);
91
92 static int              ip6_pkt_discard(struct sk_buff *skb);
93 static int              ip6_pkt_discard_out(struct sk_buff *skb);
94 static void             ip6_link_failure(struct sk_buff *skb);
95 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96
97 static struct dst_ops ip6_dst_ops = {
98         .family                 =       AF_INET6,
99         .protocol               =       __constant_htons(ETH_P_IPV6),
100         .gc                     =       ip6_dst_gc,
101         .gc_thresh              =       1024,
102         .check                  =       ip6_dst_check,
103         .destroy                =       ip6_dst_destroy,
104         .ifdown                 =       ip6_dst_ifdown,
105         .negative_advice        =       ip6_negative_advice,
106         .link_failure           =       ip6_link_failure,
107         .update_pmtu            =       ip6_rt_update_pmtu,
108         .entry_size             =       sizeof(struct rt6_info),
109 };
110
111 struct rt6_info ip6_null_entry = {
112         .u = {
113                 .dst = {
114                         .__refcnt       = ATOMIC_INIT(1),
115                         .__use          = 1,
116                         .dev            = &loopback_dev,
117                         .obsolete       = -1,
118                         .error          = -ENETUNREACH,
119                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
120                         .input          = ip6_pkt_discard,
121                         .output         = ip6_pkt_discard_out,
122                         .ops            = &ip6_dst_ops,
123                         .path           = (struct dst_entry*)&ip6_null_entry,
124                 }
125         },
126         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
127         .rt6i_metric    = ~(u32) 0,
128         .rt6i_ref       = ATOMIC_INIT(1),
129 };
130
131 struct fib6_node ip6_routing_table = {
132         .leaf           = &ip6_null_entry,
133         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
134 };
135
136 /* Protects all the ip6 fib */
137
138 DEFINE_RWLOCK(rt6_lock);
139
140
141 /* allocate dst with ip6_dst_ops */
142 static __inline__ struct rt6_info *ip6_dst_alloc(void)
143 {
144         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
145 }
146
147 static void ip6_dst_destroy(struct dst_entry *dst)
148 {
149         struct rt6_info *rt = (struct rt6_info *)dst;
150         struct inet6_dev *idev = rt->rt6i_idev;
151
152         if (idev != NULL) {
153                 rt->rt6i_idev = NULL;
154                 in6_dev_put(idev);
155         }       
156 }
157
158 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
159                            int how)
160 {
161         struct rt6_info *rt = (struct rt6_info *)dst;
162         struct inet6_dev *idev = rt->rt6i_idev;
163
164         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166                 if (loopback_idev != NULL) {
167                         rt->rt6i_idev = loopback_idev;
168                         in6_dev_put(idev);
169                 }
170         }
171 }
172
173 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
174 {
175         return (rt->rt6i_flags & RTF_EXPIRES &&
176                 time_after(jiffies, rt->rt6i_expires));
177 }
178
/*
 *	Route lookup. Callers must already hold rt6_lock (read or write).
 */
182
183 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
184                                                     int oif,
185                                                     int strict)
186 {
187         struct rt6_info *local = NULL;
188         struct rt6_info *sprt;
189
190         if (oif) {
191                 for (sprt = rt; sprt; sprt = sprt->u.next) {
192                         struct net_device *dev = sprt->rt6i_dev;
193                         if (dev->ifindex == oif)
194                                 return sprt;
195                         if (dev->flags & IFF_LOOPBACK) {
196                                 if (sprt->rt6i_idev == NULL ||
197                                     sprt->rt6i_idev->dev->ifindex != oif) {
198                                         if (strict && oif)
199                                                 continue;
200                                         if (local && (!oif || 
201                                                       local->rt6i_idev->dev->ifindex == oif))
202                                                 continue;
203                                 }
204                                 local = sprt;
205                         }
206                 }
207
208                 if (local)
209                         return local;
210
211                 if (strict)
212                         return &ip6_null_entry;
213         }
214         return rt;
215 }
216
217 /*
218  *      pointer to the last default router chosen. BH is disabled locally.
219  */
220 static struct rt6_info *rt6_dflt_pointer;
221 static DEFINE_SPINLOCK(rt6_dflt_lock);
222
223 void rt6_reset_dflt_pointer(struct rt6_info *rt)
224 {
225         spin_lock_bh(&rt6_dflt_lock);
226         if (rt == NULL || rt == rt6_dflt_pointer) {
227                 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228                 rt6_dflt_pointer = NULL;
229         }
230         spin_unlock_bh(&rt6_dflt_lock);
231 }
232
233 /* Default Router Selection (RFC 2461 6.3.6) */
234 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
235 {
236         struct rt6_info *match = NULL;
237         struct rt6_info *sprt;
238         int mpri = 0;
239
240         for (sprt = rt; sprt; sprt = sprt->u.next) {
241                 struct neighbour *neigh;
242                 int m = 0;
243
244                 if (!oif ||
245                     (sprt->rt6i_dev &&
246                      sprt->rt6i_dev->ifindex == oif))
247                         m += 8;
248
249                 if (rt6_check_expired(sprt))
250                         continue;
251
252                 if (sprt == rt6_dflt_pointer)
253                         m += 4;
254
255                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
256                         read_lock_bh(&neigh->lock);
257                         switch (neigh->nud_state) {
258                         case NUD_REACHABLE:
259                                 m += 3;
260                                 break;
261
262                         case NUD_STALE:
263                         case NUD_DELAY:
264                         case NUD_PROBE:
265                                 m += 2;
266                                 break;
267
268                         case NUD_NOARP:
269                         case NUD_PERMANENT:
270                                 m += 1;
271                                 break;
272
273                         case NUD_INCOMPLETE:
274                         default:
275                                 read_unlock_bh(&neigh->lock);
276                                 continue;
277                         }
278                         read_unlock_bh(&neigh->lock);
279                 } else {
280                         continue;
281                 }
282
283                 if (m > mpri || m >= 12) {
284                         match = sprt;
285                         mpri = m;
286                         if (m >= 12) {
287                                 /* we choose the last default router if it
288                                  * is in (probably) reachable state.
289                                  * If route changed, we should do pmtu
290                                  * discovery. --yoshfuji
291                                  */
292                                 break;
293                         }
294                 }
295         }
296
297         spin_lock(&rt6_dflt_lock);
298         if (!match) {
299                 /*
300                  *      No default routers are known to be reachable.
301                  *      SHOULD round robin
302                  */
303                 if (rt6_dflt_pointer) {
304                         for (sprt = rt6_dflt_pointer->u.next;
305                              sprt; sprt = sprt->u.next) {
306                                 if (sprt->u.dst.obsolete <= 0 &&
307                                     sprt->u.dst.error == 0 &&
308                                     !rt6_check_expired(sprt)) {
309                                         match = sprt;
310                                         break;
311                                 }
312                         }
313                         for (sprt = rt;
314                              !match && sprt;
315                              sprt = sprt->u.next) {
316                                 if (sprt->u.dst.obsolete <= 0 &&
317                                     sprt->u.dst.error == 0 &&
318                                     !rt6_check_expired(sprt)) {
319                                         match = sprt;
320                                         break;
321                                 }
322                                 if (sprt == rt6_dflt_pointer)
323                                         break;
324                         }
325                 }
326         }
327
328         if (match) {
329                 if (rt6_dflt_pointer != match)
330                         RT6_TRACE("changed default router: %p->%p\n",
331                                   rt6_dflt_pointer, match);
332                 rt6_dflt_pointer = match;
333         }
334         spin_unlock(&rt6_dflt_lock);
335
336         if (!match) {
337                 /*
338                  * Last Resort: if no default routers found, 
339                  * use addrconf default route.
340                  * We don't record this route.
341                  */
342                 for (sprt = ip6_routing_table.leaf;
343                      sprt; sprt = sprt->u.next) {
344                         if (!rt6_check_expired(sprt) &&
345                             (sprt->rt6i_flags & RTF_DEFAULT) &&
346                             (!oif ||
347                              (sprt->rt6i_dev &&
348                               sprt->rt6i_dev->ifindex == oif))) {
349                                 match = sprt;
350                                 break;
351                         }
352                 }
353                 if (!match) {
354                         /* no default route.  give up. */
355                         match = &ip6_null_entry;
356                 }
357         }
358
359         return match;
360 }
361
362 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
363                             int oif, int strict)
364 {
365         struct fib6_node *fn;
366         struct rt6_info *rt;
367
368         read_lock_bh(&rt6_lock);
369         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
370         rt = rt6_device_match(fn->leaf, oif, strict);
371         dst_hold(&rt->u.dst);
372         rt->u.dst.__use++;
373         read_unlock_bh(&rt6_lock);
374
375         rt->u.dst.lastuse = jiffies;
376         if (rt->u.dst.error == 0)
377                 return rt;
378         dst_release(&rt->u.dst);
379         return NULL;
380 }
381
382 /* ip6_ins_rt is called with FREE rt6_lock.
383    It takes new route entry, the addition fails by any reason the
384    route is freed. In any case, if caller does not hold it, it may
385    be destroyed.
386  */
387
388 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
389                 void *_rtattr, struct netlink_skb_parms *req)
390 {
391         int err;
392
393         write_lock_bh(&rt6_lock);
394         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
395         write_unlock_bh(&rt6_lock);
396
397         return err;
398 }
399
400 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
401                                       struct in6_addr *saddr)
402 {
403         struct rt6_info *rt;
404
405         /*
406          *      Clone the route.
407          */
408
409         rt = ip6_rt_copy(ort);
410
411         if (rt) {
412                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
413                         if (rt->rt6i_dst.plen != 128 &&
414                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
415                                 rt->rt6i_flags |= RTF_ANYCAST;
416                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
417                 }
418
419                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
420                 rt->rt6i_dst.plen = 128;
421                 rt->rt6i_flags |= RTF_CACHE;
422                 rt->u.dst.flags |= DST_HOST;
423
424 #ifdef CONFIG_IPV6_SUBTREES
425                 if (rt->rt6i_src.plen && saddr) {
426                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
427                         rt->rt6i_src.plen = 128;
428                 }
429 #endif
430
431                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
432
433         }
434
435         return rt;
436 }
437
438 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
439 {
440         struct rt6_info *rt = ip6_rt_copy(ort);
441         if (rt) {
442                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
443                 rt->rt6i_dst.plen = 128;
444                 rt->rt6i_flags |= RTF_CACHE;
445                 if (rt->rt6i_flags & RTF_REJECT)
446                         rt->u.dst.error = ort->u.dst.error;
447                 rt->u.dst.flags |= DST_HOST;
448                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
449         }
450         return rt;
451 }
452
/*
 * BACKTRACK() - climb the FIB tree after a strict lookup came up empty.
 *
 * Expects the locals `rt`, `fn` and `strict` and the labels `out` and
 * `restart` to exist in the calling function.  When `rt` is
 * ip6_null_entry and `strict` is set, `fn` is walked up through its
 * parents: reaching a node flagged RTN_ROOT jumps to `out`, reaching a
 * node flagged RTN_RTINFO jumps to `restart`.  In every other case
 * execution simply continues after the macro.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) \
				goto out; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
463
464
465 void ip6_route_input(struct sk_buff *skb)
466 {
467         struct fib6_node *fn;
468         struct rt6_info *rt;
469         int strict;
470         int attempts = 3;
471
472         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
473
474 relookup:
475         read_lock_bh(&rt6_lock);
476
477         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
478                          &skb->nh.ipv6h->saddr);
479
480 restart:
481         rt = fn->leaf;
482
483         if ((rt->rt6i_flags & RTF_CACHE)) {
484                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
485                 BACKTRACK();
486                 goto out;
487         }
488
489         rt = rt6_device_match(rt, skb->dev->ifindex, strict);
490         BACKTRACK();
491
492         dst_hold(&rt->u.dst);
493         read_unlock_bh(&rt6_lock);
494
495         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
496                 struct rt6_info *nrt;
497                 int err;
498
499                 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr,
500                                     &skb->nh.ipv6h->saddr);
501
502                 dst_release(&rt->u.dst);
503                 rt = nrt ? : &ip6_null_entry;
504
505                 dst_hold(&rt->u.dst);
506                 if (nrt) {
507                         err = ip6_ins_rt(nrt, NULL, NULL,
508                                          &NETLINK_CB(skb));
509                         if (!err)
510                                 goto out2;
511                 }
512
513                 if (--attempts <= 0)
514                         goto out2;
515
516                 /* Race condition! In the gap, when rt6_lock was
517                    released someone could insert this route.  Relookup.
518                 */
519                 dst_release(&rt->u.dst);
520                 goto relookup;
521         }
522
523 out2:
524         rt->u.dst.lastuse = jiffies;
525         rt->u.dst.__use++;
526         skb->dst = (struct dst_entry *) rt;
527         return;
528 out:
529         dst_hold(&rt->u.dst);
530         read_unlock_bh(&rt6_lock);
531         goto out2;
532 }
533
534 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
535 {
536         struct fib6_node *fn;
537         struct rt6_info *rt;
538         int strict;
539         int attempts = 3;
540
541         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
542
543 relookup:
544         read_lock_bh(&rt6_lock);
545
546         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
547
548 restart:
549         rt = fn->leaf;
550
551         if ((rt->rt6i_flags & RTF_CACHE)) {
552                 rt = rt6_device_match(rt, fl->oif, strict);
553                 BACKTRACK();
554                 goto out;
555         }
556         if (rt->rt6i_flags & RTF_DEFAULT) {
557                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
558                         rt = rt6_best_dflt(rt, fl->oif);
559         } else {
560                 rt = rt6_device_match(rt, fl->oif, strict);
561                 BACKTRACK();
562         }
563
564         dst_hold(&rt->u.dst);
565         read_unlock_bh(&rt6_lock);
566
567         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
568                 struct rt6_info *nrt;
569                 int err;
570
571                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
572
573                 dst_release(&rt->u.dst);
574                 rt = nrt ? : &ip6_null_entry;
575
576                 dst_hold(&rt->u.dst);
577                 if (nrt) {
578                         err = ip6_ins_rt(nrt, NULL, NULL, NULL);
579                         if (!err)
580                                 goto out2;
581                 }
582
583                 if (--attempts <= 0)
584                         goto out2;
585
586                 /* Race condition! In the gap, when rt6_lock was
587                    released someone could insert this route.  Relookup.
588                 */
589                 dst_release(&rt->u.dst);
590                 goto relookup;
591         }
592
593 out2:
594         rt->u.dst.lastuse = jiffies;
595         rt->u.dst.__use++;
596         return &rt->u.dst;
597 out:
598         dst_hold(&rt->u.dst);
599         read_unlock_bh(&rt6_lock);
600         goto out2;
601 }
602
603
604 /*
605  *      Destination cache support functions
606  */
607
608 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
609 {
610         struct rt6_info *rt;
611
612         rt = (struct rt6_info *) dst;
613
614         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
615                 return dst;
616
617         return NULL;
618 }
619
620 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
621 {
622         struct rt6_info *rt = (struct rt6_info *) dst;
623
624         if (rt) {
625                 if (rt->rt6i_flags & RTF_CACHE)
626                         ip6_del_rt(rt, NULL, NULL, NULL);
627                 else
628                         dst_release(dst);
629         }
630         return NULL;
631 }
632
633 static void ip6_link_failure(struct sk_buff *skb)
634 {
635         struct rt6_info *rt;
636
637         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
638
639         rt = (struct rt6_info *) skb->dst;
640         if (rt) {
641                 if (rt->rt6i_flags&RTF_CACHE) {
642                         dst_set_expires(&rt->u.dst, 0);
643                         rt->rt6i_flags |= RTF_EXPIRES;
644                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
645                         rt->rt6i_node->fn_sernum = -1;
646         }
647 }
648
649 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
650 {
651         struct rt6_info *rt6 = (struct rt6_info*)dst;
652
653         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
654                 rt6->rt6i_flags |= RTF_MODIFIED;
655                 if (mtu < IPV6_MIN_MTU) {
656                         mtu = IPV6_MIN_MTU;
657                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
658                 }
659                 dst->metrics[RTAX_MTU-1] = mtu;
660         }
661 }
662
/* Forward declaration; the definition follows further down. */
static int ipv6_get_mtu(struct net_device *dev);

/*
 * List (linked through dst->next) of the dst entries handed out by
 * ndisc_dst_alloc() and reaped by ndisc_dst_gc().
 * Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;
666
667 static inline unsigned int ipv6_advmss(unsigned int mtu)
668 {
669         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
670
671         if (mtu < ip6_rt_min_advmss)
672                 mtu = ip6_rt_min_advmss;
673
674         /*
675          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
676          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
677          * IPV6_MAXPLEN is also valid and means: "any MSS, 
678          * rely only on pmtu discovery"
679          */
680         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
681                 mtu = IPV6_MAXPLEN;
682         return mtu;
683 }
684
685 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
686                                   struct neighbour *neigh,
687                                   struct in6_addr *addr,
688                                   int (*output)(struct sk_buff *))
689 {
690         struct rt6_info *rt;
691         struct inet6_dev *idev = in6_dev_get(dev);
692
693         if (unlikely(idev == NULL))
694                 return NULL;
695
696         rt = ip6_dst_alloc();
697         if (unlikely(rt == NULL)) {
698                 in6_dev_put(idev);
699                 goto out;
700         }
701
702         dev_hold(dev);
703         if (neigh)
704                 neigh_hold(neigh);
705         else
706                 neigh = ndisc_get_neigh(dev, addr);
707
708         rt->rt6i_dev      = dev;
709         rt->rt6i_idev     = idev;
710         rt->rt6i_nexthop  = neigh;
711         atomic_set(&rt->u.dst.__refcnt, 1);
712         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
713         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
714         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
715         rt->u.dst.output  = output;
716
717 #if 0   /* there's no chance to use these for ndisc */
718         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
719                                 ? DST_HOST 
720                                 : 0;
721         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
722         rt->rt6i_dst.plen = 128;
723 #endif
724
725         write_lock_bh(&rt6_lock);
726         rt->u.dst.next = ndisc_dst_gc_list;
727         ndisc_dst_gc_list = &rt->u.dst;
728         write_unlock_bh(&rt6_lock);
729
730         fib6_force_start_gc();
731
732 out:
733         return (struct dst_entry *)rt;
734 }
735
736 int ndisc_dst_gc(int *more)
737 {
738         struct dst_entry *dst, *next, **pprev;
739         int freed;
740
741         next = NULL;
742         pprev = &ndisc_dst_gc_list;
743         freed = 0;
744         while ((dst = *pprev) != NULL) {
745                 if (!atomic_read(&dst->__refcnt)) {
746                         *pprev = dst->next;
747                         dst_free(dst);
748                         freed++;
749                 } else {
750                         pprev = &dst->next;
751                         (*more)++;
752                 }
753         }
754
755         return freed;
756 }
757
758 static int ip6_dst_gc(void)
759 {
760         static unsigned expire = 30*HZ;
761         static unsigned long last_gc;
762         unsigned long now = jiffies;
763
764         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
765             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
766                 goto out;
767
768         expire++;
769         fib6_run_gc(expire);
770         last_gc = now;
771         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
772                 expire = ip6_rt_gc_timeout>>1;
773
774 out:
775         expire -= expire>>ip6_rt_gc_elasticity;
776         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
777 }
778
779 /* Clean host part of a prefix. Not necessary in radix tree,
780    but results in cleaner routing tables.
781
782    Remove it only when all the things will work!
783  */
784
785 static int ipv6_get_mtu(struct net_device *dev)
786 {
787         int mtu = IPV6_MIN_MTU;
788         struct inet6_dev *idev;
789
790         idev = in6_dev_get(dev);
791         if (idev) {
792                 mtu = idev->cnf.mtu6;
793                 in6_dev_put(idev);
794         }
795         return mtu;
796 }
797
798 int ipv6_get_hoplimit(struct net_device *dev)
799 {
800         int hoplimit = ipv6_devconf.hop_limit;
801         struct inet6_dev *idev;
802
803         idev = in6_dev_get(dev);
804         if (idev) {
805                 hoplimit = idev->cnf.hop_limit;
806                 in6_dev_put(idev);
807         }
808         return hoplimit;
809 }
810
811 /*
812  *
813  */
814
815 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
816                 void *_rtattr, struct netlink_skb_parms *req)
817 {
818         int err;
819         struct rtmsg *r;
820         struct rtattr **rta;
821         struct rt6_info *rt = NULL;
822         struct net_device *dev = NULL;
823         struct inet6_dev *idev = NULL;
824         int addr_type;
825
826         rta = (struct rtattr **) _rtattr;
827
828         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
829                 return -EINVAL;
830 #ifndef CONFIG_IPV6_SUBTREES
831         if (rtmsg->rtmsg_src_len)
832                 return -EINVAL;
833 #endif
834         if (rtmsg->rtmsg_ifindex) {
835                 err = -ENODEV;
836                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
837                 if (!dev)
838                         goto out;
839                 idev = in6_dev_get(dev);
840                 if (!idev)
841                         goto out;
842         }
843
844         if (rtmsg->rtmsg_metric == 0)
845                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
846
847         rt = ip6_dst_alloc();
848
849         if (rt == NULL) {
850                 err = -ENOMEM;
851                 goto out;
852         }
853
854         rt->u.dst.obsolete = -1;
855         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
856         if (nlh && (r = NLMSG_DATA(nlh))) {
857                 rt->rt6i_protocol = r->rtm_protocol;
858         } else {
859                 rt->rt6i_protocol = RTPROT_BOOT;
860         }
861
862         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
863
864         if (addr_type & IPV6_ADDR_MULTICAST)
865                 rt->u.dst.input = ip6_mc_input;
866         else
867                 rt->u.dst.input = ip6_forward;
868
869         rt->u.dst.output = ip6_output;
870
871         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
872                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
873         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
874         if (rt->rt6i_dst.plen == 128)
875                rt->u.dst.flags = DST_HOST;
876
877 #ifdef CONFIG_IPV6_SUBTREES
878         ipv6_addr_prefix(&rt->rt6i_src.addr, 
879                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
880         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
881 #endif
882
883         rt->rt6i_metric = rtmsg->rtmsg_metric;
884
885         /* We cannot add true routes via loopback here,
886            they would result in kernel looping; promote them to reject routes
887          */
888         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
889             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
890                 /* hold loopback dev/idev if we haven't done so. */
891                 if (dev != &loopback_dev) {
892                         if (dev) {
893                                 dev_put(dev);
894                                 in6_dev_put(idev);
895                         }
896                         dev = &loopback_dev;
897                         dev_hold(dev);
898                         idev = in6_dev_get(dev);
899                         if (!idev) {
900                                 err = -ENODEV;
901                                 goto out;
902                         }
903                 }
904                 rt->u.dst.output = ip6_pkt_discard_out;
905                 rt->u.dst.input = ip6_pkt_discard;
906                 rt->u.dst.error = -ENETUNREACH;
907                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
908                 goto install_route;
909         }
910
911         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
912                 struct in6_addr *gw_addr;
913                 int gwa_type;
914
915                 gw_addr = &rtmsg->rtmsg_gateway;
916                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
917                 gwa_type = ipv6_addr_type(gw_addr);
918
919                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
920                         struct rt6_info *grt;
921
922                         /* IPv6 strictly inhibits using not link-local
923                            addresses as nexthop address.
924                            Otherwise, router will not able to send redirects.
925                            It is very good, but in some (rare!) circumstances
926                            (SIT, PtP, NBMA NOARP links) it is handy to allow
927                            some exceptions. --ANK
928                          */
929                         err = -EINVAL;
930                         if (!(gwa_type&IPV6_ADDR_UNICAST))
931                                 goto out;
932
933                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
934
935                         err = -EHOSTUNREACH;
936                         if (grt == NULL)
937                                 goto out;
938                         if (dev) {
939                                 if (dev != grt->rt6i_dev) {
940                                         dst_release(&grt->u.dst);
941                                         goto out;
942                                 }
943                         } else {
944                                 dev = grt->rt6i_dev;
945                                 idev = grt->rt6i_idev;
946                                 dev_hold(dev);
947                                 in6_dev_hold(grt->rt6i_idev);
948                         }
949                         if (!(grt->rt6i_flags&RTF_GATEWAY))
950                                 err = 0;
951                         dst_release(&grt->u.dst);
952
953                         if (err)
954                                 goto out;
955                 }
956                 err = -EINVAL;
957                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
958                         goto out;
959         }
960
961         err = -ENODEV;
962         if (dev == NULL)
963                 goto out;
964
965         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
966                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
967                 if (IS_ERR(rt->rt6i_nexthop)) {
968                         err = PTR_ERR(rt->rt6i_nexthop);
969                         rt->rt6i_nexthop = NULL;
970                         goto out;
971                 }
972         }
973
974         rt->rt6i_flags = rtmsg->rtmsg_flags;
975
976 install_route:
977         if (rta && rta[RTA_METRICS-1]) {
978                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
979                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
980
981                 while (RTA_OK(attr, attrlen)) {
982                         unsigned flavor = attr->rta_type;
983                         if (flavor) {
984                                 if (flavor > RTAX_MAX) {
985                                         err = -EINVAL;
986                                         goto out;
987                                 }
988                                 rt->u.dst.metrics[flavor-1] =
989                                         *(u32 *)RTA_DATA(attr);
990                         }
991                         attr = RTA_NEXT(attr, attrlen);
992                 }
993         }
994
995         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
996                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
997         if (!rt->u.dst.metrics[RTAX_MTU-1])
998                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
999         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1000                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1001         rt->u.dst.dev = dev;
1002         rt->rt6i_idev = idev;
1003         return ip6_ins_rt(rt, nlh, _rtattr, req);
1004
1005 out:
1006         if (dev)
1007                 dev_put(dev);
1008         if (idev)
1009                 in6_dev_put(idev);
1010         if (rt)
1011                 dst_free((struct dst_entry *) rt);
1012         return err;
1013 }
1014
1015 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1016 {
1017         int err;
1018
1019         write_lock_bh(&rt6_lock);
1020
1021         rt6_reset_dflt_pointer(NULL);
1022
1023         err = fib6_del(rt, nlh, _rtattr, req);
1024         dst_release(&rt->u.dst);
1025
1026         write_unlock_bh(&rt6_lock);
1027
1028         return err;
1029 }
1030
1031 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1032 {
1033         struct fib6_node *fn;
1034         struct rt6_info *rt;
1035         int err = -ESRCH;
1036
1037         read_lock_bh(&rt6_lock);
1038
1039         fn = fib6_locate(&ip6_routing_table,
1040                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1041                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1042         
1043         if (fn) {
1044                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1045                         if (rtmsg->rtmsg_ifindex &&
1046                             (rt->rt6i_dev == NULL ||
1047                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1048                                 continue;
1049                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1050                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1051                                 continue;
1052                         if (rtmsg->rtmsg_metric &&
1053                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1054                                 continue;
1055                         dst_hold(&rt->u.dst);
1056                         read_unlock_bh(&rt6_lock);
1057
1058                         return ip6_del_rt(rt, nlh, _rtattr, req);
1059                 }
1060         }
1061         read_unlock_bh(&rt6_lock);
1062
1063         return err;
1064 }
1065
1066 /*
1067  *      Handle redirects
1068  */
1069 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1070                   struct neighbour *neigh, u8 *lladdr, int on_link)
1071 {
1072         struct rt6_info *rt, *nrt;
1073
1074         /* Locate old route to this destination. */
1075         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1076
1077         if (rt == NULL)
1078                 return;
1079
1080         if (neigh->dev != rt->rt6i_dev)
1081                 goto out;
1082
1083         /*
1084          * Current route is on-link; redirect is always invalid.
1085          * 
1086          * Seems, previous statement is not true. It could
1087          * be node, which looks for us as on-link (f.e. proxy ndisc)
1088          * But then router serving it might decide, that we should
1089          * know truth 8)8) --ANK (980726).
1090          */
1091         if (!(rt->rt6i_flags&RTF_GATEWAY))
1092                 goto out;
1093
1094         /*
1095          *      RFC 2461 specifies that redirects should only be
1096          *      accepted if they come from the nexthop to the target.
1097          *      Due to the way default routers are chosen, this notion
1098          *      is a bit fuzzy and one might need to check all default
1099          *      routers.
1100          */
1101         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1102                 if (rt->rt6i_flags & RTF_DEFAULT) {
1103                         struct rt6_info *rt1;
1104
1105                         read_lock(&rt6_lock);
1106                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1107                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1108                                         dst_hold(&rt1->u.dst);
1109                                         dst_release(&rt->u.dst);
1110                                         read_unlock(&rt6_lock);
1111                                         rt = rt1;
1112                                         goto source_ok;
1113                                 }
1114                         }
1115                         read_unlock(&rt6_lock);
1116                 }
1117                 if (net_ratelimit())
1118                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1119                                "for redirect target\n");
1120                 goto out;
1121         }
1122
1123 source_ok:
1124
1125         /*
1126          *      We have finally decided to accept it.
1127          */
1128
1129         neigh_update(neigh, lladdr, NUD_STALE, 
1130                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1131                      NEIGH_UPDATE_F_OVERRIDE|
1132                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1133                                      NEIGH_UPDATE_F_ISROUTER))
1134                      );
1135
1136         /*
1137          * Redirect received -> path was valid.
1138          * Look, redirects are sent only in response to data packets,
1139          * so that this nexthop apparently is reachable. --ANK
1140          */
1141         dst_confirm(&rt->u.dst);
1142
1143         /* Duplicate redirect: silently ignore. */
1144         if (neigh == rt->u.dst.neighbour)
1145                 goto out;
1146
1147         nrt = ip6_rt_copy(rt);
1148         if (nrt == NULL)
1149                 goto out;
1150
1151         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1152         if (on_link)
1153                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1154
1155         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1156         nrt->rt6i_dst.plen = 128;
1157         nrt->u.dst.flags |= DST_HOST;
1158
1159         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1160         nrt->rt6i_nexthop = neigh_clone(neigh);
1161         /* Reset pmtu, it may be better */
1162         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1163         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1164
1165         if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1166                 goto out;
1167
1168         if (rt->rt6i_flags&RTF_CACHE) {
1169                 ip6_del_rt(rt, NULL, NULL, NULL);
1170                 return;
1171         }
1172
1173 out:
1174         dst_release(&rt->u.dst);
1175         return;
1176 }
1177
1178 /*
1179  *      Handle ICMP "packet too big" messages
1180  *      i.e. Path MTU discovery
1181  */
1182
1183 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1184                         struct net_device *dev, u32 pmtu)
1185 {
1186         struct rt6_info *rt, *nrt;
1187         int allfrag = 0;
1188
1189         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1190         if (rt == NULL)
1191                 return;
1192
1193         if (pmtu >= dst_mtu(&rt->u.dst))
1194                 goto out;
1195
1196         if (pmtu < IPV6_MIN_MTU) {
1197                 /*
1198                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1199                  * MTU (1280) and a fragment header should always be included
1200                  * after a node receiving Too Big message reporting PMTU is
1201                  * less than the IPv6 Minimum Link MTU.
1202                  */
1203                 pmtu = IPV6_MIN_MTU;
1204                 allfrag = 1;
1205         }
1206
1207         /* New mtu received -> path was valid.
1208            They are sent only in response to data packets,
1209            so that this nexthop apparently is reachable. --ANK
1210          */
1211         dst_confirm(&rt->u.dst);
1212
1213         /* Host route. If it is static, it would be better
1214            not to override it, but add new one, so that
1215            when cache entry will expire old pmtu
1216            would return automatically.
1217          */
1218         if (rt->rt6i_flags & RTF_CACHE) {
1219                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1220                 if (allfrag)
1221                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1222                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1223                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1224                 goto out;
1225         }
1226
1227         /* Network route.
1228            Two cases are possible:
1229            1. It is connected route. Action: COW
1230            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1231          */
1232         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1233                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1234         else
1235                 nrt = rt6_alloc_clone(rt, daddr);
1236
1237         if (nrt) {
1238                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1239                 if (allfrag)
1240                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1241
1242                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1243                  * happened within 5 mins, the recommended timer is 10 mins.
1244                  * Here this route expiration time is set to ip6_rt_mtu_expires
1245                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1246                  * and detecting PMTU increase will be automatically happened.
1247                  */
1248                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1249                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1250
1251                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1252         }
1253 out:
1254         dst_release(&rt->u.dst);
1255 }
1256
1257 /*
1258  *      Misc support functions
1259  */
1260
1261 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1262 {
1263         struct rt6_info *rt = ip6_dst_alloc();
1264
1265         if (rt) {
1266                 rt->u.dst.input = ort->u.dst.input;
1267                 rt->u.dst.output = ort->u.dst.output;
1268
1269                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1270                 rt->u.dst.dev = ort->u.dst.dev;
1271                 if (rt->u.dst.dev)
1272                         dev_hold(rt->u.dst.dev);
1273                 rt->rt6i_idev = ort->rt6i_idev;
1274                 if (rt->rt6i_idev)
1275                         in6_dev_hold(rt->rt6i_idev);
1276                 rt->u.dst.lastuse = jiffies;
1277                 rt->rt6i_expires = 0;
1278
1279                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1280                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1281                 rt->rt6i_metric = 0;
1282
1283                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1284 #ifdef CONFIG_IPV6_SUBTREES
1285                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1286 #endif
1287         }
1288         return rt;
1289 }
1290
1291 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1292 {       
1293         struct rt6_info *rt;
1294         struct fib6_node *fn;
1295
1296         fn = &ip6_routing_table;
1297
1298         write_lock_bh(&rt6_lock);
1299         for (rt = fn->leaf; rt; rt=rt->u.next) {
1300                 if (dev == rt->rt6i_dev &&
1301                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1302                         break;
1303         }
1304         if (rt)
1305                 dst_hold(&rt->u.dst);
1306         write_unlock_bh(&rt6_lock);
1307         return rt;
1308 }
1309
1310 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1311                                      struct net_device *dev)
1312 {
1313         struct in6_rtmsg rtmsg;
1314
1315         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1316         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1317         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1318         rtmsg.rtmsg_metric = 1024;
1319         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1320
1321         rtmsg.rtmsg_ifindex = dev->ifindex;
1322
1323         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1324         return rt6_get_dflt_router(gwaddr, dev);
1325 }
1326
1327 void rt6_purge_dflt_routers(void)
1328 {
1329         struct rt6_info *rt;
1330
1331 restart:
1332         read_lock_bh(&rt6_lock);
1333         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1334                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1335                         dst_hold(&rt->u.dst);
1336
1337                         rt6_reset_dflt_pointer(NULL);
1338
1339                         read_unlock_bh(&rt6_lock);
1340
1341                         ip6_del_rt(rt, NULL, NULL, NULL);
1342
1343                         goto restart;
1344                 }
1345         }
1346         read_unlock_bh(&rt6_lock);
1347 }
1348
1349 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1350 {
1351         struct in6_rtmsg rtmsg;
1352         int err;
1353
1354         switch(cmd) {
1355         case SIOCADDRT:         /* Add a route */
1356         case SIOCDELRT:         /* Delete a route */
1357                 if (!capable(CAP_NET_ADMIN))
1358                         return -EPERM;
1359                 err = copy_from_user(&rtmsg, arg,
1360                                      sizeof(struct in6_rtmsg));
1361                 if (err)
1362                         return -EFAULT;
1363                         
1364                 rtnl_lock();
1365                 switch (cmd) {
1366                 case SIOCADDRT:
1367                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1368                         break;
1369                 case SIOCDELRT:
1370                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1371                         break;
1372                 default:
1373                         err = -EINVAL;
1374                 }
1375                 rtnl_unlock();
1376
1377                 return err;
1378         };
1379
1380         return -EINVAL;
1381 }
1382
1383 /*
1384  *      Drop the packet on the floor
1385  */
1386
1387 static int ip6_pkt_discard(struct sk_buff *skb)
1388 {
1389         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1390         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1391         kfree_skb(skb);
1392         return 0;
1393 }
1394
1395 static int ip6_pkt_discard_out(struct sk_buff *skb)
1396 {
1397         skb->dev = skb->dst->dev;
1398         return ip6_pkt_discard(skb);
1399 }
1400
1401 /*
1402  *      Allocate a dst for local (unicast / anycast) address.
1403  */
1404
1405 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1406                                     const struct in6_addr *addr,
1407                                     int anycast)
1408 {
1409         struct rt6_info *rt = ip6_dst_alloc();
1410
1411         if (rt == NULL)
1412                 return ERR_PTR(-ENOMEM);
1413
1414         dev_hold(&loopback_dev);
1415         in6_dev_hold(idev);
1416
1417         rt->u.dst.flags = DST_HOST;
1418         rt->u.dst.input = ip6_input;
1419         rt->u.dst.output = ip6_output;
1420         rt->rt6i_dev = &loopback_dev;
1421         rt->rt6i_idev = idev;
1422         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1423         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1424         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1425         rt->u.dst.obsolete = -1;
1426
1427         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1428         if (anycast)
1429                 rt->rt6i_flags |= RTF_ANYCAST;
1430         else
1431                 rt->rt6i_flags |= RTF_LOCAL;
1432         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1433         if (rt->rt6i_nexthop == NULL) {
1434                 dst_free((struct dst_entry *) rt);
1435                 return ERR_PTR(-ENOMEM);
1436         }
1437
1438         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1439         rt->rt6i_dst.plen = 128;
1440
1441         atomic_set(&rt->u.dst.__refcnt, 1);
1442
1443         return rt;
1444 }
1445
1446 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1447 {
1448         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1449             rt != &ip6_null_entry) {
1450                 RT6_TRACE("deleted by ifdown %p\n", rt);
1451                 return -1;
1452         }
1453         return 0;
1454 }
1455
1456 void rt6_ifdown(struct net_device *dev)
1457 {
1458         write_lock_bh(&rt6_lock);
1459         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1460         write_unlock_bh(&rt6_lock);
1461 }
1462
/* Argument block handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
1468
1469 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1470 {
1471         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1472         struct inet6_dev *idev;
1473
1474         /* In IPv6 pmtu discovery is not optional,
1475            so that RTAX_MTU lock cannot disable it.
1476            We still use this lock to block changes
1477            caused by addrconf/ndisc.
1478         */
1479
1480         idev = __in6_dev_get(arg->dev);
1481         if (idev == NULL)
1482                 return 0;
1483
1484         /* For administrative MTU increase, there is no way to discover
1485            IPv6 PMTU increase, so PMTU increase should be updated here.
1486            Since RFC 1981 doesn't include administrative MTU increase
1487            update PMTU increase is a MUST. (i.e. jumbo frame)
1488          */
1489         /*
1490            If new MTU is less than route PMTU, this new MTU will be the
1491            lowest MTU in the path, update the route PMTU to reflect PMTU
1492            decreases; if new MTU is greater than route PMTU, and the
1493            old MTU is the lowest MTU in the path, update the route PMTU
1494            to reflect the increase. In this case if the other nodes' MTU
1495            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1496            PMTU discouvery.
1497          */
1498         if (rt->rt6i_dev == arg->dev &&
1499             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1500             (dst_mtu(&rt->u.dst) > arg->mtu ||
1501              (dst_mtu(&rt->u.dst) < arg->mtu &&
1502               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1503                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1504         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1505         return 0;
1506 }
1507
1508 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1509 {
1510         struct rt6_mtu_change_arg arg;
1511
1512         arg.dev = dev;
1513         arg.mtu = mtu;
1514         read_lock_bh(&rt6_lock);
1515         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1516         read_unlock_bh(&rt6_lock);
1517 }
1518
1519 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1520                               struct in6_rtmsg *rtmsg)
1521 {
1522         memset(rtmsg, 0, sizeof(*rtmsg));
1523
1524         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1525         rtmsg->rtmsg_src_len = r->rtm_src_len;
1526         rtmsg->rtmsg_flags = RTF_UP;
1527         if (r->rtm_type == RTN_UNREACHABLE)
1528                 rtmsg->rtmsg_flags |= RTF_REJECT;
1529
1530         if (rta[RTA_GATEWAY-1]) {
1531                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1532                         return -EINVAL;
1533                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1534                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1535         }
1536         if (rta[RTA_DST-1]) {
1537                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1538                         return -EINVAL;
1539                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1540         }
1541         if (rta[RTA_SRC-1]) {
1542                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1543                         return -EINVAL;
1544                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1545         }
1546         if (rta[RTA_OIF-1]) {
1547                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1548                         return -EINVAL;
1549                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1550         }
1551         if (rta[RTA_PRIORITY-1]) {
1552                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1553                         return -EINVAL;
1554                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1555         }
1556         return 0;
1557 }
1558
1559 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1560 {
1561         struct rtmsg *r = NLMSG_DATA(nlh);
1562         struct in6_rtmsg rtmsg;
1563
1564         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1565                 return -EINVAL;
1566         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1567 }
1568
1569 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1570 {
1571         struct rtmsg *r = NLMSG_DATA(nlh);
1572         struct in6_rtmsg rtmsg;
1573
1574         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1575                 return -EINVAL;
1576         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1577 }
1578
/* Per-call state passed to the route dump walker (see inet6_dump_fib()). */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1584
1585 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1586                          struct in6_addr *dst, struct in6_addr *src,
1587                          int iif, int type, u32 pid, u32 seq,
1588                          int prefix, unsigned int flags)
1589 {
1590         struct rtmsg *rtm;
1591         struct nlmsghdr  *nlh;
1592         unsigned char    *b = skb->tail;
1593         struct rta_cacheinfo ci;
1594
1595         if (prefix) {   /* user wants prefix routes only */
1596                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1597                         /* success since this is not a prefix route */
1598                         return 1;
1599                 }
1600         }
1601
1602         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1603         rtm = NLMSG_DATA(nlh);
1604         rtm->rtm_family = AF_INET6;
1605         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1606         rtm->rtm_src_len = rt->rt6i_src.plen;
1607         rtm->rtm_tos = 0;
1608         rtm->rtm_table = RT_TABLE_MAIN;
1609         if (rt->rt6i_flags&RTF_REJECT)
1610                 rtm->rtm_type = RTN_UNREACHABLE;
1611         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1612                 rtm->rtm_type = RTN_LOCAL;
1613         else
1614                 rtm->rtm_type = RTN_UNICAST;
1615         rtm->rtm_flags = 0;
1616         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1617         rtm->rtm_protocol = rt->rt6i_protocol;
1618         if (rt->rt6i_flags&RTF_DYNAMIC)
1619                 rtm->rtm_protocol = RTPROT_REDIRECT;
1620         else if (rt->rt6i_flags & RTF_ADDRCONF)
1621                 rtm->rtm_protocol = RTPROT_KERNEL;
1622         else if (rt->rt6i_flags&RTF_DEFAULT)
1623                 rtm->rtm_protocol = RTPROT_RA;
1624
1625         if (rt->rt6i_flags&RTF_CACHE)
1626                 rtm->rtm_flags |= RTM_F_CLONED;
1627
1628         if (dst) {
1629                 RTA_PUT(skb, RTA_DST, 16, dst);
1630                 rtm->rtm_dst_len = 128;
1631         } else if (rtm->rtm_dst_len)
1632                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1633 #ifdef CONFIG_IPV6_SUBTREES
1634         if (src) {
1635                 RTA_PUT(skb, RTA_SRC, 16, src);
1636                 rtm->rtm_src_len = 128;
1637         } else if (rtm->rtm_src_len)
1638                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1639 #endif
1640         if (iif)
1641                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1642         else if (dst) {
1643                 struct in6_addr saddr_buf;
1644                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1645                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1646         }
1647         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1648                 goto rtattr_failure;
1649         if (rt->u.dst.neighbour)
1650                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1651         if (rt->u.dst.dev)
1652                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1653         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1654         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1655         if (rt->rt6i_expires)
1656                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1657         else
1658                 ci.rta_expires = 0;
1659         ci.rta_used = rt->u.dst.__use;
1660         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1661         ci.rta_error = rt->u.dst.error;
1662         ci.rta_id = 0;
1663         ci.rta_ts = 0;
1664         ci.rta_tsage = 0;
1665         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1666         nlh->nlmsg_len = skb->tail - b;
1667         return skb->len;
1668
1669 nlmsg_failure:
1670 rtattr_failure:
1671         skb_trim(skb, b - skb->data);
1672         return -1;
1673 }
1674
1675 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1676 {
1677         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1678         int prefix;
1679
1680         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1681                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1682                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1683         } else
1684                 prefix = 0;
1685
1686         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1687                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1688                      prefix, NLM_F_MULTI);
1689 }
1690
1691 static int fib6_dump_node(struct fib6_walker_t *w)
1692 {
1693         int res;
1694         struct rt6_info *rt;
1695
1696         for (rt = w->leaf; rt; rt = rt->u.next) {
1697                 res = rt6_dump_route(rt, w->args);
1698                 if (res < 0) {
1699                         /* Frame is full, suspend walking */
1700                         w->leaf = rt;
1701                         return 1;
1702                 }
1703                 BUG_TRAP(res!=0);
1704         }
1705         w->leaf = NULL;
1706         return 0;
1707 }
1708
1709 static void fib6_dump_end(struct netlink_callback *cb)
1710 {
1711         struct fib6_walker_t *w = (void*)cb->args[0];
1712
1713         if (w) {
1714                 cb->args[0] = 0;
1715                 fib6_walker_unlink(w);
1716                 kfree(w);
1717         }
1718         cb->done = (void*)cb->args[1];
1719         cb->args[1] = 0;
1720 }
1721
1722 static int fib6_dump_done(struct netlink_callback *cb)
1723 {
1724         fib6_dump_end(cb);
1725         return cb->done ? cb->done(cb) : 0;
1726 }
1727
1728 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1729 {
1730         struct rt6_rtnl_dump_arg arg;
1731         struct fib6_walker_t *w;
1732         int res;
1733
1734         arg.skb = skb;
1735         arg.cb = cb;
1736
1737         w = (void*)cb->args[0];
1738         if (w == NULL) {
1739                 /* New dump:
1740                  * 
1741                  * 1. hook callback destructor.
1742                  */
1743                 cb->args[1] = (long)cb->done;
1744                 cb->done = fib6_dump_done;
1745
1746                 /*
1747                  * 2. allocate and initialize walker.
1748                  */
1749                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1750                 if (w == NULL)
1751                         return -ENOMEM;
1752                 RT6_TRACE("dump<%p", w);
1753                 memset(w, 0, sizeof(*w));
1754                 w->root = &ip6_routing_table;
1755                 w->func = fib6_dump_node;
1756                 w->args = &arg;
1757                 cb->args[0] = (long)w;
1758                 read_lock_bh(&rt6_lock);
1759                 res = fib6_walk(w);
1760                 read_unlock_bh(&rt6_lock);
1761         } else {
1762                 w->args = &arg;
1763                 read_lock_bh(&rt6_lock);
1764                 res = fib6_walk_continue(w);
1765                 read_unlock_bh(&rt6_lock);
1766         }
1767 #if RT6_DEBUG >= 3
1768         if (res <= 0 && skb->len == 0)
1769                 RT6_TRACE("%p>dump end\n", w);
1770 #endif
1771         res = res < 0 ? res : skb->len;
1772         /* res < 0 is an error. (really, impossible)
1773            res == 0 means that dump is complete, but skb still can contain data.
1774            res > 0 dump is not complete, but frame is full.
1775          */
1776         /* Destroy walker, if dump of this table is complete. */
1777         if (res <= 0)
1778                 fib6_dump_end(cb);
1779         return res;
1780 }
1781
1782 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1783 {
1784         struct rtattr **rta = arg;
1785         int iif = 0;
1786         int err = -ENOBUFS;
1787         struct sk_buff *skb;
1788         struct flowi fl;
1789         struct rt6_info *rt;
1790
1791         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1792         if (skb == NULL)
1793                 goto out;
1794
1795         /* Reserve room for dummy headers, this skb can pass
1796            through good chunk of routing engine.
1797          */
1798         skb->mac.raw = skb->data;
1799         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1800
1801         memset(&fl, 0, sizeof(fl));
1802         if (rta[RTA_SRC-1])
1803                 ipv6_addr_copy(&fl.fl6_src,
1804                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1805         if (rta[RTA_DST-1])
1806                 ipv6_addr_copy(&fl.fl6_dst,
1807                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1808
1809         if (rta[RTA_IIF-1])
1810                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1811
1812         if (iif) {
1813                 struct net_device *dev;
1814                 dev = __dev_get_by_index(iif);
1815                 if (!dev) {
1816                         err = -ENODEV;
1817                         goto out_free;
1818                 }
1819         }
1820
1821         fl.oif = 0;
1822         if (rta[RTA_OIF-1])
1823                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1824
1825         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1826
1827         skb->dst = &rt->u.dst;
1828
1829         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1830         err = rt6_fill_node(skb, rt, 
1831                             &fl.fl6_dst, &fl.fl6_src,
1832                             iif,
1833                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1834                             nlh->nlmsg_seq, 0, 0);
1835         if (err < 0) {
1836                 err = -EMSGSIZE;
1837                 goto out_free;
1838         }
1839
1840         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1841         if (err > 0)
1842                 err = 0;
1843 out:
1844         return err;
1845 out_free:
1846         kfree_skb(skb);
1847         goto out;       
1848 }
1849
1850 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1851                         struct netlink_skb_parms *req)
1852 {
1853         struct sk_buff *skb;
1854         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1855         u32 pid = current->pid;
1856         u32 seq = 0;
1857
1858         if (req)
1859                 pid = req->pid;
1860         if (nlh)
1861                 seq = nlh->nlmsg_seq;
1862         
1863         skb = alloc_skb(size, gfp_any());
1864         if (!skb) {
1865                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1866                 return;
1867         }
1868         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1869                 kfree_skb(skb);
1870                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1871                 return;
1872         }
1873         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1874         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1875 }
1876
1877 /*
1878  *      /proc
1879  */
1880
1881 #ifdef CONFIG_PROC_FS
1882
/*
 * Size in bytes used for one route record when converting a read offset
 * into a record count and an offset within a record.  The terms appear
 * to follow the output layout of rt6_info_route(): three 32-digit hex
 * addresses, two 4-character prefix-length fields and the trailing
 * counters/device-name part.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer records are formatted into */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* records skipped so far to reach the offset */
	int len;	/* bytes written into buffer so far */
};
1893
1894 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1895 {
1896         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1897         int i;
1898
1899         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1900                 arg->skip++;
1901                 return 0;
1902         }
1903
1904         if (arg->len >= arg->length)
1905                 return 0;
1906
1907         for (i=0; i<16; i++) {
1908                 sprintf(arg->buffer + arg->len, "%02x",
1909                         rt->rt6i_dst.addr.s6_addr[i]);
1910                 arg->len += 2;
1911         }
1912         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1913                             rt->rt6i_dst.plen);
1914
1915 #ifdef CONFIG_IPV6_SUBTREES
1916         for (i=0; i<16; i++) {
1917                 sprintf(arg->buffer + arg->len, "%02x",
1918                         rt->rt6i_src.addr.s6_addr[i]);
1919                 arg->len += 2;
1920         }
1921         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1922                             rt->rt6i_src.plen);
1923 #else
1924         sprintf(arg->buffer + arg->len,
1925                 "00000000000000000000000000000000 00 ");
1926         arg->len += 36;
1927 #endif
1928
1929         if (rt->rt6i_nexthop) {
1930                 for (i=0; i<16; i++) {
1931                         sprintf(arg->buffer + arg->len, "%02x",
1932                                 rt->rt6i_nexthop->primary_key[i]);
1933                         arg->len += 2;
1934                 }
1935         } else {
1936                 sprintf(arg->buffer + arg->len,
1937                         "00000000000000000000000000000000");
1938                 arg->len += 32;
1939         }
1940         arg->len += sprintf(arg->buffer + arg->len,
1941                             " %08x %08x %08x %08x %8s\n",
1942                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1943                             rt->u.dst.__use, rt->rt6i_flags, 
1944                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1945         return 0;
1946 }
1947
1948 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1949 {
1950         struct rt6_proc_arg arg;
1951         arg.buffer = buffer;
1952         arg.offset = offset;
1953         arg.length = length;
1954         arg.skip = 0;
1955         arg.len = 0;
1956
1957         read_lock_bh(&rt6_lock);
1958         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1959         read_unlock_bh(&rt6_lock);
1960
1961         *start = buffer;
1962         if (offset)
1963                 *start += offset % RT6_INFO_LEN;
1964
1965         arg.len -= offset % RT6_INFO_LEN;
1966
1967         if (arg.len > length)
1968                 arg.len = length;
1969         if (arg.len < 0)
1970                 arg.len = 0;
1971
1972         return arg.len;
1973 }
1974
1975 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1976 {
1977         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1978                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1979                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1980                       rt6_stats.fib_rt_cache,
1981                       atomic_read(&ip6_dst_ops.entries),
1982                       rt6_stats.fib_discarded_routes);
1983
1984         return 0;
1985 }
1986
1987 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1988 {
1989         return single_open(file, rt6_stats_seq_show, NULL);
1990 }
1991
1992 static struct file_operations rt6_stats_seq_fops = {
1993         .owner   = THIS_MODULE,
1994         .open    = rt6_stats_seq_open,
1995         .read    = seq_read,
1996         .llseek  = seq_lseek,
1997         .release = single_release,
1998 };
1999 #endif  /* CONFIG_PROC_FS */
2000
2001 #ifdef CONFIG_SYSCTL
2002
2003 static int flush_delay;
2004
2005 static
2006 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2007                               void __user *buffer, size_t *lenp, loff_t *ppos)
2008 {
2009         if (write) {
2010                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2011                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2012                 return 0;
2013         } else
2014                 return -EINVAL;
2015 }
2016
2017 ctl_table ipv6_route_table[] = {
2018         {
2019                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2020                 .procname       =       "flush",
2021                 .data           =       &flush_delay,
2022                 .maxlen         =       sizeof(int),
2023                 .mode           =       0200,
2024                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2025         },
2026         {
2027                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2028                 .procname       =       "gc_thresh",
2029                 .data           =       &ip6_dst_ops.gc_thresh,
2030                 .maxlen         =       sizeof(int),
2031                 .mode           =       0644,
2032                 .proc_handler   =       &proc_dointvec,
2033         },
2034         {
2035                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2036                 .procname       =       "max_size",
2037                 .data           =       &ip6_rt_max_size,
2038                 .maxlen         =       sizeof(int),
2039                 .mode           =       0644,
2040                 .proc_handler   =       &proc_dointvec,
2041         },
2042         {
2043                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2044                 .procname       =       "gc_min_interval",
2045                 .data           =       &ip6_rt_gc_min_interval,
2046                 .maxlen         =       sizeof(int),
2047                 .mode           =       0644,
2048                 .proc_handler   =       &proc_dointvec_jiffies,
2049                 .strategy       =       &sysctl_jiffies,
2050         },
2051         {
2052                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2053                 .procname       =       "gc_timeout",
2054                 .data           =       &ip6_rt_gc_timeout,
2055                 .maxlen         =       sizeof(int),
2056                 .mode           =       0644,
2057                 .proc_handler   =       &proc_dointvec_jiffies,
2058                 .strategy       =       &sysctl_jiffies,
2059         },
2060         {
2061                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2062                 .procname       =       "gc_interval",
2063                 .data           =       &ip6_rt_gc_interval,
2064                 .maxlen         =       sizeof(int),
2065                 .mode           =       0644,
2066                 .proc_handler   =       &proc_dointvec_jiffies,
2067                 .strategy       =       &sysctl_jiffies,
2068         },
2069         {
2070                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2071                 .procname       =       "gc_elasticity",
2072                 .data           =       &ip6_rt_gc_elasticity,
2073                 .maxlen         =       sizeof(int),
2074                 .mode           =       0644,
2075                 .proc_handler   =       &proc_dointvec_jiffies,
2076                 .strategy       =       &sysctl_jiffies,
2077         },
2078         {
2079                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2080                 .procname       =       "mtu_expires",
2081                 .data           =       &ip6_rt_mtu_expires,
2082                 .maxlen         =       sizeof(int),
2083                 .mode           =       0644,
2084                 .proc_handler   =       &proc_dointvec_jiffies,
2085                 .strategy       =       &sysctl_jiffies,
2086         },
2087         {
2088                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2089                 .procname       =       "min_adv_mss",
2090                 .data           =       &ip6_rt_min_advmss,
2091                 .maxlen         =       sizeof(int),
2092                 .mode           =       0644,
2093                 .proc_handler   =       &proc_dointvec_jiffies,
2094                 .strategy       =       &sysctl_jiffies,
2095         },
2096         {
2097                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2098                 .procname       =       "gc_min_interval_ms",
2099                 .data           =       &ip6_rt_gc_min_interval,
2100                 .maxlen         =       sizeof(int),
2101                 .mode           =       0644,
2102                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2103                 .strategy       =       &sysctl_ms_jiffies,
2104         },
2105         { .ctl_name = 0 }
2106 };
2107
2108 #endif
2109
2110 void __init ip6_route_init(void)
2111 {
2112         struct proc_dir_entry *p;
2113
2114         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2115                                                      sizeof(struct rt6_info),
2116                                                      0, SLAB_HWCACHE_ALIGN,
2117                                                      NULL, NULL);
2118         if (!ip6_dst_ops.kmem_cachep)
2119                 panic("cannot create ip6_dst_cache");
2120
2121         fib6_init();
2122 #ifdef  CONFIG_PROC_FS
2123         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2124         if (p)
2125                 p->owner = THIS_MODULE;
2126
2127         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2128 #endif
2129 #ifdef CONFIG_XFRM
2130         xfrm6_init();
2131 #endif
2132 }
2133
2134 void ip6_route_cleanup(void)
2135 {
2136 #ifdef CONFIG_PROC_FS
2137         proc_net_remove("ipv6_route");
2138         proc_net_remove("rt6_stats");
2139 #endif
2140 #ifdef CONFIG_XFRM
2141         xfrm6_fini();
2142 #endif
2143         rt6_ifdown(NULL);
2144         fib6_gc_cleanup();
2145         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2146 }