[IPV6]: ROUTE: Clean-up cow'ing in ip6_route_{input,output}().
[pandora-kernel.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug tracing switches for this file.  With RT6_DEBUG below 3 both
 * RDBG() and RT6_TRACE() expand to nothing, so their arguments are
 * never evaluated.
 */
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66
67 #if RT6_DEBUG >= 3
/* RDBG takes one parenthesised printk argument list: RDBG(("fmt", a, b)). */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74
/*
 * When non-zero, ip6_route_input()/ip6_route_output() create a cached
 * host clone (rt6_alloc_clone()) for routes that already have a nexthop
 * or are flagged RTF_NONEXTHOP; when zero they return such routes directly.
 */
75 #define CLONE_OFFLINK_ROUTE 0
76
/*
 * Tunables for the IPv6 dst cache and its garbage collector.
 * Time values are in jiffies.
 */
/* ip6_dst_gc() reports pressure once the dst entry count exceeds this. */
77 static int ip6_rt_max_size = 4096;
/* Minimum spacing between GC runs while the entry count is within ip6_rt_max_size. */
78 static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expiry to half of this once the cache drops below gc_thresh. */
79 static int ip6_rt_gc_timeout = 60*HZ;
/* Non-static; not referenced in this part of the file (presumably the periodic GC period - confirm). */
80 int ip6_rt_gc_interval = 30*HZ;
/* Shift count used by ip6_dst_gc() to decay its expiry: expire -= expire >> elasticity. */
81 static int ip6_rt_gc_elasticity = 9;
/* Not referenced in this part of the file (presumably lifetime of a learned path MTU - confirm). */
82 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied by ipv6_advmss() to the advertised MSS. */
83 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
84
/* Forward declarations for helpers and dst_ops callbacks defined later in this file. */
85 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
86 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
87 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
88 static void             ip6_dst_destroy(struct dst_entry *);
89 static void             ip6_dst_ifdown(struct dst_entry *,
90                                        struct net_device *dev, int how);
91 static int               ip6_dst_gc(void);
92
93 static int              ip6_pkt_discard(struct sk_buff *skb);
94 static int              ip6_pkt_discard_out(struct sk_buff *skb);
95 static void             ip6_link_failure(struct sk_buff *skb);
96 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
97
/*
 * dst_ops table for IPv6 routes: wires the callbacks implemented in this
 * file into the dst layer.  Each allocated entry has the size of a
 * struct rt6_info (see ip6_dst_alloc()).
 */
98 static struct dst_ops ip6_dst_ops = {
99         .family                 =       AF_INET6,
100         .protocol               =       __constant_htons(ETH_P_IPV6),
101         .gc                     =       ip6_dst_gc,
102         .gc_thresh              =       1024,
103         .check                  =       ip6_dst_check,
104         .destroy                =       ip6_dst_destroy,
105         .ifdown                 =       ip6_dst_ifdown,
106         .negative_advice        =       ip6_negative_advice,
107         .link_failure           =       ip6_link_failure,
108         .update_pmtu            =       ip6_rt_update_pmtu,
109         .entry_size             =       sizeof(struct rt6_info),
110 };
111
/*
 * Statically allocated "no route" entry.  It is a reject route bound to
 * the loopback device that discards packets and carries -ENETUNREACH.
 * The lookup code in this file returns it when a strict device match
 * fails, when no default router can be chosen, and when cloning a route
 * fails.  Its reference counts start at 1.
 */
112 struct rt6_info ip6_null_entry = {
113         .u = {
114                 .dst = {
115                         .__refcnt       = ATOMIC_INIT(1),
116                         .__use          = 1,
117                         .dev            = &loopback_dev,
118                         .obsolete       = -1,
119                         .error          = -ENETUNREACH,
120                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
121                         .input          = ip6_pkt_discard,
122                         .output         = ip6_pkt_discard_out,
123                         .ops            = &ip6_dst_ops,
124                         .path           = (struct dst_entry*)&ip6_null_entry,
125                 }
126         },
127         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
128         .rt6i_metric    = ~(u32) 0,
129         .rt6i_ref       = ATOMIC_INIT(1),
130 };
131
/*
 * Root node of the IPv6 FIB tree.  Its leaf initially points at
 * ip6_null_entry, so a lookup on an empty table yields the null route.
 */
132 struct fib6_node ip6_routing_table = {
133         .leaf           = &ip6_null_entry,
134         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
135 };
136
137 /* Protects all the ip6 fib */
138
/* Taken for reading by the lookup paths and for writing by ip6_ins_rt() and ndisc_dst_alloc(). */
139 DEFINE_RWLOCK(rt6_lock);
140
141
142 /* allocate dst with ip6_dst_ops */
143 static __inline__ struct rt6_info *ip6_dst_alloc(void)
144 {
145         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
146 }
147
148 static void ip6_dst_destroy(struct dst_entry *dst)
149 {
150         struct rt6_info *rt = (struct rt6_info *)dst;
151         struct inet6_dev *idev = rt->rt6i_idev;
152
153         if (idev != NULL) {
154                 rt->rt6i_idev = NULL;
155                 in6_dev_put(idev);
156         }       
157 }
158
159 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
160                            int how)
161 {
162         struct rt6_info *rt = (struct rt6_info *)dst;
163         struct inet6_dev *idev = rt->rt6i_idev;
164
165         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
166                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
167                 if (loopback_idev != NULL) {
168                         rt->rt6i_idev = loopback_idev;
169                         in6_dev_put(idev);
170                 }
171         }
172 }
173
174 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
175 {
176         return (rt->rt6i_flags & RTF_EXPIRES &&
177                 time_after(jiffies, rt->rt6i_expires));
178 }
179
180 /*
181  *      Route lookup. Any rt6_lock is implied.
182  */
183
/*
 * Pick, from the sibling list starting at rt, the route that fits the
 * requested interface.
 *
 *   rt     - head of the list of candidate routes (linked via u.next)
 *   oif    - wanted interface index; 0 means "any", in which case rt is
 *            returned unchanged
 *   strict - non-zero: a route on another device is not acceptable
 *
 * Returns the first route whose device has ifindex == oif.  Failing that,
 * a route on a loopback device remembered in "local"; failing that,
 * &ip6_null_entry when strict, else rt.
 *
 * NOTE(review): inside the "if (oif)" body the tests "strict && oif" and
 * "!oif ||" are redundant (oif is known non-zero there).
 * NOTE(review): "local" may be set from a route whose rt6i_idev is NULL
 * (non-strict case), and a later iteration dereferences
 * local->rt6i_idev->dev - looks like a possible NULL dereference; confirm
 * whether loopback routes can really have a NULL rt6i_idev.
 */
184 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
185                                                     int oif,
186                                                     int strict)
187 {
188         struct rt6_info *local = NULL;
189         struct rt6_info *sprt;
190
191         if (oif) {
192                 for (sprt = rt; sprt; sprt = sprt->u.next) {
193                         struct net_device *dev = sprt->rt6i_dev;
                        /* exact device match wins immediately */
194                         if (dev->ifindex == oif)
195                                 return sprt;
196                         if (dev->flags & IFF_LOOPBACK) {
                                /* loopback route whose inet6_dev is not the wanted interface */
197                                 if (sprt->rt6i_idev == NULL ||
198                                     sprt->rt6i_idev->dev->ifindex != oif) {
199                                         if (strict && oif)
200                                                 continue;
                                        /* keep an earlier candidate that already matches oif */
201                                         if (local && (!oif || 
202                                                       local->rt6i_idev->dev->ifindex == oif))
203                                                 continue;
204                                 }
205                                 local = sprt;
206                         }
207                 }
208
209                 if (local)
210                         return local;
211
212                 if (strict)
213                         return &ip6_null_entry;
214         }
215         return rt;
216 }
217
218 /*
219  *      pointer to the last default router chosen. BH is disabled locally.
220  */
221 static struct rt6_info *rt6_dflt_pointer;
222 static DEFINE_SPINLOCK(rt6_dflt_lock);
223
224 void rt6_reset_dflt_pointer(struct rt6_info *rt)
225 {
226         spin_lock_bh(&rt6_dflt_lock);
227         if (rt == NULL || rt == rt6_dflt_pointer) {
228                 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
229                 rt6_dflt_pointer = NULL;
230         }
231         spin_unlock_bh(&rt6_dflt_lock);
232 }
233
234 /* Default Router Selection (RFC 2461 6.3.6) */
/*
 * Choose a default router from the sibling list starting at rt.
 *
 *   rt  - head of the list of default routes (linked via u.next)
 *   oif - requested outgoing interface index, 0 for "any"
 *
 * Phase 1 scores every non-expired route that has a neighbour entry:
 *   +8 interface matches oif (or oif is 0)
 *   +4 route is the previously chosen default (rt6_dflt_pointer)
 *   +3 / +2 / +1 depending on the neighbour's NUD state
 * Routes without a neighbour, or whose neighbour is INCOMPLETE or in any
 * other unlisted state, are skipped.  A score >= 12 ends the scan at once.
 * Phase 2 (no match, and a previous default is recorded): round-robin to
 * the next usable route after rt6_dflt_pointer, wrapping to the list head.
 * Phase 3 (still no match): first non-expired RTF_DEFAULT route hanging
 * off the FIB root that fits oif; this choice is not recorded.
 *
 * Returns the chosen route, or &ip6_null_entry when nothing fits.  No
 * reference is taken here.
 */
235 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
236 {
237         struct rt6_info *match = NULL;
238         struct rt6_info *sprt;
239         int mpri = 0;
240
241         for (sprt = rt; sprt; sprt = sprt->u.next) {
242                 struct neighbour *neigh;
243                 int m = 0;
244
245                 if (!oif ||
246                     (sprt->rt6i_dev &&
247                      sprt->rt6i_dev->ifindex == oif))
248                         m += 8;
249
250                 if (rt6_check_expired(sprt))
251                         continue;
252
                /* NOTE(review): rt6_dflt_pointer is read here without rt6_dflt_lock held. */
253                 if (sprt == rt6_dflt_pointer)
254                         m += 4;
255
256                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
257                         read_lock_bh(&neigh->lock);
258                         switch (neigh->nud_state) {
259                         case NUD_REACHABLE:
260                                 m += 3;
261                                 break;
262
263                         case NUD_STALE:
264                         case NUD_DELAY:
265                         case NUD_PROBE:
266                                 m += 2;
267                                 break;
268
269                         case NUD_NOARP:
270                         case NUD_PERMANENT:
271                                 m += 1;
272                                 break;
273
274                         case NUD_INCOMPLETE:
275                         default:
                                /* unusable neighbour: drop the lock and skip this route */
276                                 read_unlock_bh(&neigh->lock);
277                                 continue;
278                         }
279                         read_unlock_bh(&neigh->lock);
280                 } else {
281                         continue;
282                 }
283
                /* m >= 12 needs the interface bonus, the previous-default bonus and a usable neighbour */
284                 if (m > mpri || m >= 12) {
285                         match = sprt;
286                         mpri = m;
287                         if (m >= 12) {
288                                 /* we choose the last default router if it
289                                  * is in (probably) reachable state.
290                                  * If route changed, we should do pmtu
291                                  * discovery. --yoshfuji
292                                  */
293                                 break;
294                         }
295                 }
296         }
297
        /* Plain spin_lock: per the comment on rt6_dflt_pointer, BH is already disabled here. */
298         spin_lock(&rt6_dflt_lock);
299         if (!match) {
300                 /*
301                  *      No default routers are known to be reachable.
302                  *      SHOULD round robin
303                  */
304                 if (rt6_dflt_pointer) {
                        /* first pass: routes after the previous default */
305                         for (sprt = rt6_dflt_pointer->u.next;
306                              sprt; sprt = sprt->u.next) {
307                                 if (sprt->u.dst.obsolete <= 0 &&
308                                     sprt->u.dst.error == 0 &&
309                                     !rt6_check_expired(sprt)) {
310                                         match = sprt;
311                                         break;
312                                 }
313                         }
                        /* second pass: wrap around from the head up to and including the previous default */
314                         for (sprt = rt;
315                              !match && sprt;
316                              sprt = sprt->u.next) {
317                                 if (sprt->u.dst.obsolete <= 0 &&
318                                     sprt->u.dst.error == 0 &&
319                                     !rt6_check_expired(sprt)) {
320                                         match = sprt;
321                                         break;
322                                 }
323                                 if (sprt == rt6_dflt_pointer)
324                                         break;
325                         }
326                 }
327         }
328
        /* remember the choice so the next call favours the same router */
329         if (match) {
330                 if (rt6_dflt_pointer != match)
331                         RT6_TRACE("changed default router: %p->%p\n",
332                                   rt6_dflt_pointer, match);
333                 rt6_dflt_pointer = match;
334         }
335         spin_unlock(&rt6_dflt_lock);
336
337         if (!match) {
338                 /*
339                  * Last Resort: if no default routers found, 
340                  * use addrconf default route.
341                  * We don't record this route.
342                  */
343                 for (sprt = ip6_routing_table.leaf;
344                      sprt; sprt = sprt->u.next) {
345                         if (!rt6_check_expired(sprt) &&
346                             (sprt->rt6i_flags & RTF_DEFAULT) &&
347                             (!oif ||
348                              (sprt->rt6i_dev &&
349                               sprt->rt6i_dev->ifindex == oif))) {
350                                 match = sprt;
351                                 break;
352                         }
353                 }
354                 if (!match) {
355                         /* no default route.  give up. */
356                         match = &ip6_null_entry;
357                 }
358         }
359
360         return match;
361 }
362
363 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
364                             int oif, int strict)
365 {
366         struct fib6_node *fn;
367         struct rt6_info *rt;
368
369         read_lock_bh(&rt6_lock);
370         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
371         rt = rt6_device_match(fn->leaf, oif, strict);
372         dst_hold(&rt->u.dst);
373         rt->u.dst.__use++;
374         read_unlock_bh(&rt6_lock);
375
376         rt->u.dst.lastuse = jiffies;
377         if (rt->u.dst.error == 0)
378                 return rt;
379         dst_release(&rt->u.dst);
380         return NULL;
381 }
382
383 /* ip6_ins_rt is called with FREE rt6_lock.
384    It takes new route entry, the addition fails by any reason the
385    route is freed. In any case, if caller does not hold it, it may
386    be destroyed.
387  */
388
389 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
390                 void *_rtattr, struct netlink_skb_parms *req)
391 {
392         int err;
393
394         write_lock_bh(&rt6_lock);
395         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
396         write_unlock_bh(&rt6_lock);
397
398         return err;
399 }
400
401 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
402                                       struct in6_addr *saddr)
403 {
404         struct rt6_info *rt;
405
406         /*
407          *      Clone the route.
408          */
409
410         rt = ip6_rt_copy(ort);
411
412         if (rt) {
413                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
414                         if (rt->rt6i_dst.plen != 128 &&
415                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
416                                 rt->rt6i_flags |= RTF_ANYCAST;
417                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
418                 }
419
420                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
421                 rt->rt6i_dst.plen = 128;
422                 rt->rt6i_flags |= RTF_CACHE;
423                 rt->u.dst.flags |= DST_HOST;
424
425 #ifdef CONFIG_IPV6_SUBTREES
426                 if (rt->rt6i_src.plen && saddr) {
427                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
428                         rt->rt6i_src.plen = 128;
429                 }
430 #endif
431
432                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
433
434         }
435
436         return rt;
437 }
438
439 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
440 {
441         struct rt6_info *rt = ip6_rt_copy(ort);
442         if (rt) {
443                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
444                 rt->rt6i_dst.plen = 128;
445                 rt->rt6i_flags |= RTF_CACHE;
446                 if (rt->rt6i_flags & RTF_REJECT)
447                         rt->u.dst.error = ort->u.dst.error;
448                 rt->u.dst.flags |= DST_HOST;
449                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
450         }
451         return rt;
452 }
453
/*
 * BACKTRACK() - climb the FIB tree after a failed strict device match.
 *
 * Expects these names in the calling function: locals "rt", "fn" and
 * "strict", and labels "restart" and "out".  When rt is the null entry
 * and strict is set, it walks fn up through its parents: on reaching a
 * node flagged RTN_ROOT it jumps to "out"; on reaching a node flagged
 * RTN_RTINFO it jumps to "restart" with fn pointing at that node.  If
 * the parent chain ends first, fn is left NULL and execution falls
 * through.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single
 * statement and cannot capture a following "else".
 */
#define BACKTRACK() \
do { \
        if (rt == &ip6_null_entry && strict) { \
                while ((fn = fn->parent) != NULL) { \
                        if (fn->fn_flags & RTN_ROOT) \
                                goto out; \
                        if (fn->fn_flags & RTN_RTINFO) \
                                goto restart; \
                } \
        } \
} while (0)
464
465
/*
 * Route an incoming packet: look up skb's destination/source in the FIB,
 * constrained to the receiving device, and store the resulting dst (with
 * a reference held) in skb->dst.
 *
 * Cached (RTF_CACHE) leaves are used as found.  Otherwise, if the matched
 * route has no nexthop and is not RTF_NONEXTHOP, a /128 copy is created
 * with rt6_alloc_cow() and inserted into the table; since rt6_lock is
 * dropped for that, a failed insertion is retried via a fresh lookup, up
 * to three attempts in total.  If the copy cannot be allocated,
 * ip6_null_entry is used instead.
 */
466 void ip6_route_input(struct sk_buff *skb)
467 {
468         struct fib6_node *fn;
469         struct rt6_info *rt, *nrt;
470         int strict;
471         int attempts = 3;
472         int err;
473
        /* multicast and link-local destinations require a strict device match */
474         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
475
476 relookup:
477         read_lock_bh(&rt6_lock);
478
479         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
480                          &skb->nh.ipv6h->saddr);
481
        /* BACKTRACK() jumps back here with fn moved to a parent node */
482 restart:
483         rt = fn->leaf;
484
485         if ((rt->rt6i_flags & RTF_CACHE)) {
486                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
487                 BACKTRACK();
488                 goto out;
489         }
490
491         rt = rt6_device_match(rt, skb->dev->ifindex, strict);
492         BACKTRACK();
493
        /* pin the route before dropping the lock */
494         dst_hold(&rt->u.dst);
495         read_unlock_bh(&rt6_lock);
496
497         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
498                 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
499         else {
500 #if CLONE_OFFLINK_ROUTE
501                 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
502 #else
                /* no cloning: hand out rt itself, keeping the reference taken above */
503                 goto out2;
504 #endif
505         }
506
        /* switch from the original route to the copy (or to the null entry on allocation failure) */
507         dst_release(&rt->u.dst);
508         rt = nrt ? : &ip6_null_entry;
509
510         dst_hold(&rt->u.dst);
511         if (nrt) {
512                 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
513                 if (!err)
514                         goto out2;
515         }
516
        /* NOTE(review): once attempts run out, rt may be a copy whose insertion failed - confirm this is intended. */
517         if (--attempts <= 0)
518                 goto out2;
519
520         /*
521          * Race condition! In the gap, when rt6_lock was
522          * released someone could insert this route.  Relookup.
523          */
524         dst_release(&rt->u.dst);
525         goto relookup;
526
        /* out: rt6_lock is still held and no reference has been taken yet */
527 out:
528         dst_hold(&rt->u.dst);
529         read_unlock_bh(&rt6_lock);
        /* out2: lock already released, reference already held */
530 out2:
531         rt->u.dst.lastuse = jiffies;
532         rt->u.dst.__use++;
533         skb->dst = (struct dst_entry *) rt;
534         return;
535 }
536
/*
 * Route an outgoing flow: look up fl's destination/source in the FIB,
 * honouring fl->oif, and return the resulting dst with a reference held.
 * The sk argument is not used in this function.
 *
 * Cached (RTF_CACHE) leaves are used as found.  For RTF_DEFAULT leaves
 * with metric >= IP6_RT_PRIO_ADDRCONF the router is picked by
 * rt6_best_dflt(); other non-default leaves go through
 * rt6_device_match().  If the chosen route has no nexthop and is not
 * RTF_NONEXTHOP, a /128 copy is created with rt6_alloc_cow() and
 * inserted; a failed insertion is retried via a fresh lookup, up to
 * three attempts in total.  If the copy cannot be allocated,
 * ip6_null_entry is returned.
 */
537 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
538 {
539         struct fib6_node *fn;
540         struct rt6_info *rt, *nrt;
541         int strict;
542         int attempts = 3;
543         int err;
544
        /* multicast and link-local destinations require a strict device match */
545         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
546
547 relookup:
548         read_lock_bh(&rt6_lock);
549
550         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
551
        /* BACKTRACK() jumps back here with fn moved to a parent node */
552 restart:
553         rt = fn->leaf;
554
555         if ((rt->rt6i_flags & RTF_CACHE)) {
556                 rt = rt6_device_match(rt, fl->oif, strict);
557                 BACKTRACK();
558                 goto out;
559         }
560         if (rt->rt6i_flags & RTF_DEFAULT) {
                /* default routes with a lower metric are used as-is, without device matching */
561                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
562                         rt = rt6_best_dflt(rt, fl->oif);
563         } else {
564                 rt = rt6_device_match(rt, fl->oif, strict);
565                 BACKTRACK();
566         }
567
        /* pin the route before dropping the lock */
568         dst_hold(&rt->u.dst);
569         read_unlock_bh(&rt6_lock);
570
571         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
572                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
573         else {
574 #if CLONE_OFFLINK_ROUTE
575                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
576 #else
                /* no cloning: hand out rt itself, keeping the reference taken above */
577                 goto out2;
578 #endif
579         }
580
        /* switch from the original route to the copy (or to the null entry on allocation failure) */
581         dst_release(&rt->u.dst);
582         rt = nrt ? : &ip6_null_entry;
583
584         dst_hold(&rt->u.dst);
585         if (nrt) {
586                 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
587                 if (!err)
588                         goto out2;
589         }
590
        /* NOTE(review): once attempts run out, rt may be a copy whose insertion failed - confirm this is intended. */
591         if (--attempts <= 0)
592                 goto out2;
593
594         /*
595          * Race condition! In the gap, when rt6_lock was
596          * released someone could insert this route.  Relookup.
597          */
598         dst_release(&rt->u.dst);
599         goto relookup;
600
        /* out: rt6_lock is still held and no reference has been taken yet */
601 out:
602         dst_hold(&rt->u.dst);
603         read_unlock_bh(&rt6_lock);
        /* out2: lock already released, reference already held */
604 out2:
605         rt->u.dst.lastuse = jiffies;
606         rt->u.dst.__use++;
607         return &rt->u.dst;
608 }
609
610
611 /*
612  *      Destination cache support functions
613  */
614
615 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
616 {
617         struct rt6_info *rt;
618
619         rt = (struct rt6_info *) dst;
620
621         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
622                 return dst;
623
624         return NULL;
625 }
626
627 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
628 {
629         struct rt6_info *rt = (struct rt6_info *) dst;
630
631         if (rt) {
632                 if (rt->rt6i_flags & RTF_CACHE)
633                         ip6_del_rt(rt, NULL, NULL, NULL);
634                 else
635                         dst_release(dst);
636         }
637         return NULL;
638 }
639
640 static void ip6_link_failure(struct sk_buff *skb)
641 {
642         struct rt6_info *rt;
643
644         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
645
646         rt = (struct rt6_info *) skb->dst;
647         if (rt) {
648                 if (rt->rt6i_flags&RTF_CACHE) {
649                         dst_set_expires(&rt->u.dst, 0);
650                         rt->rt6i_flags |= RTF_EXPIRES;
651                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
652                         rt->rt6i_node->fn_sernum = -1;
653         }
654 }
655
656 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
657 {
658         struct rt6_info *rt6 = (struct rt6_info*)dst;
659
660         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
661                 rt6->rt6i_flags |= RTF_MODIFIED;
662                 if (mtu < IPV6_MIN_MTU) {
663                         mtu = IPV6_MIN_MTU;
664                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
665                 }
666                 dst->metrics[RTAX_MTU-1] = mtu;
667         }
668 }
669
/*
 * Singly linked list (via dst->next) of the dst entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() frees the unreferenced ones.
 */
670 /* Protected by rt6_lock.  */
671 static struct dst_entry *ndisc_dst_gc_list;
/* Forward declaration; defined further down in this file. */
672 static int ipv6_get_mtu(struct net_device *dev);
673
674 static inline unsigned int ipv6_advmss(unsigned int mtu)
675 {
676         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
677
678         if (mtu < ip6_rt_min_advmss)
679                 mtu = ip6_rt_min_advmss;
680
681         /*
682          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
683          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
684          * IPV6_MAXPLEN is also valid and means: "any MSS, 
685          * rely only on pmtu discovery"
686          */
687         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
688                 mtu = IPV6_MAXPLEN;
689         return mtu;
690 }
691
692 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
693                                   struct neighbour *neigh,
694                                   struct in6_addr *addr,
695                                   int (*output)(struct sk_buff *))
696 {
697         struct rt6_info *rt;
698         struct inet6_dev *idev = in6_dev_get(dev);
699
700         if (unlikely(idev == NULL))
701                 return NULL;
702
703         rt = ip6_dst_alloc();
704         if (unlikely(rt == NULL)) {
705                 in6_dev_put(idev);
706                 goto out;
707         }
708
709         dev_hold(dev);
710         if (neigh)
711                 neigh_hold(neigh);
712         else
713                 neigh = ndisc_get_neigh(dev, addr);
714
715         rt->rt6i_dev      = dev;
716         rt->rt6i_idev     = idev;
717         rt->rt6i_nexthop  = neigh;
718         atomic_set(&rt->u.dst.__refcnt, 1);
719         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
720         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
721         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
722         rt->u.dst.output  = output;
723
724 #if 0   /* there's no chance to use these for ndisc */
725         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
726                                 ? DST_HOST 
727                                 : 0;
728         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
729         rt->rt6i_dst.plen = 128;
730 #endif
731
732         write_lock_bh(&rt6_lock);
733         rt->u.dst.next = ndisc_dst_gc_list;
734         ndisc_dst_gc_list = &rt->u.dst;
735         write_unlock_bh(&rt6_lock);
736
737         fib6_force_start_gc();
738
739 out:
740         return (struct dst_entry *)rt;
741 }
742
743 int ndisc_dst_gc(int *more)
744 {
745         struct dst_entry *dst, *next, **pprev;
746         int freed;
747
748         next = NULL;
749         pprev = &ndisc_dst_gc_list;
750         freed = 0;
751         while ((dst = *pprev) != NULL) {
752                 if (!atomic_read(&dst->__refcnt)) {
753                         *pprev = dst->next;
754                         dst_free(dst);
755                         freed++;
756                 } else {
757                         pprev = &dst->next;
758                         (*more)++;
759                 }
760         }
761
762         return freed;
763 }
764
765 static int ip6_dst_gc(void)
766 {
767         static unsigned expire = 30*HZ;
768         static unsigned long last_gc;
769         unsigned long now = jiffies;
770
771         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
772             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
773                 goto out;
774
775         expire++;
776         fib6_run_gc(expire);
777         last_gc = now;
778         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
779                 expire = ip6_rt_gc_timeout>>1;
780
781 out:
782         expire -= expire>>ip6_rt_gc_elasticity;
783         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
784 }
785
786 /* Clean host part of a prefix. Not necessary in radix tree,
787    but results in cleaner routing tables.
788
789    Remove it only when all the things will work!
790  */
791
792 static int ipv6_get_mtu(struct net_device *dev)
793 {
794         int mtu = IPV6_MIN_MTU;
795         struct inet6_dev *idev;
796
797         idev = in6_dev_get(dev);
798         if (idev) {
799                 mtu = idev->cnf.mtu6;
800                 in6_dev_put(idev);
801         }
802         return mtu;
803 }
804
805 int ipv6_get_hoplimit(struct net_device *dev)
806 {
807         int hoplimit = ipv6_devconf.hop_limit;
808         struct inet6_dev *idev;
809
810         idev = in6_dev_get(dev);
811         if (idev) {
812                 hoplimit = idev->cnf.hop_limit;
813                 in6_dev_put(idev);
814         }
815         return hoplimit;
816 }
817
818 /*
819  *
820  */
821
822 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
823                 void *_rtattr, struct netlink_skb_parms *req)
824 {
825         int err;
826         struct rtmsg *r;
827         struct rtattr **rta;
828         struct rt6_info *rt = NULL;
829         struct net_device *dev = NULL;
830         struct inet6_dev *idev = NULL;
831         int addr_type;
832
833         rta = (struct rtattr **) _rtattr;
834
835         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
836                 return -EINVAL;
837 #ifndef CONFIG_IPV6_SUBTREES
838         if (rtmsg->rtmsg_src_len)
839                 return -EINVAL;
840 #endif
841         if (rtmsg->rtmsg_ifindex) {
842                 err = -ENODEV;
843                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
844                 if (!dev)
845                         goto out;
846                 idev = in6_dev_get(dev);
847                 if (!idev)
848                         goto out;
849         }
850
851         if (rtmsg->rtmsg_metric == 0)
852                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
853
854         rt = ip6_dst_alloc();
855
856         if (rt == NULL) {
857                 err = -ENOMEM;
858                 goto out;
859         }
860
861         rt->u.dst.obsolete = -1;
862         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
863         if (nlh && (r = NLMSG_DATA(nlh))) {
864                 rt->rt6i_protocol = r->rtm_protocol;
865         } else {
866                 rt->rt6i_protocol = RTPROT_BOOT;
867         }
868
869         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
870
871         if (addr_type & IPV6_ADDR_MULTICAST)
872                 rt->u.dst.input = ip6_mc_input;
873         else
874                 rt->u.dst.input = ip6_forward;
875
876         rt->u.dst.output = ip6_output;
877
878         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
879                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
880         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
881         if (rt->rt6i_dst.plen == 128)
882                rt->u.dst.flags = DST_HOST;
883
884 #ifdef CONFIG_IPV6_SUBTREES
885         ipv6_addr_prefix(&rt->rt6i_src.addr, 
886                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
887         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
888 #endif
889
890         rt->rt6i_metric = rtmsg->rtmsg_metric;
891
892         /* We cannot add true routes via loopback here,
893            they would result in kernel looping; promote them to reject routes
894          */
895         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
896             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
897                 /* hold loopback dev/idev if we haven't done so. */
898                 if (dev != &loopback_dev) {
899                         if (dev) {
900                                 dev_put(dev);
901                                 in6_dev_put(idev);
902                         }
903                         dev = &loopback_dev;
904                         dev_hold(dev);
905                         idev = in6_dev_get(dev);
906                         if (!idev) {
907                                 err = -ENODEV;
908                                 goto out;
909                         }
910                 }
911                 rt->u.dst.output = ip6_pkt_discard_out;
912                 rt->u.dst.input = ip6_pkt_discard;
913                 rt->u.dst.error = -ENETUNREACH;
914                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
915                 goto install_route;
916         }
917
918         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
919                 struct in6_addr *gw_addr;
920                 int gwa_type;
921
922                 gw_addr = &rtmsg->rtmsg_gateway;
923                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
924                 gwa_type = ipv6_addr_type(gw_addr);
925
926                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
927                         struct rt6_info *grt;
928
929                         /* IPv6 strictly inhibits using not link-local
930                            addresses as nexthop address.
931                            Otherwise, router will not able to send redirects.
932                            It is very good, but in some (rare!) circumstances
933                            (SIT, PtP, NBMA NOARP links) it is handy to allow
934                            some exceptions. --ANK
935                          */
936                         err = -EINVAL;
937                         if (!(gwa_type&IPV6_ADDR_UNICAST))
938                                 goto out;
939
940                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
941
942                         err = -EHOSTUNREACH;
943                         if (grt == NULL)
944                                 goto out;
945                         if (dev) {
946                                 if (dev != grt->rt6i_dev) {
947                                         dst_release(&grt->u.dst);
948                                         goto out;
949                                 }
950                         } else {
951                                 dev = grt->rt6i_dev;
952                                 idev = grt->rt6i_idev;
953                                 dev_hold(dev);
954                                 in6_dev_hold(grt->rt6i_idev);
955                         }
956                         if (!(grt->rt6i_flags&RTF_GATEWAY))
957                                 err = 0;
958                         dst_release(&grt->u.dst);
959
960                         if (err)
961                                 goto out;
962                 }
963                 err = -EINVAL;
964                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
965                         goto out;
966         }
967
968         err = -ENODEV;
969         if (dev == NULL)
970                 goto out;
971
972         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
973                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
974                 if (IS_ERR(rt->rt6i_nexthop)) {
975                         err = PTR_ERR(rt->rt6i_nexthop);
976                         rt->rt6i_nexthop = NULL;
977                         goto out;
978                 }
979         }
980
981         rt->rt6i_flags = rtmsg->rtmsg_flags;
982
983 install_route:
984         if (rta && rta[RTA_METRICS-1]) {
985                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
986                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
987
988                 while (RTA_OK(attr, attrlen)) {
989                         unsigned flavor = attr->rta_type;
990                         if (flavor) {
991                                 if (flavor > RTAX_MAX) {
992                                         err = -EINVAL;
993                                         goto out;
994                                 }
995                                 rt->u.dst.metrics[flavor-1] =
996                                         *(u32 *)RTA_DATA(attr);
997                         }
998                         attr = RTA_NEXT(attr, attrlen);
999                 }
1000         }
1001
1002         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1003                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1004         if (!rt->u.dst.metrics[RTAX_MTU-1])
1005                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1006         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1007                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1008         rt->u.dst.dev = dev;
1009         rt->rt6i_idev = idev;
1010         return ip6_ins_rt(rt, nlh, _rtattr, req);
1011
1012 out:
1013         if (dev)
1014                 dev_put(dev);
1015         if (idev)
1016                 in6_dev_put(idev);
1017         if (rt)
1018                 dst_free((struct dst_entry *) rt);
1019         return err;
1020 }
1021
1022 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1023 {
1024         int err;
1025
1026         write_lock_bh(&rt6_lock);
1027
1028         rt6_reset_dflt_pointer(NULL);
1029
1030         err = fib6_del(rt, nlh, _rtattr, req);
1031         dst_release(&rt->u.dst);
1032
1033         write_unlock_bh(&rt6_lock);
1034
1035         return err;
1036 }
1037
1038 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1039 {
1040         struct fib6_node *fn;
1041         struct rt6_info *rt;
1042         int err = -ESRCH;
1043
1044         read_lock_bh(&rt6_lock);
1045
1046         fn = fib6_locate(&ip6_routing_table,
1047                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1048                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1049         
1050         if (fn) {
1051                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1052                         if (rtmsg->rtmsg_ifindex &&
1053                             (rt->rt6i_dev == NULL ||
1054                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1055                                 continue;
1056                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1057                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1058                                 continue;
1059                         if (rtmsg->rtmsg_metric &&
1060                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1061                                 continue;
1062                         dst_hold(&rt->u.dst);
1063                         read_unlock_bh(&rt6_lock);
1064
1065                         return ip6_del_rt(rt, nlh, _rtattr, req);
1066                 }
1067         }
1068         read_unlock_bh(&rt6_lock);
1069
1070         return err;
1071 }
1072
1073 /*
1074  *      Handle redirects
1075  */
1076 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1077                   struct neighbour *neigh, u8 *lladdr, int on_link)
1078 {
1079         struct rt6_info *rt, *nrt;
1080
1081         /* Locate old route to this destination. */
1082         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1083
1084         if (rt == NULL)
1085                 return;
1086
1087         if (neigh->dev != rt->rt6i_dev)
1088                 goto out;
1089
1090         /*
1091          * Current route is on-link; redirect is always invalid.
1092          * 
1093          * Seems, previous statement is not true. It could
1094          * be node, which looks for us as on-link (f.e. proxy ndisc)
1095          * But then router serving it might decide, that we should
1096          * know truth 8)8) --ANK (980726).
1097          */
1098         if (!(rt->rt6i_flags&RTF_GATEWAY))
1099                 goto out;
1100
1101         /*
1102          *      RFC 2461 specifies that redirects should only be
1103          *      accepted if they come from the nexthop to the target.
1104          *      Due to the way default routers are chosen, this notion
1105          *      is a bit fuzzy and one might need to check all default
1106          *      routers.
1107          */
1108         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1109                 if (rt->rt6i_flags & RTF_DEFAULT) {
1110                         struct rt6_info *rt1;
1111
1112                         read_lock(&rt6_lock);
1113                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1114                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1115                                         dst_hold(&rt1->u.dst);
1116                                         dst_release(&rt->u.dst);
1117                                         read_unlock(&rt6_lock);
1118                                         rt = rt1;
1119                                         goto source_ok;
1120                                 }
1121                         }
1122                         read_unlock(&rt6_lock);
1123                 }
1124                 if (net_ratelimit())
1125                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1126                                "for redirect target\n");
1127                 goto out;
1128         }
1129
1130 source_ok:
1131
1132         /*
1133          *      We have finally decided to accept it.
1134          */
1135
1136         neigh_update(neigh, lladdr, NUD_STALE, 
1137                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1138                      NEIGH_UPDATE_F_OVERRIDE|
1139                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1140                                      NEIGH_UPDATE_F_ISROUTER))
1141                      );
1142
1143         /*
1144          * Redirect received -> path was valid.
1145          * Look, redirects are sent only in response to data packets,
1146          * so that this nexthop apparently is reachable. --ANK
1147          */
1148         dst_confirm(&rt->u.dst);
1149
1150         /* Duplicate redirect: silently ignore. */
1151         if (neigh == rt->u.dst.neighbour)
1152                 goto out;
1153
1154         nrt = ip6_rt_copy(rt);
1155         if (nrt == NULL)
1156                 goto out;
1157
1158         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1159         if (on_link)
1160                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1161
1162         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1163         nrt->rt6i_dst.plen = 128;
1164         nrt->u.dst.flags |= DST_HOST;
1165
1166         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1167         nrt->rt6i_nexthop = neigh_clone(neigh);
1168         /* Reset pmtu, it may be better */
1169         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1170         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1171
1172         if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1173                 goto out;
1174
1175         if (rt->rt6i_flags&RTF_CACHE) {
1176                 ip6_del_rt(rt, NULL, NULL, NULL);
1177                 return;
1178         }
1179
1180 out:
1181         dst_release(&rt->u.dst);
1182         return;
1183 }
1184
1185 /*
1186  *      Handle ICMP "packet too big" messages
1187  *      i.e. Path MTU discovery
1188  */
1189
1190 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1191                         struct net_device *dev, u32 pmtu)
1192 {
1193         struct rt6_info *rt, *nrt;
1194         int allfrag = 0;
1195
1196         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1197         if (rt == NULL)
1198                 return;
1199
1200         if (pmtu >= dst_mtu(&rt->u.dst))
1201                 goto out;
1202
1203         if (pmtu < IPV6_MIN_MTU) {
1204                 /*
1205                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1206                  * MTU (1280) and a fragment header should always be included
1207                  * after a node receiving Too Big message reporting PMTU is
1208                  * less than the IPv6 Minimum Link MTU.
1209                  */
1210                 pmtu = IPV6_MIN_MTU;
1211                 allfrag = 1;
1212         }
1213
1214         /* New mtu received -> path was valid.
1215            They are sent only in response to data packets,
1216            so that this nexthop apparently is reachable. --ANK
1217          */
1218         dst_confirm(&rt->u.dst);
1219
1220         /* Host route. If it is static, it would be better
1221            not to override it, but add new one, so that
1222            when cache entry will expire old pmtu
1223            would return automatically.
1224          */
1225         if (rt->rt6i_flags & RTF_CACHE) {
1226                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1227                 if (allfrag)
1228                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1229                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1230                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1231                 goto out;
1232         }
1233
1234         /* Network route.
1235            Two cases are possible:
1236            1. It is connected route. Action: COW
1237            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1238          */
1239         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1240                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1241         else
1242                 nrt = rt6_alloc_clone(rt, daddr);
1243
1244         if (nrt) {
1245                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1246                 if (allfrag)
1247                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1248
1249                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1250                  * happened within 5 mins, the recommended timer is 10 mins.
1251                  * Here this route expiration time is set to ip6_rt_mtu_expires
1252                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1253                  * and detecting PMTU increase will be automatically happened.
1254                  */
1255                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1256                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1257
1258                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1259         }
1260 out:
1261         dst_release(&rt->u.dst);
1262 }
1263
1264 /*
1265  *      Misc support functions
1266  */
1267
1268 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1269 {
1270         struct rt6_info *rt = ip6_dst_alloc();
1271
1272         if (rt) {
1273                 rt->u.dst.input = ort->u.dst.input;
1274                 rt->u.dst.output = ort->u.dst.output;
1275
1276                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1277                 rt->u.dst.dev = ort->u.dst.dev;
1278                 if (rt->u.dst.dev)
1279                         dev_hold(rt->u.dst.dev);
1280                 rt->rt6i_idev = ort->rt6i_idev;
1281                 if (rt->rt6i_idev)
1282                         in6_dev_hold(rt->rt6i_idev);
1283                 rt->u.dst.lastuse = jiffies;
1284                 rt->rt6i_expires = 0;
1285
1286                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1287                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1288                 rt->rt6i_metric = 0;
1289
1290                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1291 #ifdef CONFIG_IPV6_SUBTREES
1292                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1293 #endif
1294         }
1295         return rt;
1296 }
1297
1298 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1299 {       
1300         struct rt6_info *rt;
1301         struct fib6_node *fn;
1302
1303         fn = &ip6_routing_table;
1304
1305         write_lock_bh(&rt6_lock);
1306         for (rt = fn->leaf; rt; rt=rt->u.next) {
1307                 if (dev == rt->rt6i_dev &&
1308                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1309                         break;
1310         }
1311         if (rt)
1312                 dst_hold(&rt->u.dst);
1313         write_unlock_bh(&rt6_lock);
1314         return rt;
1315 }
1316
1317 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1318                                      struct net_device *dev)
1319 {
1320         struct in6_rtmsg rtmsg;
1321
1322         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1323         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1324         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1325         rtmsg.rtmsg_metric = 1024;
1326         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1327
1328         rtmsg.rtmsg_ifindex = dev->ifindex;
1329
1330         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1331         return rt6_get_dflt_router(gwaddr, dev);
1332 }
1333
1334 void rt6_purge_dflt_routers(void)
1335 {
1336         struct rt6_info *rt;
1337
1338 restart:
1339         read_lock_bh(&rt6_lock);
1340         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1341                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1342                         dst_hold(&rt->u.dst);
1343
1344                         rt6_reset_dflt_pointer(NULL);
1345
1346                         read_unlock_bh(&rt6_lock);
1347
1348                         ip6_del_rt(rt, NULL, NULL, NULL);
1349
1350                         goto restart;
1351                 }
1352         }
1353         read_unlock_bh(&rt6_lock);
1354 }
1355
1356 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1357 {
1358         struct in6_rtmsg rtmsg;
1359         int err;
1360
1361         switch(cmd) {
1362         case SIOCADDRT:         /* Add a route */
1363         case SIOCDELRT:         /* Delete a route */
1364                 if (!capable(CAP_NET_ADMIN))
1365                         return -EPERM;
1366                 err = copy_from_user(&rtmsg, arg,
1367                                      sizeof(struct in6_rtmsg));
1368                 if (err)
1369                         return -EFAULT;
1370                         
1371                 rtnl_lock();
1372                 switch (cmd) {
1373                 case SIOCADDRT:
1374                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1375                         break;
1376                 case SIOCDELRT:
1377                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1378                         break;
1379                 default:
1380                         err = -EINVAL;
1381                 }
1382                 rtnl_unlock();
1383
1384                 return err;
1385         };
1386
1387         return -EINVAL;
1388 }
1389
1390 /*
1391  *      Drop the packet on the floor
1392  */
1393
1394 static int ip6_pkt_discard(struct sk_buff *skb)
1395 {
1396         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1397         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1398         kfree_skb(skb);
1399         return 0;
1400 }
1401
1402 static int ip6_pkt_discard_out(struct sk_buff *skb)
1403 {
1404         skb->dev = skb->dst->dev;
1405         return ip6_pkt_discard(skb);
1406 }
1407
1408 /*
1409  *      Allocate a dst for local (unicast / anycast) address.
1410  */
1411
1412 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1413                                     const struct in6_addr *addr,
1414                                     int anycast)
1415 {
1416         struct rt6_info *rt = ip6_dst_alloc();
1417
1418         if (rt == NULL)
1419                 return ERR_PTR(-ENOMEM);
1420
1421         dev_hold(&loopback_dev);
1422         in6_dev_hold(idev);
1423
1424         rt->u.dst.flags = DST_HOST;
1425         rt->u.dst.input = ip6_input;
1426         rt->u.dst.output = ip6_output;
1427         rt->rt6i_dev = &loopback_dev;
1428         rt->rt6i_idev = idev;
1429         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1430         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1431         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1432         rt->u.dst.obsolete = -1;
1433
1434         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1435         if (anycast)
1436                 rt->rt6i_flags |= RTF_ANYCAST;
1437         else
1438                 rt->rt6i_flags |= RTF_LOCAL;
1439         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1440         if (rt->rt6i_nexthop == NULL) {
1441                 dst_free((struct dst_entry *) rt);
1442                 return ERR_PTR(-ENOMEM);
1443         }
1444
1445         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1446         rt->rt6i_dst.plen = 128;
1447
1448         atomic_set(&rt->u.dst.__refcnt, 1);
1449
1450         return rt;
1451 }
1452
1453 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1454 {
1455         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1456             rt != &ip6_null_entry) {
1457                 RT6_TRACE("deleted by ifdown %p\n", rt);
1458                 return -1;
1459         }
1460         return 0;
1461 }
1462
1463 void rt6_ifdown(struct net_device *dev)
1464 {
1465         write_lock_bh(&rt6_lock);
1466         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1467         write_unlock_bh(&rt6_lock);
1468 }
1469
/* Argument block handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
1475
1476 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1477 {
1478         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1479         struct inet6_dev *idev;
1480
1481         /* In IPv6 pmtu discovery is not optional,
1482            so that RTAX_MTU lock cannot disable it.
1483            We still use this lock to block changes
1484            caused by addrconf/ndisc.
1485         */
1486
1487         idev = __in6_dev_get(arg->dev);
1488         if (idev == NULL)
1489                 return 0;
1490
1491         /* For administrative MTU increase, there is no way to discover
1492            IPv6 PMTU increase, so PMTU increase should be updated here.
1493            Since RFC 1981 doesn't include administrative MTU increase
1494            update PMTU increase is a MUST. (i.e. jumbo frame)
1495          */
1496         /*
1497            If new MTU is less than route PMTU, this new MTU will be the
1498            lowest MTU in the path, update the route PMTU to reflect PMTU
1499            decreases; if new MTU is greater than route PMTU, and the
1500            old MTU is the lowest MTU in the path, update the route PMTU
1501            to reflect the increase. In this case if the other nodes' MTU
1502            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1503            PMTU discouvery.
1504          */
1505         if (rt->rt6i_dev == arg->dev &&
1506             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1507             (dst_mtu(&rt->u.dst) > arg->mtu ||
1508              (dst_mtu(&rt->u.dst) < arg->mtu &&
1509               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1510                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1511         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1512         return 0;
1513 }
1514
1515 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1516 {
1517         struct rt6_mtu_change_arg arg;
1518
1519         arg.dev = dev;
1520         arg.mtu = mtu;
1521         read_lock_bh(&rt6_lock);
1522         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1523         read_unlock_bh(&rt6_lock);
1524 }
1525
1526 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1527                               struct in6_rtmsg *rtmsg)
1528 {
1529         memset(rtmsg, 0, sizeof(*rtmsg));
1530
1531         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1532         rtmsg->rtmsg_src_len = r->rtm_src_len;
1533         rtmsg->rtmsg_flags = RTF_UP;
1534         if (r->rtm_type == RTN_UNREACHABLE)
1535                 rtmsg->rtmsg_flags |= RTF_REJECT;
1536
1537         if (rta[RTA_GATEWAY-1]) {
1538                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1539                         return -EINVAL;
1540                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1541                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1542         }
1543         if (rta[RTA_DST-1]) {
1544                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1545                         return -EINVAL;
1546                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1547         }
1548         if (rta[RTA_SRC-1]) {
1549                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1550                         return -EINVAL;
1551                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1552         }
1553         if (rta[RTA_OIF-1]) {
1554                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1555                         return -EINVAL;
1556                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1557         }
1558         if (rta[RTA_PRIORITY-1]) {
1559                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1560                         return -EINVAL;
1561                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1562         }
1563         return 0;
1564 }
1565
1566 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1567 {
1568         struct rtmsg *r = NLMSG_DATA(nlh);
1569         struct in6_rtmsg rtmsg;
1570
1571         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1572                 return -EINVAL;
1573         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1574 }
1575
1576 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1577 {
1578         struct rtmsg *r = NLMSG_DATA(nlh);
1579         struct in6_rtmsg rtmsg;
1580
1581         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1582                 return -EINVAL;
1583         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1584 }
1585
/* Per-call state passed to the dump walker (see inet6_dump_fib()). */
struct rt6_rtnl_dump_arg
{
	struct sk_buff		*skb;	/* buffer the routes are written into */
	struct netlink_callback	*cb;	/* netlink dump callback state */
};
1591
1592 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1593                          struct in6_addr *dst, struct in6_addr *src,
1594                          int iif, int type, u32 pid, u32 seq,
1595                          int prefix, unsigned int flags)
1596 {
1597         struct rtmsg *rtm;
1598         struct nlmsghdr  *nlh;
1599         unsigned char    *b = skb->tail;
1600         struct rta_cacheinfo ci;
1601
1602         if (prefix) {   /* user wants prefix routes only */
1603                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1604                         /* success since this is not a prefix route */
1605                         return 1;
1606                 }
1607         }
1608
1609         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1610         rtm = NLMSG_DATA(nlh);
1611         rtm->rtm_family = AF_INET6;
1612         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1613         rtm->rtm_src_len = rt->rt6i_src.plen;
1614         rtm->rtm_tos = 0;
1615         rtm->rtm_table = RT_TABLE_MAIN;
1616         if (rt->rt6i_flags&RTF_REJECT)
1617                 rtm->rtm_type = RTN_UNREACHABLE;
1618         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1619                 rtm->rtm_type = RTN_LOCAL;
1620         else
1621                 rtm->rtm_type = RTN_UNICAST;
1622         rtm->rtm_flags = 0;
1623         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1624         rtm->rtm_protocol = rt->rt6i_protocol;
1625         if (rt->rt6i_flags&RTF_DYNAMIC)
1626                 rtm->rtm_protocol = RTPROT_REDIRECT;
1627         else if (rt->rt6i_flags & RTF_ADDRCONF)
1628                 rtm->rtm_protocol = RTPROT_KERNEL;
1629         else if (rt->rt6i_flags&RTF_DEFAULT)
1630                 rtm->rtm_protocol = RTPROT_RA;
1631
1632         if (rt->rt6i_flags&RTF_CACHE)
1633                 rtm->rtm_flags |= RTM_F_CLONED;
1634
1635         if (dst) {
1636                 RTA_PUT(skb, RTA_DST, 16, dst);
1637                 rtm->rtm_dst_len = 128;
1638         } else if (rtm->rtm_dst_len)
1639                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1640 #ifdef CONFIG_IPV6_SUBTREES
1641         if (src) {
1642                 RTA_PUT(skb, RTA_SRC, 16, src);
1643                 rtm->rtm_src_len = 128;
1644         } else if (rtm->rtm_src_len)
1645                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1646 #endif
1647         if (iif)
1648                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1649         else if (dst) {
1650                 struct in6_addr saddr_buf;
1651                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1652                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1653         }
1654         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1655                 goto rtattr_failure;
1656         if (rt->u.dst.neighbour)
1657                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1658         if (rt->u.dst.dev)
1659                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1660         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1661         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1662         if (rt->rt6i_expires)
1663                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1664         else
1665                 ci.rta_expires = 0;
1666         ci.rta_used = rt->u.dst.__use;
1667         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1668         ci.rta_error = rt->u.dst.error;
1669         ci.rta_id = 0;
1670         ci.rta_ts = 0;
1671         ci.rta_tsage = 0;
1672         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1673         nlh->nlmsg_len = skb->tail - b;
1674         return skb->len;
1675
1676 nlmsg_failure:
1677 rtattr_failure:
1678         skb_trim(skb, b - skb->data);
1679         return -1;
1680 }
1681
1682 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1683 {
1684         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1685         int prefix;
1686
1687         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1688                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1689                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1690         } else
1691                 prefix = 0;
1692
1693         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1694                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1695                      prefix, NLM_F_MULTI);
1696 }
1697
1698 static int fib6_dump_node(struct fib6_walker_t *w)
1699 {
1700         int res;
1701         struct rt6_info *rt;
1702
1703         for (rt = w->leaf; rt; rt = rt->u.next) {
1704                 res = rt6_dump_route(rt, w->args);
1705                 if (res < 0) {
1706                         /* Frame is full, suspend walking */
1707                         w->leaf = rt;
1708                         return 1;
1709                 }
1710                 BUG_TRAP(res!=0);
1711         }
1712         w->leaf = NULL;
1713         return 0;
1714 }
1715
1716 static void fib6_dump_end(struct netlink_callback *cb)
1717 {
1718         struct fib6_walker_t *w = (void*)cb->args[0];
1719
1720         if (w) {
1721                 cb->args[0] = 0;
1722                 fib6_walker_unlink(w);
1723                 kfree(w);
1724         }
1725         cb->done = (void*)cb->args[1];
1726         cb->args[1] = 0;
1727 }
1728
1729 static int fib6_dump_done(struct netlink_callback *cb)
1730 {
1731         fib6_dump_end(cb);
1732         return cb->done ? cb->done(cb) : 0;
1733 }
1734
1735 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1736 {
1737         struct rt6_rtnl_dump_arg arg;
1738         struct fib6_walker_t *w;
1739         int res;
1740
1741         arg.skb = skb;
1742         arg.cb = cb;
1743
1744         w = (void*)cb->args[0];
1745         if (w == NULL) {
1746                 /* New dump:
1747                  * 
1748                  * 1. hook callback destructor.
1749                  */
1750                 cb->args[1] = (long)cb->done;
1751                 cb->done = fib6_dump_done;
1752
1753                 /*
1754                  * 2. allocate and initialize walker.
1755                  */
1756                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1757                 if (w == NULL)
1758                         return -ENOMEM;
1759                 RT6_TRACE("dump<%p", w);
1760                 memset(w, 0, sizeof(*w));
1761                 w->root = &ip6_routing_table;
1762                 w->func = fib6_dump_node;
1763                 w->args = &arg;
1764                 cb->args[0] = (long)w;
1765                 read_lock_bh(&rt6_lock);
1766                 res = fib6_walk(w);
1767                 read_unlock_bh(&rt6_lock);
1768         } else {
1769                 w->args = &arg;
1770                 read_lock_bh(&rt6_lock);
1771                 res = fib6_walk_continue(w);
1772                 read_unlock_bh(&rt6_lock);
1773         }
1774 #if RT6_DEBUG >= 3
1775         if (res <= 0 && skb->len == 0)
1776                 RT6_TRACE("%p>dump end\n", w);
1777 #endif
1778         res = res < 0 ? res : skb->len;
1779         /* res < 0 is an error. (really, impossible)
1780            res == 0 means that dump is complete, but skb still can contain data.
1781            res > 0 dump is not complete, but frame is full.
1782          */
1783         /* Destroy walker, if dump of this table is complete. */
1784         if (res <= 0)
1785                 fib6_dump_end(cb);
1786         return res;
1787 }
1788
1789 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1790 {
1791         struct rtattr **rta = arg;
1792         int iif = 0;
1793         int err = -ENOBUFS;
1794         struct sk_buff *skb;
1795         struct flowi fl;
1796         struct rt6_info *rt;
1797
1798         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1799         if (skb == NULL)
1800                 goto out;
1801
1802         /* Reserve room for dummy headers, this skb can pass
1803            through good chunk of routing engine.
1804          */
1805         skb->mac.raw = skb->data;
1806         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1807
1808         memset(&fl, 0, sizeof(fl));
1809         if (rta[RTA_SRC-1])
1810                 ipv6_addr_copy(&fl.fl6_src,
1811                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1812         if (rta[RTA_DST-1])
1813                 ipv6_addr_copy(&fl.fl6_dst,
1814                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1815
1816         if (rta[RTA_IIF-1])
1817                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1818
1819         if (iif) {
1820                 struct net_device *dev;
1821                 dev = __dev_get_by_index(iif);
1822                 if (!dev) {
1823                         err = -ENODEV;
1824                         goto out_free;
1825                 }
1826         }
1827
1828         fl.oif = 0;
1829         if (rta[RTA_OIF-1])
1830                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1831
1832         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1833
1834         skb->dst = &rt->u.dst;
1835
1836         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1837         err = rt6_fill_node(skb, rt, 
1838                             &fl.fl6_dst, &fl.fl6_src,
1839                             iif,
1840                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1841                             nlh->nlmsg_seq, 0, 0);
1842         if (err < 0) {
1843                 err = -EMSGSIZE;
1844                 goto out_free;
1845         }
1846
1847         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1848         if (err > 0)
1849                 err = 0;
1850 out:
1851         return err;
1852 out_free:
1853         kfree_skb(skb);
1854         goto out;       
1855 }
1856
1857 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1858                         struct netlink_skb_parms *req)
1859 {
1860         struct sk_buff *skb;
1861         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1862         u32 pid = current->pid;
1863         u32 seq = 0;
1864
1865         if (req)
1866                 pid = req->pid;
1867         if (nlh)
1868                 seq = nlh->nlmsg_seq;
1869         
1870         skb = alloc_skb(size, gfp_any());
1871         if (!skb) {
1872                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1873                 return;
1874         }
1875         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1876                 kfree_skb(skb);
1877                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1878                 return;
1879         }
1880         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1881         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1882 }
1883
1884 /*
1885  *      /proc
1886  */
1887
1888 #ifdef CONFIG_PROC_FS
1889
/*
 * Nominal size in bytes (150) of one text record written by
 * rt6_info_route().  The offset arithmetic in rt6_info_route() and
 * rt6_proc_info() assumes every record has exactly this size, which
 * holds as long as each formatted field fits its minimum width (for
 * instance a device name of at most 8 characters for the "%8s" field).
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg
{
        char *buffer;   /* output buffer the records are written into */
        int offset;     /* offset requested by the reader */
        int length;     /* number of bytes requested by the reader */
        int skip;       /* routes skipped so far to reach 'offset' */
        int len;        /* bytes written into 'buffer' so far */
};
1900
1901 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1902 {
1903         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1904         int i;
1905
1906         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1907                 arg->skip++;
1908                 return 0;
1909         }
1910
1911         if (arg->len >= arg->length)
1912                 return 0;
1913
1914         for (i=0; i<16; i++) {
1915                 sprintf(arg->buffer + arg->len, "%02x",
1916                         rt->rt6i_dst.addr.s6_addr[i]);
1917                 arg->len += 2;
1918         }
1919         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1920                             rt->rt6i_dst.plen);
1921
1922 #ifdef CONFIG_IPV6_SUBTREES
1923         for (i=0; i<16; i++) {
1924                 sprintf(arg->buffer + arg->len, "%02x",
1925                         rt->rt6i_src.addr.s6_addr[i]);
1926                 arg->len += 2;
1927         }
1928         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1929                             rt->rt6i_src.plen);
1930 #else
1931         sprintf(arg->buffer + arg->len,
1932                 "00000000000000000000000000000000 00 ");
1933         arg->len += 36;
1934 #endif
1935
1936         if (rt->rt6i_nexthop) {
1937                 for (i=0; i<16; i++) {
1938                         sprintf(arg->buffer + arg->len, "%02x",
1939                                 rt->rt6i_nexthop->primary_key[i]);
1940                         arg->len += 2;
1941                 }
1942         } else {
1943                 sprintf(arg->buffer + arg->len,
1944                         "00000000000000000000000000000000");
1945                 arg->len += 32;
1946         }
1947         arg->len += sprintf(arg->buffer + arg->len,
1948                             " %08x %08x %08x %08x %8s\n",
1949                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1950                             rt->u.dst.__use, rt->rt6i_flags, 
1951                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1952         return 0;
1953 }
1954
1955 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1956 {
1957         struct rt6_proc_arg arg;
1958         arg.buffer = buffer;
1959         arg.offset = offset;
1960         arg.length = length;
1961         arg.skip = 0;
1962         arg.len = 0;
1963
1964         read_lock_bh(&rt6_lock);
1965         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1966         read_unlock_bh(&rt6_lock);
1967
1968         *start = buffer;
1969         if (offset)
1970                 *start += offset % RT6_INFO_LEN;
1971
1972         arg.len -= offset % RT6_INFO_LEN;
1973
1974         if (arg.len > length)
1975                 arg.len = length;
1976         if (arg.len < 0)
1977                 arg.len = 0;
1978
1979         return arg.len;
1980 }
1981
1982 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1983 {
1984         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1985                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1986                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1987                       rt6_stats.fib_rt_cache,
1988                       atomic_read(&ip6_dst_ops.entries),
1989                       rt6_stats.fib_discarded_routes);
1990
1991         return 0;
1992 }
1993
/*
 * open() handler for the "rt6_stats" proc entry: binds the file to
 * rt6_stats_seq_show() through single_open(), with no private data.
 * Returns whatever single_open() returns.
 */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
        return single_open(file, rt6_stats_seq_show, NULL);
}
1998
/*
 * File operations of the "rt6_stats" proc entry: a single_open() based
 * seq_file, so everything except open is the stock seq_file helper.
 */
static struct file_operations rt6_stats_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = rt6_stats_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,     /* pairs with single_open() */
};
2006 #endif  /* CONFIG_PROC_FS */
2007
2008 #ifdef CONFIG_SYSCTL
2009
2010 static int flush_delay;
2011
2012 static
2013 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2014                               void __user *buffer, size_t *lenp, loff_t *ppos)
2015 {
2016         if (write) {
2017                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2018                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2019                 return 0;
2020         } else
2021                 return -EINVAL;
2022 }
2023
2024 ctl_table ipv6_route_table[] = {
2025         {
2026                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2027                 .procname       =       "flush",
2028                 .data           =       &flush_delay,
2029                 .maxlen         =       sizeof(int),
2030                 .mode           =       0200,
2031                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2032         },
2033         {
2034                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2035                 .procname       =       "gc_thresh",
2036                 .data           =       &ip6_dst_ops.gc_thresh,
2037                 .maxlen         =       sizeof(int),
2038                 .mode           =       0644,
2039                 .proc_handler   =       &proc_dointvec,
2040         },
2041         {
2042                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2043                 .procname       =       "max_size",
2044                 .data           =       &ip6_rt_max_size,
2045                 .maxlen         =       sizeof(int),
2046                 .mode           =       0644,
2047                 .proc_handler   =       &proc_dointvec,
2048         },
2049         {
2050                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2051                 .procname       =       "gc_min_interval",
2052                 .data           =       &ip6_rt_gc_min_interval,
2053                 .maxlen         =       sizeof(int),
2054                 .mode           =       0644,
2055                 .proc_handler   =       &proc_dointvec_jiffies,
2056                 .strategy       =       &sysctl_jiffies,
2057         },
2058         {
2059                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2060                 .procname       =       "gc_timeout",
2061                 .data           =       &ip6_rt_gc_timeout,
2062                 .maxlen         =       sizeof(int),
2063                 .mode           =       0644,
2064                 .proc_handler   =       &proc_dointvec_jiffies,
2065                 .strategy       =       &sysctl_jiffies,
2066         },
2067         {
2068                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2069                 .procname       =       "gc_interval",
2070                 .data           =       &ip6_rt_gc_interval,
2071                 .maxlen         =       sizeof(int),
2072                 .mode           =       0644,
2073                 .proc_handler   =       &proc_dointvec_jiffies,
2074                 .strategy       =       &sysctl_jiffies,
2075         },
2076         {
2077                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2078                 .procname       =       "gc_elasticity",
2079                 .data           =       &ip6_rt_gc_elasticity,
2080                 .maxlen         =       sizeof(int),
2081                 .mode           =       0644,
2082                 .proc_handler   =       &proc_dointvec_jiffies,
2083                 .strategy       =       &sysctl_jiffies,
2084         },
2085         {
2086                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2087                 .procname       =       "mtu_expires",
2088                 .data           =       &ip6_rt_mtu_expires,
2089                 .maxlen         =       sizeof(int),
2090                 .mode           =       0644,
2091                 .proc_handler   =       &proc_dointvec_jiffies,
2092                 .strategy       =       &sysctl_jiffies,
2093         },
2094         {
2095                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2096                 .procname       =       "min_adv_mss",
2097                 .data           =       &ip6_rt_min_advmss,
2098                 .maxlen         =       sizeof(int),
2099                 .mode           =       0644,
2100                 .proc_handler   =       &proc_dointvec_jiffies,
2101                 .strategy       =       &sysctl_jiffies,
2102         },
2103         {
2104                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2105                 .procname       =       "gc_min_interval_ms",
2106                 .data           =       &ip6_rt_gc_min_interval,
2107                 .maxlen         =       sizeof(int),
2108                 .mode           =       0644,
2109                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2110                 .strategy       =       &sysctl_ms_jiffies,
2111         },
2112         { .ctl_name = 0 }
2113 };
2114
2115 #endif
2116
2117 void __init ip6_route_init(void)
2118 {
2119         struct proc_dir_entry *p;
2120
2121         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2122                                                      sizeof(struct rt6_info),
2123                                                      0, SLAB_HWCACHE_ALIGN,
2124                                                      NULL, NULL);
2125         if (!ip6_dst_ops.kmem_cachep)
2126                 panic("cannot create ip6_dst_cache");
2127
2128         fib6_init();
2129 #ifdef  CONFIG_PROC_FS
2130         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2131         if (p)
2132                 p->owner = THIS_MODULE;
2133
2134         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2135 #endif
2136 #ifdef CONFIG_XFRM
2137         xfrm6_init();
2138 #endif
2139 }
2140
/*
 * Tear down what ip6_route_init() set up: remove the two proc entries,
 * shut down the xfrm6 hooks, and finally destroy the "ip6_dst_cache"
 * slab.  The order of the calls is deliberate and left untouched.
 */
void ip6_route_cleanup(void)
{
#ifdef CONFIG_PROC_FS
        proc_net_remove("ipv6_route");
        proc_net_remove("rt6_stats");
#endif
#ifdef CONFIG_XFRM
        xfrm6_fini();
#endif
        /* NOTE(review): NULL presumably means "every device" - confirm. */
        rt6_ifdown(NULL);
        fib6_gc_cleanup();
        /* Destroys the slab cache created in ip6_route_init(). */
        kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}