Pull sbs into release branch
[pandora-kernel.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  *      Ville Nuorvala
26  *              Fixed routing subtrees.
27  */
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57 #include <net/netevent.h>
58 #include <net/netlink.h>
59
60 #include <asm/uaccess.h>
61
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65
/*
 * Debug verbosity for this file.  Set to 3 to get tracing: at that level
 * RDBG() and RT6_TRACE() expand to printk calls, below it both expand to
 * nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
/* RDBG(x) pastes x directly after printk, so x must be a parenthesized argument list. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

/*
 * When non-zero, the policy lookups clone a matched route that already has
 * a nexthop (via rt6_alloc_clone()); when zero they return that route as is.
 */
#define CLONE_OFFLINK_ROUTE 0
78
/*
 * Tunables for the IPv6 routing cache.  The time values are expressed in
 * jiffies (multiples of HZ).
 * NOTE(review): the consumers of most of these are not visible in this part
 * of the file - confirm their exact meaning where they are used.
 */
static int ip6_rt_max_size = 4096;
static int ip6_rt_gc_min_interval = HZ / 2;
static int ip6_rt_gc_timeout = 60*HZ;
/* Not static: this one has external linkage. */
int ip6_rt_gc_interval = 30*HZ;
static int ip6_rt_gc_elasticity = 9;
static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied by ipv6_advmss(). */
static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
/*
 * Forward declarations for helpers that are referenced (by the dst_ops
 * tables, the static route entries and the lookup code) before they are
 * defined.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(void);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);

/* Route Information option support (used by rt6_route_rcv()). */
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex);
#endif
107
/*
 * dst operations for regular IPv6 routes.  Every dst handed out by
 * ip6_dst_alloc() uses this table; the callbacks are the ip6_* helpers
 * defined in this file and each entry is sized as a struct rt6_info.
 */
static struct dst_ops ip6_dst_ops = {
	.family			=	AF_INET6,
	.protocol		=	__constant_htons(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.entry_size		=	sizeof(struct rt6_info),
};
121
/*
 * update_pmtu callback for blackhole dsts: intentionally a no-op, so the
 * MTU metric copied in by ip6_dst_blackhole() is never changed.
 */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}
125
/*
 * dst operations for the blackhole copies created by ip6_dst_blackhole().
 * Compared with ip6_dst_ops there is no gc, ifdown, negative_advice or
 * link_failure callback, and update_pmtu is a no-op.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.protocol		=	__constant_htons(ETH_P_IPV6),
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.entry_size		=	sizeof(struct rt6_info),
};
134
/*
 * Statically allocated "no route" entry.  The lookup code in this file
 * returns a pointer to it when nothing matches (see rt6_device_match() and
 * rt6_select()) and compares results against its address.  It carries
 * -ENETUNREACH as its error and uses the packet-discard handlers for both
 * input and output.
 */
struct rt6_info ip6_null_entry = {
	.u = {
		.dst = {
			/* starts with one reference and one use */
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -ENETUNREACH,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.ops		= &ip6_dst_ops,
			/* the entry is its own path */
			.path		= (struct dst_entry*)&ip6_null_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	/* largest possible metric */
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
154
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/*
 * Static "prohibit" entry, built only with CONFIG_IPV6_MULTIPLE_TABLES.
 * Laid out like ip6_null_entry, but its error is -EACCES and packets are
 * handed to the ip6_pkt_prohibit*() handlers.
 */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Static "blackhole" entry, built only with CONFIG_IPV6_MULTIPLE_TABLES.
 * Laid out like ip6_null_entry, but its error is -EINVAL and the same
 * handler, ip6_pkt_blk_hole(), serves both input and output.
 */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_blk_hole,
			.output		= ip6_pkt_blk_hole,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
202
203 /* allocate dst with ip6_dst_ops */
204 static __inline__ struct rt6_info *ip6_dst_alloc(void)
205 {
206         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
207 }
208
209 static void ip6_dst_destroy(struct dst_entry *dst)
210 {
211         struct rt6_info *rt = (struct rt6_info *)dst;
212         struct inet6_dev *idev = rt->rt6i_idev;
213
214         if (idev != NULL) {
215                 rt->rt6i_idev = NULL;
216                 in6_dev_put(idev);
217         }
218 }
219
220 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
221                            int how)
222 {
223         struct rt6_info *rt = (struct rt6_info *)dst;
224         struct inet6_dev *idev = rt->rt6i_idev;
225
226         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
227                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
228                 if (loopback_idev != NULL) {
229                         rt->rt6i_idev = loopback_idev;
230                         in6_dev_put(idev);
231                 }
232         }
233 }
234
235 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
236 {
237         return (rt->rt6i_flags & RTF_EXPIRES &&
238                 time_after(jiffies, rt->rt6i_expires));
239 }
240
241 static inline int rt6_need_strict(struct in6_addr *daddr)
242 {
243         return (ipv6_addr_type(daddr) &
244                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
245 }
246
247 /*
248  *      Route lookup. Any table->tb6_lock is implied.
249  */
250
251 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
252                                                     int oif,
253                                                     int strict)
254 {
255         struct rt6_info *local = NULL;
256         struct rt6_info *sprt;
257
258         if (oif) {
259                 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
260                         struct net_device *dev = sprt->rt6i_dev;
261                         if (dev->ifindex == oif)
262                                 return sprt;
263                         if (dev->flags & IFF_LOOPBACK) {
264                                 if (sprt->rt6i_idev == NULL ||
265                                     sprt->rt6i_idev->dev->ifindex != oif) {
266                                         if (strict && oif)
267                                                 continue;
268                                         if (local && (!oif ||
269                                                       local->rt6i_idev->dev->ifindex == oif))
270                                                 continue;
271                                 }
272                                 local = sprt;
273                         }
274                 }
275
276                 if (local)
277                         return local;
278
279                 if (strict)
280                         return &ip6_null_entry;
281         }
282         return rt;
283 }
284
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * nexthop of @rt when its neighbour entry is not in a NUD_VALID state.
 *
 * @rt may be NULL, and a route without a nexthop is ignored.
 *
 * Probes MUST be rate-limited: one is sent only if more than
 * cnf.rtr_probe_interval jiffies have passed since neigh->updated, and
 * neigh->updated is then set to the current time.  That check-and-update is
 * done under the write side of neigh->lock, because the field is modified;
 * with only the read lock, concurrent callers could all pass the check and
 * each send a probe.  The solicitation itself is sent after the lock has
 * been dropped.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int send_probe = 0;

	/* Lockless fast path: nothing to do for a valid or absent neighbour. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		neigh->updated = jiffies;
		send_probe = 1;
	}
	write_unlock_bh(&neigh->lock);

	if (!send_probe)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no reachability probes are sent. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
320
321 /*
322  * Default Router Selection (RFC 2461 6.3.6)
323  */
324 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
325 {
326         struct net_device *dev = rt->rt6i_dev;
327         if (!oif || dev->ifindex == oif)
328                 return 2;
329         if ((dev->flags & IFF_LOOPBACK) &&
330             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
331                 return 1;
332         return 0;
333 }
334
335 static inline int rt6_check_neigh(struct rt6_info *rt)
336 {
337         struct neighbour *neigh = rt->rt6i_nexthop;
338         int m = 0;
339         if (rt->rt6i_flags & RTF_NONEXTHOP ||
340             !(rt->rt6i_flags & RTF_GATEWAY))
341                 m = 1;
342         else if (neigh) {
343                 read_lock_bh(&neigh->lock);
344                 if (neigh->nud_state & NUD_VALID)
345                         m = 2;
346                 else if (!(neigh->nud_state & NUD_FAILED))
347                         m = 1;
348                 read_unlock_bh(&neigh->lock);
349         }
350         return m;
351 }
352
353 static int rt6_score_route(struct rt6_info *rt, int oif,
354                            int strict)
355 {
356         int m, n;
357
358         m = rt6_check_dev(rt, oif);
359         if (!m && (strict & RT6_LOOKUP_F_IFACE))
360                 return -1;
361 #ifdef CONFIG_IPV6_ROUTER_PREF
362         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
363 #endif
364         n = rt6_check_neigh(rt);
365         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
366                 return -1;
367         return m;
368 }
369
370 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
371                                    int *mpri, struct rt6_info *match)
372 {
373         int m;
374
375         if (rt6_check_expired(rt))
376                 goto out;
377
378         m = rt6_score_route(rt, oif, strict);
379         if (m < 0)
380                 goto out;
381
382         if (m > *mpri) {
383                 if (strict & RT6_LOOKUP_F_REACHABLE)
384                         rt6_probe(match);
385                 *mpri = m;
386                 match = rt;
387         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
388                 rt6_probe(rt);
389         }
390
391 out:
392         return match;
393 }
394
395 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
396                                      struct rt6_info *rr_head,
397                                      u32 metric, int oif, int strict)
398 {
399         struct rt6_info *rt, *match;
400         int mpri = -1;
401
402         match = NULL;
403         for (rt = rr_head; rt && rt->rt6i_metric == metric;
404              rt = rt->u.dst.rt6_next)
405                 match = find_match(rt, oif, strict, &mpri, match);
406         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
407              rt = rt->u.dst.rt6_next)
408                 match = find_match(rt, oif, strict, &mpri, match);
409
410         return match;
411 }
412
413 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
414 {
415         struct rt6_info *match, *rt0;
416
417         RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
418                   __FUNCTION__, fn->leaf, oif);
419
420         rt0 = fn->rr_ptr;
421         if (!rt0)
422                 fn->rr_ptr = rt0 = fn->leaf;
423
424         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
425
426         if (!match &&
427             (strict & RT6_LOOKUP_F_REACHABLE)) {
428                 struct rt6_info *next = rt0->u.dst.rt6_next;
429
430                 /* no entries matched; do round-robin */
431                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
432                         next = fn->leaf;
433
434                 if (next != rt0)
435                         fn->rr_ptr = next;
436         }
437
438         RT6_TRACE("%s() => %p\n",
439                   __FUNCTION__, match);
440
441         return (match ? match : &ip6_null_entry);
442 }
443
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option.
 *
 * @dev:    device whose ifindex the route is looked up / installed under
 * @opt:    start of the option, interpreted as a struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address passed on to the route-info lookup and add helpers
 *
 * The option is validated first: it must be at least as large as
 * struct route_info, its length field must not exceed 3 and must be large
 * enough for the advertised prefix length, and the prefix length must not
 * exceed 128.  An option carrying the reserved route preference is ignored,
 * as RFC 4191 section 2.3 requires.
 *
 * A lifetime of zero deletes an existing route; a non-zero lifetime adds
 * the route if it is missing, otherwise refreshes its preference flags.
 * A lifetime of 0xffffffff means the route never expires.
 *
 * Returns 0 once the option has been processed (even if the route could
 * not be added) and -EINVAL if it was rejected.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	/* The reserved preference value means: ignore this option. */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* Zero lifetime: withdraw the route if we have it. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
521
/*
 * BACKTRACK(saddr) - walk back up the FIB tree after a failed match.
 *
 * Expands in place inside a lookup function and relies on the caller
 * providing:
 *   - local variables "rt" (current result) and "fn" (current node);
 *   - a label "restart" that re-runs route selection on "fn";
 *   - a label "out" that finishes the lookup with the current "rt".
 *
 * Does nothing unless rt is &ip6_null_entry.  Otherwise it climbs from fn
 * towards the root: at each step, if the parent has a subtree other than
 * the node we came from, that subtree is searched with fib6_lookup() using
 * saddr; otherwise the parent itself becomes the current node.  The walk
 * jumps to "restart" at the first node flagged RTN_RTINFO, or to "out"
 * (rt still being ip6_null_entry) once the current node is flagged
 * RTN_TL_ROOT.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
539
540 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
541                                              struct flowi *fl, int flags)
542 {
543         struct fib6_node *fn;
544         struct rt6_info *rt;
545
546         read_lock_bh(&table->tb6_lock);
547         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
548 restart:
549         rt = fn->leaf;
550         rt = rt6_device_match(rt, fl->oif, flags);
551         BACKTRACK(&fl->fl6_src);
552 out:
553         dst_hold(&rt->u.dst);
554         read_unlock_bh(&table->tb6_lock);
555
556         rt->u.dst.lastuse = jiffies;
557         rt->u.dst.__use++;
558
559         return rt;
560
561 }
562
563 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
564                             int oif, int strict)
565 {
566         struct flowi fl = {
567                 .oif = oif,
568                 .nl_u = {
569                         .ip6_u = {
570                                 .daddr = *daddr,
571                         },
572                 },
573         };
574         struct dst_entry *dst;
575         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
576
577         if (saddr) {
578                 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579                 flags |= RT6_LOOKUP_F_HAS_SADDR;
580         }
581
582         dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
583         if (dst->error == 0)
584                 return (struct rt6_info *) dst;
585
586         dst_release(dst);
587
588         return NULL;
589 }
590
591 EXPORT_SYMBOL(rt6_lookup);
592
593 /* ip6_ins_rt is called with FREE table->tb6_lock.
594    It takes new route entry, the addition fails by any reason the
595    route is freed. In any case, if caller does not hold it, it may
596    be destroyed.
597  */
598
599 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
600 {
601         int err;
602         struct fib6_table *table;
603
604         table = rt->rt6i_table;
605         write_lock_bh(&table->tb6_lock);
606         err = fib6_add(&table->tb6_root, rt, info);
607         write_unlock_bh(&table->tb6_lock);
608
609         return err;
610 }
611
/*
 * Insert @rt into its table without any nl_info (NULL is passed on).
 * Returns the result of __ip6_ins_rt().
 */
int ip6_ins_rt(struct rt6_info *rt)
{
	return __ip6_ins_rt(rt, NULL);
}
616
617 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
618                                       struct in6_addr *saddr)
619 {
620         struct rt6_info *rt;
621
622         /*
623          *      Clone the route.
624          */
625
626         rt = ip6_rt_copy(ort);
627
628         if (rt) {
629                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
630                         if (rt->rt6i_dst.plen != 128 &&
631                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
632                                 rt->rt6i_flags |= RTF_ANYCAST;
633                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
634                 }
635
636                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
637                 rt->rt6i_dst.plen = 128;
638                 rt->rt6i_flags |= RTF_CACHE;
639                 rt->u.dst.flags |= DST_HOST;
640
641 #ifdef CONFIG_IPV6_SUBTREES
642                 if (rt->rt6i_src.plen && saddr) {
643                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
644                         rt->rt6i_src.plen = 128;
645                 }
646 #endif
647
648                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
649
650         }
651
652         return rt;
653 }
654
655 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
656 {
657         struct rt6_info *rt = ip6_rt_copy(ort);
658         if (rt) {
659                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
660                 rt->rt6i_dst.plen = 128;
661                 rt->rt6i_flags |= RTF_CACHE;
662                 rt->u.dst.flags |= DST_HOST;
663                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
664         }
665         return rt;
666 }
667
668 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
669                                             struct flowi *fl, int flags)
670 {
671         struct fib6_node *fn;
672         struct rt6_info *rt, *nrt;
673         int strict = 0;
674         int attempts = 3;
675         int err;
676         int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
677
678         strict |= flags & RT6_LOOKUP_F_IFACE;
679
680 relookup:
681         read_lock_bh(&table->tb6_lock);
682
683 restart_2:
684         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
685
686 restart:
687         rt = rt6_select(fn, fl->iif, strict | reachable);
688         BACKTRACK(&fl->fl6_src);
689         if (rt == &ip6_null_entry ||
690             rt->rt6i_flags & RTF_CACHE)
691                 goto out;
692
693         dst_hold(&rt->u.dst);
694         read_unlock_bh(&table->tb6_lock);
695
696         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
697                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
698         else {
699 #if CLONE_OFFLINK_ROUTE
700                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
701 #else
702                 goto out2;
703 #endif
704         }
705
706         dst_release(&rt->u.dst);
707         rt = nrt ? : &ip6_null_entry;
708
709         dst_hold(&rt->u.dst);
710         if (nrt) {
711                 err = ip6_ins_rt(nrt);
712                 if (!err)
713                         goto out2;
714         }
715
716         if (--attempts <= 0)
717                 goto out2;
718
719         /*
720          * Race condition! In the gap, when table->tb6_lock was
721          * released someone could insert this route.  Relookup.
722          */
723         dst_release(&rt->u.dst);
724         goto relookup;
725
726 out:
727         if (reachable) {
728                 reachable = 0;
729                 goto restart_2;
730         }
731         dst_hold(&rt->u.dst);
732         read_unlock_bh(&table->tb6_lock);
733 out2:
734         rt->u.dst.lastuse = jiffies;
735         rt->u.dst.__use++;
736
737         return rt;
738 }
739
740 void ip6_route_input(struct sk_buff *skb)
741 {
742         struct ipv6hdr *iph = ipv6_hdr(skb);
743         int flags = RT6_LOOKUP_F_HAS_SADDR;
744         struct flowi fl = {
745                 .iif = skb->dev->ifindex,
746                 .nl_u = {
747                         .ip6_u = {
748                                 .daddr = iph->daddr,
749                                 .saddr = iph->saddr,
750                                 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
751                         },
752                 },
753                 .mark = skb->mark,
754                 .proto = iph->nexthdr,
755         };
756
757         if (rt6_need_strict(&iph->daddr))
758                 flags |= RT6_LOOKUP_F_IFACE;
759
760         skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
761 }
762
763 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
764                                              struct flowi *fl, int flags)
765 {
766         struct fib6_node *fn;
767         struct rt6_info *rt, *nrt;
768         int strict = 0;
769         int attempts = 3;
770         int err;
771         int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
772
773         strict |= flags & RT6_LOOKUP_F_IFACE;
774
775 relookup:
776         read_lock_bh(&table->tb6_lock);
777
778 restart_2:
779         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
780
781 restart:
782         rt = rt6_select(fn, fl->oif, strict | reachable);
783         BACKTRACK(&fl->fl6_src);
784         if (rt == &ip6_null_entry ||
785             rt->rt6i_flags & RTF_CACHE)
786                 goto out;
787
788         dst_hold(&rt->u.dst);
789         read_unlock_bh(&table->tb6_lock);
790
791         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
792                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
793         else {
794 #if CLONE_OFFLINK_ROUTE
795                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
796 #else
797                 goto out2;
798 #endif
799         }
800
801         dst_release(&rt->u.dst);
802         rt = nrt ? : &ip6_null_entry;
803
804         dst_hold(&rt->u.dst);
805         if (nrt) {
806                 err = ip6_ins_rt(nrt);
807                 if (!err)
808                         goto out2;
809         }
810
811         if (--attempts <= 0)
812                 goto out2;
813
814         /*
815          * Race condition! In the gap, when table->tb6_lock was
816          * released someone could insert this route.  Relookup.
817          */
818         dst_release(&rt->u.dst);
819         goto relookup;
820
821 out:
822         if (reachable) {
823                 reachable = 0;
824                 goto restart_2;
825         }
826         dst_hold(&rt->u.dst);
827         read_unlock_bh(&table->tb6_lock);
828 out2:
829         rt->u.dst.lastuse = jiffies;
830         rt->u.dst.__use++;
831         return rt;
832 }
833
834 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
835 {
836         int flags = 0;
837
838         if (rt6_need_strict(&fl->fl6_dst))
839                 flags |= RT6_LOOKUP_F_IFACE;
840
841         if (!ipv6_addr_any(&fl->fl6_src))
842                 flags |= RT6_LOOKUP_F_HAS_SADDR;
843
844         return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
845 }
846
847 EXPORT_SYMBOL(ip6_route_output);
848
/*
 * Packet handler for blackhole dsts: free the skb and report success.
 * ip6_dst_blackhole() installs it as both the input and the output hook.
 */
static int ip6_blackhole_output(struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}
854
855 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
856 {
857         struct rt6_info *ort = (struct rt6_info *) *dstp;
858         struct rt6_info *rt = (struct rt6_info *)
859                 dst_alloc(&ip6_dst_blackhole_ops);
860         struct dst_entry *new = NULL;
861
862         if (rt) {
863                 new = &rt->u.dst;
864
865                 atomic_set(&new->__refcnt, 1);
866                 new->__use = 1;
867                 new->input = ip6_blackhole_output;
868                 new->output = ip6_blackhole_output;
869
870                 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
871                 new->dev = ort->u.dst.dev;
872                 if (new->dev)
873                         dev_hold(new->dev);
874                 rt->rt6i_idev = ort->rt6i_idev;
875                 if (rt->rt6i_idev)
876                         in6_dev_hold(rt->rt6i_idev);
877                 rt->rt6i_expires = 0;
878
879                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
880                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
881                 rt->rt6i_metric = 0;
882
883                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
884 #ifdef CONFIG_IPV6_SUBTREES
885                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
886 #endif
887
888                 dst_free(new);
889         }
890
891         dst_release(*dstp);
892         *dstp = new;
893         return (new ? 0 : -ENOMEM);
894 }
895 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
896
897 /*
898  *      Destination cache support functions
899  */
900
901 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
902 {
903         struct rt6_info *rt;
904
905         rt = (struct rt6_info *) dst;
906
907         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
908                 return dst;
909
910         return NULL;
911 }
912
913 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
914 {
915         struct rt6_info *rt = (struct rt6_info *) dst;
916
917         if (rt) {
918                 if (rt->rt6i_flags & RTF_CACHE)
919                         ip6_del_rt(rt);
920                 else
921                         dst_release(dst);
922         }
923         return NULL;
924 }
925
926 static void ip6_link_failure(struct sk_buff *skb)
927 {
928         struct rt6_info *rt;
929
930         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
931
932         rt = (struct rt6_info *) skb->dst;
933         if (rt) {
934                 if (rt->rt6i_flags&RTF_CACHE) {
935                         dst_set_expires(&rt->u.dst, 0);
936                         rt->rt6i_flags |= RTF_EXPIRES;
937                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
938                         rt->rt6i_node->fn_sernum = -1;
939         }
940 }
941
942 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
943 {
944         struct rt6_info *rt6 = (struct rt6_info*)dst;
945
946         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
947                 rt6->rt6i_flags |= RTF_MODIFIED;
948                 if (mtu < IPV6_MIN_MTU) {
949                         mtu = IPV6_MIN_MTU;
950                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
951                 }
952                 dst->metrics[RTAX_MTU-1] = mtu;
953                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
954         }
955 }
956
957 static int ipv6_get_mtu(struct net_device *dev);
958
959 static inline unsigned int ipv6_advmss(unsigned int mtu)
960 {
961         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
962
963         if (mtu < ip6_rt_min_advmss)
964                 mtu = ip6_rt_min_advmss;
965
966         /*
967          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
968          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
969          * IPV6_MAXPLEN is also valid and means: "any MSS,
970          * rely only on pmtu discovery"
971          */
972         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
973                 mtu = IPV6_MAXPLEN;
974         return mtu;
975 }
976
977 static struct dst_entry *ndisc_dst_gc_list;
978 static DEFINE_SPINLOCK(ndisc_lock);
979
980 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
981                                   struct neighbour *neigh,
982                                   struct in6_addr *addr,
983                                   int (*output)(struct sk_buff *))
984 {
985         struct rt6_info *rt;
986         struct inet6_dev *idev = in6_dev_get(dev);
987
988         if (unlikely(idev == NULL))
989                 return NULL;
990
991         rt = ip6_dst_alloc();
992         if (unlikely(rt == NULL)) {
993                 in6_dev_put(idev);
994                 goto out;
995         }
996
997         dev_hold(dev);
998         if (neigh)
999                 neigh_hold(neigh);
1000         else
1001                 neigh = ndisc_get_neigh(dev, addr);
1002
1003         rt->rt6i_dev      = dev;
1004         rt->rt6i_idev     = idev;
1005         rt->rt6i_nexthop  = neigh;
1006         atomic_set(&rt->u.dst.__refcnt, 1);
1007         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
1008         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1009         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1010         rt->u.dst.output  = output;
1011
1012 #if 0   /* there's no chance to use these for ndisc */
1013         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1014                                 ? DST_HOST
1015                                 : 0;
1016         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1017         rt->rt6i_dst.plen = 128;
1018 #endif
1019
1020         spin_lock_bh(&ndisc_lock);
1021         rt->u.dst.next = ndisc_dst_gc_list;
1022         ndisc_dst_gc_list = &rt->u.dst;
1023         spin_unlock_bh(&ndisc_lock);
1024
1025         fib6_force_start_gc();
1026
1027 out:
1028         return &rt->u.dst;
1029 }
1030
1031 int ndisc_dst_gc(int *more)
1032 {
1033         struct dst_entry *dst, *next, **pprev;
1034         int freed;
1035
1036         next = NULL;
1037         freed = 0;
1038
1039         spin_lock_bh(&ndisc_lock);
1040         pprev = &ndisc_dst_gc_list;
1041
1042         while ((dst = *pprev) != NULL) {
1043                 if (!atomic_read(&dst->__refcnt)) {
1044                         *pprev = dst->next;
1045                         dst_free(dst);
1046                         freed++;
1047                 } else {
1048                         pprev = &dst->next;
1049                         (*more)++;
1050                 }
1051         }
1052
1053         spin_unlock_bh(&ndisc_lock);
1054
1055         return freed;
1056 }
1057
1058 static int ip6_dst_gc(void)
1059 {
1060         static unsigned expire = 30*HZ;
1061         static unsigned long last_gc;
1062         unsigned long now = jiffies;
1063
1064         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1065             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1066                 goto out;
1067
1068         expire++;
1069         fib6_run_gc(expire);
1070         last_gc = now;
1071         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1072                 expire = ip6_rt_gc_timeout>>1;
1073
1074 out:
1075         expire -= expire>>ip6_rt_gc_elasticity;
1076         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1077 }
1078
1079 /* Clean host part of a prefix. Not necessary in radix tree,
1080    but results in cleaner routing tables.
1081
1082    Remove it only when all the things will work!
1083  */
1084
1085 static int ipv6_get_mtu(struct net_device *dev)
1086 {
1087         int mtu = IPV6_MIN_MTU;
1088         struct inet6_dev *idev;
1089
1090         idev = in6_dev_get(dev);
1091         if (idev) {
1092                 mtu = idev->cnf.mtu6;
1093                 in6_dev_put(idev);
1094         }
1095         return mtu;
1096 }
1097
1098 int ipv6_get_hoplimit(struct net_device *dev)
1099 {
1100         int hoplimit = ipv6_devconf.hop_limit;
1101         struct inet6_dev *idev;
1102
1103         idev = in6_dev_get(dev);
1104         if (idev) {
1105                 hoplimit = idev->cnf.hop_limit;
1106                 in6_dev_put(idev);
1107         }
1108         return hoplimit;
1109 }
1110
1111 /*
1112  *
1113  */
1114
1115 int ip6_route_add(struct fib6_config *cfg)
1116 {
1117         int err;
1118         struct rt6_info *rt = NULL;
1119         struct net_device *dev = NULL;
1120         struct inet6_dev *idev = NULL;
1121         struct fib6_table *table;
1122         int addr_type;
1123
1124         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1125                 return -EINVAL;
1126 #ifndef CONFIG_IPV6_SUBTREES
1127         if (cfg->fc_src_len)
1128                 return -EINVAL;
1129 #endif
1130         if (cfg->fc_ifindex) {
1131                 err = -ENODEV;
1132                 dev = dev_get_by_index(cfg->fc_ifindex);
1133                 if (!dev)
1134                         goto out;
1135                 idev = in6_dev_get(dev);
1136                 if (!idev)
1137                         goto out;
1138         }
1139
1140         if (cfg->fc_metric == 0)
1141                 cfg->fc_metric = IP6_RT_PRIO_USER;
1142
1143         table = fib6_new_table(cfg->fc_table);
1144         if (table == NULL) {
1145                 err = -ENOBUFS;
1146                 goto out;
1147         }
1148
1149         rt = ip6_dst_alloc();
1150
1151         if (rt == NULL) {
1152                 err = -ENOMEM;
1153                 goto out;
1154         }
1155
1156         rt->u.dst.obsolete = -1;
1157         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1158
1159         if (cfg->fc_protocol == RTPROT_UNSPEC)
1160                 cfg->fc_protocol = RTPROT_BOOT;
1161         rt->rt6i_protocol = cfg->fc_protocol;
1162
1163         addr_type = ipv6_addr_type(&cfg->fc_dst);
1164
1165         if (addr_type & IPV6_ADDR_MULTICAST)
1166                 rt->u.dst.input = ip6_mc_input;
1167         else
1168                 rt->u.dst.input = ip6_forward;
1169
1170         rt->u.dst.output = ip6_output;
1171
1172         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1173         rt->rt6i_dst.plen = cfg->fc_dst_len;
1174         if (rt->rt6i_dst.plen == 128)
1175                rt->u.dst.flags = DST_HOST;
1176
1177 #ifdef CONFIG_IPV6_SUBTREES
1178         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1179         rt->rt6i_src.plen = cfg->fc_src_len;
1180 #endif
1181
1182         rt->rt6i_metric = cfg->fc_metric;
1183
1184         /* We cannot add true routes via loopback here,
1185            they would result in kernel looping; promote them to reject routes
1186          */
1187         if ((cfg->fc_flags & RTF_REJECT) ||
1188             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1189                 /* hold loopback dev/idev if we haven't done so. */
1190                 if (dev != &loopback_dev) {
1191                         if (dev) {
1192                                 dev_put(dev);
1193                                 in6_dev_put(idev);
1194                         }
1195                         dev = &loopback_dev;
1196                         dev_hold(dev);
1197                         idev = in6_dev_get(dev);
1198                         if (!idev) {
1199                                 err = -ENODEV;
1200                                 goto out;
1201                         }
1202                 }
1203                 rt->u.dst.output = ip6_pkt_discard_out;
1204                 rt->u.dst.input = ip6_pkt_discard;
1205                 rt->u.dst.error = -ENETUNREACH;
1206                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1207                 goto install_route;
1208         }
1209
1210         if (cfg->fc_flags & RTF_GATEWAY) {
1211                 struct in6_addr *gw_addr;
1212                 int gwa_type;
1213
1214                 gw_addr = &cfg->fc_gateway;
1215                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1216                 gwa_type = ipv6_addr_type(gw_addr);
1217
1218                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1219                         struct rt6_info *grt;
1220
1221                         /* IPv6 strictly inhibits using not link-local
1222                            addresses as nexthop address.
1223                            Otherwise, router will not able to send redirects.
1224                            It is very good, but in some (rare!) circumstances
1225                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1226                            some exceptions. --ANK
1227                          */
1228                         err = -EINVAL;
1229                         if (!(gwa_type&IPV6_ADDR_UNICAST))
1230                                 goto out;
1231
1232                         grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1233
1234                         err = -EHOSTUNREACH;
1235                         if (grt == NULL)
1236                                 goto out;
1237                         if (dev) {
1238                                 if (dev != grt->rt6i_dev) {
1239                                         dst_release(&grt->u.dst);
1240                                         goto out;
1241                                 }
1242                         } else {
1243                                 dev = grt->rt6i_dev;
1244                                 idev = grt->rt6i_idev;
1245                                 dev_hold(dev);
1246                                 in6_dev_hold(grt->rt6i_idev);
1247                         }
1248                         if (!(grt->rt6i_flags&RTF_GATEWAY))
1249                                 err = 0;
1250                         dst_release(&grt->u.dst);
1251
1252                         if (err)
1253                                 goto out;
1254                 }
1255                 err = -EINVAL;
1256                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1257                         goto out;
1258         }
1259
1260         err = -ENODEV;
1261         if (dev == NULL)
1262                 goto out;
1263
1264         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1265                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1266                 if (IS_ERR(rt->rt6i_nexthop)) {
1267                         err = PTR_ERR(rt->rt6i_nexthop);
1268                         rt->rt6i_nexthop = NULL;
1269                         goto out;
1270                 }
1271         }
1272
1273         rt->rt6i_flags = cfg->fc_flags;
1274
1275 install_route:
1276         if (cfg->fc_mx) {
1277                 struct nlattr *nla;
1278                 int remaining;
1279
1280                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1281                         int type = nla->nla_type;
1282
1283                         if (type) {
1284                                 if (type > RTAX_MAX) {
1285                                         err = -EINVAL;
1286                                         goto out;
1287                                 }
1288
1289                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1290                         }
1291                 }
1292         }
1293
1294         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1295                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1296         if (!rt->u.dst.metrics[RTAX_MTU-1])
1297                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1298         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1299                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1300         rt->u.dst.dev = dev;
1301         rt->rt6i_idev = idev;
1302         rt->rt6i_table = table;
1303         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1304
1305 out:
1306         if (dev)
1307                 dev_put(dev);
1308         if (idev)
1309                 in6_dev_put(idev);
1310         if (rt)
1311                 dst_free(&rt->u.dst);
1312         return err;
1313 }
1314
1315 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1316 {
1317         int err;
1318         struct fib6_table *table;
1319
1320         if (rt == &ip6_null_entry)
1321                 return -ENOENT;
1322
1323         table = rt->rt6i_table;
1324         write_lock_bh(&table->tb6_lock);
1325
1326         err = fib6_del(rt, info);
1327         dst_release(&rt->u.dst);
1328
1329         write_unlock_bh(&table->tb6_lock);
1330
1331         return err;
1332 }
1333
1334 int ip6_del_rt(struct rt6_info *rt)
1335 {
1336         return __ip6_del_rt(rt, NULL);
1337 }
1338
1339 static int ip6_route_del(struct fib6_config *cfg)
1340 {
1341         struct fib6_table *table;
1342         struct fib6_node *fn;
1343         struct rt6_info *rt;
1344         int err = -ESRCH;
1345
1346         table = fib6_get_table(cfg->fc_table);
1347         if (table == NULL)
1348                 return err;
1349
1350         read_lock_bh(&table->tb6_lock);
1351
1352         fn = fib6_locate(&table->tb6_root,
1353                          &cfg->fc_dst, cfg->fc_dst_len,
1354                          &cfg->fc_src, cfg->fc_src_len);
1355
1356         if (fn) {
1357                 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1358                         if (cfg->fc_ifindex &&
1359                             (rt->rt6i_dev == NULL ||
1360                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1361                                 continue;
1362                         if (cfg->fc_flags & RTF_GATEWAY &&
1363                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1364                                 continue;
1365                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1366                                 continue;
1367                         dst_hold(&rt->u.dst);
1368                         read_unlock_bh(&table->tb6_lock);
1369
1370                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1371                 }
1372         }
1373         read_unlock_bh(&table->tb6_lock);
1374
1375         return err;
1376 }
1377
1378 /*
1379  *      Handle redirects
1380  */
1381 struct ip6rd_flowi {
1382         struct flowi fl;
1383         struct in6_addr gateway;
1384 };
1385
1386 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1387                                              struct flowi *fl,
1388                                              int flags)
1389 {
1390         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1391         struct rt6_info *rt;
1392         struct fib6_node *fn;
1393
1394         /*
1395          * Get the "current" route for this destination and
1396          * check if the redirect has come from approriate router.
1397          *
1398          * RFC 2461 specifies that redirects should only be
1399          * accepted if they come from the nexthop to the target.
1400          * Due to the way the routes are chosen, this notion
1401          * is a bit fuzzy and one might need to check all possible
1402          * routes.
1403          */
1404
1405         read_lock_bh(&table->tb6_lock);
1406         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1407 restart:
1408         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1409                 /*
1410                  * Current route is on-link; redirect is always invalid.
1411                  *
1412                  * Seems, previous statement is not true. It could
1413                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1414                  * But then router serving it might decide, that we should
1415                  * know truth 8)8) --ANK (980726).
1416                  */
1417                 if (rt6_check_expired(rt))
1418                         continue;
1419                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1420                         continue;
1421                 if (fl->oif != rt->rt6i_dev->ifindex)
1422                         continue;
1423                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1424                         continue;
1425                 break;
1426         }
1427
1428         if (!rt)
1429                 rt = &ip6_null_entry;
1430         BACKTRACK(&fl->fl6_src);
1431 out:
1432         dst_hold(&rt->u.dst);
1433
1434         read_unlock_bh(&table->tb6_lock);
1435
1436         return rt;
1437 };
1438
1439 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1440                                            struct in6_addr *src,
1441                                            struct in6_addr *gateway,
1442                                            struct net_device *dev)
1443 {
1444         int flags = RT6_LOOKUP_F_HAS_SADDR;
1445         struct ip6rd_flowi rdfl = {
1446                 .fl = {
1447                         .oif = dev->ifindex,
1448                         .nl_u = {
1449                                 .ip6_u = {
1450                                         .daddr = *dest,
1451                                         .saddr = *src,
1452                                 },
1453                         },
1454                 },
1455                 .gateway = *gateway,
1456         };
1457
1458         if (rt6_need_strict(dest))
1459                 flags |= RT6_LOOKUP_F_IFACE;
1460
1461         return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1462 }
1463
1464 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1465                   struct in6_addr *saddr,
1466                   struct neighbour *neigh, u8 *lladdr, int on_link)
1467 {
1468         struct rt6_info *rt, *nrt = NULL;
1469         struct netevent_redirect netevent;
1470
1471         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1472
1473         if (rt == &ip6_null_entry) {
1474                 if (net_ratelimit())
1475                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1476                                "for redirect target\n");
1477                 goto out;
1478         }
1479
1480         /*
1481          *      We have finally decided to accept it.
1482          */
1483
1484         neigh_update(neigh, lladdr, NUD_STALE,
1485                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1486                      NEIGH_UPDATE_F_OVERRIDE|
1487                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1488                                      NEIGH_UPDATE_F_ISROUTER))
1489                      );
1490
1491         /*
1492          * Redirect received -> path was valid.
1493          * Look, redirects are sent only in response to data packets,
1494          * so that this nexthop apparently is reachable. --ANK
1495          */
1496         dst_confirm(&rt->u.dst);
1497
1498         /* Duplicate redirect: silently ignore. */
1499         if (neigh == rt->u.dst.neighbour)
1500                 goto out;
1501
1502         nrt = ip6_rt_copy(rt);
1503         if (nrt == NULL)
1504                 goto out;
1505
1506         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1507         if (on_link)
1508                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1509
1510         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1511         nrt->rt6i_dst.plen = 128;
1512         nrt->u.dst.flags |= DST_HOST;
1513
1514         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1515         nrt->rt6i_nexthop = neigh_clone(neigh);
1516         /* Reset pmtu, it may be better */
1517         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1518         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1519
1520         if (ip6_ins_rt(nrt))
1521                 goto out;
1522
1523         netevent.old = &rt->u.dst;
1524         netevent.new = &nrt->u.dst;
1525         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1526
1527         if (rt->rt6i_flags&RTF_CACHE) {
1528                 ip6_del_rt(rt);
1529                 return;
1530         }
1531
1532 out:
1533         dst_release(&rt->u.dst);
1534         return;
1535 }
1536
1537 /*
1538  *      Handle ICMP "packet too big" messages
1539  *      i.e. Path MTU discovery
1540  */
1541
1542 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1543                         struct net_device *dev, u32 pmtu)
1544 {
1545         struct rt6_info *rt, *nrt;
1546         int allfrag = 0;
1547
1548         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1549         if (rt == NULL)
1550                 return;
1551
1552         if (pmtu >= dst_mtu(&rt->u.dst))
1553                 goto out;
1554
1555         if (pmtu < IPV6_MIN_MTU) {
1556                 /*
1557                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1558                  * MTU (1280) and a fragment header should always be included
1559                  * after a node receiving Too Big message reporting PMTU is
1560                  * less than the IPv6 Minimum Link MTU.
1561                  */
1562                 pmtu = IPV6_MIN_MTU;
1563                 allfrag = 1;
1564         }
1565
1566         /* New mtu received -> path was valid.
1567            They are sent only in response to data packets,
1568            so that this nexthop apparently is reachable. --ANK
1569          */
1570         dst_confirm(&rt->u.dst);
1571
1572         /* Host route. If it is static, it would be better
1573            not to override it, but add new one, so that
1574            when cache entry will expire old pmtu
1575            would return automatically.
1576          */
1577         if (rt->rt6i_flags & RTF_CACHE) {
1578                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1579                 if (allfrag)
1580                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1581                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1582                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1583                 goto out;
1584         }
1585
1586         /* Network route.
1587            Two cases are possible:
1588            1. It is connected route. Action: COW
1589            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1590          */
1591         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1592                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1593         else
1594                 nrt = rt6_alloc_clone(rt, daddr);
1595
1596         if (nrt) {
1597                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1598                 if (allfrag)
1599                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1600
1601                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1602                  * happened within 5 mins, the recommended timer is 10 mins.
1603                  * Here this route expiration time is set to ip6_rt_mtu_expires
1604                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1605                  * and detecting PMTU increase will be automatically happened.
1606                  */
1607                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1608                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1609
1610                 ip6_ins_rt(nrt);
1611         }
1612 out:
1613         dst_release(&rt->u.dst);
1614 }
1615
1616 /*
1617  *      Misc support functions
1618  */
1619
1620 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1621 {
1622         struct rt6_info *rt = ip6_dst_alloc();
1623
1624         if (rt) {
1625                 rt->u.dst.input = ort->u.dst.input;
1626                 rt->u.dst.output = ort->u.dst.output;
1627
1628                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1629                 rt->u.dst.error = ort->u.dst.error;
1630                 rt->u.dst.dev = ort->u.dst.dev;
1631                 if (rt->u.dst.dev)
1632                         dev_hold(rt->u.dst.dev);
1633                 rt->rt6i_idev = ort->rt6i_idev;
1634                 if (rt->rt6i_idev)
1635                         in6_dev_hold(rt->rt6i_idev);
1636                 rt->u.dst.lastuse = jiffies;
1637                 rt->rt6i_expires = 0;
1638
1639                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1640                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1641                 rt->rt6i_metric = 0;
1642
1643                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1644 #ifdef CONFIG_IPV6_SUBTREES
1645                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1646 #endif
1647                 rt->rt6i_table = ort->rt6i_table;
1648         }
1649         return rt;
1650 }
1651
1652 #ifdef CONFIG_IPV6_ROUTE_INFO
1653 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1654                                            struct in6_addr *gwaddr, int ifindex)
1655 {
1656         struct fib6_node *fn;
1657         struct rt6_info *rt = NULL;
1658         struct fib6_table *table;
1659
1660         table = fib6_get_table(RT6_TABLE_INFO);
1661         if (table == NULL)
1662                 return NULL;
1663
1664         write_lock_bh(&table->tb6_lock);
1665         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1666         if (!fn)
1667                 goto out;
1668
1669         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1670                 if (rt->rt6i_dev->ifindex != ifindex)
1671                         continue;
1672                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1673                         continue;
1674                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1675                         continue;
1676                 dst_hold(&rt->u.dst);
1677                 break;
1678         }
1679 out:
1680         write_unlock_bh(&table->tb6_lock);
1681         return rt;
1682 }
1683
1684 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1685                                            struct in6_addr *gwaddr, int ifindex,
1686                                            unsigned pref)
1687 {
1688         struct fib6_config cfg = {
1689                 .fc_table       = RT6_TABLE_INFO,
1690                 .fc_metric      = 1024,
1691                 .fc_ifindex     = ifindex,
1692                 .fc_dst_len     = prefixlen,
1693                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1694                                   RTF_UP | RTF_PREF(pref),
1695         };
1696
1697         ipv6_addr_copy(&cfg.fc_dst, prefix);
1698         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1699
1700         /* We should treat it as a default route if prefix length is 0. */
1701         if (!prefixlen)
1702                 cfg.fc_flags |= RTF_DEFAULT;
1703
1704         ip6_route_add(&cfg);
1705
1706         return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1707 }
1708 #endif
1709
1710 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1711 {
1712         struct rt6_info *rt;
1713         struct fib6_table *table;
1714
1715         table = fib6_get_table(RT6_TABLE_DFLT);
1716         if (table == NULL)
1717                 return NULL;
1718
1719         write_lock_bh(&table->tb6_lock);
1720         for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1721                 if (dev == rt->rt6i_dev &&
1722                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1723                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1724                         break;
1725         }
1726         if (rt)
1727                 dst_hold(&rt->u.dst);
1728         write_unlock_bh(&table->tb6_lock);
1729         return rt;
1730 }
1731
1732 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1733                                      struct net_device *dev,
1734                                      unsigned int pref)
1735 {
1736         struct fib6_config cfg = {
1737                 .fc_table       = RT6_TABLE_DFLT,
1738                 .fc_metric      = 1024,
1739                 .fc_ifindex     = dev->ifindex,
1740                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1741                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1742         };
1743
1744         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1745
1746         ip6_route_add(&cfg);
1747
1748         return rt6_get_dflt_router(gwaddr, dev);
1749 }
1750
1751 void rt6_purge_dflt_routers(void)
1752 {
1753         struct rt6_info *rt;
1754         struct fib6_table *table;
1755
1756         /* NOTE: Keep consistent with rt6_get_dflt_router */
1757         table = fib6_get_table(RT6_TABLE_DFLT);
1758         if (table == NULL)
1759                 return;
1760
1761 restart:
1762         read_lock_bh(&table->tb6_lock);
1763         for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1764                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1765                         dst_hold(&rt->u.dst);
1766                         read_unlock_bh(&table->tb6_lock);
1767                         ip6_del_rt(rt);
1768                         goto restart;
1769                 }
1770         }
1771         read_unlock_bh(&table->tb6_lock);
1772 }
1773
1774 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1775                                  struct fib6_config *cfg)
1776 {
1777         memset(cfg, 0, sizeof(*cfg));
1778
1779         cfg->fc_table = RT6_TABLE_MAIN;
1780         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1781         cfg->fc_metric = rtmsg->rtmsg_metric;
1782         cfg->fc_expires = rtmsg->rtmsg_info;
1783         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1784         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1785         cfg->fc_flags = rtmsg->rtmsg_flags;
1786
1787         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1788         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1789         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1790 }
1791
1792 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1793 {
1794         struct fib6_config cfg;
1795         struct in6_rtmsg rtmsg;
1796         int err;
1797
1798         switch(cmd) {
1799         case SIOCADDRT:         /* Add a route */
1800         case SIOCDELRT:         /* Delete a route */
1801                 if (!capable(CAP_NET_ADMIN))
1802                         return -EPERM;
1803                 err = copy_from_user(&rtmsg, arg,
1804                                      sizeof(struct in6_rtmsg));
1805                 if (err)
1806                         return -EFAULT;
1807
1808                 rtmsg_to_fib6_config(&rtmsg, &cfg);
1809
1810                 rtnl_lock();
1811                 switch (cmd) {
1812                 case SIOCADDRT:
1813                         err = ip6_route_add(&cfg);
1814                         break;
1815                 case SIOCDELRT:
1816                         err = ip6_route_del(&cfg);
1817                         break;
1818                 default:
1819                         err = -EINVAL;
1820                 }
1821                 rtnl_unlock();
1822
1823                 return err;
1824         }
1825
1826         return -EINVAL;
1827 }
1828
1829 /*
1830  *      Drop the packet on the floor
1831  */
1832
1833 static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1834                                int ipstats_mib_noroutes)
1835 {
1836         int type;
1837         switch (ipstats_mib_noroutes) {
1838         case IPSTATS_MIB_INNOROUTES:
1839                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1840                 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1841                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1842                         break;
1843                 }
1844                 /* FALLTHROUGH */
1845         case IPSTATS_MIB_OUTNOROUTES:
1846                 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1847                 break;
1848         }
1849         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1850         kfree_skb(skb);
1851         return 0;
1852 }
1853
1854 static int ip6_pkt_discard(struct sk_buff *skb)
1855 {
1856         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1857 }
1858
1859 static int ip6_pkt_discard_out(struct sk_buff *skb)
1860 {
1861         skb->dev = skb->dst->dev;
1862         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1863 }
1864
1865 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1866
1867 static int ip6_pkt_prohibit(struct sk_buff *skb)
1868 {
1869         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1870 }
1871
1872 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1873 {
1874         skb->dev = skb->dst->dev;
1875         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1876 }
1877
/*
 * Silently discard @skb: unlike the other drop handlers in this file,
 * no ICMPv6 error is sent and no counter is touched.  Always returns 0.
 */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
1883
1884 #endif
1885
1886 /*
1887  *      Allocate a dst for local (unicast / anycast) address.
1888  */
1889
1890 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1891                                     const struct in6_addr *addr,
1892                                     int anycast)
1893 {
1894         struct rt6_info *rt = ip6_dst_alloc();
1895
1896         if (rt == NULL)
1897                 return ERR_PTR(-ENOMEM);
1898
1899         dev_hold(&loopback_dev);
1900         in6_dev_hold(idev);
1901
1902         rt->u.dst.flags = DST_HOST;
1903         rt->u.dst.input = ip6_input;
1904         rt->u.dst.output = ip6_output;
1905         rt->rt6i_dev = &loopback_dev;
1906         rt->rt6i_idev = idev;
1907         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1908         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1909         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1910         rt->u.dst.obsolete = -1;
1911
1912         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1913         if (anycast)
1914                 rt->rt6i_flags |= RTF_ANYCAST;
1915         else
1916                 rt->rt6i_flags |= RTF_LOCAL;
1917         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1918         if (rt->rt6i_nexthop == NULL) {
1919                 dst_free(&rt->u.dst);
1920                 return ERR_PTR(-ENOMEM);
1921         }
1922
1923         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1924         rt->rt6i_dst.plen = 128;
1925         rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1926
1927         atomic_set(&rt->u.dst.__refcnt, 1);
1928
1929         return rt;
1930 }
1931
1932 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1933 {
1934         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1935             rt != &ip6_null_entry) {
1936                 RT6_TRACE("deleted by ifdown %p\n", rt);
1937                 return -1;
1938         }
1939         return 0;
1940 }
1941
1942 void rt6_ifdown(struct net_device *dev)
1943 {
1944         fib6_clean_all(fib6_ifdown, 0, dev);
1945 }
1946
/* Argument bundle handed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* its new MTU */
};
1952
1953 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1954 {
1955         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1956         struct inet6_dev *idev;
1957
1958         /* In IPv6 pmtu discovery is not optional,
1959            so that RTAX_MTU lock cannot disable it.
1960            We still use this lock to block changes
1961            caused by addrconf/ndisc.
1962         */
1963
1964         idev = __in6_dev_get(arg->dev);
1965         if (idev == NULL)
1966                 return 0;
1967
1968         /* For administrative MTU increase, there is no way to discover
1969            IPv6 PMTU increase, so PMTU increase should be updated here.
1970            Since RFC 1981 doesn't include administrative MTU increase
1971            update PMTU increase is a MUST. (i.e. jumbo frame)
1972          */
1973         /*
1974            If new MTU is less than route PMTU, this new MTU will be the
1975            lowest MTU in the path, update the route PMTU to reflect PMTU
1976            decreases; if new MTU is greater than route PMTU, and the
1977            old MTU is the lowest MTU in the path, update the route PMTU
1978            to reflect the increase. In this case if the other nodes' MTU
1979            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1980            PMTU discouvery.
1981          */
1982         if (rt->rt6i_dev == arg->dev &&
1983             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1984             (dst_mtu(&rt->u.dst) > arg->mtu ||
1985              (dst_mtu(&rt->u.dst) < arg->mtu &&
1986               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1987                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1988         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1989         return 0;
1990 }
1991
1992 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1993 {
1994         struct rt6_mtu_change_arg arg = {
1995                 .dev = dev,
1996                 .mtu = mtu,
1997         };
1998
1999         fib6_clean_all(rt6_mtu_change_route, 0, &arg);
2000 }
2001
2002 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2003         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2004         [RTA_OIF]               = { .type = NLA_U32 },
2005         [RTA_IIF]               = { .type = NLA_U32 },
2006         [RTA_PRIORITY]          = { .type = NLA_U32 },
2007         [RTA_METRICS]           = { .type = NLA_NESTED },
2008 };
2009
2010 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2011                               struct fib6_config *cfg)
2012 {
2013         struct rtmsg *rtm;
2014         struct nlattr *tb[RTA_MAX+1];
2015         int err;
2016
2017         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2018         if (err < 0)
2019                 goto errout;
2020
2021         err = -EINVAL;
2022         rtm = nlmsg_data(nlh);
2023         memset(cfg, 0, sizeof(*cfg));
2024
2025         cfg->fc_table = rtm->rtm_table;
2026         cfg->fc_dst_len = rtm->rtm_dst_len;
2027         cfg->fc_src_len = rtm->rtm_src_len;
2028         cfg->fc_flags = RTF_UP;
2029         cfg->fc_protocol = rtm->rtm_protocol;
2030
2031         if (rtm->rtm_type == RTN_UNREACHABLE)
2032                 cfg->fc_flags |= RTF_REJECT;
2033
2034         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2035         cfg->fc_nlinfo.nlh = nlh;
2036
2037         if (tb[RTA_GATEWAY]) {
2038                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2039                 cfg->fc_flags |= RTF_GATEWAY;
2040         }
2041
2042         if (tb[RTA_DST]) {
2043                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2044
2045                 if (nla_len(tb[RTA_DST]) < plen)
2046                         goto errout;
2047
2048                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2049         }
2050
2051         if (tb[RTA_SRC]) {
2052                 int plen = (rtm->rtm_src_len + 7) >> 3;
2053
2054                 if (nla_len(tb[RTA_SRC]) < plen)
2055                         goto errout;
2056
2057                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2058         }
2059
2060         if (tb[RTA_OIF])
2061                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2062
2063         if (tb[RTA_PRIORITY])
2064                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2065
2066         if (tb[RTA_METRICS]) {
2067                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2068                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2069         }
2070
2071         if (tb[RTA_TABLE])
2072                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2073
2074         err = 0;
2075 errout:
2076         return err;
2077 }
2078
2079 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2080 {
2081         struct fib6_config cfg;
2082         int err;
2083
2084         err = rtm_to_fib6_config(skb, nlh, &cfg);
2085         if (err < 0)
2086                 return err;
2087
2088         return ip6_route_del(&cfg);
2089 }
2090
2091 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2092 {
2093         struct fib6_config cfg;
2094         int err;
2095
2096         err = rtm_to_fib6_config(skb, nlh, &cfg);
2097         if (err < 0)
2098                 return err;
2099
2100         return ip6_route_add(&cfg);
2101 }
2102
2103 static inline size_t rt6_nlmsg_size(void)
2104 {
2105         return NLMSG_ALIGN(sizeof(struct rtmsg))
2106                + nla_total_size(16) /* RTA_SRC */
2107                + nla_total_size(16) /* RTA_DST */
2108                + nla_total_size(16) /* RTA_GATEWAY */
2109                + nla_total_size(16) /* RTA_PREFSRC */
2110                + nla_total_size(4) /* RTA_TABLE */
2111                + nla_total_size(4) /* RTA_IIF */
2112                + nla_total_size(4) /* RTA_OIF */
2113                + nla_total_size(4) /* RTA_PRIORITY */
2114                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2115                + nla_total_size(sizeof(struct rta_cacheinfo));
2116 }
2117
2118 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2119                          struct in6_addr *dst, struct in6_addr *src,
2120                          int iif, int type, u32 pid, u32 seq,
2121                          int prefix, unsigned int flags)
2122 {
2123         struct rtmsg *rtm;
2124         struct nlmsghdr *nlh;
2125         long expires;
2126         u32 table;
2127
2128         if (prefix) {   /* user wants prefix routes only */
2129                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2130                         /* success since this is not a prefix route */
2131                         return 1;
2132                 }
2133         }
2134
2135         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2136         if (nlh == NULL)
2137                 return -EMSGSIZE;
2138
2139         rtm = nlmsg_data(nlh);
2140         rtm->rtm_family = AF_INET6;
2141         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2142         rtm->rtm_src_len = rt->rt6i_src.plen;
2143         rtm->rtm_tos = 0;
2144         if (rt->rt6i_table)
2145                 table = rt->rt6i_table->tb6_id;
2146         else
2147                 table = RT6_TABLE_UNSPEC;
2148         rtm->rtm_table = table;
2149         NLA_PUT_U32(skb, RTA_TABLE, table);
2150         if (rt->rt6i_flags&RTF_REJECT)
2151                 rtm->rtm_type = RTN_UNREACHABLE;
2152         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2153                 rtm->rtm_type = RTN_LOCAL;
2154         else
2155                 rtm->rtm_type = RTN_UNICAST;
2156         rtm->rtm_flags = 0;
2157         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2158         rtm->rtm_protocol = rt->rt6i_protocol;
2159         if (rt->rt6i_flags&RTF_DYNAMIC)
2160                 rtm->rtm_protocol = RTPROT_REDIRECT;
2161         else if (rt->rt6i_flags & RTF_ADDRCONF)
2162                 rtm->rtm_protocol = RTPROT_KERNEL;
2163         else if (rt->rt6i_flags&RTF_DEFAULT)
2164                 rtm->rtm_protocol = RTPROT_RA;
2165
2166         if (rt->rt6i_flags&RTF_CACHE)
2167                 rtm->rtm_flags |= RTM_F_CLONED;
2168
2169         if (dst) {
2170                 NLA_PUT(skb, RTA_DST, 16, dst);
2171                 rtm->rtm_dst_len = 128;
2172         } else if (rtm->rtm_dst_len)
2173                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2174 #ifdef CONFIG_IPV6_SUBTREES
2175         if (src) {
2176                 NLA_PUT(skb, RTA_SRC, 16, src);
2177                 rtm->rtm_src_len = 128;
2178         } else if (rtm->rtm_src_len)
2179                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2180 #endif
2181         if (iif)
2182                 NLA_PUT_U32(skb, RTA_IIF, iif);
2183         else if (dst) {
2184                 struct in6_addr saddr_buf;
2185                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2186                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2187         }
2188
2189         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2190                 goto nla_put_failure;
2191
2192         if (rt->u.dst.neighbour)
2193                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2194
2195         if (rt->u.dst.dev)
2196                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2197
2198         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2199
2200         expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2201         if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2202                                expires, rt->u.dst.error) < 0)
2203                 goto nla_put_failure;
2204
2205         return nlmsg_end(skb, nlh);
2206
2207 nla_put_failure:
2208         nlmsg_cancel(skb, nlh);
2209         return -EMSGSIZE;
2210 }
2211
2212 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2213 {
2214         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2215         int prefix;
2216
2217         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2218                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2219                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2220         } else
2221                 prefix = 0;
2222
2223         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2224                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2225                      prefix, NLM_F_MULTI);
2226 }
2227
2228 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2229 {
2230         struct nlattr *tb[RTA_MAX+1];
2231         struct rt6_info *rt;
2232         struct sk_buff *skb;
2233         struct rtmsg *rtm;
2234         struct flowi fl;
2235         int err, iif = 0;
2236
2237         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2238         if (err < 0)
2239                 goto errout;
2240
2241         err = -EINVAL;
2242         memset(&fl, 0, sizeof(fl));
2243
2244         if (tb[RTA_SRC]) {
2245                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2246                         goto errout;
2247
2248                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2249         }
2250
2251         if (tb[RTA_DST]) {
2252                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2253                         goto errout;
2254
2255                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2256         }
2257
2258         if (tb[RTA_IIF])
2259                 iif = nla_get_u32(tb[RTA_IIF]);
2260
2261         if (tb[RTA_OIF])
2262                 fl.oif = nla_get_u32(tb[RTA_OIF]);
2263
2264         if (iif) {
2265                 struct net_device *dev;
2266                 dev = __dev_get_by_index(iif);
2267                 if (!dev) {
2268                         err = -ENODEV;
2269                         goto errout;
2270                 }
2271         }
2272
2273         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2274         if (skb == NULL) {
2275                 err = -ENOBUFS;
2276                 goto errout;
2277         }
2278
2279         /* Reserve room for dummy headers, this skb can pass
2280            through good chunk of routing engine.
2281          */
2282         skb_reset_mac_header(skb);
2283         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2284
2285         rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2286         skb->dst = &rt->u.dst;
2287
2288         err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2289                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2290                             nlh->nlmsg_seq, 0, 0);
2291         if (err < 0) {
2292                 kfree_skb(skb);
2293                 goto errout;
2294         }
2295
2296         err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2297 errout:
2298         return err;
2299 }
2300
2301 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2302 {
2303         struct sk_buff *skb;
2304         u32 pid = 0, seq = 0;
2305         struct nlmsghdr *nlh = NULL;
2306         int err = -ENOBUFS;
2307
2308         if (info) {
2309                 pid = info->pid;
2310                 nlh = info->nlh;
2311                 if (nlh)
2312                         seq = nlh->nlmsg_seq;
2313         }
2314
2315         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2316         if (skb == NULL)
2317                 goto errout;
2318
2319         err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2320         if (err < 0) {
2321                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2322                 WARN_ON(err == -EMSGSIZE);
2323                 kfree_skb(skb);
2324                 goto errout;
2325         }
2326         err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2327 errout:
2328         if (err < 0)
2329                 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2330 }
2331
2332 /*
2333  *      /proc
2334  */
2335
2336 #ifdef CONFIG_PROC_FS
2337
/*
 * Width in bytes of one text record, used by the /proc code below to
 * turn a read offset into a record index.
 * NOTE(review): this appears to match the output of rt6_info_route()
 * only when the device name is at most 8 characters long -- confirm.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* records skipped so far to reach the offset */
	int len;	/* bytes written to buffer so far */
};
2348
2349 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2350 {
2351         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2352
2353         if (arg->skip < arg->offset / RT6_INFO_LEN) {
2354                 arg->skip++;
2355                 return 0;
2356         }
2357
2358         if (arg->len >= arg->length)
2359                 return 0;
2360
2361         arg->len += sprintf(arg->buffer + arg->len,
2362                             NIP6_SEQFMT " %02x ",
2363                             NIP6(rt->rt6i_dst.addr),
2364                             rt->rt6i_dst.plen);
2365
2366 #ifdef CONFIG_IPV6_SUBTREES
2367         arg->len += sprintf(arg->buffer + arg->len,
2368                             NIP6_SEQFMT " %02x ",
2369                             NIP6(rt->rt6i_src.addr),
2370                             rt->rt6i_src.plen);
2371 #else
2372         arg->len += sprintf(arg->buffer + arg->len,
2373                             "00000000000000000000000000000000 00 ");
2374 #endif
2375
2376         if (rt->rt6i_nexthop) {
2377                 arg->len += sprintf(arg->buffer + arg->len,
2378                                     NIP6_SEQFMT,
2379                                     NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2380         } else {
2381                 arg->len += sprintf(arg->buffer + arg->len,
2382                                     "00000000000000000000000000000000");
2383         }
2384         arg->len += sprintf(arg->buffer + arg->len,
2385                             " %08x %08x %08x %08x %8s\n",
2386                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2387                             rt->u.dst.__use, rt->rt6i_flags,
2388                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
2389         return 0;
2390 }
2391
2392 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2393 {
2394         struct rt6_proc_arg arg = {
2395                 .buffer = buffer,
2396                 .offset = offset,
2397                 .length = length,
2398         };
2399
2400         fib6_clean_all(rt6_info_route, 0, &arg);
2401
2402         *start = buffer;
2403         if (offset)
2404                 *start += offset % RT6_INFO_LEN;
2405
2406         arg.len -= offset % RT6_INFO_LEN;
2407
2408         if (arg.len > length)
2409                 arg.len = length;
2410         if (arg.len < 0)
2411                 arg.len = 0;
2412
2413         return arg.len;
2414 }
2415
2416 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2417 {
2418         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2419                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2420                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2421                       rt6_stats.fib_rt_cache,
2422                       atomic_read(&ip6_dst_ops.entries),
2423                       rt6_stats.fib_discarded_routes);
2424
2425         return 0;
2426 }
2427
2428 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2429 {
2430         return single_open(file, rt6_stats_seq_show, NULL);
2431 }
2432
2433 static const struct file_operations rt6_stats_seq_fops = {
2434         .owner   = THIS_MODULE,
2435         .open    = rt6_stats_seq_open,
2436         .read    = seq_read,
2437         .llseek  = seq_lseek,
2438         .release = single_release,
2439 };
2440 #endif  /* CONFIG_PROC_FS */
2441
2442 #ifdef CONFIG_SYSCTL
2443
2444 static int flush_delay;
2445
2446 static
2447 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2448                               void __user *buffer, size_t *lenp, loff_t *ppos)
2449 {
2450         if (write) {
2451                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2452                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2453                 return 0;
2454         } else
2455                 return -EINVAL;
2456 }
2457
2458 ctl_table ipv6_route_table[] = {
2459         {
2460                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH,
2461                 .procname       =       "flush",
2462                 .data           =       &flush_delay,
2463                 .maxlen         =       sizeof(int),
2464                 .mode           =       0200,
2465                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2466         },
2467         {
2468                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2469                 .procname       =       "gc_thresh",
2470                 .data           =       &ip6_dst_ops.gc_thresh,
2471                 .maxlen         =       sizeof(int),
2472                 .mode           =       0644,
2473                 .proc_handler   =       &proc_dointvec,
2474         },
2475         {
2476                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2477                 .procname       =       "max_size",
2478                 .data           =       &ip6_rt_max_size,
2479                 .maxlen         =       sizeof(int),
2480                 .mode           =       0644,
2481                 .proc_handler   =       &proc_dointvec,
2482         },
2483         {
2484                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2485                 .procname       =       "gc_min_interval",
2486                 .data           =       &ip6_rt_gc_min_interval,
2487                 .maxlen         =       sizeof(int),
2488                 .mode           =       0644,
2489                 .proc_handler   =       &proc_dointvec_jiffies,
2490                 .strategy       =       &sysctl_jiffies,
2491         },
2492         {
2493                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2494                 .procname       =       "gc_timeout",
2495                 .data           =       &ip6_rt_gc_timeout,
2496                 .maxlen         =       sizeof(int),
2497                 .mode           =       0644,
2498                 .proc_handler   =       &proc_dointvec_jiffies,
2499                 .strategy       =       &sysctl_jiffies,
2500         },
2501         {
2502                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2503                 .procname       =       "gc_interval",
2504                 .data           =       &ip6_rt_gc_interval,
2505                 .maxlen         =       sizeof(int),
2506                 .mode           =       0644,
2507                 .proc_handler   =       &proc_dointvec_jiffies,
2508                 .strategy       =       &sysctl_jiffies,
2509         },
2510         {
2511                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2512                 .procname       =       "gc_elasticity",
2513                 .data           =       &ip6_rt_gc_elasticity,
2514                 .maxlen         =       sizeof(int),
2515                 .mode           =       0644,
2516                 .proc_handler   =       &proc_dointvec_jiffies,
2517                 .strategy       =       &sysctl_jiffies,
2518         },
2519         {
2520                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2521                 .procname       =       "mtu_expires",
2522                 .data           =       &ip6_rt_mtu_expires,
2523                 .maxlen         =       sizeof(int),
2524                 .mode           =       0644,
2525                 .proc_handler   =       &proc_dointvec_jiffies,
2526                 .strategy       =       &sysctl_jiffies,
2527         },
2528         {
2529                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2530                 .procname       =       "min_adv_mss",
2531                 .data           =       &ip6_rt_min_advmss,
2532                 .maxlen         =       sizeof(int),
2533                 .mode           =       0644,
2534                 .proc_handler   =       &proc_dointvec_jiffies,
2535                 .strategy       =       &sysctl_jiffies,
2536         },
2537         {
2538                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2539                 .procname       =       "gc_min_interval_ms",
2540                 .data           =       &ip6_rt_gc_min_interval,
2541                 .maxlen         =       sizeof(int),
2542                 .mode           =       0644,
2543                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2544                 .strategy       =       &sysctl_ms_jiffies,
2545         },
2546         { .ctl_name = 0 }
2547 };
2548
2549 #endif
2550
2551 void __init ip6_route_init(void)
2552 {
2553 #ifdef  CONFIG_PROC_FS
2554         struct proc_dir_entry *p;
2555 #endif
2556         ip6_dst_ops.kmem_cachep =
2557                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2558                                   SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2559         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2560
2561         fib6_init();
2562 #ifdef  CONFIG_PROC_FS
2563         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2564         if (p)
2565                 p->owner = THIS_MODULE;
2566
2567         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2568 #endif
2569 #ifdef CONFIG_XFRM
2570         xfrm6_init();
2571 #endif
2572 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2573         fib6_rules_init();
2574 #endif
2575
2576         __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2577         __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2578         __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
2579 }
2580
2581 void ip6_route_cleanup(void)
2582 {
2583 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2584         fib6_rules_cleanup();
2585 #endif
2586 #ifdef CONFIG_PROC_FS
2587         proc_net_remove("ipv6_route");
2588         proc_net_remove("rt6_stats");
2589 #endif
2590 #ifdef CONFIG_XFRM
2591         xfrm6_fini();
2592 #endif
2593         rt6_ifdown(NULL);
2594         fib6_gc_cleanup();
2595         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2596 }