inet: Use FIB table peer roots in routes.
[pandora-kernel.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68                                     const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int      ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void             ip6_dst_destroy(struct dst_entry *);
74 static void             ip6_dst_ifdown(struct dst_entry *,
75                                        struct net_device *dev, int how);
76 static int               ip6_dst_gc(struct dst_ops *ops);
77
78 static int              ip6_pkt_discard(struct sk_buff *skb);
79 static int              ip6_pkt_discard_out(struct sk_buff *skb);
80 static void             ip6_link_failure(struct sk_buff *skb);
81 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
83 #ifdef CONFIG_IPV6_ROUTE_INFO
84 static struct rt6_info *rt6_add_route_info(struct net *net,
85                                            const struct in6_addr *prefix, int prefixlen,
86                                            const struct in6_addr *gwaddr, int ifindex,
87                                            unsigned int pref);
88 static struct rt6_info *rt6_get_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex);
91 #endif
92
93 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94 {
95         struct rt6_info *rt = (struct rt6_info *) dst;
96         struct inet_peer *peer;
97         u32 *p = NULL;
98
99         if (!(rt->dst.flags & DST_HOST))
100                 return NULL;
101
102         peer = rt6_get_peer_create(rt);
103         if (peer) {
104                 u32 *old_p = __DST_METRICS_PTR(old);
105                 unsigned long prev, new;
106
107                 p = peer->metrics;
108                 if (inet_metrics_new(peer))
109                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
110
111                 new = (unsigned long) p;
112                 prev = cmpxchg(&dst->_metrics, old, new);
113
114                 if (prev != old) {
115                         p = __DST_METRICS_PTR(prev);
116                         if (prev & DST_METRICS_READ_ONLY)
117                                 p = NULL;
118                 }
119         }
120         return p;
121 }
122
123 static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
124 {
125         struct in6_addr *p = &rt->rt6i_gateway;
126
127         if (!ipv6_addr_any(p))
128                 return (const void *) p;
129         return daddr;
130 }
131
132 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
133 {
134         struct rt6_info *rt = (struct rt6_info *) dst;
135         struct neighbour *n;
136
137         daddr = choose_neigh_daddr(rt, daddr);
138         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
139         if (n)
140                 return n;
141         return neigh_create(&nd_tbl, daddr, dst->dev);
142 }
143
144 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
145 {
146         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
147         if (!n) {
148                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
149                 if (IS_ERR(n))
150                         return PTR_ERR(n);
151         }
152         dst_set_neighbour(&rt->dst, n);
153
154         return 0;
155 }
156
157 static struct dst_ops ip6_dst_ops_template = {
158         .family                 =       AF_INET6,
159         .protocol               =       cpu_to_be16(ETH_P_IPV6),
160         .gc                     =       ip6_dst_gc,
161         .gc_thresh              =       1024,
162         .check                  =       ip6_dst_check,
163         .default_advmss         =       ip6_default_advmss,
164         .mtu                    =       ip6_mtu,
165         .cow_metrics            =       ipv6_cow_metrics,
166         .destroy                =       ip6_dst_destroy,
167         .ifdown                 =       ip6_dst_ifdown,
168         .negative_advice        =       ip6_negative_advice,
169         .link_failure           =       ip6_link_failure,
170         .update_pmtu            =       ip6_rt_update_pmtu,
171         .local_out              =       __ip6_local_out,
172         .neigh_lookup           =       ip6_neigh_lookup,
173 };
174
175 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
176 {
177         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
178
179         return mtu ? : dst->dev->mtu;
180 }
181
182 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
183 {
184 }
185
186 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
187                                          unsigned long old)
188 {
189         return NULL;
190 }
191
192 static struct dst_ops ip6_dst_blackhole_ops = {
193         .family                 =       AF_INET6,
194         .protocol               =       cpu_to_be16(ETH_P_IPV6),
195         .destroy                =       ip6_dst_destroy,
196         .check                  =       ip6_dst_check,
197         .mtu                    =       ip6_blackhole_mtu,
198         .default_advmss         =       ip6_default_advmss,
199         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
200         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
201         .neigh_lookup           =       ip6_neigh_lookup,
202 };
203
204 static const u32 ip6_template_metrics[RTAX_MAX] = {
205         [RTAX_HOPLIMIT - 1] = 255,
206 };
207
208 static struct rt6_info ip6_null_entry_template = {
209         .dst = {
210                 .__refcnt       = ATOMIC_INIT(1),
211                 .__use          = 1,
212                 .obsolete       = -1,
213                 .error          = -ENETUNREACH,
214                 .input          = ip6_pkt_discard,
215                 .output         = ip6_pkt_discard_out,
216         },
217         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
218         .rt6i_protocol  = RTPROT_KERNEL,
219         .rt6i_metric    = ~(u32) 0,
220         .rt6i_ref       = ATOMIC_INIT(1),
221 };
222
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route whose dst
 * reports -EACCES and hands packets to the ip6_pkt_prohibit handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = -1,
		.error = -EACCES,
		.input = ip6_pkt_prohibit,
		.output = ip6_pkt_prohibit_out,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route entry: a reject route whose dst
 * reports -EINVAL and passes packets to dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = -1,
		.error = -EINVAL,
		.input = dst_discard,
		.output = dst_discard,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

#endif
259
260 /* allocate dst with ip6_dst_ops */
261 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
262                                              struct net_device *dev,
263                                              int flags,
264                                              struct fib6_table *table)
265 {
266         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
267                                         0, 0, flags);
268
269         if (rt) {
270                 memset(&rt->rt6i_table, 0,
271                        sizeof(*rt) - sizeof(struct dst_entry));
272                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
273         }
274         return rt;
275 }
276
277 static void ip6_dst_destroy(struct dst_entry *dst)
278 {
279         struct rt6_info *rt = (struct rt6_info *)dst;
280         struct inet6_dev *idev = rt->rt6i_idev;
281
282         if (!(rt->dst.flags & DST_HOST))
283                 dst_destroy_metrics_generic(dst);
284
285         if (idev) {
286                 rt->rt6i_idev = NULL;
287                 in6_dev_put(idev);
288         }
289
290         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
291                 dst_release(dst->from);
292
293         if (rt6_has_peer(rt)) {
294                 struct inet_peer *peer = rt6_peer_ptr(rt);
295                 inet_putpeer(peer);
296         }
297 }
298
299 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
300
301 static u32 rt6_peer_genid(void)
302 {
303         return atomic_read(&__rt6_peer_genid);
304 }
305
306 void rt6_bind_peer(struct rt6_info *rt, int create)
307 {
308         struct inet_peer_base *base;
309         struct inet_peer *peer;
310
311         base = inetpeer_base_ptr(rt->_rt6i_peer);
312         if (!base)
313                 return;
314
315         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
316         if (!rt6_set_peer(rt, peer))
317                 inet_putpeer(peer);
318         else
319                 rt->rt6i_peer_genid = rt6_peer_genid();
320 }
321
322 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
323                            int how)
324 {
325         struct rt6_info *rt = (struct rt6_info *)dst;
326         struct inet6_dev *idev = rt->rt6i_idev;
327         struct net_device *loopback_dev =
328                 dev_net(dev)->loopback_dev;
329
330         if (dev != loopback_dev && idev && idev->dev == dev) {
331                 struct inet6_dev *loopback_idev =
332                         in6_dev_get(loopback_dev);
333                 if (loopback_idev) {
334                         rt->rt6i_idev = loopback_idev;
335                         in6_dev_put(idev);
336                 }
337         }
338 }
339
340 static bool rt6_check_expired(const struct rt6_info *rt)
341 {
342         struct rt6_info *ort = NULL;
343
344         if (rt->rt6i_flags & RTF_EXPIRES) {
345                 if (time_after(jiffies, rt->dst.expires))
346                         return true;
347         } else if (rt->dst.from) {
348                 ort = (struct rt6_info *) rt->dst.from;
349                 return (ort->rt6i_flags & RTF_EXPIRES) &&
350                         time_after(jiffies, ort->dst.expires);
351         }
352         return false;
353 }
354
355 static bool rt6_need_strict(const struct in6_addr *daddr)
356 {
357         return ipv6_addr_type(daddr) &
358                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
359 }
360
361 /*
362  *      Route lookup. Any table->tb6_lock is implied.
363  */
364
365 static inline struct rt6_info *rt6_device_match(struct net *net,
366                                                     struct rt6_info *rt,
367                                                     const struct in6_addr *saddr,
368                                                     int oif,
369                                                     int flags)
370 {
371         struct rt6_info *local = NULL;
372         struct rt6_info *sprt;
373
374         if (!oif && ipv6_addr_any(saddr))
375                 goto out;
376
377         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
378                 struct net_device *dev = sprt->dst.dev;
379
380                 if (oif) {
381                         if (dev->ifindex == oif)
382                                 return sprt;
383                         if (dev->flags & IFF_LOOPBACK) {
384                                 if (!sprt->rt6i_idev ||
385                                     sprt->rt6i_idev->dev->ifindex != oif) {
386                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
387                                                 continue;
388                                         if (local && (!oif ||
389                                                       local->rt6i_idev->dev->ifindex == oif))
390                                                 continue;
391                                 }
392                                 local = sprt;
393                         }
394                 } else {
395                         if (ipv6_chk_addr(net, saddr, dev,
396                                           flags & RT6_LOOKUP_F_IFACE))
397                                 return sprt;
398                 }
399         }
400
401         if (oif) {
402                 if (local)
403                         return local;
404
405                 if (flags & RT6_LOOKUP_F_IFACE)
406                         return net->ipv6.ip6_null_entry;
407         }
408 out:
409         return rt;
410 }
411
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * route's neighbour when it is not in a NUD_VALID state and at least
 * rtr_probe_interval has passed since the neighbour was last updated.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;
	/*
	 * neigh->updated is modified below, so the write side of the lock
	 * is required; holding only the read side would let concurrent
	 * probers race on the rate-limit timestamp.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Solicit the neighbour via its solicited-node multicast address. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
451
452 /*
453  * Default Router Selection (RFC 2461 6.3.6)
454  */
455 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
456 {
457         struct net_device *dev = rt->dst.dev;
458         if (!oif || dev->ifindex == oif)
459                 return 2;
460         if ((dev->flags & IFF_LOOPBACK) &&
461             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
462                 return 1;
463         return 0;
464 }
465
466 static inline int rt6_check_neigh(struct rt6_info *rt)
467 {
468         struct neighbour *neigh;
469         int m;
470
471         rcu_read_lock();
472         neigh = dst_get_neighbour_noref(&rt->dst);
473         if (rt->rt6i_flags & RTF_NONEXTHOP ||
474             !(rt->rt6i_flags & RTF_GATEWAY))
475                 m = 1;
476         else if (neigh) {
477                 read_lock_bh(&neigh->lock);
478                 if (neigh->nud_state & NUD_VALID)
479                         m = 2;
480 #ifdef CONFIG_IPV6_ROUTER_PREF
481                 else if (neigh->nud_state & NUD_FAILED)
482                         m = 0;
483 #endif
484                 else
485                         m = 1;
486                 read_unlock_bh(&neigh->lock);
487         } else
488                 m = 0;
489         rcu_read_unlock();
490         return m;
491 }
492
493 static int rt6_score_route(struct rt6_info *rt, int oif,
494                            int strict)
495 {
496         int m, n;
497
498         m = rt6_check_dev(rt, oif);
499         if (!m && (strict & RT6_LOOKUP_F_IFACE))
500                 return -1;
501 #ifdef CONFIG_IPV6_ROUTER_PREF
502         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
503 #endif
504         n = rt6_check_neigh(rt);
505         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
506                 return -1;
507         return m;
508 }
509
510 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
511                                    int *mpri, struct rt6_info *match)
512 {
513         int m;
514
515         if (rt6_check_expired(rt))
516                 goto out;
517
518         m = rt6_score_route(rt, oif, strict);
519         if (m < 0)
520                 goto out;
521
522         if (m > *mpri) {
523                 if (strict & RT6_LOOKUP_F_REACHABLE)
524                         rt6_probe(match);
525                 *mpri = m;
526                 match = rt;
527         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
528                 rt6_probe(rt);
529         }
530
531 out:
532         return match;
533 }
534
535 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
536                                      struct rt6_info *rr_head,
537                                      u32 metric, int oif, int strict)
538 {
539         struct rt6_info *rt, *match;
540         int mpri = -1;
541
542         match = NULL;
543         for (rt = rr_head; rt && rt->rt6i_metric == metric;
544              rt = rt->dst.rt6_next)
545                 match = find_match(rt, oif, strict, &mpri, match);
546         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
547              rt = rt->dst.rt6_next)
548                 match = find_match(rt, oif, strict, &mpri, match);
549
550         return match;
551 }
552
553 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
554 {
555         struct rt6_info *match, *rt0;
556         struct net *net;
557
558         rt0 = fn->rr_ptr;
559         if (!rt0)
560                 fn->rr_ptr = rt0 = fn->leaf;
561
562         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
563
564         if (!match &&
565             (strict & RT6_LOOKUP_F_REACHABLE)) {
566                 struct rt6_info *next = rt0->dst.rt6_next;
567
568                 /* no entries matched; do round-robin */
569                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
570                         next = fn->leaf;
571
572                 if (next != rt0)
573                         fn->rr_ptr = next;
574         }
575
576         net = dev_net(rt0->dst.dev);
577         return match ? match : net->ipv6.ip6_null_entry;
578 }
579
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev.
 *
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Validates the option, then adds, refreshes or (for a zero lifetime)
 * deletes the matching route-info route and updates its expiry.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
652
/*
 * BACKTRACK - climb the FIB tree when the current node produced no route.
 *
 * This macro is not self-contained: it reads and writes the caller's local
 * variables `rt` and `fn`, and jumps to the caller's `out` and `restart`
 * labels.
 *
 * Nothing happens unless `rt` is __net's ip6_null_entry. In that case the
 * macro loops: it leaves via `goto out` when `fn` is flagged RTN_TL_ROOT;
 * otherwise it moves to the parent node, or - when the parent has a
 * subtree other than `fn` - to the result of looking @saddr up in that
 * subtree. As soon as the new `fn` is flagged RTN_RTINFO it leaves via
 * `goto restart`.
 */
653 #define BACKTRACK(__net, saddr)                 \
654 do { \
655         if (rt == __net->ipv6.ip6_null_entry) { \
656                 struct fib6_node *pn; \
657                 while (1) { \
658                         if (fn->fn_flags & RTN_TL_ROOT) \
659                                 goto out; \
660                         pn = fn->parent; \
661                         if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
662                                 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
663                         else \
664                                 fn = pn; \
665                         if (fn->fn_flags & RTN_RTINFO) \
666                                 goto restart; \
667                 } \
668         } \
669 } while (0)
670
671 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
672                                              struct fib6_table *table,
673                                              struct flowi6 *fl6, int flags)
674 {
675         struct fib6_node *fn;
676         struct rt6_info *rt;
677
678         read_lock_bh(&table->tb6_lock);
679         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
680 restart:
681         rt = fn->leaf;
682         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
683         BACKTRACK(net, &fl6->saddr);
684 out:
685         dst_use(&rt->dst, jiffies);
686         read_unlock_bh(&table->tb6_lock);
687         return rt;
688
689 }
690
691 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
692                                     int flags)
693 {
694         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
695 }
696 EXPORT_SYMBOL_GPL(ip6_route_lookup);
697
698 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
699                             const struct in6_addr *saddr, int oif, int strict)
700 {
701         struct flowi6 fl6 = {
702                 .flowi6_oif = oif,
703                 .daddr = *daddr,
704         };
705         struct dst_entry *dst;
706         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
707
708         if (saddr) {
709                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
710                 flags |= RT6_LOOKUP_F_HAS_SADDR;
711         }
712
713         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
714         if (dst->error == 0)
715                 return (struct rt6_info *) dst;
716
717         dst_release(dst);
718
719         return NULL;
720 }
721
722 EXPORT_SYMBOL(rt6_lookup);
723
724 /* ip6_ins_rt is called with FREE table->tb6_lock.
725    It takes new route entry, the addition fails by any reason the
726    route is freed. In any case, if caller does not hold it, it may
727    be destroyed.
728  */
729
730 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
731 {
732         int err;
733         struct fib6_table *table;
734
735         table = rt->rt6i_table;
736         write_lock_bh(&table->tb6_lock);
737         err = fib6_add(&table->tb6_root, rt, info);
738         write_unlock_bh(&table->tb6_lock);
739
740         return err;
741 }
742
743 int ip6_ins_rt(struct rt6_info *rt)
744 {
745         struct nl_info info = {
746                 .nl_net = dev_net(rt->dst.dev),
747         };
748         return __ip6_ins_rt(rt, &info);
749 }
750
751 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
752                                       const struct in6_addr *daddr,
753                                       const struct in6_addr *saddr)
754 {
755         struct rt6_info *rt;
756
757         /*
758          *      Clone the route.
759          */
760
761         rt = ip6_rt_copy(ort, daddr);
762
763         if (rt) {
764                 int attempts = !in_softirq();
765
766                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
767                         if (ort->rt6i_dst.plen != 128 &&
768                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
769                                 rt->rt6i_flags |= RTF_ANYCAST;
770                         rt->rt6i_gateway = *daddr;
771                 }
772
773                 rt->rt6i_flags |= RTF_CACHE;
774
775 #ifdef CONFIG_IPV6_SUBTREES
776                 if (rt->rt6i_src.plen && saddr) {
777                         rt->rt6i_src.addr = *saddr;
778                         rt->rt6i_src.plen = 128;
779                 }
780 #endif
781
782         retry:
783                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
784                         struct net *net = dev_net(rt->dst.dev);
785                         int saved_rt_min_interval =
786                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
787                         int saved_rt_elasticity =
788                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
789
790                         if (attempts-- > 0) {
791                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
792                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
793
794                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
795
796                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
797                                         saved_rt_elasticity;
798                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
799                                         saved_rt_min_interval;
800                                 goto retry;
801                         }
802
803                         net_warn_ratelimited("Neighbour table overflow\n");
804                         dst_free(&rt->dst);
805                         return NULL;
806                 }
807         }
808
809         return rt;
810 }
811
812 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
813                                         const struct in6_addr *daddr)
814 {
815         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
816
817         if (rt) {
818                 rt->rt6i_flags |= RTF_CACHE;
819                 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
820         }
821         return rt;
822 }
823
/*
 * Look up a route for @fl6 in @table and return it with a reference held.
 *
 * @oif:   interface index handed to rt6_select()
 * @flags: only RT6_LOOKUP_F_IFACE is taken from here
 *
 * The tree walk runs under the table's read lock. The first pass adds
 * RT6_LOOKUP_F_REACHABLE unless devconf_all->forwarding is set; if that
 * pass ends at the "out" label (ip6_null_entry or an RTF_CACHE entry),
 * the lookup is repeated once without the flag.
 *
 * A result that is neither ip6_null_entry nor RTF_CACHE is copied outside
 * the lock (rt6_alloc_cow() or rt6_alloc_clone()) and the copy is inserted
 * with ip6_ins_rt(). If the copy or the insertion fails, the whole lookup
 * is retried; `attempts` limits this to three tries in total.
 *
 * The rt/fn variables and the restart/out labels are also used by
 * BACKTRACK().
 */
824 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
825                                       struct flowi6 *fl6, int flags)
826 {
827         struct fib6_node *fn;
828         struct rt6_info *rt, *nrt;
829         int strict = 0;
830         int attempts = 3;
831         int err;
832         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
833
834         strict |= flags & RT6_LOOKUP_F_IFACE;
835
836 relookup:
837         read_lock_bh(&table->tb6_lock);
838
839 restart_2:
840         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
841
842 restart:
843         rt = rt6_select(fn, oif, strict | reachable);
844
845         BACKTRACK(net, &fl6->saddr);
846         if (rt == net->ipv6.ip6_null_entry ||
847             rt->rt6i_flags & RTF_CACHE)
848                 goto out;
849
/* Pin the selected route before dropping the table lock. */
850         dst_hold(&rt->dst);
851         read_unlock_bh(&table->tb6_lock);
852
/*
 * No neighbour bound and a next hop is expected: full copy with its own
 * neighbour. Otherwise non-host routes get a clone; host routes are
 * returned as they are.
 */
853         if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
854                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
855         else if (!(rt->dst.flags & DST_HOST))
856                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
857         else
858                 goto out2;
859
/* Swap the reference: drop the original, hold the copy (or ip6_null_entry). */
860         dst_release(&rt->dst);
861         rt = nrt ? : net->ipv6.ip6_null_entry;
862
863         dst_hold(&rt->dst);
864         if (nrt) {
865                 err = ip6_ins_rt(nrt);
866                 if (!err)
867                         goto out2;
868         }
869
870         if (--attempts <= 0)
871                 goto out2;
872
873         /*
874          * Race condition! In the gap, when table->tb6_lock was
875          * released someone could insert this route.  Relookup.
876          */
877         dst_release(&rt->dst);
878         goto relookup;
879
880 out:
/* Still under the read lock here; retry once without REACHABLE. */
881         if (reachable) {
882                 reachable = 0;
883                 goto restart_2;
884         }
885         dst_hold(&rt->dst);
886         read_unlock_bh(&table->tb6_lock);
887 out2:
/* Usage accounting on the route being returned. */
888         rt->dst.lastuse = jiffies;
889         rt->dst.__use++;
890
891         return rt;
892 }
893
894 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
895                                             struct flowi6 *fl6, int flags)
896 {
897         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
898 }
899
900 static struct dst_entry *ip6_route_input_lookup(struct net *net,
901                                                 struct net_device *dev,
902                                                 struct flowi6 *fl6, int flags)
903 {
904         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
905                 flags |= RT6_LOOKUP_F_IFACE;
906
907         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
908 }
909
910 void ip6_route_input(struct sk_buff *skb)
911 {
912         const struct ipv6hdr *iph = ipv6_hdr(skb);
913         struct net *net = dev_net(skb->dev);
914         int flags = RT6_LOOKUP_F_HAS_SADDR;
915         struct flowi6 fl6 = {
916                 .flowi6_iif = skb->dev->ifindex,
917                 .daddr = iph->daddr,
918                 .saddr = iph->saddr,
919                 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
920                 .flowi6_mark = skb->mark,
921                 .flowi6_proto = iph->nexthdr,
922         };
923
924         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
925 }
926
927 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
928                                              struct flowi6 *fl6, int flags)
929 {
930         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
931 }
932
933 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
934                                     struct flowi6 *fl6)
935 {
936         int flags = 0;
937
938         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
939                 flags |= RT6_LOOKUP_F_IFACE;
940
941         if (!ipv6_addr_any(&fl6->saddr))
942                 flags |= RT6_LOOKUP_F_HAS_SADDR;
943         else if (sk)
944                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
945
946         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
947 }
948
949 EXPORT_SYMBOL(ip6_route_output);
950
951 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
952 {
953         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
954         struct dst_entry *new = NULL;
955
956         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
957         if (rt) {
958                 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
959                 rt6_init_peer(rt, net->ipv6.peers);
960
961                 new = &rt->dst;
962
963                 new->__use = 1;
964                 new->input = dst_discard;
965                 new->output = dst_discard;
966
967                 if (dst_metrics_read_only(&ort->dst))
968                         new->_metrics = ort->dst._metrics;
969                 else
970                         dst_copy_metrics(new, &ort->dst);
971                 rt->rt6i_idev = ort->rt6i_idev;
972                 if (rt->rt6i_idev)
973                         in6_dev_hold(rt->rt6i_idev);
974
975                 rt->rt6i_gateway = ort->rt6i_gateway;
976                 rt->rt6i_flags = ort->rt6i_flags;
977                 rt6_clean_expires(rt);
978                 rt->rt6i_metric = 0;
979
980                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
981 #ifdef CONFIG_IPV6_SUBTREES
982                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
983 #endif
984
985                 dst_free(new);
986         }
987
988         dst_release(dst_orig);
989         return new ? new : ERR_PTR(-ENOMEM);
990 }
991
992 /*
993  *      Destination cache support functions
994  */
995
996 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
997 {
998         struct rt6_info *rt;
999
1000         rt = (struct rt6_info *) dst;
1001
1002         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1003                 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1004                         if (!rt6_has_peer(rt))
1005                                 rt6_bind_peer(rt, 0);
1006                         rt->rt6i_peer_genid = rt6_peer_genid();
1007                 }
1008                 return dst;
1009         }
1010         return NULL;
1011 }
1012
1013 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1014 {
1015         struct rt6_info *rt = (struct rt6_info *) dst;
1016
1017         if (rt) {
1018                 if (rt->rt6i_flags & RTF_CACHE) {
1019                         if (rt6_check_expired(rt)) {
1020                                 ip6_del_rt(rt);
1021                                 dst = NULL;
1022                         }
1023                 } else {
1024                         dst_release(dst);
1025                         dst = NULL;
1026                 }
1027         }
1028         return dst;
1029 }
1030
1031 static void ip6_link_failure(struct sk_buff *skb)
1032 {
1033         struct rt6_info *rt;
1034
1035         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1036
1037         rt = (struct rt6_info *) skb_dst(skb);
1038         if (rt) {
1039                 if (rt->rt6i_flags & RTF_CACHE)
1040                         rt6_update_expires(rt, 0);
1041                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1042                         rt->rt6i_node->fn_sernum = -1;
1043         }
1044 }
1045
1046 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1047 {
1048         struct rt6_info *rt6 = (struct rt6_info*)dst;
1049
1050         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1051                 rt6->rt6i_flags |= RTF_MODIFIED;
1052                 if (mtu < IPV6_MIN_MTU) {
1053                         u32 features = dst_metric(dst, RTAX_FEATURES);
1054                         mtu = IPV6_MIN_MTU;
1055                         features |= RTAX_FEATURE_ALLFRAG;
1056                         dst_metric_set(dst, RTAX_FEATURES, features);
1057                 }
1058                 dst_metric_set(dst, RTAX_MTU, mtu);
1059         }
1060 }
1061
1062 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1063 {
1064         struct net_device *dev = dst->dev;
1065         unsigned int mtu = dst_mtu(dst);
1066         struct net *net = dev_net(dev);
1067
1068         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1069
1070         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1071                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1072
1073         /*
1074          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1075          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1076          * IPV6_MAXPLEN is also valid and means: "any MSS,
1077          * rely only on pmtu discovery"
1078          */
1079         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1080                 mtu = IPV6_MAXPLEN;
1081         return mtu;
1082 }
1083
1084 static unsigned int ip6_mtu(const struct dst_entry *dst)
1085 {
1086         struct inet6_dev *idev;
1087         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1088
1089         if (mtu)
1090                 return mtu;
1091
1092         mtu = IPV6_MIN_MTU;
1093
1094         rcu_read_lock();
1095         idev = __in6_dev_get(dst->dev);
1096         if (idev)
1097                 mtu = idev->cnf.mtu6;
1098         rcu_read_unlock();
1099
1100         return mtu;
1101 }
1102
/*
 * Singly linked list (chained through dst->next) of the dst entries
 * created by icmp6_dst_alloc().  icmp6_dst_lock is taken around every
 * traversal or modification of the list.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1105
1106 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1107                                   struct neighbour *neigh,
1108                                   struct flowi6 *fl6)
1109 {
1110         struct dst_entry *dst;
1111         struct rt6_info *rt;
1112         struct inet6_dev *idev = in6_dev_get(dev);
1113         struct net *net = dev_net(dev);
1114
1115         if (unlikely(!idev))
1116                 return ERR_PTR(-ENODEV);
1117
1118         rt = ip6_dst_alloc(net, dev, 0, NULL);
1119         if (unlikely(!rt)) {
1120                 in6_dev_put(idev);
1121                 dst = ERR_PTR(-ENOMEM);
1122                 goto out;
1123         }
1124
1125         if (neigh)
1126                 neigh_hold(neigh);
1127         else {
1128                 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1129                 if (IS_ERR(neigh)) {
1130                         in6_dev_put(idev);
1131                         dst_free(&rt->dst);
1132                         return ERR_CAST(neigh);
1133                 }
1134         }
1135
1136         rt->dst.flags |= DST_HOST;
1137         rt->dst.output  = ip6_output;
1138         dst_set_neighbour(&rt->dst, neigh);
1139         atomic_set(&rt->dst.__refcnt, 1);
1140         rt->rt6i_dst.addr = fl6->daddr;
1141         rt->rt6i_dst.plen = 128;
1142         rt->rt6i_idev     = idev;
1143         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1144
1145         spin_lock_bh(&icmp6_dst_lock);
1146         rt->dst.next = icmp6_dst_gc_list;
1147         icmp6_dst_gc_list = &rt->dst;
1148         spin_unlock_bh(&icmp6_dst_lock);
1149
1150         fib6_force_start_gc(net);
1151
1152         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1153
1154 out:
1155         return dst;
1156 }
1157
1158 int icmp6_dst_gc(void)
1159 {
1160         struct dst_entry *dst, **pprev;
1161         int more = 0;
1162
1163         spin_lock_bh(&icmp6_dst_lock);
1164         pprev = &icmp6_dst_gc_list;
1165
1166         while ((dst = *pprev) != NULL) {
1167                 if (!atomic_read(&dst->__refcnt)) {
1168                         *pprev = dst->next;
1169                         dst_free(dst);
1170                 } else {
1171                         pprev = &dst->next;
1172                         ++more;
1173                 }
1174         }
1175
1176         spin_unlock_bh(&icmp6_dst_lock);
1177
1178         return more;
1179 }
1180
1181 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1182                             void *arg)
1183 {
1184         struct dst_entry *dst, **pprev;
1185
1186         spin_lock_bh(&icmp6_dst_lock);
1187         pprev = &icmp6_dst_gc_list;
1188         while ((dst = *pprev) != NULL) {
1189                 struct rt6_info *rt = (struct rt6_info *) dst;
1190                 if (func(rt, arg)) {
1191                         *pprev = dst->next;
1192                         dst_free(dst);
1193                 } else {
1194                         pprev = &dst->next;
1195                 }
1196         }
1197         spin_unlock_bh(&icmp6_dst_lock);
1198 }
1199
1200 static int ip6_dst_gc(struct dst_ops *ops)
1201 {
1202         unsigned long now = jiffies;
1203         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1204         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1205         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1206         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1207         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1208         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1209         int entries;
1210
1211         entries = dst_entries_get_fast(ops);
1212         if (time_after(rt_last_gc + rt_min_interval, now) &&
1213             entries <= rt_max_size)
1214                 goto out;
1215
1216         net->ipv6.ip6_rt_gc_expire++;
1217         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1218         net->ipv6.ip6_rt_last_gc = now;
1219         entries = dst_entries_get_slow(ops);
1220         if (entries < ops->gc_thresh)
1221                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1222 out:
1223         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1224         return entries > rt_max_size;
1225 }
1226
1227 /* Clean host part of a prefix. Not necessary in radix tree,
1228    but results in cleaner routing tables.
1229
1230    Remove it only when all the things will work!
1231  */
1232
1233 int ip6_dst_hoplimit(struct dst_entry *dst)
1234 {
1235         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1236         if (hoplimit == 0) {
1237                 struct net_device *dev = dst->dev;
1238                 struct inet6_dev *idev;
1239
1240                 rcu_read_lock();
1241                 idev = __in6_dev_get(dev);
1242                 if (idev)
1243                         hoplimit = idev->cnf.hop_limit;
1244                 else
1245                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1246                 rcu_read_unlock();
1247         }
1248         return hoplimit;
1249 }
1250 EXPORT_SYMBOL(ip6_dst_hoplimit);
1251
1252 /*
1253  *
1254  */
1255
/*
 * ip6_route_add - build a route from @cfg and insert it into its FIB table.
 *
 * Validates the prefix lengths, takes references on the device/inet6_dev
 * named by cfg->fc_ifindex (if any), finds or creates the target table,
 * allocates a rt6_info, fills it in from @cfg and hands it to
 * __ip6_ins_rt().
 *
 * Side effects on @cfg: fc_metric and fc_protocol are given defaults when
 * unset, and fc_nlinfo.nl_net is overwritten with the namespace of the
 * chosen device.
 *
 * Returns the result of __ip6_ins_rt() on the install path.  On every
 * failure path a negative errno is returned after dropping the dev/idev
 * references held so far and freeing the route if it was allocated.
 */
int ip6_route_add(struct fib6_config *cfg)
{
        int err;
        struct net *net = cfg->fc_nlinfo.nl_net;
        struct rt6_info *rt = NULL;
        struct net_device *dev = NULL;
        struct inet6_dev *idev = NULL;
        struct fib6_table *table;
        int addr_type;

        if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
                return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
        /* Source-specific routes need subtree support. */
        if (cfg->fc_src_len)
                return -EINVAL;
#endif
        if (cfg->fc_ifindex) {
                /* Both lookups take a reference that "out" drops again. */
                err = -ENODEV;
                dev = dev_get_by_index(net, cfg->fc_ifindex);
                if (!dev)
                        goto out;
                idev = in6_dev_get(dev);
                if (!idev)
                        goto out;
        }

        if (cfg->fc_metric == 0)
                cfg->fc_metric = IP6_RT_PRIO_USER;

        err = -ENOBUFS;
        if (cfg->fc_nlinfo.nlh &&
            !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
                /*
                 * Netlink request without NLM_F_CREATE: prefer an existing
                 * table, but still create one (with a warning) if missing.
                 */
                table = fib6_get_table(net, cfg->fc_table);
                if (!table) {
                        pr_warn("NLM_F_CREATE should be specified when creating new route\n");
                        table = fib6_new_table(net, cfg->fc_table);
                }
        } else {
                table = fib6_new_table(net, cfg->fc_table);
        }

        if (!table)
                goto out;

        rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);

        if (!rt) {
                err = -ENOMEM;
                goto out;
        }

        rt->dst.obsolete = -1;

        if (cfg->fc_flags & RTF_EXPIRES)
                rt6_set_expires(rt, jiffies +
                                clock_t_to_jiffies(cfg->fc_expires));
        else
                rt6_clean_expires(rt);

        if (cfg->fc_protocol == RTPROT_UNSPEC)
                cfg->fc_protocol = RTPROT_BOOT;
        rt->rt6i_protocol = cfg->fc_protocol;

        addr_type = ipv6_addr_type(&cfg->fc_dst);

        /* Pick the input handler from the destination type / RTF_LOCAL. */
        if (addr_type & IPV6_ADDR_MULTICAST)
                rt->dst.input = ip6_mc_input;
        else if (cfg->fc_flags & RTF_LOCAL)
                rt->dst.input = ip6_input;
        else
                rt->dst.input = ip6_forward;

        rt->dst.output = ip6_output;

        ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
        rt->rt6i_dst.plen = cfg->fc_dst_len;
        if (rt->rt6i_dst.plen == 128)
               rt->dst.flags |= DST_HOST;

        /*
         * Non-host routes that carry metric attributes get a freshly
         * zeroed metrics array installed via dst_init_metrics().
         */
        if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
                u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
                if (!metrics) {
                        err = -ENOMEM;
                        goto out;
                }
                dst_init_metrics(&rt->dst, metrics, 0);
        }
#ifdef CONFIG_IPV6_SUBTREES
        ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
        rt->rt6i_src.plen = cfg->fc_src_len;
#endif

        rt->rt6i_metric = cfg->fc_metric;

        /* We cannot add true routes via loopback here,
           they would result in kernel looping; promote them to reject routes
         */
        if ((cfg->fc_flags & RTF_REJECT) ||
            (dev && (dev->flags & IFF_LOOPBACK) &&
             !(addr_type & IPV6_ADDR_LOOPBACK) &&
             !(cfg->fc_flags & RTF_LOCAL))) {
                /* hold loopback dev/idev if we haven't done so. */
                if (dev != net->loopback_dev) {
                        if (dev) {
                                dev_put(dev);
                                in6_dev_put(idev);
                        }
                        dev = net->loopback_dev;
                        dev_hold(dev);
                        idev = in6_dev_get(dev);
                        if (!idev) {
                                err = -ENODEV;
                                goto out;
                        }
                }
                rt->dst.output = ip6_pkt_discard_out;
                rt->dst.input = ip6_pkt_discard;
                rt->dst.error = -ENETUNREACH;
                rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
                goto install_route;
        }

        if (cfg->fc_flags & RTF_GATEWAY) {
                const struct in6_addr *gw_addr;
                int gwa_type;

                gw_addr = &cfg->fc_gateway;
                rt->rt6i_gateway = *gw_addr;
                gwa_type = ipv6_addr_type(gw_addr);

                if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
                        struct rt6_info *grt;

                        /* IPv6 strictly inhibits using non-link-local
                           addresses as the nexthop address.
                           Otherwise, the router will not be able to send redirects.
                           It is very good, but in some (rare!) circumstances
                           (SIT, PtP, NBMA NOARP links) it is handy to allow
                           some exceptions. --ANK
                         */
                        err = -EINVAL;
                        if (!(gwa_type & IPV6_ADDR_UNICAST))
                                goto out;

                        /* Find the existing route towards the gateway itself. */
                        grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

                        err = -EHOSTUNREACH;
                        if (!grt)
                                goto out;
                        if (dev) {
                                /* Gateway must be reachable via the requested device. */
                                if (dev != grt->dst.dev) {
                                        dst_release(&grt->dst);
                                        goto out;
                                }
                        } else {
                                /* No device requested: adopt the gateway route's. */
                                dev = grt->dst.dev;
                                idev = grt->rt6i_idev;
                                dev_hold(dev);
                                in6_dev_hold(grt->rt6i_idev);
                        }
                        /*
                         * err stays -EHOSTUNREACH unless the route to the
                         * gateway is not itself a gateway route.
                         */
                        if (!(grt->rt6i_flags & RTF_GATEWAY))
                                err = 0;
                        dst_release(&grt->dst);

                        if (err)
                                goto out;
                }
                err = -EINVAL;
                if (!dev || (dev->flags & IFF_LOOPBACK))
                        goto out;
        }

        err = -ENODEV;
        if (!dev)
                goto out;

        if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
                /* A preferred source is accepted only if ipv6_chk_addr() passes for dev. */
                if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
                        err = -EINVAL;
                        goto out;
                }
                rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
                rt->rt6i_prefsrc.plen = 128;
        } else
                rt->rt6i_prefsrc.plen = 0;

        if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
                err = rt6_bind_neighbour(rt, dev);
                if (err)
                        goto out;
        }

        rt->rt6i_flags = cfg->fc_flags;

install_route:
        if (cfg->fc_mx) {
                struct nlattr *nla;
                int remaining;

                /* Copy each metric attribute; type 0 is skipped, > RTAX_MAX rejected. */
                nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
                        int type = nla_type(nla);

                        if (type) {
                                if (type > RTAX_MAX) {
                                        err = -EINVAL;
                                        goto out;
                                }

                                dst_metric_set(&rt->dst, type, nla_get_u32(nla));
                        }
                }
        }

        /* The dev/idev references taken above are stored in the route here. */
        rt->dst.dev = dev;
        rt->rt6i_idev = idev;
        rt->rt6i_table = table;

        cfg->fc_nlinfo.nl_net = dev_net(dev);

        return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
        /* Error exit: undo whatever was acquired so far. */
        if (dev)
                dev_put(dev);
        if (idev)
                in6_dev_put(idev);
        if (rt)
                dst_free(&rt->dst);
        return err;
}
1486
1487 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1488 {
1489         int err;
1490         struct fib6_table *table;
1491         struct net *net = dev_net(rt->dst.dev);
1492
1493         if (rt == net->ipv6.ip6_null_entry)
1494                 return -ENOENT;
1495
1496         table = rt->rt6i_table;
1497         write_lock_bh(&table->tb6_lock);
1498
1499         err = fib6_del(rt, info);
1500         dst_release(&rt->dst);
1501
1502         write_unlock_bh(&table->tb6_lock);
1503
1504         return err;
1505 }
1506
1507 int ip6_del_rt(struct rt6_info *rt)
1508 {
1509         struct nl_info info = {
1510                 .nl_net = dev_net(rt->dst.dev),
1511         };
1512         return __ip6_del_rt(rt, &info);
1513 }
1514
1515 static int ip6_route_del(struct fib6_config *cfg)
1516 {
1517         struct fib6_table *table;
1518         struct fib6_node *fn;
1519         struct rt6_info *rt;
1520         int err = -ESRCH;
1521
1522         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1523         if (!table)
1524                 return err;
1525
1526         read_lock_bh(&table->tb6_lock);
1527
1528         fn = fib6_locate(&table->tb6_root,
1529                          &cfg->fc_dst, cfg->fc_dst_len,
1530                          &cfg->fc_src, cfg->fc_src_len);
1531
1532         if (fn) {
1533                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1534                         if (cfg->fc_ifindex &&
1535                             (!rt->dst.dev ||
1536                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1537                                 continue;
1538                         if (cfg->fc_flags & RTF_GATEWAY &&
1539                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1540                                 continue;
1541                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1542                                 continue;
1543                         dst_hold(&rt->dst);
1544                         read_unlock_bh(&table->tb6_lock);
1545
1546                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1547                 }
1548         }
1549         read_unlock_bh(&table->tb6_lock);
1550
1551         return err;
1552 }
1553
1554 /*
1555  *      Handle redirects
1556  */
/*
 * Flow key for redirect lookups: a regular flowi6 extended with the
 * address of the router that sent the redirect.
 *
 * fl6 must remain the first member: __ip6_route_redirect() receives a
 * struct flowi6 pointer and casts it back to struct ip6rd_flowi.
 */
struct ip6rd_flowi {
        struct flowi6 fl6;
        struct in6_addr gateway;        /* compared against rt6i_gateway */
};
1561
/*
 * Per-table lookup callback used when validating a redirect.
 *
 * @fl6 is really the fl6 member of a struct ip6rd_flowi; the extra
 * gateway field carries the address of the router that sent the redirect.
 * The route list of the node matching the flow is searched for an
 * unexpired gateway route on the flow's output interface whose gateway
 * equals that router.
 *
 * Returns the matching route, or the namespace's null entry, with a
 * reference held in both cases.  @flags is not used in this function.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
                                             struct fib6_table *table,
                                             struct flowi6 *fl6,
                                             int flags)
{
        struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
        struct rt6_info *rt;
        struct fib6_node *fn;

        /*
         * Get the "current" route for this destination and
         * check if the redirect has come from an appropriate router.
         *
         * RFC 2461 specifies that redirects should only be
         * accepted if they come from the nexthop to the target.
         * Due to the way the routes are chosen, this notion
         * is a bit fuzzy and one might need to check all possible
         * routes.
         */

        read_lock_bh(&table->tb6_lock);
        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
                /*
                 * Current route is on-link; redirect is always invalid.
                 *
                 * Seems, previous statement is not true. It could
                 * be node, which looks for us as on-link (f.e. proxy ndisc)
                 * But then router serving it might decide, that we should
                 * know truth 8)8) --ANK (980726).
                 */
                if (rt6_check_expired(rt))
                        continue;
                if (!(rt->rt6i_flags & RTF_GATEWAY))
                        continue;
                if (fl6->flowi6_oif != rt->dst.dev->ifindex)
                        continue;
                if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
                        continue;
                break;
        }

        if (!rt)
                rt = net->ipv6.ip6_null_entry;
        /*
         * NOTE(review): BACKTRACK is a macro defined outside this chunk.
         * Presumably it moves fn towards the tree root and jumps to
         * "restart" or "out" when rt is the null entry; confirm against
         * its definition.  The labels and the names fn/rt must not be
         * renamed without checking the macro.
         */
        BACKTRACK(net, &fl6->saddr);
out:
        dst_hold(&rt->dst);

        read_unlock_bh(&table->tb6_lock);

        return rt;
};
1615
1616 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1617                                            const struct in6_addr *src,
1618                                            const struct in6_addr *gateway,
1619                                            struct net_device *dev)
1620 {
1621         int flags = RT6_LOOKUP_F_HAS_SADDR;
1622         struct net *net = dev_net(dev);
1623         struct ip6rd_flowi rdfl = {
1624                 .fl6 = {
1625                         .flowi6_oif = dev->ifindex,
1626                         .daddr = *dest,
1627                         .saddr = *src,
1628                 },
1629         };
1630
1631         rdfl.gateway = *gateway;
1632
1633         if (rt6_need_strict(dest))
1634                 flags |= RT6_LOOKUP_F_IFACE;
1635
1636         return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1637                                                    flags, __ip6_route_redirect);
1638 }
1639
/*
 * rt6_redirect - apply a received redirect to the routing table.
 * @dest:    destination the redirect is about
 * @src:     source address used for the route lookup
 * @saddr:   passed to ip6_route_redirect() as the gateway to match, i.e.
 *           the router that must be the current nexthop
 * @neigh:   neighbour entry for the new nexthop; its device selects the
 *           namespace and output interface
 * @lladdr:  link-layer address handed to neigh_update()
 * @on_link: non-zero when the target is on-link (no RTF_GATEWAY on the
 *           new route, no router flags on the neighbour update)
 *
 * If a matching current route exists, the neighbour is updated, a
 * RTF_CACHE copy of the route pointing at @neigh is inserted, netevent
 * listeners are notified, and an old cache route is deleted.
 */
void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
                  const struct in6_addr *saddr,
                  struct neighbour *neigh, u8 *lladdr, int on_link)
{
        struct rt6_info *rt, *nrt = NULL;
        struct netevent_redirect netevent;
        struct net *net = dev_net(neigh->dev);

        /* Returns with a reference held, even for the null entry. */
        rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

        if (rt == net->ipv6.ip6_null_entry) {
                net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
                goto out;
        }

        /*
         *      We have finally decided to accept it.
         */

        neigh_update(neigh, lladdr, NUD_STALE,
                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
                     NEIGH_UPDATE_F_OVERRIDE|
                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
                                     NEIGH_UPDATE_F_ISROUTER))
                     );

        /*
         * Redirect received -> path was valid.
         * Look, redirects are sent only in response to data packets,
         * so that this nexthop apparently is reachable. --ANK
         */
        dst_confirm(&rt->dst);

        /* Duplicate redirect: silently ignore. */
        if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
                goto out;

        nrt = ip6_rt_copy(rt, dest);
        if (!nrt)
                goto out;

        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
        if (on_link)
                nrt->rt6i_flags &= ~RTF_GATEWAY;

        /* The new nexthop is the neighbour's own address. */
        nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
        dst_set_neighbour(&nrt->dst, neigh_clone(neigh));

        if (ip6_ins_rt(nrt))
                goto out;

        netevent.old = &rt->dst;
        netevent.new = &nrt->dst;
        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

        if (rt->rt6i_flags & RTF_CACHE) {
                /*
                 * Old cache entry is superseded.  No dst_release() here:
                 * the delete path drops the reference obtained above.
                 */
                ip6_del_rt(rt);
                return;
        }

out:
        dst_release(&rt->dst);
}
1703
1704 /*
1705  *      Handle ICMP "packet too big" messages
1706  *      i.e. Path MTU discovery
1707  */
1708
/*
 * rt6_do_pmtu_disc - record a reduced path MTU for one destination.
 * @daddr:   destination the reduced MTU applies to
 * @saddr:   source address used for the route lookup
 * @net:     network namespace
 * @pmtu:    reported path MTU
 * @ifindex: interface index passed to rt6_lookup()
 *
 * An existing RTF_CACHE route is updated in place; otherwise a new
 * per-destination route (COW or clone of the matched route) carrying the
 * new MTU is inserted.  Both get an expiry of ip6_rt_mtu_expires.
 * Nothing happens when no route is found or @pmtu is not smaller than
 * the current dst MTU.
 */
static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
                             struct net *net, u32 pmtu, int ifindex)
{
        struct rt6_info *rt, *nrt;
        int allfrag = 0;
again:
        rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
        if (!rt)
                return;

        /* Drop an expired match and retry the lookup. */
        if (rt6_check_expired(rt)) {
                ip6_del_rt(rt);
                goto again;
        }

        if (pmtu >= dst_mtu(&rt->dst))
                goto out;

        if (pmtu < IPV6_MIN_MTU) {
                /*
                 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
                 * MTU (1280) and a fragment header should always be included
                 * after a node receiving Too Big message reporting PMTU is
                 * less than the IPv6 Minimum Link MTU.
                 */
                pmtu = IPV6_MIN_MTU;
                allfrag = 1;
        }

        /* New mtu received -> path was valid.
           They are sent only in response to data packets,
           so that this nexthop apparently is reachable. --ANK
         */
        dst_confirm(&rt->dst);

        /* Host route. If it is static, it would be better
           not to override it, but add new one, so that
           when cache entry will expire old pmtu
           would return automatically.
         */
        if (rt->rt6i_flags & RTF_CACHE) {
                dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
                if (allfrag) {
                        u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
                        features |= RTAX_FEATURE_ALLFRAG;
                        dst_metric_set(&rt->dst, RTAX_FEATURES, features);
                }
                rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
                rt->rt6i_flags |= RTF_MODIFIED;
                goto out;
        }

        /* Network route.
           Two cases are possible:
           1. It is connected route. Action: COW
           2. It is gatewayed route or NONEXTHOP route. Action: clone it.
         */
        if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
                nrt = rt6_alloc_cow(rt, daddr, saddr);
        else
                nrt = rt6_alloc_clone(rt, daddr);

        /* Allocation failure is silently tolerated: the MTU is just not recorded. */
        if (nrt) {
                dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
                if (allfrag) {
                        u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
                        features |= RTAX_FEATURE_ALLFRAG;
                        dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
                }

                /* According to RFC 1981, detection of a PMTU increase should
                 * not happen within 5 mins; the recommended timer is 10 mins.
                 * Here this route's expiration time is set to ip6_rt_mtu_expires,
                 * which is 10 mins. After 10 mins the decreased pmtu expires
                 * and detection of a PMTU increase happens automatically.
                 */
                rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
                nrt->rt6i_flags |= RTF_DYNAMIC;
                /* The return value of ip6_ins_rt() is ignored here. */
                ip6_ins_rt(nrt);
        }
out:
        dst_release(&rt->dst);
}
1792
1793 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1794                         struct net_device *dev, u32 pmtu)
1795 {
1796         struct net *net = dev_net(dev);
1797
1798         /*
1799          * RFC 1981 states that a node "MUST reduce the size of the packets it
1800          * is sending along the path" that caused the Packet Too Big message.
1801          * Since it's not possible in the general case to determine which
1802          * interface was used to send the original packet, we update the MTU
1803          * on the interface that will be used to send future packets. We also
1804          * update the MTU on the interface that received the Packet Too Big in
1805          * case the original packet was forced out that interface with
1806          * SO_BINDTODEVICE or similar. This is the next best thing to the
1807          * correct behaviour, which would be to update the MTU on all
1808          * interfaces.
1809          */
1810         rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1811         rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1812 }
1813
1814 /*
1815  *      Misc support functions
1816  */
1817
1818 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1819                                     const struct in6_addr *dest)
1820 {
1821         struct net *net = dev_net(ort->dst.dev);
1822         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1823                                             ort->rt6i_table);
1824
1825         if (rt) {
1826                 rt->dst.input = ort->dst.input;
1827                 rt->dst.output = ort->dst.output;
1828                 rt->dst.flags |= DST_HOST;
1829
1830                 rt->rt6i_dst.addr = *dest;
1831                 rt->rt6i_dst.plen = 128;
1832                 dst_copy_metrics(&rt->dst, &ort->dst);
1833                 rt->dst.error = ort->dst.error;
1834                 rt->rt6i_idev = ort->rt6i_idev;
1835                 if (rt->rt6i_idev)
1836                         in6_dev_hold(rt->rt6i_idev);
1837                 rt->dst.lastuse = jiffies;
1838
1839                 rt->rt6i_gateway = ort->rt6i_gateway;
1840                 rt->rt6i_flags = ort->rt6i_flags;
1841                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1842                     (RTF_DEFAULT | RTF_ADDRCONF))
1843                         rt6_set_from(rt, ort);
1844                 else
1845                         rt6_clean_expires(rt);
1846                 rt->rt6i_metric = 0;
1847
1848 #ifdef CONFIG_IPV6_SUBTREES
1849                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1850 #endif
1851                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1852                 rt->rt6i_table = ort->rt6i_table;
1853         }
1854         return rt;
1855 }
1856
1857 #ifdef CONFIG_IPV6_ROUTE_INFO
1858 static struct rt6_info *rt6_get_route_info(struct net *net,
1859                                            const struct in6_addr *prefix, int prefixlen,
1860                                            const struct in6_addr *gwaddr, int ifindex)
1861 {
1862         struct fib6_node *fn;
1863         struct rt6_info *rt = NULL;
1864         struct fib6_table *table;
1865
1866         table = fib6_get_table(net, RT6_TABLE_INFO);
1867         if (!table)
1868                 return NULL;
1869
1870         write_lock_bh(&table->tb6_lock);
1871         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1872         if (!fn)
1873                 goto out;
1874
1875         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1876                 if (rt->dst.dev->ifindex != ifindex)
1877                         continue;
1878                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1879                         continue;
1880                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1881                         continue;
1882                 dst_hold(&rt->dst);
1883                 break;
1884         }
1885 out:
1886         write_unlock_bh(&table->tb6_lock);
1887         return rt;
1888 }
1889
1890 static struct rt6_info *rt6_add_route_info(struct net *net,
1891                                            const struct in6_addr *prefix, int prefixlen,
1892                                            const struct in6_addr *gwaddr, int ifindex,
1893                                            unsigned int pref)
1894 {
1895         struct fib6_config cfg = {
1896                 .fc_table       = RT6_TABLE_INFO,
1897                 .fc_metric      = IP6_RT_PRIO_USER,
1898                 .fc_ifindex     = ifindex,
1899                 .fc_dst_len     = prefixlen,
1900                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1901                                   RTF_UP | RTF_PREF(pref),
1902                 .fc_nlinfo.pid = 0,
1903                 .fc_nlinfo.nlh = NULL,
1904                 .fc_nlinfo.nl_net = net,
1905         };
1906
1907         cfg.fc_dst = *prefix;
1908         cfg.fc_gateway = *gwaddr;
1909
1910         /* We should treat it as a default route if prefix length is 0. */
1911         if (!prefixlen)
1912                 cfg.fc_flags |= RTF_DEFAULT;
1913
1914         ip6_route_add(&cfg);
1915
1916         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1917 }
1918 #endif
1919
1920 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1921 {
1922         struct rt6_info *rt;
1923         struct fib6_table *table;
1924
1925         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1926         if (!table)
1927                 return NULL;
1928
1929         write_lock_bh(&table->tb6_lock);
1930         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1931                 if (dev == rt->dst.dev &&
1932                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1933                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1934                         break;
1935         }
1936         if (rt)
1937                 dst_hold(&rt->dst);
1938         write_unlock_bh(&table->tb6_lock);
1939         return rt;
1940 }
1941
1942 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1943                                      struct net_device *dev,
1944                                      unsigned int pref)
1945 {
1946         struct fib6_config cfg = {
1947                 .fc_table       = RT6_TABLE_DFLT,
1948                 .fc_metric      = IP6_RT_PRIO_USER,
1949                 .fc_ifindex     = dev->ifindex,
1950                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1951                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1952                 .fc_nlinfo.pid = 0,
1953                 .fc_nlinfo.nlh = NULL,
1954                 .fc_nlinfo.nl_net = dev_net(dev),
1955         };
1956
1957         cfg.fc_gateway = *gwaddr;
1958
1959         ip6_route_add(&cfg);
1960
1961         return rt6_get_dflt_router(gwaddr, dev);
1962 }
1963
1964 void rt6_purge_dflt_routers(struct net *net)
1965 {
1966         struct rt6_info *rt;
1967         struct fib6_table *table;
1968
1969         /* NOTE: Keep consistent with rt6_get_dflt_router */
1970         table = fib6_get_table(net, RT6_TABLE_DFLT);
1971         if (!table)
1972                 return;
1973
1974 restart:
1975         read_lock_bh(&table->tb6_lock);
1976         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1977                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1978                         dst_hold(&rt->dst);
1979                         read_unlock_bh(&table->tb6_lock);
1980                         ip6_del_rt(rt);
1981                         goto restart;
1982                 }
1983         }
1984         read_unlock_bh(&table->tb6_lock);
1985 }
1986
1987 static void rtmsg_to_fib6_config(struct net *net,
1988                                  struct in6_rtmsg *rtmsg,
1989                                  struct fib6_config *cfg)
1990 {
1991         memset(cfg, 0, sizeof(*cfg));
1992
1993         cfg->fc_table = RT6_TABLE_MAIN;
1994         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1995         cfg->fc_metric = rtmsg->rtmsg_metric;
1996         cfg->fc_expires = rtmsg->rtmsg_info;
1997         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1998         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1999         cfg->fc_flags = rtmsg->rtmsg_flags;
2000
2001         cfg->fc_nlinfo.nl_net = net;
2002
2003         cfg->fc_dst = rtmsg->rtmsg_dst;
2004         cfg->fc_src = rtmsg->rtmsg_src;
2005         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2006 }
2007
2008 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2009 {
2010         struct fib6_config cfg;
2011         struct in6_rtmsg rtmsg;
2012         int err;
2013
2014         switch(cmd) {
2015         case SIOCADDRT:         /* Add a route */
2016         case SIOCDELRT:         /* Delete a route */
2017                 if (!capable(CAP_NET_ADMIN))
2018                         return -EPERM;
2019                 err = copy_from_user(&rtmsg, arg,
2020                                      sizeof(struct in6_rtmsg));
2021                 if (err)
2022                         return -EFAULT;
2023
2024                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2025
2026                 rtnl_lock();
2027                 switch (cmd) {
2028                 case SIOCADDRT:
2029                         err = ip6_route_add(&cfg);
2030                         break;
2031                 case SIOCDELRT:
2032                         err = ip6_route_del(&cfg);
2033                         break;
2034                 default:
2035                         err = -EINVAL;
2036                 }
2037                 rtnl_unlock();
2038
2039                 return err;
2040         }
2041
2042         return -EINVAL;
2043 }
2044
2045 /*
2046  *      Drop the packet on the floor
2047  */
2048
2049 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2050 {
2051         int type;
2052         struct dst_entry *dst = skb_dst(skb);
2053         switch (ipstats_mib_noroutes) {
2054         case IPSTATS_MIB_INNOROUTES:
2055                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2056                 if (type == IPV6_ADDR_ANY) {
2057                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2058                                       IPSTATS_MIB_INADDRERRORS);
2059                         break;
2060                 }
2061                 /* FALLTHROUGH */
2062         case IPSTATS_MIB_OUTNOROUTES:
2063                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2064                               ipstats_mib_noroutes);
2065                 break;
2066         }
2067         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2068         kfree_skb(skb);
2069         return 0;
2070 }
2071
2072 static int ip6_pkt_discard(struct sk_buff *skb)
2073 {
2074         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2075 }
2076
2077 static int ip6_pkt_discard_out(struct sk_buff *skb)
2078 {
2079         skb->dev = skb_dst(skb)->dev;
2080         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2081 }
2082
2083 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2084
2085 static int ip6_pkt_prohibit(struct sk_buff *skb)
2086 {
2087         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2088 }
2089
2090 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2091 {
2092         skb->dev = skb_dst(skb)->dev;
2093         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2094 }
2095
2096 #endif
2097
2098 /*
2099  *      Allocate a dst for local (unicast / anycast) address.
2100  */
2101
2102 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2103                                     const struct in6_addr *addr,
2104                                     bool anycast)
2105 {
2106         struct net *net = dev_net(idev->dev);
2107         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2108         int err;
2109
2110         if (!rt) {
2111                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2112                 return ERR_PTR(-ENOMEM);
2113         }
2114
2115         in6_dev_hold(idev);
2116
2117         rt->dst.flags |= DST_HOST;
2118         rt->dst.input = ip6_input;
2119         rt->dst.output = ip6_output;
2120         rt->rt6i_idev = idev;
2121         rt->dst.obsolete = -1;
2122
2123         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2124         if (anycast)
2125                 rt->rt6i_flags |= RTF_ANYCAST;
2126         else
2127                 rt->rt6i_flags |= RTF_LOCAL;
2128         err = rt6_bind_neighbour(rt, rt->dst.dev);
2129         if (err) {
2130                 dst_free(&rt->dst);
2131                 return ERR_PTR(err);
2132         }
2133
2134         rt->rt6i_dst.addr = *addr;
2135         rt->rt6i_dst.plen = 128;
2136         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2137
2138         atomic_set(&rt->dst.__refcnt, 1);
2139
2140         return rt;
2141 }
2142
2143 int ip6_route_get_saddr(struct net *net,
2144                         struct rt6_info *rt,
2145                         const struct in6_addr *daddr,
2146                         unsigned int prefs,
2147                         struct in6_addr *saddr)
2148 {
2149         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2150         int err = 0;
2151         if (rt->rt6i_prefsrc.plen)
2152                 *saddr = rt->rt6i_prefsrc.addr;
2153         else
2154                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2155                                          daddr, prefs, saddr);
2156         return err;
2157 }
2158
/*
 * Remove a deleted IP address from routes' prefsrc entries.
 *
 * Walker argument handed to fib6_remove_prefsrc() through fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace, used to recognise ip6_null_entry */
	struct in6_addr *addr;	/* address compared against each route's prefsrc */
};
2165
2166 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2167 {
2168         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2169         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2170         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2171
2172         if (((void *)rt->dst.dev == dev || !dev) &&
2173             rt != net->ipv6.ip6_null_entry &&
2174             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2175                 /* remove prefsrc entry */
2176                 rt->rt6i_prefsrc.plen = 0;
2177         }
2178         return 0;
2179 }
2180
2181 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2182 {
2183         struct net *net = dev_net(ifp->idev->dev);
2184         struct arg_dev_net_ip adni = {
2185                 .dev = ifp->idev->dev,
2186                 .net = net,
2187                 .addr = &ifp->addr,
2188         };
2189         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2190 }
2191
/* Walker argument handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace, used to recognise ip6_null_entry */
};
2196
2197 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2198 {
2199         const struct arg_dev_net *adn = arg;
2200         const struct net_device *dev = adn->dev;
2201
2202         if ((rt->dst.dev == dev || !dev) &&
2203             rt != adn->net->ipv6.ip6_null_entry)
2204                 return -1;
2205
2206         return 0;
2207 }
2208
2209 void rt6_ifdown(struct net *net, struct net_device *dev)
2210 {
2211         struct arg_dev_net adn = {
2212                 .dev = dev,
2213                 .net = net,
2214         };
2215
2216         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2217         icmp6_clean_all(fib6_ifdown, &adn);
2218 }
2219
/* Walker argument handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* only routes on this device are considered */
	unsigned int mtu;	/* MTU value written into matching routes */
};
2224
2225 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2226 {
2227         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2228         struct inet6_dev *idev;
2229
2230         /* In IPv6 pmtu discovery is not optional,
2231            so that RTAX_MTU lock cannot disable it.
2232            We still use this lock to block changes
2233            caused by addrconf/ndisc.
2234         */
2235
2236         idev = __in6_dev_get(arg->dev);
2237         if (!idev)
2238                 return 0;
2239
2240         /* For administrative MTU increase, there is no way to discover
2241            IPv6 PMTU increase, so PMTU increase should be updated here.
2242            Since RFC 1981 doesn't include administrative MTU increase
2243            update PMTU increase is a MUST. (i.e. jumbo frame)
2244          */
2245         /*
2246            If new MTU is less than route PMTU, this new MTU will be the
2247            lowest MTU in the path, update the route PMTU to reflect PMTU
2248            decreases; if new MTU is greater than route PMTU, and the
2249            old MTU is the lowest MTU in the path, update the route PMTU
2250            to reflect the increase. In this case if the other nodes' MTU
2251            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2252            PMTU discouvery.
2253          */
2254         if (rt->dst.dev == arg->dev &&
2255             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2256             (dst_mtu(&rt->dst) >= arg->mtu ||
2257              (dst_mtu(&rt->dst) < arg->mtu &&
2258               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2259                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2260         }
2261         return 0;
2262 }
2263
2264 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2265 {
2266         struct rt6_mtu_change_arg arg = {
2267                 .dev = dev,
2268                 .mtu = mtu,
2269         };
2270
2271         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2272 }
2273
2274 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2275         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2276         [RTA_OIF]               = { .type = NLA_U32 },
2277         [RTA_IIF]               = { .type = NLA_U32 },
2278         [RTA_PRIORITY]          = { .type = NLA_U32 },
2279         [RTA_METRICS]           = { .type = NLA_NESTED },
2280 };
2281
2282 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2283                               struct fib6_config *cfg)
2284 {
2285         struct rtmsg *rtm;
2286         struct nlattr *tb[RTA_MAX+1];
2287         int err;
2288
2289         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2290         if (err < 0)
2291                 goto errout;
2292
2293         err = -EINVAL;
2294         rtm = nlmsg_data(nlh);
2295         memset(cfg, 0, sizeof(*cfg));
2296
2297         cfg->fc_table = rtm->rtm_table;
2298         cfg->fc_dst_len = rtm->rtm_dst_len;
2299         cfg->fc_src_len = rtm->rtm_src_len;
2300         cfg->fc_flags = RTF_UP;
2301         cfg->fc_protocol = rtm->rtm_protocol;
2302
2303         if (rtm->rtm_type == RTN_UNREACHABLE)
2304                 cfg->fc_flags |= RTF_REJECT;
2305
2306         if (rtm->rtm_type == RTN_LOCAL)
2307                 cfg->fc_flags |= RTF_LOCAL;
2308
2309         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2310         cfg->fc_nlinfo.nlh = nlh;
2311         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2312
2313         if (tb[RTA_GATEWAY]) {
2314                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2315                 cfg->fc_flags |= RTF_GATEWAY;
2316         }
2317
2318         if (tb[RTA_DST]) {
2319                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2320
2321                 if (nla_len(tb[RTA_DST]) < plen)
2322                         goto errout;
2323
2324                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2325         }
2326
2327         if (tb[RTA_SRC]) {
2328                 int plen = (rtm->rtm_src_len + 7) >> 3;
2329
2330                 if (nla_len(tb[RTA_SRC]) < plen)
2331                         goto errout;
2332
2333                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2334         }
2335
2336         if (tb[RTA_PREFSRC])
2337                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2338
2339         if (tb[RTA_OIF])
2340                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2341
2342         if (tb[RTA_PRIORITY])
2343                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2344
2345         if (tb[RTA_METRICS]) {
2346                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2347                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2348         }
2349
2350         if (tb[RTA_TABLE])
2351                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2352
2353         err = 0;
2354 errout:
2355         return err;
2356 }
2357
2358 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2359 {
2360         struct fib6_config cfg;
2361         int err;
2362
2363         err = rtm_to_fib6_config(skb, nlh, &cfg);
2364         if (err < 0)
2365                 return err;
2366
2367         return ip6_route_del(&cfg);
2368 }
2369
2370 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2371 {
2372         struct fib6_config cfg;
2373         int err;
2374
2375         err = rtm_to_fib6_config(skb, nlh, &cfg);
2376         if (err < 0)
2377                 return err;
2378
2379         return ip6_route_add(&cfg);
2380 }
2381
2382 static inline size_t rt6_nlmsg_size(void)
2383 {
2384         return NLMSG_ALIGN(sizeof(struct rtmsg))
2385                + nla_total_size(16) /* RTA_SRC */
2386                + nla_total_size(16) /* RTA_DST */
2387                + nla_total_size(16) /* RTA_GATEWAY */
2388                + nla_total_size(16) /* RTA_PREFSRC */
2389                + nla_total_size(4) /* RTA_TABLE */
2390                + nla_total_size(4) /* RTA_IIF */
2391                + nla_total_size(4) /* RTA_OIF */
2392                + nla_total_size(4) /* RTA_PRIORITY */
2393                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2394                + nla_total_size(sizeof(struct rta_cacheinfo));
2395 }
2396
/*
 * Serialise route @rt into @skb as one netlink route message.
 *
 * @net:    namespace, used for multicast route and source address lookups
 * @skb:    buffer the message is appended to
 * @rt:     route to describe
 * @dst:    if non-NULL, reported as a /128 RTA_DST instead of the route's
 *          own destination prefix
 * @src:    if non-NULL (CONFIG_IPV6_SUBTREES only), reported as a /128
 *          RTA_SRC instead of the route's own source prefix
 * @iif:    input interface index; non-zero selects the RTA_IIF /
 *          multicast-route path, zero with @dst set emits RTA_PREFSRC
 * @type:   netlink message type
 * @pid:    netlink port id for the message header
 * @seq:    netlink sequence number for the message header
 * @prefix: non-zero to emit only routes flagged RTF_PREFIX_RT
 * @nowait: forwarded to ip6mr_get_route()
 * @flags:  netlink message flags
 *
 * Returns 1 when the route is skipped by the @prefix filter, 0 when
 * ip6mr_get_route() returned 0 with @nowait clear, -EMSGSIZE when the
 * message does not fit (the partial message is cancelled), otherwise the
 * value of nlmsg_end().
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	const struct inet_peer *peer;
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;
	struct neighbour *n;
	u32 ts, tsage;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* Fixed rtmsg header. */
	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* The table id goes into the header and into a u32 RTA_TABLE attribute. */
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* Route type: reject > local flag > loopback device > unicast. */
	if (rt->rt6i_flags & RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* The stored protocol is overridden for dynamic, addrconf and
	 * default routes, in that order of precedence.
	 */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags & RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* Destination: the caller-supplied address as a /128, else the
	 * route's own prefix when it is non-empty.
	 */
	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	/* Source: same scheme as the destination. */
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* Multicast destinations are described by ip6mr_get_route()
		 * instead of a plain RTA_IIF attribute.
		 */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					/* with nowait only -EMSGSIZE is treated as fatal */
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* Output lookup: report the source address that would be chosen. */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	/* NOTE(review): when the branch above already emitted RTA_PREFSRC and
	 * the route has a prefsrc, this emits a second RTA_PREFSRC attribute
	 * - confirm this is intended.
	 */
	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	/* The neighbour pointer is fetched and used inside the RCU read
	 * section; its primary key is reported as RTA_GATEWAY.
	 */
	rcu_read_lock();
	n = dst_get_neighbour_noref(&rt->dst);
	if (n) {
		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
			rcu_read_unlock();
			goto nla_put_failure;
		}
	}
	rcu_read_unlock();

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;
	/* Remaining lifetime in jiffies, capped at INT_MAX; 0 for routes
	 * without RTF_EXPIRES.
	 * NOTE(review): the subtraction looks unsigned, so an already
	 * expired route would take the INT_MAX branch - confirm the types
	 * of dst.expires and jiffies.
	 */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->dst.expires - jiffies < INT_MAX)
		expires = rt->dst.expires - jiffies;
	else
		expires = INT_MAX;

	/* TCP timestamp and its age come from the inet_peer, if the route has one. */
	peer = NULL;
	if (rt6_has_peer(rt))
		peer = rt6_peer_ptr(rt);
	ts = tsage = 0;
	if (peer && peer->tcp_ts_stamp) {
		ts = peer->tcp_ts;
		tsage = get_seconds() - peer->tcp_ts_stamp;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	/* Cancel the partially built message. */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2547
2548 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2549 {
2550         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2551         int prefix;
2552
2553         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2554                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2555                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2556         } else
2557                 prefix = 0;
2558
2559         return rt6_fill_node(arg->net,
2560                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2561                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2562                      prefix, 0, NLM_F_MULTI);
2563 }
2564
2565 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2566 {
2567         struct net *net = sock_net(in_skb->sk);
2568         struct nlattr *tb[RTA_MAX+1];
2569         struct rt6_info *rt;
2570         struct sk_buff *skb;
2571         struct rtmsg *rtm;
2572         struct flowi6 fl6;
2573         int err, iif = 0, oif = 0;
2574
2575         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2576         if (err < 0)
2577                 goto errout;
2578
2579         err = -EINVAL;
2580         memset(&fl6, 0, sizeof(fl6));
2581
2582         if (tb[RTA_SRC]) {
2583                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2584                         goto errout;
2585
2586                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2587         }
2588
2589         if (tb[RTA_DST]) {
2590                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2591                         goto errout;
2592
2593                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2594         }
2595
2596         if (tb[RTA_IIF])
2597                 iif = nla_get_u32(tb[RTA_IIF]);
2598
2599         if (tb[RTA_OIF])
2600                 oif = nla_get_u32(tb[RTA_OIF]);
2601
2602         if (iif) {
2603                 struct net_device *dev;
2604                 int flags = 0;
2605
2606                 dev = __dev_get_by_index(net, iif);
2607                 if (!dev) {
2608                         err = -ENODEV;
2609                         goto errout;
2610                 }
2611
2612                 fl6.flowi6_iif = iif;
2613
2614                 if (!ipv6_addr_any(&fl6.saddr))
2615                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2616
2617                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2618                                                                flags);
2619         } else {
2620                 fl6.flowi6_oif = oif;
2621
2622                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2623         }
2624
2625         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2626         if (!skb) {
2627                 dst_release(&rt->dst);
2628                 err = -ENOBUFS;
2629                 goto errout;
2630         }
2631
2632         /* Reserve room for dummy headers, this skb can pass
2633            through good chunk of routing engine.
2634          */
2635         skb_reset_mac_header(skb);
2636         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2637
2638         skb_dst_set(skb, &rt->dst);
2639
2640         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2641                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2642                             nlh->nlmsg_seq, 0, 0, 0);
2643         if (err < 0) {
2644                 kfree_skb(skb);
2645                 goto errout;
2646         }
2647
2648         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2649 errout:
2650         return err;
2651 }
2652
2653 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2654 {
2655         struct sk_buff *skb;
2656         struct net *net = info->nl_net;
2657         u32 seq;
2658         int err;
2659
2660         err = -ENOBUFS;
2661         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2662
2663         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2664         if (!skb)
2665                 goto errout;
2666
2667         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2668                                 event, info->pid, seq, 0, 0, 0);
2669         if (err < 0) {
2670                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2671                 WARN_ON(err == -EMSGSIZE);
2672                 kfree_skb(skb);
2673                 goto errout;
2674         }
2675         rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2676                     info->nlh, gfp_any());
2677         return;
2678 errout:
2679         if (err < 0)
2680                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2681 }
2682
2683 static int ip6_route_dev_notify(struct notifier_block *this,
2684                                 unsigned long event, void *data)
2685 {
2686         struct net_device *dev = (struct net_device *)data;
2687         struct net *net = dev_net(dev);
2688
2689         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2690                 net->ipv6.ip6_null_entry->dst.dev = dev;
2691                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2692 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2693                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2694                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2695                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2696                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2697 #endif
2698         }
2699
2700         return NOTIFY_OK;
2701 }
2702
2703 /*
2704  *      /proc
2705  */
2706
2707 #ifdef CONFIG_PROC_FS
2708
/*
 * Bookkeeping for a buffer-based /proc read.
 * NOTE(review): nothing in the visible part of this file references this
 * structure (the seq_file handlers below do not use it); the field
 * descriptions are inferred from the names only - confirm before relying
 * on them.
 */
struct rt6_proc_arg {
        char *buffer;   /* presumably the destination buffer */
        int offset;     /* presumably the requested start offset */
        int length;     /* presumably the requested byte count */
        int skip;       /* presumably bytes skipped so far */
        int len;        /* presumably bytes produced so far */
};
2717
2718 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2719 {
2720         struct seq_file *m = p_arg;
2721         struct neighbour *n;
2722
2723         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2724
2725 #ifdef CONFIG_IPV6_SUBTREES
2726         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2727 #else
2728         seq_puts(m, "00000000000000000000000000000000 00 ");
2729 #endif
2730         rcu_read_lock();
2731         n = dst_get_neighbour_noref(&rt->dst);
2732         if (n) {
2733                 seq_printf(m, "%pi6", n->primary_key);
2734         } else {
2735                 seq_puts(m, "00000000000000000000000000000000");
2736         }
2737         rcu_read_unlock();
2738         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2739                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2740                    rt->dst.__use, rt->rt6i_flags,
2741                    rt->dst.dev ? rt->dst.dev->name : "");
2742         return 0;
2743 }
2744
2745 static int ipv6_route_show(struct seq_file *m, void *v)
2746 {
2747         struct net *net = (struct net *)m->private;
2748         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2749         return 0;
2750 }
2751
2752 static int ipv6_route_open(struct inode *inode, struct file *file)
2753 {
2754         return single_open_net(inode, file, ipv6_route_show);
2755 }
2756
2757 static const struct file_operations ipv6_route_proc_fops = {
2758         .owner          = THIS_MODULE,
2759         .open           = ipv6_route_open,
2760         .read           = seq_read,
2761         .llseek         = seq_lseek,
2762         .release        = single_release_net,
2763 };
2764
2765 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2766 {
2767         struct net *net = (struct net *)seq->private;
2768         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2769                    net->ipv6.rt6_stats->fib_nodes,
2770                    net->ipv6.rt6_stats->fib_route_nodes,
2771                    net->ipv6.rt6_stats->fib_rt_alloc,
2772                    net->ipv6.rt6_stats->fib_rt_entries,
2773                    net->ipv6.rt6_stats->fib_rt_cache,
2774                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2775                    net->ipv6.rt6_stats->fib_discarded_routes);
2776
2777         return 0;
2778 }
2779
2780 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2781 {
2782         return single_open_net(inode, file, rt6_stats_seq_show);
2783 }
2784
2785 static const struct file_operations rt6_stats_seq_fops = {
2786         .owner   = THIS_MODULE,
2787         .open    = rt6_stats_seq_open,
2788         .read    = seq_read,
2789         .llseek  = seq_lseek,
2790         .release = single_release_net,
2791 };
2792 #endif  /* CONFIG_PROC_FS */
2793
2794 #ifdef CONFIG_SYSCTL
2795
2796 static
2797 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2798                               void __user *buffer, size_t *lenp, loff_t *ppos)
2799 {
2800         struct net *net;
2801         int delay;
2802         if (!write)
2803                 return -EINVAL;
2804
2805         net = (struct net *)ctl->extra1;
2806         delay = net->ipv6.sysctl.flush_delay;
2807         proc_dointvec(ctl, write, buffer, lenp, ppos);
2808         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2809         return 0;
2810 }
2811
2812 ctl_table ipv6_route_table_template[] = {
2813         {
2814                 .procname       =       "flush",
2815                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2816                 .maxlen         =       sizeof(int),
2817                 .mode           =       0200,
2818                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2819         },
2820         {
2821                 .procname       =       "gc_thresh",
2822                 .data           =       &ip6_dst_ops_template.gc_thresh,
2823                 .maxlen         =       sizeof(int),
2824                 .mode           =       0644,
2825                 .proc_handler   =       proc_dointvec,
2826         },
2827         {
2828                 .procname       =       "max_size",
2829                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2830                 .maxlen         =       sizeof(int),
2831                 .mode           =       0644,
2832                 .proc_handler   =       proc_dointvec,
2833         },
2834         {
2835                 .procname       =       "gc_min_interval",
2836                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2837                 .maxlen         =       sizeof(int),
2838                 .mode           =       0644,
2839                 .proc_handler   =       proc_dointvec_jiffies,
2840         },
2841         {
2842                 .procname       =       "gc_timeout",
2843                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2844                 .maxlen         =       sizeof(int),
2845                 .mode           =       0644,
2846                 .proc_handler   =       proc_dointvec_jiffies,
2847         },
2848         {
2849                 .procname       =       "gc_interval",
2850                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2851                 .maxlen         =       sizeof(int),
2852                 .mode           =       0644,
2853                 .proc_handler   =       proc_dointvec_jiffies,
2854         },
2855         {
2856                 .procname       =       "gc_elasticity",
2857                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2858                 .maxlen         =       sizeof(int),
2859                 .mode           =       0644,
2860                 .proc_handler   =       proc_dointvec,
2861         },
2862         {
2863                 .procname       =       "mtu_expires",
2864                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2865                 .maxlen         =       sizeof(int),
2866                 .mode           =       0644,
2867                 .proc_handler   =       proc_dointvec_jiffies,
2868         },
2869         {
2870                 .procname       =       "min_adv_mss",
2871                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2872                 .maxlen         =       sizeof(int),
2873                 .mode           =       0644,
2874                 .proc_handler   =       proc_dointvec,
2875         },
2876         {
2877                 .procname       =       "gc_min_interval_ms",
2878                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2879                 .maxlen         =       sizeof(int),
2880                 .mode           =       0644,
2881                 .proc_handler   =       proc_dointvec_ms_jiffies,
2882         },
2883         { }
2884 };
2885
2886 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2887 {
2888         struct ctl_table *table;
2889
2890         table = kmemdup(ipv6_route_table_template,
2891                         sizeof(ipv6_route_table_template),
2892                         GFP_KERNEL);
2893
2894         if (table) {
2895                 table[0].data = &net->ipv6.sysctl.flush_delay;
2896                 table[0].extra1 = net;
2897                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2898                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2899                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2900                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2901                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2902                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2903                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2904                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2905                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2906         }
2907
2908         return table;
2909 }
2910 #endif
2911
2912 static int __net_init ip6_route_net_init(struct net *net)
2913 {
2914         int ret = -ENOMEM;
2915
2916         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2917                sizeof(net->ipv6.ip6_dst_ops));
2918
2919         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2920                 goto out_ip6_dst_ops;
2921
2922         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2923                                            sizeof(*net->ipv6.ip6_null_entry),
2924                                            GFP_KERNEL);
2925         if (!net->ipv6.ip6_null_entry)
2926                 goto out_ip6_dst_entries;
2927         net->ipv6.ip6_null_entry->dst.path =
2928                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2929         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2930         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2931                          ip6_template_metrics, true);
2932
2933 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2934         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2935                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2936                                                GFP_KERNEL);
2937         if (!net->ipv6.ip6_prohibit_entry)
2938                 goto out_ip6_null_entry;
2939         net->ipv6.ip6_prohibit_entry->dst.path =
2940                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2941         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2942         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2943                          ip6_template_metrics, true);
2944
2945         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2946                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2947                                                GFP_KERNEL);
2948         if (!net->ipv6.ip6_blk_hole_entry)
2949                 goto out_ip6_prohibit_entry;
2950         net->ipv6.ip6_blk_hole_entry->dst.path =
2951                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2952         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2953         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2954                          ip6_template_metrics, true);
2955 #endif
2956
2957         net->ipv6.sysctl.flush_delay = 0;
2958         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2959         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2960         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2961         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2962         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2963         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2964         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2965
2966 #ifdef CONFIG_PROC_FS
2967         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2968         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2969 #endif
2970         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2971
2972         ret = 0;
2973 out:
2974         return ret;
2975
2976 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2977 out_ip6_prohibit_entry:
2978         kfree(net->ipv6.ip6_prohibit_entry);
2979 out_ip6_null_entry:
2980         kfree(net->ipv6.ip6_null_entry);
2981 #endif
2982 out_ip6_dst_entries:
2983         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2984 out_ip6_dst_ops:
2985         goto out;
2986 }
2987
2988 static void __net_exit ip6_route_net_exit(struct net *net)
2989 {
2990 #ifdef CONFIG_PROC_FS
2991         proc_net_remove(net, "ipv6_route");
2992         proc_net_remove(net, "rt6_stats");
2993 #endif
2994         kfree(net->ipv6.ip6_null_entry);
2995 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2996         kfree(net->ipv6.ip6_prohibit_entry);
2997         kfree(net->ipv6.ip6_blk_hole_entry);
2998 #endif
2999         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3000 }
3001
3002 static struct pernet_operations ip6_route_net_ops = {
3003         .init = ip6_route_net_init,
3004         .exit = ip6_route_net_exit,
3005 };
3006
3007 static int __net_init ipv6_inetpeer_init(struct net *net)
3008 {
3009         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3010
3011         if (!bp)
3012                 return -ENOMEM;
3013         inet_peer_base_init(bp);
3014         net->ipv6.peers = bp;
3015         return 0;
3016 }
3017
3018 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3019 {
3020         struct inet_peer_base *bp = net->ipv6.peers;
3021
3022         net->ipv6.peers = NULL;
3023         inetpeer_invalidate_tree(bp);
3024         kfree(bp);
3025 }
3026
3027 static struct pernet_operations ipv6_inetpeer_ops = {
3028         .init   =       ipv6_inetpeer_init,
3029         .exit   =       ipv6_inetpeer_exit,
3030 };
3031
3032 static struct notifier_block ip6_route_dev_notifier = {
3033         .notifier_call = ip6_route_dev_notify,
3034         .priority = 0,
3035 };
3036
3037 int __init ip6_route_init(void)
3038 {
3039         int ret;
3040
3041         ret = -ENOMEM;
3042         ip6_dst_ops_template.kmem_cachep =
3043                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3044                                   SLAB_HWCACHE_ALIGN, NULL);
3045         if (!ip6_dst_ops_template.kmem_cachep)
3046                 goto out;
3047
3048         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3049         if (ret)
3050                 goto out_kmem_cache;
3051
3052         ret = register_pernet_subsys(&ip6_route_net_ops);
3053         if (ret)
3054                 goto out_dst_entries;
3055
3056         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3057         if (ret)
3058                 goto out_register_subsys;
3059
3060         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3061
3062         /* Registering of the loopback is done before this portion of code,
3063          * the loopback reference in rt6_info will not be taken, do it
3064          * manually for init_net */
3065         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3066         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3067   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3068         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3069         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3070         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3071         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3072   #endif
3073         ret = fib6_init();
3074         if (ret)
3075                 goto out_register_inetpeer;
3076
3077         ret = xfrm6_init();
3078         if (ret)
3079                 goto out_fib6_init;
3080
3081         ret = fib6_rules_init();
3082         if (ret)
3083                 goto xfrm6_init;
3084
3085         ret = -ENOBUFS;
3086         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3087             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3088             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3089                 goto fib6_rules_init;
3090
3091         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3092         if (ret)
3093                 goto fib6_rules_init;
3094
3095 out:
3096         return ret;
3097
3098 fib6_rules_init:
3099         fib6_rules_cleanup();
3100 xfrm6_init:
3101         xfrm6_fini();
3102 out_fib6_init:
3103         fib6_gc_cleanup();
3104 out_register_inetpeer:
3105         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3106 out_register_subsys:
3107         unregister_pernet_subsys(&ip6_route_net_ops);
3108 out_dst_entries:
3109         dst_entries_destroy(&ip6_dst_blackhole_ops);
3110 out_kmem_cache:
3111         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3112         goto out;
3113 }
3114
3115 void ip6_route_cleanup(void)
3116 {
3117         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3118         fib6_rules_cleanup();
3119         xfrm6_fini();
3120         fib6_gc_cleanup();
3121         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3122         unregister_pernet_subsys(&ip6_route_net_ops);
3123         dst_entries_destroy(&ip6_dst_blackhole_ops);
3124         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3125 }