inet: Consolidate inetpeer_invalidate_tree() interfaces.
[pandora-kernel.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68                                     const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int      ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void             ip6_dst_destroy(struct dst_entry *);
74 static void             ip6_dst_ifdown(struct dst_entry *,
75                                        struct net_device *dev, int how);
76 static int               ip6_dst_gc(struct dst_ops *ops);
77
78 static int              ip6_pkt_discard(struct sk_buff *skb);
79 static int              ip6_pkt_discard_out(struct sk_buff *skb);
80 static void             ip6_link_failure(struct sk_buff *skb);
81 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
83 #ifdef CONFIG_IPV6_ROUTE_INFO
84 static struct rt6_info *rt6_add_route_info(struct net *net,
85                                            const struct in6_addr *prefix, int prefixlen,
86                                            const struct in6_addr *gwaddr, int ifindex,
87                                            unsigned int pref);
88 static struct rt6_info *rt6_get_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex);
91 #endif
92
93 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94 {
95         struct rt6_info *rt = (struct rt6_info *) dst;
96         struct inet_peer *peer;
97         u32 *p = NULL;
98
99         if (!(rt->dst.flags & DST_HOST))
100                 return NULL;
101
102         peer = rt6_get_peer_create(rt);
103         if (peer) {
104                 u32 *old_p = __DST_METRICS_PTR(old);
105                 unsigned long prev, new;
106
107                 p = peer->metrics;
108                 if (inet_metrics_new(peer))
109                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
110
111                 new = (unsigned long) p;
112                 prev = cmpxchg(&dst->_metrics, old, new);
113
114                 if (prev != old) {
115                         p = __DST_METRICS_PTR(prev);
116                         if (prev & DST_METRICS_READ_ONLY)
117                                 p = NULL;
118                 }
119         }
120         return p;
121 }
122
123 static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
124 {
125         struct in6_addr *p = &rt->rt6i_gateway;
126
127         if (!ipv6_addr_any(p))
128                 return (const void *) p;
129         return daddr;
130 }
131
132 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
133 {
134         struct rt6_info *rt = (struct rt6_info *) dst;
135         struct neighbour *n;
136
137         daddr = choose_neigh_daddr(rt, daddr);
138         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
139         if (n)
140                 return n;
141         return neigh_create(&nd_tbl, daddr, dst->dev);
142 }
143
144 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
145 {
146         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
147         if (!n) {
148                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
149                 if (IS_ERR(n))
150                         return PTR_ERR(n);
151         }
152         dst_set_neighbour(&rt->dst, n);
153
154         return 0;
155 }
156
157 static struct dst_ops ip6_dst_ops_template = {
158         .family                 =       AF_INET6,
159         .protocol               =       cpu_to_be16(ETH_P_IPV6),
160         .gc                     =       ip6_dst_gc,
161         .gc_thresh              =       1024,
162         .check                  =       ip6_dst_check,
163         .default_advmss         =       ip6_default_advmss,
164         .mtu                    =       ip6_mtu,
165         .cow_metrics            =       ipv6_cow_metrics,
166         .destroy                =       ip6_dst_destroy,
167         .ifdown                 =       ip6_dst_ifdown,
168         .negative_advice        =       ip6_negative_advice,
169         .link_failure           =       ip6_link_failure,
170         .update_pmtu            =       ip6_rt_update_pmtu,
171         .local_out              =       __ip6_local_out,
172         .neigh_lookup           =       ip6_neigh_lookup,
173 };
174
175 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
176 {
177         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
178
179         return mtu ? : dst->dev->mtu;
180 }
181
182 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
183 {
184 }
185
186 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
187                                          unsigned long old)
188 {
189         return NULL;
190 }
191
192 static struct dst_ops ip6_dst_blackhole_ops = {
193         .family                 =       AF_INET6,
194         .protocol               =       cpu_to_be16(ETH_P_IPV6),
195         .destroy                =       ip6_dst_destroy,
196         .check                  =       ip6_dst_check,
197         .mtu                    =       ip6_blackhole_mtu,
198         .default_advmss         =       ip6_default_advmss,
199         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
200         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
201         .neigh_lookup           =       ip6_neigh_lookup,
202 };
203
204 static const u32 ip6_template_metrics[RTAX_MAX] = {
205         [RTAX_HOPLIMIT - 1] = 255,
206 };
207
208 static struct rt6_info ip6_null_entry_template = {
209         .dst = {
210                 .__refcnt       = ATOMIC_INIT(1),
211                 .__use          = 1,
212                 .obsolete       = -1,
213                 .error          = -ENETUNREACH,
214                 .input          = ip6_pkt_discard,
215                 .output         = ip6_pkt_discard_out,
216         },
217         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
218         .rt6i_protocol  = RTPROT_KERNEL,
219         .rt6i_metric    = ~(u32) 0,
220         .rt6i_ref       = ATOMIC_INIT(1),
221 };
222
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" reject route: its dst reports -EACCES and
 * uses the ip6_pkt_prohibit handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = -1,
		.error = -EACCES,
		.input = ip6_pkt_prohibit,
		.output = ip6_pkt_prohibit_out,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" reject route: its dst reports -EINVAL and
 * hands packets to dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = -1,
		.error = -EINVAL,
		.input = dst_discard,
		.output = dst_discard,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

#endif
259
260 /* allocate dst with ip6_dst_ops */
261 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
262                                              struct net_device *dev,
263                                              int flags)
264 {
265         struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
266
267         if (rt)
268                 memset(&rt->rt6i_table, 0,
269                        sizeof(*rt) - sizeof(struct dst_entry));
270
271         return rt;
272 }
273
274 static void ip6_dst_destroy(struct dst_entry *dst)
275 {
276         struct rt6_info *rt = (struct rt6_info *)dst;
277         struct inet6_dev *idev = rt->rt6i_idev;
278         struct inet_peer *peer = rt->rt6i_peer;
279
280         if (!(rt->dst.flags & DST_HOST))
281                 dst_destroy_metrics_generic(dst);
282
283         if (idev) {
284                 rt->rt6i_idev = NULL;
285                 in6_dev_put(idev);
286         }
287
288         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
289                 dst_release(dst->from);
290
291         if (peer) {
292                 rt->rt6i_peer = NULL;
293                 inet_putpeer(peer);
294         }
295 }
296
297 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
298
299 static u32 rt6_peer_genid(void)
300 {
301         return atomic_read(&__rt6_peer_genid);
302 }
303
304 void rt6_bind_peer(struct rt6_info *rt, int create)
305 {
306         struct net *net = dev_net(rt->dst.dev);
307         struct inet_peer *peer;
308
309         peer = inet_getpeer_v6(net, &rt->rt6i_dst.addr, create);
310         if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
311                 inet_putpeer(peer);
312         else
313                 rt->rt6i_peer_genid = rt6_peer_genid();
314 }
315
316 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
317                            int how)
318 {
319         struct rt6_info *rt = (struct rt6_info *)dst;
320         struct inet6_dev *idev = rt->rt6i_idev;
321         struct net_device *loopback_dev =
322                 dev_net(dev)->loopback_dev;
323
324         if (dev != loopback_dev && idev && idev->dev == dev) {
325                 struct inet6_dev *loopback_idev =
326                         in6_dev_get(loopback_dev);
327                 if (loopback_idev) {
328                         rt->rt6i_idev = loopback_idev;
329                         in6_dev_put(idev);
330                 }
331         }
332 }
333
334 static bool rt6_check_expired(const struct rt6_info *rt)
335 {
336         struct rt6_info *ort = NULL;
337
338         if (rt->rt6i_flags & RTF_EXPIRES) {
339                 if (time_after(jiffies, rt->dst.expires))
340                         return true;
341         } else if (rt->dst.from) {
342                 ort = (struct rt6_info *) rt->dst.from;
343                 return (ort->rt6i_flags & RTF_EXPIRES) &&
344                         time_after(jiffies, ort->dst.expires);
345         }
346         return false;
347 }
348
349 static bool rt6_need_strict(const struct in6_addr *daddr)
350 {
351         return ipv6_addr_type(daddr) &
352                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
353 }
354
355 /*
356  *      Route lookup. Any table->tb6_lock is implied.
357  */
358
359 static inline struct rt6_info *rt6_device_match(struct net *net,
360                                                     struct rt6_info *rt,
361                                                     const struct in6_addr *saddr,
362                                                     int oif,
363                                                     int flags)
364 {
365         struct rt6_info *local = NULL;
366         struct rt6_info *sprt;
367
368         if (!oif && ipv6_addr_any(saddr))
369                 goto out;
370
371         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
372                 struct net_device *dev = sprt->dst.dev;
373
374                 if (oif) {
375                         if (dev->ifindex == oif)
376                                 return sprt;
377                         if (dev->flags & IFF_LOOPBACK) {
378                                 if (!sprt->rt6i_idev ||
379                                     sprt->rt6i_idev->dev->ifindex != oif) {
380                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
381                                                 continue;
382                                         if (local && (!oif ||
383                                                       local->rt6i_idev->dev->ifindex == oif))
384                                                 continue;
385                                 }
386                                 local = sprt;
387                         }
388                 } else {
389                         if (ipv6_chk_addr(net, saddr, dev,
390                                           flags & RT6_LOOKUP_F_IFACE))
391                                 return sprt;
392                 }
393         }
394
395         if (oif) {
396                 if (local)
397                         return local;
398
399                 if (flags & RT6_LOOKUP_F_IFACE)
400                         return net->ipv6.ip6_null_entry;
401         }
402 out:
403         return rt;
404 }
405
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * next hop of @rt when its neighbour entry is not in a NUD_VALID state.
 *
 * @rt may be NULL, in which case nothing happens.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.  The limit is enforced by comparing jiffies against
 * neigh->updated plus the interface's rtr_probe_interval, and stamping
 * neigh->updated when a probe is about to be sent.
 *
 * The check and the stamp are done under the *write* side of neigh->lock:
 * neigh->updated is modified here, and with only the read side held,
 * concurrent callers could all pass the check and each send a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	/* Cheap unlocked pre-check; re-checked under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Solicit the neighbour via its solicited-node multicast address. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
445
446 /*
447  * Default Router Selection (RFC 2461 6.3.6)
448  */
449 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
450 {
451         struct net_device *dev = rt->dst.dev;
452         if (!oif || dev->ifindex == oif)
453                 return 2;
454         if ((dev->flags & IFF_LOOPBACK) &&
455             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
456                 return 1;
457         return 0;
458 }
459
460 static inline int rt6_check_neigh(struct rt6_info *rt)
461 {
462         struct neighbour *neigh;
463         int m;
464
465         rcu_read_lock();
466         neigh = dst_get_neighbour_noref(&rt->dst);
467         if (rt->rt6i_flags & RTF_NONEXTHOP ||
468             !(rt->rt6i_flags & RTF_GATEWAY))
469                 m = 1;
470         else if (neigh) {
471                 read_lock_bh(&neigh->lock);
472                 if (neigh->nud_state & NUD_VALID)
473                         m = 2;
474 #ifdef CONFIG_IPV6_ROUTER_PREF
475                 else if (neigh->nud_state & NUD_FAILED)
476                         m = 0;
477 #endif
478                 else
479                         m = 1;
480                 read_unlock_bh(&neigh->lock);
481         } else
482                 m = 0;
483         rcu_read_unlock();
484         return m;
485 }
486
487 static int rt6_score_route(struct rt6_info *rt, int oif,
488                            int strict)
489 {
490         int m, n;
491
492         m = rt6_check_dev(rt, oif);
493         if (!m && (strict & RT6_LOOKUP_F_IFACE))
494                 return -1;
495 #ifdef CONFIG_IPV6_ROUTER_PREF
496         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
497 #endif
498         n = rt6_check_neigh(rt);
499         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
500                 return -1;
501         return m;
502 }
503
504 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
505                                    int *mpri, struct rt6_info *match)
506 {
507         int m;
508
509         if (rt6_check_expired(rt))
510                 goto out;
511
512         m = rt6_score_route(rt, oif, strict);
513         if (m < 0)
514                 goto out;
515
516         if (m > *mpri) {
517                 if (strict & RT6_LOOKUP_F_REACHABLE)
518                         rt6_probe(match);
519                 *mpri = m;
520                 match = rt;
521         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
522                 rt6_probe(rt);
523         }
524
525 out:
526         return match;
527 }
528
529 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
530                                      struct rt6_info *rr_head,
531                                      u32 metric, int oif, int strict)
532 {
533         struct rt6_info *rt, *match;
534         int mpri = -1;
535
536         match = NULL;
537         for (rt = rr_head; rt && rt->rt6i_metric == metric;
538              rt = rt->dst.rt6_next)
539                 match = find_match(rt, oif, strict, &mpri, match);
540         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
541              rt = rt->dst.rt6_next)
542                 match = find_match(rt, oif, strict, &mpri, match);
543
544         return match;
545 }
546
547 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
548 {
549         struct rt6_info *match, *rt0;
550         struct net *net;
551
552         rt0 = fn->rr_ptr;
553         if (!rt0)
554                 fn->rr_ptr = rt0 = fn->leaf;
555
556         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
557
558         if (!match &&
559             (strict & RT6_LOOKUP_F_REACHABLE)) {
560                 struct rt6_info *next = rt0->dst.rt6_next;
561
562                 /* no entries matched; do round-robin */
563                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
564                         next = fn->leaf;
565
566                 if (next != rt0)
567                         fn->rr_ptr = next;
568         }
569
570         net = dev_net(rt0->dst.dev);
571         return match ? match : net->ipv6.ip6_null_entry;
572 }
573
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev.
 *
 * @opt/@len describe the raw option, @gwaddr is the gateway to use for
 * the route.  After validating the option, the matching route is looked
 * up; a zero lifetime deletes it, a non-zero lifetime creates it when
 * missing or refreshes its preference flags, and the expiry is set from
 * the lifetime (cleared when the lifetime is not finite).
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr masked_prefix;
	struct in6_addr *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;
	int min_length;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->prefix_len > 64)
		min_length = 2;
	else if (rinfo->prefix_len > 0)
		min_length = 1;
	else
		min_length = 0;

	if (rinfo->length > 3 ||
	    rinfo->prefix_len > 128 ||
	    rinfo->length < min_length)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* Build the prefix in a local buffer; this function is safe. */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
646
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed selection.
 *
 * This macro is not self-contained.  The function using it must provide:
 *   - a local "struct rt6_info *rt" holding the current selection result,
 *   - a local "struct fib6_node *fn" holding the current node,
 *   - a label "out" and a label "restart".
 *
 * It does nothing unless rt is __net's ip6_null_entry.  Otherwise it loops:
 *   - if fn is flagged RTN_TL_ROOT, jump to "out";
 *   - take fn's parent; if the parent has a subtree (FIB6_SUBTREE) that is
 *     not fn itself, continue with the result of fib6_lookup() in that
 *     subtree using saddr, else continue with the parent;
 *   - if the new fn is flagged RTN_RTINFO, jump to "restart".
 */
647 #define BACKTRACK(__net, saddr)                 \
648 do { \
649         if (rt == __net->ipv6.ip6_null_entry) { \
650                 struct fib6_node *pn; \
651                 while (1) { \
652                         if (fn->fn_flags & RTN_TL_ROOT) \
653                                 goto out; \
654                         pn = fn->parent; \
655                         if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
656                                 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
657                         else \
658                                 fn = pn; \
659                         if (fn->fn_flags & RTN_RTINFO) \
660                                 goto restart; \
661                 } \
662         } \
663 } while (0)
664
665 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
666                                              struct fib6_table *table,
667                                              struct flowi6 *fl6, int flags)
668 {
669         struct fib6_node *fn;
670         struct rt6_info *rt;
671
672         read_lock_bh(&table->tb6_lock);
673         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
674 restart:
675         rt = fn->leaf;
676         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
677         BACKTRACK(net, &fl6->saddr);
678 out:
679         dst_use(&rt->dst, jiffies);
680         read_unlock_bh(&table->tb6_lock);
681         return rt;
682
683 }
684
685 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
686                                     int flags)
687 {
688         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
689 }
690 EXPORT_SYMBOL_GPL(ip6_route_lookup);
691
692 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
693                             const struct in6_addr *saddr, int oif, int strict)
694 {
695         struct flowi6 fl6 = {
696                 .flowi6_oif = oif,
697                 .daddr = *daddr,
698         };
699         struct dst_entry *dst;
700         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
701
702         if (saddr) {
703                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
704                 flags |= RT6_LOOKUP_F_HAS_SADDR;
705         }
706
707         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
708         if (dst->error == 0)
709                 return (struct rt6_info *) dst;
710
711         dst_release(dst);
712
713         return NULL;
714 }
715
716 EXPORT_SYMBOL(rt6_lookup);
717
718 /* ip6_ins_rt is called with FREE table->tb6_lock.
719    It takes new route entry, the addition fails by any reason the
720    route is freed. In any case, if caller does not hold it, it may
721    be destroyed.
722  */
723
724 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
725 {
726         int err;
727         struct fib6_table *table;
728
729         table = rt->rt6i_table;
730         write_lock_bh(&table->tb6_lock);
731         err = fib6_add(&table->tb6_root, rt, info);
732         write_unlock_bh(&table->tb6_lock);
733
734         return err;
735 }
736
737 int ip6_ins_rt(struct rt6_info *rt)
738 {
739         struct nl_info info = {
740                 .nl_net = dev_net(rt->dst.dev),
741         };
742         return __ip6_ins_rt(rt, &info);
743 }
744
745 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
746                                       const struct in6_addr *daddr,
747                                       const struct in6_addr *saddr)
748 {
749         struct rt6_info *rt;
750
751         /*
752          *      Clone the route.
753          */
754
755         rt = ip6_rt_copy(ort, daddr);
756
757         if (rt) {
758                 int attempts = !in_softirq();
759
760                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
761                         if (ort->rt6i_dst.plen != 128 &&
762                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
763                                 rt->rt6i_flags |= RTF_ANYCAST;
764                         rt->rt6i_gateway = *daddr;
765                 }
766
767                 rt->rt6i_flags |= RTF_CACHE;
768
769 #ifdef CONFIG_IPV6_SUBTREES
770                 if (rt->rt6i_src.plen && saddr) {
771                         rt->rt6i_src.addr = *saddr;
772                         rt->rt6i_src.plen = 128;
773                 }
774 #endif
775
776         retry:
777                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
778                         struct net *net = dev_net(rt->dst.dev);
779                         int saved_rt_min_interval =
780                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
781                         int saved_rt_elasticity =
782                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
783
784                         if (attempts-- > 0) {
785                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
786                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
787
788                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
789
790                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
791                                         saved_rt_elasticity;
792                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
793                                         saved_rt_min_interval;
794                                 goto retry;
795                         }
796
797                         net_warn_ratelimited("Neighbour table overflow\n");
798                         dst_free(&rt->dst);
799                         return NULL;
800                 }
801         }
802
803         return rt;
804 }
805
806 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
807                                         const struct in6_addr *daddr)
808 {
809         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
810
811         if (rt) {
812                 rt->rt6i_flags |= RTF_CACHE;
813                 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
814         }
815         return rt;
816 }
817
/*
 * Core per-table route resolution shared by the input and output paths.
 *
 * Selects a route for @fl6 in @table with rt6_select(), using @oif for
 * device matching, and - unless the result is the null entry or already
 * an RTF_CACHE entry - creates a cached copy of it (rt6_alloc_cow() or
 * rt6_alloc_clone()) and inserts that copy with ip6_ins_rt().
 *
 * Every return path holds a reference on the returned route (dst_hold())
 * and updates its lastuse/__use counters.  The result may be
 * net->ipv6.ip6_null_entry.
 *
 * The locals fn and rt and the labels "restart" and "out" are used by
 * BACKTRACK().
 */
818 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
819                                       struct flowi6 *fl6, int flags)
820 {
821         struct fib6_node *fn;
822         struct rt6_info *rt, *nrt;
823         int strict = 0;
824         int attempts = 3;
825         int err;
            /* Require a reachable next hop only while devconf_all->forwarding is zero. */
826         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
827
828         strict |= flags & RT6_LOOKUP_F_IFACE;
829
830 relookup:
831         read_lock_bh(&table->tb6_lock);
832
833 restart_2:
834         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
835
836 restart:
837         rt = rt6_select(fn, oif, strict | reachable);
838
839         BACKTRACK(net, &fl6->saddr);
            /* Nothing to copy for the null entry or an existing cache entry. */
840         if (rt == net->ipv6.ip6_null_entry ||
841             rt->rt6i_flags & RTF_CACHE)
842                 goto out;
843
            /* Pin the route before dropping the table lock. */
844         dst_hold(&rt->dst);
845         read_unlock_bh(&table->tb6_lock);
846
            /*
             * No neighbour bound and not RTF_NONEXTHOP: make a copy with its
             * own neighbour.  Otherwise non-host routes (no DST_HOST) are
             * cloned, and host routes are returned as they are.
             */
847         if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
848                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
849         else if (!(rt->dst.flags & DST_HOST))
850                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
851         else
852                 goto out2;
853
            /* Swap the reference from the original route to the copy (or the null entry). */
854         dst_release(&rt->dst);
855         rt = nrt ? : net->ipv6.ip6_null_entry;
856
857         dst_hold(&rt->dst);
858         if (nrt) {
859                 err = ip6_ins_rt(nrt);
860                 if (!err)
861                         goto out2;
862         }
863
            /* Give up after the third failed attempt and return the current rt. */
864         if (--attempts <= 0)
865                 goto out2;
866
867         /*
868          * Race condition! In the gap, when table->tb6_lock was
869          * released someone could insert this route.  Relookup.
870          */
871         dst_release(&rt->dst);
872         goto relookup;
873
874 out:
            /* If this pass still required reachability, retry once without it. */
875         if (reachable) {
876                 reachable = 0;
877                 goto restart_2;
878         }
879         dst_hold(&rt->dst);
880         read_unlock_bh(&table->tb6_lock);
881 out2:
882         rt->dst.lastuse = jiffies;
883         rt->dst.__use++;
884
885         return rt;
886 }
887
888 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
889                                             struct flowi6 *fl6, int flags)
890 {
891         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
892 }
893
894 static struct dst_entry *ip6_route_input_lookup(struct net *net,
895                                                 struct net_device *dev,
896                                                 struct flowi6 *fl6, int flags)
897 {
898         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
899                 flags |= RT6_LOOKUP_F_IFACE;
900
901         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
902 }
903
904 void ip6_route_input(struct sk_buff *skb)
905 {
906         const struct ipv6hdr *iph = ipv6_hdr(skb);
907         struct net *net = dev_net(skb->dev);
908         int flags = RT6_LOOKUP_F_HAS_SADDR;
909         struct flowi6 fl6 = {
910                 .flowi6_iif = skb->dev->ifindex,
911                 .daddr = iph->daddr,
912                 .saddr = iph->saddr,
913                 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
914                 .flowi6_mark = skb->mark,
915                 .flowi6_proto = iph->nexthdr,
916         };
917
918         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
919 }
920
921 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
922                                              struct flowi6 *fl6, int flags)
923 {
924         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
925 }
926
927 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
928                                     struct flowi6 *fl6)
929 {
930         int flags = 0;
931
932         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
933                 flags |= RT6_LOOKUP_F_IFACE;
934
935         if (!ipv6_addr_any(&fl6->saddr))
936                 flags |= RT6_LOOKUP_F_HAS_SADDR;
937         else if (sk)
938                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
939
940         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
941 }
942
943 EXPORT_SYMBOL(ip6_route_output);
944
945 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
946 {
947         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
948         struct dst_entry *new = NULL;
949
950         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
951         if (rt) {
952                 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
953
954                 new = &rt->dst;
955
956                 new->__use = 1;
957                 new->input = dst_discard;
958                 new->output = dst_discard;
959
960                 if (dst_metrics_read_only(&ort->dst))
961                         new->_metrics = ort->dst._metrics;
962                 else
963                         dst_copy_metrics(new, &ort->dst);
964                 rt->rt6i_idev = ort->rt6i_idev;
965                 if (rt->rt6i_idev)
966                         in6_dev_hold(rt->rt6i_idev);
967
968                 rt->rt6i_gateway = ort->rt6i_gateway;
969                 rt->rt6i_flags = ort->rt6i_flags;
970                 rt6_clean_expires(rt);
971                 rt->rt6i_metric = 0;
972
973                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
974 #ifdef CONFIG_IPV6_SUBTREES
975                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
976 #endif
977
978                 dst_free(new);
979         }
980
981         dst_release(dst_orig);
982         return new ? new : ERR_PTR(-ENOMEM);
983 }
984
985 /*
986  *      Destination cache support functions
987  */
988
989 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
990 {
991         struct rt6_info *rt;
992
993         rt = (struct rt6_info *) dst;
994
995         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
996                 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
997                         if (!rt->rt6i_peer)
998                                 rt6_bind_peer(rt, 0);
999                         rt->rt6i_peer_genid = rt6_peer_genid();
1000                 }
1001                 return dst;
1002         }
1003         return NULL;
1004 }
1005
1006 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1007 {
1008         struct rt6_info *rt = (struct rt6_info *) dst;
1009
1010         if (rt) {
1011                 if (rt->rt6i_flags & RTF_CACHE) {
1012                         if (rt6_check_expired(rt)) {
1013                                 ip6_del_rt(rt);
1014                                 dst = NULL;
1015                         }
1016                 } else {
1017                         dst_release(dst);
1018                         dst = NULL;
1019                 }
1020         }
1021         return dst;
1022 }
1023
1024 static void ip6_link_failure(struct sk_buff *skb)
1025 {
1026         struct rt6_info *rt;
1027
1028         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1029
1030         rt = (struct rt6_info *) skb_dst(skb);
1031         if (rt) {
1032                 if (rt->rt6i_flags & RTF_CACHE)
1033                         rt6_update_expires(rt, 0);
1034                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1035                         rt->rt6i_node->fn_sernum = -1;
1036         }
1037 }
1038
1039 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1040 {
1041         struct rt6_info *rt6 = (struct rt6_info*)dst;
1042
1043         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1044                 rt6->rt6i_flags |= RTF_MODIFIED;
1045                 if (mtu < IPV6_MIN_MTU) {
1046                         u32 features = dst_metric(dst, RTAX_FEATURES);
1047                         mtu = IPV6_MIN_MTU;
1048                         features |= RTAX_FEATURE_ALLFRAG;
1049                         dst_metric_set(dst, RTAX_FEATURES, features);
1050                 }
1051                 dst_metric_set(dst, RTAX_MTU, mtu);
1052         }
1053 }
1054
1055 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1056 {
1057         struct net_device *dev = dst->dev;
1058         unsigned int mtu = dst_mtu(dst);
1059         struct net *net = dev_net(dev);
1060
1061         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1062
1063         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1064                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1065
1066         /*
1067          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1068          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1069          * IPV6_MAXPLEN is also valid and means: "any MSS,
1070          * rely only on pmtu discovery"
1071          */
1072         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1073                 mtu = IPV6_MAXPLEN;
1074         return mtu;
1075 }
1076
1077 static unsigned int ip6_mtu(const struct dst_entry *dst)
1078 {
1079         struct inet6_dev *idev;
1080         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1081
1082         if (mtu)
1083                 return mtu;
1084
1085         mtu = IPV6_MIN_MTU;
1086
1087         rcu_read_lock();
1088         idev = __in6_dev_get(dst->dev);
1089         if (idev)
1090                 mtu = idev->cnf.mtu6;
1091         rcu_read_unlock();
1092
1093         return mtu;
1094 }
1095
/*
 * Singly linked list (chained through dst->next) of the dsts created by
 * icmp6_dst_alloc().  icmp6_dst_gc() and icmp6_clean_all() unlink and
 * free entries from it.  Every access in this file is made with
 * icmp6_dst_lock held (spin_lock_bh).
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1098
1099 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1100                                   struct neighbour *neigh,
1101                                   struct flowi6 *fl6)
1102 {
1103         struct dst_entry *dst;
1104         struct rt6_info *rt;
1105         struct inet6_dev *idev = in6_dev_get(dev);
1106         struct net *net = dev_net(dev);
1107
1108         if (unlikely(!idev))
1109                 return ERR_PTR(-ENODEV);
1110
1111         rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1112         if (unlikely(!rt)) {
1113                 in6_dev_put(idev);
1114                 dst = ERR_PTR(-ENOMEM);
1115                 goto out;
1116         }
1117
1118         if (neigh)
1119                 neigh_hold(neigh);
1120         else {
1121                 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1122                 if (IS_ERR(neigh)) {
1123                         in6_dev_put(idev);
1124                         dst_free(&rt->dst);
1125                         return ERR_CAST(neigh);
1126                 }
1127         }
1128
1129         rt->dst.flags |= DST_HOST;
1130         rt->dst.output  = ip6_output;
1131         dst_set_neighbour(&rt->dst, neigh);
1132         atomic_set(&rt->dst.__refcnt, 1);
1133         rt->rt6i_dst.addr = fl6->daddr;
1134         rt->rt6i_dst.plen = 128;
1135         rt->rt6i_idev     = idev;
1136         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1137
1138         spin_lock_bh(&icmp6_dst_lock);
1139         rt->dst.next = icmp6_dst_gc_list;
1140         icmp6_dst_gc_list = &rt->dst;
1141         spin_unlock_bh(&icmp6_dst_lock);
1142
1143         fib6_force_start_gc(net);
1144
1145         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1146
1147 out:
1148         return dst;
1149 }
1150
1151 int icmp6_dst_gc(void)
1152 {
1153         struct dst_entry *dst, **pprev;
1154         int more = 0;
1155
1156         spin_lock_bh(&icmp6_dst_lock);
1157         pprev = &icmp6_dst_gc_list;
1158
1159         while ((dst = *pprev) != NULL) {
1160                 if (!atomic_read(&dst->__refcnt)) {
1161                         *pprev = dst->next;
1162                         dst_free(dst);
1163                 } else {
1164                         pprev = &dst->next;
1165                         ++more;
1166                 }
1167         }
1168
1169         spin_unlock_bh(&icmp6_dst_lock);
1170
1171         return more;
1172 }
1173
1174 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1175                             void *arg)
1176 {
1177         struct dst_entry *dst, **pprev;
1178
1179         spin_lock_bh(&icmp6_dst_lock);
1180         pprev = &icmp6_dst_gc_list;
1181         while ((dst = *pprev) != NULL) {
1182                 struct rt6_info *rt = (struct rt6_info *) dst;
1183                 if (func(rt, arg)) {
1184                         *pprev = dst->next;
1185                         dst_free(dst);
1186                 } else {
1187                         pprev = &dst->next;
1188                 }
1189         }
1190         spin_unlock_bh(&icmp6_dst_lock);
1191 }
1192
1193 static int ip6_dst_gc(struct dst_ops *ops)
1194 {
1195         unsigned long now = jiffies;
1196         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1197         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1198         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1199         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1200         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1201         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1202         int entries;
1203
1204         entries = dst_entries_get_fast(ops);
1205         if (time_after(rt_last_gc + rt_min_interval, now) &&
1206             entries <= rt_max_size)
1207                 goto out;
1208
1209         net->ipv6.ip6_rt_gc_expire++;
1210         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1211         net->ipv6.ip6_rt_last_gc = now;
1212         entries = dst_entries_get_slow(ops);
1213         if (entries < ops->gc_thresh)
1214                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1215 out:
1216         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1217         return entries > rt_max_size;
1218 }
1219
1220 /* Clean host part of a prefix. Not necessary in radix tree,
1221    but results in cleaner routing tables.
1222
1223    Remove it only when all the things will work!
1224  */
1225
1226 int ip6_dst_hoplimit(struct dst_entry *dst)
1227 {
1228         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1229         if (hoplimit == 0) {
1230                 struct net_device *dev = dst->dev;
1231                 struct inet6_dev *idev;
1232
1233                 rcu_read_lock();
1234                 idev = __in6_dev_get(dev);
1235                 if (idev)
1236                         hoplimit = idev->cnf.hop_limit;
1237                 else
1238                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1239                 rcu_read_unlock();
1240         }
1241         return hoplimit;
1242 }
1243 EXPORT_SYMBOL(ip6_dst_hoplimit);
1244
1245 /*
1246  *
1247  */
1248
/*
 * ip6_route_add - build a route from @cfg and insert it into its table.
 *
 * @cfg is also written to: a zero fc_metric becomes IP6_RT_PRIO_USER, an
 * RTPROT_UNSPEC fc_protocol becomes RTPROT_BOOT, and fc_nlinfo.nl_net is
 * overwritten with the namespace of the chosen device before insertion.
 *
 * On the success path the references held on dev and idev are stored in
 * the new route and the result of __ip6_ins_rt() is returned.  On every
 * failure path ("out") those references are dropped, the route is freed
 * and a negative errno is returned.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-specific routes need subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		/* Takes a reference on both the device and its inet6_dev. */
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	/*
	 * A netlink request without NLM_F_CREATE first tries the existing
	 * table; if there is none, a warning is logged and the table is
	 * created anyway.
	 */
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	rt->dst.obsolete = -1;

	/* fc_expires is given in clock_t units, relative to now. */
	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination type and flags. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

	/*
	 * Non-host routes that come with metric attributes get a zeroed
	 * metrics array attached here; the attributes themselves are
	 * applied at install_route below.
	 */
	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
		if (!metrics) {
			err = -ENOMEM;
			goto out;
		}
		dst_init_metrics(&rt->dst, metrics, 0);
	}
#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			/* Swap any references held so far for loopback ones. */
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		rt->dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		rt->rt6i_gateway = *gw_addr;
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			/* Look up the route that currently reaches the gateway. */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				/* The gateway must be reached via the requested device. */
				if (dev != grt->dst.dev) {
					dst_release(&grt->dst);
					goto out;
				}
			} else {
				/* No device requested: adopt the gateway route's. */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* Accepted only if the gateway is not itself behind a gateway. */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			dst_release(&grt->dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	/* A preferred source address must pass ipv6_chk_addr() for dev. */
	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		err = rt6_bind_neighbour(rt, dev);
		if (err)
			goto out;
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		/* Apply each metric attribute; type 0 is skipped. */
		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
			}
		}
	}

	/* The dev/idev references held here are handed to the route. */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	/* Failure: undo whatever was acquired so far. */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}
1479
1480 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1481 {
1482         int err;
1483         struct fib6_table *table;
1484         struct net *net = dev_net(rt->dst.dev);
1485
1486         if (rt == net->ipv6.ip6_null_entry)
1487                 return -ENOENT;
1488
1489         table = rt->rt6i_table;
1490         write_lock_bh(&table->tb6_lock);
1491
1492         err = fib6_del(rt, info);
1493         dst_release(&rt->dst);
1494
1495         write_unlock_bh(&table->tb6_lock);
1496
1497         return err;
1498 }
1499
1500 int ip6_del_rt(struct rt6_info *rt)
1501 {
1502         struct nl_info info = {
1503                 .nl_net = dev_net(rt->dst.dev),
1504         };
1505         return __ip6_del_rt(rt, &info);
1506 }
1507
1508 static int ip6_route_del(struct fib6_config *cfg)
1509 {
1510         struct fib6_table *table;
1511         struct fib6_node *fn;
1512         struct rt6_info *rt;
1513         int err = -ESRCH;
1514
1515         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1516         if (!table)
1517                 return err;
1518
1519         read_lock_bh(&table->tb6_lock);
1520
1521         fn = fib6_locate(&table->tb6_root,
1522                          &cfg->fc_dst, cfg->fc_dst_len,
1523                          &cfg->fc_src, cfg->fc_src_len);
1524
1525         if (fn) {
1526                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1527                         if (cfg->fc_ifindex &&
1528                             (!rt->dst.dev ||
1529                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1530                                 continue;
1531                         if (cfg->fc_flags & RTF_GATEWAY &&
1532                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1533                                 continue;
1534                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1535                                 continue;
1536                         dst_hold(&rt->dst);
1537                         read_unlock_bh(&table->tb6_lock);
1538
1539                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1540                 }
1541         }
1542         read_unlock_bh(&table->tb6_lock);
1543
1544         return err;
1545 }
1546
1547 /*
1548  *      Handle redirects
1549  */
/*
 * Flow key used for redirect validation: a flowi6 extended with the
 * address the redirect came from.  ip6_route_redirect() passes &fl6 to
 * fib6_rule_lookup(), and __ip6_route_redirect() casts that pointer back
 * to this struct, so fl6 must remain the first member.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;	/* compared against each candidate route's rt6i_gateway */
};
1554
1555 static struct rt6_info *__ip6_route_redirect(struct net *net,
1556                                              struct fib6_table *table,
1557                                              struct flowi6 *fl6,
1558                                              int flags)
1559 {
1560         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1561         struct rt6_info *rt;
1562         struct fib6_node *fn;
1563
1564         /*
1565          * Get the "current" route for this destination and
1566          * check if the redirect has come from approriate router.
1567          *
1568          * RFC 2461 specifies that redirects should only be
1569          * accepted if they come from the nexthop to the target.
1570          * Due to the way the routes are chosen, this notion
1571          * is a bit fuzzy and one might need to check all possible
1572          * routes.
1573          */
1574
1575         read_lock_bh(&table->tb6_lock);
1576         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1577 restart:
1578         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1579                 /*
1580                  * Current route is on-link; redirect is always invalid.
1581                  *
1582                  * Seems, previous statement is not true. It could
1583                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1584                  * But then router serving it might decide, that we should
1585                  * know truth 8)8) --ANK (980726).
1586                  */
1587                 if (rt6_check_expired(rt))
1588                         continue;
1589                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1590                         continue;
1591                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1592                         continue;
1593                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1594                         continue;
1595                 break;
1596         }
1597
1598         if (!rt)
1599                 rt = net->ipv6.ip6_null_entry;
1600         BACKTRACK(net, &fl6->saddr);
1601 out:
1602         dst_hold(&rt->dst);
1603
1604         read_unlock_bh(&table->tb6_lock);
1605
1606         return rt;
1607 };
1608
1609 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1610                                            const struct in6_addr *src,
1611                                            const struct in6_addr *gateway,
1612                                            struct net_device *dev)
1613 {
1614         int flags = RT6_LOOKUP_F_HAS_SADDR;
1615         struct net *net = dev_net(dev);
1616         struct ip6rd_flowi rdfl = {
1617                 .fl6 = {
1618                         .flowi6_oif = dev->ifindex,
1619                         .daddr = *dest,
1620                         .saddr = *src,
1621                 },
1622         };
1623
1624         rdfl.gateway = *gateway;
1625
1626         if (rt6_need_strict(dest))
1627                 flags |= RT6_LOOKUP_F_IFACE;
1628
1629         return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1630                                                    flags, __ip6_route_redirect);
1631 }
1632
/*
 * rt6_redirect - act on a received redirect.
 *
 * @dest:    destination being redirected
 * @src:     source address used as the flow's saddr in the route lookup
 * @saddr:   address the redirect came from; passed as the gateway that
 *           the current route must be using
 * @neigh:   neighbour entry for the new next hop
 * @lladdr:  link-layer address handed to neigh_update()
 * @on_link: non-zero when the destination is itself the next hop; the
 *           new route then has no RTF_GATEWAY flag
 *
 * If a matching current route exists, the neighbour is updated and a
 * cache route for @dest via @neigh is inserted.  NETEVENT_REDIRECT
 * notifiers are called with the old and new dst.  An old route that was
 * itself a cache entry is deleted.
 */
void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
		  const struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;
	struct net *net = dev_net(neigh->dev);

	/* rt comes back with a reference held; it is dropped at "out". */
	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		goto out;
	}

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
		goto out;

	nrt = ip6_rt_copy(rt, dest);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags & RTF_CACHE) {
		/*
		 * ip6_del_rt() drops the reference on rt itself (see
		 * __ip6_del_rt()), so return without going through "out".
		 */
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->dst);
}
1696
1697 /*
1698  *      Handle ICMP "packet too big" messages
1699  *      i.e. Path MTU discovery
1700  */
1701
/*
 * Apply a reported path MTU to the route for @daddr/@saddr.
 *
 * @ifindex: interface index passed to rt6_lookup()
 * @pmtu:    reported MTU.  Values at or above the route's current MTU
 *           are ignored; values below IPV6_MIN_MTU are raised to it and
 *           RTAX_FEATURE_ALLFRAG is set on the route.
 *
 * An existing cache route is updated in place.  Otherwise a cache entry
 * is created from the matching route (COW or clone), given the new MTU
 * and an expiry of ip6_rt_mtu_expires, and inserted.
 */
static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
			     struct net *net, u32 pmtu, int ifindex)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;
again:
	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
	if (!rt)
		return;

	/* Expired entry: delete it and redo the lookup. */
	if (rt6_check_expired(rt)) {
		ip6_del_rt(rt);
		goto again;
	}

	if (pmtu >= dst_mtu(&rt->dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
		}
		rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	/* If the allocation failed, the update is silently skipped. */
	if (nrt) {
		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
		}

		/* According to RFC 1981, a PMTU increase should not be
		 * probed for within 5 minutes; the recommended timer is
		 * 10 minutes.  The route's expiry is therefore set to
		 * ip6_rt_mtu_expires (10 minutes), after which the reduced
		 * pmtu expires and an increase is detected automatically.
		 */
		rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC;
		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->dst);
}
1785
1786 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1787                         struct net_device *dev, u32 pmtu)
1788 {
1789         struct net *net = dev_net(dev);
1790
1791         /*
1792          * RFC 1981 states that a node "MUST reduce the size of the packets it
1793          * is sending along the path" that caused the Packet Too Big message.
1794          * Since it's not possible in the general case to determine which
1795          * interface was used to send the original packet, we update the MTU
1796          * on the interface that will be used to send future packets. We also
1797          * update the MTU on the interface that received the Packet Too Big in
1798          * case the original packet was forced out that interface with
1799          * SO_BINDTODEVICE or similar. This is the next best thing to the
1800          * correct behaviour, which would be to update the MTU on all
1801          * interfaces.
1802          */
1803         rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1804         rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1805 }
1806
1807 /*
1808  *      Misc support functions
1809  */
1810
1811 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1812                                     const struct in6_addr *dest)
1813 {
1814         struct net *net = dev_net(ort->dst.dev);
1815         struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1816                                             ort->dst.dev, 0);
1817
1818         if (rt) {
1819                 rt->dst.input = ort->dst.input;
1820                 rt->dst.output = ort->dst.output;
1821                 rt->dst.flags |= DST_HOST;
1822
1823                 rt->rt6i_dst.addr = *dest;
1824                 rt->rt6i_dst.plen = 128;
1825                 dst_copy_metrics(&rt->dst, &ort->dst);
1826                 rt->dst.error = ort->dst.error;
1827                 rt->rt6i_idev = ort->rt6i_idev;
1828                 if (rt->rt6i_idev)
1829                         in6_dev_hold(rt->rt6i_idev);
1830                 rt->dst.lastuse = jiffies;
1831
1832                 rt->rt6i_gateway = ort->rt6i_gateway;
1833                 rt->rt6i_flags = ort->rt6i_flags;
1834                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1835                     (RTF_DEFAULT | RTF_ADDRCONF))
1836                         rt6_set_from(rt, ort);
1837                 else
1838                         rt6_clean_expires(rt);
1839                 rt->rt6i_metric = 0;
1840
1841 #ifdef CONFIG_IPV6_SUBTREES
1842                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1843 #endif
1844                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1845                 rt->rt6i_table = ort->rt6i_table;
1846         }
1847         return rt;
1848 }
1849
1850 #ifdef CONFIG_IPV6_ROUTE_INFO
1851 static struct rt6_info *rt6_get_route_info(struct net *net,
1852                                            const struct in6_addr *prefix, int prefixlen,
1853                                            const struct in6_addr *gwaddr, int ifindex)
1854 {
1855         struct fib6_node *fn;
1856         struct rt6_info *rt = NULL;
1857         struct fib6_table *table;
1858
1859         table = fib6_get_table(net, RT6_TABLE_INFO);
1860         if (!table)
1861                 return NULL;
1862
1863         write_lock_bh(&table->tb6_lock);
1864         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1865         if (!fn)
1866                 goto out;
1867
1868         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1869                 if (rt->dst.dev->ifindex != ifindex)
1870                         continue;
1871                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1872                         continue;
1873                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1874                         continue;
1875                 dst_hold(&rt->dst);
1876                 break;
1877         }
1878 out:
1879         write_unlock_bh(&table->tb6_lock);
1880         return rt;
1881 }
1882
1883 static struct rt6_info *rt6_add_route_info(struct net *net,
1884                                            const struct in6_addr *prefix, int prefixlen,
1885                                            const struct in6_addr *gwaddr, int ifindex,
1886                                            unsigned int pref)
1887 {
1888         struct fib6_config cfg = {
1889                 .fc_table       = RT6_TABLE_INFO,
1890                 .fc_metric      = IP6_RT_PRIO_USER,
1891                 .fc_ifindex     = ifindex,
1892                 .fc_dst_len     = prefixlen,
1893                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1894                                   RTF_UP | RTF_PREF(pref),
1895                 .fc_nlinfo.pid = 0,
1896                 .fc_nlinfo.nlh = NULL,
1897                 .fc_nlinfo.nl_net = net,
1898         };
1899
1900         cfg.fc_dst = *prefix;
1901         cfg.fc_gateway = *gwaddr;
1902
1903         /* We should treat it as a default route if prefix length is 0. */
1904         if (!prefixlen)
1905                 cfg.fc_flags |= RTF_DEFAULT;
1906
1907         ip6_route_add(&cfg);
1908
1909         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1910 }
1911 #endif
1912
1913 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1914 {
1915         struct rt6_info *rt;
1916         struct fib6_table *table;
1917
1918         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1919         if (!table)
1920                 return NULL;
1921
1922         write_lock_bh(&table->tb6_lock);
1923         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1924                 if (dev == rt->dst.dev &&
1925                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1926                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1927                         break;
1928         }
1929         if (rt)
1930                 dst_hold(&rt->dst);
1931         write_unlock_bh(&table->tb6_lock);
1932         return rt;
1933 }
1934
1935 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1936                                      struct net_device *dev,
1937                                      unsigned int pref)
1938 {
1939         struct fib6_config cfg = {
1940                 .fc_table       = RT6_TABLE_DFLT,
1941                 .fc_metric      = IP6_RT_PRIO_USER,
1942                 .fc_ifindex     = dev->ifindex,
1943                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1944                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1945                 .fc_nlinfo.pid = 0,
1946                 .fc_nlinfo.nlh = NULL,
1947                 .fc_nlinfo.nl_net = dev_net(dev),
1948         };
1949
1950         cfg.fc_gateway = *gwaddr;
1951
1952         ip6_route_add(&cfg);
1953
1954         return rt6_get_dflt_router(gwaddr, dev);
1955 }
1956
1957 void rt6_purge_dflt_routers(struct net *net)
1958 {
1959         struct rt6_info *rt;
1960         struct fib6_table *table;
1961
1962         /* NOTE: Keep consistent with rt6_get_dflt_router */
1963         table = fib6_get_table(net, RT6_TABLE_DFLT);
1964         if (!table)
1965                 return;
1966
1967 restart:
1968         read_lock_bh(&table->tb6_lock);
1969         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1970                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1971                         dst_hold(&rt->dst);
1972                         read_unlock_bh(&table->tb6_lock);
1973                         ip6_del_rt(rt);
1974                         goto restart;
1975                 }
1976         }
1977         read_unlock_bh(&table->tb6_lock);
1978 }
1979
1980 static void rtmsg_to_fib6_config(struct net *net,
1981                                  struct in6_rtmsg *rtmsg,
1982                                  struct fib6_config *cfg)
1983 {
1984         memset(cfg, 0, sizeof(*cfg));
1985
1986         cfg->fc_table = RT6_TABLE_MAIN;
1987         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1988         cfg->fc_metric = rtmsg->rtmsg_metric;
1989         cfg->fc_expires = rtmsg->rtmsg_info;
1990         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1991         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1992         cfg->fc_flags = rtmsg->rtmsg_flags;
1993
1994         cfg->fc_nlinfo.nl_net = net;
1995
1996         cfg->fc_dst = rtmsg->rtmsg_dst;
1997         cfg->fc_src = rtmsg->rtmsg_src;
1998         cfg->fc_gateway = rtmsg->rtmsg_gateway;
1999 }
2000
2001 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2002 {
2003         struct fib6_config cfg;
2004         struct in6_rtmsg rtmsg;
2005         int err;
2006
2007         switch(cmd) {
2008         case SIOCADDRT:         /* Add a route */
2009         case SIOCDELRT:         /* Delete a route */
2010                 if (!capable(CAP_NET_ADMIN))
2011                         return -EPERM;
2012                 err = copy_from_user(&rtmsg, arg,
2013                                      sizeof(struct in6_rtmsg));
2014                 if (err)
2015                         return -EFAULT;
2016
2017                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2018
2019                 rtnl_lock();
2020                 switch (cmd) {
2021                 case SIOCADDRT:
2022                         err = ip6_route_add(&cfg);
2023                         break;
2024                 case SIOCDELRT:
2025                         err = ip6_route_del(&cfg);
2026                         break;
2027                 default:
2028                         err = -EINVAL;
2029                 }
2030                 rtnl_unlock();
2031
2032                 return err;
2033         }
2034
2035         return -EINVAL;
2036 }
2037
2038 /*
2039  *      Drop the packet on the floor
2040  */
2041
2042 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2043 {
2044         int type;
2045         struct dst_entry *dst = skb_dst(skb);
2046         switch (ipstats_mib_noroutes) {
2047         case IPSTATS_MIB_INNOROUTES:
2048                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2049                 if (type == IPV6_ADDR_ANY) {
2050                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2051                                       IPSTATS_MIB_INADDRERRORS);
2052                         break;
2053                 }
2054                 /* FALLTHROUGH */
2055         case IPSTATS_MIB_OUTNOROUTES:
2056                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2057                               ipstats_mib_noroutes);
2058                 break;
2059         }
2060         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2061         kfree_skb(skb);
2062         return 0;
2063 }
2064
2065 static int ip6_pkt_discard(struct sk_buff *skb)
2066 {
2067         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2068 }
2069
2070 static int ip6_pkt_discard_out(struct sk_buff *skb)
2071 {
2072         skb->dev = skb_dst(skb)->dev;
2073         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2074 }
2075
2076 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2077
2078 static int ip6_pkt_prohibit(struct sk_buff *skb)
2079 {
2080         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2081 }
2082
2083 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2084 {
2085         skb->dev = skb_dst(skb)->dev;
2086         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2087 }
2088
2089 #endif
2090
2091 /*
2092  *      Allocate a dst for local (unicast / anycast) address.
2093  */
2094
2095 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2096                                     const struct in6_addr *addr,
2097                                     bool anycast)
2098 {
2099         struct net *net = dev_net(idev->dev);
2100         struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2101                                             net->loopback_dev, 0);
2102         int err;
2103
2104         if (!rt) {
2105                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2106                 return ERR_PTR(-ENOMEM);
2107         }
2108
2109         in6_dev_hold(idev);
2110
2111         rt->dst.flags |= DST_HOST;
2112         rt->dst.input = ip6_input;
2113         rt->dst.output = ip6_output;
2114         rt->rt6i_idev = idev;
2115         rt->dst.obsolete = -1;
2116
2117         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2118         if (anycast)
2119                 rt->rt6i_flags |= RTF_ANYCAST;
2120         else
2121                 rt->rt6i_flags |= RTF_LOCAL;
2122         err = rt6_bind_neighbour(rt, rt->dst.dev);
2123         if (err) {
2124                 dst_free(&rt->dst);
2125                 return ERR_PTR(err);
2126         }
2127
2128         rt->rt6i_dst.addr = *addr;
2129         rt->rt6i_dst.plen = 128;
2130         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2131
2132         atomic_set(&rt->dst.__refcnt, 1);
2133
2134         return rt;
2135 }
2136
2137 int ip6_route_get_saddr(struct net *net,
2138                         struct rt6_info *rt,
2139                         const struct in6_addr *daddr,
2140                         unsigned int prefs,
2141                         struct in6_addr *saddr)
2142 {
2143         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2144         int err = 0;
2145         if (rt->rt6i_prefsrc.plen)
2146                 *saddr = rt->rt6i_prefsrc.addr;
2147         else
2148                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2149                                          daddr, prefs, saddr);
2150         return err;
2151 }
2152
2153 /* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to routes on this device; NULL matches any device */
	struct net *net;	/* namespace whose ip6_null_entry must be left alone */
	struct in6_addr *addr;	/* address being removed; compared with each route's prefsrc */
};
2159
2160 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2161 {
2162         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2163         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2164         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2165
2166         if (((void *)rt->dst.dev == dev || !dev) &&
2167             rt != net->ipv6.ip6_null_entry &&
2168             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2169                 /* remove prefsrc entry */
2170                 rt->rt6i_prefsrc.plen = 0;
2171         }
2172         return 0;
2173 }
2174
2175 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2176 {
2177         struct net *net = dev_net(ifp->idev->dev);
2178         struct arg_dev_net_ip adni = {
2179                 .dev = ifp->idev->dev,
2180                 .net = net,
2181                 .addr = &ifp->addr,
2182         };
2183         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2184 }
2185
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches every device */
	struct net *net;	/* namespace whose ip6_null_entry is never selected */
};
2190
2191 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2192 {
2193         const struct arg_dev_net *adn = arg;
2194         const struct net_device *dev = adn->dev;
2195
2196         if ((rt->dst.dev == dev || !dev) &&
2197             rt != adn->net->ipv6.ip6_null_entry)
2198                 return -1;
2199
2200         return 0;
2201 }
2202
2203 void rt6_ifdown(struct net *net, struct net_device *dev)
2204 {
2205         struct arg_dev_net adn = {
2206                 .dev = dev,
2207                 .net = net,
2208         };
2209
2210         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2211         icmp6_clean_all(fib6_ifdown, &adn);
2212 }
2213
/* Argument bundle handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed; only its routes are updated */
	unsigned int mtu;	/* new MTU value written into matching routes */
};
2218
2219 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2220 {
2221         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2222         struct inet6_dev *idev;
2223
2224         /* In IPv6 pmtu discovery is not optional,
2225            so that RTAX_MTU lock cannot disable it.
2226            We still use this lock to block changes
2227            caused by addrconf/ndisc.
2228         */
2229
2230         idev = __in6_dev_get(arg->dev);
2231         if (!idev)
2232                 return 0;
2233
2234         /* For administrative MTU increase, there is no way to discover
2235            IPv6 PMTU increase, so PMTU increase should be updated here.
2236            Since RFC 1981 doesn't include administrative MTU increase
2237            update PMTU increase is a MUST. (i.e. jumbo frame)
2238          */
2239         /*
2240            If new MTU is less than route PMTU, this new MTU will be the
2241            lowest MTU in the path, update the route PMTU to reflect PMTU
2242            decreases; if new MTU is greater than route PMTU, and the
2243            old MTU is the lowest MTU in the path, update the route PMTU
2244            to reflect the increase. In this case if the other nodes' MTU
2245            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2246            PMTU discouvery.
2247          */
2248         if (rt->dst.dev == arg->dev &&
2249             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2250             (dst_mtu(&rt->dst) >= arg->mtu ||
2251              (dst_mtu(&rt->dst) < arg->mtu &&
2252               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2253                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2254         }
2255         return 0;
2256 }
2257
2258 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2259 {
2260         struct rt6_mtu_change_arg arg = {
2261                 .dev = dev,
2262                 .mtu = mtu,
2263         };
2264
2265         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2266 }
2267
2268 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2269         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2270         [RTA_OIF]               = { .type = NLA_U32 },
2271         [RTA_IIF]               = { .type = NLA_U32 },
2272         [RTA_PRIORITY]          = { .type = NLA_U32 },
2273         [RTA_METRICS]           = { .type = NLA_NESTED },
2274 };
2275
2276 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2277                               struct fib6_config *cfg)
2278 {
2279         struct rtmsg *rtm;
2280         struct nlattr *tb[RTA_MAX+1];
2281         int err;
2282
2283         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2284         if (err < 0)
2285                 goto errout;
2286
2287         err = -EINVAL;
2288         rtm = nlmsg_data(nlh);
2289         memset(cfg, 0, sizeof(*cfg));
2290
2291         cfg->fc_table = rtm->rtm_table;
2292         cfg->fc_dst_len = rtm->rtm_dst_len;
2293         cfg->fc_src_len = rtm->rtm_src_len;
2294         cfg->fc_flags = RTF_UP;
2295         cfg->fc_protocol = rtm->rtm_protocol;
2296
2297         if (rtm->rtm_type == RTN_UNREACHABLE)
2298                 cfg->fc_flags |= RTF_REJECT;
2299
2300         if (rtm->rtm_type == RTN_LOCAL)
2301                 cfg->fc_flags |= RTF_LOCAL;
2302
2303         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2304         cfg->fc_nlinfo.nlh = nlh;
2305         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2306
2307         if (tb[RTA_GATEWAY]) {
2308                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2309                 cfg->fc_flags |= RTF_GATEWAY;
2310         }
2311
2312         if (tb[RTA_DST]) {
2313                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2314
2315                 if (nla_len(tb[RTA_DST]) < plen)
2316                         goto errout;
2317
2318                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2319         }
2320
2321         if (tb[RTA_SRC]) {
2322                 int plen = (rtm->rtm_src_len + 7) >> 3;
2323
2324                 if (nla_len(tb[RTA_SRC]) < plen)
2325                         goto errout;
2326
2327                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2328         }
2329
2330         if (tb[RTA_PREFSRC])
2331                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2332
2333         if (tb[RTA_OIF])
2334                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2335
2336         if (tb[RTA_PRIORITY])
2337                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2338
2339         if (tb[RTA_METRICS]) {
2340                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2341                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2342         }
2343
2344         if (tb[RTA_TABLE])
2345                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2346
2347         err = 0;
2348 errout:
2349         return err;
2350 }
2351
2352 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2353 {
2354         struct fib6_config cfg;
2355         int err;
2356
2357         err = rtm_to_fib6_config(skb, nlh, &cfg);
2358         if (err < 0)
2359                 return err;
2360
2361         return ip6_route_del(&cfg);
2362 }
2363
2364 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2365 {
2366         struct fib6_config cfg;
2367         int err;
2368
2369         err = rtm_to_fib6_config(skb, nlh, &cfg);
2370         if (err < 0)
2371                 return err;
2372
2373         return ip6_route_add(&cfg);
2374 }
2375
2376 static inline size_t rt6_nlmsg_size(void)
2377 {
2378         return NLMSG_ALIGN(sizeof(struct rtmsg))
2379                + nla_total_size(16) /* RTA_SRC */
2380                + nla_total_size(16) /* RTA_DST */
2381                + nla_total_size(16) /* RTA_GATEWAY */
2382                + nla_total_size(16) /* RTA_PREFSRC */
2383                + nla_total_size(4) /* RTA_TABLE */
2384                + nla_total_size(4) /* RTA_IIF */
2385                + nla_total_size(4) /* RTA_OIF */
2386                + nla_total_size(4) /* RTA_PRIORITY */
2387                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2388                + nla_total_size(sizeof(struct rta_cacheinfo));
2389 }
2390
2391 static int rt6_fill_node(struct net *net,
2392                          struct sk_buff *skb, struct rt6_info *rt,
2393                          struct in6_addr *dst, struct in6_addr *src,
2394                          int iif, int type, u32 pid, u32 seq,
2395                          int prefix, int nowait, unsigned int flags)
2396 {
2397         const struct inet_peer *peer;
2398         struct rtmsg *rtm;
2399         struct nlmsghdr *nlh;
2400         long expires;
2401         u32 table;
2402         struct neighbour *n;
2403         u32 ts, tsage;
2404
2405         if (prefix) {   /* user wants prefix routes only */
2406                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2407                         /* success since this is not a prefix route */
2408                         return 1;
2409                 }
2410         }
2411
2412         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2413         if (!nlh)
2414                 return -EMSGSIZE;
2415
2416         rtm = nlmsg_data(nlh);
2417         rtm->rtm_family = AF_INET6;
2418         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2419         rtm->rtm_src_len = rt->rt6i_src.plen;
2420         rtm->rtm_tos = 0;
2421         if (rt->rt6i_table)
2422                 table = rt->rt6i_table->tb6_id;
2423         else
2424                 table = RT6_TABLE_UNSPEC;
2425         rtm->rtm_table = table;
2426         if (nla_put_u32(skb, RTA_TABLE, table))
2427                 goto nla_put_failure;
2428         if (rt->rt6i_flags & RTF_REJECT)
2429                 rtm->rtm_type = RTN_UNREACHABLE;
2430         else if (rt->rt6i_flags & RTF_LOCAL)
2431                 rtm->rtm_type = RTN_LOCAL;
2432         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2433                 rtm->rtm_type = RTN_LOCAL;
2434         else
2435                 rtm->rtm_type = RTN_UNICAST;
2436         rtm->rtm_flags = 0;
2437         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2438         rtm->rtm_protocol = rt->rt6i_protocol;
2439         if (rt->rt6i_flags & RTF_DYNAMIC)
2440                 rtm->rtm_protocol = RTPROT_REDIRECT;
2441         else if (rt->rt6i_flags & RTF_ADDRCONF)
2442                 rtm->rtm_protocol = RTPROT_KERNEL;
2443         else if (rt->rt6i_flags & RTF_DEFAULT)
2444                 rtm->rtm_protocol = RTPROT_RA;
2445
2446         if (rt->rt6i_flags & RTF_CACHE)
2447                 rtm->rtm_flags |= RTM_F_CLONED;
2448
2449         if (dst) {
2450                 if (nla_put(skb, RTA_DST, 16, dst))
2451                         goto nla_put_failure;
2452                 rtm->rtm_dst_len = 128;
2453         } else if (rtm->rtm_dst_len)
2454                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2455                         goto nla_put_failure;
2456 #ifdef CONFIG_IPV6_SUBTREES
2457         if (src) {
2458                 if (nla_put(skb, RTA_SRC, 16, src))
2459                         goto nla_put_failure;
2460                 rtm->rtm_src_len = 128;
2461         } else if (rtm->rtm_src_len &&
2462                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2463                 goto nla_put_failure;
2464 #endif
2465         if (iif) {
2466 #ifdef CONFIG_IPV6_MROUTE
2467                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2468                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2469                         if (err <= 0) {
2470                                 if (!nowait) {
2471                                         if (err == 0)
2472                                                 return 0;
2473                                         goto nla_put_failure;
2474                                 } else {
2475                                         if (err == -EMSGSIZE)
2476                                                 goto nla_put_failure;
2477                                 }
2478                         }
2479                 } else
2480 #endif
2481                         if (nla_put_u32(skb, RTA_IIF, iif))
2482                                 goto nla_put_failure;
2483         } else if (dst) {
2484                 struct in6_addr saddr_buf;
2485                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2486                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2487                         goto nla_put_failure;
2488         }
2489
2490         if (rt->rt6i_prefsrc.plen) {
2491                 struct in6_addr saddr_buf;
2492                 saddr_buf = rt->rt6i_prefsrc.addr;
2493                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2494                         goto nla_put_failure;
2495         }
2496
2497         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2498                 goto nla_put_failure;
2499
2500         rcu_read_lock();
2501         n = dst_get_neighbour_noref(&rt->dst);
2502         if (n) {
2503                 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2504                         rcu_read_unlock();
2505                         goto nla_put_failure;
2506                 }
2507         }
2508         rcu_read_unlock();
2509
2510         if (rt->dst.dev &&
2511             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2512                 goto nla_put_failure;
2513         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2514                 goto nla_put_failure;
2515         if (!(rt->rt6i_flags & RTF_EXPIRES))
2516                 expires = 0;
2517         else if (rt->dst.expires - jiffies < INT_MAX)
2518                 expires = rt->dst.expires - jiffies;
2519         else
2520                 expires = INT_MAX;
2521
2522         peer = rt->rt6i_peer;
2523         ts = tsage = 0;
2524         if (peer && peer->tcp_ts_stamp) {
2525                 ts = peer->tcp_ts;
2526                 tsage = get_seconds() - peer->tcp_ts_stamp;
2527         }
2528
2529         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
2530                                expires, rt->dst.error) < 0)
2531                 goto nla_put_failure;
2532
2533         return nlmsg_end(skb, nlh);
2534
2535 nla_put_failure:
2536         nlmsg_cancel(skb, nlh);
2537         return -EMSGSIZE;
2538 }
2539
2540 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2541 {
2542         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2543         int prefix;
2544
2545         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2546                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2547                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2548         } else
2549                 prefix = 0;
2550
2551         return rt6_fill_node(arg->net,
2552                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2553                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2554                      prefix, 0, NLM_F_MULTI);
2555 }
2556
2557 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2558 {
2559         struct net *net = sock_net(in_skb->sk);
2560         struct nlattr *tb[RTA_MAX+1];
2561         struct rt6_info *rt;
2562         struct sk_buff *skb;
2563         struct rtmsg *rtm;
2564         struct flowi6 fl6;
2565         int err, iif = 0, oif = 0;
2566
2567         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2568         if (err < 0)
2569                 goto errout;
2570
2571         err = -EINVAL;
2572         memset(&fl6, 0, sizeof(fl6));
2573
2574         if (tb[RTA_SRC]) {
2575                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2576                         goto errout;
2577
2578                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2579         }
2580
2581         if (tb[RTA_DST]) {
2582                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2583                         goto errout;
2584
2585                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2586         }
2587
2588         if (tb[RTA_IIF])
2589                 iif = nla_get_u32(tb[RTA_IIF]);
2590
2591         if (tb[RTA_OIF])
2592                 oif = nla_get_u32(tb[RTA_OIF]);
2593
2594         if (iif) {
2595                 struct net_device *dev;
2596                 int flags = 0;
2597
2598                 dev = __dev_get_by_index(net, iif);
2599                 if (!dev) {
2600                         err = -ENODEV;
2601                         goto errout;
2602                 }
2603
2604                 fl6.flowi6_iif = iif;
2605
2606                 if (!ipv6_addr_any(&fl6.saddr))
2607                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2608
2609                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2610                                                                flags);
2611         } else {
2612                 fl6.flowi6_oif = oif;
2613
2614                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2615         }
2616
2617         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2618         if (!skb) {
2619                 dst_release(&rt->dst);
2620                 err = -ENOBUFS;
2621                 goto errout;
2622         }
2623
2624         /* Reserve room for dummy headers, this skb can pass
2625            through good chunk of routing engine.
2626          */
2627         skb_reset_mac_header(skb);
2628         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2629
2630         skb_dst_set(skb, &rt->dst);
2631
2632         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2633                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2634                             nlh->nlmsg_seq, 0, 0, 0);
2635         if (err < 0) {
2636                 kfree_skb(skb);
2637                 goto errout;
2638         }
2639
2640         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2641 errout:
2642         return err;
2643 }
2644
2645 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2646 {
2647         struct sk_buff *skb;
2648         struct net *net = info->nl_net;
2649         u32 seq;
2650         int err;
2651
2652         err = -ENOBUFS;
2653         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2654
2655         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2656         if (!skb)
2657                 goto errout;
2658
2659         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2660                                 event, info->pid, seq, 0, 0, 0);
2661         if (err < 0) {
2662                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2663                 WARN_ON(err == -EMSGSIZE);
2664                 kfree_skb(skb);
2665                 goto errout;
2666         }
2667         rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2668                     info->nlh, gfp_any());
2669         return;
2670 errout:
2671         if (err < 0)
2672                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2673 }
2674
2675 static int ip6_route_dev_notify(struct notifier_block *this,
2676                                 unsigned long event, void *data)
2677 {
2678         struct net_device *dev = (struct net_device *)data;
2679         struct net *net = dev_net(dev);
2680
2681         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2682                 net->ipv6.ip6_null_entry->dst.dev = dev;
2683                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2684 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2685                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2686                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2687                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2688                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2689 #endif
2690         }
2691
2692         return NOTIFY_OK;
2693 }
2694
2695 /*
2696  *      /proc
2697  */
2698
2699 #ifdef CONFIG_PROC_FS
2700
/*
 * Buffer/offset bookkeeping record for /proc output.
 * NOTE(review): none of the /proc handlers in this section reference
 * it (they use seq_file) - confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2709
2710 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2711 {
2712         struct seq_file *m = p_arg;
2713         struct neighbour *n;
2714
2715         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2716
2717 #ifdef CONFIG_IPV6_SUBTREES
2718         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2719 #else
2720         seq_puts(m, "00000000000000000000000000000000 00 ");
2721 #endif
2722         rcu_read_lock();
2723         n = dst_get_neighbour_noref(&rt->dst);
2724         if (n) {
2725                 seq_printf(m, "%pi6", n->primary_key);
2726         } else {
2727                 seq_puts(m, "00000000000000000000000000000000");
2728         }
2729         rcu_read_unlock();
2730         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2731                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2732                    rt->dst.__use, rt->rt6i_flags,
2733                    rt->dst.dev ? rt->dst.dev->name : "");
2734         return 0;
2735 }
2736
2737 static int ipv6_route_show(struct seq_file *m, void *v)
2738 {
2739         struct net *net = (struct net *)m->private;
2740         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2741         return 0;
2742 }
2743
2744 static int ipv6_route_open(struct inode *inode, struct file *file)
2745 {
2746         return single_open_net(inode, file, ipv6_route_show);
2747 }
2748
2749 static const struct file_operations ipv6_route_proc_fops = {
2750         .owner          = THIS_MODULE,
2751         .open           = ipv6_route_open,
2752         .read           = seq_read,
2753         .llseek         = seq_lseek,
2754         .release        = single_release_net,
2755 };
2756
2757 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2758 {
2759         struct net *net = (struct net *)seq->private;
2760         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2761                    net->ipv6.rt6_stats->fib_nodes,
2762                    net->ipv6.rt6_stats->fib_route_nodes,
2763                    net->ipv6.rt6_stats->fib_rt_alloc,
2764                    net->ipv6.rt6_stats->fib_rt_entries,
2765                    net->ipv6.rt6_stats->fib_rt_cache,
2766                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2767                    net->ipv6.rt6_stats->fib_discarded_routes);
2768
2769         return 0;
2770 }
2771
2772 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2773 {
2774         return single_open_net(inode, file, rt6_stats_seq_show);
2775 }
2776
2777 static const struct file_operations rt6_stats_seq_fops = {
2778         .owner   = THIS_MODULE,
2779         .open    = rt6_stats_seq_open,
2780         .read    = seq_read,
2781         .llseek  = seq_lseek,
2782         .release = single_release_net,
2783 };
2784 #endif  /* CONFIG_PROC_FS */
2785
2786 #ifdef CONFIG_SYSCTL
2787
2788 static
2789 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2790                               void __user *buffer, size_t *lenp, loff_t *ppos)
2791 {
2792         struct net *net;
2793         int delay;
2794         if (!write)
2795                 return -EINVAL;
2796
2797         net = (struct net *)ctl->extra1;
2798         delay = net->ipv6.sysctl.flush_delay;
2799         proc_dointvec(ctl, write, buffer, lenp, ppos);
2800         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2801         return 0;
2802 }
2803
2804 ctl_table ipv6_route_table_template[] = {
2805         {
2806                 .procname       =       "flush",
2807                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2808                 .maxlen         =       sizeof(int),
2809                 .mode           =       0200,
2810                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2811         },
2812         {
2813                 .procname       =       "gc_thresh",
2814                 .data           =       &ip6_dst_ops_template.gc_thresh,
2815                 .maxlen         =       sizeof(int),
2816                 .mode           =       0644,
2817                 .proc_handler   =       proc_dointvec,
2818         },
2819         {
2820                 .procname       =       "max_size",
2821                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2822                 .maxlen         =       sizeof(int),
2823                 .mode           =       0644,
2824                 .proc_handler   =       proc_dointvec,
2825         },
2826         {
2827                 .procname       =       "gc_min_interval",
2828                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2829                 .maxlen         =       sizeof(int),
2830                 .mode           =       0644,
2831                 .proc_handler   =       proc_dointvec_jiffies,
2832         },
2833         {
2834                 .procname       =       "gc_timeout",
2835                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2836                 .maxlen         =       sizeof(int),
2837                 .mode           =       0644,
2838                 .proc_handler   =       proc_dointvec_jiffies,
2839         },
2840         {
2841                 .procname       =       "gc_interval",
2842                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2843                 .maxlen         =       sizeof(int),
2844                 .mode           =       0644,
2845                 .proc_handler   =       proc_dointvec_jiffies,
2846         },
2847         {
2848                 .procname       =       "gc_elasticity",
2849                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2850                 .maxlen         =       sizeof(int),
2851                 .mode           =       0644,
2852                 .proc_handler   =       proc_dointvec,
2853         },
2854         {
2855                 .procname       =       "mtu_expires",
2856                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2857                 .maxlen         =       sizeof(int),
2858                 .mode           =       0644,
2859                 .proc_handler   =       proc_dointvec_jiffies,
2860         },
2861         {
2862                 .procname       =       "min_adv_mss",
2863                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2864                 .maxlen         =       sizeof(int),
2865                 .mode           =       0644,
2866                 .proc_handler   =       proc_dointvec,
2867         },
2868         {
2869                 .procname       =       "gc_min_interval_ms",
2870                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2871                 .maxlen         =       sizeof(int),
2872                 .mode           =       0644,
2873                 .proc_handler   =       proc_dointvec_ms_jiffies,
2874         },
2875         { }
2876 };
2877
2878 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2879 {
2880         struct ctl_table *table;
2881
2882         table = kmemdup(ipv6_route_table_template,
2883                         sizeof(ipv6_route_table_template),
2884                         GFP_KERNEL);
2885
2886         if (table) {
2887                 table[0].data = &net->ipv6.sysctl.flush_delay;
2888                 table[0].extra1 = net;
2889                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2890                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2891                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2892                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2893                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2894                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2895                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2896                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2897                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2898         }
2899
2900         return table;
2901 }
2902 #endif
2903
2904 static int __net_init ip6_route_net_init(struct net *net)
2905 {
2906         int ret = -ENOMEM;
2907
2908         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2909                sizeof(net->ipv6.ip6_dst_ops));
2910
2911         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2912                 goto out_ip6_dst_ops;
2913
2914         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2915                                            sizeof(*net->ipv6.ip6_null_entry),
2916                                            GFP_KERNEL);
2917         if (!net->ipv6.ip6_null_entry)
2918                 goto out_ip6_dst_entries;
2919         net->ipv6.ip6_null_entry->dst.path =
2920                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2921         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2922         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2923                          ip6_template_metrics, true);
2924
2925 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2926         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2927                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2928                                                GFP_KERNEL);
2929         if (!net->ipv6.ip6_prohibit_entry)
2930                 goto out_ip6_null_entry;
2931         net->ipv6.ip6_prohibit_entry->dst.path =
2932                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2933         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2934         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2935                          ip6_template_metrics, true);
2936
2937         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2938                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2939                                                GFP_KERNEL);
2940         if (!net->ipv6.ip6_blk_hole_entry)
2941                 goto out_ip6_prohibit_entry;
2942         net->ipv6.ip6_blk_hole_entry->dst.path =
2943                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2944         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2945         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2946                          ip6_template_metrics, true);
2947 #endif
2948
2949         net->ipv6.sysctl.flush_delay = 0;
2950         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2951         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2952         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2953         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2954         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2955         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2956         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2957
2958 #ifdef CONFIG_PROC_FS
2959         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2960         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2961 #endif
2962         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2963
2964         ret = 0;
2965 out:
2966         return ret;
2967
2968 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2969 out_ip6_prohibit_entry:
2970         kfree(net->ipv6.ip6_prohibit_entry);
2971 out_ip6_null_entry:
2972         kfree(net->ipv6.ip6_null_entry);
2973 #endif
2974 out_ip6_dst_entries:
2975         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2976 out_ip6_dst_ops:
2977         goto out;
2978 }
2979
2980 static void __net_exit ip6_route_net_exit(struct net *net)
2981 {
2982 #ifdef CONFIG_PROC_FS
2983         proc_net_remove(net, "ipv6_route");
2984         proc_net_remove(net, "rt6_stats");
2985 #endif
2986         kfree(net->ipv6.ip6_null_entry);
2987 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2988         kfree(net->ipv6.ip6_prohibit_entry);
2989         kfree(net->ipv6.ip6_blk_hole_entry);
2990 #endif
2991         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2992 }
2993
2994 static struct pernet_operations ip6_route_net_ops = {
2995         .init = ip6_route_net_init,
2996         .exit = ip6_route_net_exit,
2997 };
2998
2999 static int __net_init ipv6_inetpeer_init(struct net *net)
3000 {
3001         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3002
3003         if (!bp)
3004                 return -ENOMEM;
3005         inet_peer_base_init(bp);
3006         net->ipv6.peers = bp;
3007         return 0;
3008 }
3009
3010 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3011 {
3012         struct inet_peer_base *bp = net->ipv6.peers;
3013
3014         net->ipv6.peers = NULL;
3015         inetpeer_invalidate_tree(bp);
3016         kfree(bp);
3017 }
3018
3019 static __net_initdata struct pernet_operations ipv6_inetpeer_ops = {
3020         .init   =       ipv6_inetpeer_init,
3021         .exit   =       ipv6_inetpeer_exit,
3022 };
3023
3024 static struct notifier_block ip6_route_dev_notifier = {
3025         .notifier_call = ip6_route_dev_notify,
3026         .priority = 0,
3027 };
3028
3029 int __init ip6_route_init(void)
3030 {
3031         int ret;
3032
3033         ret = -ENOMEM;
3034         ip6_dst_ops_template.kmem_cachep =
3035                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3036                                   SLAB_HWCACHE_ALIGN, NULL);
3037         if (!ip6_dst_ops_template.kmem_cachep)
3038                 goto out;
3039
3040         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3041         if (ret)
3042                 goto out_kmem_cache;
3043
3044         ret = register_pernet_subsys(&ip6_route_net_ops);
3045         if (ret)
3046                 goto out_dst_entries;
3047
3048         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3049         if (ret)
3050                 goto out_register_subsys;
3051
3052         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3053
3054         /* Registering of the loopback is done before this portion of code,
3055          * the loopback reference in rt6_info will not be taken, do it
3056          * manually for init_net */
3057         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3058         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3059   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3060         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3061         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3062         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3063         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3064   #endif
3065         ret = fib6_init();
3066         if (ret)
3067                 goto out_register_inetpeer;
3068
3069         ret = xfrm6_init();
3070         if (ret)
3071                 goto out_fib6_init;
3072
3073         ret = fib6_rules_init();
3074         if (ret)
3075                 goto xfrm6_init;
3076
3077         ret = -ENOBUFS;
3078         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3079             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3080             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3081                 goto fib6_rules_init;
3082
3083         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3084         if (ret)
3085                 goto fib6_rules_init;
3086
3087 out:
3088         return ret;
3089
3090 fib6_rules_init:
3091         fib6_rules_cleanup();
3092 xfrm6_init:
3093         xfrm6_fini();
3094 out_fib6_init:
3095         fib6_gc_cleanup();
3096 out_register_inetpeer:
3097         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3098 out_register_subsys:
3099         unregister_pernet_subsys(&ip6_route_net_ops);
3100 out_dst_entries:
3101         dst_entries_destroy(&ip6_dst_blackhole_ops);
3102 out_kmem_cache:
3103         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3104         goto out;
3105 }
3106
3107 void ip6_route_cleanup(void)
3108 {
3109         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3110         fib6_rules_cleanup();
3111         xfrm6_fini();
3112         fib6_gc_cleanup();
3113         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3114         unregister_pernet_subsys(&ip6_route_net_ops);
3115         dst_entries_destroy(&ip6_dst_blackhole_ops);
3116         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3117 }