/*
 *      Internet Control Message Protocol (ICMPv6)
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on net/ipv4/icmp.c
 *
 *      RFC 1885
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*
 *      Changes:
 *
 *      Andi Kleen              :       exception handling
 *      Andi Kleen                      add rate limits. never reply to an icmp.
 *                                      add more length checks and other fixes.
 *      yoshfuji                :       ensure to send parameter problem for
 *                                      fragments.
 *      YOSHIFUJI Hideaki @USAGI:       added sysctl for icmp rate limit.
 *      Randy Dunlap and
 *      YOSHIFUJI Hideaki @USAGI:       Per-interface statistics support
 *      Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/netfilter.h>
#include <linux/slab.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/icmpv6.h>

#include <net/ip.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/ip6_checksum.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/inet_common.h>

#include <asm/uaccess.h>

/*
 *      The ICMP socket(s). This is the most convenient way to flow control
 *      our ICMP output as well as maintain a clean interface throughout
 *      all layers. All Socketless IP sends will soon be gone.
 *
 *      On SMP we have one ICMP socket per-cpu.
 */
static inline struct sock *icmpv6_sk(struct net *net)
{
        return net->ipv6.icmp_sk[smp_processor_id()];
}

static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                       u8 type, u8 code, int offset, __be32 info)
{
        struct net *net = dev_net(skb->dev);

        if (type == ICMPV6_PKT_TOOBIG)
                ip6_update_pmtu(skb, net, info, 0, 0);
        else if (type == NDISC_REDIRECT)
                ip6_redirect(skb, net, 0, 0);
}

static int icmpv6_rcv(struct sk_buff *skb);

static const struct inet6_protocol icmpv6_protocol = {
        .handler        =       icmpv6_rcv,
        .err_handler    =       icmpv6_err,
        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
{
        struct sock *sk;

        local_bh_disable();

        sk = icmpv6_sk(net);
        if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
                /* This can happen if the output path (e.g. SIT or
                 * ip6ip6 tunnel) signals dst_link_failure() for an
                 * outgoing ICMP6 packet.
                 */
                local_bh_enable();
                return NULL;
        }
        return sk;
}

static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
{
        spin_unlock_bh(&sk->sk_lock.slock);
}

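/*
 * Editor's note (illustrative, not part of the original source): the two
 * helpers above are meant to be used as a pair.  A sender in this file
 * typically does:
 *
 *      sk = icmpv6_xmit_lock(net);
 *      if (sk == NULL)
 *              return;
 *      ... build and queue the ICMPv6 message ...
 *      icmpv6_xmit_unlock(sk);
 *
 * The trylock (rather than a plain lock) avoids deadlocking when the output
 * path re-enters ICMPv6 on the same CPU, as described in the comment inside
 * icmpv6_xmit_lock().
 */
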
/*
 * Figure out whether we may reply to this packet with an icmp error.
 *
 * We do not reply if:
 *      - it was an icmp error message.
 *      - it is truncated, so that it is known that the protocol is ICMPV6
 *        (i.e. in the middle of some exthdr)
 *
 *      --ANK (980726)
 */

static bool is_ineligible(const struct sk_buff *skb)
{
        int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
        int len = skb->len - ptr;
        __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
        __be16 frag_off;

        if (len < 0)
                return true;

        ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
        if (ptr < 0)
                return false;
        if (nexthdr == IPPROTO_ICMPV6) {
                u8 _type, *tp;
                tp = skb_header_pointer(skb,
                        ptr+offsetof(struct icmp6hdr, icmp6_type),
                        sizeof(_type), &_type);
                if (tp == NULL ||
                    !(*tp & ICMPV6_INFOMSG_MASK))
                        return true;
        }
        return false;
}

/*
 * Check the ICMP output rate limit
 */
static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
                                      struct flowi6 *fl6)
{
        struct dst_entry *dst;
        struct net *net = sock_net(sk);
        bool res = false;

        /* Informational messages are not limited. */
        if (type & ICMPV6_INFOMSG_MASK)
                return true;

        /* Do not limit pmtu discovery, it would break it. */
        if (type == ICMPV6_PKT_TOOBIG)
                return true;

        /*
         * Look up the output route.
         * XXX: perhaps the expire for routing entries cloned by
         * this lookup should be more aggressive (not longer than timeout).
         */
        dst = ip6_route_output(net, sk, fl6);
        if (dst->error) {
                IP6_INC_STATS(net, ip6_dst_idev(dst),
                              IPSTATS_MIB_OUTNOROUTES);
        } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
                res = true;
        } else {
                struct rt6_info *rt = (struct rt6_info *)dst;
                int tmo = net->ipv6.sysctl.icmpv6_time;
                struct inet_peer *peer;

                /* Give more bandwidth to wider prefixes. */
                if (rt->rt6i_dst.plen < 128)
                        tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

                peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
                res = inet_peer_xrlim_allow(peer, tmo);
                if (peer)
                        inet_putpeer(peer);
        }
        dst_release(dst);
        return res;
}

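/*
 * Editor's note (worked example, not in the original source): with the
 * "Give more bandwidth to wider prefixes" scaling above,
 * tmo >>= (128 - plen) >> 5 works out to:
 *
 *      plen == 0 (default route)  ->  tmo / 16
 *      plen in 1..32              ->  tmo / 8
 *      plen in 33..64             ->  tmo / 4
 *      plen in 65..96             ->  tmo / 2
 *      plen in 97..127            ->  tmo unchanged
 *      plen == 128 (host route)   ->  scaling skipped entirely
 *
 * i.e. the broader the matched prefix, the shorter the minimum interval
 * enforced by inet_peer_xrlim_allow().
 */
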
/*
 *      an inline helper for the "simple" if statement below
 *      checks if a parameter problem report is caused by an
 *      unrecognized IPv6 option that has the Option Type
 *      highest-order two bits set to 10
 */

static bool opt_unrec(struct sk_buff *skb, __u32 offset)
{
        u8 _optval, *op;

        offset += skb_network_offset(skb);
        op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
        if (op == NULL)
                return true;
        return (*op & 0xC0) == 0x80;
}

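/*
 * Editor's note (not part of the original source): the 0xC0/0x80 test above
 * matches RFC 2460, section 4.2 - an Option Type whose two highest-order
 * bits are "10" means "discard the packet and, regardless of whether the
 * destination address was multicast, send an ICMP Parameter Problem,
 * Code 2, message to the source".  That is why icmp6_send() below exempts
 * this case from its multicast-destination check.
 */
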
static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len)
{
        struct sk_buff *skb;
        struct icmp6hdr *icmp6h;
        int err = 0;

        if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
                goto out;

        icmp6h = icmp6_hdr(skb);
        memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
        icmp6h->icmp6_cksum = 0;

        if (skb_queue_len(&sk->sk_write_queue) == 1) {
                skb->csum = csum_partial(icmp6h,
                                        sizeof(struct icmp6hdr), skb->csum);
                icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
                                                      &fl6->daddr,
                                                      len, fl6->flowi6_proto,
                                                      skb->csum);
        } else {
                __wsum tmp_csum = 0;

                skb_queue_walk(&sk->sk_write_queue, skb) {
                        tmp_csum = csum_add(tmp_csum, skb->csum);
                }

                tmp_csum = csum_partial(icmp6h,
                                        sizeof(struct icmp6hdr), tmp_csum);
                icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
                                                      &fl6->daddr,
                                                      len, fl6->flowi6_proto,
                                                      tmp_csum);
        }
        ip6_push_pending_frames(sk);
out:
        return err;
}

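/*
 * Editor's note (not part of the original source): the checksum assembled
 * above is the standard ICMPv6 checksum - a partial sum over the queued
 * fragments plus the icmp6hdr itself, which csum_ipv6_magic() then combines
 * with the IPv6 pseudo-header (source address, destination address,
 * upper-layer length and next-header value) and folds into icmp6_cksum.
 */
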
struct icmpv6_msg {
        struct sk_buff  *skb;
        int             offset;
        uint8_t         type;
};

static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
        struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
        struct sk_buff *org_skb = msg->skb;
        __wsum csum = 0;

        csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
                                      to, len, csum);
        skb->csum = csum_block_add(skb->csum, csum, odd);
        if (!(msg->type & ICMPV6_INFOMSG_MASK))
                nf_ct_attach(skb, org_skb);
        return 0;
}

#if IS_ENABLED(CONFIG_IPV6_MIP6)
static void mip6_addr_swap(struct sk_buff *skb)
{
        struct ipv6hdr *iph = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct ipv6_destopt_hao *hao;
        struct in6_addr tmp;
        int off;

        if (opt->dsthao) {
                off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
                if (likely(off >= 0)) {
                        hao = (struct ipv6_destopt_hao *)
                                        (skb_network_header(skb) + off);
                        tmp = iph->saddr;
                        iph->saddr = hao->addr;
                        hao->addr = tmp;
                }
        }
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif

static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
                                             struct sock *sk, struct flowi6 *fl6)
{
        struct dst_entry *dst, *dst2;
        struct flowi6 fl2;
        int err;

        err = ip6_dst_lookup(sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);

        /*
         * We won't send an icmp error if the destination is a known
         * anycast address.
         */
        if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
                LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n");
                dst_release(dst);
                return ERR_PTR(-EINVAL);
        }

        /* No need to clone since we're just using its address. */
        dst2 = dst;

        dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
        if (!IS_ERR(dst)) {
                if (dst != dst2)
                        return dst;
        } else {
                if (PTR_ERR(dst) == -EPERM)
                        dst = NULL;
                else
                        return dst;
        }

        err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
        if (err)
                goto relookup_failed;

        err = ip6_dst_lookup(sk, &dst2, &fl2);
        if (err)
                goto relookup_failed;

        dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
        if (!IS_ERR(dst2)) {
                dst_release(dst);
                dst = dst2;
        } else {
                err = PTR_ERR(dst2);
                if (err == -EPERM) {
                        dst_release(dst);
                        return dst2;
                } else
                        goto relookup_failed;
        }

relookup_failed:
        if (dst)
                return dst;
        return ERR_PTR(err);
}

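/*
 * Editor's note (not part of the original source): icmpv6_route_lookup()
 * returns either a usable dst_entry or an ERR_PTR() value, so callers must
 * test the result with IS_ERR().  The second pass through xfrm_lookup()
 * with XFRM_LOOKUP_ICMP appears intended to let the error packet be routed
 * according to the reverse of the offending packet's IPsec session when the
 * straightforward lookup is denied with -EPERM.
 */
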
/*
 *      Send an ICMP message in response to a packet in error
 */
static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
{
        struct net *net = dev_net(skb->dev);
        struct inet6_dev *idev = NULL;
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct sock *sk;
        struct ipv6_pinfo *np;
        const struct in6_addr *saddr = NULL;
        struct dst_entry *dst;
        struct icmp6hdr tmp_hdr;
        struct flowi6 fl6;
        struct icmpv6_msg msg;
        int iif = 0;
        int addr_type = 0;
        int len;
        int hlimit;
        int err = 0;

        if ((u8 *)hdr < skb->head ||
            (skb->network_header + sizeof(*hdr)) > skb->tail)
                return;

        /*
         *      Make sure we respect the rules
         *      i.e. RFC 1885 2.4(e)
         *      Rule (e.1) is enforced by not using icmp6_send
         *      in any code that processes icmp errors.
         */
        addr_type = ipv6_addr_type(&hdr->daddr);

        if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
                saddr = &hdr->daddr;

        /*
         *      Dest addr check
         */

        if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
                if (type != ICMPV6_PKT_TOOBIG &&
                    !(type == ICMPV6_PARAMPROB &&
                      code == ICMPV6_UNK_OPTION &&
                      (opt_unrec(skb, info))))
                        return;

                saddr = NULL;
        }

        addr_type = ipv6_addr_type(&hdr->saddr);

        /*
         *      Source addr check
         */

        if (__ipv6_addr_needs_scope_id(addr_type))
                iif = skb->dev->ifindex;

        /*
         *      Must not send error if the source does not uniquely
         *      identify a single node (RFC2463 Section 2.4).
         *      We check unspecified / multicast addresses here,
         *      and anycast addresses will be checked later.
         */
        if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
                LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n");
                return;
        }

        /*
         *      Never answer an ICMP error packet.
         */
        if (is_ineligible(skb)) {
                LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n");
                return;
        }

        mip6_addr_swap(skb);

        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_proto = IPPROTO_ICMPV6;
        fl6.daddr = hdr->saddr;
        if (saddr)
                fl6.saddr = *saddr;
        fl6.flowi6_oif = iif;
        fl6.fl6_icmp_type = type;
        fl6.fl6_icmp_code = code;
        security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

        sk = icmpv6_xmit_lock(net);
        if (sk == NULL)
                return;
        np = inet6_sk(sk);

        if (!icmpv6_xrlim_allow(sk, type, &fl6))
                goto out;

        tmp_hdr.icmp6_type = type;
        tmp_hdr.icmp6_code = code;
        tmp_hdr.icmp6_cksum = 0;
        tmp_hdr.icmp6_pointer = htonl(info);

        if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
                fl6.flowi6_oif = np->mcast_oif;
        else if (!fl6.flowi6_oif)
                fl6.flowi6_oif = np->ucast_oif;

        dst = icmpv6_route_lookup(net, skb, sk, &fl6);
        if (IS_ERR(dst))
                goto out;

        if (ipv6_addr_is_multicast(&fl6.daddr))
                hlimit = np->mcast_hops;
        else
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        msg.skb = skb;
        msg.offset = skb_network_offset(skb);
        msg.type = type;

        len = skb->len - msg.offset;
        len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
        if (len < 0) {
                LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
                goto out_dst_release;
        }

        rcu_read_lock();
        idev = __in6_dev_get(skb->dev);

        err = ip6_append_data(sk, icmpv6_getfrag, &msg,
                              len + sizeof(struct icmp6hdr),
                              sizeof(struct icmp6hdr), hlimit,
                              np->tclass, NULL, &fl6, (struct rt6_info *)dst,
                              MSG_DONTWAIT, np->dontfrag);
        if (err) {
                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
                ip6_flush_pending_frames(sk);
        } else {
                err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
                                                 len + sizeof(struct icmp6hdr));
        }
        rcu_read_unlock();
out_dst_release:
        dst_release(dst);
out:
        icmpv6_xmit_unlock(sk);
}

/* Slightly more convenient version of icmp6_send.
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
        icmp6_send(skb, ICMPV6_PARAMPROB, code, pos);
        kfree_skb(skb);
}

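/*
 * Editor's illustration (hypothetical caller, not part of the original
 * source): extension-header parsing code reports an unrecognized option
 * roughly like this:
 *
 *      icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
 *      return;
 *
 * where "optoff" is the offset of the offending option.  Note that
 * icmpv6_param_prob() frees the skb (kfree_skb above), so the caller must
 * not touch it afterwards.
 */
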
static void icmpv6_echo_reply(struct sk_buff *skb)
{
        struct net *net = dev_net(skb->dev);
        struct sock *sk;
        struct inet6_dev *idev;
        struct ipv6_pinfo *np;
        const struct in6_addr *saddr = NULL;
        struct icmp6hdr *icmph = icmp6_hdr(skb);
        struct icmp6hdr tmp_hdr;
        struct flowi6 fl6;
        struct icmpv6_msg msg;
        struct dst_entry *dst;
        int err = 0;
        int hlimit;

        saddr = &ipv6_hdr(skb)->daddr;

        if (!ipv6_unicast_destination(skb))
                saddr = NULL;

        memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
        tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_proto = IPPROTO_ICMPV6;
        fl6.daddr = ipv6_hdr(skb)->saddr;
        if (saddr)
                fl6.saddr = *saddr;
        fl6.flowi6_oif = skb->dev->ifindex;
        fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
        security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

        sk = icmpv6_xmit_lock(net);
        if (sk == NULL)
                return;
        np = inet6_sk(sk);

        if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
                fl6.flowi6_oif = np->mcast_oif;
        else if (!fl6.flowi6_oif)
                fl6.flowi6_oif = np->ucast_oif;

        err = ip6_dst_lookup(sk, &dst, &fl6);
        if (err)
                goto out;
        dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
        if (IS_ERR(dst))
                goto out;

        if (ipv6_addr_is_multicast(&fl6.daddr))
                hlimit = np->mcast_hops;
        else
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        idev = __in6_dev_get(skb->dev);

        msg.skb = skb;
        msg.offset = 0;
        msg.type = ICMPV6_ECHO_REPLY;

        err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
                                sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
                                (struct rt6_info *)dst, MSG_DONTWAIT,
                                np->dontfrag);

        if (err) {
                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
                ip6_flush_pending_frames(sk);
        } else {
                err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
                                                 skb->len + sizeof(struct icmp6hdr));
        }
        dst_release(dst);
out:
        icmpv6_xmit_unlock(sk);
}

void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
        const struct inet6_protocol *ipprot;
        int inner_offset;
        __be16 frag_off;
        u8 nexthdr;

        if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
                return;

        nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
        if (ipv6_ext_hdr(nexthdr)) {
                /* now skip over extension headers */
                inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
                                                &nexthdr, &frag_off);
                if (inner_offset < 0)
                        return;
        } else {
                inner_offset = sizeof(struct ipv6hdr);
        }

        /* Checking header including 8 bytes of inner protocol header. */
        if (!pskb_may_pull(skb, inner_offset + 8))
                return;

        /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
           Without this we will not be able e.g. to make source routed
           pmtu discovery work.
           Corresponding argument (opt) to notifiers is already added.
           --ANK (980726)
         */

        rcu_read_lock();
        ipprot = rcu_dereference(inet6_protos[nexthdr]);
        if (ipprot && ipprot->err_handler)
                ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
        rcu_read_unlock();

        raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
}

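/*
 * Editor's note (illustrative, not part of the original source): the
 * err_handler invoked above has the same prototype as icmpv6_err() near the
 * top of this file, i.e. a hypothetical transport handler would look like:
 *
 *      static void my_proto_v6_err(struct sk_buff *skb,
 *                                  struct inet6_skb_parm *opt,
 *                                  u8 type, u8 code, int offset, __be32 info);
 *
 * "offset" is the offset of the inner (offending) transport header within
 * the quoted packet, and "info" carries e.g. the MTU for PKT_TOOBIG.
 */
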
/*
 *      Handle icmp messages
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
        struct net_device *dev = skb->dev;
        struct inet6_dev *idev = __in6_dev_get(dev);
        const struct in6_addr *saddr, *daddr;
        struct icmp6hdr *hdr;
        u8 type;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
                struct sec_path *sp = skb_sec_path(skb);
                int nh;

                if (!(sp && sp->xvec[sp->len - 1]->props.flags &
                                 XFRM_STATE_ICMP))
                        goto drop_no_count;

                if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
                        goto drop_no_count;

                nh = skb_network_offset(skb);
                skb_set_network_header(skb, sizeof(*hdr));

                if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
                        goto drop_no_count;

                skb_set_network_header(skb, nh);
        }

        ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);

        saddr = &ipv6_hdr(skb)->saddr;
        daddr = &ipv6_hdr(skb)->daddr;

        /* Perform checksum. */
        switch (skb->ip_summed) {
        case CHECKSUM_COMPLETE:
                if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
                                     skb->csum))
                        break;
                /* fall through */
        case CHECKSUM_NONE:
                skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
                                             IPPROTO_ICMPV6, 0));
                if (__skb_checksum_complete(skb)) {
                        LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
                                       saddr, daddr);
                        goto csum_error;
                }
        }

        if (!pskb_pull(skb, sizeof(*hdr)))
                goto discard_it;

        hdr = icmp6_hdr(skb);

        type = hdr->icmp6_type;

        ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);

        switch (type) {
        case ICMPV6_ECHO_REQUEST:
                icmpv6_echo_reply(skb);
                break;

        case ICMPV6_ECHO_REPLY:
                /* we couldn't care less */
                break;

        case ICMPV6_PKT_TOOBIG:
                /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
                   standard destination cache. It seems only an "advanced"
                   destination cache will allow us to solve this problem
                   --ANK (980726)
                 */
                if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
                        goto discard_it;
                hdr = icmp6_hdr(skb);

                /*
                 *      Drop through to notify
                 */

        case ICMPV6_DEST_UNREACH:
        case ICMPV6_TIME_EXCEED:
        case ICMPV6_PARAMPROB:
                icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
                break;

        case NDISC_ROUTER_SOLICITATION:
        case NDISC_ROUTER_ADVERTISEMENT:
        case NDISC_NEIGHBOUR_SOLICITATION:
        case NDISC_NEIGHBOUR_ADVERTISEMENT:
        case NDISC_REDIRECT:
                ndisc_rcv(skb);
                break;

        case ICMPV6_MGM_QUERY:
                igmp6_event_query(skb);
                break;

        case ICMPV6_MGM_REPORT:
                igmp6_event_report(skb);
                break;

        case ICMPV6_MGM_REDUCTION:
        case ICMPV6_NI_QUERY:
        case ICMPV6_NI_REPLY:
        case ICMPV6_MLD2_REPORT:
        case ICMPV6_DHAAD_REQUEST:
        case ICMPV6_DHAAD_REPLY:
        case ICMPV6_MOBILE_PREFIX_SOL:
        case ICMPV6_MOBILE_PREFIX_ADV:
                break;

        default:
                LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");

                /* informational */
                if (type & ICMPV6_INFOMSG_MASK)
                        break;

                /*
                 * error of unknown type.
                 * must pass to upper level
                 */

                icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
        }

        kfree_skb(skb);
        return 0;

csum_error:
        ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
        ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
        kfree_skb(skb);
        return 0;
}

void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
                      u8 type,
                      const struct in6_addr *saddr,
                      const struct in6_addr *daddr,
                      int oif)
{
        memset(fl6, 0, sizeof(*fl6));
        fl6->saddr = *saddr;
        fl6->daddr = *daddr;
        fl6->flowi6_proto       = IPPROTO_ICMPV6;
        fl6->fl6_icmp_type      = type;
        fl6->fl6_icmp_code      = 0;
        fl6->flowi6_oif         = oif;
        security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
}

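/*
 * Editor's illustration (hypothetical usage, not part of the original
 * source): callers elsewhere in the IPv6 stack fill a flow for an outgoing
 * ICMPv6 message along the lines of:
 *
 *      struct flowi6 fl6;
 *
 *      icmpv6_flow_init(sk, &fl6, NDISC_NEIGHBOUR_SOLICITATION,
 *                       saddr, daddr, dev->ifindex);
 *
 * after which the flow can be handed to the usual ip6_dst_lookup() /
 * ip6_append_data() machinery.
 */
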
/*
 * Special lock-class for __icmpv6_sk:
 */
static struct lock_class_key icmpv6_socket_sk_dst_lock_key;

static int __net_init icmpv6_sk_init(struct net *net)
{
        struct sock *sk;
        int err, i, j;

        net->ipv6.icmp_sk =
                kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
        if (net->ipv6.icmp_sk == NULL)
                return -ENOMEM;

        for_each_possible_cpu(i) {
                err = inet_ctl_sock_create(&sk, PF_INET6,
                                           SOCK_RAW, IPPROTO_ICMPV6, net);
                if (err < 0) {
                        pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
                               err);
                        goto fail;
                }

                net->ipv6.icmp_sk[i] = sk;

                /*
                 * Split off their lock-class, because sk->sk_dst_lock
                 * gets used from softirqs, which is safe for
                 * __icmpv6_sk (because those never get directly used
                 * via userspace syscalls), but unsafe for normal sockets.
                 */
                lockdep_set_class(&sk->sk_dst_lock,
                                  &icmpv6_socket_sk_dst_lock_key);

                /* Enough space for 2 64K ICMP packets, including
                 * sk_buff struct overhead.
                 */
                sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
        }
        return 0;

 fail:
        for (j = 0; j < i; j++)
                inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
        kfree(net->ipv6.icmp_sk);
        return err;
}

static void __net_exit icmpv6_sk_exit(struct net *net)
{
        int i;

        for_each_possible_cpu(i) {
                inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
        }
        kfree(net->ipv6.icmp_sk);
}

static struct pernet_operations icmpv6_sk_ops = {
       .init = icmpv6_sk_init,
       .exit = icmpv6_sk_exit,
};

int __init icmpv6_init(void)
{
        int err;

        err = register_pernet_subsys(&icmpv6_sk_ops);
        if (err < 0)
                return err;

        err = -EAGAIN;
        if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
                goto fail;

        err = inet6_register_icmp_sender(icmp6_send);
        if (err)
                goto sender_reg_err;
        return 0;

sender_reg_err:
        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
        pr_err("Failed to register ICMP6 protocol\n");
        unregister_pernet_subsys(&icmpv6_sk_ops);
        return err;
}

void icmpv6_cleanup(void)
{
        inet6_unregister_icmp_sender(icmp6_send);
        unregister_pernet_subsys(&icmpv6_sk_ops);
        inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}


static const struct icmp6_err {
        int err;
        int fatal;
} tab_unreach[] = {
        {       /* NOROUTE */
                .err    = ENETUNREACH,
                .fatal  = 0,
        },
        {       /* ADM_PROHIBITED */
                .err    = EACCES,
                .fatal  = 1,
        },
        {       /* Was NOT_NEIGHBOUR, now reserved */
                .err    = EHOSTUNREACH,
                .fatal  = 0,
        },
        {       /* ADDR_UNREACH */
                .err    = EHOSTUNREACH,
                .fatal  = 0,
        },
        {       /* PORT_UNREACH */
                .err    = ECONNREFUSED,
                .fatal  = 1,
        },
};

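/*
 * Editor's note (not part of the original source): tab_unreach[] is indexed
 * directly by the ICMPv6 Destination Unreachable code (0 = no route,
 * 1 = administratively prohibited, 2 = reserved, 3 = address unreachable,
 * 4 = port unreachable); icmpv6_err_convert() below bounds-checks the code
 * against ICMPV6_PORT_UNREACH before using the table.
 */
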
int icmpv6_err_convert(u8 type, u8 code, int *err)
{
        int fatal = 0;

        *err = EPROTO;

        switch (type) {
        case ICMPV6_DEST_UNREACH:
                fatal = 1;
                if (code <= ICMPV6_PORT_UNREACH) {
                        *err  = tab_unreach[code].err;
                        fatal = tab_unreach[code].fatal;
                }
                break;

        case ICMPV6_PKT_TOOBIG:
                *err = EMSGSIZE;
                break;

        case ICMPV6_PARAMPROB:
                *err = EPROTO;
                fatal = 1;
                break;

        case ICMPV6_TIME_EXCEED:
                *err = EHOSTUNREACH;
                break;
        }

        return fatal;
}
EXPORT_SYMBOL(icmpv6_err_convert);

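/*
 * Editor's illustration (hypothetical transport-side usage, not part of the
 * original source): an err_handler that receives an ICMPv6 error for one of
 * its sockets might translate it roughly like this:
 *
 *      int err;
 *      int fatal = icmpv6_err_convert(type, code, &err);
 *
 *      if (fatal)
 *              sk->sk_err = err;       (hard error, surfaced to userspace)
 *      else
 *              sk->sk_err_soft = err;  (soft error, only remembered)
 *
 * The exact policy is up to each transport protocol.
 */
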
#ifdef CONFIG_SYSCTL
ctl_table ipv6_icmp_table_template[] = {
        {
                .procname       = "ratelimit",
                .data           = &init_net.ipv6.sysctl.icmpv6_time,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_ms_jiffies,
        },
        { },
};

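/*
 * Editor's note (not part of the original source): because the handler is
 * proc_dointvec_ms_jiffies, the value behind "ratelimit" is kept in jiffies
 * internally but is read and written through sysctl in milliseconds
 * (net.ipv6.icmp.ratelimit).
 */
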
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
        struct ctl_table *table;

        table = kmemdup(ipv6_icmp_table_template,
                        sizeof(ipv6_icmp_table_template),
                        GFP_KERNEL);

        if (table)
                table[0].data = &net->ipv6.sysctl.icmpv6_time;

        return table;
}
#endif
