net/ipv6/ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
16  *      A.N.Kuznetsov   :       arithmetic in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv6.h>
43
44 #include <net/sock.h>
45 #include <net/snmp.h>
46
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/protocol.h>
50 #include <net/ip6_route.h>
51 #include <net/addrconf.h>
52 #include <net/rawv6.h>
53 #include <net/icmp.h>
54 #include <net/xfrm.h>
55 #include <net/checksum.h>
56 #include <linux/mroute6.h>
57
58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
59
60 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
61 {
62         static u32 ipv6_fragmentation_id = 1;
63         static DEFINE_SPINLOCK(ip6_id_lock);
64
65         spin_lock_bh(&ip6_id_lock);
66         fhdr->identification = htonl(ipv6_fragmentation_id);
67         if (++ipv6_fragmentation_id == 0)
68                 ipv6_fragmentation_id = 1;
69         spin_unlock_bh(&ip6_id_lock);
70 }
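
/* Usage sketch (editor's note, illustrative rather than taken from a
 * caller in this file): the caller builds the fragment header itself and
 * only delegates the identification, e.g.
 *
 *	fh->nexthdr  = nexthdr;
 *	fh->reserved = 0;
 *	fh->frag_off = htons(IP6_MF);
 *	ipv6_select_ident(skb, fh);
 *
 * The counter starts at 1 and skips 0 on wrap, so a zero identification
 * can serve as a "not yet chosen" sentinel (see frag_id in ip6_fragment
 * below).
 */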
71
72 int __ip6_local_out(struct sk_buff *skb)
73 {
74         int len;
75
76         len = skb->len - sizeof(struct ipv6hdr);
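        /* A payload larger than IPV6_MAXPLEN does not fit in the 16-bit
         * payload_len field; 0 is what a jumbogram carries there
         * (RFC 2675), the real length living in a hop-by-hop option.
         */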
77         if (len > IPV6_MAXPLEN)
78                 len = 0;
79         ipv6_hdr(skb)->payload_len = htons(len);
80
81         return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
82                        dst_output);
83 }
84
85 int ip6_local_out(struct sk_buff *skb)
86 {
87         int err;
88
89         err = __ip6_local_out(skb);
90         if (likely(err == 1))
91                 err = dst_output(skb);
92
93         return err;
94 }
95 EXPORT_SYMBOL_GPL(ip6_local_out);
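
/* Editor's note on the err == 1 convention above: nf_hook() returns 1
 * when every NF_INET_LOCAL_OUT hook accepted the packet without stealing
 * or queueing it, in which case the caller must invoke the okfn
 * (dst_output) itself, as ip6_local_out() does.
 */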
96
97 static int ip6_output_finish(struct sk_buff *skb)
98 {
99         struct dst_entry *dst = skb->dst;
100
101         if (dst->hh)
102                 return neigh_hh_output(dst->hh, skb);
103         else if (dst->neighbour)
104                 return dst->neighbour->output(skb);
105
106         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
107         kfree_skb(skb);
108         return -EINVAL;
109
110 }
111
112 /* dev_loopback_xmit for use with netfilter. */
113 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
114 {
115         skb_reset_mac_header(newskb);
116         __skb_pull(newskb, skb_network_offset(newskb));
117         newskb->pkt_type = PACKET_LOOPBACK;
118         newskb->ip_summed = CHECKSUM_UNNECESSARY;
119         WARN_ON(!newskb->dst);
120
121         netif_rx(newskb);
122         return 0;
123 }
124
125
126 static int ip6_output2(struct sk_buff *skb)
127 {
128         struct dst_entry *dst = skb->dst;
129         struct net_device *dev = dst->dev;
130
131         skb->protocol = htons(ETH_P_IPV6);
132         skb->dev = dev;
133
134         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
135                 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
136                 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
137
138                 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
139                     ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
140                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
141                                          &ipv6_hdr(skb)->saddr))) {
142                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
143
144                         /* Do not check for IFF_ALLMULTI; multicast routing
145                            is not supported in any case.
146                          */
147                         if (newskb)
148                                 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
149                                         NULL, newskb->dev,
150                                         ip6_dev_loopback_xmit);
151
152                         if (ipv6_hdr(skb)->hop_limit == 0) {
153                                 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
154                                 kfree_skb(skb);
155                                 return 0;
156                         }
157                 }
158
159                 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
160         }
161
162         return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
163                        ip6_output_finish);
164 }
165
166 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
167 {
168         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
169
170         return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
171                skb->dst->dev->mtu : dst_mtu(skb->dst);
172 }
173
174 int ip6_output(struct sk_buff *skb)
175 {
176         struct inet6_dev *idev = ip6_dst_idev(skb->dst);
177         if (unlikely(idev->cnf.disable_ipv6)) {
178                 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
179                 kfree_skb(skb);
180                 return 0;
181         }
182
183         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
184                                 dst_allfrag(skb->dst))
185                 return ip6_fragment(skb, ip6_output2);
186         else
187                 return ip6_output2(skb);
188 }
189
190 /*
191  *      xmit an sk_buff (used by TCP)
192  */
193
194 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
195              struct ipv6_txoptions *opt, int ipfragok)
196 {
197         struct ipv6_pinfo *np = inet6_sk(sk);
198         struct in6_addr *first_hop = &fl->fl6_dst;
199         struct dst_entry *dst = skb->dst;
200         struct ipv6hdr *hdr;
201         u8  proto = fl->proto;
202         int seg_len = skb->len;
203         int hlimit, tclass;
204         u32 mtu;
205
206         if (opt) {
207                 unsigned int head_room;
208
209         /* First: exthdrs may take lots of space (~8K for now);
210            MAX_HEADER is not enough.
211          */
212                 head_room = opt->opt_nflen + opt->opt_flen;
213                 seg_len += head_room;
214                 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
215
216                 if (skb_headroom(skb) < head_room) {
217                         struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
218                         if (skb2 == NULL) {
219                                 IP6_INC_STATS(ip6_dst_idev(skb->dst),
220                                               IPSTATS_MIB_OUTDISCARDS);
221                                 kfree_skb(skb);
222                                 return -ENOBUFS;
223                         }
224                         kfree_skb(skb);
225                         skb = skb2;
226                         if (sk)
227                                 skb_set_owner_w(skb, sk);
228                 }
229                 if (opt->opt_flen)
230                         ipv6_push_frag_opts(skb, opt, &proto);
231                 if (opt->opt_nflen)
232                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
233         }
234
235         skb_push(skb, sizeof(struct ipv6hdr));
236         skb_reset_network_header(skb);
237         hdr = ipv6_hdr(skb);
238
239         /* Allow local fragmentation. */
240         if (ipfragok)
241                 skb->local_df = 1;
242
243         /*
244          *      Fill in the IPv6 header
245          */
246
247         hlimit = -1;
248         if (np)
249                 hlimit = np->hop_limit;
250         if (hlimit < 0)
251                 hlimit = ip6_dst_hoplimit(dst);
252
253         tclass = -1;
254         if (np)
255                 tclass = np->tclass;
256         if (tclass < 0)
257                 tclass = 0;
258
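        /* First 32 bits of the IPv6 header: version (4 bits), traffic
         * class (8 bits), flow label (20 bits). 0x6 in the top nibble is
         * the version; tclass << 20 places the traffic class in bits
         * 20-27 of the host-order word before the htonl().
         */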
259         *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
260
261         hdr->payload_len = htons(seg_len);
262         hdr->nexthdr = proto;
263         hdr->hop_limit = hlimit;
264
265         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
266         ipv6_addr_copy(&hdr->daddr, first_hop);
267
268         skb->priority = sk->sk_priority;
269         skb->mark = sk->sk_mark;
270
271         mtu = dst_mtu(dst);
272         if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
273                 IP6_INC_STATS(ip6_dst_idev(skb->dst),
274                               IPSTATS_MIB_OUTREQUESTS);
275                 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
276                                 dst_output);
277         }
278
279         if (net_ratelimit())
280                 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
281         skb->dev = dst->dev;
282         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
283         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
284         kfree_skb(skb);
285         return -EMSGSIZE;
286 }
287
288 EXPORT_SYMBOL(ip6_xmit);
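
/* Caller sketch (editor's note, illustrative): a transport such as TCP
 * typically fills a flowi describing the connection and hands the skb
 * over with ipfragok == 0, roughly:
 *
 *	struct flowi fl;
 *
 *	memset(&fl, 0, sizeof(fl));
 *	fl.proto = IPPROTO_TCP;
 *	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
 *	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 *	fl.oif = sk->sk_bound_dev_if;
 *	err = ip6_xmit(sk, skb, &fl, np->opt, 0);
 *
 * so an oversized segment draws ICMPV6_PKT_TOOBIG back at the sender
 * instead of being fragmented locally.
 */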
289
290 /*
291  *      To avoid extra problems, ND packets are sent through this
292  *      routine. It's code duplication, but I really want to avoid
293  *      extra checks since ipv6_build_header is used by TCP (which
294  *      is performance-critical for us).
295  */
296
297 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
298                const struct in6_addr *saddr, const struct in6_addr *daddr,
299                int proto, int len)
300 {
301         struct ipv6_pinfo *np = inet6_sk(sk);
302         struct ipv6hdr *hdr;
303         int totlen;
304
305         skb->protocol = htons(ETH_P_IPV6);
306         skb->dev = dev;
307
308         totlen = len + sizeof(struct ipv6hdr);
309
310         skb_reset_network_header(skb);
311         skb_put(skb, sizeof(struct ipv6hdr));
312         hdr = ipv6_hdr(skb);
313
314         *(__be32*)hdr = htonl(0x60000000);
315
316         hdr->payload_len = htons(len);
317         hdr->nexthdr = proto;
318         hdr->hop_limit = np->hop_limit;
319
320         ipv6_addr_copy(&hdr->saddr, saddr);
321         ipv6_addr_copy(&hdr->daddr, daddr);
322
323         return 0;
324 }
325
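/* Deliver a copy of a Router Alert packet to each matching RA socket.
 * Note the usual raw-delivery pattern: every match except the last gets
 * a clone, and the final socket consumes the original skb, which saves a
 * clone in the common single-listener case. Returns 1 if the packet was
 * absorbed, 0 if the caller still owns it.
 */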
326 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
327 {
328         struct ip6_ra_chain *ra;
329         struct sock *last = NULL;
330
331         read_lock(&ip6_ra_lock);
332         for (ra = ip6_ra_chain; ra; ra = ra->next) {
333                 struct sock *sk = ra->sk;
334                 if (sk && ra->sel == sel &&
335                     (!sk->sk_bound_dev_if ||
336                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
337                         if (last) {
338                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
339                                 if (skb2)
340                                         rawv6_rcv(last, skb2);
341                         }
342                         last = sk;
343                 }
344         }
345
346         if (last) {
347                 rawv6_rcv(last, skb);
348                 read_unlock(&ip6_ra_lock);
349                 return 1;
350         }
351         read_unlock(&ip6_ra_lock);
352         return 0;
353 }
354
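/* Classify a packet whose destination we proxy: returns 1 to divert it
 * to local input (unicast neighbour discovery aimed at the proxied
 * address), 0 to let forwarding continue, and -1 to drop it (link-local
 * destination, which a proxying router must not forward).
 */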
355 static int ip6_forward_proxy_check(struct sk_buff *skb)
356 {
357         struct ipv6hdr *hdr = ipv6_hdr(skb);
358         u8 nexthdr = hdr->nexthdr;
359         int offset;
360
361         if (ipv6_ext_hdr(nexthdr)) {
362                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
363                 if (offset < 0)
364                         return 0;
365         } else
366                 offset = sizeof(struct ipv6hdr);
367
368         if (nexthdr == IPPROTO_ICMPV6) {
369                 struct icmp6hdr *icmp6;
370
371                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
372                                          offset + 1 - skb->data)))
373                         return 0;
374
375                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
376
377                 switch (icmp6->icmp6_type) {
378                 case NDISC_ROUTER_SOLICITATION:
379                 case NDISC_ROUTER_ADVERTISEMENT:
380                 case NDISC_NEIGHBOUR_SOLICITATION:
381                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
382                 case NDISC_REDIRECT:
383                         /* Unicast neighbour discovery messages destined
384                          * to the proxied address are passed to the input
385                          * function.
386                          */
387                         return 1;
388                 default:
389                         break;
390                 }
391         }
392
393         /*
394          * The proxying router can't forward traffic sent to a link-local
395          * address, so signal the sender and discard the packet. This
396          * behavior is clarified by the MIPv6 specification.
397          */
398         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
399                 dst_link_failure(skb);
400                 return -1;
401         }
402
403         return 0;
404 }
405
406 static inline int ip6_forward_finish(struct sk_buff *skb)
407 {
408         return dst_output(skb);
409 }
410
411 int ip6_forward(struct sk_buff *skb)
412 {
413         struct dst_entry *dst = skb->dst;
414         struct ipv6hdr *hdr = ipv6_hdr(skb);
415         struct inet6_skb_parm *opt = IP6CB(skb);
416         struct net *net = dev_net(dst->dev);
417
418         if (net->ipv6.devconf_all->forwarding == 0)
419                 goto error;
420
421         if (skb_warn_if_lro(skb))
422                 goto drop;
423
424         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
425                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
426                 goto drop;
427         }
428
429         skb_forward_csum(skb);
430
431         /*
432          *      We DO NOT process RA packets; we push them to user
433          *      level AS IS, without any WARRANTY that the application
434          *      will be able to interpret them. The reason is that we
435          *      cannot make anything clever here.
436          *
437          *      We are not the end node, so if the packet contains
438          *      AH/ESP we cannot do anything.
439          *      Defragmentation would also be a mistake; RA packets
440          *      cannot be fragmented, because there is no warranty
441          *      that different fragments will go along one path.
442          *      --ANK
443          */
444         if (opt->ra) {
445                 u8 *ptr = skb_network_header(skb) + opt->ra;
446                 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
447                         return 0;
448         }
449
450         /*
451          *      check and decrement the hop limit
452          */
453         if (hdr->hop_limit <= 1) {
454                 /* Force OUTPUT device used as source address */
455                 skb->dev = dst->dev;
456                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
457                             0, skb->dev);
458                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
459
460                 kfree_skb(skb);
461                 return -ETIMEDOUT;
462         }
463
464         /* XXX: idev->cnf.proxy_ndp? */
465         if (net->ipv6.devconf_all->proxy_ndp &&
466             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
467                 int proxied = ip6_forward_proxy_check(skb);
468                 if (proxied > 0)
469                         return ip6_input(skb);
470                 else if (proxied < 0) {
471                         IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
472                         goto drop;
473                 }
474         }
475
476         if (!xfrm6_route_forward(skb)) {
477                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
478                 goto drop;
479         }
480         dst = skb->dst;
481
482         /* IPv6 specs say nothing about it, but it is clear that we cannot
483            send redirects to source routed frames.
484            We don't send redirects to frames decapsulated from IPsec.
485          */
486         if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
487             !skb->sp) {
488                 struct in6_addr *target = NULL;
489                 struct rt6_info *rt;
490                 struct neighbour *n = dst->neighbour;
491
492                 /*
493                  *      incoming and outgoing devices are the same
494                  *      send a redirect.
495                  */
496
497                 rt = (struct rt6_info *) dst;
498                 if ((rt->rt6i_flags & RTF_GATEWAY))
499                         target = (struct in6_addr*)&n->primary_key;
500                 else
501                         target = &hdr->daddr;
502
503                 /* Limit redirects both by destination (here)
504                    and by source (inside ndisc_send_redirect)
505                  */
506                 if (xrlim_allow(dst, 1*HZ))
507                         ndisc_send_redirect(skb, n, target);
508         } else {
509                 int addrtype = ipv6_addr_type(&hdr->saddr);
510
511                 /* This check is security critical. */
512                 if (addrtype == IPV6_ADDR_ANY ||
513                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
514                         goto error;
515                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
516                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
517                                 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
518                         goto error;
519                 }
520         }
521
522         if (skb->len > dst_mtu(dst)) {
523                 /* Again, force OUTPUT device used as source address */
524                 skb->dev = dst->dev;
525                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
526                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
527                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
528                 kfree_skb(skb);
529                 return -EMSGSIZE;
530         }
531
532         if (skb_cow(skb, dst->dev->hard_header_len)) {
533                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
534                 goto drop;
535         }
536
537         hdr = ipv6_hdr(skb);
538
539         /* Mangling hops number delayed to point after skb COW */
540
541         hdr->hop_limit--;
542
543         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
544         return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
545                        ip6_forward_finish);
546
547 error:
548         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
549 drop:
550         kfree_skb(skb);
551         return -EINVAL;
552 }
553
554 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
555 {
556         to->pkt_type = from->pkt_type;
557         to->priority = from->priority;
558         to->protocol = from->protocol;
559         dst_release(to->dst);
560         to->dst = dst_clone(from->dst);
561         to->dev = from->dev;
562         to->mark = from->mark;
563
564 #ifdef CONFIG_NET_SCHED
565         to->tc_index = from->tc_index;
566 #endif
567         nf_copy(to, from);
568 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
569     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
570         to->nf_trace = from->nf_trace;
571 #endif
572         skb_copy_secmark(to, from);
573 }
574
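/* Walk the extension header chain and return the offset at which a
 * fragment header would have to be inserted: hop-by-hop, routing and
 * (for Mobile IPv6) a destination options header carrying a home address
 * option belong to the unfragmentable part (cf. RFC 2460, section 4.5).
 * *nexthdr is left pointing at the nexthdr byte that must be patched to
 * NEXTHDR_FRAGMENT.
 */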
575 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
576 {
577         u16 offset = sizeof(struct ipv6hdr);
578         struct ipv6_opt_hdr *exthdr =
579                                 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
580         unsigned int packet_len = skb->tail - skb->network_header;
581         int found_rhdr = 0;
582         *nexthdr = &ipv6_hdr(skb)->nexthdr;
583
584         while (offset + 1 <= packet_len) {
585
586                 switch (**nexthdr) {
587
588                 case NEXTHDR_HOP:
589                         break;
590                 case NEXTHDR_ROUTING:
591                         found_rhdr = 1;
592                         break;
593                 case NEXTHDR_DEST:
594 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
595                         if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
596                                 break;
597 #endif
598                         if (found_rhdr)
599                                 return offset;
600                         break;
601                 default:
602                         return offset;
603                 }
604
605                 offset += ipv6_optlen(exthdr);
606                 *nexthdr = &exthdr->nexthdr;
607                 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
608                                                  offset);
609         }
610
611         return offset;
612 }
613
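/* Two strategies below: the fast path keeps the existing frag_list
 * chunks and merely pushes a per-fragment IPv6 + fragment header onto
 * each of them in place; if the geometry is wrong (oversized or
 * misaligned chunks, cloned data), the slow path allocates a fresh skb
 * per fragment and copies the payload out with skb_copy_bits().
 */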
614 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
615 {
616         struct net_device *dev;
617         struct sk_buff *frag;
618         struct rt6_info *rt = (struct rt6_info*)skb->dst;
619         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
620         struct ipv6hdr *tmp_hdr;
621         struct frag_hdr *fh;
622         unsigned int mtu, hlen, left, len;
623         __be32 frag_id = 0;
624         int ptr, offset = 0, err=0;
625         u8 *prevhdr, nexthdr = 0;
626
627         dev = rt->u.dst.dev;
628         hlen = ip6_find_1stfragopt(skb, &prevhdr);
629         nexthdr = *prevhdr;
630
631         mtu = ip6_skb_dst_mtu(skb);
632
633         /* We must not fragment if the socket is set to force MTU discovery
634          * or if the skb is not generated by a local socket.  (This last
635          * check should be redundant, but it's free.)
636          */
637         if (!skb->local_df) {
638                 skb->dev = skb->dst->dev;
639                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
640                 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
641                 kfree_skb(skb);
642                 return -EMSGSIZE;
643         }
644
645         if (np && np->frag_size < mtu) {
646                 if (np->frag_size)
647                         mtu = np->frag_size;
648         }
649         mtu -= hlen + sizeof(struct frag_hdr);
650
651         if (skb_shinfo(skb)->frag_list) {
652                 int first_len = skb_pagelen(skb);
653                 int truesizes = 0;
654
655                 if (first_len - hlen > mtu ||
656                     ((first_len - hlen) & 7) ||
657                     skb_cloned(skb))
658                         goto slow_path;
659
660                 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
661                         /* Correct geometry. */
662                         if (frag->len > mtu ||
663                             ((frag->len & 7) && frag->next) ||
664                             skb_headroom(frag) < hlen)
665                             goto slow_path;
666
667                         /* Partially cloned skb? */
668                         if (skb_shared(frag))
669                                 goto slow_path;
670
671                         BUG_ON(frag->sk);
672                         if (skb->sk) {
673                                 sock_hold(skb->sk);
674                                 frag->sk = skb->sk;
675                                 frag->destructor = sock_wfree;
676                                 truesizes += frag->truesize;
677                         }
678                 }
679
680                 err = 0;
681                 offset = 0;
682                 frag = skb_shinfo(skb)->frag_list;
683                 skb_shinfo(skb)->frag_list = NULL;
684                 /* BUILD HEADER */
685
686                 *prevhdr = NEXTHDR_FRAGMENT;
687                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
688                 if (!tmp_hdr) {
689                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
690                         return -ENOMEM;
691                 }
692
693                 __skb_pull(skb, hlen);
694                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
695                 __skb_push(skb, hlen);
696                 skb_reset_network_header(skb);
697                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
698
699                 ipv6_select_ident(skb, fh);
700                 fh->nexthdr = nexthdr;
701                 fh->reserved = 0;
702                 fh->frag_off = htons(IP6_MF);
703                 frag_id = fh->identification;
704
705                 first_len = skb_pagelen(skb);
706                 skb->data_len = first_len - skb_headlen(skb);
707                 skb->truesize -= truesizes;
708                 skb->len = first_len;
709                 ipv6_hdr(skb)->payload_len = htons(first_len -
710                                                    sizeof(struct ipv6hdr));
711
712                 dst_hold(&rt->u.dst);
713
714                 for (;;) {
715                         /* Prepare the header of the next frame
716                          * before the previous one goes down. */
717                         if (frag) {
718                                 frag->ip_summed = CHECKSUM_NONE;
719                                 skb_reset_transport_header(frag);
720                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
721                                 __skb_push(frag, hlen);
722                                 skb_reset_network_header(frag);
723                                 memcpy(skb_network_header(frag), tmp_hdr,
724                                        hlen);
725                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
726                                 fh->nexthdr = nexthdr;
727                                 fh->reserved = 0;
728                                 fh->frag_off = htons(offset);
729                                 if (frag->next != NULL)
730                                         fh->frag_off |= htons(IP6_MF);
731                                 fh->identification = frag_id;
732                                 ipv6_hdr(frag)->payload_len =
733                                                 htons(frag->len -
734                                                       sizeof(struct ipv6hdr));
735                                 ip6_copy_metadata(frag, skb);
736                         }
737
738                         err = output(skb);
739                         if (!err)
740                                 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
741
742                         if (err || !frag)
743                                 break;
744
745                         skb = frag;
746                         frag = skb->next;
747                         skb->next = NULL;
748                 }
749
750                 kfree(tmp_hdr);
751
752                 if (err == 0) {
753                         IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
754                         dst_release(&rt->u.dst);
755                         return 0;
756                 }
757
758                 while (frag) {
759                         skb = frag->next;
760                         kfree_skb(frag);
761                         frag = skb;
762                 }
763
764                 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
765                 dst_release(&rt->u.dst);
766                 return err;
767         }
768
769 slow_path:
770         left = skb->len - hlen;         /* Space per frame */
771         ptr = hlen;                     /* Where to start from */
772
773         /*
774          *      Fragment the datagram.
775          */
776
777         *prevhdr = NEXTHDR_FRAGMENT;
778
779         /*
780          *      Keep copying data until we run out.
781          */
782         while (left > 0) {
783                 len = left;
784                 /* IF: it doesn't fit, use 'mtu' - the data space left */
785                 if (len > mtu)
786                         len = mtu;
787                 /* IF: we are not sending up to and including the packet end,
788                    then align the next start on an eight-byte boundary */
789                 if (len < left) {
790                         len &= ~7;
791                 }
792                 /*
793                  *      Allocate buffer.
794                  */
795
796                 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
797                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
798                         IP6_INC_STATS(ip6_dst_idev(skb->dst),
799                                       IPSTATS_MIB_FRAGFAILS);
800                         err = -ENOMEM;
801                         goto fail;
802                 }
803
804                 /*
805                  *      Set up data on packet
806                  */
807
808                 ip6_copy_metadata(frag, skb);
809                 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
810                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
811                 skb_reset_network_header(frag);
812                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
813                 frag->transport_header = (frag->network_header + hlen +
814                                           sizeof(struct frag_hdr));
815
816                 /*
817                  *      Charge the memory for the fragment to any owner
818                  *      it might possess
819                  */
820                 if (skb->sk)
821                         skb_set_owner_w(frag, skb->sk);
822
823                 /*
824                  *      Copy the packet header into the new buffer.
825                  */
826                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
827
828                 /*
829                  *      Build fragment header.
830                  */
831                 fh->nexthdr = nexthdr;
832                 fh->reserved = 0;
833                 if (!frag_id) {
834                         ipv6_select_ident(skb, fh);
835                         frag_id = fh->identification;
836                 } else
837                         fh->identification = frag_id;
838
839                 /*
840                  *      Copy a block of the IP datagram.
841                  */
842                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
843                         BUG();
844                 left -= len;
845
846                 fh->frag_off = htons(offset);
847                 if (left > 0)
848                         fh->frag_off |= htons(IP6_MF);
849                 ipv6_hdr(frag)->payload_len = htons(frag->len -
850                                                     sizeof(struct ipv6hdr));
851
852                 ptr += len;
853                 offset += len;
854
855                 /*
856                  *      Put this fragment into the sending queue.
857                  */
858                 err = output(frag);
859                 if (err)
860                         goto fail;
861
862                 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
863         }
864         IP6_INC_STATS(ip6_dst_idev(skb->dst),
865                       IPSTATS_MIB_FRAGOKS);
866         kfree_skb(skb);
867         return err;
868
869 fail:
870         IP6_INC_STATS(ip6_dst_idev(skb->dst),
871                       IPSTATS_MIB_FRAGFAILS);
872         kfree_skb(skb);
873         return err;
874 }
875
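/* Nonzero means the cached route cannot be validated for fl_addr: it is
 * neither a /128 host route to that exact address nor a route whose
 * last-used destination (addr_cache) still matches.
 */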
876 static inline int ip6_rt_check(struct rt6key *rt_key,
877                                struct in6_addr *fl_addr,
878                                struct in6_addr *addr_cache)
879 {
880         return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
881                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
882 }
883
884 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
885                                           struct dst_entry *dst,
886                                           struct flowi *fl)
887 {
888         struct ipv6_pinfo *np = inet6_sk(sk);
889         struct rt6_info *rt = (struct rt6_info *)dst;
890
891         if (!dst)
892                 goto out;
893
894         /* Yes, checking route validity in the unconnected
895          * case is not very simple. Take into account
896          * that we do not support routing by source, TOS,
897          * and MSG_DONTROUTE.          --ANK (980726)
898          *
899          * 1. ip6_rt_check(): If the route was a host route,
900          *    check that the cached destination is current.
901          *    If it is a network route, we still may
902          *    check its validity using the saved pointer
903          *    to the last used address: daddr_cache.
904          *    We do not want to save the whole address now
905          *    (because the main consumer of this service
906          *    is TCP, which does not have this problem),
907          *    so this last trick works only on connected
908          *    sockets.
909          * 2. oif should also be the same.
910          */
911         if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
912 #ifdef CONFIG_IPV6_SUBTREES
913             ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
914 #endif
915             (fl->oif && fl->oif != dst->dev->ifindex)) {
916                 dst_release(dst);
917                 dst = NULL;
918         }
919
920 out:
921         return dst;
922 }
923
924 static int ip6_dst_lookup_tail(struct sock *sk,
925                                struct dst_entry **dst, struct flowi *fl)
926 {
927         int err;
928         struct net *net = sock_net(sk);
929
930         if (*dst == NULL)
931                 *dst = ip6_route_output(net, sk, fl);
932
933         if ((err = (*dst)->error))
934                 goto out_err_release;
935
936         if (ipv6_addr_any(&fl->fl6_src)) {
937                 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
938                                          &fl->fl6_dst,
939                                          sk ? inet6_sk(sk)->srcprefs : 0,
940                                          &fl->fl6_src);
941                 if (err)
942                         goto out_err_release;
943         }
944
945 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
946                 /*
947                  * Here if the dst entry we've looked up
948                  * has a neighbour entry that is in the INCOMPLETE
949                  * state and the src address from the flow is
950                  * marked as OPTIMISTIC, we release the found
951                  * dst entry and replace it instead with the
952                  * dst entry of the nexthop router
953                  */
954                 if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
955                         struct inet6_ifaddr *ifp;
956                         struct flowi fl_gw;
957                         int redirect;
958
959                         ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
960                                               (*dst)->dev, 1);
961
962                         redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
963                         if (ifp)
964                                 in6_ifa_put(ifp);
965
966                         if (redirect) {
967                                 /*
968                                  * We need to get the dst entry for the
969                                  * default router instead
970                                  */
971                                 dst_release(*dst);
972                                 memcpy(&fl_gw, fl, sizeof(struct flowi));
973                                 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
974                                 *dst = ip6_route_output(net, sk, &fl_gw);
975                                 if ((err = (*dst)->error))
976                                         goto out_err_release;
977                         }
978                 }
979 #endif
980
981         return 0;
982
983 out_err_release:
984         if (err == -ENETUNREACH)
985                 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
986         dst_release(*dst);
987         *dst = NULL;
988         return err;
989 }
990
991 /**
992  *      ip6_dst_lookup - perform route lookup on flow
993  *      @sk: socket which provides route info
994  *      @dst: pointer to dst_entry * for result
995  *      @fl: flow to lookup
996  *
997  *      This function performs a route lookup on the given flow.
998  *
999  *      It returns zero on success, or a standard errno code on error.
1000  */
1001 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1002 {
1003         *dst = NULL;
1004         return ip6_dst_lookup_tail(sk, dst, fl);
1005 }
1006 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
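
/* Usage sketch (editor's note, illustrative): a sender resolving a route
 * before transmission might do
 *
 *	struct dst_entry *dst;
 *	int err = ip6_dst_lookup(sk, &dst, &fl);
 *
 *	if (err)
 *		return err;
 *
 * On success *dst holds a reference which the caller must drop with
 * dst_release() when finished; on failure *dst is NULL.
 */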
1007
1008 /**
1009  *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
1010  *      @sk: socket which provides the dst cache and route info
1011  *      @dst: pointer to dst_entry * for result
1012  *      @fl: flow to lookup
1013  *
1014  *      This function performs a route lookup on the given flow with the
1015  *      possibility of using the cached route in the socket if it is valid.
1016  *      It will take the socket dst lock when operating on the dst cache.
1017  *      As a result, this function can only be used in process context.
1018  *
1019  *      It returns zero on success, or a standard errno code on error.
1020  */
1021 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1022 {
1023         *dst = NULL;
1024         if (sk) {
1025                 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1026                 *dst = ip6_sk_dst_check(sk, *dst, fl);
1027         }
1028
1029         return ip6_dst_lookup_tail(sk, dst, fl);
1030 }
1031 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1032
1033 static inline int ip6_ufo_append_data(struct sock *sk,
1034                         int getfrag(void *from, char *to, int offset, int len,
1035                         int odd, struct sk_buff *skb),
1036                         void *from, int length, int hh_len, int fragheaderlen,
1037                         int transhdrlen, int mtu, unsigned int flags)
1038
1039 {
1040         struct sk_buff *skb;
1041         int err;
1042
1043         /* The network device supports UDP large send offload, so
1044          * create one single skb packet containing the complete
1045          * UDP datagram.
1046          */
1047         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1048                 skb = sock_alloc_send_skb(sk,
1049                         hh_len + fragheaderlen + transhdrlen + 20,
1050                         (flags & MSG_DONTWAIT), &err);
1051                 if (skb == NULL)
1052                         return -ENOMEM;
1053
1054                 /* reserve space for Hardware header */
1055                 skb_reserve(skb, hh_len);
1056
1057                 /* create space for UDP/IP header */
1058                 skb_put(skb, fragheaderlen + transhdrlen);
1059
1060                 /* initialize network header pointer */
1061                 skb_reset_network_header(skb);
1062
1063                 /* initialize protocol header pointer */
1064                 skb->transport_header = skb->network_header + fragheaderlen;
1065
1066                 skb->ip_summed = CHECKSUM_PARTIAL;
1067                 skb->csum = 0;
1068                 sk->sk_sndmsg_off = 0;
1069         }
1070
1071         err = skb_append_datato_frags(sk, skb, getfrag, from,
1072                                       (length - transhdrlen));
1073         if (!err) {
1074                 struct frag_hdr fhdr;
1075
1076                 /* specify the length of each IP datagram fragment */
1077                 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1078                                             sizeof(struct frag_hdr);
1079                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1080                 ipv6_select_ident(skb, &fhdr);
1081                 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1082                 __skb_queue_tail(&sk->sk_write_queue, skb);
1083
1084                 return 0;
1085         }
1086         /* There is not enough support to do UDP LSO,
1087          * so follow the normal path.
1088          */
1089         kfree_skb(skb);
1090
1091         return err;
1092 }
1093
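/* Append data to the socket's pending (corked) queue. The first call on
 * an empty write queue establishes the cork state: route, options, hop
 * limit, traffic class and fragment size are captured from the
 * arguments; subsequent calls while data is pending reuse that captured
 * state and ignore the corresponding arguments. ip6_push_pending_frames()
 * later coalesces the queue, prepends the IPv6 header and transmits.
 */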
1094 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1095         int offset, int len, int odd, struct sk_buff *skb),
1096         void *from, int length, int transhdrlen,
1097         int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1098         struct rt6_info *rt, unsigned int flags)
1099 {
1100         struct inet_sock *inet = inet_sk(sk);
1101         struct ipv6_pinfo *np = inet6_sk(sk);
1102         struct sk_buff *skb;
1103         unsigned int maxfraglen, fragheaderlen;
1104         int exthdrlen;
1105         int hh_len;
1106         int mtu;
1107         int copy;
1108         int err;
1109         int offset = 0;
1110         int csummode = CHECKSUM_NONE;
1111
1112         if (flags & MSG_PROBE)
1113                 return 0;
1114         if (skb_queue_empty(&sk->sk_write_queue)) {
1115                 /*
1116                  * setup for corking
1117                  */
1118                 if (opt) {
1119                         if (np->cork.opt == NULL) {
1120                                 np->cork.opt = kmalloc(opt->tot_len,
1121                                                        sk->sk_allocation);
1122                                 if (unlikely(np->cork.opt == NULL))
1123                                         return -ENOBUFS;
1124                         } else if (np->cork.opt->tot_len < opt->tot_len) {
1125                                 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1126                                 return -EINVAL;
1127                         }
1128                         memcpy(np->cork.opt, opt, opt->tot_len);
1129                         inet->cork.flags |= IPCORK_OPT;
1130                         /* need source address above --miyazawa */
1131                 }
1132                 dst_hold(&rt->u.dst);
1133                 inet->cork.dst = &rt->u.dst;
1134                 inet->cork.fl = *fl;
1135                 np->cork.hop_limit = hlimit;
1136                 np->cork.tclass = tclass;
1137                 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1138                       rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1139                 if (np->frag_size < mtu) {
1140                         if (np->frag_size)
1141                                 mtu = np->frag_size;
1142                 }
1143                 inet->cork.fragsize = mtu;
1144                 if (dst_allfrag(rt->u.dst.path))
1145                         inet->cork.flags |= IPCORK_ALLFRAG;
1146                 inet->cork.length = 0;
1147                 sk->sk_sndmsg_page = NULL;
1148                 sk->sk_sndmsg_off = 0;
1149                 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1150                             rt->rt6i_nfheader_len;
1151                 length += exthdrlen;
1152                 transhdrlen += exthdrlen;
1153         } else {
1154                 rt = (struct rt6_info *)inet->cork.dst;
1155                 fl = &inet->cork.fl;
1156                 if (inet->cork.flags & IPCORK_OPT)
1157                         opt = np->cork.opt;
1158                 transhdrlen = 0;
1159                 exthdrlen = 0;
1160                 mtu = inet->cork.fragsize;
1161         }
1162
1163         hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1164
1165         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1166                         (opt ? opt->opt_nflen : 0);
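        /* Largest packet we can queue and still fragment cleanly: round
         * the payload space down to a multiple of 8 (fragment offsets
         * count 8-octet units) and keep room for the fragment header
         * itself.
         */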
1167         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1168
1169         if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1170                 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1171                         ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1172                         return -EMSGSIZE;
1173                 }
1174         }
1175
1176         /*
1177          * Let's try using as much space as possible.
1178          * Use MTU if total length of the message fits into the MTU.
1179          * Otherwise, we need to reserve fragment header and
1180          * fragment alignment (= 8-15 octets, in total).
1181          *
1182          * Note that we may need to "move" the data from the tail
1183          * of the buffer to the new fragment when we split
1184          * the message.
1185          *
1186          * FIXME: It may be fragmented into multiple chunks
1187          *        at once if non-fragmentable extension headers
1188          *        are too large.
1189          * --yoshfuji
1190          */
1191
1192         inet->cork.length += length;
1193         if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1194             (rt->u.dst.dev->features & NETIF_F_UFO)) {
1195
1196                 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1197                                           fragheaderlen, transhdrlen, mtu,
1198                                           flags);
1199                 if (err)
1200                         goto error;
1201                 return 0;
1202         }
1203
1204         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1205                 goto alloc_new_skb;
1206
1207         while (length > 0) {
1208                 /* Check if the remaining data fits into current packet. */
1209                 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1210                 if (copy < length)
1211                         copy = maxfraglen - skb->len;
1212
1213                 if (copy <= 0) {
1214                         char *data;
1215                         unsigned int datalen;
1216                         unsigned int fraglen;
1217                         unsigned int fraggap;
1218                         unsigned int alloclen;
1219                         struct sk_buff *skb_prev;
1220 alloc_new_skb:
1221                         skb_prev = skb;
1222
1223                         /* There's no room in the current skb */
1224                         if (skb_prev)
1225                                 fraggap = skb_prev->len - maxfraglen;
1226                         else
1227                                 fraggap = 0;
1228
1229                         /*
1230                          * If remaining data exceeds the mtu,
1231                          * we know we need more fragment(s).
1232                          */
1233                         datalen = length + fraggap;
1234                         if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1235                                 datalen = maxfraglen - fragheaderlen;
1236
1237                         fraglen = datalen + fragheaderlen;
1238                         if ((flags & MSG_MORE) &&
1239                             !(rt->u.dst.dev->features & NETIF_F_SG))
1240                                 alloclen = mtu;
1241                         else
1242                                 alloclen = datalen + fragheaderlen;
1243
1244                         /*
1245                          * The last fragment gets additional space at tail.
1246                          * Note: we overallocate on fragments with MSG_MORE
1247                          * because we have no idea if we're the last one.
1248                          */
1249                         if (datalen == length + fraggap)
1250                                 alloclen += rt->u.dst.trailer_len;
1251
1252                         /*
1253                          * We just reserve space for fragment header.
1254                          * Note: this may be overallocation if the message
1255                          * (without MSG_MORE) fits into the MTU.
1256                          */
1257                         alloclen += sizeof(struct frag_hdr);
1258
1259                         if (transhdrlen) {
1260                                 skb = sock_alloc_send_skb(sk,
1261                                                 alloclen + hh_len,
1262                                                 (flags & MSG_DONTWAIT), &err);
1263                         } else {
1264                                 skb = NULL;
1265                                 if (atomic_read(&sk->sk_wmem_alloc) <=
1266                                     2 * sk->sk_sndbuf)
1267                                         skb = sock_wmalloc(sk,
1268                                                            alloclen + hh_len, 1,
1269                                                            sk->sk_allocation);
1270                                 if (unlikely(skb == NULL))
1271                                         err = -ENOBUFS;
1272                         }
1273                         if (skb == NULL)
1274                                 goto error;
1275                         /*
1276                          *      Fill in the control structures
1277                          */
1278                         skb->ip_summed = csummode;
1279                         skb->csum = 0;
1280                         /* reserve for fragmentation */
1281                         skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1282
1283                         /*
1284                          *      Find where to start putting bytes
1285                          */
1286                         data = skb_put(skb, fraglen);
1287                         skb_set_network_header(skb, exthdrlen);
1288                         data += fragheaderlen;
1289                         skb->transport_header = (skb->network_header +
1290                                                  fragheaderlen);
1291                         if (fraggap) {
1292                                 skb->csum = skb_copy_and_csum_bits(
1293                                         skb_prev, maxfraglen,
1294                                         data + transhdrlen, fraggap, 0);
1295                                 skb_prev->csum = csum_sub(skb_prev->csum,
1296                                                           skb->csum);
1297                                 data += fraggap;
1298                                 pskb_trim_unique(skb_prev, maxfraglen);
1299                         }
1300                         copy = datalen - transhdrlen - fraggap;
1301                         if (copy < 0) {
1302                                 err = -EINVAL;
1303                                 kfree_skb(skb);
1304                                 goto error;
1305                         } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1306                                 err = -EFAULT;
1307                                 kfree_skb(skb);
1308                                 goto error;
1309                         }
1310
1311                         offset += copy;
1312                         length -= datalen - fraggap;
1313                         transhdrlen = 0;
1314                         exthdrlen = 0;
1315                         csummode = CHECKSUM_NONE;
1316
1317                         /*
1318                          * Put the packet on the pending queue
1319                          */
1320                         __skb_queue_tail(&sk->sk_write_queue, skb);
1321                         continue;
1322                 }
1323
1324                 if (copy > length)
1325                         copy = length;
1326
1327                 if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
1328                         unsigned int off;
1329
1330                         off = skb->len;
1331                         if (getfrag(from, skb_put(skb, copy),
1332                                                 offset, copy, off, skb) < 0) {
1333                                 __skb_trim(skb, off);
1334                                 err = -EFAULT;
1335                                 goto error;
1336                         }
1337                 } else {
1338                         int i = skb_shinfo(skb)->nr_frags;
1339                         skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1340                         struct page *page = sk->sk_sndmsg_page;
1341                         int off = sk->sk_sndmsg_off;
1342                         unsigned int left;
1343
1344                         if (page && (left = PAGE_SIZE - off) > 0) {
1345                                 if (copy >= left)
1346                                         copy = left;
1347                                 if (page != frag->page) {
1348                                         if (i == MAX_SKB_FRAGS) {
1349                                                 err = -EMSGSIZE;
1350                                                 goto error;
1351                                         }
1352                                         get_page(page);
1353                                         skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1354                                         frag = &skb_shinfo(skb)->frags[i];
1355                                 }
1356                         } else if (i < MAX_SKB_FRAGS) {
1357                                 if (copy > PAGE_SIZE)
1358                                         copy = PAGE_SIZE;
1359                                 page = alloc_pages(sk->sk_allocation, 0);
1360                                 if (page == NULL) {
1361                                         err = -ENOMEM;
1362                                         goto error;
1363                                 }
1364                                 sk->sk_sndmsg_page = page;
1365                                 sk->sk_sndmsg_off = 0;
1366
1367                                 skb_fill_page_desc(skb, i, page, 0, 0);
1368                                 frag = &skb_shinfo(skb)->frags[i];
1369                         } else {
1370                                 err = -EMSGSIZE;
1371                                 goto error;
1372                         }
1373                         if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1374                                 err = -EFAULT;
1375                                 goto error;
1376                         }
1377                         sk->sk_sndmsg_off += copy;
1378                         frag->size += copy;
1379                         skb->len += copy;
1380                         skb->data_len += copy;
1381                         skb->truesize += copy;
1382                         atomic_add(copy, &sk->sk_wmem_alloc);
1383                 }
1384                 offset += copy;
1385                 length -= copy;
1386         }
1387         return 0;
1388 error:
1389         inet->cork.length -= length;
1390         IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1391         return err;
1392 }
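
/* Caller sketch (editor's note, illustrative): a datagram protocol such
 * as UDPv6 pairs the append with a push or a flush, roughly:
 *
 *	err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
 *			      sizeof(struct udphdr), hlimit, tclass,
 *			      opt, &fl, rt, msg->msg_flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!corkreq)
 *		err = ip6_push_pending_frames(sk);
 *
 * where getfrag copies from the user iovec and corkreq reflects
 * MSG_MORE / UDP_CORK (names here are placeholders, not from this file).
 */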
1393
1394 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1395 {
1396         inet->cork.flags &= ~IPCORK_OPT;
1397         kfree(np->cork.opt);
1398         np->cork.opt = NULL;
1399         if (inet->cork.dst) {
1400                 dst_release(inet->cork.dst);
1401                 inet->cork.dst = NULL;
1402                 inet->cork.flags &= ~IPCORK_ALLFRAG;
1403         }
1404         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1405 }
1406
1407 int ip6_push_pending_frames(struct sock *sk)
1408 {
1409         struct sk_buff *skb, *tmp_skb;
1410         struct sk_buff **tail_skb;
1411         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1412         struct inet_sock *inet = inet_sk(sk);
1413         struct ipv6_pinfo *np = inet6_sk(sk);
1414         struct ipv6hdr *hdr;
1415         struct ipv6_txoptions *opt = np->cork.opt;
1416         struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1417         struct flowi *fl = &inet->cork.fl;
1418         unsigned char proto = fl->proto;
1419         int err = 0;
1420
1421         if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1422                 goto out;
1423         tail_skb = &(skb_shinfo(skb)->frag_list);
1424
1425         /* move skb->data to ip header from ext header */
1426         if (skb->data < skb_network_header(skb))
1427                 __skb_pull(skb, skb_network_offset(skb));
1428         while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1429                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1430                 *tail_skb = tmp_skb;
1431                 tail_skb = &(tmp_skb->next);
1432                 skb->len += tmp_skb->len;
1433                 skb->data_len += tmp_skb->len;
1434                 skb->truesize += tmp_skb->truesize;
1435                 __sock_put(tmp_skb->sk);
1436                 tmp_skb->destructor = NULL;
1437                 tmp_skb->sk = NULL;
1438         }
1439
1440         /* Allow local fragmentation. */
1441         if (np->pmtudisc < IPV6_PMTUDISC_DO)
1442                 skb->local_df = 1;
1443
1444         ipv6_addr_copy(final_dst, &fl->fl6_dst);
1445         __skb_pull(skb, skb_network_header_len(skb));
1446         if (opt && opt->opt_flen)
1447                 ipv6_push_frag_opts(skb, opt, &proto);
1448         if (opt && opt->opt_nflen)
1449                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1450
1451         skb_push(skb, sizeof(struct ipv6hdr));
1452         skb_reset_network_header(skb);
1453         hdr = ipv6_hdr(skb);
1454
1455         *(__be32*)hdr = fl->fl6_flowlabel |
1456                      htonl(0x60000000 | ((int)np->cork.tclass << 20));
1457
1458         hdr->hop_limit = np->cork.hop_limit;
1459         hdr->nexthdr = proto;
1460         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1461         ipv6_addr_copy(&hdr->daddr, final_dst);
1462
1463         skb->priority = sk->sk_priority;
1464         skb->mark = sk->sk_mark;
1465
1466         skb->dst = dst_clone(&rt->u.dst);
1467         IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1468         if (proto == IPPROTO_ICMPV6) {
1469                 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1470
1471                 ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1472                 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1473         }
1474
1475         err = ip6_local_out(skb);
1476         if (err) {
1477                 if (err > 0)
1478                         err = np->recverr ? net_xmit_errno(err) : 0;
1479                 if (err)
1480                         goto error;
1481         }
1482
1483 out:
1484         ip6_cork_release(inet, np);
1485         return err;
1486 error:
1487         goto out;
1488 }
1489
1490 void ip6_flush_pending_frames(struct sock *sk)
1491 {
1492         struct sk_buff *skb;
1493
1494         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1495                 if (skb->dst)
1496                         IP6_INC_STATS(ip6_dst_idev(skb->dst),
1497                                       IPSTATS_MIB_OUTDISCARDS);
1498                 kfree_skb(skb);
1499         }
1500
1501         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1502 }