ipv6: fix out of bound writes in __ip6_append_data()
[pandora-kernel.git] / net / ipv6 / ip6_output.c
index ec56271..4ce3e3f 100644 (file)
@@ -144,8 +144,8 @@ static int ip6_finish_output2(struct sk_buff *skb)
                return res;
        }
        rcu_read_unlock();
-       IP6_INC_STATS_BH(dev_net(dst->dev),
-                        ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+       IP6_INC_STATS(dev_net(dst->dev),
+                     ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
 }
@@ -562,13 +562,12 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 {
        u16 offset = sizeof(struct ipv6hdr);
-       struct ipv6_opt_hdr *exthdr =
-                               (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
        unsigned int packet_len = skb->tail - skb->network_header;
        int found_rhdr = 0;
        *nexthdr = &ipv6_hdr(skb)->nexthdr;
 
-       while (offset + 1 <= packet_len) {
+       while (offset <= packet_len) {
+               struct ipv6_opt_hdr *exthdr;
 
                switch (**nexthdr) {
 
@@ -589,38 +588,33 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
                        return offset;
                }
 
-               offset += ipv6_optlen(exthdr);
-               *nexthdr = &exthdr->nexthdr;
+               if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
+                       return -EINVAL;
+
                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
                                                 offset);
+               offset += ipv6_optlen(exthdr);
+               *nexthdr = &exthdr->nexthdr;
        }
 
-       return offset;
+       return -EINVAL;
 }
 
 void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
 {
-       static atomic_t ipv6_fragmentation_id;
-       int old, new;
-
-       if (rt && !(rt->dst.flags & DST_NOPEER)) {
-               struct inet_peer *peer;
+       static u32 ip6_idents_hashrnd __read_mostly;    /* random seed for the address hash below; filled on the first call */
+       static bool hashrnd_initialized = false;        /* true once the seeding branch has been entered */
+       u32 hash, id;
 
-               if (!rt->rt6i_peer)
-                       rt6_bind_peer(rt, 1);
-               peer = rt->rt6i_peer;
-               if (peer) {
-                       fhdr->identification = htonl(inet_getid(peer, 0));
-                       return;
-               }
+       if (unlikely(!hashrnd_initialized)) {
+               hashrnd_initialized = true;     /* NOTE(review): flag is set before the seed is filled and no locking is visible here - confirm concurrent first callers are acceptable */
+               get_random_bytes(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
        }
-       do {
-               old = atomic_read(&ipv6_fragmentation_id);
-               new = old + 1;
-               if (!new)
-                       new = 1;
-       } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
-       fhdr->identification = htonl(new);
+       hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);       /* NOTE(review): rt is used unchecked; the removed code tested rt for NULL - confirm no caller passes NULL */
+       hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash);     /* chain the source address into the same hash */
+       /* hand the dst/src hash to ip_idents_reserve() (defined elsewhere; second argument presumably the number of IDs - TODO confirm) and store the result in network byte order */
+       id = ip_idents_reserve(hash, 1);
+       fhdr->identification = htonl(id);
 }
 
 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
@@ -631,12 +625,16 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
+       int hroom, troom;
        __be32 frag_id = 0;
        int ptr, offset = 0, err=0;
        u8 *prevhdr, nexthdr = 0;
        struct net *net = dev_net(skb_dst(skb)->dev);
 
-       hlen = ip6_find_1stfragopt(skb, &prevhdr);
+       err = ip6_find_1stfragopt(skb, &prevhdr);
+       if (err < 0)
+               goto fail;
+       hlen = err;
        nexthdr = *prevhdr;
 
        mtu = ip6_skb_dst_mtu(skb);
@@ -797,6 +795,8 @@ slow_path:
         */
 
        *prevhdr = NEXTHDR_FRAGMENT;
+       hroom = LL_RESERVED_SPACE(rt->dst.dev);
+       troom = rt->dst.dev->needed_tailroom;
 
        /*
         *      Keep copying data until we run out.
@@ -815,7 +815,8 @@ slow_path:
                 *      Allocate buffer.
                 */
 
-               if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
+               if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
+                                     hroom + troom, GFP_ATOMIC)) == NULL) {
                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
@@ -828,7 +829,7 @@ slow_path:
                 */
 
                ip6_copy_metadata(frag, skb);
-               skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
+               skb_reserve(frag, hroom);
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
@@ -909,11 +910,17 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          const struct flowi6 *fl6)
 {
        struct ipv6_pinfo *np = inet6_sk(sk);
-       struct rt6_info *rt = (struct rt6_info *)dst;
+       struct rt6_info *rt;
 
        if (!dst)
                goto out;
 
+       if (dst->ops->family != AF_INET6) {
+               dst_release(dst);
+               return NULL;
+       }
+
+       rt = (struct rt6_info *)dst;
        /* Yes, checking route validity in not connected
         * case is not very simple. Take into account,
         * that we do not support routing by source, TOS,
@@ -1107,9 +1114,8 @@ static inline int ip6_ufo_append_data(struct sock *sk,
                        int getfrag(void *from, char *to, int offset, int len,
                        int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
-                       int transhdrlen, int mtu,unsigned int flags,
-                       struct rt6_info *rt)
-
+                       int exthdrlen, int transhdrlen, int mtu,
+                       unsigned int flags, struct rt6_info *rt)
 {
        struct sk_buff *skb;
        int err;
@@ -1119,6 +1125,8 @@ static inline int ip6_ufo_append_data(struct sock *sk,
         * udp datagram
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+               struct frag_hdr fhdr;
+
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
@@ -1132,19 +1140,13 @@ static inline int ip6_ufo_append_data(struct sock *sk,
                skb_put(skb,fragheaderlen + transhdrlen);
 
                /* initialize network header pointer */
-               skb_reset_network_header(skb);
+               skb_set_network_header(skb, exthdrlen);
 
                /* initialize protocol header pointer */
                skb->transport_header = skb->network_header + fragheaderlen;
 
                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
-       }
-
-       err = skb_append_datato_frags(sk,skb, getfrag, from,
-                                     (length - transhdrlen));
-       if (!err) {
-               struct frag_hdr fhdr;
 
                /* Specify the length of each IPv6 datagram fragment.
                 * It has to be a multiple of 8.
@@ -1155,15 +1157,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
                ipv6_select_ident(&fhdr, rt);
                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
                __skb_queue_tail(&sk->sk_write_queue, skb);
-
-               return 0;
        }
-       /* There is not enough support do UPD LSO,
-        * so follow normal path
-        */
-       kfree_skb(skb);
 
-       return err;
+       return skb_append_datato_frags(sk, skb, getfrag, from,
+                                      (length - transhdrlen));
 }
 
 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
@@ -1178,6 +1175,30 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
 }
 
+static void ip6_append_data_mtu(unsigned int *mtu,     /* out: MTU to use for the skb about to be allocated */
+                               int *maxfraglen,        /* out: recomputed fragment limit; NOTE(review): ip6_append_data() passes the address of an unsigned int - confirm the signedness mismatch is intended */
+                               unsigned int fragheaderlen,     /* in: header length excluded from the 8-byte rounding */
+                               struct sk_buff *skb,    /* in: current tail skb, NULL when none has been built yet */
+                               struct rt6_info *rt,    /* in: route; dst.flags and dst.header_len are read */
+                               unsigned int orig_mtu)  /* in: MTU the caller started with */
+{      /* Recompute *mtu and *maxfraglen from orig_mtu; both are left untouched when the route is flagged DST_XFRM_TUNNEL. */
+       if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
+               if (skb == NULL) {
+                       /* no skb yet (first fragment): keep rt->dst.header_len bytes of the MTU free */
+                       *mtu = orig_mtu - rt->dst.header_len;
+
+               } else {
+                       /*
+                        * Not the first fragment: nothing is held back for
+                        * rt->dst.header_len, so the full orig_mtu is usable.
+                        */
+                       *mtu = orig_mtu;
+               }
+               *maxfraglen = ((*mtu - fragheaderlen) & ~7)     /* space after fragheaderlen, rounded down to a multiple of 8 */
+                             + fragheaderlen - sizeof(struct frag_hdr);        /* add the header back, minus the size of a struct frag_hdr */
+       }
+}
+
 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
        int offset, int len, int odd, struct sk_buff *skb),
        void *from, int length, int transhdrlen,
@@ -1187,12 +1208,11 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct inet_cork *cork;
-       struct sk_buff *skb;
-       unsigned int maxfraglen, fragheaderlen;
+       struct sk_buff *skb, *skb_prev = NULL;
+       unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
        int exthdrlen;
        int dst_exthdrlen;
        int hh_len;
-       int mtu;
        int copy;
        int err;
        int offset = 0;
@@ -1210,7 +1230,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
                        if (WARN_ON(np->cork.opt))
                                return -EINVAL;
 
-                       np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
+                       np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
                        if (unlikely(np->cork.opt == NULL))
                                return -ENOBUFS;
 
@@ -1245,8 +1265,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
                inet->cork.fl.u.ip6 = *fl6;
                np->cork.hop_limit = hlimit;
                np->cork.tclass = tclass;
-               mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
-                     rt->dst.dev->mtu : dst_mtu(&rt->dst);
+               if (rt->dst.flags & DST_XFRM_TUNNEL)
+                       mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
+                             rt->dst.dev->mtu : dst_mtu(&rt->dst);
+               else
+                       mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
+                             rt->dst.dev->mtu : dst_mtu(rt->dst.path);
                if (np->frag_size < mtu) {
                        if (np->frag_size)
                                mtu = np->frag_size;
@@ -1257,10 +1281,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
                cork->length = 0;
                sk->sk_sndmsg_page = NULL;
                sk->sk_sndmsg_off = 0;
-               exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
+               exthdrlen = (opt ? opt->opt_flen : 0);
                length += exthdrlen;
                transhdrlen += exthdrlen;
-               dst_exthdrlen = rt->dst.header_len;
+               dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
        } else {
                rt = (struct rt6_info *)cork->dst;
                fl6 = &inet->cork.fl.u.ip6;
@@ -1270,6 +1294,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
                dst_exthdrlen = 0;
                mtu = cork->fragsize;
        }
+       orig_mtu = mtu;
 
        hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 
@@ -1307,27 +1332,28 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
         * --yoshfuji
         */
 
-       cork->length += length;
-       if (length > mtu) {
-               int proto = sk->sk_protocol;
-               if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
-                       ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
-                       return -EMSGSIZE;
-               }
-
-               if (proto == IPPROTO_UDP &&
-                   (rt->dst.dev->features & NETIF_F_UFO)) {
+       if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
+                                          sk->sk_protocol == IPPROTO_RAW)) {
+               ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
+               return -EMSGSIZE;
+       }
 
-                       err = ip6_ufo_append_data(sk, getfrag, from, length,
-                                                 hh_len, fragheaderlen,
-                                                 transhdrlen, mtu, flags, rt);
-                       if (err)
-                               goto error;
-                       return 0;
-               }
+       skb = skb_peek_tail(&sk->sk_write_queue);
+       cork->length += length;
+       if (((length > mtu) ||
+            (skb && skb_has_frags(skb))) &&
+           (sk->sk_protocol == IPPROTO_UDP) &&
+           (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+           (sk->sk_type == SOCK_DGRAM)) {
+               err = ip6_ufo_append_data(sk, getfrag, from, length,
+                                         hh_len, fragheaderlen, exthdrlen,
+                                         transhdrlen, mtu, flags, rt);
+               if (err)
+                       goto error;
+               return 0;
        }
 
-       if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+       if (!skb)
                goto alloc_new_skb;
 
        while (length > 0) {
@@ -1342,25 +1368,28 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
-                       struct sk_buff *skb_prev;
 alloc_new_skb:
-                       skb_prev = skb;
-
                        /* There's no room in the current skb */
-                       if (skb_prev)
-                               fraggap = skb_prev->len - maxfraglen;
+                       if (skb)
+                               fraggap = skb->len - maxfraglen;
                        else
                                fraggap = 0;
+                       /* update mtu and maxfraglen if necessary */
+                       if (skb == NULL || skb_prev == NULL)
+                               ip6_append_data_mtu(&mtu, &maxfraglen,
+                                                   fragheaderlen, skb, rt,
+                                                   orig_mtu);
+
+                       skb_prev = skb;
 
                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;
-                       if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
-                               datalen = maxfraglen - fragheaderlen;
 
-                       fraglen = datalen + fragheaderlen;
+                       if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
+                               datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
@@ -1369,13 +1398,16 @@ alloc_new_skb:
 
                        alloclen += dst_exthdrlen;
 
-                       /*
-                        * The last fragment gets additional space at tail.
-                        * Note: we overallocate on fragments with MSG_MODE
-                        * because we have no idea if we're the last one.
-                        */
-                       if (datalen == length + fraggap)
-                               alloclen += rt->dst.trailer_len;
+                       if (datalen != length + fraggap) {
+                               /*
+                                * this is not the last fragment, the trailer
+                                * space is regarded as data space.
+                                */
+                               datalen += rt->dst.trailer_len;
+                       }
+
+                       alloclen += rt->dst.trailer_len;
+                       fraglen = datalen + fragheaderlen;
 
                        /*
                         * We just reserve space for fragment header.
@@ -1384,6 +1416,11 @@ alloc_new_skb:
                         */
                        alloclen += sizeof(struct frag_hdr);
 
+                       copy = datalen - transhdrlen - fraggap;
+                       if (copy < 0) {
+                               err = -EINVAL;
+                               goto error;
+                       }
                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
@@ -1411,8 +1448,9 @@ alloc_new_skb:
                         */
                        skb->ip_summed = csummode;
                        skb->csum = 0;
-                       /* reserve for fragmentation */
-                       skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
+                       /* reserve for fragmentation and ipsec header */
+                       skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
+                                   dst_exthdrlen);
 
                        if (sk->sk_type == SOCK_DGRAM)
                                skb_shinfo(skb)->tx_flags = tx_flags;
@@ -1420,9 +1458,9 @@ alloc_new_skb:
                        /*
                         *      Find where to start putting bytes
                         */
-                       data = skb_put(skb, fraglen + dst_exthdrlen);
-                       skb_set_network_header(skb, exthdrlen + dst_exthdrlen);
-                       data += fragheaderlen + dst_exthdrlen;
+                       data = skb_put(skb, fraglen);
+                       skb_set_network_header(skb, exthdrlen);
+                       data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
@@ -1434,13 +1472,9 @@ alloc_new_skb:
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
-                       copy = datalen - transhdrlen - fraggap;
-
-                       if (copy < 0) {
-                               err = -EINVAL;
-                               kfree_skb(skb);
-                               goto error;
-                       } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
+                       if (copy > 0 &&
+                           getfrag(from, data + transhdrlen, offset,
+                                   copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
@@ -1615,8 +1649,8 @@ int ip6_push_pending_frames(struct sock *sk)
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
-               ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
-               ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
+               ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
+               ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
        }
 
        err = ip6_local_out(skb);