net: add skb frag size accessors
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 27002df..ce357d9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
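
For reference, the frag size accessors this patch converts to are small inline helpers; their definitions live outside this diff, so the following is a minimal sketch, assuming skb_frag_t still exposes a plain `size` member the way the hunks below do:

	static inline unsigned int skb_frag_size(const skb_frag_t *frag)
	{
		return frag->size;
	}

	static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
	{
		frag->size = size;
	}

	static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
	{
		frag->size += delta;
	}

	static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
	{
		frag->size -= delta;
	}

Routing every read and update of frag->size through one interface lets the field change representation later without touching the many call sites converted below.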
@@ -184,11 +184,20 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
                goto out;
        prefetchw(skb);
 
-       size = SKB_DATA_ALIGN(size);
-       data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
-                       gfp_mask, node);
+       /* We do our best to align skb_shared_info on a separate cache
+        * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
+        * aligned memory blocks, unless SLUB/SLAB debug is enabled.
+        * Both skb->head and skb_shared_info are cache line aligned.
+        */
+       size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+       data = kmalloc_node_track_caller(size, gfp_mask, node);
        if (!data)
                goto nodata;
+       /* kmalloc(size) might give us more room than requested.
+        * Put skb_shared_info exactly at the end of the allocated zone,
+        * to allow the maximum possible filling before reallocation.
+        */
+       size = SKB_WITH_OVERHEAD(ksize(data));
        prefetchw(data + size);
 
        /*
@@ -197,7 +206,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
         * the tail pointer in struct sk_buff!
         */
        memset(skb, 0, offsetof(struct sk_buff, tail));
-       skb->truesize = size + sizeof(struct sk_buff);
+       /* Account for allocated memory: skb + skb->head */
+       skb->truesize = SKB_TRUESIZE(size);
        atomic_set(&skb->users, 1);
        skb->head = data;
        skb->data = data;
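
The SKB_WITH_OVERHEAD() and SKB_TRUESIZE() helpers used above are likewise defined outside this hunk; a hedged sketch of what they plausibly expand to, given how they are used here:

	/* usable payload once skb_shared_info is reserved at the tail */
	#define SKB_WITH_OVERHEAD(X)	\
		((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
	/* full memory cost of an skb whose data area holds X bytes */
	#define SKB_TRUESIZE(X) ((X) +						\
			 SKB_DATA_ALIGN(sizeof(struct sk_buff)) +	\
			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

Since ksize() reports the real slab object size, SKB_WITH_OVERHEAD(ksize(data)) recovers every usable byte while keeping skb_shared_info flush against the end of the allocation, and SKB_TRUESIZE(size) charges sockets for what was actually allocated rather than what was requested.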
@@ -326,7 +336,7 @@ static void skb_release_data(struct sk_buff *skb)
                if (skb_shinfo(skb)->nr_frags) {
                        int i;
                        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-                               put_page(skb_shinfo(skb)->frags[i].page);
+                               skb_frag_unref(skb, i);
                }
 
                /*
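
skb_frag_unref() above, like the skb_frag_ref() calls in later hunks, wraps the old direct put_page()/get_page() on the frag's page; a sketch of the helper family, under the same assumption this file makes that a frag holds a bare struct page *:

	static inline void __skb_frag_ref(skb_frag_t *frag)
	{
		/* skb_frag_page(f) is f->page in this tree */
		get_page(skb_frag_page(frag));
	}

	static inline void skb_frag_ref(struct sk_buff *skb, int f)
	{
		__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
	}

	static inline void __skb_frag_unref(skb_frag_t *frag)
	{
		put_page(skb_frag_page(frag));
	}

	static inline void skb_frag_unref(struct sk_buff *skb, int f)
	{
		__skb_frag_unref(&skb_shinfo(skb)->frags[f]);
	}

The double-underscore variants that take a frag pointer directly are the ones skb_shift() and skb_segment() use further down.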
@@ -529,6 +539,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
        new->mac_header         = old->mac_header;
        skb_dst_copy(new, old);
        new->rxhash             = old->rxhash;
+       new->ooo_okay           = old->ooo_okay;
+       new->l4_rxhash          = old->l4_rxhash;
 #ifdef CONFIG_XFRM
        new->sp                 = secpath_get(old->sp);
 #endif
@@ -611,8 +623,21 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
 
-/* skb frags copy userspace buffers to kernel */
-static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
+/*     skb_copy_ubufs  -       copy userspace skb frag buffers to kernel
+ *     @skb: the skb to modify
+ *     @gfp_mask: allocation priority
+ *
+ *     This must be called on an SKBTX_DEV_ZEROCOPY skb.
+ *     It will copy all frags into kernel memory and drop the reference
+ *     to the userspace pages.
+ *
+ *     If this function is called from an interrupt, @gfp_mask must be
+ *     %GFP_ATOMIC.
+ *
+ *     Returns 0 on success or a negative error code on failure
+ *     to allocate kernel memory to copy to.
+ */
+int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 {
        int i;
        int num_frags = skb_shinfo(skb)->nr_frags;
@@ -634,7 +659,7 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
                }
                vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
                memcpy(page_address(page),
-                      vaddr + f->page_offset, f->size);
+                      vaddr + f->page_offset, skb_frag_size(f));
                kunmap_skb_frag(vaddr);
                page->private = (unsigned long)head;
                head = page;
@@ -652,6 +677,8 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
                skb_shinfo(skb)->frags[i - 1].page = head;
                head = (struct page *)head->private;
        }
+
+       skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
        return 0;
 }
 
@@ -677,7 +704,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
        if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
                if (skb_copy_ubufs(skb, gfp_mask))
                        return NULL;
-               skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
        }
 
        n = skb + 1;
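
The skb_clone() hunk above shows the calling convention after this change: since skb_copy_ubufs() now clears SKBTX_DEV_ZEROCOPY itself, callers are left with a plain check-and-copy idiom (sketched standalone for clarity):

	/* Detach userspace pages before the skb data can be shared. */
	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
		if (skb_copy_ubufs(skb, gfp_mask))
			return NULL;	/* could not allocate kernel copies */
	}

The same removal of the open-coded flag clearing appears below in pskb_copy() and pskb_expand_head().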
@@ -803,11 +829,10 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
                                n = NULL;
                                goto out;
                        }
-                       skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
                }
                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                        skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
-                       get_page(skb_shinfo(n)->frags[i].page);
+                       skb_frag_ref(skb, i);
                }
                skb_shinfo(n)->nr_frags = i;
        }
@@ -896,10 +921,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
                if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
                        if (skb_copy_ubufs(skb, gfp_mask))
                                goto nofrags;
-                       skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
                }
                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-                       get_page(skb_shinfo(skb)->frags[i].page);
+                       skb_frag_ref(skb, i);
 
                if (skb_has_frag_list(skb))
                        skb_clone_fraglist(skb);
@@ -1166,20 +1190,20 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len)
                goto drop_pages;
 
        for (; i < nfrags; i++) {
-               int end = offset + skb_shinfo(skb)->frags[i].size;
+               int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]);
 
                if (end < len) {
                        offset = end;
                        continue;
                }
 
-               skb_shinfo(skb)->frags[i++].size = len - offset;
+               skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset);
 
 drop_pages:
                skb_shinfo(skb)->nr_frags = i;
 
                for (; i < nfrags; i++)
-                       put_page(skb_shinfo(skb)->frags[i].page);
+                       skb_frag_unref(skb, i);
 
                if (skb_has_frag_list(skb))
                        skb_drop_fraglist(skb);
@@ -1282,9 +1306,11 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
        /* Estimate size of pulled pages. */
        eat = delta;
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-               if (skb_shinfo(skb)->frags[i].size >= eat)
+               int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+
+               if (size >= eat)
                        goto pull_pages;
-               eat -= skb_shinfo(skb)->frags[i].size;
+               eat -= size;
        }
 
        /* If we need to update the frag list, we are in trouble.
@@ -1347,14 +1373,16 @@ pull_pages:
        eat = delta;
        k = 0;
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-               if (skb_shinfo(skb)->frags[i].size <= eat) {
-                       put_page(skb_shinfo(skb)->frags[i].page);
-                       eat -= skb_shinfo(skb)->frags[i].size;
+               int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+
+               if (size <= eat) {
+                       skb_frag_unref(skb, i);
+                       eat -= size;
                } else {
                        skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
                        if (eat) {
                                skb_shinfo(skb)->frags[k].page_offset += eat;
-                               skb_shinfo(skb)->frags[k].size -= eat;
+                               skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
                                eat = 0;
                        }
                        k++;
@@ -1409,7 +1437,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 
                WARN_ON(start > offset + len);
 
-               end = start + skb_shinfo(skb)->frags[i].size;
+               end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
                if ((copy = end - offset) > 0) {
                        u8 *vaddr;
 
@@ -1607,7 +1635,8 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
        for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
                const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
-               if (__splice_segment(f->page, f->page_offset, f->size,
+               if (__splice_segment(skb_frag_page(f),
+                                    f->page_offset, skb_frag_size(f),
                                     offset, len, skb, spd, 0, sk, pipe))
                        return 1;
        }
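
skb_frag_page() above, and the skb_frag_address() used in skb_append_datato_frags() further down, abstract direct frag->page access; a minimal sketch under the same bare-page-pointer assumption:

	static inline struct page *skb_frag_page(const skb_frag_t *frag)
	{
		return frag->page;
	}

	/* kernel virtual address of the frag payload (lowmem pages only) */
	static inline void *skb_frag_address(const skb_frag_t *frag)
	{
		return page_address(skb_frag_page(frag)) + frag->page_offset;
	}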
@@ -1717,7 +1746,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
 
                WARN_ON(start > offset + len);
 
-               end = start + frag->size;
+               end = start + skb_frag_size(frag);
                if ((copy = end - offset) > 0) {
                        u8 *vaddr;
 
@@ -1790,7 +1819,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 
                WARN_ON(start > offset + len);
 
-               end = start + skb_shinfo(skb)->frags[i].size;
+               end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
                if ((copy = end - offset) > 0) {
                        __wsum csum2;
                        u8 *vaddr;
@@ -1865,7 +1894,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 
                WARN_ON(start > offset + len);
 
-               end = start + skb_shinfo(skb)->frags[i].size;
+               end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
                if ((copy = end - offset) > 0) {
                        __wsum csum2;
                        u8 *vaddr;
@@ -2138,7 +2167,7 @@ static inline void skb_split_no_header(struct sk_buff *skb,
        skb->data_len             = len - pos;
 
        for (i = 0; i < nfrags; i++) {
-               int size = skb_shinfo(skb)->frags[i].size;
+               int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
 
                if (pos + size > len) {
                        skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -2152,10 +2181,10 @@ static inline void skb_split_no_header(struct sk_buff *skb,
                                 *    where splitting is expensive.
                                 * 2. Split accurately. That is what we do here.
                                 */
-                               get_page(skb_shinfo(skb)->frags[i].page);
+                               skb_frag_ref(skb, i);
                                skb_shinfo(skb1)->frags[0].page_offset += len - pos;
-                               skb_shinfo(skb1)->frags[0].size -= len - pos;
-                               skb_shinfo(skb)->frags[i].size  = len - pos;
+                               skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
+                               skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
                                skb_shinfo(skb)->nr_frags++;
                        }
                        k++;
@@ -2227,12 +2256,13 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
         * commit all, so that we don't have to undo partial changes
         */
        if (!to ||
-           !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) {
+           !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
+                             fragfrom->page_offset)) {
                merge = -1;
        } else {
                merge = to - 1;
 
-               todo -= fragfrom->size;
+               todo -= skb_frag_size(fragfrom);
                if (todo < 0) {
                        if (skb_prepare_for_shift(skb) ||
                            skb_prepare_for_shift(tgt))
@@ -2242,8 +2272,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
                        fragfrom = &skb_shinfo(skb)->frags[from];
                        fragto = &skb_shinfo(tgt)->frags[merge];
 
-                       fragto->size += shiftlen;
-                       fragfrom->size -= shiftlen;
+                       skb_frag_size_add(fragto, shiftlen);
+                       skb_frag_size_sub(fragfrom, shiftlen);
                        fragfrom->page_offset += shiftlen;
 
                        goto onlymerged;
@@ -2267,20 +2297,20 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
                fragfrom = &skb_shinfo(skb)->frags[from];
                fragto = &skb_shinfo(tgt)->frags[to];
 
-               if (todo >= fragfrom->size) {
+               if (todo >= skb_frag_size(fragfrom)) {
                        *fragto = *fragfrom;
-                       todo -= fragfrom->size;
+                       todo -= skb_frag_size(fragfrom);
                        from++;
                        to++;
 
                } else {
-                       get_page(fragfrom->page);
+                       __skb_frag_ref(fragfrom);
                        fragto->page = fragfrom->page;
                        fragto->page_offset = fragfrom->page_offset;
-                       fragto->size = todo;
+                       skb_frag_size_set(fragto, todo);
 
                        fragfrom->page_offset += todo;
-                       fragfrom->size -= todo;
+                       skb_frag_size_sub(fragfrom, todo);
                        todo = 0;
 
                        to++;
@@ -2295,8 +2325,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
                fragfrom = &skb_shinfo(skb)->frags[0];
                fragto = &skb_shinfo(tgt)->frags[merge];
 
-               fragto->size += fragfrom->size;
-               put_page(fragfrom->page);
+               skb_frag_size_add(fragto, skb_frag_size(fragfrom));
+               __skb_frag_unref(fragfrom);
        }
 
        /* Reposition in the original skb */
@@ -2393,7 +2423,7 @@ next_skb:
 
        while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
                frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
-               block_limit = frag->size + st->stepped_offset;
+               block_limit = skb_frag_size(frag) + st->stepped_offset;
 
                if (abs_offset < block_limit) {
                        if (!st->frag_data)
@@ -2411,7 +2441,7 @@ next_skb:
                }
 
                st->frag_idx++;
-               st->stepped_offset += frag->size;
+               st->stepped_offset += skb_frag_size(frag);
        }
 
        if (st->frag_data) {
@@ -2541,14 +2571,13 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
                left = PAGE_SIZE - frag->page_offset;
                copy = (length > left)? left : length;
 
-               ret = getfrag(from, (page_address(frag->page) +
-                           frag->page_offset + frag->size),
+               ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag),
                            offset, copy, 0, skb);
                if (ret < 0)
                        return -EFAULT;
 
                /* copy was successful so update the size parameters */
-               frag->size += copy;
+               skb_frag_size_add(frag, copy);
                skb->len += copy;
                skb->data_len += copy;
                offset += copy;
@@ -2694,12 +2723,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
 
                while (pos < offset + len && i < nfrags) {
                        *frag = skb_shinfo(skb)->frags[i];
-                       get_page(frag->page);
-                       size = frag->size;
+                       __skb_frag_ref(frag);
+                       size = skb_frag_size(frag);
 
                        if (pos < offset) {
                                frag->page_offset += offset - pos;
-                               frag->size -= offset - pos;
+                               skb_frag_size_sub(frag, offset - pos);
                        }
 
                        skb_shinfo(nskb)->nr_frags++;
@@ -2708,7 +2737,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
                                i++;
                                pos += size;
                        } else {
-                               frag->size -= pos + size - (offset + len);
+                               skb_frag_size_sub(frag, pos + size - (offset + len));
                                goto skip_fraglist;
                        }
 
@@ -2788,7 +2817,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
                } while (--i);
 
                frag->page_offset += offset;
-               frag->size -= offset;
+               skb_frag_size_sub(frag, offset);
 
                skb->truesize -= skb->data_len;
                skb->len -= skb->data_len;
@@ -2840,7 +2869,7 @@ merge:
                unsigned int eat = offset - headlen;
 
                skbinfo->frags[0].page_offset += eat;
-               skbinfo->frags[0].size -= eat;
+               skb_frag_size_sub(&skbinfo->frags[0], eat);
                skb->data_len -= eat;
                skb->len -= eat;
                offset = headlen;
@@ -2911,13 +2940,13 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 
                WARN_ON(start > offset + len);
 
-               end = start + skb_shinfo(skb)->frags[i].size;
+               end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
                if ((copy = end - offset) > 0) {
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
                        if (copy > len)
                                copy = len;
-                       sg_set_page(&sg[elt], frag->page, copy,
+                       sg_set_page(&sg[elt], skb_frag_page(frag), copy,
                                        frag->page_offset+offset-start);
                        elt++;
                        if (!(len -= copy))