tcp: md5: remove spinlock usage in fast path
[pandora-kernel.git] / net/ipv4/tcp.c
index 34f5db1..573d786 100644
@@ -481,14 +481,12 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                         !tp->urg_data ||
                         before(tp->urg_seq, tp->copied_seq) ||
                         !before(tp->urg_seq, tp->rcv_nxt)) {
-                       struct sk_buff *skb;
 
                        answ = tp->rcv_nxt - tp->copied_seq;
 
-                       /* Subtract 1, if FIN is in queue. */
-                       skb = skb_peek_tail(&sk->sk_receive_queue);
-                       if (answ && skb)
-                               answ -= tcp_hdr(skb)->fin;
+                       /* Subtract 1, if FIN was received */
+                       if (answ && sock_flag(sk, SOCK_DONE))
+                               answ--;
                } else
                        answ = tp->urg_seq - tp->copied_seq;
                release_sock(sk);
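
For context, the branch above backs SIOCINQ (a.k.a. FIONREAD), the ioctl user space uses to ask how many unread in-order bytes are queued; the change only alters how a received FIN is discounted from that count. A minimal user-space sketch, assuming an already-connected descriptor (the helper name is illustrative):

#include <sys/ioctl.h>
#include <linux/sockios.h>      /* SIOCINQ */

/* Number of unread payload bytes queued on a connected TCP socket,
 * or -1 on error.  A FIN that has already arrived is not counted,
 * which is what the SOCK_DONE test above arranges.
 */
static int tcp_unread_bytes(int fd)
{
        int answ = 0;

        if (ioctl(fd, SIOCINQ, &answ) < 0)
                return -1;
        return answ;
}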
@@ -701,11 +699,12 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
        skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
        if (skb) {
                if (sk_wmem_schedule(sk, skb->truesize)) {
+                       skb_reserve(skb, sk->sk_prot->max_header);
                        /*
                         * Make sure that we have exactly size bytes
                         * available to the caller, no more, no less.
                         */
-                       skb_reserve(skb, skb_tailroom(skb) - size);
+                       skb->reserved_tailroom = skb->end - skb->tail - size;
                        return skb;
                }
                __kfree_skb(skb);
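
The replacement reserves the protocol headroom right away and then records, in the new reserved_tailroom field, how much of the remaining tailroom must stay hidden so the caller sees exactly size bytes. A rough model of that bookkeeping (illustrative structure and helper, not the kernel's skbuff code):

/* Toy model: available room is whatever lies between tail and end,
 * minus the slice parked in reserved_tailroom by the hunk above.
 */
struct toy_skb {
        unsigned int end;               /* end of the allocated data area */
        unsigned int tail;              /* current end of the data        */
        unsigned int reserved_tailroom; /* room callers must not consume  */
};

static unsigned int toy_availroom(const struct toy_skb *skb)
{
        return skb->end - skb->tail - skb->reserved_tailroom;
}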
@@ -739,7 +738,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
                           old_size_goal + mss_now > xmit_size_goal)) {
                        xmit_size_goal = old_size_goal;
                } else {
-                       tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
+                       tp->xmit_size_goal_segs =
+                               min_t(u16, xmit_size_goal / mss_now,
+                                     sk->sk_gso_max_segs);
                        xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
                }
        }
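
The added min_t() keeps the size goal within the number of segments the device claims it can handle in a single GSO burst. A worked sketch with assumed numbers (an MSS of 1448 and a device limit of 16 segments are made up for illustration):

#include <stdio.h>

/* simplified stand-in for the kernel's min_t() */
#define min_t(type, a, b) ((type)(a) < (type)(b) ? (type)(a) : (type)(b))

int main(void)
{
        unsigned int mss_now = 1448;            /* assumed MSS              */
        unsigned int xmit_size_goal = 65535;    /* goal before clamping     */
        unsigned short gso_max_segs = 16;       /* assumed sk_gso_max_segs  */
        unsigned short segs;

        segs = min_t(unsigned short, xmit_size_goal / mss_now, gso_max_segs);
        xmit_size_goal = segs * mss_now;

        /* 65535 / 1448 = 45 segments, clamped to 16 -> 23168-byte goal */
        printf("%u segs, %u bytes\n", segs, xmit_size_goal);
        return 0;
}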
@@ -850,8 +851,7 @@ new_segment:
 wait_for_sndbuf:
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
-               if (copied)
-                       tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+               tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
                if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
                        goto do_error;
@@ -860,7 +860,7 @@ wait_for_memory:
        }
 
 out:
-       if (copied)
+       if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
                tcp_push(sk, flags, mss_now, tp->nonagle);
        return copied;
 
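MSG_SENDPAGE_NOTLAST is set by in-kernel callers on every chunk of a multi-page write except the final one, so the flush at out: is deferred until the last chunk instead of firing once per page. A hedged sketch of such a caller (the helper name and page array are assumptions; error handling is trimmed):

#include <linux/net.h>
#include <linux/socket.h>
#include <linux/mm.h>

/* Push a run of pages through ->sendpage(), letting TCP coalesce and
 * flushing only on the final chunk, as the hunk above expects.
 */
static int send_page_run(struct socket *sock, struct page **pages,
                         int npages, size_t len)
{
        int i, ret;

        for (i = 0; i < npages; i++) {
                int flags = MSG_DONTWAIT;

                if (i < npages - 1)
                        flags |= MSG_SENDPAGE_NOTLAST;  /* more is coming */

                ret = kernel_sendpage(sock, pages[i], 0, len, flags);
                if (ret < 0)
                        return ret;
        }
        return 0;
}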
@@ -995,10 +995,9 @@ new_segment:
                                copy = seglen;
 
                        /* Where to copy to? */
-                       if (skb_tailroom(skb) > 0) {
+                       if (skb_availroom(skb) > 0) {
                                /* We have some space in skb head. Superb! */
-                               if (copy > skb_tailroom(skb))
-                                       copy = skb_tailroom(skb);
+                               copy = min_t(int, copy, skb_availroom(skb));
                                err = skb_add_data_nocache(sk, skb, from, copy);
                                if (err)
                                        goto do_fault;
@@ -1588,8 +1587,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                }
 
 #ifdef CONFIG_NET_DMA
-               if (tp->ucopy.dma_chan)
-                       dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+               if (tp->ucopy.dma_chan) {
+                       if (tp->rcv_wnd == 0 &&
+                           !skb_queue_empty(&sk->sk_async_wait_queue)) {
+                               tcp_service_net_dma(sk, true);
+                               tcp_cleanup_rbuf(sk, copied);
+                       } else
+                               dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+               }
 #endif
                if (copied >= target) {
                        /* Do not sleep, just process backlog. */
@@ -2392,7 +2397,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                /* Cap the max timeout in ms TCP will retry/retrans
                 * before giving up and aborting (ETIMEDOUT) a connection.
                 */
-               icsk->icsk_user_timeout = msecs_to_jiffies(val);
+               if (val < 0)
+                       err = -EINVAL;
+               else
+                       icsk->icsk_user_timeout = msecs_to_jiffies(val);
                break;
        default:
                err = -ENOPROTOOPT;
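
TCP_USER_TIMEOUT carries an unsigned millisecond count; with the added check a negative value is now rejected with EINVAL instead of being converted into an enormous timeout. Typical user-space usage (the 30-second value is only an example):

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#ifndef TCP_USER_TIMEOUT
#define TCP_USER_TIMEOUT 18     /* value from linux/tcp.h */
#endif

/* Abort the connection (ETIMEDOUT) if transmitted data remains
 * unacknowledged for more than 30 seconds.
 */
static int set_user_timeout(int fd)
{
        unsigned int timeout_ms = 30000;

        return setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
                          &timeout_ms, sizeof(timeout_ms));
}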
@@ -2855,9 +2863,8 @@ int tcp_gro_complete(struct sk_buff *skb)
 EXPORT_SYMBOL(tcp_gro_complete);
 
 #ifdef CONFIG_TCP_MD5SIG
-static unsigned long tcp_md5sig_users;
-static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool;
-static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
+static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly;
+static DEFINE_MUTEX(tcp_md5sig_mutex);
 
 static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
 {
@@ -2872,30 +2879,14 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
        free_percpu(pool);
 }
 
-void tcp_free_md5sig_pool(void)
-{
-       struct tcp_md5sig_pool __percpu *pool = NULL;
-
-       spin_lock_bh(&tcp_md5sig_pool_lock);
-       if (--tcp_md5sig_users == 0) {
-               pool = tcp_md5sig_pool;
-               tcp_md5sig_pool = NULL;
-       }
-       spin_unlock_bh(&tcp_md5sig_pool_lock);
-       if (pool)
-               __tcp_free_md5sig_pool(pool);
-}
-EXPORT_SYMBOL(tcp_free_md5sig_pool);
-
-static struct tcp_md5sig_pool __percpu *
-__tcp_alloc_md5sig_pool(struct sock *sk)
+static void __tcp_alloc_md5sig_pool(void)
 {
        int cpu;
        struct tcp_md5sig_pool __percpu *pool;
 
        pool = alloc_percpu(struct tcp_md5sig_pool);
        if (!pool)
-               return NULL;
+               return;
 
        for_each_possible_cpu(cpu) {
                struct crypto_hash *hash;
@@ -2906,53 +2897,27 @@ __tcp_alloc_md5sig_pool(struct sock *sk)
 
                per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
        }
-       return pool;
+       /* before setting tcp_md5sig_pool, we must commit all writes
+        * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool()
+        */
+       smp_wmb();
+       tcp_md5sig_pool = pool;
+       return;
 out_free:
        __tcp_free_md5sig_pool(pool);
-       return NULL;
 }
 
-struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
+bool tcp_alloc_md5sig_pool(void)
 {
-       struct tcp_md5sig_pool __percpu *pool;
-       int alloc = 0;
-
-retry:
-       spin_lock_bh(&tcp_md5sig_pool_lock);
-       pool = tcp_md5sig_pool;
-       if (tcp_md5sig_users++ == 0) {
-               alloc = 1;
-               spin_unlock_bh(&tcp_md5sig_pool_lock);
-       } else if (!pool) {
-               tcp_md5sig_users--;
-               spin_unlock_bh(&tcp_md5sig_pool_lock);
-               cpu_relax();
-               goto retry;
-       } else
-               spin_unlock_bh(&tcp_md5sig_pool_lock);
-
-       if (alloc) {
-               /* we cannot hold spinlock here because this may sleep. */
-               struct tcp_md5sig_pool __percpu *p;
-
-               p = __tcp_alloc_md5sig_pool(sk);
-               spin_lock_bh(&tcp_md5sig_pool_lock);
-               if (!p) {
-                       tcp_md5sig_users--;
-                       spin_unlock_bh(&tcp_md5sig_pool_lock);
-                       return NULL;
-               }
-               pool = tcp_md5sig_pool;
-               if (pool) {
-                       /* oops, it has already been assigned. */
-                       spin_unlock_bh(&tcp_md5sig_pool_lock);
-                       __tcp_free_md5sig_pool(p);
-               } else {
-                       tcp_md5sig_pool = pool = p;
-                       spin_unlock_bh(&tcp_md5sig_pool_lock);
-               }
+       if (unlikely(!tcp_md5sig_pool)) {
+               mutex_lock(&tcp_md5sig_mutex);
+
+               if (!tcp_md5sig_pool)
+                       __tcp_alloc_md5sig_pool();
+
+               mutex_unlock(&tcp_md5sig_mutex);
        }
-       return pool;
+       return tcp_md5sig_pool != NULL;
 }
 EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
 
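This is the one-shot publish pattern the commit title refers to: the pool is built at most once under a mutex, its initialisation is committed with smp_wmb(), and only then is the global pointer stored, so readers need nothing more than a single ACCESS_ONCE() load, with no spinlock and no reference counting on the per-packet path. A stripped-down sketch of the idiom with generic names (struct thing and build_thing() are placeholders, not tcp.c symbols; headers are indicative):

#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/compiler.h>

struct thing;                            /* stands in for the per-cpu pool   */
static struct thing *build_thing(void);  /* sleeping allocator, illustrative */

static struct thing *global_thing __read_mostly;
static DEFINE_MUTEX(thing_mutex);

static bool thing_alloc(void)
{
        if (unlikely(!global_thing)) {
                mutex_lock(&thing_mutex);
                if (!global_thing) {            /* re-check under the lock */
                        struct thing *t = build_thing();

                        if (t) {
                                /* commit t's contents before publishing it */
                                smp_wmb();
                                global_thing = t;
                        }
                }
                mutex_unlock(&thing_mutex);
        }
        return global_thing != NULL;
}

static struct thing *thing_get(void)
{
        /* fast path: one load, no lock, no refcount */
        return ACCESS_ONCE(global_thing);
}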
@@ -2969,28 +2934,15 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
        struct tcp_md5sig_pool __percpu *p;
 
        local_bh_disable();
-
-       spin_lock(&tcp_md5sig_pool_lock);
-       p = tcp_md5sig_pool;
+       p = ACCESS_ONCE(tcp_md5sig_pool);
        if (p)
-               tcp_md5sig_users++;
-       spin_unlock(&tcp_md5sig_pool_lock);
-
-       if (p)
-               return this_cpu_ptr(p);
+               return __this_cpu_ptr(p);
 
        local_bh_enable();
        return NULL;
 }
 EXPORT_SYMBOL(tcp_get_md5sig_pool);
 
-void tcp_put_md5sig_pool(void)
-{
-       local_bh_enable();
-       tcp_free_md5sig_pool();
-}
-EXPORT_SYMBOL(tcp_put_md5sig_pool);
-
 int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
                        const struct tcphdr *th)
 {
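
With the refcounting gone, tcp_put_md5sig_pool() survives elsewhere in this change as a static inline in tcp.h that only calls local_bh_enable(), so callers keep the same get/hash/put bracket. A hedged sketch of that caller pattern (the function name is illustrative; the crypto init/final and pseudo-header steps of the real hashers are trimmed):

/* Roughly how tcp_v4_md5_hash_hdr()-style code drives the pool now. */
static int example_md5_hash(const struct tcp_md5sig_key *key,
                            const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;

        hp = tcp_get_md5sig_pool();     /* disables BHs, may return NULL */
        if (!hp)
                return 1;

        if (tcp_md5_hash_header(hp, th) ||
            tcp_md5_hash_key(hp, key))
                goto clear_hash;

        tcp_put_md5sig_pool();          /* now just local_bh_enable()    */
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
        return 1;
}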
@@ -3029,8 +2981,11 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
 
        for (i = 0; i < shi->nr_frags; ++i) {
                const struct skb_frag_struct *f = &shi->frags[i];
-               struct page *page = skb_frag_page(f);
-               sg_set_page(&sg, page, skb_frag_size(f), f->page_offset);
+               unsigned int offset = f->page_offset;
+               struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
+
+               sg_set_page(&sg, page, skb_frag_size(f),
+                           offset_in_page(offset));
                if (crypto_hash_update(desc, &sg, skb_frag_size(f)))
                        return 1;
        }
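
An skb fragment can point deep inside a compound (high-order) page, so f->page_offset may exceed PAGE_SIZE; the fix above steps forward to the right constituent page and hashes from the in-page remainder. The arithmetic, with an assumed 4 KiB page size and a made-up offset:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
        unsigned int offset = 9000;     /* frag offset inside a compound page */

        /* which 4 KiB constituent page, and where inside it */
        unsigned int page_step = offset >> PAGE_SHIFT;      /* 9000 / 4096 = 2   */
        unsigned int in_page   = offset & (PAGE_SIZE - 1);  /* 9000 - 8192 = 808 */

        printf("advance %u pages, hash from offset %u\n", page_step, in_page);
        return 0;
}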
@@ -3216,7 +3171,7 @@ void __init tcp_init(void)
 {
        struct sk_buff *skb = NULL;
        unsigned long limit;
-       int i, max_share, cnt;
+       int i, max_rshare, max_wshare, cnt;
        unsigned long jiffy = jiffies;
 
        BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -3280,15 +3235,16 @@ void __init tcp_init(void)
 
        /* Set per-socket limits to no more than 1/128 the pressure threshold */
        limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
-       max_share = min(4UL*1024*1024, limit);
+       max_wshare = min(4UL*1024*1024, limit);
+       max_rshare = min(6UL*1024*1024, limit);
 
        sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
        sysctl_tcp_wmem[1] = 16*1024;
-       sysctl_tcp_wmem[2] = max(64*1024, max_share);
+       sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
 
        sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
        sysctl_tcp_rmem[1] = 87380;
-       sysctl_tcp_rmem[2] = max(87380, max_share);
+       sysctl_tcp_rmem[2] = max(87380, max_rshare);
 
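sysctl_tcp_mem[1] is measured in pages, so shifting by (PAGE_SHIFT - 7) converts pages to bytes and divides by 128 in one step; the write cap then stays at 4 MB while the receive cap may grow to 6 MB. A worked example with assumed numbers (4 KiB pages and a pressure threshold of 262144 pages, i.e. 1 GiB):

#include <stdio.h>

int main(void)
{
        unsigned long page_shift = 12;           /* assumed 4 KiB pages               */
        unsigned long tcp_mem_pressure = 262144; /* assumed sysctl_tcp_mem[1], pages  */

        /* pages -> bytes is << PAGE_SHIFT; /128 is >> 7; combined shift below */
        unsigned long limit = tcp_mem_pressure << (page_shift - 7);

        unsigned long max_wshare = limit < 4UL*1024*1024 ? limit : 4UL*1024*1024;
        unsigned long max_rshare = limit < 6UL*1024*1024 ? limit : 6UL*1024*1024;

        /* limit = 262144 << 5 = 8 MB, so the 4 MB and 6 MB caps are what bind */
        printf("limit=%lu wmax=%lu rmax=%lu\n", limit, max_wshare, max_rshare);
        return 0;
}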
        printk(KERN_INFO "TCP: Hash tables configured "
               "(established %u bind %u)\n",