[INET]: Generalise the tcp_listen_ lock routines
author		Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
		Wed, 10 Aug 2005 03:08:09 +0000 (20:08 -0700)
committer	David S. Miller <davem@sunset.davemloft.net>
		Mon, 29 Aug 2005 22:41:49 +0000 (15:41 -0700)
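
Generalise the listening-hash lock routines so that they live with the
rest of the inet hashing code and take the hash table as a parameter:
tcp_listen_lock()/tcp_listen_unlock() move from include/net/tcp.h to
include/net/inet_hashtables.h as inet_listen_lock()/inet_listen_unlock(),
tcp_listen_wlock() moves to net/ipv4/inet_hashtables.c as
inet_listen_wlock(), and __tcp_v4_hash() becomes the generic
__inet_hash(). With struct inet_hashinfo passed explicitly, other INET
transport protocols can share these routines instead of growing private
copies. The conversion at each call site is mechanical:

	/* before: helpers hard-wired to tcp_hashinfo */
	tcp_listen_lock();
	/* ... walk the listening hash, possibly sleeping ... */
	tcp_listen_unlock();

	/* after: the hash table is an explicit argument */
	inet_listen_lock(&tcp_hashinfo);
	/* ... same walk ... */
	inet_listen_unlock(&tcp_hashinfo);
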
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/inet_hashtables.h
include/net/tcp.h
net/ipv4/inet_hashtables.c
net/ipv4/tcp_diag.c
net/ipv4/tcp_ipv4.c
net/ipv6/tcp_ipv6.c

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index da07411..f5d6512 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/tcp.h>         /* only for TCP_LISTEN, damn :-( */
 #include <linux/types.h>
+#include <linux/wait.h>
 
 #include <net/sock.h>
 
+#include <asm/atomic.h>
+
 /* This is for all connections with a full identity, no wildcards.
  * New scheme, half the table is for TIME_WAIT, the other half is
  * for the rest.  I'll experiment with dynamic table growth later.
@@ -192,4 +196,48 @@ static inline void inet_inherit_port(struct inet_hashinfo *table,
 
 extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk);
 
+extern void inet_listen_wlock(struct inet_hashinfo *hashinfo);
+
+/*
+ * - We may sleep inside this lock.
+ * - If sleeping is not required (or called from BH),
+ *   use plain read_(un)lock(&inet_hashinfo.lhash_lock).
+ */
+static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
+{
+       /* read_lock synchronizes to candidates to writers */
+       read_lock(&hashinfo->lhash_lock);
+       atomic_inc(&hashinfo->lhash_users);
+       read_unlock(&hashinfo->lhash_lock);
+}
+
+static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
+{
+       if (atomic_dec_and_test(&hashinfo->lhash_users))
+               wake_up(&hashinfo->lhash_wait);
+}
+
+static inline void __inet_hash(struct inet_hashinfo *hashinfo,
+                              struct sock *sk, const int listen_possible)
+{
+       struct hlist_head *list;
+       rwlock_t *lock;
+
+       BUG_TRAP(sk_unhashed(sk));
+       if (listen_possible && sk->sk_state == TCP_LISTEN) {
+               list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+               lock = &hashinfo->lhash_lock;
+               inet_listen_wlock(hashinfo);
+       } else {
+               sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size);
+               list = &hashinfo->ehash[sk->sk_hashent].chain;
+               lock = &hashinfo->ehash[sk->sk_hashent].lock;
+               write_lock(lock);
+       }
+       __sk_add_node(sk, list);
+       sock_prot_inc_use(sk->sk_prot);
+       write_unlock(lock);
+       if (listen_possible && sk->sk_state == TCP_LISTEN)
+               wake_up(&hashinfo->lhash_wait);
+}
 #endif /* _INET_HASHTABLES_H */
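
The scheme these helpers implement: a reader that may need to sleep takes
lhash_lock only long enough to bump lhash_users, then drops it, while
inet_listen_wlock() waits for that count to drain to zero before a writer
may touch the chains. A minimal sketch of a sleeping reader follows; the
walk_listening() name is hypothetical, standing in for what tcp_get_idx()
and tcpdiag_dump() do after this patch:

	/* Hypothetical reader: pins lhash_users instead of holding
	 * lhash_lock across the walk, so it is free to sleep. */
	static void walk_listening(struct inet_hashinfo *hashinfo)
	{
		int i;

		inet_listen_lock(hashinfo);	/* inc lhash_users, drop rwlock */
		for (i = 0; i < INET_LHTABLE_SIZE; i++) {
			struct sock *sk;
			struct hlist_node *node;

			sk_for_each(sk, node, &hashinfo->listening_hash[i]) {
				/* ... may sleep, e.g. seq_file output ... */
			}
		}
		inet_listen_unlock(hashinfo);	/* dec; wake writers at zero */
	}
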
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 99e4769..bc110cc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1447,27 +1447,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req,
 
 extern void tcp_enter_memory_pressure(void);
 
-extern void tcp_listen_wlock(void);
-
-/* - We may sleep inside this lock.
- * - If sleeping is not required (or called from BH),
- *   use plain read_(un)lock(&inet_hashinfo.lhash_lock).
- */
-
-static inline void tcp_listen_lock(void)
-{
-       /* read_lock synchronizes to candidates to writers */
-       read_lock(&tcp_hashinfo.lhash_lock);
-       atomic_inc(&tcp_hashinfo.lhash_users);
-       read_unlock(&tcp_hashinfo.lhash_lock);
-}
-
-static inline void tcp_listen_unlock(void)
-{
-       if (atomic_dec_and_test(&tcp_hashinfo.lhash_users))
-               wake_up(&tcp_hashinfo.lhash_wait);
-}
-
 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
 {
        return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 33d6cbe..06cbc6f 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -15,7 +15,9 @@
 
 #include <linux/config.h>
 #include <linux/module.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/wait.h>
 
 #include <net/inet_hashtables.h>
 
@@ -89,3 +91,33 @@ void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
 }
 
 EXPORT_SYMBOL(inet_put_port);
+
+/*
+ * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
+ * Look, when several writers sleep and reader wakes them up, all but one
+ * immediately hit write lock and grab all the cpus. Exclusive sleep solves
+ * this, _but_ remember, it adds useless work on UP machines (wake up each
+ * exclusive lock release). It should be ifdefed really.
+ */
+void inet_listen_wlock(struct inet_hashinfo *hashinfo)
+{
+       write_lock(&hashinfo->lhash_lock);
+
+       if (atomic_read(&hashinfo->lhash_users)) {
+               DEFINE_WAIT(wait);
+
+               for (;;) {
+                       prepare_to_wait_exclusive(&hashinfo->lhash_wait,
+                                                 &wait, TASK_UNINTERRUPTIBLE);
+                       if (!atomic_read(&hashinfo->lhash_users))
+                               break;
+                       write_unlock_bh(&hashinfo->lhash_lock);
+                       schedule();
+                       write_lock_bh(&hashinfo->lhash_lock);
+               }
+
+               finish_wait(&hashinfo->lhash_wait, &wait);
+       }
+}
+
+EXPORT_SYMBOL(inet_listen_wlock);
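
On the writer side, inet_listen_wlock() returns with lhash_lock held for
writing, sleeping in exclusive waits until lhash_users drops to zero if
readers currently have the table pinned. Callers disable BHs beforehand,
as tcp_unhash() does. A rough sketch of that unhash pattern, assuming sk
is a hashed TCP_LISTEN socket (cf. tcp_unhash() in net/ipv4/tcp_ipv4.c):

	local_bh_disable();
	inet_listen_wlock(&tcp_hashinfo); /* write-holds lhash_lock on return */
	if (__sk_del_node_init(sk))
		sock_prot_dec_use(sk->sk_prot);
	write_unlock_bh(&tcp_hashinfo.lhash_lock); /* also re-enables BHs */
	wake_up(&tcp_hashinfo.lhash_wait); /* kick the next exclusive waiter */
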
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 0ae738b..1a89a03 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -589,7 +589,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
        if (cb->args[0] == 0) {
                if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV)))
                        goto skip_listen_ht;
-               tcp_listen_lock();
+               inet_listen_lock(&tcp_hashinfo);
                for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
                        struct sock *sk;
                        struct hlist_node *node;
@@ -613,7 +613,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
                                        goto syn_recv;
 
                                if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
-                                       tcp_listen_unlock();
+                                       inet_listen_unlock(&tcp_hashinfo);
                                        goto done;
                                }
 
@@ -622,7 +622,7 @@ syn_recv:
                                        goto next_listen;
 
                                if (tcpdiag_dump_reqs(skb, sk, cb) < 0) {
-                                       tcp_listen_unlock();
+                                       inet_listen_unlock(&tcp_hashinfo);
                                        goto done;
                                }
 
@@ -636,7 +636,7 @@ next_listen:
                        cb->args[3] = 0;
                        cb->args[4] = 0;
                }
-               tcp_listen_unlock();
+               inet_listen_unlock(&tcp_hashinfo);
 skip_listen_ht:
                cb->args[0] = 1;
                s_i = num = s_num = 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f5373f9..5f9ad95 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -228,62 +228,11 @@ fail:
        return ret;
 }
 
-/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
- * Look, when several writers sleep and reader wakes them up, all but one
- * immediately hit write lock and grab all the cpus. Exclusive sleep solves
- * this, _but_ remember, it adds useless work on UP machines (wake up each
- * exclusive lock release). It should be ifdefed really.
- */
-
-void tcp_listen_wlock(void)
-{
-       write_lock(&tcp_hashinfo.lhash_lock);
-
-       if (atomic_read(&tcp_hashinfo.lhash_users)) {
-               DEFINE_WAIT(wait);
-
-               for (;;) {
-                       prepare_to_wait_exclusive(&tcp_hashinfo.lhash_wait,
-                                               &wait, TASK_UNINTERRUPTIBLE);
-                       if (!atomic_read(&tcp_hashinfo.lhash_users))
-                               break;
-                       write_unlock_bh(&tcp_hashinfo.lhash_lock);
-                       schedule();
-                       write_lock_bh(&tcp_hashinfo.lhash_lock);
-               }
-
-               finish_wait(&tcp_hashinfo.lhash_wait, &wait);
-       }
-}
-
-static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
-{
-       struct hlist_head *list;
-       rwlock_t *lock;
-
-       BUG_TRAP(sk_unhashed(sk));
-       if (listen_possible && sk->sk_state == TCP_LISTEN) {
-               list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
-               lock = &tcp_hashinfo.lhash_lock;
-               tcp_listen_wlock();
-       } else {
-               sk->sk_hashent = inet_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
-               list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
-               lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
-               write_lock(lock);
-       }
-       __sk_add_node(sk, list);
-       sock_prot_inc_use(sk->sk_prot);
-       write_unlock(lock);
-       if (listen_possible && sk->sk_state == TCP_LISTEN)
-               wake_up(&tcp_hashinfo.lhash_wait);
-}
-
 static void tcp_v4_hash(struct sock *sk)
 {
        if (sk->sk_state != TCP_CLOSE) {
                local_bh_disable();
-               __tcp_v4_hash(sk, 1);
+               __inet_hash(&tcp_hashinfo, sk, 1);
                local_bh_enable();
        }
 }
@@ -297,7 +246,7 @@ void tcp_unhash(struct sock *sk)
 
        if (sk->sk_state == TCP_LISTEN) {
                local_bh_disable();
-               tcp_listen_wlock();
+               inet_listen_wlock(&tcp_hashinfo);
                lock = &tcp_hashinfo.lhash_lock;
        } else {
                struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent];
@@ -624,7 +573,7 @@ ok:
                inet_bind_hash(sk, tb, port);
                if (sk_unhashed(sk)) {
                        inet_sk(sk)->sport = htons(port);
-                       __tcp_v4_hash(sk, 0);
+                       __inet_hash(&tcp_hashinfo, sk, 0);
                }
                spin_unlock(&head->lock);
 
@@ -641,7 +590,7 @@ ok:
        tb  = inet_sk(sk)->bind_hash;
        spin_lock_bh(&head->lock);
        if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-               __tcp_v4_hash(sk, 0);
+               __inet_hash(&tcp_hashinfo, sk, 0);
                spin_unlock_bh(&head->lock);
                return 0;
        } else {
@@ -1479,7 +1428,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
        newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
        tcp_initialize_rcv_mss(newsk);
 
-       __tcp_v4_hash(newsk, 0);
+       __inet_hash(&tcp_hashinfo, newsk, 0);
        __inet_inherit_port(&tcp_hashinfo, sk, newsk);
 
        return newsk;
@@ -2102,12 +2051,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
        void *rc;
        struct tcp_iter_state* st = seq->private;
 
-       tcp_listen_lock();
+       inet_listen_lock(&tcp_hashinfo);
        st->state = TCP_SEQ_STATE_LISTENING;
        rc        = listening_get_idx(seq, &pos);
 
        if (!rc) {
-               tcp_listen_unlock();
+               inet_listen_unlock(&tcp_hashinfo);
                local_bh_disable();
                st->state = TCP_SEQ_STATE_ESTABLISHED;
                rc        = established_get_idx(seq, pos);
@@ -2140,7 +2089,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
        case TCP_SEQ_STATE_LISTENING:
                rc = listening_get_next(seq, v);
                if (!rc) {
-                       tcp_listen_unlock();
+                       inet_listen_unlock(&tcp_hashinfo);
                        local_bh_disable();
                        st->state = TCP_SEQ_STATE_ESTABLISHED;
                        rc        = established_get_first(seq);
@@ -2168,7 +2117,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
                }
        case TCP_SEQ_STATE_LISTENING:
                if (v != SEQ_START_TOKEN)
-                       tcp_listen_unlock();
+                       inet_listen_unlock(&tcp_hashinfo);
                break;
        case TCP_SEQ_STATE_TIME_WAIT:
        case TCP_SEQ_STATE_ESTABLISHED:
@@ -2431,7 +2380,6 @@ void __init tcp_v4_init(struct net_proto_family *ops)
 EXPORT_SYMBOL(ipv4_specific);
 EXPORT_SYMBOL(inet_bind_bucket_create);
 EXPORT_SYMBOL(tcp_hashinfo);
-EXPORT_SYMBOL(tcp_listen_wlock);
 EXPORT_SYMBOL(tcp_prot);
 EXPORT_SYMBOL(tcp_unhash);
 EXPORT_SYMBOL(tcp_v4_conn_request);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 362ef5a..93a66b9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -229,7 +229,7 @@ static __inline__ void __tcp_v6_hash(struct sock *sk)
        if (sk->sk_state == TCP_LISTEN) {
                list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
                lock = &tcp_hashinfo.lhash_lock;
-               tcp_listen_wlock();
+               inet_listen_wlock(&tcp_hashinfo);
        } else {
                sk->sk_hashent = tcp_v6_sk_hashfn(sk);
                list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;