net: Use a percpu_counter for orphan_count
[pandora-kernel.git] / net / dccp / proto.c
index d0bd348..db225f9 100644 (file)
@@ -40,16 +40,10 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
 
 EXPORT_SYMBOL_GPL(dccp_statistics);
 
-atomic_t dccp_orphan_count = ATOMIC_INIT(0);
-
+struct percpu_counter dccp_orphan_count;
 EXPORT_SYMBOL_GPL(dccp_orphan_count);
 
-struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
-       .lhash_lock     = RW_LOCK_UNLOCKED,
-       .lhash_users    = ATOMIC_INIT(0),
-       .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
-};
-
+struct inet_hashinfo dccp_hashinfo;
 EXPORT_SYMBOL_GPL(dccp_hashinfo);
 
 /* the maximum queue length for tx in packets. 0 is no limit */
@@ -193,6 +187,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 
        dccp_init_xmit_timers(sk);
 
+       INIT_LIST_HEAD(&dp->dccps_featneg);
        /*
         * FIXME: We're hardcoding the CCID, and doing this at this point makes
         * the listening (master) sock get CCID control blocks, which is not
@@ -201,7 +196,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
         * setsockopt(CCIDs-I-want/accept). -acme
         */
        if (likely(ctl_sock_initialized)) {
-               int rc = dccp_feat_init(dmsk);
+               int rc = dccp_feat_init(sk);
 
                if (rc)
                        return rc;
@@ -267,7 +262,7 @@ void dccp_destroy_sock(struct sock *sk)
        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
 
        /* clean up feature negotiation state */
-       dccp_feat_clean(dmsk);
+       dccp_feat_list_purge(&dp->dccps_featneg);
 }
 
 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
@@ -277,6 +272,9 @@ static inline int dccp_listen_start(struct sock *sk, int backlog)
        struct dccp_sock *dp = dccp_sk(sk);
 
        dp->dccps_role = DCCP_ROLE_LISTEN;
+       /* do not start to listen if feature negotiation setup fails */
+       if (dccp_feat_finalise_settings(dp))
+               return -EPROTO;
        return inet_csk_listen_start(sk, backlog);
 }
 
@@ -466,42 +464,70 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
        return 0;
 }
 
-/* byte 1 is feature.  the rest is the preference list */
-static int dccp_setsockopt_change(struct sock *sk, int type,
-                                 struct dccp_so_feat __user *optval)
+static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
 {
-       struct dccp_so_feat opt;
-       u8 *val;
-       int rc;
+       u8 *list, len;
+       int i, rc;
 
-       if (copy_from_user(&opt, optval, sizeof(opt)))
-               return -EFAULT;
+       if (cscov < 0 || cscov > 15)
+               return -EINVAL;
        /*
-        * rfc4340: 6.1. Change Options
+        * Populate a list of permissible values, in the range cscov...15. This
+        * is necessary since feature negotiation of single values only works if
+        * both sides incidentally choose the same value. Since the list starts
+        * lowest-value first, negotiation will pick the smallest shared value.
         */
-       if (opt.dccpsf_len < 1)
+       if (cscov == 0)
+               return 0;
+       len = 16 - cscov;
+
+       list = kmalloc(len, GFP_KERNEL);
+       if (list == NULL)
+               return -ENOBUFS;
+
+       for (i = 0; i < len; i++)
+               list[i] = cscov + i;    /* keep cscov for the store below */
+
+       rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
+
+       if (rc == 0) {
+               if (rx)
+                       dccp_sk(sk)->dccps_pcrlen = cscov;
+               else
+                       dccp_sk(sk)->dccps_pcslen = cscov;
+       }
+       kfree(list);
+       return rc;
+}
+
+static int dccp_setsockopt_ccid(struct sock *sk, int type,
+                               char __user *optval, int optlen)
+{
+       u8 *val;        /* kernel copy of the user-supplied byte list */
+       int rc = 0;     /* stays 0 if no registration call runs */
+
+       if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
                return -EINVAL;
 
-       val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
-       if (!val)
+       val = kmalloc(optlen, GFP_KERNEL);
+       if (val == NULL)
                return -ENOMEM;
 
-       if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
-               rc = -EFAULT;
-               goto out_free_val;
+       if (copy_from_user(val, optval, optlen)) {
+               kfree(val);     /* no shared exit label: free here */
+               return -EFAULT;
        }
 
-       rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
-                             val, opt.dccpsf_len, GFP_KERNEL);
-       if (rc)
-               goto out_free_val;
+       lock_sock(sk);  /* socket lock spans both registrations */
+       if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
+               rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
 
-out:
-       return rc;
+       if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
+               rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
+       release_sock(sk);
 
-out_free_val:
        kfree(val);
-       goto out;
+       return rc;      /* 0, or the first failing registration's result */
 }
 
 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
@@ -510,7 +536,21 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
        struct dccp_sock *dp = dccp_sk(sk);
        int val, err = 0;
 
-       if (optlen < sizeof(int))
+       switch (optname) {
+       case DCCP_SOCKOPT_PACKET_SIZE:
+               DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
+               return 0;
+       case DCCP_SOCKOPT_CHANGE_L:
+       case DCCP_SOCKOPT_CHANGE_R:
+               DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
+               return 0;
+       case DCCP_SOCKOPT_CCID:
+       case DCCP_SOCKOPT_RX_CCID:
+       case DCCP_SOCKOPT_TX_CCID:
+               return dccp_setsockopt_ccid(sk, optname, optval, optlen);
+       }
+
+       if (optlen < (int)sizeof(int))
                return -EINVAL;
 
        if (get_user(val, (int __user *)optval))
@@ -521,53 +561,24 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 
        lock_sock(sk);
        switch (optname) {
-       case DCCP_SOCKOPT_PACKET_SIZE:
-               DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
-               err = 0;
-               break;
-       case DCCP_SOCKOPT_CHANGE_L:
-               if (optlen != sizeof(struct dccp_so_feat))
-                       err = -EINVAL;
-               else
-                       err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
-                                                    (struct dccp_so_feat __user *)
-                                                    optval);
-               break;
-       case DCCP_SOCKOPT_CHANGE_R:
-               if (optlen != sizeof(struct dccp_so_feat))
-                       err = -EINVAL;
-               else
-                       err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
-                                                    (struct dccp_so_feat __user *)
-                                                    optval);
-               break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                if (dp->dccps_role != DCCP_ROLE_SERVER)
                        err = -EOPNOTSUPP;
                else
                        dp->dccps_server_timewait = (val != 0);
                break;
-       case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
-               if (val < 0 || val > 15)
-                       err = -EINVAL;
-               else
-                       dp->dccps_pcslen = val;
+       case DCCP_SOCKOPT_SEND_CSCOV:
+               err = dccp_setsockopt_cscov(sk, val, false);
                break;
-       case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
-               if (val < 0 || val > 15)
-                       err = -EINVAL;
-               else {
-                       dp->dccps_pcrlen = val;
-                       /* FIXME: add feature negotiation,
-                        * ChangeL(MinimumChecksumCoverage, val) */
-               }
+       case DCCP_SOCKOPT_RECV_CSCOV:
+               err = dccp_setsockopt_cscov(sk, val, true);
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }
-
        release_sock(sk);
+
        return err;
 }
 
@@ -648,6 +659,18 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
        case DCCP_SOCKOPT_GET_CUR_MPS:
                val = dp->dccps_mss_cache;
                break;
+       case DCCP_SOCKOPT_AVAILABLE_CCIDS:
+               return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
+       case DCCP_SOCKOPT_TX_CCID:
+               val = ccid_get_current_tx_ccid(dp);
+               if (val < 0)
+                       return -ENOPROTOOPT;
+               break;
+       case DCCP_SOCKOPT_RX_CCID:
+               val = ccid_get_current_rx_ccid(dp);
+               if (val < 0)
+                       return -ENOPROTOOPT;
+               break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                val = dp->dccps_server_timewait;
                break;
@@ -976,7 +999,7 @@ adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
-       atomic_inc(sk->sk_prot->orphan_count);
+       percpu_counter_inc(sk->sk_prot->orphan_count);
 
        /*
         * It is the last release_sock in its life. It will remove backlog.
@@ -1040,17 +1063,21 @@ static int __init dccp_init(void)
 {
        unsigned long goal;
        int ehash_order, bhash_order, i;
-       int rc = -ENOBUFS;
+       int rc;
 
        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
                     FIELD_SIZEOF(struct sk_buff, cb));
-
+       rc = percpu_counter_init(&dccp_orphan_count, 0);
+       if (rc)
+               goto out;
+       rc = -ENOBUFS;
+       inet_hashinfo_init(&dccp_hashinfo);
        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
-               goto out;
+               goto out_free_percpu;
 
        /*
         * Size and allocate the main established and bind bucket
@@ -1084,8 +1111,8 @@ static int __init dccp_init(void)
        }
 
        for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
-               INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
-               INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
+               INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
+               INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
        }
 
        if (inet_ehash_locks_alloc(&dccp_hashinfo))
@@ -1143,6 +1170,8 @@ out_free_dccp_ehash:
 out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
+out_free_percpu:
+       percpu_counter_destroy(&dccp_orphan_count);
        goto out;
 }