4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
25 #include <net/inet_sock.h>
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
/*
 * Protocol-wide objects, each exported with EXPORT_SYMBOL_GPL:
 *  - dccp_statistics:   SNMP MIB storage of type struct dccp_mib
 *  - dccp_orphan_count: atomic counter, initialised to 0
 *  - dccp_hashinfo:     inet hash table descriptor, cacheline aligned
 * sysctl_dccp_tx_qlen is the default TX queue length and starts at 5.
 * NOTE(review): the end of the dccp_hashinfo initialiser is not visible
 * in this listing.
 */
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
41 EXPORT_SYMBOL_GPL(dccp_statistics);
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48 .lhash_lock = RW_LOCK_UNLOCKED,
49 .lhash_users = ATOMIC_INIT(0),
50 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
55 /* the maximum queue length for tx in packets. 0 is no limit */
56 int sysctl_dccp_tx_qlen __read_mostly = 5;
/*
 * dccp_set_state  -  move a socket to a new DCCP state
 * @sk:    socket whose state changes
 * @state: requested state; WARN_ON fires if it equals the current state
 *
 * Logs the transition, then adjusts bookkeeping based on the old state:
 * DCCP_MIB_CURRESTAB is incremented or decremented, DCCP_MIB_ESTABRESETS
 * is incremented when the old state was OPEN, ACTIVE_CLOSEREQ or CLOSING,
 * the feature-negotiation list is purged when the old state was PARTOPEN,
 * and the protocol unhash hook is called.
 * NOTE(review): the switch/case labels that select these actions, the
 * statement guarded by the icsk_bind_hash test and the final assignment
 * of the new state are not visible in this listing; confirm them against
 * the complete source.
 */
58 void dccp_set_state(struct sock *sk, const int state)
60 const int oldstate = sk->sk_state;
62 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
63 dccp_state_name(oldstate), dccp_state_name(state));
64 WARN_ON(state == oldstate);
68 if (oldstate != DCCP_OPEN)
69 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
70 /* Client retransmits all Confirm options until entering OPEN */
71 if (oldstate == DCCP_PARTOPEN)
72 dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
76 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
77 oldstate == DCCP_CLOSING)
78 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
80 sk->sk_prot->unhash(sk);
81 if (inet_csk(sk)->icsk_bind_hash != NULL &&
82 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
86 if (oldstate == DCCP_OPEN)
87 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
90 /* Change state AFTER socket is unhashed to avoid closed
91 * socket sitting in hash tables.
96 EXPORT_SYMBOL_GPL(dccp_set_state);
/*
 * dccp_finish_passive_close  -  complete a close that the peer started
 * @sk: socket, dispatched on sk->sk_state
 *
 * DCCP_PASSIVE_CLOSE:    send a Reset with code DCCP_RESET_CODE_CLOSED and
 *                        enter DCCP_CLOSED.
 * DCCP_PASSIVE_CLOSEREQ: send a Close with the active flag set to 1 and
 *                        enter DCCP_CLOSING.
 * No other state is handled by the cases visible in this listing.
 */
98 static void dccp_finish_passive_close(struct sock *sk)
100 switch (sk->sk_state) {
101 case DCCP_PASSIVE_CLOSE:
102 /* Node (client or server) has received Close packet. */
103 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
104 dccp_set_state(sk, DCCP_CLOSED);
106 case DCCP_PASSIVE_CLOSEREQ:
108 * Client received CloseReq. We set the `active' flag so that
109 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
111 dccp_send_close(sk, 1);
112 dccp_set_state(sk, DCCP_CLOSING);
/*
 * dccp_done  -  final teardown step of a connection
 * @sk: socket being finished
 *
 * Sets the state to DCCP_CLOSED, clears the transmit timers and sets
 * sk_shutdown to SHUTDOWN_MASK. sk_state_change() is invoked when the
 * socket is not flagged SOCK_DEAD.
 * NOTE(review): one line is missing from this listing directly before the
 * inet_csk_destroy_sock() call; confirm whether that call is conditional
 * before relying on it.
 */
116 void dccp_done(struct sock *sk)
118 dccp_set_state(sk, DCCP_CLOSED);
119 dccp_clear_xmit_timers(sk);
121 sk->sk_shutdown = SHUTDOWN_MASK;
123 if (!sock_flag(sk, SOCK_DEAD))
124 sk->sk_state_change(sk);
126 inet_csk_destroy_sock(sk);
129 EXPORT_SYMBOL_GPL(dccp_done);
/*
 * dccp_packet_name  -  printable name of a DCCP packet type
 * @type: packet type, used as index into a static table of names
 *
 * Values at or above DCCP_NR_PKT_TYPES are handled separately from the
 * table lookup (that branch is not visible in this listing).
 * NOTE(review): @type is a signed int and only the upper bound is tested
 * in the visible lines; a negative value would index before the table.
 * Confirm that no caller can pass one, or add a lower-bound check.
 */
131 const char *dccp_packet_name(const int type)
133 static const char *dccp_packet_names[] = {
134 [DCCP_PKT_REQUEST] = "REQUEST",
135 [DCCP_PKT_RESPONSE] = "RESPONSE",
136 [DCCP_PKT_DATA] = "DATA",
137 [DCCP_PKT_ACK] = "ACK",
138 [DCCP_PKT_DATAACK] = "DATAACK",
139 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
140 [DCCP_PKT_CLOSE] = "CLOSE",
141 [DCCP_PKT_RESET] = "RESET",
142 [DCCP_PKT_SYNC] = "SYNC",
143 [DCCP_PKT_SYNCACK] = "SYNCACK",
146 if (type >= DCCP_NR_PKT_TYPES)
149 return dccp_packet_names[type];
152 EXPORT_SYMBOL_GPL(dccp_packet_name);
154 const char *dccp_state_name(const int state)
156 static char *dccp_state_names[] = {
157 [DCCP_OPEN] = "OPEN",
158 [DCCP_REQUESTING] = "REQUESTING",
159 [DCCP_PARTOPEN] = "PARTOPEN",
160 [DCCP_LISTEN] = "LISTEN",
161 [DCCP_RESPOND] = "RESPOND",
162 [DCCP_CLOSING] = "CLOSING",
163 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
164 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
165 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
166 [DCCP_TIME_WAIT] = "TIME_WAIT",
167 [DCCP_CLOSED] = "CLOSED",
170 if (state >= DCCP_MAX_STATES)
171 return "INVALID STATE!";
173 return dccp_state_names[state];
176 EXPORT_SYMBOL_GPL(dccp_state_name);
/*
 * dccp_init_sock  -  set DCCP-specific defaults on a new socket
 * @sk:                   socket to initialise
 * @ctl_sock_initialized: when non-zero, feature negotiation is set up via
 *                        dccp_feat_init(); the comment below states that
 *                        the control socket does not need it
 *
 * Initial values written here: RTO = DCCP_TIMEOUT_INIT, request retries
 * from sysctl_dccp_request_retries, state DCCP_CLOSED, MSS cache 536,
 * rate timestamp = jiffies, role undefined, service code absent and TX
 * queue length from sysctl_dccp_tx_qlen. The transmit timers and an empty
 * feature-negotiation list are initialised as well.
 *
 * Returns the result of dccp_feat_init() when @ctl_sock_initialized is
 * set. NOTE(review): the return value of the other path is not visible in
 * this listing.
 */
178 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
180 struct dccp_sock *dp = dccp_sk(sk);
181 struct inet_connection_sock *icsk = inet_csk(sk);
183 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
184 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
185 sk->sk_state = DCCP_CLOSED;
186 sk->sk_write_space = dccp_write_space;
187 icsk->icsk_sync_mss = dccp_sync_mss;
188 dp->dccps_mss_cache = 536;
189 dp->dccps_rate_last = jiffies;
190 dp->dccps_role = DCCP_ROLE_UNDEFINED;
191 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
192 dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
194 dccp_init_xmit_timers(sk);
196 INIT_LIST_HEAD(&dp->dccps_featneg);
197 /* control socket doesn't need feat nego */
198 if (likely(ctl_sock_initialized))
199 return dccp_feat_init(sk);
203 EXPORT_SYMBOL_GPL(dccp_init_sock);
/*
 * dccp_destroy_sock  -  release the DCCP-private resources of a socket
 * @sk: socket being destroyed
 *
 * Frees a pending sk_send_head skb, the service list, the RX ack vector
 * and both CCID half-connection objects, resetting each pointer to NULL,
 * and finally purges the feature-negotiation list.
 * NOTE(review): the statement executed when icsk_bind_hash is non-NULL is
 * not visible in this listing.
 */
205 void dccp_destroy_sock(struct sock *sk)
207 struct dccp_sock *dp = dccp_sk(sk);
210 * DCCP doesn't use sk_write_queue, just sk_send_head
211 * for retransmissions
213 if (sk->sk_send_head != NULL) {
214 kfree_skb(sk->sk_send_head);
215 sk->sk_send_head = NULL;
218 /* Clean up a referenced DCCP bind bucket. */
219 if (inet_csk(sk)->icsk_bind_hash != NULL)
222 kfree(dp->dccps_service_list);
223 dp->dccps_service_list = NULL;
225 if (dp->dccps_hc_rx_ackvec != NULL) {
226 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
227 dp->dccps_hc_rx_ackvec = NULL;
229 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
230 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
231 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
233 /* clean up feature negotiation state */
234 dccp_feat_list_purge(&dp->dccps_featneg);
237 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
/*
 * dccp_listen_start  -  switch a socket into the listening role
 * @sk:      socket that starts listening
 * @backlog: passed through to inet_csk_listen_start()
 *
 * Sets the role to DCCP_ROLE_LISTEN and finalises the feature-negotiation
 * settings before delegating to inet_csk_listen_start(), whose result is
 * returned. According to the comment below, listening is not started when
 * the finalisation fails; the error value returned in that case is not
 * visible in this listing.
 */
239 static inline int dccp_listen_start(struct sock *sk, int backlog)
241 struct dccp_sock *dp = dccp_sk(sk);
243 dp->dccps_role = DCCP_ROLE_LISTEN;
244 /* do not start to listen if feature negotiation setup fails */
245 if (dccp_feat_finalise_settings(dp))
247 return inet_csk_listen_start(sk, backlog);
250 static inline int dccp_need_reset(int state)
252 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
253 state != DCCP_REQUESTING;
/*
 * dccp_disconnect  -  abort the connection and reset the socket
 * @sk:    socket to disconnect
 * @flags: not referenced in the lines visible in this listing
 *
 * A socket that is not already closed is moved to DCCP_CLOSED. Depending
 * on the previous state the function then
 *  - stops listening (DCCP_LISTEN),
 *  - sends a Reset with code DCCP_RESET_CODE_ABORTED and sets sk_err to
 *    ECONNRESET (states for which dccp_need_reset() is true), or
 *  - only sets sk_err to ECONNRESET (DCCP_REQUESTING).
 * Afterwards the transmit timers are cleared, the receive and write queues
 * and sk_send_head are purged, the source address is reset unless
 * SOCK_BINDADDR_LOCK is set, SOCK_DONE and the backoff counter are
 * cleared, delayed-ack state is re-initialised and sk_error_report() is
 * called.
 * NOTE(review): the return statement is not visible in this listing.
 */
256 int dccp_disconnect(struct sock *sk, int flags)
258 struct inet_connection_sock *icsk = inet_csk(sk);
259 struct inet_sock *inet = inet_sk(sk);
261 const int old_state = sk->sk_state;
263 if (old_state != DCCP_CLOSED)
264 dccp_set_state(sk, DCCP_CLOSED);
267 * This corresponds to the ABORT function of RFC793, sec. 3.8
268 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
270 if (old_state == DCCP_LISTEN) {
271 inet_csk_listen_stop(sk);
272 } else if (dccp_need_reset(old_state)) {
273 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
274 sk->sk_err = ECONNRESET;
275 } else if (old_state == DCCP_REQUESTING)
276 sk->sk_err = ECONNRESET;
278 dccp_clear_xmit_timers(sk);
280 __skb_queue_purge(&sk->sk_receive_queue);
281 __skb_queue_purge(&sk->sk_write_queue);
282 if (sk->sk_send_head != NULL) {
283 __kfree_skb(sk->sk_send_head);
284 sk->sk_send_head = NULL;
289 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
290 inet_reset_saddr(sk);
293 sock_reset_flag(sk, SOCK_DONE);
295 icsk->icsk_backoff = 0;
296 inet_csk_delack_init(sk);
299 WARN_ON(inet->num && !icsk->icsk_bind_hash);
301 sk->sk_error_report(sk);
305 EXPORT_SYMBOL_GPL(dccp_disconnect);
308 * Wait for a DCCP event.
310 * Note that we don't need to lock the socket, as the upper poll layers
311 * take care of normal races (between the test and the event) and we don't
312 * go look at any of the socket buffers directly.
/*
 * dccp_poll  -  compute the poll event mask of a DCCP socket
 *
 * Registers the caller on sk->sk_sleep. Listening sockets are delegated
 * to inet_csk_listen_poll(). For other sockets:
 *  - RCV_SHUTDOWN adds POLLIN | POLLRDNORM | POLLRDHUP;
 *  - outside the REQUESTING and RESPOND states, allocated receive memory
 *    adds POLLIN | POLLRDNORM, and if sending is not shut down, enough
 *    write space adds POLLOUT | POLLWRNORM. When space is short the
 *    NOSPACE bits are set on the socket and the space test is repeated to
 *    close the race described in the comment below.
 * NOTE(review): the last parameter, the declaration of mask, the action
 * taken for a fully shut down or closed socket and the return statement
 * are not visible in this listing.
 */
314 unsigned int dccp_poll(struct file *file, struct socket *sock,
318 struct sock *sk = sock->sk;
320 poll_wait(file, sk->sk_sleep, wait);
321 if (sk->sk_state == DCCP_LISTEN)
322 return inet_csk_listen_poll(sk);
324 /* Socket is not locked. We are protected from async events
325 by poll logic and correct handling of state changes
326 made by another threads is impossible in any case.
333 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
335 if (sk->sk_shutdown & RCV_SHUTDOWN)
336 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
339 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
340 if (atomic_read(&sk->sk_rmem_alloc) > 0)
341 mask |= POLLIN | POLLRDNORM;
343 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
344 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
345 mask |= POLLOUT | POLLWRNORM;
346 } else { /* send SIGIO later */
347 set_bit(SOCK_ASYNC_NOSPACE,
348 &sk->sk_socket->flags);
349 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
351 /* Race breaker. If space is freed after
352 * wspace test but before the flags are set,
353 * IO signal will be lost.
355 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
356 mask |= POLLOUT | POLLWRNORM;
363 EXPORT_SYMBOL_GPL(dccp_poll);
/*
 * dccp_ioctl  -  ioctl handler for DCCP sockets
 * @sk:  socket the request is for
 * @cmd: ioctl command
 * @arg: user-space address; written through as an int in the visible path
 *
 * Visible handling: the socket state is compared with DCCP_LISTEN, and one
 * query peeks at the head of the receive queue and copies an amount to
 * user space. Per the comment below, only the size of that single packet
 * is reported because that is all a read would return.
 * NOTE(review): the command dispatch, locking, the computation of amount
 * and all return codes are not visible in this listing.
 */
365 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
371 if (sk->sk_state == DCCP_LISTEN)
377 unsigned long amount = 0;
379 skb = skb_peek(&sk->sk_receive_queue);
382 * We will only return the amount of this packet since
383 * that is all that will be read.
387 rc = put_user(amount, (int __user *)arg);
399 EXPORT_SYMBOL_GPL(dccp_ioctl);
/*
 * dccp_setsockopt_service  -  set the service code and optional list
 * @sk:      socket to configure
 * @service: primary service code; the caller reads it from the first
 *           word of @optval
 * @optval:  user buffer; the words after the first form the service list
 * @optlen:  length of @optval in bytes
 *
 * DCCP_SERVICE_INVALID_VALUE and an @optlen above
 * DCCP_SERVICE_LIST_MAX_LEN words are checked first. When @optlen exceeds
 * one word, a list of optlen / 4 - 1 entries is allocated and filled from
 * user space, and it is checked for the invalid value as well. The
 * service code is then stored and the previous list is freed and replaced
 * by the new one (NULL if no extra words were given).
 * NOTE(review): error codes and locking are not visible in this listing.
 */
401 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
402 char __user *optval, int optlen)
404 struct dccp_sock *dp = dccp_sk(sk);
405 struct dccp_service_list *sl = NULL;
407 if (service == DCCP_SERVICE_INVALID_VALUE ||
408 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
411 if (optlen > sizeof(service)) {
412 sl = kmalloc(optlen, GFP_KERNEL);
416 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
417 if (copy_from_user(sl->dccpsl_list,
418 optval + sizeof(service),
419 optlen - sizeof(service)) ||
420 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
427 dp->dccps_service = service;
429 kfree(dp->dccps_service_list);
431 dp->dccps_service_list = sl;
/*
 * dccp_setsockopt_cscov  -  set the minimum checksum coverage
 * @sk:    socket to configure
 * @cscov: requested coverage value; checked against the range 0..15
 * @rx:    true when called for DCCP_SOCKOPT_RECV_CSCOV, false for
 *         DCCP_SOCKOPT_SEND_CSCOV; passed on to dccp_feat_register_sp()
 *
 * Allocates and fills a list of permissible values and registers it for
 * the DCCPF_MIN_CSUM_COVER feature; the comment below explains why a list
 * rather than a single value is registered. @cscov is then recorded in
 * dccps_pcrlen or dccps_pcslen.
 * NOTE(review): the list length, the values written by the loop, the
 * choice between the two fields and the error codes are not visible in
 * this listing.
 */
436 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
441 if (cscov < 0 || cscov > 15)
444 * Populate a list of permissible values, in the range cscov...15. This
445 * is necessary since feature negotiation of single values only works if
446 * both sides incidentally choose the same value. Since the list starts
447 * lowest-value first, negotiation will pick the smallest shared value.
453 list = kmalloc(len, GFP_KERNEL);
457 for (i = 0; i < len; i++)
460 rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
464 dccp_sk(sk)->dccps_pcrlen = cscov;
466 dccp_sk(sk)->dccps_pcslen = cscov;
/*
 * dccp_setsockopt_ccid  -  register a list of preferred CCIDs
 * @sk:     socket to configure
 * @type:   DCCP_SOCKOPT_CCID, DCCP_SOCKOPT_TX_CCID or DCCP_SOCKOPT_RX_CCID
 * @optval: user buffer holding the list
 * @optlen: list length in bytes; checked against the range
 *          1..DCCP_FEAT_MAX_SP_VALS
 *
 * Copies the list from user space and registers it for DCCPF_CCID with
 * flag 1 for the TX option and flag 0 for the RX option.
 * DCCP_SOCKOPT_CCID does both, the second registration only when the
 * first one returned 0.
 * NOTE(review): error codes and the release of the temporary buffer are
 * not visible in this listing.
 */
472 static int dccp_setsockopt_ccid(struct sock *sk, int type,
473 char __user *optval, int optlen)
478 if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
481 val = kmalloc(optlen, GFP_KERNEL);
485 if (copy_from_user(val, optval, optlen)) {
491 if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
492 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
494 if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
495 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
/*
 * do_dccp_setsockopt  -  SOL_DCCP option setter
 *
 * Shared by dccp_setsockopt() and compat_dccp_setsockopt().
 *  - PACKET_SIZE and CHANGE_L/R are deprecated and emit a warning.
 *  - The three CCID options are passed to dccp_setsockopt_ccid().
 *  - All remaining options need at least sizeof(int) bytes; the first int
 *    is fetched from user space into val. DCCP_SOCKOPT_SERVICE is passed
 *    to dccp_setsockopt_service(); the others are dispatched on optname:
 *    SERVER_TIMEWAIT (checks for the server role), SEND_CSCOV and
 *    RECV_CSCOV (dccp_setsockopt_cscov()), QPOLICY_ID (checks for the
 *    CLOSED state and an id in 0..DCCPQ_POLICY_MAX - 1) and
 *    QPOLICY_TXQLEN.
 * NOTE(review): error codes, locking and the validation applied to the TX
 * queue length are not visible in this listing.
 */
502 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
503 char __user *optval, int optlen)
505 struct dccp_sock *dp = dccp_sk(sk);
509 case DCCP_SOCKOPT_PACKET_SIZE:
510 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
512 case DCCP_SOCKOPT_CHANGE_L:
513 case DCCP_SOCKOPT_CHANGE_R:
514 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
516 case DCCP_SOCKOPT_CCID:
517 case DCCP_SOCKOPT_RX_CCID:
518 case DCCP_SOCKOPT_TX_CCID:
519 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
522 if (optlen < (int)sizeof(int))
525 if (get_user(val, (int __user *)optval))
528 if (optname == DCCP_SOCKOPT_SERVICE)
529 return dccp_setsockopt_service(sk, val, optval, optlen);
533 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
534 if (dp->dccps_role != DCCP_ROLE_SERVER)
537 dp->dccps_server_timewait = (val != 0);
539 case DCCP_SOCKOPT_SEND_CSCOV:
540 err = dccp_setsockopt_cscov(sk, val, false);
542 case DCCP_SOCKOPT_RECV_CSCOV:
543 err = dccp_setsockopt_cscov(sk, val, true);
545 case DCCP_SOCKOPT_QPOLICY_ID:
546 if (sk->sk_state != DCCP_CLOSED)
548 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
551 dp->dccps_qpolicy = val;
553 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
557 dp->dccps_tx_qlen = val;
/*
 * dccp_setsockopt  -  setsockopt entry point for DCCP sockets
 *
 * Options at a level other than SOL_DCCP are forwarded to the setsockopt
 * operation of the address family (icsk_af_ops); SOL_DCCP options are
 * handled by do_dccp_setsockopt(). The result of the chosen handler is
 * returned.
 */
568 int dccp_setsockopt(struct sock *sk, int level, int optname,
569 char __user *optval, int optlen)
571 if (level != SOL_DCCP)
572 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
575 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
578 EXPORT_SYMBOL_GPL(dccp_setsockopt);
/*
 * compat_dccp_setsockopt  -  compat variant of the setsockopt entry point
 *
 * Options at a level other than SOL_DCCP are forwarded to
 * inet_csk_compat_setsockopt(); SOL_DCCP options use the same
 * do_dccp_setsockopt() handler as the native entry point.
 */
581 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
582 char __user *optval, int optlen)
584 if (level != SOL_DCCP)
585 return inet_csk_compat_setsockopt(sk, level, optname,
587 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
590 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
/*
 * dccp_getsockopt_service  -  report the service code and service list
 * @sk:     socket to query
 * @len:    size of the user buffer in bytes
 * @optval: user buffer, written as an array of 32-bit words
 *
 * The reply starts with the primary service code; when a service list is
 * set, its dccpsl_nr entries follow from the second word on. The total
 * length is written through optlen.
 * NOTE(review): the optlen parameter declaration, the comparison of @len
 * with the total length, the locking and the error assignments are not
 * visible in this listing.
 */
593 static int dccp_getsockopt_service(struct sock *sk, int len,
594 __be32 __user *optval,
597 const struct dccp_sock *dp = dccp_sk(sk);
598 const struct dccp_service_list *sl;
599 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
602 if ((sl = dp->dccps_service_list) != NULL) {
603 slen = sl->dccpsl_nr * sizeof(u32);
612 if (put_user(total_len, optlen) ||
613 put_user(dp->dccps_service, optval) ||
614 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
/*
 * do_dccp_getsockopt  -  SOL_DCCP option getter
 *
 * Shared by dccp_getsockopt() and compat_dccp_getsockopt(). The buffer
 * length is read from optlen and compared with sizeof(int).
 *  - PACKET_SIZE is deprecated and emits a warning.
 *  - SERVICE and AVAILABLE_CCIDS are delegated to their helpers.
 *  - GET_CUR_MPS, TX_CCID, RX_CCID, SERVER_TIMEWAIT, SEND_CSCOV,
 *    RECV_CSCOV, QPOLICY_ID and QPOLICY_TXQLEN load a value into val.
 *  - Further options are passed to the RX or TX CCID via
 *    ccid_hc_rx_getsockopt() / ccid_hc_tx_getsockopt().
 * For the val-based options the length and the value are finally copied
 * to user space.
 * NOTE(review): the case labels of the CCID-specific ranges, the error
 * codes and the checks following the CCID lookups are not visible in this
 * listing.
 */
621 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
622 char __user *optval, int __user *optlen)
624 struct dccp_sock *dp;
627 if (get_user(len, optlen))
630 if (len < (int)sizeof(int))
636 case DCCP_SOCKOPT_PACKET_SIZE:
637 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
639 case DCCP_SOCKOPT_SERVICE:
640 return dccp_getsockopt_service(sk, len,
641 (__be32 __user *)optval, optlen);
642 case DCCP_SOCKOPT_GET_CUR_MPS:
643 val = dp->dccps_mss_cache;
645 case DCCP_SOCKOPT_AVAILABLE_CCIDS:
646 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
647 case DCCP_SOCKOPT_TX_CCID:
648 val = ccid_get_current_tx_ccid(dp);
652 case DCCP_SOCKOPT_RX_CCID:
653 val = ccid_get_current_rx_ccid(dp);
657 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
658 val = dp->dccps_server_timewait;
660 case DCCP_SOCKOPT_SEND_CSCOV:
661 val = dp->dccps_pcslen;
663 case DCCP_SOCKOPT_RECV_CSCOV:
664 val = dp->dccps_pcrlen;
666 case DCCP_SOCKOPT_QPOLICY_ID:
667 val = dp->dccps_qpolicy;
669 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
670 val = dp->dccps_tx_qlen;
673 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
674 len, (u32 __user *)optval, optlen);
676 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
677 len, (u32 __user *)optval, optlen);
683 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
/*
 * dccp_getsockopt  -  getsockopt entry point for DCCP sockets
 *
 * Options at a level other than SOL_DCCP are forwarded to the getsockopt
 * operation of the address family (icsk_af_ops); SOL_DCCP options are
 * handled by do_dccp_getsockopt(). The result of the chosen handler is
 * returned.
 */
689 int dccp_getsockopt(struct sock *sk, int level, int optname,
690 char __user *optval, int __user *optlen)
692 if (level != SOL_DCCP)
693 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
696 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
699 EXPORT_SYMBOL_GPL(dccp_getsockopt);
/*
 * compat_dccp_getsockopt  -  compat variant of the getsockopt entry point
 *
 * Options at a level other than SOL_DCCP are forwarded to
 * inet_csk_compat_getsockopt(); SOL_DCCP options use the same
 * do_dccp_getsockopt() handler as the native entry point.
 */
702 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
703 char __user *optval, int __user *optlen)
705 if (level != SOL_DCCP)
706 return inet_csk_compat_getsockopt(sk, level, optname,
708 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
711 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
/*
 * dccp_msghdr_parse  -  apply the control messages of a sendmsg() call
 * @msg: message header whose ancillary data is walked
 * @skb: packet that receives the parsed settings
 *
 * Walks all control messages from CMSG_FIRSTHDR() on. Each one is checked
 * with CMSG_OK() and its level is compared with SOL_DCCP. For
 * DCCP_SCM_PRIORITY the length must be exactly CMSG_LEN(sizeof(__u32));
 * the payload is then stored in skb->priority, which the comment below
 * describes as an opaque queueing-policy priority.
 * NOTE(review): the return codes, the default priority assignment and the
 * handling of other levels and types are not visible in this listing.
 */
714 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
716 struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
719 * Assign an (opaque) qpolicy priority value to skb->priority.
721 * We are overloading this skb field for use with the qpolicy subystem.
722 * The skb->priority is normally used for the SO_PRIORITY option, which
723 * is initialised from sk_priority. Since the assignment of sk_priority
724 * to skb->priority happens later (on layer 3), we overload this field
725 * for use with queueing priorities as long as the skb is on layer 4.
726 * The default priority value (if nothing is set) is 0.
730 for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
732 if (!CMSG_OK(msg, cmsg))
735 if (cmsg->cmsg_level != SOL_DCCP)
738 switch (cmsg->cmsg_type) {
739 case DCCP_SCM_PRIORITY:
740 if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
742 skb->priority = *(__u32 *)CMSG_DATA(cmsg);
/*
 * dccp_sendmsg  -  queue one datagram for transmission
 *
 * Steps visible in this listing, in order:
 *  1. the payload length is compared with the cached MSS;
 *  2. the queueing policy is asked whether the TX queue is full;
 *  3. unless the socket is in the OPEN or PARTOPEN state, the call waits
 *     for the connection via sk_stream_wait_connect(), bounded by the
 *     send timeout;
 *  4. an skb with room for max_header plus the payload is allocated, the
 *     header room is reserved and the payload is copied from the iovec;
 *  5. control messages are applied by dccp_msghdr_parse();
 *  6. the skb is handed to the queueing policy with dccp_qpolicy_push().
 * MSG_DONTWAIT in msg_flags selects non-blocking behaviour.
 * NOTE(review): the length parameter declaration, socket locking, error
 * codes, the transmit kick and the return value are not visible in this
 * listing.
 */
751 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
754 const struct dccp_sock *dp = dccp_sk(sk);
755 const int flags = msg->msg_flags;
756 const int noblock = flags & MSG_DONTWAIT;
761 if (len > dp->dccps_mss_cache)
766 if (dccp_qpolicy_full(sk)) {
771 timeo = sock_sndtimeo(sk, noblock);
774 * We have to use sk_stream_wait_connect here to set sk_write_pending,
775 * so that the trick in dccp_rcv_request_sent_state_process.
777 /* Wait for a connection to finish. */
778 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
779 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
782 size = sk->sk_prot->max_header + len;
784 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
789 skb_reserve(skb, sk->sk_prot->max_header);
790 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
794 rc = dccp_msghdr_parse(msg, skb);
798 dccp_qpolicy_push(sk, skb);
808 EXPORT_SYMBOL_GPL(dccp_sendmsg);
/*
 * dccp_recvmsg  -  receive one datagram
 * @len:      size of the user buffer; also reused for the result
 * @nonblock: selects the receive timeout via sock_rcvtimeo()
 * @flags:    MSG_PEEK leaves the packet queued and skips the passive-close
 *            handling
 *
 * The socket state is first compared with DCCP_LISTEN. The head of the
 * receive queue is then peeked and handled by packet type: data-carrying
 * packets are delivered, close-type packets finish the passive close
 * unless MSG_PEEK is set, and other packets are logged and consumed.
 * With nothing to deliver the function checks SOCK_DONE, a pending socket
 * error, RCV_SHUTDOWN, the CLOSED state and pending signals before
 * sleeping in sk_wait_data().
 * On delivery MSG_TRUNC is set when the buffer is shorter than the packet,
 * the data is copied with skb_copy_datagram_iovec() and the skb is
 * consumed unless MSG_PEEK is set.
 * NOTE(review): several case labels, the loop construct, locking and all
 * return paths are not visible in this listing.
 */
810 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
811 size_t len, int nonblock, int flags, int *addr_len)
813 const struct dccp_hdr *dh;
818 if (sk->sk_state == DCCP_LISTEN) {
823 timeo = sock_rcvtimeo(sk, nonblock);
826 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
829 goto verify_sock_status;
833 switch (dh->dccph_type) {
835 case DCCP_PKT_DATAACK:
839 case DCCP_PKT_CLOSEREQ:
840 if (!(flags & MSG_PEEK))
841 dccp_finish_passive_close(sk);
844 dccp_pr_debug("found fin (%s) ok!\n",
845 dccp_packet_name(dh->dccph_type));
849 dccp_pr_debug("packet_type=%s\n",
850 dccp_packet_name(dh->dccph_type));
851 sk_eat_skb(sk, skb, 0);
854 if (sock_flag(sk, SOCK_DONE)) {
860 len = sock_error(sk);
864 if (sk->sk_shutdown & RCV_SHUTDOWN) {
869 if (sk->sk_state == DCCP_CLOSED) {
870 if (!sock_flag(sk, SOCK_DONE)) {
871 /* This occurs when user tries to read
872 * from never connected socket.
886 if (signal_pending(current)) {
887 len = sock_intr_errno(timeo);
891 sk_wait_data(sk, &timeo);
896 else if (len < skb->len)
897 msg->msg_flags |= MSG_TRUNC;
899 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
900 /* Exception. Bailout! */
905 if (!(flags & MSG_PEEK))
906 sk_eat_skb(sk, skb, 0);
914 EXPORT_SYMBOL_GPL(dccp_recvmsg);
/*
 * inet_dccp_listen  -  listen() handler for DCCP sockets
 * @sock:    socket to put into the listening state
 * @backlog: accept backlog, stored in sk_max_ack_backlog
 *
 * The socket is checked to be unconnected and of type SOCK_DCCP, and its
 * state to be CLOSED or LISTEN. A socket that is not yet listening is
 * started through dccp_listen_start(); for one that already listens only
 * the backlog is adjusted, as the comment below explains.
 * NOTE(review): locking, error codes and the return statement are not
 * visible in this listing.
 */
916 int inet_dccp_listen(struct socket *sock, int backlog)
918 struct sock *sk = sock->sk;
919 unsigned char old_state;
925 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
928 old_state = sk->sk_state;
929 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
932 /* Really, if the socket is already in listen state
933 * we can only allow the backlog to be adjusted.
935 if (old_state != DCCP_LISTEN) {
937 * FIXME: here it probably should be sk->sk_prot->listen_start
938 * see tcp_listen_start
940 err = dccp_listen_start(sk, backlog);
944 sk->sk_max_ack_backlog = backlog;
952 EXPORT_SYMBOL_GPL(inet_dccp_listen);
/*
 * dccp_terminate_connection  -  begin closing from the local side
 * @sk: socket to terminate, dispatched on sk->sk_state
 *
 * The two passive-close states are completed through
 * dccp_finish_passive_close(). In the other visible path the PARTOPEN
 * timer (ICSK_TIME_DACK) is cleared, a Close is sent with the active flag
 * set, and the next state becomes DCCP_ACTIVE_CLOSEREQ for a server that
 * does not hold timewait state, or DCCP_CLOSING otherwise. next_state
 * starts out as DCCP_CLOSED and is applied with dccp_set_state().
 * NOTE(review): the remaining case labels and the break / fall-through
 * structure are not visible in this listing, so which states reach which
 * statements must be confirmed against the complete source.
 */
954 static void dccp_terminate_connection(struct sock *sk)
956 u8 next_state = DCCP_CLOSED;
958 switch (sk->sk_state) {
959 case DCCP_PASSIVE_CLOSE:
960 case DCCP_PASSIVE_CLOSEREQ:
961 dccp_finish_passive_close(sk);
964 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
965 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
968 dccp_send_close(sk, 1);
970 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
971 !dccp_sk(sk)->dccps_server_timewait)
972 next_state = DCCP_ACTIVE_CLOSEREQ;
974 next_state = DCCP_CLOSING;
977 dccp_set_state(sk, next_state);
/*
 * dccp_close  -  close() handler: shut the socket down and orphan it
 * @sk:      socket being closed
 * @timeout: linger time, passed to dccp_flush_write_queue() and
 *           sk_stream_wait_close()
 *
 * Sets sk_shutdown to SHUTDOWN_MASK. A listening socket is moved to
 * DCCP_CLOSED and stops listening. For any other socket the transmit
 * timer is stopped and the receive queue is drained while counting the
 * unread bytes, then one of three paths is taken:
 *  - unread data: warn, send a Reset with code Aborted, go to DCCP_CLOSED;
 *  - SOCK_LINGER with a zero linger time: call the protocol disconnect
 *    hook;
 *  - any other state except CLOSED: flush the write queue and call
 *    dccp_terminate_connection().
 * The write queue is then purged and sk_stream_wait_close() is called.
 * Finally the orphan counter is incremented and a socket that has reached
 * DCCP_CLOSED is destroyed with inet_csk_destroy_sock().
 * NOTE(review): socket locking, reference counting, the goto labels and
 * the freeing of the drained skbs are not visible in this listing.
 */
981 void dccp_close(struct sock *sk, long timeout)
983 struct dccp_sock *dp = dccp_sk(sk);
985 u32 data_was_unread = 0;
990 sk->sk_shutdown = SHUTDOWN_MASK;
992 if (sk->sk_state == DCCP_LISTEN) {
993 dccp_set_state(sk, DCCP_CLOSED);
996 inet_csk_listen_stop(sk);
998 goto adjudge_to_death;
1001 sk_stop_timer(sk, &dp->dccps_xmit_timer);
1004 * We need to flush the recv. buffs. We do this only on the
1005 * descriptor close, not protocol-sourced closes, because the
1006 *reader process may not have drained the data yet!
1008 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1009 data_was_unread += skb->len;
1013 if (data_was_unread) {
1014 /* Unread data was tossed, send an appropriate Reset Code */
1015 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
1016 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1017 dccp_set_state(sk, DCCP_CLOSED);
1018 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1019 /* Check zero linger _after_ checking for unread data. */
1020 sk->sk_prot->disconnect(sk, 0);
1021 } else if (sk->sk_state != DCCP_CLOSED) {
1023 * Normal connection termination. May need to wait if there are
1024 * still packets in the TX queue that are delayed by the CCID.
1026 dccp_flush_write_queue(sk, &timeout);
1027 dccp_terminate_connection(sk);
1031 * Flush write queue. This may be necessary in several cases:
1032 * - we have been closed by the peer but still have application data;
1033 * - abortive termination (unread data or zero linger time),
1034 * - normal termination but queue could not be flushed within time limit
1036 __skb_queue_purge(&sk->sk_write_queue);
1038 sk_stream_wait_close(sk, timeout);
1041 state = sk->sk_state;
1044 atomic_inc(sk->sk_prot->orphan_count);
1047 * It is the last release_sock in its life. It will remove backlog.
1051 * Now socket is owned by kernel and we acquire BH lock
1052 * to finish close. No need to check for user refs.
1056 WARN_ON(sock_owned_by_user(sk));
1058 /* Have we already been destroyed by a softirq or backlog? */
1059 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1062 if (sk->sk_state == DCCP_CLOSED)
1063 inet_csk_destroy_sock(sk);
1065 /* Otherwise, socket is reprieved until protocol close. */
1073 EXPORT_SYMBOL_GPL(dccp_close);
/*
 * dccp_shutdown  -  shutdown() handler for DCCP sockets
 * @sk:  socket the call refers to (not used)
 * @how: shutdown mode requested by the caller
 *
 * Only emits a debug message containing @how; no socket state is changed.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1082 static inline int dccp_mib_init(void)
1084 return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
1087 static inline void dccp_mib_exit(void)
1089 snmp_mib_free((void**)dccp_statistics);
/*
 * Module parameters.
 *  - thash_entries: int, mode 0444, described as the number of ehash
 *    buckets; dccp_init() uses it when sizing the established hash table.
 *  - dccp_debug: bool, mode 0644, enables debug messages; only present
 *    when CONFIG_IP_DCCP_DEBUG is defined, and exported for other modules.
 * NOTE(review): the definition of dccp_debug and the closing of the
 * conditional section are not visible in this listing.
 */
1092 static int thash_entries;
1093 module_param(thash_entries, int, 0444);
1094 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1096 #ifdef CONFIG_IP_DCCP_DEBUG
1098 module_param(dccp_debug, bool, 0644);
1099 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1101 EXPORT_SYMBOL_GPL(dccp_debug);
/*
 * dccp_init  -  module initialisation
 *
 * Checks at build time that struct dccp_skb_cb fits into the cb area of
 * struct sk_buff, then:
 *  1. creates the dccp_bind_bucket slab cache;
 *  2. derives a page goal for the established hash (ehash) from
 *     num_physpages or from the thash_entries parameter, allocates the
 *     table with __get_free_pages(), retrying with a smaller order on
 *     failure, and rounds the bucket count down to a power of two;
 *  3. initialises the ehash chains and allocates the ehash locks;
 *  4. allocates the bind hash (bhash) starting from the ehash order,
 *     again retrying with smaller orders, and initialises each bucket;
 *  5. initialises the MIB, the ack vector code and the sysctls, and calls
 *     dccp_timestamping_init().
 * The labels at the end undo the allocations in reverse order and reset
 * the corresponding dccp_hashinfo pointers to NULL.
 * NOTE(review): the declarations of goal and rc, the loop heads, the
 * success return and the labels out_free_dccp_mib and out_ackvec_exit are
 * not visible in this listing.
 */
1104 static int __init dccp_init(void)
1107 int ehash_order, bhash_order, i;
1110 BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1111 FIELD_SIZEOF(struct sk_buff, cb));
1113 dccp_hashinfo.bind_bucket_cachep =
1114 kmem_cache_create("dccp_bind_bucket",
1115 sizeof(struct inet_bind_bucket), 0,
1116 SLAB_HWCACHE_ALIGN, NULL);
1117 if (!dccp_hashinfo.bind_bucket_cachep)
1121 * Size and allocate the main established and bind bucket
1124 * The methodology is similar to that of the buffer cache.
1126 if (num_physpages >= (128 * 1024))
1127 goal = num_physpages >> (21 - PAGE_SHIFT);
1129 goal = num_physpages >> (23 - PAGE_SHIFT);
1132 goal = (thash_entries *
1133 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1134 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1137 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1138 sizeof(struct inet_ehash_bucket);
1139 while (dccp_hashinfo.ehash_size &
1140 (dccp_hashinfo.ehash_size - 1))
1141 dccp_hashinfo.ehash_size--;
1142 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1143 __get_free_pages(GFP_ATOMIC, ehash_order);
1144 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1146 if (!dccp_hashinfo.ehash) {
1147 DCCP_CRIT("Failed to allocate DCCP established hash table");
1148 goto out_free_bind_bucket_cachep;
1151 for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1152 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1153 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1156 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1157 goto out_free_dccp_ehash;
1159 bhash_order = ehash_order;
1162 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1163 sizeof(struct inet_bind_hashbucket);
1164 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1167 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1168 __get_free_pages(GFP_ATOMIC, bhash_order);
1169 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1171 if (!dccp_hashinfo.bhash) {
1172 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1173 goto out_free_dccp_locks;
1176 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1177 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1178 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1181 rc = dccp_mib_init();
1183 goto out_free_dccp_bhash;
1185 rc = dccp_ackvec_init();
1187 goto out_free_dccp_mib;
1189 rc = dccp_sysctl_init();
1191 goto out_ackvec_exit;
1193 dccp_timestamping_init();
1200 out_free_dccp_bhash:
1201 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1202 dccp_hashinfo.bhash = NULL;
1203 out_free_dccp_locks:
1204 inet_ehash_locks_free(&dccp_hashinfo);
1205 out_free_dccp_ehash:
1206 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1207 dccp_hashinfo.ehash = NULL;
1208 out_free_bind_bucket_cachep:
1209 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1210 dccp_hashinfo.bind_bucket_cachep = NULL;
/*
 * dccp_fini  -  module teardown
 *
 * Frees the bind hash and the established hash, recomputing each page
 * order with get_order() from the stored bucket count, then frees the
 * ehash locks and destroys the bind-bucket slab cache.
 * NOTE(review): further teardown calls may exist on lines that are not
 * visible in this listing.
 */
1214 static void __exit dccp_fini(void)
1217 free_pages((unsigned long)dccp_hashinfo.bhash,
1218 get_order(dccp_hashinfo.bhash_size *
1219 sizeof(struct inet_bind_hashbucket)));
1220 free_pages((unsigned long)dccp_hashinfo.ehash,
1221 get_order(dccp_hashinfo.ehash_size *
1222 sizeof(struct inet_ehash_bucket)));
1223 inet_ehash_locks_free(&dccp_hashinfo);
1224 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
/*
 * Module registration: dccp_init() runs on load and dccp_fini() on unload.
 * The metadata that follows declares the licence, author and description.
 */
1229 module_init(dccp_init);
1230 module_exit(dccp_fini);
1232 MODULE_LICENSE("GPL");
1233 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1234 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");