4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
26 #include <net/inet_common.h>
27 #include <net/inet_sock.h>
28 #include <net/protocol.h>
32 #include <asm/semaphore.h>
33 #include <linux/spinlock.h>
34 #include <linux/timer.h>
35 #include <linux/delay.h>
36 #include <linux/poll.h>
/* Per-CPU DCCP SNMP/MIB counters; storage is allocated in init_dccp_v4_mibs(). */
42 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
44 EXPORT_SYMBOL_GPL(dccp_statistics);
/* Count of orphaned DCCP sockets (bumped in dccp_close() after adjudge_to_death). */
46 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
48 EXPORT_SYMBOL_GPL(dccp_orphan_count);
/*
 * IPv4 protocol hooks for IPPROTO_DCCP: dccp_v4_rcv() handles inbound
 * segments, dccp_v4_err() handles ICMP errors.
 * NOTE(review): struct initializer is not closed in this extract; the
 * no_policy/closing lines appear to be missing — verify against upstream.
 */
50 static struct net_protocol dccp_protocol = {
51 .handler = dccp_v4_rcv,
52 .err_handler = dccp_v4_err,
/*
 * dccp_packet_name - map a DCCP packet-type number to a printable name.
 * Used by debug/trace output (see dccp_recvmsg()).
 * NOTE(review): the body of the out-of-range branch (the string returned
 * when type >= DCCP_NR_PKT_TYPES) is missing from this extract — verify.
 */
56 const char *dccp_packet_name(const int type)
58 static const char *dccp_packet_names[] = {
59 [DCCP_PKT_REQUEST] = "REQUEST",
60 [DCCP_PKT_RESPONSE] = "RESPONSE",
61 [DCCP_PKT_DATA] = "DATA",
62 [DCCP_PKT_ACK] = "ACK",
63 [DCCP_PKT_DATAACK] = "DATAACK",
64 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
65 [DCCP_PKT_CLOSE] = "CLOSE",
66 [DCCP_PKT_RESET] = "RESET",
67 [DCCP_PKT_SYNC] = "SYNC",
68 [DCCP_PKT_SYNCACK] = "SYNCACK",
71 if (type >= DCCP_NR_PKT_TYPES)
74 return dccp_packet_names[type];
77 EXPORT_SYMBOL_GPL(dccp_packet_name);
/*
 * dccp_state_name - map a DCCP socket state to a printable name.
 * NOTE(review): the table entry for DCCP_OPEN appears to be missing from
 * this extract (states jump from the declaration to DCCP_REQUESTING) —
 * verify against upstream net/dccp/proto.c.
 */
79 const char *dccp_state_name(const int state)
81 static char *dccp_state_names[] = {
83 [DCCP_REQUESTING] = "REQUESTING",
84 [DCCP_PARTOPEN] = "PARTOPEN",
85 [DCCP_LISTEN] = "LISTEN",
86 [DCCP_RESPOND] = "RESPOND",
87 [DCCP_CLOSING] = "CLOSING",
88 [DCCP_TIME_WAIT] = "TIME_WAIT",
89 [DCCP_CLOSED] = "CLOSED",
/* Guard against reading past the end of the name table. */
92 if (state >= DCCP_MAX_STATES)
93 return "INVALID STATE!";
95 return dccp_state_names[state];
98 EXPORT_SYMBOL_GPL(dccp_state_name);
100 void dccp_hash(struct sock *sk)
102 inet_hash(&dccp_hashinfo, sk);
105 EXPORT_SYMBOL_GPL(dccp_hash);
107 void dccp_unhash(struct sock *sk)
109 inet_unhash(&dccp_hashinfo, sk);
112 EXPORT_SYMBOL_GPL(dccp_unhash);
/*
 * dccp_init_sock - per-socket initialization for a new DCCP socket.
 * Sets up options, timers, CCID rx/tx control blocks and (optionally) the
 * ack-vector buffer.  The first invocation is the control socket, which
 * skips CCID/feature setup (dccp_ctl_socket_init static flag).
 * NOTE(review): several error-return / goto lines are missing from this
 * extract (e.g. after dccp_feat_init() and the ackvec/ccid failures) —
 * verify the cleanup paths against upstream net/dccp/proto.c.
 */
114 int dccp_init_sock(struct sock *sk)
116 struct dccp_sock *dp = dccp_sk(sk);
117 struct inet_connection_sock *icsk = inet_csk(sk);
118 static int dccp_ctl_socket_init = 1;
120 dccp_options_init(&dp->dccps_options);
121 do_gettimeofday(&dp->dccps_epoch);
124 * FIXME: We're hardcoding the CCID, and doing this at this point makes
125 * the listening (master) sock get CCID control blocks, which is not
126 * necessary, but for now, to not mess with the test userspace apps,
127 * lets leave it here, later the real solution is to do this in a
128 * setsockopt(CCIDs-I-want/accept). -acme
130 if (likely(!dccp_ctl_socket_init)) {
131 int rc = dccp_feat_init(sk);
136 if (dp->dccps_options.dccpo_send_ack_vector) {
137 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
138 if (dp->dccps_hc_rx_ackvec == NULL)
141 dp->dccps_hc_rx_ccid =
142 ccid_hc_rx_new(dp->dccps_options.dccpo_rx_ccid,
144 dp->dccps_hc_tx_ccid =
145 ccid_hc_tx_new(dp->dccps_options.dccpo_tx_ccid,
/* If either CCID failed to allocate, tear down both plus the ack vector. */
147 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
148 dp->dccps_hc_tx_ccid == NULL)) {
149 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
150 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
151 if (dp->dccps_options.dccpo_send_ack_vector) {
152 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
153 dp->dccps_hc_rx_ackvec = NULL;
155 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
159 /* control socket doesn't need feat nego */
160 INIT_LIST_HEAD(&dp->dccps_options.dccpo_pending);
161 INIT_LIST_HEAD(&dp->dccps_options.dccpo_conf);
162 dccp_ctl_socket_init = 0;
/* Common initialization done for every socket, control socket included. */
165 dccp_init_xmit_timers(sk);
166 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
167 sk->sk_state = DCCP_CLOSED;
168 sk->sk_write_space = dccp_write_space;
169 icsk->icsk_sync_mss = dccp_sync_mss;
/* 536 is the classic conservative default MSS, as used by TCP. */
170 dp->dccps_mss_cache = 536;
171 dp->dccps_role = DCCP_ROLE_UNDEFINED;
172 dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
173 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
178 EXPORT_SYMBOL_GPL(dccp_init_sock);
/*
 * dccp_destroy_sock - release all per-socket DCCP resources.
 * Frees the pending retransmit skb, the bind-bucket port reference, the
 * service list, the ack vector and both CCID control blocks.
 * NOTE(review): the feature-negotiation cleanup call and the return
 * statement are missing from this extract — verify against upstream.
 */
180 int dccp_destroy_sock(struct sock *sk)
182 struct dccp_sock *dp = dccp_sk(sk);
185 * DCCP doesn't use sk_write_queue, just sk_send_head
186 * for retransmissions
188 if (sk->sk_send_head != NULL) {
189 kfree_skb(sk->sk_send_head);
190 sk->sk_send_head = NULL;
193 /* Clean up a referenced DCCP bind bucket. */
194 if (inet_csk(sk)->icsk_bind_hash != NULL)
195 inet_put_port(&dccp_hashinfo, sk);
/* kfree(NULL) is a no-op, so no NULL check is needed here. */
197 kfree(dp->dccps_service_list);
198 dp->dccps_service_list = NULL;
200 if (dp->dccps_options.dccpo_send_ack_vector) {
201 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
202 dp->dccps_hc_rx_ackvec = NULL;
204 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
205 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
206 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
208 /* clean up feature negotiation state */
214 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
/*
 * dccp_listen_start - move the socket into the listening role.
 * Requires the service code to have been set via
 * setsockopt(DCCP_SOCKOPT_SERVICE) beforehand.
 * NOTE(review): the error-return body of the service-not-initialized
 * branch is missing from this extract — verify.
 */
216 static inline int dccp_listen_start(struct sock *sk)
218 struct dccp_sock *dp = dccp_sk(sk);
220 dp->dccps_role = DCCP_ROLE_LISTEN;
222 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
223 * before calling listen()
225 if (dccp_service_not_initialized(sk))
227 return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
/*
 * dccp_disconnect - abort the connection and reset the socket to CLOSED.
 * Mirrors tcp_disconnect(): stops a listener, flushes queues, clears
 * timers and resets the bound address unless SOCK_BINDADDR_LOCK is set.
 * NOTE(review): some lines (e.g. dst release, write-queue purge, final
 * return) are missing from this extract — verify against upstream.
 */
230 int dccp_disconnect(struct sock *sk, int flags)
232 struct inet_connection_sock *icsk = inet_csk(sk);
233 struct inet_sock *inet = inet_sk(sk);
235 const int old_state = sk->sk_state;
237 if (old_state != DCCP_CLOSED)
238 dccp_set_state(sk, DCCP_CLOSED);
240 /* ABORT function of RFC793 */
241 if (old_state == DCCP_LISTEN) {
242 inet_csk_listen_stop(sk);
243 /* FIXME: do the active reset thing */
244 } else if (old_state == DCCP_REQUESTING)
245 sk->sk_err = ECONNRESET;
247 dccp_clear_xmit_timers(sk);
248 __skb_queue_purge(&sk->sk_receive_queue);
249 if (sk->sk_send_head != NULL) {
250 __kfree_skb(sk->sk_send_head);
251 sk->sk_send_head = NULL;
/* Forget the bound source address unless the app explicitly bound it. */
256 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
257 inet_reset_saddr(sk);
260 sock_reset_flag(sk, SOCK_DONE);
262 icsk->icsk_backoff = 0;
263 inet_csk_delack_init(sk);
266 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
268 sk->sk_error_report(sk);
272 EXPORT_SYMBOL_GPL(dccp_disconnect);
275 * Wait for a DCCP event.
277 * Note that we don't need to lock the socket, as the upper poll layers
278 * take care of normal races (between the test and the event) and we don't
279 * go look at any of the socket buffers directly.
281 unsigned int dccp_poll(struct file *file, struct socket *sock,
285 struct sock *sk = sock->sk;
287 poll_wait(file, sk->sk_sleep, wait);
288 if (sk->sk_state == DCCP_LISTEN)
289 return inet_csk_listen_poll(sk);
291 /* Socket is not locked. We are protected from async events
292 by poll logic and correct handling of state changes
293 made by another threads is impossible in any case.
/* NOTE(review): the mask declaration/initialization and the HUP/ERR mask
 * assignments appear to be missing from this extract — verify. */
300 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
302 if (sk->sk_shutdown & RCV_SHUTDOWN)
303 mask |= POLLIN | POLLRDNORM;
/* Connected (past REQUESTING/RESPOND): report readable/writable state. */
306 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
307 if (atomic_read(&sk->sk_rmem_alloc) > 0)
308 mask |= POLLIN | POLLRDNORM;
310 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
311 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
312 mask |= POLLOUT | POLLWRNORM;
313 } else { /* send SIGIO later */
314 set_bit(SOCK_ASYNC_NOSPACE,
315 &sk->sk_socket->flags);
316 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
318 /* Race breaker. If space is freed after
319 * wspace test but before the flags are set,
320 * IO signal will be lost.
322 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
323 mask |= POLLOUT | POLLWRNORM;
330 EXPORT_SYMBOL_GPL(dccp_poll);
/*
 * dccp_ioctl - ioctl handler stub; no DCCP-specific ioctls implemented.
 * NOTE(review): the return statement is missing from this extract —
 * presumably returns -ENOIOCTLCMD; verify against upstream.
 */
332 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
334 dccp_pr_debug("entry\n");
338 EXPORT_SYMBOL_GPL(dccp_ioctl);
/*
 * dccp_setsockopt_service - install the service code (plus optional list
 * of additional service codes copied from user space) on the socket.
 * @service: first __be32 of optval, already copied in by the caller.
 * NOTE(review): error returns (-EFAULT/-ENOMEM/-EINVAL), socket locking
 * and the kfree of a failed list are missing from this extract — verify.
 */
340 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
341 char __user *optval, int optlen)
343 struct dccp_sock *dp = dccp_sk(sk);
344 struct dccp_service_list *sl = NULL;
/* Reject the reserved invalid value and oversized lists up front. */
346 if (service == DCCP_SERVICE_INVALID_VALUE ||
347 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
/* More than one service code supplied: allocate and copy the tail list. */
350 if (optlen > sizeof(service)) {
351 sl = kmalloc(optlen, GFP_KERNEL);
355 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
356 if (copy_from_user(sl->dccpsl_list,
357 optval + sizeof(service),
358 optlen - sizeof(service)) ||
359 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
366 dp->dccps_service = service;
368 kfree(dp->dccps_service_list);
370 dp->dccps_service_list = sl;
375 /* byte 1 is feature. the rest is the preference list */
/*
 * dccp_setsockopt_change - copy a struct dccp_so_feat from user space and
 * feed it into feature negotiation as a Change L/R request.
 * NOTE(review): -EFAULT/-ENOMEM returns, the kfree on failure and the
 * final return are missing from this extract — verify against upstream.
 */
376 static int dccp_setsockopt_change(struct sock *sk, int type,
377 struct dccp_so_feat __user *optval)
379 struct dccp_so_feat opt;
383 if (copy_from_user(&opt, optval, sizeof(opt)))
/* dccpsf_len comes from user space; kmalloc bounds the copy below. */
386 val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
390 if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
395 rc = dccp_feat_change(sk, type, opt.dccpsf_feat, val, opt.dccpsf_len,
/*
 * dccp_setsockopt - SOL_DCCP setsockopt entry point.
 * Non-DCCP levels are delegated to the address-family ops; the SERVICE
 * option is handled before taking the socket lock.
 * NOTE(review): lock_sock/release_sock, the default -ENOPROTOOPT case and
 * break statements are missing from this extract — verify.
 */
408 int dccp_setsockopt(struct sock *sk, int level, int optname,
409 char __user *optval, int optlen)
411 struct dccp_sock *dp;
415 if (level != SOL_DCCP)
416 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
420 if (optlen < sizeof(int))
423 if (get_user(val, (int __user *)optval))
/* SERVICE reads optval itself (list form), so dispatch before the switch. */
426 if (optname == DCCP_SOCKOPT_SERVICE)
427 return dccp_setsockopt_service(sk, val, optval, optlen);
434 case DCCP_SOCKOPT_PACKET_SIZE:
435 dp->dccps_packet_size = val;
438 case DCCP_SOCKOPT_CHANGE_L:
439 if (optlen != sizeof(struct dccp_so_feat))
442 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
443 (struct dccp_so_feat *)
447 case DCCP_SOCKOPT_CHANGE_R:
448 if (optlen != sizeof(struct dccp_so_feat))
451 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
452 (struct dccp_so_feat *)
465 EXPORT_SYMBOL_GPL(dccp_setsockopt);
/*
 * dccp_getsockopt_service - copy the service code and the optional
 * service list back to user space.
 * Returns -ENOENT if the service was never initialized; total_len is the
 * primary service code (u32) plus slen bytes of list.
 * NOTE(review): socket locking, the len<total_len check and success/exit
 * paths are missing from this extract — verify against upstream.
 */
467 static int dccp_getsockopt_service(struct sock *sk, int len,
468 __be32 __user *optval,
471 const struct dccp_sock *dp = dccp_sk(sk);
472 const struct dccp_service_list *sl;
473 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
476 if (dccp_service_not_initialized(sk))
479 if ((sl = dp->dccps_service_list) != NULL) {
480 slen = sl->dccpsl_nr * sizeof(u32);
/* optval + 1 skips the primary service code written by put_user above. */
489 if (put_user(total_len, optlen) ||
490 put_user(dp->dccps_service, optval) ||
491 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
/*
 * dccp_getsockopt - SOL_DCCP getsockopt entry point.
 * Non-DCCP levels go to the address-family ops; CCID-specific options are
 * forwarded to the rx/tx CCID getsockopt handlers.
 * NOTE(review): the CCID option-range checks, default case and val/len
 * declarations are missing from this extract — verify.
 */
498 int dccp_getsockopt(struct sock *sk, int level, int optname,
499 char __user *optval, int __user *optlen)
501 struct dccp_sock *dp;
504 if (level != SOL_DCCP)
505 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
508 if (get_user(len, optlen))
511 if (len < sizeof(int))
517 case DCCP_SOCKOPT_PACKET_SIZE:
518 val = dp->dccps_packet_size;
519 len = sizeof(dp->dccps_packet_size);
521 case DCCP_SOCKOPT_SERVICE:
522 return dccp_getsockopt_service(sk, len,
523 (__be32 __user *)optval, optlen);
525 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
526 len, (u32 __user *)optval, optlen);
528 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
529 len, (u32 __user *)optval, optlen);
534 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
540 EXPORT_SYMBOL_GPL(dccp_getsockopt);
/*
 * dccp_sendmsg - transmit one datagram on a DCCP socket.
 * DCCP preserves packet boundaries, so a message larger than the cached
 * MSS is rejected rather than segmented.
 * NOTE(review): lock_sock/release_sock, -EMSGSIZE/error paths, skb_queue
 * handling and the final return are missing from this extract — verify.
 */
542 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
545 const struct dccp_sock *dp = dccp_sk(sk);
546 const int flags = msg->msg_flags;
547 const int noblock = flags & MSG_DONTWAIT;
/* No segmentation in DCCP: one sendmsg == one packet, bounded by MSS. */
552 if (len > dp->dccps_mss_cache)
556 timeo = sock_sndtimeo(sk, noblock);
559 * We have to use sk_stream_wait_connect here to set sk_write_pending,
560 * so that the trick in dccp_rcv_request_sent_state_process.
562 /* Wait for a connection to finish. */
563 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
564 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
567 size = sk->sk_prot->max_header + len;
569 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
/* Leave headroom for all protocol headers, then copy the user payload. */
574 skb_reserve(skb, sk->sk_prot->max_header);
575 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
579 rc = dccp_write_xmit(sk, skb, &timeo);
581 * XXX we don't use sk_write_queue, so just discard the packet.
582 * Current plan however is to _use_ sk_write_queue with
583 * an algorithm similar to tcp_sendmsg, where the main difference
584 * is that in DCCP we have to respect packet boundaries, so
585 * no coalescing of skbs.
587 * This bug was _quickly_ found & fixed by just looking at an OSTRA
588 * generated callgraph 8) -acme
598 EXPORT_SYMBOL_GPL(dccp_sendmsg);
/*
 * dccp_recvmsg - receive one datagram from a DCCP socket.
 * Peeks the receive queue in a loop: DATA/DATAACK packets are delivered,
 * RESET/CLOSE terminate like a FIN, other packet types are skipped.
 * NOTE(review): lock_sock, the loop construct, several goto targets and
 * the skb consumption/return paths are missing from this extract —
 * verify against upstream net/dccp/proto.c.
 */
600 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
601 size_t len, int nonblock, int flags, int *addr_len)
603 const struct dccp_hdr *dh;
/* Listening sockets cannot be read from. */
608 if (sk->sk_state == DCCP_LISTEN) {
613 timeo = sock_rcvtimeo(sk, nonblock);
616 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
619 goto verify_sock_status;
623 if (dh->dccph_type == DCCP_PKT_DATA ||
624 dh->dccph_type == DCCP_PKT_DATAACK)
/* RESET/CLOSE behave like a TCP FIN: end of stream. */
627 if (dh->dccph_type == DCCP_PKT_RESET ||
628 dh->dccph_type == DCCP_PKT_CLOSE) {
629 dccp_pr_debug("found fin ok!\n");
633 dccp_pr_debug("packet_type=%s\n",
634 dccp_packet_name(dh->dccph_type));
637 if (sock_flag(sk, SOCK_DONE)) {
643 len = sock_error(sk);
647 if (sk->sk_shutdown & RCV_SHUTDOWN) {
652 if (sk->sk_state == DCCP_CLOSED) {
653 if (!sock_flag(sk, SOCK_DONE)) {
654 /* This occurs when user tries to read
655 * from never connected socket.
669 if (signal_pending(current)) {
670 len = sock_intr_errno(timeo);
/* Nothing usable queued: sleep until data arrives or timeout expires. */
674 sk_wait_data(sk, &timeo);
679 else if (len < skb->len)
680 msg->msg_flags |= MSG_TRUNC;
682 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
683 /* Exception. Bailout! */
688 if (!(flags & MSG_PEEK))
697 EXPORT_SYMBOL_GPL(dccp_recvmsg);
/*
 * inet_dccp_listen - listen() implementation for DCCP sockets.
 * If already listening, only the backlog is adjusted; otherwise the
 * socket transitions to the listening state via dccp_listen_start().
 * NOTE(review): lock_sock/release_sock and some error-return lines are
 * missing from this extract — verify against upstream.
 */
699 int inet_dccp_listen(struct socket *sock, int backlog)
701 struct sock *sk = sock->sk;
702 unsigned char old_state;
708 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
/* Only CLOSED or LISTEN sockets may (re-)enter listen. */
711 old_state = sk->sk_state;
712 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
715 /* Really, if the socket is already in listen state
716 * we can only allow the backlog to be adjusted.
718 if (old_state != DCCP_LISTEN) {
720 * FIXME: here it probably should be sk->sk_prot->listen_start
721 * see tcp_listen_start
723 err = dccp_listen_start(sk);
727 sk->sk_max_ack_backlog = backlog;
735 EXPORT_SYMBOL_GPL(inet_dccp_listen);
/*
 * State transition table for close(): maps the current socket state to
 * the next state, OR'd with DCCP_ACTION_FIN when a CLOSE/CLOSEREQ packet
 * must be sent (see dccp_close_state()).
 */
737 static const unsigned char dccp_new_state[] = {
738 /* current state: new state: action: */
740 [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
741 [DCCP_REQUESTING] = DCCP_CLOSED,
742 [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
743 [DCCP_LISTEN] = DCCP_CLOSED,
744 [DCCP_RESPOND] = DCCP_CLOSED,
745 [DCCP_CLOSING] = DCCP_CLOSED,
746 [DCCP_TIME_WAIT] = DCCP_CLOSED,
747 [DCCP_CLOSED] = DCCP_CLOSED,
750 static int dccp_close_state(struct sock *sk)
752 const int next = dccp_new_state[sk->sk_state];
753 const int ns = next & DCCP_STATE_MASK;
755 if (ns != sk->sk_state)
756 dccp_set_state(sk, ns);
758 return next & DCCP_ACTION_FIN;
/*
 * dccp_close - close() implementation for DCCP sockets.
 * Flushes the receive queue, sends CLOSE/CLOSEREQ (or does an abortive
 * disconnect for zero linger), then orphans the socket and either
 * destroys it immediately or leaves it to protocol-driven teardown.
 * NOTE(review): lock_sock/release_sock, local_bh_disable/bh_lock_sock,
 * sock_orphan/sock_put and some skb loop lines are missing from this
 * extract — verify the locking sequence against upstream.
 */
761 void dccp_close(struct sock *sk, long timeout)
767 sk->sk_shutdown = SHUTDOWN_MASK;
769 if (sk->sk_state == DCCP_LISTEN) {
770 dccp_set_state(sk, DCCP_CLOSED);
/* Stop accepting and dispose of all pending request/accept sockets. */
773 inet_csk_listen_stop(sk);
775 goto adjudge_to_death;
779 * We need to flush the recv. buffs. We do this only on the
780 * descriptor close, not protocol-sourced closes, because the
781 * reader process may not have drained the data yet!
783 /* FIXME: check for unread data */
784 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
788 if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
789 /* Check zero linger _after_ checking for unread data. */
790 sk->sk_prot->disconnect(sk, 0);
791 } else if (dccp_close_state(sk)) {
792 dccp_send_close(sk, 1);
795 sk_stream_wait_close(sk, timeout);
799 * It is the last release_sock in its life. It will remove backlog.
803 * Now socket is owned by kernel and we acquire BH lock
804 * to finish close. No need to check for user refs.
808 BUG_TRAP(!sock_owned_by_user(sk));
814 * The last release_sock may have processed the CLOSE or RESET
815 * packet moving sock to CLOSED state, if not we have to fire
816 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
817 * in draft-ietf-dccp-spec-11. -acme
819 if (sk->sk_state == DCCP_CLOSING) {
820 /* FIXME: should start at 2 * RTT */
821 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
822 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
823 inet_csk(sk)->icsk_rto,
826 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
827 dccp_set_state(sk, DCCP_CLOSED);
831 atomic_inc(sk->sk_prot->orphan_count);
/* Fully closed sockets can be destroyed right away ... */
832 if (sk->sk_state == DCCP_CLOSED)
833 inet_csk_destroy_sock(sk);
835 /* Otherwise, socket is reprieved until protocol close. */
842 EXPORT_SYMBOL_GPL(dccp_close);
/*
 * dccp_shutdown - shutdown() stub; currently only logs entry.
 * NOTE(review): the body beyond the debug print is missing/empty in this
 * extract — verify intent against upstream.
 */
844 void dccp_shutdown(struct sock *sk, int how)
846 dccp_pr_debug("entry\n");
849 EXPORT_SYMBOL_GPL(dccp_shutdown);
/*
 * proto_ops for PF_INET/SOCK_DCCP: mostly generic inet helpers, with
 * DCCP-specific listen.  socketpair/mmap/sendpage are unsupported.
 * NOTE(review): the .family, .bind, .poll and .ioctl member lines appear
 * to be missing from this extract — verify against upstream.
 */
851 static const struct proto_ops inet_dccp_ops = {
853 .owner = THIS_MODULE,
854 .release = inet_release,
856 .connect = inet_stream_connect,
857 .socketpair = sock_no_socketpair,
858 .accept = inet_accept,
859 .getname = inet_getname,
860 /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
863 /* FIXME: work on inet_listen to rename it to sock_common_listen */
864 .listen = inet_dccp_listen,
865 .shutdown = inet_shutdown,
866 .setsockopt = sock_common_setsockopt,
867 .getsockopt = sock_common_getsockopt,
868 .sendmsg = inet_sendmsg,
869 .recvmsg = sock_common_recvmsg,
870 .mmap = sock_no_mmap,
871 .sendpage = sock_no_sendpage,
874 extern struct net_proto_family inet_family_ops;
/*
 * inet_protosw entry registering SOCK_DCCP/IPPROTO_DCCP with the inet
 * socket layer (see inet_register_protosw() in dccp_init()).
 * NOTE(review): the .type, .prot and .capability members appear to be
 * missing from this extract — verify.
 */
876 static struct inet_protosw dccp_v4_protosw = {
878 .protocol = IPPROTO_DCCP,
880 .ops = &inet_dccp_ops,
883 .flags = INET_PROTOSW_ICSK,
887 * This is the global socket data structure used for responding to
888 * the Out-of-the-blue (OOTB) packets. A control sock will be created
889 * for this socket at the initialization time.
891 struct socket *dccp_ctl_socket;

893 static char dccp_ctl_socket_err_msg[] __initdata =
894 KERN_ERR "DCCP: Failed to create the control socket.\n";
/*
 * dccp_ctl_sock_init - create the in-kernel control socket used to send
 * RESETs in response to out-of-the-blue packets.
 * GFP_ATOMIC allocation because it may transmit from softirq context;
 * uc_ttl = -1 selects the default unicast TTL.
 * NOTE(review): the rc error check and final return are missing from this
 * extract — verify.
 */
896 static int __init dccp_ctl_sock_init(void)
898 int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
901 printk(dccp_ctl_socket_err_msg);
903 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
904 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
906 /* Unhash it so that IP input processing does not even
907 * see it, we do not wish this socket to see incoming
910 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
916 #ifdef CONFIG_IP_DCCP_UNLOAD_HACK
/* Release the control socket on module unload (unload-hack builds only). */
917 void dccp_ctl_sock_exit(void)
919 if (dccp_ctl_socket != NULL) {
920 sock_release(dccp_ctl_socket);
921 dccp_ctl_socket = NULL;
925 EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
/*
 * init_dccp_v4_mibs - allocate both per-CPU halves of dccp_statistics.
 * On failure of the second allocation, the first is freed (lines below
 * the second check are the unwind path).
 * NOTE(review): the rc/-ENOMEM setup, goto labels and return are missing
 * from this extract — verify.
 */
928 static int __init init_dccp_v4_mibs(void)
932 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
933 if (dccp_statistics[0] == NULL)
936 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
937 if (dccp_statistics[1] == NULL)
944 free_percpu(dccp_statistics[0]);
945 dccp_statistics[0] = NULL;
/* Optional module parameter overriding the established-hash sizing below. */
950 static int thash_entries;
951 module_param(thash_entries, int, 0444);
952 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
/* Debug message toggle, only compiled in with CONFIG_IP_DCCP_DEBUG. */
954 #ifdef CONFIG_IP_DCCP_DEBUG
956 module_param(dccp_debug, int, 0444);
957 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
959 EXPORT_SYMBOL_GPL(dccp_debug);
/*
 * dccp_init - module initialization.
 * Registers the proto, sizes and allocates the established (ehash) and
 * bind (bhash) tables the same way TCP does, allocates MIBs, registers
 * the IPv4 protocol handler and protosw, then brings up ackvec, sysctl
 * and the control socket.  Error paths unwind in strict reverse order
 * via the goto ladder at the bottom.
 * NOTE(review): several lines (the do { openings, goal clamping, some
 * gotos/returns and out_sysctl_exit/out_ackvec_exit labels) are missing
 * from this extract — verify the unwind ladder against upstream.
 */
962 static int __init dccp_init(void)
965 int ehash_order, bhash_order, i;
966 int rc = proto_register(&dccp_prot, 1);
972 dccp_hashinfo.bind_bucket_cachep =
973 kmem_cache_create("dccp_bind_bucket",
974 sizeof(struct inet_bind_bucket), 0,
975 SLAB_HWCACHE_ALIGN, NULL, NULL);
976 if (!dccp_hashinfo.bind_bucket_cachep)
977 goto out_proto_unregister;
980 * Size and allocate the main established and bind bucket
983 * The methodology is similar to that of the buffer cache.
/* Scale the hash goal with available memory, as TCP does. */
985 if (num_physpages >= (128 * 1024))
986 goal = num_physpages >> (21 - PAGE_SHIFT);
988 goal = num_physpages >> (23 - PAGE_SHIFT);
991 goal = (thash_entries *
992 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
993 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
/* Round the ehash size down to a power of two; half is for timewait. */
996 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
997 sizeof(struct inet_ehash_bucket);
998 dccp_hashinfo.ehash_size >>= 1;
999 while (dccp_hashinfo.ehash_size &
1000 (dccp_hashinfo.ehash_size - 1))
1001 dccp_hashinfo.ehash_size--;
1002 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1003 __get_free_pages(GFP_ATOMIC, ehash_order);
1004 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1006 if (!dccp_hashinfo.ehash) {
1007 printk(KERN_CRIT "Failed to allocate DCCP "
1008 "established hash table\n");
1009 goto out_free_bind_bucket_cachep;
/* << 1 because the upper half of ehash holds timewait buckets. */
1012 for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
1013 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1014 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1017 bhash_order = ehash_order;
1020 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1021 sizeof(struct inet_bind_hashbucket);
1022 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1025 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1026 __get_free_pages(GFP_ATOMIC, bhash_order);
1027 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1029 if (!dccp_hashinfo.bhash) {
1030 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
1031 goto out_free_dccp_ehash;
1034 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1035 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1036 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1039 rc = init_dccp_v4_mibs();
1041 goto out_free_dccp_bhash;
1044 if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
1045 goto out_free_dccp_v4_mibs;
1047 inet_register_protosw(&dccp_v4_protosw);
1049 rc = dccp_ackvec_init();
1051 goto out_unregister_protosw;
1053 rc = dccp_sysctl_init();
1055 goto out_ackvec_exit;
1057 rc = dccp_ctl_sock_init();
1059 goto out_sysctl_exit;
/* Error unwind ladder: each label undoes the steps taken before it. */
1066 out_unregister_protosw:
1067 inet_unregister_protosw(&dccp_v4_protosw);
1068 inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
1069 out_free_dccp_v4_mibs:
1070 free_percpu(dccp_statistics[0]);
1071 free_percpu(dccp_statistics[1]);
1072 dccp_statistics[0] = dccp_statistics[1] = NULL;
1073 out_free_dccp_bhash:
1074 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1075 dccp_hashinfo.bhash = NULL;
1076 out_free_dccp_ehash:
1077 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1078 dccp_hashinfo.ehash = NULL;
1079 out_free_bind_bucket_cachep:
1080 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1081 dccp_hashinfo.bind_bucket_cachep = NULL;
1082 out_proto_unregister:
1083 proto_unregister(&dccp_prot);
1087 static const char dccp_del_proto_err_msg[] __exitdata =
1088 KERN_ERR "can't remove dccp net_protocol\n";

/*
 * dccp_fini - module teardown: mirrors dccp_init() in reverse, releasing
 * the protosw, protocol handler, MIBs, both hash tables and the proto.
 * NOTE(review): the dccp_ctl_sock_exit()/ackvec/sysctl teardown calls
 * appear to be missing from this extract — verify.
 */
1090 static void __exit dccp_fini(void)
1092 inet_unregister_protosw(&dccp_v4_protosw);
1094 if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
1095 printk(dccp_del_proto_err_msg);
1097 free_percpu(dccp_statistics[0]);
1098 free_percpu(dccp_statistics[1]);
/* Recompute the page order from the stored table sizes (see dccp_init). */
1099 free_pages((unsigned long)dccp_hashinfo.bhash,
1100 get_order(dccp_hashinfo.bhash_size *
1101 sizeof(struct inet_bind_hashbucket)));
1102 free_pages((unsigned long)dccp_hashinfo.ehash,
1103 get_order(dccp_hashinfo.ehash_size *
1104 sizeof(struct inet_ehash_bucket)));
1105 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1106 proto_unregister(&dccp_prot);
1111 module_init(dccp_init);
1112 module_exit(dccp_fini);

1115 * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
1116 * values directly. Also cover the case where the protocol is not specified,
1117 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
/* Module aliases so the module autoloads on socket(PF_INET, SOCK_DCCP, ...). */
1119 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
1120 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
1121 MODULE_LICENSE("GPL");
1122 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1123 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");