Merge master.kernel.org:/home/rmk/linux-2.6-serial
[pandora-kernel.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/semaphore.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40
41 EXPORT_SYMBOL_GPL(dccp_statistics);
42
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
44
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
46
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48         .lhash_lock     = RW_LOCK_UNLOCKED,
49         .lhash_users    = ATOMIC_INIT(0),
50         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
51 };
52
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
54
55 void dccp_set_state(struct sock *sk, const int state)
56 {
57         const int oldstate = sk->sk_state;
58
59         dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
60                       dccp_role(sk), sk,
61                       dccp_state_name(oldstate), dccp_state_name(state));
62         WARN_ON(state == oldstate);
63
64         switch (state) {
65         case DCCP_OPEN:
66                 if (oldstate != DCCP_OPEN)
67                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
68                 break;
69
70         case DCCP_CLOSED:
71                 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
72                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
73
74                 sk->sk_prot->unhash(sk);
75                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
76                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
77                         inet_put_port(&dccp_hashinfo, sk);
78                 /* fall through */
79         default:
80                 if (oldstate == DCCP_OPEN)
81                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
82         }
83
84         /* Change state AFTER socket is unhashed to avoid closed
85          * socket sitting in hash tables.
86          */
87         sk->sk_state = state;
88 }
89
90 EXPORT_SYMBOL_GPL(dccp_set_state);
91
92 void dccp_done(struct sock *sk)
93 {
94         dccp_set_state(sk, DCCP_CLOSED);
95         dccp_clear_xmit_timers(sk);
96
97         sk->sk_shutdown = SHUTDOWN_MASK;
98
99         if (!sock_flag(sk, SOCK_DEAD))
100                 sk->sk_state_change(sk);
101         else
102                 inet_csk_destroy_sock(sk);
103 }
104
105 EXPORT_SYMBOL_GPL(dccp_done);
106
107 const char *dccp_packet_name(const int type)
108 {
109         static const char *dccp_packet_names[] = {
110                 [DCCP_PKT_REQUEST]  = "REQUEST",
111                 [DCCP_PKT_RESPONSE] = "RESPONSE",
112                 [DCCP_PKT_DATA]     = "DATA",
113                 [DCCP_PKT_ACK]      = "ACK",
114                 [DCCP_PKT_DATAACK]  = "DATAACK",
115                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
116                 [DCCP_PKT_CLOSE]    = "CLOSE",
117                 [DCCP_PKT_RESET]    = "RESET",
118                 [DCCP_PKT_SYNC]     = "SYNC",
119                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
120         };
121
122         if (type >= DCCP_NR_PKT_TYPES)
123                 return "INVALID";
124         else
125                 return dccp_packet_names[type];
126 }
127
128 EXPORT_SYMBOL_GPL(dccp_packet_name);
129
130 const char *dccp_state_name(const int state)
131 {
132         static char *dccp_state_names[] = {
133         [DCCP_OPEN]       = "OPEN",
134         [DCCP_REQUESTING] = "REQUESTING",
135         [DCCP_PARTOPEN]   = "PARTOPEN",
136         [DCCP_LISTEN]     = "LISTEN",
137         [DCCP_RESPOND]    = "RESPOND",
138         [DCCP_CLOSING]    = "CLOSING",
139         [DCCP_TIME_WAIT]  = "TIME_WAIT",
140         [DCCP_CLOSED]     = "CLOSED",
141         };
142
143         if (state >= DCCP_MAX_STATES)
144                 return "INVALID STATE!";
145         else
146                 return dccp_state_names[state];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_state_name);
150
151 void dccp_hash(struct sock *sk)
152 {
153         inet_hash(&dccp_hashinfo, sk);
154 }
155
156 EXPORT_SYMBOL_GPL(dccp_hash);
157
158 void dccp_unhash(struct sock *sk)
159 {
160         inet_unhash(&dccp_hashinfo, sk);
161 }
162
163 EXPORT_SYMBOL_GPL(dccp_unhash);
164
165 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
166 {
167         struct dccp_sock *dp = dccp_sk(sk);
168         struct dccp_minisock *dmsk = dccp_msk(sk);
169         struct inet_connection_sock *icsk = inet_csk(sk);
170
171         dccp_minisock_init(&dp->dccps_minisock);
172         do_gettimeofday(&dp->dccps_epoch);
173
174         /*
175          * FIXME: We're hardcoding the CCID, and doing this at this point makes
176          * the listening (master) sock get CCID control blocks, which is not
177          * necessary, but for now, to not mess with the test userspace apps,
178          * lets leave it here, later the real solution is to do this in a
179          * setsockopt(CCIDs-I-want/accept). -acme
180          */
181         if (likely(ctl_sock_initialized)) {
182                 int rc = dccp_feat_init(dmsk);
183
184                 if (rc)
185                         return rc;
186
187                 if (dmsk->dccpms_send_ack_vector) {
188                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
189                         if (dp->dccps_hc_rx_ackvec == NULL)
190                                 return -ENOMEM;
191                 }
192                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
193                                                       sk, GFP_KERNEL);
194                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
195                                                       sk, GFP_KERNEL);
196                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
197                              dp->dccps_hc_tx_ccid == NULL)) {
198                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
199                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
200                         if (dmsk->dccpms_send_ack_vector) {
201                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
202                                 dp->dccps_hc_rx_ackvec = NULL;
203                         }
204                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
205                         return -ENOMEM;
206                 }
207         } else {
208                 /* control socket doesn't need feat nego */
209                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
210                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
211         }
212
213         dccp_init_xmit_timers(sk);
214         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
215         sk->sk_state            = DCCP_CLOSED;
216         sk->sk_write_space      = dccp_write_space;
217         icsk->icsk_sync_mss     = dccp_sync_mss;
218         dp->dccps_mss_cache     = 536;
219         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
220         dp->dccps_service       = DCCP_SERVICE_INVALID_VALUE;
221         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
222
223         return 0;
224 }
225
226 EXPORT_SYMBOL_GPL(dccp_init_sock);
227
228 int dccp_destroy_sock(struct sock *sk)
229 {
230         struct dccp_sock *dp = dccp_sk(sk);
231         struct dccp_minisock *dmsk = dccp_msk(sk);
232
233         /*
234          * DCCP doesn't use sk_write_queue, just sk_send_head
235          * for retransmissions
236          */
237         if (sk->sk_send_head != NULL) {
238                 kfree_skb(sk->sk_send_head);
239                 sk->sk_send_head = NULL;
240         }
241
242         /* Clean up a referenced DCCP bind bucket. */
243         if (inet_csk(sk)->icsk_bind_hash != NULL)
244                 inet_put_port(&dccp_hashinfo, sk);
245
246         kfree(dp->dccps_service_list);
247         dp->dccps_service_list = NULL;
248
249         if (dmsk->dccpms_send_ack_vector) {
250                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
251                 dp->dccps_hc_rx_ackvec = NULL;
252         }
253         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
254         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
255         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
256
257         /* clean up feature negotiation state */
258         dccp_feat_clean(dmsk);
259
260         return 0;
261 }
262
263 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
264
265 static inline int dccp_listen_start(struct sock *sk)
266 {
267         struct dccp_sock *dp = dccp_sk(sk);
268
269         dp->dccps_role = DCCP_ROLE_LISTEN;
270         /*
271          * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
272          * before calling listen()
273          */
274         if (dccp_service_not_initialized(sk))
275                 return -EPROTO;
276         return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
277 }
278
279 int dccp_disconnect(struct sock *sk, int flags)
280 {
281         struct inet_connection_sock *icsk = inet_csk(sk);
282         struct inet_sock *inet = inet_sk(sk);
283         int err = 0;
284         const int old_state = sk->sk_state;
285
286         if (old_state != DCCP_CLOSED)
287                 dccp_set_state(sk, DCCP_CLOSED);
288
289         /* ABORT function of RFC793 */
290         if (old_state == DCCP_LISTEN) {
291                 inet_csk_listen_stop(sk);
292         /* FIXME: do the active reset thing */
293         } else if (old_state == DCCP_REQUESTING)
294                 sk->sk_err = ECONNRESET;
295
296         dccp_clear_xmit_timers(sk);
297         __skb_queue_purge(&sk->sk_receive_queue);
298         if (sk->sk_send_head != NULL) {
299                 __kfree_skb(sk->sk_send_head);
300                 sk->sk_send_head = NULL;
301         }
302
303         inet->dport = 0;
304
305         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
306                 inet_reset_saddr(sk);
307
308         sk->sk_shutdown = 0;
309         sock_reset_flag(sk, SOCK_DONE);
310
311         icsk->icsk_backoff = 0;
312         inet_csk_delack_init(sk);
313         __sk_dst_reset(sk);
314
315         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
316
317         sk->sk_error_report(sk);
318         return err;
319 }
320
321 EXPORT_SYMBOL_GPL(dccp_disconnect);
322
323 /*
324  *      Wait for a DCCP event.
325  *
326  *      Note that we don't need to lock the socket, as the upper poll layers
327  *      take care of normal races (between the test and the event) and we don't
328  *      go look at any of the socket buffers directly.
329  */
330 unsigned int dccp_poll(struct file *file, struct socket *sock,
331                        poll_table *wait)
332 {
333         unsigned int mask;
334         struct sock *sk = sock->sk;
335
336         poll_wait(file, sk->sk_sleep, wait);
337         if (sk->sk_state == DCCP_LISTEN)
338                 return inet_csk_listen_poll(sk);
339
340         /* Socket is not locked. We are protected from async events
341            by poll logic and correct handling of state changes
342            made by another threads is impossible in any case.
343          */
344
345         mask = 0;
346         if (sk->sk_err)
347                 mask = POLLERR;
348
349         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
350                 mask |= POLLHUP;
351         if (sk->sk_shutdown & RCV_SHUTDOWN)
352                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
353
354         /* Connected? */
355         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
356                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
357                         mask |= POLLIN | POLLRDNORM;
358
359                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
360                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
361                                 mask |= POLLOUT | POLLWRNORM;
362                         } else {  /* send SIGIO later */
363                                 set_bit(SOCK_ASYNC_NOSPACE,
364                                         &sk->sk_socket->flags);
365                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
366
367                                 /* Race breaker. If space is freed after
368                                  * wspace test but before the flags are set,
369                                  * IO signal will be lost.
370                                  */
371                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
372                                         mask |= POLLOUT | POLLWRNORM;
373                         }
374                 }
375         }
376         return mask;
377 }
378
379 EXPORT_SYMBOL_GPL(dccp_poll);
380
381 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
382 {
383         dccp_pr_debug("entry\n");
384         return -ENOIOCTLCMD;
385 }
386
387 EXPORT_SYMBOL_GPL(dccp_ioctl);
388
389 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
390                                    char __user *optval, int optlen)
391 {
392         struct dccp_sock *dp = dccp_sk(sk);
393         struct dccp_service_list *sl = NULL;
394
395         if (service == DCCP_SERVICE_INVALID_VALUE || 
396             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
397                 return -EINVAL;
398
399         if (optlen > sizeof(service)) {
400                 sl = kmalloc(optlen, GFP_KERNEL);
401                 if (sl == NULL)
402                         return -ENOMEM;
403
404                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
405                 if (copy_from_user(sl->dccpsl_list,
406                                    optval + sizeof(service),
407                                    optlen - sizeof(service)) ||
408                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
409                         kfree(sl);
410                         return -EFAULT;
411                 }
412         }
413
414         lock_sock(sk);
415         dp->dccps_service = service;
416
417         kfree(dp->dccps_service_list);
418
419         dp->dccps_service_list = sl;
420         release_sock(sk);
421         return 0;
422 }
423
424 /* byte 1 is feature.  the rest is the preference list */
425 static int dccp_setsockopt_change(struct sock *sk, int type,
426                                   struct dccp_so_feat __user *optval)
427 {
428         struct dccp_so_feat opt;
429         u8 *val;
430         int rc;
431
432         if (copy_from_user(&opt, optval, sizeof(opt)))
433                 return -EFAULT;
434
435         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
436         if (!val)
437                 return -ENOMEM;
438
439         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
440                 rc = -EFAULT;
441                 goto out_free_val;
442         }
443
444         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
445                               val, opt.dccpsf_len, GFP_KERNEL);
446         if (rc)
447                 goto out_free_val;
448
449 out:
450         return rc;
451
452 out_free_val:
453         kfree(val);
454         goto out;
455 }
456
457 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
458                 char __user *optval, int optlen)
459 {
460         struct dccp_sock *dp;
461         int err;
462         int val;
463
464         if (optlen < sizeof(int))
465                 return -EINVAL;
466
467         if (get_user(val, (int __user *)optval))
468                 return -EFAULT;
469
470         if (optname == DCCP_SOCKOPT_SERVICE)
471                 return dccp_setsockopt_service(sk, val, optval, optlen);
472
473         lock_sock(sk);
474         dp = dccp_sk(sk);
475         err = 0;
476
477         switch (optname) {
478         case DCCP_SOCKOPT_PACKET_SIZE:
479                 dp->dccps_packet_size = val;
480                 break;
481
482         case DCCP_SOCKOPT_CHANGE_L:
483                 if (optlen != sizeof(struct dccp_so_feat))
484                         err = -EINVAL;
485                 else
486                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
487                                                      (struct dccp_so_feat __user *)
488                                                      optval);
489                 break;
490
491         case DCCP_SOCKOPT_CHANGE_R:
492                 if (optlen != sizeof(struct dccp_so_feat))
493                         err = -EINVAL;
494                 else
495                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
496                                                      (struct dccp_so_feat __user *)
497                                                      optval);
498                 break;
499
500         default:
501                 err = -ENOPROTOOPT;
502                 break;
503         }
504         
505         release_sock(sk);
506         return err;
507 }
508
509 int dccp_setsockopt(struct sock *sk, int level, int optname,
510                     char __user *optval, int optlen)
511 {
512         if (level != SOL_DCCP)
513                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
514                                                              optname, optval,
515                                                              optlen);
516         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
517 }
518
519 EXPORT_SYMBOL_GPL(dccp_setsockopt);
520
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_setsockopt  -  compat setsockopt entry point
 *
 * SOL_DCCP options share do_dccp_setsockopt() with the native path;
 * other levels go through inet_csk_compat_setsockopt().
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_setsockopt(sk, level, optname,
					  optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
533
534 static int dccp_getsockopt_service(struct sock *sk, int len,
535                                    __be32 __user *optval,
536                                    int __user *optlen)
537 {
538         const struct dccp_sock *dp = dccp_sk(sk);
539         const struct dccp_service_list *sl;
540         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
541
542         lock_sock(sk);
543         if (dccp_service_not_initialized(sk))
544                 goto out;
545
546         if ((sl = dp->dccps_service_list) != NULL) {
547                 slen = sl->dccpsl_nr * sizeof(u32);
548                 total_len += slen;
549         }
550
551         err = -EINVAL;
552         if (total_len > len)
553                 goto out;
554
555         err = 0;
556         if (put_user(total_len, optlen) ||
557             put_user(dp->dccps_service, optval) ||
558             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
559                 err = -EFAULT;
560 out:
561         release_sock(sk);
562         return err;
563 }
564
565 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
566                     char __user *optval, int __user *optlen)
567 {
568         struct dccp_sock *dp;
569         int val, len;
570
571         if (get_user(len, optlen))
572                 return -EFAULT;
573
574         if (len < sizeof(int))
575                 return -EINVAL;
576
577         dp = dccp_sk(sk);
578
579         switch (optname) {
580         case DCCP_SOCKOPT_PACKET_SIZE:
581                 val = dp->dccps_packet_size;
582                 len = sizeof(dp->dccps_packet_size);
583                 break;
584         case DCCP_SOCKOPT_SERVICE:
585                 return dccp_getsockopt_service(sk, len,
586                                                (__be32 __user *)optval, optlen);
587         case 128 ... 191:
588                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
589                                              len, (u32 __user *)optval, optlen);
590         case 192 ... 255:
591                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
592                                              len, (u32 __user *)optval, optlen);
593         default:
594                 return -ENOPROTOOPT;
595         }
596
597         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
598                 return -EFAULT;
599
600         return 0;
601 }
602
603 int dccp_getsockopt(struct sock *sk, int level, int optname,
604                     char __user *optval, int __user *optlen)
605 {
606         if (level != SOL_DCCP)
607                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
608                                                              optname, optval,
609                                                              optlen);
610         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
611 }
612
613 EXPORT_SYMBOL_GPL(dccp_getsockopt);
614
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_getsockopt  -  compat getsockopt entry point
 *
 * SOL_DCCP options share do_dccp_getsockopt() with the native path;
 * other levels go through inet_csk_compat_getsockopt().
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_getsockopt(sk, level, optname,
					  optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
627
628 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
629                  size_t len)
630 {
631         const struct dccp_sock *dp = dccp_sk(sk);
632         const int flags = msg->msg_flags;
633         const int noblock = flags & MSG_DONTWAIT;
634         struct sk_buff *skb;
635         int rc, size;
636         long timeo;
637
638         if (len > dp->dccps_mss_cache)
639                 return -EMSGSIZE;
640
641         lock_sock(sk);
642         timeo = sock_sndtimeo(sk, noblock);
643
644         /*
645          * We have to use sk_stream_wait_connect here to set sk_write_pending,
646          * so that the trick in dccp_rcv_request_sent_state_process.
647          */
648         /* Wait for a connection to finish. */
649         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
650                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
651                         goto out_release;
652
653         size = sk->sk_prot->max_header + len;
654         release_sock(sk);
655         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
656         lock_sock(sk);
657         if (skb == NULL)
658                 goto out_release;
659
660         skb_reserve(skb, sk->sk_prot->max_header);
661         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
662         if (rc != 0)
663                 goto out_discard;
664
665         rc = dccp_write_xmit(sk, skb, &timeo);
666         /*
667          * XXX we don't use sk_write_queue, so just discard the packet.
668          *     Current plan however is to _use_ sk_write_queue with
669          *     an algorith similar to tcp_sendmsg, where the main difference
670          *     is that in DCCP we have to respect packet boundaries, so
671          *     no coalescing of skbs.
672          *
673          *     This bug was _quickly_ found & fixed by just looking at an OSTRA
674          *     generated callgraph 8) -acme
675          */
676 out_release:
677         release_sock(sk);
678         return rc ? : len;
679 out_discard:
680         kfree_skb(skb);
681         goto out_release;
682 }
683
684 EXPORT_SYMBOL_GPL(dccp_sendmsg);
685
686 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
687                  size_t len, int nonblock, int flags, int *addr_len)
688 {
689         const struct dccp_hdr *dh;
690         long timeo;
691
692         lock_sock(sk);
693
694         if (sk->sk_state == DCCP_LISTEN) {
695                 len = -ENOTCONN;
696                 goto out;
697         }
698
699         timeo = sock_rcvtimeo(sk, nonblock);
700
701         do {
702                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
703
704                 if (skb == NULL)
705                         goto verify_sock_status;
706
707                 dh = dccp_hdr(skb);
708
709                 if (dh->dccph_type == DCCP_PKT_DATA ||
710                     dh->dccph_type == DCCP_PKT_DATAACK)
711                         goto found_ok_skb;
712
713                 if (dh->dccph_type == DCCP_PKT_RESET ||
714                     dh->dccph_type == DCCP_PKT_CLOSE) {
715                         dccp_pr_debug("found fin ok!\n");
716                         len = 0;
717                         goto found_fin_ok;
718                 }
719                 dccp_pr_debug("packet_type=%s\n",
720                               dccp_packet_name(dh->dccph_type));
721                 sk_eat_skb(sk, skb, 0);
722 verify_sock_status:
723                 if (sock_flag(sk, SOCK_DONE)) {
724                         len = 0;
725                         break;
726                 }
727
728                 if (sk->sk_err) {
729                         len = sock_error(sk);
730                         break;
731                 }
732
733                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
734                         len = 0;
735                         break;
736                 }
737
738                 if (sk->sk_state == DCCP_CLOSED) {
739                         if (!sock_flag(sk, SOCK_DONE)) {
740                                 /* This occurs when user tries to read
741                                  * from never connected socket.
742                                  */
743                                 len = -ENOTCONN;
744                                 break;
745                         }
746                         len = 0;
747                         break;
748                 }
749
750                 if (!timeo) {
751                         len = -EAGAIN;
752                         break;
753                 }
754
755                 if (signal_pending(current)) {
756                         len = sock_intr_errno(timeo);
757                         break;
758                 }
759
760                 sk_wait_data(sk, &timeo);
761                 continue;
762         found_ok_skb:
763                 if (len > skb->len)
764                         len = skb->len;
765                 else if (len < skb->len)
766                         msg->msg_flags |= MSG_TRUNC;
767
768                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
769                         /* Exception. Bailout! */
770                         len = -EFAULT;
771                         break;
772                 }
773         found_fin_ok:
774                 if (!(flags & MSG_PEEK))
775                         sk_eat_skb(sk, skb, 0);
776                 break;
777         } while (1);
778 out:
779         release_sock(sk);
780         return len;
781 }
782
783 EXPORT_SYMBOL_GPL(dccp_recvmsg);
784
785 int inet_dccp_listen(struct socket *sock, int backlog)
786 {
787         struct sock *sk = sock->sk;
788         unsigned char old_state;
789         int err;
790
791         lock_sock(sk);
792
793         err = -EINVAL;
794         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
795                 goto out;
796
797         old_state = sk->sk_state;
798         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
799                 goto out;
800
801         /* Really, if the socket is already in listen state
802          * we can only allow the backlog to be adjusted.
803          */
804         if (old_state != DCCP_LISTEN) {
805                 /*
806                  * FIXME: here it probably should be sk->sk_prot->listen_start
807                  * see tcp_listen_start
808                  */
809                 err = dccp_listen_start(sk);
810                 if (err)
811                         goto out;
812         }
813         sk->sk_max_ack_backlog = backlog;
814         err = 0;
815
816 out:
817         release_sock(sk);
818         return err;
819 }
820
821 EXPORT_SYMBOL_GPL(inet_dccp_listen);
822
823 static const unsigned char dccp_new_state[] = {
824         /* current state:   new state:      action:     */
825         [0]               = DCCP_CLOSED,
826         [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
827         [DCCP_REQUESTING] = DCCP_CLOSED,
828         [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
829         [DCCP_LISTEN]     = DCCP_CLOSED,
830         [DCCP_RESPOND]    = DCCP_CLOSED,
831         [DCCP_CLOSING]    = DCCP_CLOSED,
832         [DCCP_TIME_WAIT]  = DCCP_CLOSED,
833         [DCCP_CLOSED]     = DCCP_CLOSED,
834 };
835
836 static int dccp_close_state(struct sock *sk)
837 {
838         const int next = dccp_new_state[sk->sk_state];
839         const int ns = next & DCCP_STATE_MASK;
840
841         if (ns != sk->sk_state)
842                 dccp_set_state(sk, ns);
843
844         return next & DCCP_ACTION_FIN;
845 }
846
847 void dccp_close(struct sock *sk, long timeout)
848 {
849         struct sk_buff *skb;
850         int state;
851
852         lock_sock(sk);
853
854         sk->sk_shutdown = SHUTDOWN_MASK;
855
856         if (sk->sk_state == DCCP_LISTEN) {
857                 dccp_set_state(sk, DCCP_CLOSED);
858
859                 /* Special case. */
860                 inet_csk_listen_stop(sk);
861
862                 goto adjudge_to_death;
863         }
864
865         /*
866          * We need to flush the recv. buffs.  We do this only on the
867          * descriptor close, not protocol-sourced closes, because the
868           *reader process may not have drained the data yet!
869          */
870         /* FIXME: check for unread data */
871         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
872                 __kfree_skb(skb);
873         }
874
875         if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
876                 /* Check zero linger _after_ checking for unread data. */
877                 sk->sk_prot->disconnect(sk, 0);
878         } else if (dccp_close_state(sk)) {
879                 dccp_send_close(sk, 1);
880         }
881
882         sk_stream_wait_close(sk, timeout);
883
884 adjudge_to_death:
885         state = sk->sk_state;
886         sock_hold(sk);
887         sock_orphan(sk);
888         atomic_inc(sk->sk_prot->orphan_count);
889
890         /*
891          * It is the last release_sock in its life. It will remove backlog.
892          */
893         release_sock(sk);
894         /*
895          * Now socket is owned by kernel and we acquire BH lock
896          * to finish close. No need to check for user refs.
897          */
898         local_bh_disable();
899         bh_lock_sock(sk);
900         BUG_TRAP(!sock_owned_by_user(sk));
901
902         /* Have we already been destroyed by a softirq or backlog? */
903         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
904                 goto out;
905
906         /*
907          * The last release_sock may have processed the CLOSE or RESET
908          * packet moving sock to CLOSED state, if not we have to fire
909          * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
910          * in draft-ietf-dccp-spec-11. -acme
911          */
912         if (sk->sk_state == DCCP_CLOSING) {
913                 /* FIXME: should start at 2 * RTT */
914                 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
915                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
916                                           inet_csk(sk)->icsk_rto,
917                                           DCCP_RTO_MAX);
918 #if 0
919                 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
920                 dccp_set_state(sk, DCCP_CLOSED);
921 #endif
922         }
923
924         if (sk->sk_state == DCCP_CLOSED)
925                 inet_csk_destroy_sock(sk);
926
927         /* Otherwise, socket is reprieved until protocol close. */
928
929 out:
930         bh_unlock_sock(sk);
931         local_bh_enable();
932         sock_put(sk);
933 }
934
935 EXPORT_SYMBOL_GPL(dccp_close);
936
/*
 * dccp_shutdown - shutdown entry point for DCCP sockets (stub)
 * @sk:  socket the request applies to (not used)
 * @how: shutdown argument (not used)
 *
 * Currently only emits a debug trace through dccp_pr_debug(); no socket
 * state is touched.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
943
944 static int __init dccp_mib_init(void)
945 {
946         int rc = -ENOMEM;
947
948         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
949         if (dccp_statistics[0] == NULL)
950                 goto out;
951
952         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
953         if (dccp_statistics[1] == NULL)
954                 goto out_free_one;
955
956         rc = 0;
957 out:
958         return rc;
959 out_free_one:
960         free_percpu(dccp_statistics[0]);
961         dccp_statistics[0] = NULL;
962         goto out;
963
964 }
965
966 static void dccp_mib_exit(void)
967 {
968         free_percpu(dccp_statistics[0]);
969         free_percpu(dccp_statistics[1]);
970         dccp_statistics[0] = dccp_statistics[1] = NULL;
971 }
972
/*
 * Optional load-time override for the size of the established hash table.
 * When non-zero, dccp_init() derives its page goal from this bucket count
 * instead of from num_physpages.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Debug message switch; only built when CONFIG_IP_DCCP_DEBUG is set and
 * exported so that code outside this file can read it.
 */
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
984
985 static int __init dccp_init(void)
986 {
987         unsigned long goal;
988         int ehash_order, bhash_order, i;
989         int rc = -ENOBUFS;
990
991         dccp_hashinfo.bind_bucket_cachep =
992                 kmem_cache_create("dccp_bind_bucket",
993                                   sizeof(struct inet_bind_bucket), 0,
994                                   SLAB_HWCACHE_ALIGN, NULL, NULL);
995         if (!dccp_hashinfo.bind_bucket_cachep)
996                 goto out;
997
998         /*
999          * Size and allocate the main established and bind bucket
1000          * hash tables.
1001          *
1002          * The methodology is similar to that of the buffer cache.
1003          */
1004         if (num_physpages >= (128 * 1024))
1005                 goal = num_physpages >> (21 - PAGE_SHIFT);
1006         else
1007                 goal = num_physpages >> (23 - PAGE_SHIFT);
1008
1009         if (thash_entries)
1010                 goal = (thash_entries *
1011                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1012         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1013                 ;
1014         do {
1015                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1016                                         sizeof(struct inet_ehash_bucket);
1017                 dccp_hashinfo.ehash_size >>= 1;
1018                 while (dccp_hashinfo.ehash_size &
1019                        (dccp_hashinfo.ehash_size - 1))
1020                         dccp_hashinfo.ehash_size--;
1021                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1022                         __get_free_pages(GFP_ATOMIC, ehash_order);
1023         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1024
1025         if (!dccp_hashinfo.ehash) {
1026                 printk(KERN_CRIT "Failed to allocate DCCP "
1027                                  "established hash table\n");
1028                 goto out_free_bind_bucket_cachep;
1029         }
1030
1031         for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
1032                 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1033                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1034         }
1035
1036         bhash_order = ehash_order;
1037
1038         do {
1039                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1040                                         sizeof(struct inet_bind_hashbucket);
1041                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1042                     bhash_order > 0)
1043                         continue;
1044                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1045                         __get_free_pages(GFP_ATOMIC, bhash_order);
1046         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1047
1048         if (!dccp_hashinfo.bhash) {
1049                 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
1050                 goto out_free_dccp_ehash;
1051         }
1052
1053         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1054                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1055                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1056         }
1057
1058         rc = dccp_mib_init();
1059         if (rc)
1060                 goto out_free_dccp_bhash;
1061
1062         rc = dccp_ackvec_init();
1063         if (rc)
1064                 goto out_free_dccp_mib;
1065
1066         rc = dccp_sysctl_init();
1067         if (rc)
1068                 goto out_ackvec_exit;
1069 out:
1070         return rc;
1071 out_ackvec_exit:
1072         dccp_ackvec_exit();
1073 out_free_dccp_mib:
1074         dccp_mib_exit();
1075 out_free_dccp_bhash:
1076         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1077         dccp_hashinfo.bhash = NULL;
1078 out_free_dccp_ehash:
1079         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1080         dccp_hashinfo.ehash = NULL;
1081 out_free_bind_bucket_cachep:
1082         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1083         dccp_hashinfo.bind_bucket_cachep = NULL;
1084         goto out;
1085 }
1086
1087 static void __exit dccp_fini(void)
1088 {
1089         dccp_mib_exit();
1090         free_pages((unsigned long)dccp_hashinfo.bhash,
1091                    get_order(dccp_hashinfo.bhash_size *
1092                              sizeof(struct inet_bind_hashbucket)));
1093         free_pages((unsigned long)dccp_hashinfo.ehash,
1094                    get_order(dccp_hashinfo.ehash_size *
1095                              sizeof(struct inet_ehash_bucket)));
1096         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1097         dccp_ackvec_exit();
1098         dccp_sysctl_exit();
1099 }
1100
/* Hook dccp_init()/dccp_fini() up as the module's load and unload handlers. */
module_init(dccp_init);
module_exit(dccp_fini);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");