[NET]: Make the device list and device lookups per namespace.
net/core/sock.c
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Generic socket support routines. Memory allocators, socket lock/release
 *              handler for protocols to use and generic option handler.
 *
 *
 * Version:     $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Florian La Roche, <flla@stud.uni-sb.de>
 *              Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *              Alan Cox        :       Numerous verify_area() problems
 *              Alan Cox        :       Connecting on a connecting socket
 *                                      now returns an error for tcp.
 *              Alan Cox        :       sock->protocol is set correctly.
 *                                      and is not sometimes left as 0.
 *              Alan Cox        :       connect handles icmp errors on a
 *                                      connect properly. Unfortunately there
 *                                      is a restart syscall nasty there. I
 *                                      can't match BSD without hacking the C
 *                                      library. Ideas urgently sought!
 *              Alan Cox        :       Disallow bind() to addresses that are
 *                                      not ours - especially broadcast ones!!
 *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
 *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
 *                                      instead they leave that for the DESTROY timer.
 *              Alan Cox        :       Clean up error flag in accept
 *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
 *                                      was buggy. Put a remove_sock() in the handler
 *                                      for memory when we hit 0. Also altered the timer
 *                                      code. The ACK stuff can wait and needs major
 *                                      TCP layer surgery.
 *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
 *                                      and fixed timer/inet_bh race.
 *              Alan Cox        :       Added zapped flag for TCP
 *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
 *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
 *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
 *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
 *      Pauline Middelink       :       identd support
 *              Alan Cox        :       Fixed connect() taking signals I think.
 *              Alan Cox        :       SO_LINGER supported
 *              Alan Cox        :       Error reporting fixes
 *              Anonymous       :       inet_create tidied up (sk->reuse setting)
 *              Alan Cox        :       inet sockets don't set sk->type!
 *              Alan Cox        :       Split socket option code
 *              Alan Cox        :       Callbacks
 *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
 *              Alex            :       Removed restriction on inet fioctl
 *              Alan Cox        :       Splitting INET from NET core
 *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
 *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
 *              Alan Cox        :       Split IP from generic code
 *              Alan Cox        :       New kfree_skbmem()
 *              Alan Cox        :       Make SO_DEBUG superuser only.
 *              Alan Cox        :       Allow anyone to clear SO_DEBUG
 *                                      (compatibility fix)
 *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
 *              Alan Cox        :       Allocator for a socket is settable.
 *              Alan Cox        :       SO_ERROR includes soft errors.
 *              Alan Cox        :       Allow NULL arguments on some SO_ opts
 *              Alan Cox        :       Generic socket allocation to make hooks
 *                                      easier (suggested by Craig Metz).
 *              Michael Pall    :       SO_ERROR returns positive errno again
 *              Steve Whitehouse:       Added default destructor to free
 *                                      protocol private data.
 *              Steve Whitehouse:       Added various other default routines
 *                                      common to several socket families.
 *              Chris Evans     :       Call suser() check last on F_SETOWN
 *              Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *              Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
 *              Andi Kleen      :       Fix write_space callback
 *              Chris Evans     :       Security fixes - signedness again
 *              Arnaldo C. Melo :       cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
158   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
159   "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
160 };
161 static const char *af_family_slock_key_strings[AF_MAX+1] = {
162   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
163   "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
164   "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
165   "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
166   "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
167   "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
168   "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
169   "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
170   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
171   "slock-27"       , "slock-28"          , "slock-29"          ,
172   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
173   "slock-AF_RXRPC" , "slock-AF_MAX"
174 };
175 static const char *af_family_clock_key_strings[AF_MAX+1] = {
176   "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
177   "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
178   "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
179   "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
180   "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
181   "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
182   "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
183   "clock-21"       , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
184   "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
185   "clock-27"       , "clock-28"          , "clock-29"          ,
186   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
187   "clock-AF_RXRPC" , "clock-AF_MAX"
188 };
189 #endif
190
191 /*
192  * sk_callback_lock locking rules are per-address-family,
193  * so split the lock classes by using a per-AF key:
194  */
195 static struct lock_class_key af_callback_keys[AF_MAX];
196
197 /* Take into consideration the size of the struct sk_buff overhead in the
198  * determination of these values, since that is non-constant across
199  * platforms.  This makes socket queueing behavior and performance
200  * not depend upon such differences.
201  */
202 #define _SK_MEM_PACKETS         256
203 #define _SK_MEM_OVERHEAD        (sizeof(struct sk_buff) + 256)
204 #define SK_WMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
205 #define SK_RMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
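
/*
 * Worked example (illustrative only; sizeof(struct sk_buff) differs by
 * platform and config): with a hypothetical ~240 byte sk_buff,
 * _SK_MEM_OVERHEAD is 240 + 256 = 496 bytes, so SK_WMEM_MAX and
 * SK_RMEM_MAX come out near 496 * 256 = ~124 KB per socket.
 */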

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
        struct timeval tv;

        if (optlen < sizeof(tv))
                return -EINVAL;
        if (copy_from_user(&tv, optval, sizeof(tv)))
                return -EFAULT;
        if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
                return -EDOM;

        if (tv.tv_sec < 0) {
                static int warned __read_mostly;

                *timeo_p = 0;
                if (warned < 10 && net_ratelimit()) {
                        warned++;
                        printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
                               "tries to set negative timeout\n",
                                current->comm, current->pid);
                }
                return 0;
        }
        *timeo_p = MAX_SCHEDULE_TIMEOUT;
        if (tv.tv_sec == 0 && tv.tv_usec == 0)
                return 0;
        if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
                *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
        return 0;
}
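
/*
 * Userspace view of the timeout conversion above (a hedged sketch, not
 * part of this file): SO_RCVTIMEO/SO_SNDTIMEO take a struct timeval,
 * and {0, 0} means "block forever" (MAX_SCHEDULE_TIMEOUT internally).
 *
 *      struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 *
 *      if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0)
 *              perror("setsockopt");
 *      // recv() on fd now times out with EAGAIN after ~5 seconds.
 */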

static void sock_warn_obsolete_bsdism(const char *name)
{
        static int warned;
        static char warncomm[TASK_COMM_LEN];
        if (strcmp(warncomm, current->comm) && warned < 5) {
                strcpy(warncomm,  current->comm);
                printk(KERN_WARNING "process `%s' is using obsolete "
                       "%s SO_BSDCOMPAT\n", warncomm, name);
                warned++;
        }
}

static void sock_disable_timestamp(struct sock *sk)
{
        if (sock_flag(sk, SOCK_TIMESTAMP)) {
                sock_reset_flag(sk, SOCK_TIMESTAMP);
                net_disable_timestamp();
        }
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
        int err = 0;
        int skb_len;

        /* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
           number of warnings when compiling with -W --ANK
         */
        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf) {
                err = -ENOMEM;
                goto out;
        }

        err = sk_filter(sk, skb);
        if (err)
                goto out;

        skb->dev = NULL;
        skb_set_owner_r(skb, sk);

        /* Cache the SKB length before we tack it onto the receive
         * queue.  Once it is added it no longer belongs to us and
         * may be freed by other threads of control pulling packets
         * from the queue.
         */
        skb_len = skb->len;

        skb_queue_tail(&sk->sk_receive_queue, skb);

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_data_ready(sk, skb_len);
out:
        return err;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
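
/*
 * Typical caller pattern (a sketch of how a datagram protocol's receive
 * path might hand an skb to the socket layer; my_proto_rcv is
 * hypothetical, not an API defined here):
 *
 *      static int my_proto_rcv(struct sock *sk, struct sk_buff *skb)
 *      {
 *              if (sock_queue_rcv_skb(sk, skb) < 0) {
 *                      kfree_skb(skb);         // rcvbuf full or filter error
 *                      return NET_RX_DROP;
 *              }
 *              return NET_RX_SUCCESS;
 *      }
 */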

int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
        int rc = NET_RX_SUCCESS;

        if (sk_filter(sk, skb))
                goto discard_and_relse;

        skb->dev = NULL;

        if (nested)
                bh_lock_sock_nested(sk);
        else
                bh_lock_sock(sk);
        if (!sock_owned_by_user(sk)) {
                /*
                 * trylock + unlock semantics:
                 */
                mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

                rc = sk->sk_backlog_rcv(sk, skb);

                mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
        } else
                sk_add_backlog(sk, skb);
        bh_unlock_sock(sk);
out:
        sock_put(sk);
        return rc;
discard_and_relse:
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
        struct dst_entry *dst = sk->sk_dst_cache;

        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk->sk_dst_cache = NULL;
                dst_release(dst);
                return NULL;
        }

        return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
        struct dst_entry *dst = sk_dst_get(sk);

        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk_dst_reset(sk);
                dst_release(dst);
                return NULL;
        }

        return dst;
}
EXPORT_SYMBOL(sk_dst_check);

static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
{
        int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
        struct net *net = sk->sk_net;
        char devname[IFNAMSIZ];
        int index;

        /* Sorry... */
        ret = -EPERM;
        if (!capable(CAP_NET_RAW))
                goto out;

        ret = -EINVAL;
        if (optlen < 0)
                goto out;

        /* Bind this socket to a particular device like "eth0",
         * as specified in the passed interface name. If the
         * name is "" or the option length is zero the socket
         * is not bound.
         */
        if (optlen > IFNAMSIZ - 1)
                optlen = IFNAMSIZ - 1;
        memset(devname, 0, sizeof(devname));

        ret = -EFAULT;
        if (copy_from_user(devname, optval, optlen))
                goto out;

        if (devname[0] == '\0') {
                index = 0;
        } else {
                struct net_device *dev = dev_get_by_name(net, devname);

                ret = -ENODEV;
                if (!dev)
                        goto out;

                index = dev->ifindex;
                dev_put(dev);
        }

        lock_sock(sk);
        sk->sk_bound_dev_if = index;
        sk_dst_reset(sk);
        release_sock(sk);

        ret = 0;

out:
#endif

        return ret;
}
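
/*
 * Userspace usage of the option implemented above (sketch; the caller
 * needs CAP_NET_RAW, and an empty interface name unbinds the socket):
 *
 *      const char ifname[IFNAMSIZ] = "eth0";   // "eth0" is an example name
 *
 *      if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
 *                     ifname, sizeof(ifname)) < 0)
 *              perror("SO_BINDTODEVICE");
 */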

/*
 *      This is meant for all protocols to use and covers goings on
 *      at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int optlen)
{
        struct sock *sk=sock->sk;
        struct sk_filter *filter;
        int val;
        int valbool;
        struct linger ling;
        int ret = 0;

        /*
         *      Options without arguments
         */

#ifdef SO_DONTLINGER            /* Compatibility item... */
        if (optname == SO_DONTLINGER) {
                lock_sock(sk);
                sock_reset_flag(sk, SOCK_LINGER);
                release_sock(sk);
                return 0;
        }
#endif

        if (optname == SO_BINDTODEVICE)
                return sock_bindtodevice(sk, optval, optlen);

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        valbool = val?1:0;

        lock_sock(sk);

        switch(optname) {
        case SO_DEBUG:
                if (val && !capable(CAP_NET_ADMIN)) {
                        ret = -EACCES;
                }
                else if (valbool)
                        sock_set_flag(sk, SOCK_DBG);
                else
                        sock_reset_flag(sk, SOCK_DBG);
                break;
        case SO_REUSEADDR:
                sk->sk_reuse = valbool;
                break;
        case SO_TYPE:
        case SO_ERROR:
                ret = -ENOPROTOOPT;
                break;
        case SO_DONTROUTE:
                if (valbool)
                        sock_set_flag(sk, SOCK_LOCALROUTE);
                else
                        sock_reset_flag(sk, SOCK_LOCALROUTE);
                break;
        case SO_BROADCAST:
                sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
                break;
        case SO_SNDBUF:
                /* Don't error on this BSD doesn't and if you think
                   about it this is right. Otherwise apps have to
                   play 'guess the biggest size' games. RCVBUF/SNDBUF
                   are treated in BSD as hints */

                if (val > sysctl_wmem_max)
                        val = sysctl_wmem_max;
set_sndbuf:
                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
                if ((val * 2) < SOCK_MIN_SNDBUF)
                        sk->sk_sndbuf = SOCK_MIN_SNDBUF;
                else
                        sk->sk_sndbuf = val * 2;

                /*
                 *      Wake up sending tasks if we
                 *      upped the value.
                 */
                sk->sk_write_space(sk);
                break;

        case SO_SNDBUFFORCE:
                if (!capable(CAP_NET_ADMIN)) {
                        ret = -EPERM;
                        break;
                }
                goto set_sndbuf;

        case SO_RCVBUF:
                /* Don't error on this BSD doesn't and if you think
                   about it this is right. Otherwise apps have to
                   play 'guess the biggest size' games. RCVBUF/SNDBUF
                   are treated in BSD as hints */

                if (val > sysctl_rmem_max)
                        val = sysctl_rmem_max;
set_rcvbuf:
                sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
                /*
                 * We double it on the way in to account for
                 * "struct sk_buff" etc. overhead.   Applications
                 * assume that the SO_RCVBUF setting they make will
                 * allow that much actual data to be received on that
                 * socket.
                 *
                 * Applications are unaware that "struct sk_buff" and
                 * other overheads allocate from the receive buffer
                 * during socket buffer allocation.
                 *
                 * And after considering the possible alternatives,
                 * returning the value we actually used in getsockopt
                 * is the most desirable behavior.
                 */
                if ((val * 2) < SOCK_MIN_RCVBUF)
                        sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
                else
                        sk->sk_rcvbuf = val * 2;
                break;

        case SO_RCVBUFFORCE:
                if (!capable(CAP_NET_ADMIN)) {
                        ret = -EPERM;
                        break;
                }
                goto set_rcvbuf;

        case SO_KEEPALIVE:
#ifdef CONFIG_INET
                if (sk->sk_protocol == IPPROTO_TCP)
                        tcp_set_keepalive(sk, valbool);
#endif
                sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
                break;

        case SO_OOBINLINE:
                sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
                break;

        case SO_NO_CHECK:
                sk->sk_no_check = valbool;
                break;

        case SO_PRIORITY:
                if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
                        sk->sk_priority = val;
                else
                        ret = -EPERM;
                break;

        case SO_LINGER:
                if (optlen < sizeof(ling)) {
                        ret = -EINVAL;  /* 1003.1g */
                        break;
                }
                if (copy_from_user(&ling,optval,sizeof(ling))) {
                        ret = -EFAULT;
                        break;
                }
                if (!ling.l_onoff)
                        sock_reset_flag(sk, SOCK_LINGER);
                else {
#if (BITS_PER_LONG == 32)
                        if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
                                sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
                        else
#endif
                                sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
                        sock_set_flag(sk, SOCK_LINGER);
                }
                break;

        case SO_BSDCOMPAT:
                sock_warn_obsolete_bsdism("setsockopt");
                break;

        case SO_PASSCRED:
                if (valbool)
                        set_bit(SOCK_PASSCRED, &sock->flags);
                else
                        clear_bit(SOCK_PASSCRED, &sock->flags);
                break;

        case SO_TIMESTAMP:
        case SO_TIMESTAMPNS:
                if (valbool)  {
                        if (optname == SO_TIMESTAMP)
                                sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
                        else
                                sock_set_flag(sk, SOCK_RCVTSTAMPNS);
                        sock_set_flag(sk, SOCK_RCVTSTAMP);
                        sock_enable_timestamp(sk);
                } else {
                        sock_reset_flag(sk, SOCK_RCVTSTAMP);
                        sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
                }
                break;

        case SO_RCVLOWAT:
                if (val < 0)
                        val = INT_MAX;
                sk->sk_rcvlowat = val ? : 1;
                break;

        case SO_RCVTIMEO:
                ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
                break;

        case SO_SNDTIMEO:
                ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
                break;

        case SO_ATTACH_FILTER:
                ret = -EINVAL;
                if (optlen == sizeof(struct sock_fprog)) {
                        struct sock_fprog fprog;

                        ret = -EFAULT;
                        if (copy_from_user(&fprog, optval, sizeof(fprog)))
                                break;

                        ret = sk_attach_filter(&fprog, sk);
                }
                break;

        case SO_DETACH_FILTER:
                rcu_read_lock_bh();
                filter = rcu_dereference(sk->sk_filter);
                if (filter) {
                        rcu_assign_pointer(sk->sk_filter, NULL);
                        sk_filter_release(sk, filter);
                        rcu_read_unlock_bh();
                        break;
                }
                rcu_read_unlock_bh();
                ret = -ENONET;
                break;

        case SO_PASSSEC:
                if (valbool)
                        set_bit(SOCK_PASSSEC, &sock->flags);
                else
                        clear_bit(SOCK_PASSSEC, &sock->flags);
                break;

                /* We implement the SO_SNDLOWAT etc to
                   not be settable (1003.1g 5.3) */
        default:
                ret = -ENOPROTOOPT;
                break;
        }
        release_sock(sk);
        return ret;
}
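
/*
 * Example of the SO_RCVBUF doubling handled above (userspace sketch):
 * the kernel stores twice the requested value to cover sk_buff overhead,
 * and getsockopt() reports the stored value.
 *
 *      int val = 65536, out;
 *      socklen_t len = sizeof(out);
 *
 *      setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
 *      getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &out, &len);
 *      // out is 131072, assuming 65536 <= sysctl_rmem_max.
 */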


int sock_getsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        union {
                int val;
                struct linger ling;
                struct timeval tm;
        } v;

        unsigned int lv = sizeof(int);
        int len;

        if (get_user(len, optlen))
                return -EFAULT;
        if (len < 0)
                return -EINVAL;

        switch(optname) {
        case SO_DEBUG:
                v.val = sock_flag(sk, SOCK_DBG);
                break;

        case SO_DONTROUTE:
                v.val = sock_flag(sk, SOCK_LOCALROUTE);
                break;

        case SO_BROADCAST:
                v.val = !!sock_flag(sk, SOCK_BROADCAST);
                break;

        case SO_SNDBUF:
                v.val = sk->sk_sndbuf;
                break;

        case SO_RCVBUF:
                v.val = sk->sk_rcvbuf;
                break;

        case SO_REUSEADDR:
                v.val = sk->sk_reuse;
                break;

        case SO_KEEPALIVE:
                v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
                break;

        case SO_TYPE:
                v.val = sk->sk_type;
                break;

        case SO_ERROR:
                v.val = -sock_error(sk);
                if (v.val==0)
                        v.val = xchg(&sk->sk_err_soft, 0);
                break;

        case SO_OOBINLINE:
                v.val = !!sock_flag(sk, SOCK_URGINLINE);
                break;

        case SO_NO_CHECK:
                v.val = sk->sk_no_check;
                break;

        case SO_PRIORITY:
                v.val = sk->sk_priority;
                break;

        case SO_LINGER:
                lv              = sizeof(v.ling);
                v.ling.l_onoff  = !!sock_flag(sk, SOCK_LINGER);
                v.ling.l_linger = sk->sk_lingertime / HZ;
                break;

        case SO_BSDCOMPAT:
                sock_warn_obsolete_bsdism("getsockopt");
                break;

        case SO_TIMESTAMP:
                v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
                                !sock_flag(sk, SOCK_RCVTSTAMPNS);
                break;

        case SO_TIMESTAMPNS:
                v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
                break;

        case SO_RCVTIMEO:
                lv=sizeof(struct timeval);
                if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
                        v.tm.tv_sec = 0;
                        v.tm.tv_usec = 0;
                } else {
                        v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
                        v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
                }
                break;

        case SO_SNDTIMEO:
                lv=sizeof(struct timeval);
                if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
                        v.tm.tv_sec = 0;
                        v.tm.tv_usec = 0;
                } else {
                        v.tm.tv_sec = sk->sk_sndtimeo / HZ;
                        v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
                }
                break;

        case SO_RCVLOWAT:
                v.val = sk->sk_rcvlowat;
                break;

        case SO_SNDLOWAT:
                v.val=1;
                break;

        case SO_PASSCRED:
                v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
                break;

        case SO_PEERCRED:
                if (len > sizeof(sk->sk_peercred))
                        len = sizeof(sk->sk_peercred);
                if (copy_to_user(optval, &sk->sk_peercred, len))
                        return -EFAULT;
                goto lenout;

        case SO_PEERNAME:
        {
                char address[128];

                if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
                        return -ENOTCONN;
                if (lv < len)
                        return -EINVAL;
                if (copy_to_user(optval, address, len))
                        return -EFAULT;
                goto lenout;
        }

        /* Dubious BSD thing... Probably nobody even uses it, but
         * the UNIX standard wants it for whatever reason... -DaveM
         */
        case SO_ACCEPTCONN:
                v.val = sk->sk_state == TCP_LISTEN;
                break;

        case SO_PASSSEC:
                v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
                break;

        case SO_PEERSEC:
                return security_socket_getpeersec_stream(sock, optval, optlen, len);

        default:
                return -ENOPROTOOPT;
        }

        if (len > lv)
                len = lv;
        if (copy_to_user(optval, &v, len))
                return -EFAULT;
lenout:
        if (put_user(len, optlen))
                return -EFAULT;
        return 0;
}
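
/*
 * A classic use of the SO_ERROR case above (userspace sketch): after a
 * non-blocking connect() signals writability, fetch and clear the
 * pending error.
 *
 *      int err = 0;
 *      socklen_t len = sizeof(err);
 *
 *      if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) == 0 && err)
 *              fprintf(stderr, "connect: %s\n", strerror(err));
 */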

/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
        sock_lock_init_class_and_name(sk,
                        af_family_slock_key_strings[sk->sk_family],
                        af_family_slock_keys + sk->sk_family,
                        af_family_key_strings[sk->sk_family],
                        af_family_keys + sk->sk_family);
}

/**
 *      sk_alloc - All socket objects are allocated here
 *      @net: the applicable net namespace
 *      @family: protocol family
 *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *      @prot: struct proto associated with this new sock instance
 *      @zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                      struct proto *prot, int zero_it)
{
        struct sock *sk = NULL;
        struct kmem_cache *slab = prot->slab;

        if (slab != NULL)
                sk = kmem_cache_alloc(slab, priority);
        else
                sk = kmalloc(prot->obj_size, priority);

        if (sk) {
                if (zero_it) {
                        memset(sk, 0, prot->obj_size);
                        sk->sk_family = family;
                        /*
                         * See comment in struct sock definition to understand
                         * why we need sk_prot_creator -acme
                         */
                        sk->sk_prot = sk->sk_prot_creator = prot;
                        sock_lock_init(sk);
                        sk->sk_net = get_net(net);
                }

                if (security_sk_alloc(sk, family, priority))
                        goto out_free;

                if (!try_module_get(prot->owner))
                        goto out_free;
        }
        return sk;

out_free:
        if (slab != NULL)
                kmem_cache_free(slab, sk);
        else
                kfree(sk);
        return NULL;
}
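
/*
 * Sketch of a typical in-kernel caller (hedged, simplified; my_proto and
 * my_create are hypothetical): a protocol's create routine allocates the
 * sock in the caller's namespace, then initializes the generic fields.
 *
 *      static int my_create(struct net *net, struct socket *sock, int protocol)
 *      {
 *              struct sock *sk = sk_alloc(net, PF_INET, GFP_KERNEL,
 *                                         &my_proto, 1);
 *              if (!sk)
 *                      return -ENOBUFS;
 *              sock_init_data(sock, sk);
 *              return 0;
 *      }
 */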

void sk_free(struct sock *sk)
{
        struct sk_filter *filter;
        struct module *owner = sk->sk_prot_creator->owner;

        if (sk->sk_destruct)
                sk->sk_destruct(sk);

        filter = rcu_dereference(sk->sk_filter);
        if (filter) {
                sk_filter_release(sk, filter);
                rcu_assign_pointer(sk->sk_filter, NULL);
        }

        sock_disable_timestamp(sk);

        if (atomic_read(&sk->sk_omem_alloc))
                printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
                       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

        security_sk_free(sk);
        put_net(sk->sk_net);
        if (sk->sk_prot_creator->slab != NULL)
                kmem_cache_free(sk->sk_prot_creator->slab, sk);
        else
                kfree(sk);
        module_put(owner);
}

struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
        struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0);

        if (newsk != NULL) {
                struct sk_filter *filter;

                sock_copy(newsk, sk);

                /* SANITY */
                sk_node_init(&newsk->sk_node);
                sock_lock_init(newsk);
                bh_lock_sock(newsk);
                newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;

                atomic_set(&newsk->sk_rmem_alloc, 0);
                atomic_set(&newsk->sk_wmem_alloc, 0);
                atomic_set(&newsk->sk_omem_alloc, 0);
                skb_queue_head_init(&newsk->sk_receive_queue);
                skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
                skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

                rwlock_init(&newsk->sk_dst_lock);
                rwlock_init(&newsk->sk_callback_lock);
                lockdep_set_class_and_name(&newsk->sk_callback_lock,
                                af_callback_keys + newsk->sk_family,
                                af_family_clock_key_strings[newsk->sk_family]);

                newsk->sk_dst_cache     = NULL;
                newsk->sk_wmem_queued   = 0;
                newsk->sk_forward_alloc = 0;
                newsk->sk_send_head     = NULL;
                newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

                sock_reset_flag(newsk, SOCK_DONE);
                skb_queue_head_init(&newsk->sk_error_queue);

                filter = newsk->sk_filter;
                if (filter != NULL)
                        sk_filter_charge(newsk, filter);

                if (unlikely(xfrm_sk_clone_policy(newsk))) {
                        /* It is still raw copy of parent, so invalidate
                         * destructor and make plain sk_free() */
                        newsk->sk_destruct = NULL;
                        sk_free(newsk);
                        newsk = NULL;
                        goto out;
                }

                newsk->sk_err      = 0;
                newsk->sk_priority = 0;
                atomic_set(&newsk->sk_refcnt, 2);

                /*
                 * Increment the counter in the same struct proto as the master
                 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
                 * is the same as sk->sk_prot->socks, as this field was copied
                 * with memcpy).
                 *
                 * This _changes_ the previous behaviour, where
                 * tcp_create_openreq_child always was incrementing the
                 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
                 * to be taken into account in all callers. -acme
                 */
                sk_refcnt_debug_inc(newsk);
                newsk->sk_socket = NULL;
                newsk->sk_sleep  = NULL;

                if (newsk->sk_prot->sockets_allocated)
                        atomic_inc(newsk->sk_prot->sockets_allocated);
        }
out:
        return newsk;
}

EXPORT_SYMBOL_GPL(sk_clone);

void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
        __sk_dst_set(sk, dst);
        sk->sk_route_caps = dst->dev->features;
        if (sk->sk_route_caps & NETIF_F_GSO)
                sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
        if (sk_can_gso(sk)) {
                if (dst->header_len)
                        sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
                else
                        sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
        }
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

void __init sk_init(void)
{
        if (num_physpages <= 4096) {
                sysctl_wmem_max = 32767;
                sysctl_rmem_max = 32767;
                sysctl_wmem_default = 32767;
                sysctl_rmem_default = 32767;
        } else if (num_physpages >= 131072) {
                sysctl_wmem_max = 131071;
                sysctl_rmem_max = 131071;
        }
}

/*
 *      Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        /* In case it might be waiting for more memory. */
        atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
        if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
                sk->sk_write_space(sk);
        sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


int sock_i_uid(struct sock *sk)
{
        int uid;

        read_lock(&sk->sk_callback_lock);
        uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
        read_unlock(&sk->sk_callback_lock);
        return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
        unsigned long ino;

        read_lock(&sk->sk_callback_lock);
        ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
        read_unlock(&sk->sk_callback_lock);
        return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
{
        if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                struct sk_buff * skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_w(skb, sk);
                        return skb;
                }
        }
        return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
{
        if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
                struct sk_buff *skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_r(skb, sk);
                        return skb;
                }
        }
        return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
        if ((unsigned)size <= sysctl_optmem_max &&
            atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
                void *mem;
                /* First do the add, to avoid the race if kmalloc
                 * might sleep.
                 */
                atomic_add(size, &sk->sk_omem_alloc);
                mem = kmalloc(size, priority);
                if (mem)
                        return mem;
                atomic_sub(size, &sk->sk_omem_alloc);
        }
        return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
        kfree(mem);
        atomic_sub(size, &sk->sk_omem_alloc);
}
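
/*
 * sock_kmalloc()/sock_kfree_s() are meant to be used as a pair so that
 * sk_omem_alloc stays balanced; size must match across the two calls.
 * Illustrative sketch for an option handler:
 *
 *      void *buf = sock_kmalloc(sk, len, GFP_KERNEL);
 *
 *      if (!buf)
 *              return -ENOBUFS;
 *      ...
 *      sock_kfree_s(sk, buf, len);     // same len as the allocation
 */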

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think, these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock * sk, long timeo)
{
        DEFINE_WAIT(wait);

        clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
        for (;;) {
                if (!timeo)
                        break;
                if (signal_pending(current))
                        break;
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
                        break;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        break;
                if (sk->sk_err)
                        break;
                timeo = schedule_timeout(timeo);
        }
        finish_wait(sk->sk_sleep, &wait);
        return timeo;
}


/*
 *      Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
                                            unsigned long header_len,
                                            unsigned long data_len,
                                            int noblock, int *errcode)
{
        struct sk_buff *skb;
        gfp_t gfp_mask;
        long timeo;
        int err;

        gfp_mask = sk->sk_allocation;
        if (gfp_mask & __GFP_WAIT)
                gfp_mask |= __GFP_REPEAT;

        timeo = sock_sndtimeo(sk, noblock);
        while (1) {
                err = sock_error(sk);
                if (err != 0)
                        goto failure;

                err = -EPIPE;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        goto failure;

                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                        skb = alloc_skb(header_len, gfp_mask);
                        if (skb) {
                                int npages;
                                int i;

                                /* No pages, we're done... */
                                if (!data_len)
                                        break;

                                npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
                                skb->truesize += data_len;
                                skb_shinfo(skb)->nr_frags = npages;
                                for (i = 0; i < npages; i++) {
                                        struct page *page;
                                        skb_frag_t *frag;

                                        page = alloc_pages(sk->sk_allocation, 0);
                                        if (!page) {
                                                err = -ENOBUFS;
                                                skb_shinfo(skb)->nr_frags = i;
                                                kfree_skb(skb);
                                                goto failure;
                                        }

                                        frag = &skb_shinfo(skb)->frags[i];
                                        frag->page = page;
                                        frag->page_offset = 0;
                                        frag->size = (data_len >= PAGE_SIZE ?
                                                      PAGE_SIZE :
                                                      data_len);
                                        data_len -= PAGE_SIZE;
                                }

                                /* Full success... */
                                break;
                        }
                        err = -ENOBUFS;
                        goto failure;
                }
                set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                err = -EAGAIN;
                if (!timeo)
                        goto failure;
                if (signal_pending(current))
                        goto interrupted;
                timeo = sock_wait_for_wmem(sk, timeo);
        }

        skb_set_owner_w(skb, sk);
        return skb;

interrupted:
        err = sock_intr_errno(timeo);
failure:
        *errcode = err;
        return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
                                    int noblock, int *errcode)
{
        return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
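
/*
 * Typical datagram sendmsg use (hedged sketch): block for send-buffer
 * space up to the socket's send timeout, then fill and transmit the skb.
 * "reserve" stands in for protocol headroom and is hypothetical here.
 *
 *      skb = sock_alloc_send_skb(sk, len + reserve,
 *                                msg->msg_flags & MSG_DONTWAIT, &err);
 *      if (!skb)
 *              goto out;       // err is -EAGAIN, -EPIPE, sock_intr_errno(), ...
 */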

static void __lock_sock(struct sock *sk)
{
        DEFINE_WAIT(wait);

        for (;;) {
                prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
                                        TASK_UNINTERRUPTIBLE);
                spin_unlock_bh(&sk->sk_lock.slock);
                schedule();
                spin_lock_bh(&sk->sk_lock.slock);
                if (!sock_owned_by_user(sk))
                        break;
        }
        finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
        struct sk_buff *skb = sk->sk_backlog.head;

        do {
                sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
                bh_unlock_sock(sk);

                do {
                        struct sk_buff *next = skb->next;

                        skb->next = NULL;
                        sk->sk_backlog_rcv(sk, skb);

                        /*
                         * We are in process context here with softirqs
                         * disabled, use cond_resched_softirq() to preempt.
                         * This is safe to do because we've taken the backlog
                         * queue private:
                         */
                        cond_resched_softirq();

                        skb = next;
                } while (skb != NULL);

                bh_lock_sock(sk);
        } while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
        int rc;
        DEFINE_WAIT(wait);

        prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
        set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
        clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        finish_wait(sk->sk_sleep, &wait);
        return rc;
}

EXPORT_SYMBOL(sk_wait_data);
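
/*
 * Canonical caller pattern (sketch): a recvmsg implementation loops
 * under lock_sock() until data arrives, the timeout expires, or a
 * signal interrupts the wait.
 *
 *      while (skb_queue_empty(&sk->sk_receive_queue)) {
 *              if (!timeo)
 *                      return -EAGAIN;
 *              if (signal_pending(current))
 *                      return sock_intr_errno(timeo);
 *              sk_wait_data(sk, &timeo);
 *      }
 */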
1356
1357 /*
1358  * Set of default routines for initialising struct proto_ops when
1359  * the protocol does not support a particular function. In certain
1360  * cases where it makes no sense for a protocol to have a "do nothing"
1361  * function, some default processing is provided.
1362  */
1363
1364 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1365 {
1366         return -EOPNOTSUPP;
1367 }
1368
1369 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1370                     int len, int flags)
1371 {
1372         return -EOPNOTSUPP;
1373 }
1374
1375 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1376 {
1377         return -EOPNOTSUPP;
1378 }
1379
1380 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1381 {
1382         return -EOPNOTSUPP;
1383 }
1384
1385 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1386                     int *len, int peer)
1387 {
1388         return -EOPNOTSUPP;
1389 }
1390
1391 unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1392 {
1393         return 0;
1394 }
1395
1396 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1397 {
1398         return -EOPNOTSUPP;
1399 }
1400
1401 int sock_no_listen(struct socket *sock, int backlog)
1402 {
1403         return -EOPNOTSUPP;
1404 }
1405
1406 int sock_no_shutdown(struct socket *sock, int how)
1407 {
1408         return -EOPNOTSUPP;
1409 }
1410
1411 int sock_no_setsockopt(struct socket *sock, int level, int optname,
1412                     char __user *optval, int optlen)
1413 {
1414         return -EOPNOTSUPP;
1415 }
1416
1417 int sock_no_getsockopt(struct socket *sock, int level, int optname,
1418                     char __user *optval, int __user *optlen)
1419 {
1420         return -EOPNOTSUPP;
1421 }
1422
1423 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1424                     size_t len)
1425 {
1426         return -EOPNOTSUPP;
1427 }
1428
1429 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1430                     size_t len, int flags)
1431 {
1432         return -EOPNOTSUPP;
1433 }
1434
1435 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1436 {
1437         /* Mirror missing mmap method error code */
1438         return -ENODEV;
1439 }
1440
1441 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1442 {
1443         ssize_t res;
1444         struct msghdr msg = {.msg_flags = flags};
1445         struct kvec iov;
1446         char *kaddr = kmap(page);
1447         iov.iov_base = kaddr + offset;
1448         iov.iov_len = size;
1449         res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1450         kunmap(page);
1451         return res;
1452 }
1453
1454 /*
1455  *      Default Socket Callbacks
1456  */
1457
1458 static void sock_def_wakeup(struct sock *sk)
1459 {
1460         read_lock(&sk->sk_callback_lock);
1461         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1462                 wake_up_interruptible_all(sk->sk_sleep);
1463         read_unlock(&sk->sk_callback_lock);
1464 }
1465
1466 static void sock_def_error_report(struct sock *sk)
1467 {
1468         read_lock(&sk->sk_callback_lock);
1469         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1470                 wake_up_interruptible(sk->sk_sleep);
1471         sk_wake_async(sk,0,POLL_ERR);
1472         read_unlock(&sk->sk_callback_lock);
1473 }
1474
1475 static void sock_def_readable(struct sock *sk, int len)
1476 {
1477         read_lock(&sk->sk_callback_lock);
1478         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1479                 wake_up_interruptible(sk->sk_sleep);
1480         sk_wake_async(sk,1,POLL_IN);
1481         read_unlock(&sk->sk_callback_lock);
1482 }
1483
1484 static void sock_def_write_space(struct sock *sk)
1485 {
1486         read_lock(&sk->sk_callback_lock);
1487
1488         /* Do not wake up a writer until he can make "significant"
1489          * progress.  --DaveM
1490          */
1491         if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1492                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1493                         wake_up_interruptible(sk->sk_sleep);
1494
1495                 /* Should agree with poll, otherwise some programs break */
1496                 if (sock_writeable(sk))
1497                         sk_wake_async(sk, 2, POLL_OUT);
1498         }
1499
1500         read_unlock(&sk->sk_callback_lock);
1501 }
1502
1503 static void sock_def_destruct(struct sock *sk)
1504 {
1505         kfree(sk->sk_protinfo);
1506 }
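
/*
 * Example (illustrative): the default callbacks above are invoked
 * indirectly.  sock_wfree() calls sk->sk_write_space() as transmitted
 * skbs are freed, and a protocol's receive path typically does:
 *
 *	skb_queue_tail(&sk->sk_receive_queue, skb);
 *	sk->sk_data_ready(sk, skb->len);
 *
 * which lands in sock_def_readable() unless the protocol installed
 * its own sk_data_ready handler.
 */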
1507
1508 void sk_send_sigurg(struct sock *sk)
1509 {
1510         if (sk->sk_socket && sk->sk_socket->file)
1511                 if (send_sigurg(&sk->sk_socket->file->f_owner))
1512                         sk_wake_async(sk, 3, POLL_PRI);
1513 }
1514
1515 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1516                     unsigned long expires)
1517 {
1518         if (!mod_timer(timer, expires))
1519                 sock_hold(sk);
1520 }
1521
1522 EXPORT_SYMBOL(sk_reset_timer);
1523
1524 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1525 {
1526         if (timer_pending(timer) && del_timer(timer))
1527                 __sock_put(sk);
1528 }
1529
1530 EXPORT_SYMBOL(sk_stop_timer);
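
/*
 * Example (illustrative): sk_reset_timer()/sk_stop_timer() keep the
 * timer's reference on the socket balanced, so a handler that fires
 * must drop the hold itself.  A hypothetical retransmit timer:
 *
 *	static void foo_retrans_handler(unsigned long data)
 *	{
 *		struct sock *sk = (struct sock *)data;
 *
 *		... do the work ...
 *		sock_put(sk);		release sk_reset_timer()'s hold
 *	}
 *
 *	sk_reset_timer(sk, &sk->sk_timer, jiffies + FOO_RTO);
 *	...
 *	sk_stop_timer(sk, &sk->sk_timer);	also drops the hold
 */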
1531
1532 void sock_init_data(struct socket *sock, struct sock *sk)
1533 {
1534         skb_queue_head_init(&sk->sk_receive_queue);
1535         skb_queue_head_init(&sk->sk_write_queue);
1536         skb_queue_head_init(&sk->sk_error_queue);
1537 #ifdef CONFIG_NET_DMA
1538         skb_queue_head_init(&sk->sk_async_wait_queue);
1539 #endif
1540
1541         sk->sk_send_head        =       NULL;
1542
1543         init_timer(&sk->sk_timer);
1544
1545         sk->sk_allocation       =       GFP_KERNEL;
1546         sk->sk_rcvbuf           =       sysctl_rmem_default;
1547         sk->sk_sndbuf           =       sysctl_wmem_default;
1548         sk->sk_state            =       TCP_CLOSE;
1549         sk->sk_socket           =       sock;
1550
1551         sock_set_flag(sk, SOCK_ZAPPED);
1552
1553         if (sock) {
1554                 sk->sk_type     =       sock->type;
1555                 sk->sk_sleep    =       &sock->wait;
1556                 sock->sk        =       sk;
1557         } else
1558                 sk->sk_sleep    =       NULL;
1559
1560         rwlock_init(&sk->sk_dst_lock);
1561         rwlock_init(&sk->sk_callback_lock);
1562         lockdep_set_class_and_name(&sk->sk_callback_lock,
1563                         af_callback_keys + sk->sk_family,
1564                         af_family_clock_key_strings[sk->sk_family]);
1565
1566         sk->sk_state_change     =       sock_def_wakeup;
1567         sk->sk_data_ready       =       sock_def_readable;
1568         sk->sk_write_space      =       sock_def_write_space;
1569         sk->sk_error_report     =       sock_def_error_report;
1570         sk->sk_destruct         =       sock_def_destruct;
1571
1572         sk->sk_sndmsg_page      =       NULL;
1573         sk->sk_sndmsg_off       =       0;
1574
1575         sk->sk_peercred.pid     =       0;
1576         sk->sk_peercred.uid     =       -1;
1577         sk->sk_peercred.gid     =       -1;
1578         sk->sk_write_pending    =       0;
1579         sk->sk_rcvlowat         =       1;
1580         sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
1581         sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
1582
1583         sk->sk_stamp = ktime_set(-1L, -1L);
1584
1585         atomic_set(&sk->sk_refcnt, 1);
1586 }
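
/*
 * Example (illustrative): a protocol's init path calls
 * sock_init_data() right after allocating the sock and then overrides
 * whichever defaults it cares about ("foo" names are hypothetical):
 *
 *	sock_init_data(sock, sk);
 *	sk->sk_destruct   = foo_sock_destruct;
 *	sk->sk_data_ready = foo_data_ready;
 *	sk->sk_allocation = GFP_ATOMIC;		if used from softirqs
 */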
1587
1588 void fastcall lock_sock_nested(struct sock *sk, int subclass)
1589 {
1590         might_sleep();
1591         spin_lock_bh(&sk->sk_lock.slock);
1592         if (sk->sk_lock.owned)
1593                 __lock_sock(sk);
1594         sk->sk_lock.owned = 1;
1595         spin_unlock(&sk->sk_lock.slock);
1596         /*
1597          * The sk_lock has mutex_lock() semantics here:
1598          */
1599         mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1600         local_bh_enable();
1601 }
1602
1603 EXPORT_SYMBOL(lock_sock_nested);
1604
1605 void fastcall release_sock(struct sock *sk)
1606 {
1607         /*
1608          * The sk_lock has mutex_unlock() semantics:
1609          */
1610         mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1611
1612         spin_lock_bh(&sk->sk_lock.slock);
1613         if (sk->sk_backlog.tail)
1614                 __release_sock(sk);
1615         sk->sk_lock.owned = 0;
1616         if (waitqueue_active(&sk->sk_lock.wq))
1617                 wake_up(&sk->sk_lock.wq);
1618         spin_unlock_bh(&sk->sk_lock.slock);
1619 }
1620 EXPORT_SYMBOL(release_sock);
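
/*
 * Example (illustrative): the usual pattern around process-context
 * socket work; lock_sock() is lock_sock_nested(sk, 0).  Note that
 * release_sock() also processes any backlog that piled up while the
 * lock was owned:
 *
 *	lock_sock(sk);
 *	err = foo_do_locked_work(sk);		hypothetical helper
 *	release_sock(sk);
 */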
1621
1622 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1623 {
1624         struct timeval tv;
1625         if (!sock_flag(sk, SOCK_TIMESTAMP))
1626                 sock_enable_timestamp(sk);
1627         tv = ktime_to_timeval(sk->sk_stamp);
1628         if (tv.tv_sec == -1)
1629                 return -ENOENT;
1630         if (tv.tv_sec == 0) {
1631                 sk->sk_stamp = ktime_get_real();
1632                 tv = ktime_to_timeval(sk->sk_stamp);
1633         }
1634         return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1635 }
1636 EXPORT_SYMBOL(sock_get_timestamp);
1637
1638 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1639 {
1640         struct timespec ts;
1641         if (!sock_flag(sk, SOCK_TIMESTAMP))
1642                 sock_enable_timestamp(sk);
1643         ts = ktime_to_timespec(sk->sk_stamp);
1644         if (ts.tv_sec == -1)
1645                 return -ENOENT;
1646         if (ts.tv_sec == 0) {
1647                 sk->sk_stamp = ktime_get_real();
1648                 ts = ktime_to_timespec(sk->sk_stamp);
1649         }
1650         return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1651 }
1652 EXPORT_SYMBOL(sock_get_timestampns);
1653
1654 void sock_enable_timestamp(struct sock *sk)
1655 {
1656         if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1657                 sock_set_flag(sk, SOCK_TIMESTAMP);
1658                 net_enable_timestamp();
1659         }
1660 }
1661 EXPORT_SYMBOL(sock_enable_timestamp);
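
/*
 * Example (illustrative): sock_get_timestamp()/sock_get_timestampns()
 * back the SIOCGSTAMP and SIOCGSTAMPNS ioctls, e.g. from a protocol's
 * ->ioctl handler:
 *
 *	case SIOCGSTAMP:
 *		return sock_get_timestamp(sk, (struct timeval __user *)arg);
 *	case SIOCGSTAMPNS:
 *		return sock_get_timestampns(sk, (struct timespec __user *)arg);
 */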
1662
1663 /*
1664  *      Get a socket option on a socket.
1665  *
1666  *      FIX: POSIX 1003.1g is very ambiguous here. It states that
1667  *      asynchronous errors should be reported by getsockopt. We assume
1668  *      this means if you specify SO_ERROR (otherwise what's the point of it).
1669  */
1670 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1671                            char __user *optval, int __user *optlen)
1672 {
1673         struct sock *sk = sock->sk;
1674
1675         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1676 }
1677
1678 EXPORT_SYMBOL(sock_common_getsockopt);
1679
1680 #ifdef CONFIG_COMPAT
1681 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1682                                   char __user *optval, int __user *optlen)
1683 {
1684         struct sock *sk = sock->sk;
1685
1686         if (sk->sk_prot->compat_getsockopt != NULL)
1687                 return sk->sk_prot->compat_getsockopt(sk, level, optname,
1688                                                       optval, optlen);
1689         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1690 }
1691 EXPORT_SYMBOL(compat_sock_common_getsockopt);
1692 #endif
1693
1694 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1695                         struct msghdr *msg, size_t size, int flags)
1696 {
1697         struct sock *sk = sock->sk;
1698         int addr_len = 0;
1699         int err;
1700
1701         err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1702                                    flags & ~MSG_DONTWAIT, &addr_len);
1703         if (err >= 0)
1704                 msg->msg_namelen = addr_len;
1705         return err;
1706 }
1707
1708 EXPORT_SYMBOL(sock_common_recvmsg);
1709
1710 /*
1711  *      Set socket options on an inet socket.
1712  */
1713 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1714                            char __user *optval, int optlen)
1715 {
1716         struct sock *sk = sock->sk;
1717
1718         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1719 }
1720
1721 EXPORT_SYMBOL(sock_common_setsockopt);
1722
1723 #ifdef CONFIG_COMPAT
1724 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1725                                   char __user *optval, int optlen)
1726 {
1727         struct sock *sk = sock->sk;
1728
1729         if (sk->sk_prot->compat_setsockopt != NULL)
1730                 return sk->sk_prot->compat_setsockopt(sk, level, optname,
1731                                                       optval, optlen);
1732         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1733 }
1734 EXPORT_SYMBOL(compat_sock_common_setsockopt);
1735 #endif
1736
1737 void sk_common_release(struct sock *sk)
1738 {
1739         if (sk->sk_prot->destroy)
1740                 sk->sk_prot->destroy(sk);
1741
1742         /*
1743          * Observation: when sk_common_release() is called, processes have
1744          * no access to the socket, but the network stack still does.
1745          * Step one, detach it from networking:
1746          *
1747          * A. Remove from hash tables.
1748          */
1749
1750         sk->sk_prot->unhash(sk);
1751
1752         /*
1753          * At this point the socket cannot receive new packets, but some may
1754          * still be in flight: another CPU may have done its hash table
1755          * lookup before we unhashed the socket.  Such packets will reach
1756          * the receive queue and be purged by the socket destructor.
1757          *
1758          * We also still have packets pending on the receive queue, and
1759          * probably our own packets waiting in device queues.  sock_destroy
1760          * drains the receive queue, but transmitted packets delay socket
1761          * destruction until the last reference is released.
1762          */
1763
1764         sock_orphan(sk);
1765
1766         xfrm_sk_free_policy(sk);
1767
1768         sk_refcnt_debug_release(sk);
1769         sock_put(sk);
1770 }
1771
1772 EXPORT_SYMBOL(sk_common_release);
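
/*
 * Example (illustrative): simple datagram protocols typically point
 * their struct proto ->close at a thin wrapper around this helper:
 *
 *	static void foo_close(struct sock *sk, long timeout)
 *	{
 *		sk_common_release(sk);
 *	}
 */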
1773
1774 static DEFINE_RWLOCK(proto_list_lock);
1775 static LIST_HEAD(proto_list);
1776
1777 int proto_register(struct proto *prot, int alloc_slab)
1778 {
1779         char *request_sock_slab_name = NULL;
1780         char *timewait_sock_slab_name;
1781         int rc = -ENOBUFS;
1782
1783         if (alloc_slab) {
1784                 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1785                                                SLAB_HWCACHE_ALIGN, NULL);
1786
1787                 if (prot->slab == NULL) {
1788                         printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1789                                prot->name);
1790                         goto out;
1791                 }
1792
1793                 if (prot->rsk_prot != NULL) {
1794                         static const char mask[] = "request_sock_%s";
1795
1796                         request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1797                         if (request_sock_slab_name == NULL)
1798                                 goto out_free_sock_slab;
1799
1800                         sprintf(request_sock_slab_name, mask, prot->name);
1801                         prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1802                                                                  prot->rsk_prot->obj_size, 0,
1803                                                                  SLAB_HWCACHE_ALIGN, NULL);
1804
1805                         if (prot->rsk_prot->slab == NULL) {
1806                                 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1807                                        prot->name);
1808                                 goto out_free_request_sock_slab_name;
1809                         }
1810                 }
1811
1812                 if (prot->twsk_prot != NULL) {
1813                         static const char mask[] = "tw_sock_%s";
1814
1815                         timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1816
1817                         if (timewait_sock_slab_name == NULL)
1818                                 goto out_free_request_sock_slab;
1819
1820                         sprintf(timewait_sock_slab_name, mask, prot->name);
1821                         prot->twsk_prot->twsk_slab =
1822                                 kmem_cache_create(timewait_sock_slab_name,
1823                                                   prot->twsk_prot->twsk_obj_size,
1824                                                   0, SLAB_HWCACHE_ALIGN,
1825                                                   NULL);
1826                         if (prot->twsk_prot->twsk_slab == NULL)
1827                                 goto out_free_timewait_sock_slab_name;
1828                 }
1829         }
1830
1831         write_lock(&proto_list_lock);
1832         list_add(&prot->node, &proto_list);
1833         write_unlock(&proto_list_lock);
1834         rc = 0;
1835 out:
1836         return rc;
1837 out_free_timewait_sock_slab_name:
1838         kfree(timewait_sock_slab_name);
1839 out_free_request_sock_slab:
1840         if (prot->rsk_prot && prot->rsk_prot->slab) {
1841                 kmem_cache_destroy(prot->rsk_prot->slab);
1842                 prot->rsk_prot->slab = NULL;
1843         }
1844 out_free_request_sock_slab_name:
1845         kfree(request_sock_slab_name);
1846 out_free_sock_slab:
1847         kmem_cache_destroy(prot->slab);
1848         prot->slab = NULL;
1849         goto out;
1850 }
1851
1852 EXPORT_SYMBOL(proto_register);
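
/*
 * Example (illustrative): a protocol module registers its struct proto
 * once at init time; obj_size tells proto_register() how large the
 * per-socket slab objects must be ("foo" names are hypothetical):
 *
 *	static struct proto foo_proto = {
 *		.name	  = "FOO",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct foo_sock),
 *	};
 *
 *	rc = proto_register(&foo_proto, 1);	1 = allocate a slab
 *	...
 *	proto_unregister(&foo_proto);		at module exit
 */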
1853
1854 void proto_unregister(struct proto *prot)
1855 {
1856         write_lock(&proto_list_lock);
1857         list_del(&prot->node);
1858         write_unlock(&proto_list_lock);
1859
1860         if (prot->slab != NULL) {
1861                 kmem_cache_destroy(prot->slab);
1862                 prot->slab = NULL;
1863         }
1864
1865         if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1866                 const char *name = kmem_cache_name(prot->rsk_prot->slab);
1867
1868                 kmem_cache_destroy(prot->rsk_prot->slab);
1869                 kfree(name);
1870                 prot->rsk_prot->slab = NULL;
1871         }
1872
1873         if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1874                 const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1875
1876                 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1877                 kfree(name);
1878                 prot->twsk_prot->twsk_slab = NULL;
1879         }
1880 }
1881
1882 EXPORT_SYMBOL(proto_unregister);
1883
1884 #ifdef CONFIG_PROC_FS
1885 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1886 {
1887         read_lock(&proto_list_lock);
1888         return seq_list_start_head(&proto_list, *pos);
1889 }
1890
1891 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1892 {
1893         return seq_list_next(v, &proto_list, pos);
1894 }
1895
1896 static void proto_seq_stop(struct seq_file *seq, void *v)
1897 {
1898         read_unlock(&proto_list_lock);
1899 }
1900
1901 static char proto_method_implemented(const void *method)
1902 {
1903         return method == NULL ? 'n' : 'y';
1904 }
1905
1906 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1907 {
1908         seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1909                         "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1910                    proto->name,
1911                    proto->obj_size,
1912                    proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1913                    proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1914                    proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1915                    proto->max_header,
1916                    proto->slab == NULL ? "no" : "yes",
1917                    module_name(proto->owner),
1918                    proto_method_implemented(proto->close),
1919                    proto_method_implemented(proto->connect),
1920                    proto_method_implemented(proto->disconnect),
1921                    proto_method_implemented(proto->accept),
1922                    proto_method_implemented(proto->ioctl),
1923                    proto_method_implemented(proto->init),
1924                    proto_method_implemented(proto->destroy),
1925                    proto_method_implemented(proto->shutdown),
1926                    proto_method_implemented(proto->setsockopt),
1927                    proto_method_implemented(proto->getsockopt),
1928                    proto_method_implemented(proto->sendmsg),
1929                    proto_method_implemented(proto->recvmsg),
1930                    proto_method_implemented(proto->sendpage),
1931                    proto_method_implemented(proto->bind),
1932                    proto_method_implemented(proto->backlog_rcv),
1933                    proto_method_implemented(proto->hash),
1934                    proto_method_implemented(proto->unhash),
1935                    proto_method_implemented(proto->get_port),
1936                    proto_method_implemented(proto->enter_memory_pressure));
1937 }
1938
1939 static int proto_seq_show(struct seq_file *seq, void *v)
1940 {
1941         if (v == &proto_list)
1942                 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1943                            "protocol",
1944                            "size",
1945                            "sockets",
1946                            "memory",
1947                            "press",
1948                            "maxhdr",
1949                            "slab",
1950                            "module",
1951                            "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1952         else
1953                 proto_seq_printf(seq, list_entry(v, struct proto, node));
1954         return 0;
1955 }
1956
1957 static const struct seq_operations proto_seq_ops = {
1958         .start  = proto_seq_start,
1959         .next   = proto_seq_next,
1960         .stop   = proto_seq_stop,
1961         .show   = proto_seq_show,
1962 };
1963
1964 static int proto_seq_open(struct inode *inode, struct file *file)
1965 {
1966         return seq_open(file, &proto_seq_ops);
1967 }
1968
1969 static const struct file_operations proto_seq_fops = {
1970         .owner          = THIS_MODULE,
1971         .open           = proto_seq_open,
1972         .read           = seq_read,
1973         .llseek         = seq_lseek,
1974         .release        = seq_release,
1975 };
1976
1977 static int __init proto_init(void)
1978 {
1979         /* register /proc/net/protocols */
1980         return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1981 }
1982
1983 subsys_initcall(proto_init);
1984
1985 #endif /* PROC_FS */
1986
1987 EXPORT_SYMBOL(sk_alloc);
1988 EXPORT_SYMBOL(sk_free);
1989 EXPORT_SYMBOL(sk_send_sigurg);
1990 EXPORT_SYMBOL(sock_alloc_send_skb);
1991 EXPORT_SYMBOL(sock_init_data);
1992 EXPORT_SYMBOL(sock_kfree_s);
1993 EXPORT_SYMBOL(sock_kmalloc);
1994 EXPORT_SYMBOL(sock_no_accept);
1995 EXPORT_SYMBOL(sock_no_bind);
1996 EXPORT_SYMBOL(sock_no_connect);
1997 EXPORT_SYMBOL(sock_no_getname);
1998 EXPORT_SYMBOL(sock_no_getsockopt);
1999 EXPORT_SYMBOL(sock_no_ioctl);
2000 EXPORT_SYMBOL(sock_no_listen);
2001 EXPORT_SYMBOL(sock_no_mmap);
2002 EXPORT_SYMBOL(sock_no_poll);
2003 EXPORT_SYMBOL(sock_no_recvmsg);
2004 EXPORT_SYMBOL(sock_no_sendmsg);
2005 EXPORT_SYMBOL(sock_no_sendpage);
2006 EXPORT_SYMBOL(sock_no_setsockopt);
2007 EXPORT_SYMBOL(sock_no_shutdown);
2008 EXPORT_SYMBOL(sock_no_socketpair);
2009 EXPORT_SYMBOL(sock_rfree);
2010 EXPORT_SYMBOL(sock_setsockopt);
2011 EXPORT_SYMBOL(sock_wfree);
2012 EXPORT_SYMBOL(sock_wmalloc);
2013 EXPORT_SYMBOL(sock_i_uid);
2014 EXPORT_SYMBOL(sock_i_ino);
2015 EXPORT_SYMBOL(sysctl_optmem_max);
2016 #ifdef CONFIG_SYSCTL
2017 EXPORT_SYMBOL(sysctl_rmem_max);
2018 EXPORT_SYMBOL(sysctl_wmem_max);
2019 #endif