[pandora-kernel.git] / net / unix / af_unix.c
1 /*
2  * NET4:        Implementation of BSD Unix domain sockets.
3  *
4  * Authors:     Alan Cox, <alan.cox@linux.org>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Version:     $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12  *
13  * Fixes:
14  *              Linus Torvalds  :       Assorted bug cures.
15  *              Niibe Yutaka    :       async I/O support.
16  *              Carsten Paeth   :       PF_UNIX check, address fixes.
17  *              Alan Cox        :       Limit size of allocated blocks.
18  *              Alan Cox        :       Fixed the stupid socketpair bug.
19  *              Alan Cox        :       BSD compatibility fine tuning.
20  *              Alan Cox        :       Fixed a bug in connect when interrupted.
21  *              Alan Cox        :       Sorted out a proper draft version of
22  *                                      file descriptor passing hacked up from
23  *                                      Mike Shaver's work.
24  *              Marty Leisner   :       Fixes to fd passing
25  *              Nick Nevin      :       recvmsg bugfix.
26  *              Alan Cox        :       Started proper garbage collector
27  *              Heiko Eißfeldt  :       Missing verify_area check
28  *              Alan Cox        :       Started POSIXisms
29  *              Andreas Schwab  :       Replace inode by dentry for proper
30  *                                      reference counting
31  *              Kirk Petersen   :       Made this a module
32  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
33  *                                      Lots of bug fixes.
34  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
35  *                                      by the above two patches.
36  *           Andrea Arcangeli   :       If possible we block in connect(2)
37  *                                      if the max backlog of the listen socket
38  *                                      has been reached. This won't break
39  *                                      old apps and it will avoid a huge
40  *                                      amount of socks hashed (this is for
41  *                                      unix_gc() performance reasons).
42  *                                      Security fix that limits the max
43  *                                      number of socks to 2*max_files and
44  *                                      the number of skb queueable in the
45  *                                      dgram receiver.
46  *              Artur Skawina   :       Hash function optimizations
47  *           Alexey Kuznetsov   :       Full scale SMP. Lots of bugs are introduced 8)
48  *            Malcolm Beattie   :       Set peercred for socketpair
49  *           Michal Ostrowski   :       Module initialization cleanup.
50  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
51  *                                      the core infrastructure is doing that
52  *                                      for all net proto families now (2.5.69+)
53  *
54  *
55  * Known differences from reference BSD that was tested:
56  *
57  *      [TO FIX]
58  *      ECONNREFUSED is not returned from one end of a connected() socket to the
59  *              other the moment one end closes.
60  *      fstat() doesn't return st_dev=0, and gives the blksize as high water mark
61  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
62  *      [NOT TO FIX]
63  *      accept() returns a path name even if the connecting socket has closed
64  *              in the meantime (BSD loses the path and gives up).
65  *      accept() returns 0 length path for an unbound connector. BSD returns 16
66  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
68  *      BSD af_unix apparently has connect forgetting to block properly.
69  *              (need to check this with the POSIX spec in detail)
70  *
71  * Differences from 2.0.0-11-... (ANK)
72  *      Bug fixes and improvements.
73  *              - client shutdown killed server socket.
74  *              - removed all useless cli/sti pairs.
75  *
76  *      Semantic changes/extensions.
77  *              - generic control message passing.
78  *              - SCM_CREDENTIALS control message.
79  *              - "Abstract" (not FS based) socket bindings.
80  *                Abstract names are sequences of bytes (not zero terminated)
81  *                starting with a 0 byte, so that this name space does not intersect
82  *                with BSD names.
83  */
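
/*
 * Illustrative userspace sketch (not part of this file) of the
 * "abstract" binding described above: sun_path[0] is 0 and the bytes
 * that follow, up to the supplied address length, form the name; the
 * name is not NUL terminated.  The name "example" and the omitted
 * error handling are assumptions made purely for illustration.
 *
 *      struct sockaddr_un addr;
 *      int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *      memset(&addr, 0, sizeof(addr));
 *      addr.sun_family = AF_UNIX;
 *      addr.sun_path[0] = '\0';
 *      memcpy(addr.sun_path + 1, "example", 7);
 *      bind(fd, (struct sockaddr *)&addr,
 *           offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 */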
84
85 #include <linux/module.h>
86 #include <linux/config.h>
87 #include <linux/kernel.h>
88 #include <linux/signal.h>
89 #include <linux/sched.h>
90 #include <linux/errno.h>
91 #include <linux/string.h>
92 #include <linux/stat.h>
93 #include <linux/dcache.h>
94 #include <linux/namei.h>
95 #include <linux/socket.h>
96 #include <linux/un.h>
97 #include <linux/fcntl.h>
98 #include <linux/termios.h>
99 #include <linux/sockios.h>
100 #include <linux/net.h>
101 #include <linux/in.h>
102 #include <linux/fs.h>
103 #include <linux/slab.h>
104 #include <asm/uaccess.h>
105 #include <linux/skbuff.h>
106 #include <linux/netdevice.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
112 #include <net/scm.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/smp_lock.h>
116 #include <linux/rtnetlink.h>
117 #include <linux/mount.h>
118 #include <net/checksum.h>
119 #include <linux/security.h>
120
121 int sysctl_unix_max_dgram_qlen = 10;
122
123 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
124 DEFINE_SPINLOCK(unix_table_lock);
125 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
126
127 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
128
129 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
130
131 #ifdef CONFIG_SECURITY_NETWORK
132 static void unix_get_peersec_dgram(struct sk_buff *skb)
133 {
134         int err;
135
136         err = security_socket_getpeersec_dgram(skb, UNIXSECDATA(skb),
137                                                UNIXSECLEN(skb));
138         if (err)
139                 *(UNIXSECDATA(skb)) = NULL;
140 }
141
142 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
143 {
144         scm->secdata = *UNIXSECDATA(skb);
145         scm->seclen = *UNIXSECLEN(skb);
146 }
147 #else
148 static void unix_get_peersec_dgram(struct sk_buff *skb)
149 { }
150
151 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
152 { }
153 #endif /* CONFIG_SECURITY_NETWORK */
154
155 /*
156  *  SMP locking strategy:
157  *    hash table is protected with spinlock unix_table_lock
158  *    each socket state is protected by separate rwlock.
159  */
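
/*
 * A sketch of how the two levels above are typically combined in this
 * file (compare unix_find_socket_byname() and unix_peer_get() below):
 * the table lock only guards hash table membership, while per-socket
 * fields are read under that socket's own state lock.
 *
 *      spin_lock(&unix_table_lock);
 *      ... walk a unix_socket_table[] chain, sock_hold() the match ...
 *      spin_unlock(&unix_table_lock);
 *
 *      unix_state_rlock(sk);
 *      ... read unix_peer(sk), unix_sk(sk)->addr, ...
 *      unix_state_runlock(sk);
 */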
160
161 static inline unsigned unix_hash_fold(unsigned hash)
162 {
163         hash ^= hash>>16;
164         hash ^= hash>>8;
165         return hash&(UNIX_HASH_SIZE-1);
166 }
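
/*
 * Worked example of the fold above with an arbitrary (made up) 32-bit
 * checksum and UNIX_HASH_SIZE of 256, showing that every byte of the
 * input influences the final bucket:
 *
 *      hash              = 0xA1B2C3D4
 *      hash ^= hash>>16 -> 0xA1B26266
 *      hash ^= hash>>8  -> 0xA113D004
 *      hash & 0xFF      -> 0x04          (bucket 4)
 */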
167
168 #define unix_peer(sk) (unix_sk(sk)->peer)
169
170 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
171 {
172         return unix_peer(osk) == sk;
173 }
174
175 static inline int unix_may_send(struct sock *sk, struct sock *osk)
176 {
177         return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
178 }
179
180 static struct sock *unix_peer_get(struct sock *s)
181 {
182         struct sock *peer;
183
184         unix_state_rlock(s);
185         peer = unix_peer(s);
186         if (peer)
187                 sock_hold(peer);
188         unix_state_runlock(s);
189         return peer;
190 }
191
192 static inline void unix_release_addr(struct unix_address *addr)
193 {
194         if (atomic_dec_and_test(&addr->refcnt))
195                 kfree(addr);
196 }
197
198 /*
199  *      Check unix socket name:
200  *              - it should not be zero length.
201  *              - if it starts with a non-zero byte, it should be NUL terminated (FS object)
202  *              - if it starts with a zero byte, it is an abstract name.
203  */
204  
205 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
206 {
207         if (len <= sizeof(short) || len > sizeof(*sunaddr))
208                 return -EINVAL;
209         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
210                 return -EINVAL;
211         if (sunaddr->sun_path[0]) {
212                 /*
213                  * This may look like an off by one error but it is a bit more
214                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
215  *                 sun_path[108] doesn't as such exist.  However in kernel space
216                  * we are guaranteed that it is a valid memory location in our
217                  * kernel address buffer.
218                  */
219                 ((char *)sunaddr)[len]=0;
220                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
221                 return len;
222         }
223
224         *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
225         return len;
226 }
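
/*
 * Example of the two cases unix_mkname() distinguishes (lengths are
 * illustrative and assume a 2-byte sa_family_t):
 *
 *   - filesystem name: sun_path = "/tmp/sock"; the path is forcibly
 *     NUL terminated and the returned length is recomputed as
 *     strlen("/tmp/sock") + 1 + sizeof(short) = 12.  *hashp is left
 *     untouched; such a socket is later hashed by inode in unix_bind().
 *
 *   - abstract name: sun_path starts with a 0 byte; the caller's len
 *     is returned unchanged and *hashp is derived from a checksum of
 *     the whole address, folded by unix_hash_fold() above.
 */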
227
228 static void __unix_remove_socket(struct sock *sk)
229 {
230         sk_del_node_init(sk);
231 }
232
233 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
234 {
235         BUG_TRAP(sk_unhashed(sk));
236         sk_add_node(sk, list);
237 }
238
239 static inline void unix_remove_socket(struct sock *sk)
240 {
241         spin_lock(&unix_table_lock);
242         __unix_remove_socket(sk);
243         spin_unlock(&unix_table_lock);
244 }
245
246 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
247 {
248         spin_lock(&unix_table_lock);
249         __unix_insert_socket(list, sk);
250         spin_unlock(&unix_table_lock);
251 }
252
253 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
254                                               int len, int type, unsigned hash)
255 {
256         struct sock *s;
257         struct hlist_node *node;
258
259         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
260                 struct unix_sock *u = unix_sk(s);
261
262                 if (u->addr->len == len &&
263                     !memcmp(u->addr->name, sunname, len))
264                         goto found;
265         }
266         s = NULL;
267 found:
268         return s;
269 }
270
271 static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
272                                                    int len, int type,
273                                                    unsigned hash)
274 {
275         struct sock *s;
276
277         spin_lock(&unix_table_lock);
278         s = __unix_find_socket_byname(sunname, len, type, hash);
279         if (s)
280                 sock_hold(s);
281         spin_unlock(&unix_table_lock);
282         return s;
283 }
284
285 static struct sock *unix_find_socket_byinode(struct inode *i)
286 {
287         struct sock *s;
288         struct hlist_node *node;
289
290         spin_lock(&unix_table_lock);
291         sk_for_each(s, node,
292                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293                 struct dentry *dentry = unix_sk(s)->dentry;
294
295                 if(dentry && dentry->d_inode == i)
296                 {
297                         sock_hold(s);
298                         goto found;
299                 }
300         }
301         s = NULL;
302 found:
303         spin_unlock(&unix_table_lock);
304         return s;
305 }
306
307 static inline int unix_writable(struct sock *sk)
308 {
309         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
310 }
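
/*
 * unix_writable() above is the wmem based flow control referred to in
 * the comment before unix_dgram_disconnected() below: a socket counts
 * as writable only while at most a quarter of sk_sndbuf is committed
 * to queued skbs.  With an (illustrative) sk_sndbuf of 16384 bytes the
 * socket stops being writable once sk_wmem_alloc exceeds 4096.
 */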
311
312 static void unix_write_space(struct sock *sk)
313 {
314         read_lock(&sk->sk_callback_lock);
315         if (unix_writable(sk)) {
316                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
317                         wake_up_interruptible(sk->sk_sleep);
318                 sk_wake_async(sk, 2, POLL_OUT);
319         }
320         read_unlock(&sk->sk_callback_lock);
321 }
322
323 /* When a dgram socket disconnects (or changes its peer), we clear its receive
324  * queue of packets that arrived from the previous peer. First, it allows us to do
325  * flow control based only on wmem_alloc; second, sk connected to peer
326  * may receive messages only from that peer. */
327 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
328 {
329         if (!skb_queue_empty(&sk->sk_receive_queue)) {
330                 skb_queue_purge(&sk->sk_receive_queue);
331                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
332
333                 /* If one link of bidirectional dgram pipe is disconnected,
334           * we signal an error. Messages are lost. Do not do this
335           * when the peer was not connected to us.
336                  */
337                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
338                         other->sk_err = ECONNRESET;
339                         other->sk_error_report(other);
340                 }
341         }
342 }
343
344 static void unix_sock_destructor(struct sock *sk)
345 {
346         struct unix_sock *u = unix_sk(sk);
347
348         skb_queue_purge(&sk->sk_receive_queue);
349
350         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
351         BUG_TRAP(sk_unhashed(sk));
352         BUG_TRAP(!sk->sk_socket);
353         if (!sock_flag(sk, SOCK_DEAD)) {
354                 printk("Attempt to release alive unix socket: %p\n", sk);
355                 return;
356         }
357
358         if (u->addr)
359                 unix_release_addr(u->addr);
360
361         atomic_dec(&unix_nr_socks);
362 #ifdef UNIX_REFCNT_DEBUG
363         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
364 #endif
365 }
366
367 static int unix_release_sock (struct sock *sk, int embrion)
368 {
369         struct unix_sock *u = unix_sk(sk);
370         struct dentry *dentry;
371         struct vfsmount *mnt;
372         struct sock *skpair;
373         struct sk_buff *skb;
374         int state;
375
376         unix_remove_socket(sk);
377
378         /* Clear state */
379         unix_state_wlock(sk);
380         sock_orphan(sk);
381         sk->sk_shutdown = SHUTDOWN_MASK;
382         dentry       = u->dentry;
383         u->dentry    = NULL;
384         mnt          = u->mnt;
385         u->mnt       = NULL;
386         state = sk->sk_state;
387         sk->sk_state = TCP_CLOSE;
388         unix_state_wunlock(sk);
389
390         wake_up_interruptible_all(&u->peer_wait);
391
392         skpair=unix_peer(sk);
393
394         if (skpair!=NULL) {
395                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
396                         unix_state_wlock(skpair);
397                         /* No more writes */
398                         skpair->sk_shutdown = SHUTDOWN_MASK;
399                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
400                                 skpair->sk_err = ECONNRESET;
401                         unix_state_wunlock(skpair);
402                         skpair->sk_state_change(skpair);
403                         read_lock(&skpair->sk_callback_lock);
404                         sk_wake_async(skpair,1,POLL_HUP);
405                         read_unlock(&skpair->sk_callback_lock);
406                 }
407                 sock_put(skpair); /* It may now die */
408                 unix_peer(sk) = NULL;
409         }
410
411         /* Try to flush out this socket. Throw out buffers at least */
412
413         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
414                 if (state==TCP_LISTEN)
415                         unix_release_sock(skb->sk, 1);
416                 /* passed fds are erased in the kfree_skb hook        */
417                 kfree_skb(skb);
418         }
419
420         if (dentry) {
421                 dput(dentry);
422                 mntput(mnt);
423         }
424
425         sock_put(sk);
426
427         /* ---- Socket is dead now and most probably destroyed ---- */
428
429         /*
430          * Fixme: BSD difference: In BSD all sockets connected to us get
431          *        ECONNRESET and we die on the spot. In Linux we behave
432          *        like files and pipes do and wait for the last
433          *        dereference.
434          *
435          * Can't we simply set sock->err?
436          *
437          *        What does the above comment talk about? --ANK(980817)
438          */
439
440         if (atomic_read(&unix_tot_inflight))
441                 unix_gc();              /* Garbage collect fds */       
442
443         return 0;
444 }
445
446 static int unix_listen(struct socket *sock, int backlog)
447 {
448         int err;
449         struct sock *sk = sock->sk;
450         struct unix_sock *u = unix_sk(sk);
451
452         err = -EOPNOTSUPP;
453         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
454                 goto out;                       /* Only stream/seqpacket sockets accept */
455         err = -EINVAL;
456         if (!u->addr)
457                 goto out;                       /* No listens on an unbound socket */
458         unix_state_wlock(sk);
459         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
460                 goto out_unlock;
461         if (backlog > sk->sk_max_ack_backlog)
462                 wake_up_interruptible_all(&u->peer_wait);
463         sk->sk_max_ack_backlog  = backlog;
464         sk->sk_state            = TCP_LISTEN;
465         /* set credentials so connect can copy them */
466         sk->sk_peercred.pid     = current->tgid;
467         sk->sk_peercred.uid     = current->euid;
468         sk->sk_peercred.gid     = current->egid;
469         err = 0;
470
471 out_unlock:
472         unix_state_wunlock(sk);
473 out:
474         return err;
475 }
476
477 static int unix_release(struct socket *);
478 static int unix_bind(struct socket *, struct sockaddr *, int);
479 static int unix_stream_connect(struct socket *, struct sockaddr *,
480                                int addr_len, int flags);
481 static int unix_socketpair(struct socket *, struct socket *);
482 static int unix_accept(struct socket *, struct socket *, int);
483 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
484 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
485 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
486 static int unix_shutdown(struct socket *, int);
487 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
488                                struct msghdr *, size_t);
489 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
490                                struct msghdr *, size_t, int);
491 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
492                               struct msghdr *, size_t);
493 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
494                               struct msghdr *, size_t, int);
495 static int unix_dgram_connect(struct socket *, struct sockaddr *,
496                               int, int);
497 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
498                                   struct msghdr *, size_t);
499
500 static const struct proto_ops unix_stream_ops = {
501         .family =       PF_UNIX,
502         .owner =        THIS_MODULE,
503         .release =      unix_release,
504         .bind =         unix_bind,
505         .connect =      unix_stream_connect,
506         .socketpair =   unix_socketpair,
507         .accept =       unix_accept,
508         .getname =      unix_getname,
509         .poll =         unix_poll,
510         .ioctl =        unix_ioctl,
511         .listen =       unix_listen,
512         .shutdown =     unix_shutdown,
513         .setsockopt =   sock_no_setsockopt,
514         .getsockopt =   sock_no_getsockopt,
515         .sendmsg =      unix_stream_sendmsg,
516         .recvmsg =      unix_stream_recvmsg,
517         .mmap =         sock_no_mmap,
518         .sendpage =     sock_no_sendpage,
519 };
520
521 static const struct proto_ops unix_dgram_ops = {
522         .family =       PF_UNIX,
523         .owner =        THIS_MODULE,
524         .release =      unix_release,
525         .bind =         unix_bind,
526         .connect =      unix_dgram_connect,
527         .socketpair =   unix_socketpair,
528         .accept =       sock_no_accept,
529         .getname =      unix_getname,
530         .poll =         datagram_poll,
531         .ioctl =        unix_ioctl,
532         .listen =       sock_no_listen,
533         .shutdown =     unix_shutdown,
534         .setsockopt =   sock_no_setsockopt,
535         .getsockopt =   sock_no_getsockopt,
536         .sendmsg =      unix_dgram_sendmsg,
537         .recvmsg =      unix_dgram_recvmsg,
538         .mmap =         sock_no_mmap,
539         .sendpage =     sock_no_sendpage,
540 };
541
542 static const struct proto_ops unix_seqpacket_ops = {
543         .family =       PF_UNIX,
544         .owner =        THIS_MODULE,
545         .release =      unix_release,
546         .bind =         unix_bind,
547         .connect =      unix_stream_connect,
548         .socketpair =   unix_socketpair,
549         .accept =       unix_accept,
550         .getname =      unix_getname,
551         .poll =         datagram_poll,
552         .ioctl =        unix_ioctl,
553         .listen =       unix_listen,
554         .shutdown =     unix_shutdown,
555         .setsockopt =   sock_no_setsockopt,
556         .getsockopt =   sock_no_getsockopt,
557         .sendmsg =      unix_seqpacket_sendmsg,
558         .recvmsg =      unix_dgram_recvmsg,
559         .mmap =         sock_no_mmap,
560         .sendpage =     sock_no_sendpage,
561 };
562
563 static struct proto unix_proto = {
564         .name     = "UNIX",
565         .owner    = THIS_MODULE,
566         .obj_size = sizeof(struct unix_sock),
567 };
568
569 static struct sock * unix_create1(struct socket *sock)
570 {
571         struct sock *sk = NULL;
572         struct unix_sock *u;
573
574         if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
575                 goto out;
576
577         sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
578         if (!sk)
579                 goto out;
580
581         atomic_inc(&unix_nr_socks);
582
583         sock_init_data(sock,sk);
584
585         sk->sk_write_space      = unix_write_space;
586         sk->sk_max_ack_backlog  = sysctl_unix_max_dgram_qlen;
587         sk->sk_destruct         = unix_sock_destructor;
588         u         = unix_sk(sk);
589         u->dentry = NULL;
590         u->mnt    = NULL;
591         spin_lock_init(&u->lock);
592         atomic_set(&u->inflight, sock ? 0 : -1);
593         mutex_init(&u->readlock); /* single task reading lock */
594         init_waitqueue_head(&u->peer_wait);
595         unix_insert_socket(unix_sockets_unbound, sk);
596 out:
597         return sk;
598 }
599
600 static int unix_create(struct socket *sock, int protocol)
601 {
602         if (protocol && protocol != PF_UNIX)
603                 return -EPROTONOSUPPORT;
604
605         sock->state = SS_UNCONNECTED;
606
607         switch (sock->type) {
608         case SOCK_STREAM:
609                 sock->ops = &unix_stream_ops;
610                 break;
611                 /*
612                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
613                  *      nothing uses it.
614                  */
615         case SOCK_RAW:
616                 sock->type=SOCK_DGRAM;
617         case SOCK_DGRAM:
618                 sock->ops = &unix_dgram_ops;
619                 break;
620         case SOCK_SEQPACKET:
621                 sock->ops = &unix_seqpacket_ops;
622                 break;
623         default:
624                 return -ESOCKTNOSUPPORT;
625         }
626
627         return unix_create1(sock) ? 0 : -ENOMEM;
628 }
629
630 static int unix_release(struct socket *sock)
631 {
632         struct sock *sk = sock->sk;
633
634         if (!sk)
635                 return 0;
636
637         sock->sk = NULL;
638
639         return unix_release_sock (sk, 0);
640 }
641
642 static int unix_autobind(struct socket *sock)
643 {
644         struct sock *sk = sock->sk;
645         struct unix_sock *u = unix_sk(sk);
646         static u32 ordernum = 1;
647         struct unix_address * addr;
648         int err;
649
650         mutex_lock(&u->readlock);
651
652         err = 0;
653         if (u->addr)
654                 goto out;
655
656         err = -ENOMEM;
657         addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
658         if (!addr)
659                 goto out;
660
661         memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
662         addr->name->sun_family = AF_UNIX;
663         atomic_set(&addr->refcnt, 1);
664
665 retry:
666         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
667         addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
668
669         spin_lock(&unix_table_lock);
670         ordernum = (ordernum+1)&0xFFFFF;
671
672         if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
673                                       addr->hash)) {
674                 spin_unlock(&unix_table_lock);
675                  /* Sanity yield. It is an unusual case, but yet... */
676                 if (!(ordernum&0xFF))
677                         yield();
678                 goto retry;
679         }
680         addr->hash ^= sk->sk_type;
681
682         __unix_remove_socket(sk);
683         u->addr = addr;
684         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
685         spin_unlock(&unix_table_lock);
686         err = 0;
687
688 out:    mutex_unlock(&u->readlock);
689         return err;
690 }
691
692 static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
693                                     int type, unsigned hash, int *error)
694 {
695         struct sock *u;
696         struct nameidata nd;
697         int err = 0;
698         
699         if (sunname->sun_path[0]) {
700                 err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
701                 if (err)
702                         goto fail;
703                 err = vfs_permission(&nd, MAY_WRITE);
704                 if (err)
705                         goto put_fail;
706
707                 err = -ECONNREFUSED;
708                 if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
709                         goto put_fail;
710                 u=unix_find_socket_byinode(nd.dentry->d_inode);
711                 if (!u)
712                         goto put_fail;
713
714                 if (u->sk_type == type)
715                         touch_atime(nd.mnt, nd.dentry);
716
717                 path_release(&nd);
718
719                 err=-EPROTOTYPE;
720                 if (u->sk_type != type) {
721                         sock_put(u);
722                         goto fail;
723                 }
724         } else {
725                 err = -ECONNREFUSED;
726                 u=unix_find_socket_byname(sunname, len, type, hash);
727                 if (u) {
728                         struct dentry *dentry;
729                         dentry = unix_sk(u)->dentry;
730                         if (dentry)
731                                 touch_atime(unix_sk(u)->mnt, dentry);
732                 } else
733                         goto fail;
734         }
735         return u;
736
737 put_fail:
738         path_release(&nd);
739 fail:
740         *error=err;
741         return NULL;
742 }
743
744
745 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
746 {
747         struct sock *sk = sock->sk;
748         struct unix_sock *u = unix_sk(sk);
749         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
750         struct dentry * dentry = NULL;
751         struct nameidata nd;
752         int err;
753         unsigned hash;
754         struct unix_address *addr;
755         struct hlist_head *list;
756
757         err = -EINVAL;
758         if (sunaddr->sun_family != AF_UNIX)
759                 goto out;
760
761         if (addr_len==sizeof(short)) {
762                 err = unix_autobind(sock);
763                 goto out;
764         }
765
766         err = unix_mkname(sunaddr, addr_len, &hash);
767         if (err < 0)
768                 goto out;
769         addr_len = err;
770
771         mutex_lock(&u->readlock);
772
773         err = -EINVAL;
774         if (u->addr)
775                 goto out_up;
776
777         err = -ENOMEM;
778         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
779         if (!addr)
780                 goto out_up;
781
782         memcpy(addr->name, sunaddr, addr_len);
783         addr->len = addr_len;
784         addr->hash = hash ^ sk->sk_type;
785         atomic_set(&addr->refcnt, 1);
786
787         if (sunaddr->sun_path[0]) {
788                 unsigned int mode;
789                 err = 0;
790                 /*
791                  * Get the parent directory, calculate the hash for last
792                  * component.
793                  */
794                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
795                 if (err)
796                         goto out_mknod_parent;
797
798                 dentry = lookup_create(&nd, 0);
799                 err = PTR_ERR(dentry);
800                 if (IS_ERR(dentry))
801                         goto out_mknod_unlock;
802
803                 /*
804                  * All right, let's create it.
805                  */
806                 mode = S_IFSOCK |
807                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
808                 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
809                 if (err)
810                         goto out_mknod_dput;
811                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
812                 dput(nd.dentry);
813                 nd.dentry = dentry;
814
815                 addr->hash = UNIX_HASH_SIZE;
816         }
817
818         spin_lock(&unix_table_lock);
819
820         if (!sunaddr->sun_path[0]) {
821                 err = -EADDRINUSE;
822                 if (__unix_find_socket_byname(sunaddr, addr_len,
823                                               sk->sk_type, hash)) {
824                         unix_release_addr(addr);
825                         goto out_unlock;
826                 }
827
828                 list = &unix_socket_table[addr->hash];
829         } else {
830                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
831                 u->dentry = nd.dentry;
832                 u->mnt    = nd.mnt;
833         }
834
835         err = 0;
836         __unix_remove_socket(sk);
837         u->addr = addr;
838         __unix_insert_socket(list, sk);
839
840 out_unlock:
841         spin_unlock(&unix_table_lock);
842 out_up:
843         mutex_unlock(&u->readlock);
844 out:
845         return err;
846
847 out_mknod_dput:
848         dput(dentry);
849 out_mknod_unlock:
850         mutex_unlock(&nd.dentry->d_inode->i_mutex);
851         path_release(&nd);
852 out_mknod_parent:
853         if (err==-EEXIST)
854                 err=-EADDRINUSE;
855         unix_release_addr(addr);
856         goto out_up;
857 }
858
859 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
860                               int alen, int flags)
861 {
862         struct sock *sk = sock->sk;
863         struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
864         struct sock *other;
865         unsigned hash;
866         int err;
867
868         if (addr->sa_family != AF_UNSPEC) {
869                 err = unix_mkname(sunaddr, alen, &hash);
870                 if (err < 0)
871                         goto out;
872                 alen = err;
873
874                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
875                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
876                         goto out;
877
878                 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
879                 if (!other)
880                         goto out;
881
882                 unix_state_wlock(sk);
883
884                 err = -EPERM;
885                 if (!unix_may_send(sk, other))
886                         goto out_unlock;
887
888                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
889                 if (err)
890                         goto out_unlock;
891
892         } else {
893                 /*
894                  *      1003.1g breaking connected state with AF_UNSPEC
895                  */
896                 other = NULL;
897                 unix_state_wlock(sk);
898         }
899
900         /*
901          * If it was connected, reconnect.
902          */
903         if (unix_peer(sk)) {
904                 struct sock *old_peer = unix_peer(sk);
905                 unix_peer(sk)=other;
906                 unix_state_wunlock(sk);
907
908                 if (other != old_peer)
909                         unix_dgram_disconnected(sk, old_peer);
910                 sock_put(old_peer);
911         } else {
912                 unix_peer(sk)=other;
913                 unix_state_wunlock(sk);
914         }
915         return 0;
916
917 out_unlock:
918         unix_state_wunlock(sk);
919         sock_put(other);
920 out:
921         return err;
922 }
923
924 static long unix_wait_for_peer(struct sock *other, long timeo)
925 {
926         struct unix_sock *u = unix_sk(other);
927         int sched;
928         DEFINE_WAIT(wait);
929
930         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
931
932         sched = !sock_flag(other, SOCK_DEAD) &&
933                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
934                 (skb_queue_len(&other->sk_receive_queue) >
935                  other->sk_max_ack_backlog);
936
937         unix_state_runlock(other);
938
939         if (sched)
940                 timeo = schedule_timeout(timeo);
941
942         finish_wait(&u->peer_wait, &wait);
943         return timeo;
944 }
945
946 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
947                                int addr_len, int flags)
948 {
949         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
950         struct sock *sk = sock->sk;
951         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
952         struct sock *newsk = NULL;
953         struct sock *other = NULL;
954         struct sk_buff *skb = NULL;
955         unsigned hash;
956         int st;
957         int err;
958         long timeo;
959
960         err = unix_mkname(sunaddr, addr_len, &hash);
961         if (err < 0)
962                 goto out;
963         addr_len = err;
964
965         if (test_bit(SOCK_PASSCRED, &sock->flags)
966                 && !u->addr && (err = unix_autobind(sock)) != 0)
967                 goto out;
968
969         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
970
971         /* First of all allocate resources.
972             If we do it after the state is locked,
973             we will have to recheck everything again in any case.
974          */
975
976         err = -ENOMEM;
977
978         /* create new sock for complete connection */
979         newsk = unix_create1(NULL);
980         if (newsk == NULL)
981                 goto out;
982
983         /* Allocate skb for sending to listening sock */
984         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
985         if (skb == NULL)
986                 goto out;
987
988 restart:
989         /*  Find listening sock. */
990         other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
991         if (!other)
992                 goto out;
993
994         /* Latch state of peer */
995         unix_state_rlock(other);
996
997         /* Apparently VFS overslept socket death. Retry. */
998         if (sock_flag(other, SOCK_DEAD)) {
999                 unix_state_runlock(other);
1000                 sock_put(other);
1001                 goto restart;
1002         }
1003
1004         err = -ECONNREFUSED;
1005         if (other->sk_state != TCP_LISTEN)
1006                 goto out_unlock;
1007
1008         if (skb_queue_len(&other->sk_receive_queue) >
1009             other->sk_max_ack_backlog) {
1010                 err = -EAGAIN;
1011                 if (!timeo)
1012                         goto out_unlock;
1013
1014                 timeo = unix_wait_for_peer(other, timeo);
1015
1016                 err = sock_intr_errno(timeo);
1017                 if (signal_pending(current))
1018                         goto out;
1019                 sock_put(other);
1020                 goto restart;
1021         }
1022
1023         /* Latch our state.
1024
1025            This is a tricky place. We need to grab the write lock and
1026            cannot drop the lock on the peer. It is dangerous because a
1027            deadlock is possible. The connect-to-self case and simultaneous
1028            connect attempts are eliminated by checking the socket
1029            state: other is TCP_LISTEN, and if sk is TCP_LISTEN we
1030            check this before attempting to grab the lock.
1031
1032            Well, and we have to recheck the state after the socket is locked.
1033          */
1034         st = sk->sk_state;
1035
1036         switch (st) {
1037         case TCP_CLOSE:
1038                 /* This is ok... continue with connect */
1039                 break;
1040         case TCP_ESTABLISHED:
1041                 /* Socket is already connected */
1042                 err = -EISCONN;
1043                 goto out_unlock;
1044         default:
1045                 err = -EINVAL;
1046                 goto out_unlock;
1047         }
1048
1049         unix_state_wlock(sk);
1050
1051         if (sk->sk_state != st) {
1052                 unix_state_wunlock(sk);
1053                 unix_state_runlock(other);
1054                 sock_put(other);
1055                 goto restart;
1056         }
1057
1058         err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1059         if (err) {
1060                 unix_state_wunlock(sk);
1061                 goto out_unlock;
1062         }
1063
1064         /* The way is open! Quickly set all the necessary fields... */
1065
1066         sock_hold(sk);
1067         unix_peer(newsk)        = sk;
1068         newsk->sk_state         = TCP_ESTABLISHED;
1069         newsk->sk_type          = sk->sk_type;
1070         newsk->sk_peercred.pid  = current->tgid;
1071         newsk->sk_peercred.uid  = current->euid;
1072         newsk->sk_peercred.gid  = current->egid;
1073         newu = unix_sk(newsk);
1074         newsk->sk_sleep         = &newu->peer_wait;
1075         otheru = unix_sk(other);
1076
1077         /* copy address information from listening to new sock*/
1078         if (otheru->addr) {
1079                 atomic_inc(&otheru->addr->refcnt);
1080                 newu->addr = otheru->addr;
1081         }
1082         if (otheru->dentry) {
1083                 newu->dentry    = dget(otheru->dentry);
1084                 newu->mnt       = mntget(otheru->mnt);
1085         }
1086
1087         /* Set credentials */
1088         sk->sk_peercred = other->sk_peercred;
1089
1090         sock->state     = SS_CONNECTED;
1091         sk->sk_state    = TCP_ESTABLISHED;
1092         sock_hold(newsk);
1093
1094         smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
1095         unix_peer(sk)   = newsk;
1096
1097         unix_state_wunlock(sk);
1098
1099         /* take ten and send info to listening sock */
1100         spin_lock(&other->sk_receive_queue.lock);
1101         __skb_queue_tail(&other->sk_receive_queue, skb);
1102         /* Undo artificially decreased inflight after embrion
1103          * is installed to listening socket. */
1104         atomic_inc(&newu->inflight);
1105         spin_unlock(&other->sk_receive_queue.lock);
1106         unix_state_runlock(other);
1107         other->sk_data_ready(other, 0);
1108         sock_put(other);
1109         return 0;
1110
1111 out_unlock:
1112         if (other)
1113                 unix_state_runlock(other);
1114
1115 out:
1116         if (skb)
1117                 kfree_skb(skb);
1118         if (newsk)
1119                 unix_release_sock(newsk, 0);
1120         if (other)
1121                 sock_put(other);
1122         return err;
1123 }
1124
1125 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1126 {
1127         struct sock *ska=socka->sk, *skb = sockb->sk;
1128
1129         /* Join our sockets back to back */
1130         sock_hold(ska);
1131         sock_hold(skb);
1132         unix_peer(ska)=skb;
1133         unix_peer(skb)=ska;
1134         ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1135         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1136         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1137
1138         if (ska->sk_type != SOCK_DGRAM) {
1139                 ska->sk_state = TCP_ESTABLISHED;
1140                 skb->sk_state = TCP_ESTABLISHED;
1141                 socka->state  = SS_CONNECTED;
1142                 sockb->state  = SS_CONNECTED;
1143         }
1144         return 0;
1145 }
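
/*
 * Userspace view of the function above (sketch for illustration only,
 * error handling omitted):
 *
 *      int sv[2];
 *
 *      socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *      write(sv[0], "ping", 4);        (now readable on sv[1])
 *
 * Both descriptors come back already connected to each other; for
 * stream/seqpacket pairs both ends are moved to TCP_ESTABLISHED,
 * datagram pairs simply get each other installed as the peer.
 */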
1146
1147 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1148 {
1149         struct sock *sk = sock->sk;
1150         struct sock *tsk;
1151         struct sk_buff *skb;
1152         int err;
1153
1154         err = -EOPNOTSUPP;
1155         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1156                 goto out;
1157
1158         err = -EINVAL;
1159         if (sk->sk_state != TCP_LISTEN)
1160                 goto out;
1161
1162         /* If socket state is TCP_LISTEN it cannot change (for now...),
1163          * so that no locks are necessary.
1164          */
1165
1166         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1167         if (!skb) {
1168                 /* This means receive shutdown. */
1169                 if (err == 0)
1170                         err = -EINVAL;
1171                 goto out;
1172         }
1173
1174         tsk = skb->sk;
1175         skb_free_datagram(sk, skb);
1176         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1177
1178         /* attach accepted sock to socket */
1179         unix_state_wlock(tsk);
1180         newsock->state = SS_CONNECTED;
1181         sock_graft(tsk, newsock);
1182         unix_state_wunlock(tsk);
1183         return 0;
1184
1185 out:
1186         return err;
1187 }
1188
1189
1190 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1191 {
1192         struct sock *sk = sock->sk;
1193         struct unix_sock *u;
1194         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1195         int err = 0;
1196
1197         if (peer) {
1198                 sk = unix_peer_get(sk);
1199
1200                 err = -ENOTCONN;
1201                 if (!sk)
1202                         goto out;
1203                 err = 0;
1204         } else {
1205                 sock_hold(sk);
1206         }
1207
1208         u = unix_sk(sk);
1209         unix_state_rlock(sk);
1210         if (!u->addr) {
1211                 sunaddr->sun_family = AF_UNIX;
1212                 sunaddr->sun_path[0] = 0;
1213                 *uaddr_len = sizeof(short);
1214         } else {
1215                 struct unix_address *addr = u->addr;
1216
1217                 *uaddr_len = addr->len;
1218                 memcpy(sunaddr, addr->name, *uaddr_len);
1219         }
1220         unix_state_runlock(sk);
1221         sock_put(sk);
1222 out:
1223         return err;
1224 }
1225
1226 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1227 {
1228         int i;
1229
1230         scm->fp = UNIXCB(skb).fp;
1231         skb->destructor = sock_wfree;
1232         UNIXCB(skb).fp = NULL;
1233
1234         for (i=scm->fp->count-1; i>=0; i--)
1235                 unix_notinflight(scm->fp->fp[i]);
1236 }
1237
1238 static void unix_destruct_fds(struct sk_buff *skb)
1239 {
1240         struct scm_cookie scm;
1241         memset(&scm, 0, sizeof(scm));
1242         unix_detach_fds(&scm, skb);
1243
1244         /* Alas, it calls VFS */
1245         /* So fscking what? fput() had been SMP-safe since the last Summer */
1246         scm_destroy(&scm);
1247         sock_wfree(skb);
1248 }
1249
1250 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1251 {
1252         int i;
1253         for (i=scm->fp->count-1; i>=0; i--)
1254                 unix_inflight(scm->fp->fp[i]);
1255         UNIXCB(skb).fp = scm->fp;
1256         skb->destructor = unix_destruct_fds;
1257         scm->fp = NULL;
1258 }
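
/*
 * unix_attach_fds()/unix_detach_fds() above are the kernel half of
 * SCM_RIGHTS descriptor passing.  A minimal sketch of the sending
 * side in userspace (variable names such as fd_to_pass and sock_fd
 * are illustrative, error handling omitted):
 *
 *      char dummy = 'x';
 *      struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
 *      char cbuf[CMSG_SPACE(sizeof(int))];
 *      struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
 *                            .msg_control = cbuf,
 *                            .msg_controllen = sizeof(cbuf) };
 *      struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
 *
 *      cmsg->cmsg_level = SOL_SOCKET;
 *      cmsg->cmsg_type  = SCM_RIGHTS;
 *      cmsg->cmsg_len   = CMSG_LEN(sizeof(int));
 *      memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));
 *      sendmsg(sock_fd, &msg, 0);
 *
 * scm_send() unpacks the control message into scm->fp; the sender then
 * accounts each passed file as "in flight" for the garbage collector
 * before the skb is queued, and unix_detach_fds() undoes that on the
 * receiving side when the skb is consumed.
 */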
1259
1260 /*
1261  *      Send AF_UNIX data.
1262  */
1263
1264 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1265                               struct msghdr *msg, size_t len)
1266 {
1267         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1268         struct sock *sk = sock->sk;
1269         struct unix_sock *u = unix_sk(sk);
1270         struct sockaddr_un *sunaddr=msg->msg_name;
1271         struct sock *other = NULL;
1272         int namelen = 0; /* fake GCC */
1273         int err;
1274         unsigned hash;
1275         struct sk_buff *skb;
1276         long timeo;
1277         struct scm_cookie tmp_scm;
1278
1279         if (NULL == siocb->scm)
1280                 siocb->scm = &tmp_scm;
1281         err = scm_send(sock, msg, siocb->scm);
1282         if (err < 0)
1283                 return err;
1284
1285         err = -EOPNOTSUPP;
1286         if (msg->msg_flags&MSG_OOB)
1287                 goto out;
1288
1289         if (msg->msg_namelen) {
1290                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1291                 if (err < 0)
1292                         goto out;
1293                 namelen = err;
1294         } else {
1295                 sunaddr = NULL;
1296                 err = -ENOTCONN;
1297                 other = unix_peer_get(sk);
1298                 if (!other)
1299                         goto out;
1300         }
1301
1302         if (test_bit(SOCK_PASSCRED, &sock->flags)
1303                 && !u->addr && (err = unix_autobind(sock)) != 0)
1304                 goto out;
1305
1306         err = -EMSGSIZE;
1307         if (len > sk->sk_sndbuf - 32)
1308                 goto out;
1309
1310         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1311         if (skb==NULL)
1312                 goto out;
1313
1314         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1315         if (siocb->scm->fp)
1316                 unix_attach_fds(siocb->scm, skb);
1317
1318         unix_get_peersec_dgram(skb);
1319
1320         skb->h.raw = skb->data;
1321         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1322         if (err)
1323                 goto out_free;
1324
1325         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1326
1327 restart:
1328         if (!other) {
1329                 err = -ECONNRESET;
1330                 if (sunaddr == NULL)
1331                         goto out_free;
1332
1333                 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1334                                         hash, &err);
1335                 if (other==NULL)
1336                         goto out_free;
1337         }
1338
1339         unix_state_rlock(other);
1340         err = -EPERM;
1341         if (!unix_may_send(sk, other))
1342                 goto out_unlock;
1343
1344         if (sock_flag(other, SOCK_DEAD)) {
1345                 /*
1346                  *      Check with 1003.1g - what should
1347                  *      datagram error
1348                  */
1349                 unix_state_runlock(other);
1350                 sock_put(other);
1351
1352                 err = 0;
1353                 unix_state_wlock(sk);
1354                 if (unix_peer(sk) == other) {
1355                         unix_peer(sk)=NULL;
1356                         unix_state_wunlock(sk);
1357
1358                         unix_dgram_disconnected(sk, other);
1359                         sock_put(other);
1360                         err = -ECONNREFUSED;
1361                 } else {
1362                         unix_state_wunlock(sk);
1363                 }
1364
1365                 other = NULL;
1366                 if (err)
1367                         goto out_free;
1368                 goto restart;
1369         }
1370
1371         err = -EPIPE;
1372         if (other->sk_shutdown & RCV_SHUTDOWN)
1373                 goto out_unlock;
1374
1375         if (sk->sk_type != SOCK_SEQPACKET) {
1376                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1377                 if (err)
1378                         goto out_unlock;
1379         }
1380
1381         if (unix_peer(other) != sk &&
1382             (skb_queue_len(&other->sk_receive_queue) >
1383              other->sk_max_ack_backlog)) {
1384                 if (!timeo) {
1385                         err = -EAGAIN;
1386                         goto out_unlock;
1387                 }
1388
1389                 timeo = unix_wait_for_peer(other, timeo);
1390
1391                 err = sock_intr_errno(timeo);
1392                 if (signal_pending(current))
1393                         goto out_free;
1394
1395                 goto restart;
1396         }
1397
1398         skb_queue_tail(&other->sk_receive_queue, skb);
1399         unix_state_runlock(other);
1400         other->sk_data_ready(other, len);
1401         sock_put(other);
1402         scm_destroy(siocb->scm);
1403         return len;
1404
1405 out_unlock:
1406         unix_state_runlock(other);
1407 out_free:
1408         kfree_skb(skb);
1409 out:
1410         if (other)
1411                 sock_put(other);
1412         scm_destroy(siocb->scm);
1413         return err;
1414 }
1415
1416                 
1417 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1418                                struct msghdr *msg, size_t len)
1419 {
1420         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1421         struct sock *sk = sock->sk;
1422         struct sock *other = NULL;
1423         struct sockaddr_un *sunaddr=msg->msg_name;
1424         int err,size;
1425         struct sk_buff *skb;
1426         int sent=0;
1427         struct scm_cookie tmp_scm;
1428
1429         if (NULL == siocb->scm)
1430                 siocb->scm = &tmp_scm;
1431         err = scm_send(sock, msg, siocb->scm);
1432         if (err < 0)
1433                 return err;
1434
1435         err = -EOPNOTSUPP;
1436         if (msg->msg_flags&MSG_OOB)
1437                 goto out_err;
1438
1439         if (msg->msg_namelen) {
1440                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1441                 goto out_err;
1442         } else {
1443                 sunaddr = NULL;
1444                 err = -ENOTCONN;
1445                 other = unix_peer(sk);
1446                 if (!other)
1447                         goto out_err;
1448         }
1449
1450         if (sk->sk_shutdown & SEND_SHUTDOWN)
1451                 goto pipe_err;
1452
1453         while(sent < len)
1454         {
1455                 /*
1456                  *      Optimisation for the fact that under 0.01% of X
1457                  *      messages typically need breaking up.
1458                  */
1459
1460                 size = len-sent;
1461
1462                 /* Keep two messages in the pipe so it schedules better */
1463                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1464                         size = (sk->sk_sndbuf >> 1) - 64;
1465
1466                 if (size > SKB_MAX_ALLOC)
1467                         size = SKB_MAX_ALLOC;
1468                         
1469                 /*
1470                  *      Grab a buffer
1471                  */
1472                  
1473                 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1474
1475                 if (skb==NULL)
1476                         goto out_err;
1477
1478                 /*
1479                  *      If you pass two values to the sock_alloc_send_skb
1480                  *      it tries to grab the large buffer with GFP_NOFS
1481                  *      (which can fail easily), and if it fails grab the
1482                  *      fallback size buffer which is under a page and will
1483                  *      succeed. [Alan]
1484                  */
1485                 size = min_t(int, size, skb_tailroom(skb));
1486
1487                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1488                 if (siocb->scm->fp)
1489                         unix_attach_fds(siocb->scm, skb);
1490
1491                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1492                         kfree_skb(skb);
1493                         goto out_err;
1494                 }
1495
1496                 unix_state_rlock(other);
1497
1498                 if (sock_flag(other, SOCK_DEAD) ||
1499                     (other->sk_shutdown & RCV_SHUTDOWN))
1500                         goto pipe_err_free;
1501
1502                 skb_queue_tail(&other->sk_receive_queue, skb);
1503                 unix_state_runlock(other);
1504                 other->sk_data_ready(other, size);
1505                 sent+=size;
1506         }
1507
1508         scm_destroy(siocb->scm);
1509         siocb->scm = NULL;
1510
1511         return sent;
1512
1513 pipe_err_free:
1514         unix_state_runlock(other);
1515         kfree_skb(skb);
1516 pipe_err:
1517         if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1518                 send_sig(SIGPIPE,current,0);
1519         err = -EPIPE;
1520 out_err:
1521         scm_destroy(siocb->scm);
1522         siocb->scm = NULL;
1523         return sent ? : err;
1524 }
1525
1526 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1527                                   struct msghdr *msg, size_t len)
1528 {
1529         int err;
1530         struct sock *sk = sock->sk;
1531         
1532         err = sock_error(sk);
1533         if (err)
1534                 return err;
1535
1536         if (sk->sk_state != TCP_ESTABLISHED)
1537                 return -ENOTCONN;
1538
1539         if (msg->msg_namelen)
1540                 msg->msg_namelen = 0;
1541
1542         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1543 }
1544                                                                                             
1545 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1546 {
1547         struct unix_sock *u = unix_sk(sk);
1548
1549         msg->msg_namelen = 0;
1550         if (u->addr) {
1551                 msg->msg_namelen = u->addr->len;
1552                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1553         }
1554 }
1555
1556 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1557                               struct msghdr *msg, size_t size,
1558                               int flags)
1559 {
1560         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1561         struct scm_cookie tmp_scm;
1562         struct sock *sk = sock->sk;
1563         struct unix_sock *u = unix_sk(sk);
1564         int noblock = flags & MSG_DONTWAIT;
1565         struct sk_buff *skb;
1566         int err;
1567
1568         err = -EOPNOTSUPP;
1569         if (flags&MSG_OOB)
1570                 goto out;
1571
1572         msg->msg_namelen = 0;
1573
1574         mutex_lock(&u->readlock);
1575
1576         skb = skb_recv_datagram(sk, flags, noblock, &err);
1577         if (!skb)
1578                 goto out_unlock;
1579
1580         wake_up_interruptible(&u->peer_wait);
1581
1582         if (msg->msg_name)
1583                 unix_copy_addr(msg, skb->sk);
1584
1585         if (size > skb->len)
1586                 size = skb->len;
1587         else if (size < skb->len)
1588                 msg->msg_flags |= MSG_TRUNC;
1589
1590         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1591         if (err)
1592                 goto out_free;
1593
1594         if (!siocb->scm) {
1595                 siocb->scm = &tmp_scm;
1596                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1597         }
1598         siocb->scm->creds = *UNIXCREDS(skb);
1599         unix_set_secdata(siocb->scm, skb);
1600
1601         if (!(flags & MSG_PEEK))
1602         {
1603                 if (UNIXCB(skb).fp)
1604                         unix_detach_fds(siocb->scm, skb);
1605         }
1606         else 
1607         {
1608                 /* It is questionable what to do on PEEK; we could:
1609                    - not return fds at all - clean, but too simple 8)
1610                    - return fds, and not return them again on read (the old
1611                      strategy, apparently wrong)
1612                    - clone the fds (chosen here, as the most general
1613                      solution)
1614
1615                    POSIX 1003.1g does not actually define this clearly
1616                    at all; then again, POSIX 1003.1g doesn't define a lot
1617                    of things clearly!
1618
1619                 */
1620                 if (UNIXCB(skb).fp)
1621                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1622         }
1623         err = size;
1624
1625         scm_recv(sock, msg, siocb->scm, flags);
1626
1627 out_free:
1628         skb_free_datagram(sk,skb);
1629 out_unlock:
1630         mutex_unlock(&u->readlock);
1631 out:
1632         return err;
1633 }
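
/*
 * Illustrative only: a user-space sketch (not kernel code) of the receive
 * side above - a short buffer sets MSG_TRUNC, and any file descriptors the
 * sender attached with SCM_RIGHTS arrive as ancillary data (duplicated
 * rather than consumed when MSG_PEEK is used, per the comment above).  The
 * function name and buffer sizes are invented for the example.
 */
#if 0
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static int demo_recv_fd(int sock)
{
        char data[64], cbuf[CMSG_SPACE(sizeof(int))];
        struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
        struct msghdr msg = {
                .msg_iov        = &iov,
                .msg_iovlen     = 1,
                .msg_control    = cbuf,
                .msg_controllen = sizeof(cbuf),
        };
        struct cmsghdr *cmsg;
        int fd = -1;

        if (recvmsg(sock, &msg, 0) < 0)
                return -1;
        if (msg.msg_flags & MSG_TRUNC) {
                /* the datagram was larger than 'data' and was cut short */
        }

        cmsg = CMSG_FIRSTHDR(&msg);
        if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
            cmsg->cmsg_type == SCM_RIGHTS)
                memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
        return fd;              /* -1 if no descriptor was passed */
}
#endif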
1634
1635 /*
1636  *      Sleep until data has arrived, but check for races.
1637  */
1638  
1639 static long unix_stream_data_wait(struct sock * sk, long timeo)
1640 {
1641         DEFINE_WAIT(wait);
1642
1643         unix_state_rlock(sk);
1644
1645         for (;;) {
1646                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1647
1648                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1649                     sk->sk_err ||
1650                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1651                     signal_pending(current) ||
1652                     !timeo)
1653                         break;
1654
1655                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1656                 unix_state_runlock(sk);
1657                 timeo = schedule_timeout(timeo);
1658                 unix_state_rlock(sk);
1659                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1660         }
1661
1662         finish_wait(sk->sk_sleep, &wait);
1663         unix_state_runlock(sk);
1664         return timeo;
1665 }
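
/*
 * Illustrative only: the timeout handed to this function comes from
 * sock_rcvtimeo(), so from user space SO_RCVTIMEO bounds the wait.  A sketch
 * (not kernel code); the function name and 2-second timeout are arbitrary.
 */
#if 0
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>

static ssize_t demo_timed_recv(int sock, void *buf, size_t len)
{
        struct timeval tv = { .tv_sec = 2, .tv_usec = 0 };

        setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
        /* If nothing arrives within 2s, recv() fails with EAGAIN. */
        return recv(sock, buf, len, 0);
}
#endif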
1666
1667
1668
1669 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1670                                struct msghdr *msg, size_t size,
1671                                int flags)
1672 {
1673         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1674         struct scm_cookie tmp_scm;
1675         struct sock *sk = sock->sk;
1676         struct unix_sock *u = unix_sk(sk);
1677         struct sockaddr_un *sunaddr=msg->msg_name;
1678         int copied = 0;
1679         int check_creds = 0;
1680         int target;
1681         int err = 0;
1682         long timeo;
1683
1684         err = -EINVAL;
1685         if (sk->sk_state != TCP_ESTABLISHED)
1686                 goto out;
1687
1688         err = -EOPNOTSUPP;
1689         if (flags&MSG_OOB)
1690                 goto out;
1691
1692         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1693         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1694
1695         msg->msg_namelen = 0;
1696
1697         /* Lock the socket to prevent the receive queue from being
1698          * reordered while we sleep in memcpy_toiovec
1699          */
1700
1701         if (!siocb->scm) {
1702                 siocb->scm = &tmp_scm;
1703                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1704         }
1705
1706         mutex_lock(&u->readlock);
1707
1708         do
1709         {
1710                 int chunk;
1711                 struct sk_buff *skb;
1712
1713                 skb = skb_dequeue(&sk->sk_receive_queue);
1714                 if (skb==NULL)
1715                 {
1716                         if (copied >= target)
1717                                 break;
1718
1719                         /*
1720                          *      POSIX 1003.1g mandates this order.
1721                          */
1722                          
1723                         if ((err = sock_error(sk)) != 0)
1724                                 break;
1725                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1726                                 break;
1727                         err = -EAGAIN;
1728                         if (!timeo)
1729                                 break;
1730                         mutex_unlock(&u->readlock);
1731
1732                         timeo = unix_stream_data_wait(sk, timeo);
1733
1734                         if (signal_pending(current)) {
1735                                 err = sock_intr_errno(timeo);
1736                                 goto out;
1737                         }
1738                         mutex_lock(&u->readlock);
1739                         continue;
1740                 }
1741
1742                 if (check_creds) {
1743                         /* Never glue messages from different writers */
1744                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1745                                 skb_queue_head(&sk->sk_receive_queue, skb);
1746                                 break;
1747                         }
1748                 } else {
1749                         /* Copy credentials */
1750                         siocb->scm->creds = *UNIXCREDS(skb);
1751                         check_creds = 1;
1752                 }
1753
1754                 /* Copy address just once */
1755                 if (sunaddr)
1756                 {
1757                         unix_copy_addr(msg, skb->sk);
1758                         sunaddr = NULL;
1759                 }
1760
1761                 chunk = min_t(unsigned int, skb->len, size);
1762                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1763                         skb_queue_head(&sk->sk_receive_queue, skb);
1764                         if (copied == 0)
1765                                 copied = -EFAULT;
1766                         break;
1767                 }
1768                 copied += chunk;
1769                 size -= chunk;
1770
1771                 /* Mark read part of skb as used */
1772                 if (!(flags & MSG_PEEK))
1773                 {
1774                         skb_pull(skb, chunk);
1775
1776                         if (UNIXCB(skb).fp)
1777                                 unix_detach_fds(siocb->scm, skb);
1778
1779                         /* put the skb back if we didn't use it up.. */
1780                         if (skb->len)
1781                         {
1782                                 skb_queue_head(&sk->sk_receive_queue, skb);
1783                                 break;
1784                         }
1785
1786                         kfree_skb(skb);
1787
1788                         if (siocb->scm->fp)
1789                                 break;
1790                 }
1791                 else
1792                 {
1793                         /* It is questionable, see note in unix_dgram_recvmsg.
1794                          */
1795                         if (UNIXCB(skb).fp)
1796                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1797
1798                         /* put message back and return */
1799                         skb_queue_head(&sk->sk_receive_queue, skb);
1800                         break;
1801                 }
1802         } while (size);
1803
1804         mutex_unlock(&u->readlock);
1805         scm_recv(sock, msg, siocb->scm, flags);
1806 out:
1807         return copied ? : err;
1808 }
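
/*
 * Illustrative only: 'target' above comes from sock_rcvlowat(), so from user
 * space MSG_WAITALL (or SO_RCVLOWAT) keeps the loop reading until the
 * requested amount has arrived, short of an error, signal or shutdown.
 * A sketch, not kernel code; the function name is invented.
 */
#if 0
#include <sys/socket.h>
#include <sys/types.h>

static ssize_t demo_read_exact(int sock, void *buf, size_t want)
{
        /* Blocks until 'want' bytes are copied, EOF, a signal or an error. */
        return recv(sock, buf, want, MSG_WAITALL);
}
#endif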
1809
1810 static int unix_shutdown(struct socket *sock, int mode)
1811 {
1812         struct sock *sk = sock->sk;
1813         struct sock *other;
1814
1815         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1816
1817         if (mode) {
1818                 unix_state_wlock(sk);
1819                 sk->sk_shutdown |= mode;
1820                 other=unix_peer(sk);
1821                 if (other)
1822                         sock_hold(other);
1823                 unix_state_wunlock(sk);
1824                 sk->sk_state_change(sk);
1825
1826                 if (other &&
1827                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1828
1829                         int peer_mode = 0;
1830
1831                         if (mode&RCV_SHUTDOWN)
1832                                 peer_mode |= SEND_SHUTDOWN;
1833                         if (mode&SEND_SHUTDOWN)
1834                                 peer_mode |= RCV_SHUTDOWN;
1835                         unix_state_wlock(other);
1836                         other->sk_shutdown |= peer_mode;
1837                         unix_state_wunlock(other);
1838                         other->sk_state_change(other);
1839                         read_lock(&other->sk_callback_lock);
1840                         if (peer_mode == SHUTDOWN_MASK)
1841                                 sk_wake_async(other,1,POLL_HUP);
1842                         else if (peer_mode & RCV_SHUTDOWN)
1843                                 sk_wake_async(other,1,POLL_IN);
1844                         read_unlock(&other->sk_callback_lock);
1845                 }
1846                 if (other)
1847                         sock_put(other);
1848         }
1849         return 0;
1850 }
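
/*
 * Illustrative only: a user-space sketch (not kernel code) of the cross-wise
 * mapping above - SEND_SHUTDOWN on one end is reflected as RCV_SHUTDOWN on
 * the peer, so the peer immediately sees end-of-file.  The function name is
 * invented for the example.
 */
#if 0
#include <sys/socket.h>
#include <unistd.h>

static void demo_shutdown(void)
{
        int sv[2];
        char c;

        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
                return;

        shutdown(sv[0], SHUT_WR);       /* sv[1] gets RCV_SHUTDOWN */
        read(sv[1], &c, 1);             /* returns 0: end-of-file */

        close(sv[0]);
        close(sv[1]);
}
#endif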
1851
1852 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1853 {
1854         struct sock *sk = sock->sk;
1855         long amount=0;
1856         int err;
1857
1858         switch(cmd)
1859         {
1860                 case SIOCOUTQ:
1861                         amount = atomic_read(&sk->sk_wmem_alloc);
1862                         err = put_user(amount, (int __user *)arg);
1863                         break;
1864                 case SIOCINQ:
1865                 {
1866                         struct sk_buff *skb;
1867
1868                         if (sk->sk_state == TCP_LISTEN) {
1869                                 err = -EINVAL;
1870                                 break;
1871                         }
1872
1873                         spin_lock(&sk->sk_receive_queue.lock);
1874                         if (sk->sk_type == SOCK_STREAM ||
1875                             sk->sk_type == SOCK_SEQPACKET) {
1876                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1877                                         amount += skb->len;
1878                         } else {
1879                                 skb = skb_peek(&sk->sk_receive_queue);
1880                                 if (skb)
1881                                         amount=skb->len;
1882                         }
1883                         spin_unlock(&sk->sk_receive_queue.lock);
1884                         err = put_user(amount, (int __user *)arg);
1885                         break;
1886                 }
1887
1888                 default:
1889                         err = -ENOIOCTLCMD;
1890                         break;
1891         }
1892         return err;
1893 }
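
/*
 * Illustrative only: the two commands above are reachable from user space as
 * SIOCINQ (FIONREAD) and SIOCOUTQ (TIOCOUTQ).  A sketch, not kernel code,
 * with an invented function name.
 */
#if 0
#include <sys/ioctl.h>

static void demo_queue_sizes(int sock)
{
        int unread = 0, unsent = 0;

        ioctl(sock, FIONREAD, &unread); /* SIOCINQ: bytes waiting in the receive queue */
        ioctl(sock, TIOCOUTQ, &unsent); /* SIOCOUTQ: sent bytes not yet consumed by the peer */
}
#endif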
1894
1895 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1896 {
1897         struct sock *sk = sock->sk;
1898         unsigned int mask;
1899
1900         poll_wait(file, sk->sk_sleep, wait);
1901         mask = 0;
1902
1903         /* exceptional events? */
1904         if (sk->sk_err)
1905                 mask |= POLLERR;
1906         if (sk->sk_shutdown == SHUTDOWN_MASK)
1907                 mask |= POLLHUP;
1908         if (sk->sk_shutdown & RCV_SHUTDOWN)
1909                 mask |= POLLRDHUP;
1910
1911         /* readable? */
1912         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1913             (sk->sk_shutdown & RCV_SHUTDOWN))
1914                 mask |= POLLIN | POLLRDNORM;
1915
1916         /* Connection-based need to check for termination and startup */
1917         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1918                 mask |= POLLHUP;
1919
1920         /*
1921          * We also report the socket as writable when the other side has
1922          * shut down the connection; this prevents sockets from getting stuck.
1923          */
1924         if (unix_writable(sk))
1925                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1926
1927         return mask;
1928 }
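
/*
 * Illustrative only: a user-space sketch (not kernel code) of watching for
 * the mask bits computed above; POLLRDHUP needs _GNU_SOURCE and a 2.6.17+
 * kernel.  The function name is invented for the example.
 */
#if 0
#define _GNU_SOURCE             /* for POLLRDHUP */
#include <poll.h>

static int demo_poll(int sock)
{
        struct pollfd pfd = { .fd = sock, .events = POLLIN | POLLRDHUP };

        if (poll(&pfd, 1, -1) < 0)
                return -1;
        if (pfd.revents & POLLRDHUP)
                return 0;               /* peer shut down its sending side */
        return pfd.revents & POLLIN;    /* data is ready to read */
}
#endif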
1929
1930
1931 #ifdef CONFIG_PROC_FS
1932 static struct sock *unix_seq_idx(int *iter, loff_t pos)
1933 {
1934         loff_t off = 0;
1935         struct sock *s;
1936
1937         for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1938                 if (off == pos) 
1939                         return s;
1940                 ++off;
1941         }
1942         return NULL;
1943 }
1944
1945
1946 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1947 {
1948         spin_lock(&unix_table_lock);
1949         return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1950 }
1951
1952 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1953 {
1954         ++*pos;
1955
1956         if (v == (void *)1) 
1957                 return first_unix_socket(seq->private);
1958         return next_unix_socket(seq->private, v);
1959 }
1960
1961 static void unix_seq_stop(struct seq_file *seq, void *v)
1962 {
1963         spin_unlock(&unix_table_lock);
1964 }
1965
1966 static int unix_seq_show(struct seq_file *seq, void *v)
1967 {
1968         
1969         if (v == (void *)1)
1970                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
1971                          "Inode Path\n");
1972         else {
1973                 struct sock *s = v;
1974                 struct unix_sock *u = unix_sk(s);
1975                 unix_state_rlock(s);
1976
1977                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1978                         s,
1979                         atomic_read(&s->sk_refcnt),
1980                         0,
1981                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1982                         s->sk_type,
1983                         s->sk_socket ?
1984                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1985                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1986                         sock_i_ino(s));
1987
1988                 if (u->addr) {
1989                         int i, len;
1990                         seq_putc(seq, ' ');
1991
1992                         i = 0;
1993                         len = u->addr->len - sizeof(short);
1994                         if (!UNIX_ABSTRACT(s))
1995                                 len--;
1996                         else {
1997                                 seq_putc(seq, '@');
1998                                 i++;
1999                         }
2000                         for ( ; i < len; i++)
2001                                 seq_putc(seq, u->addr->name->sun_path[i]);
2002                 }
2003                 unix_state_runlock(s);
2004                 seq_putc(seq, '\n');
2005         }
2006
2007         return 0;
2008 }
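
/*
 * For reference, the format string above yields /proc/net/unix lines like
 * the following (all values invented for illustration):
 *
 *   Num       RefCount Protocol Flags    Type St Inode Path
 *   c1a2b300: 00000002 00000000 00010000 0001 01 12345 /tmp/example.sock
 *   c1a2b400: 00000002 00000000 00000000 0002 01 12346 @abstract-name
 *
 * Type is the socket type in hex (0001 SOCK_STREAM, 0002 SOCK_DGRAM,
 * 0005 SOCK_SEQPACKET), St is the SS_* state, Flags carries __SO_ACCEPTCON
 * for listeners, and a leading '@' marks an abstract address.
 */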
2009
2010 static struct seq_operations unix_seq_ops = {
2011         .start  = unix_seq_start,
2012         .next   = unix_seq_next,
2013         .stop   = unix_seq_stop,
2014         .show   = unix_seq_show,
2015 };
2016
2017
2018 static int unix_seq_open(struct inode *inode, struct file *file)
2019 {
2020         struct seq_file *seq;
2021         int rc = -ENOMEM;
2022         int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2023
2024         if (!iter)
2025                 goto out;
2026
2027         rc = seq_open(file, &unix_seq_ops);
2028         if (rc)
2029                 goto out_kfree;
2030
2031         seq          = file->private_data;
2032         seq->private = iter;
2033         *iter = 0;
2034 out:
2035         return rc;
2036 out_kfree:
2037         kfree(iter);
2038         goto out;
2039 }
2040
2041 static struct file_operations unix_seq_fops = {
2042         .owner          = THIS_MODULE,
2043         .open           = unix_seq_open,
2044         .read           = seq_read,
2045         .llseek         = seq_lseek,
2046         .release        = seq_release_private,
2047 };
2048
2049 #endif
2050
2051 static struct net_proto_family unix_family_ops = {
2052         .family = PF_UNIX,
2053         .create = unix_create,
2054         .owner  = THIS_MODULE,
2055 };
2056
2057 static int __init af_unix_init(void)
2058 {
2059         int rc = -1;
2060         struct sk_buff *dummy_skb;
2061
2062         if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2063                 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2064                 goto out;
2065         }
2066
2067         rc = proto_register(&unix_proto, 1);
2068         if (rc != 0) {
2069                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2070                        __FUNCTION__);
2071                 goto out;
2072         }
2073
2074         sock_register(&unix_family_ops);
2075 #ifdef CONFIG_PROC_FS
2076         proc_net_fops_create("unix", 0, &unix_seq_fops);
2077 #endif
2078         unix_sysctl_register();
2079 out:
2080         return rc;
2081 }
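
/*
 * Aside: the unix_skb_parms-vs-skb->cb size check performed at runtime in
 * af_unix_init() above encodes a compile-time invariant; a possible sketch
 * (not applied here) using the kernel's BUILD_BUG_ON() helper:
 */
#if 0
        BUILD_BUG_ON(sizeof(struct unix_skb_parms) >
                     sizeof(((struct sk_buff *)0)->cb));
#endif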
2082
2083 static void __exit af_unix_exit(void)
2084 {
2085         sock_unregister(PF_UNIX);
2086         unix_sysctl_unregister();
2087         proc_net_remove("unix");
2088         proto_unregister(&unix_proto);
2089 }
2090
2091 module_init(af_unix_init);
2092 module_exit(af_unix_exit);
2093
2094 MODULE_LICENSE("GPL");
2095 MODULE_ALIAS_NETPROTO(PF_UNIX);