1 /*
2  * NET4:        Implementation of BSD Unix domain sockets.
3  *
4  * Authors:     Alan Cox, <alan.cox@linux.org>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Version:     $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12  *
13  * Fixes:
14  *              Linus Torvalds  :       Assorted bug cures.
15  *              Niibe Yutaka    :       async I/O support.
16  *              Carsten Paeth   :       PF_UNIX check, address fixes.
17  *              Alan Cox        :       Limit size of allocated blocks.
18  *              Alan Cox        :       Fixed the stupid socketpair bug.
19  *              Alan Cox        :       BSD compatibility fine tuning.
20  *              Alan Cox        :       Fixed a bug in connect when interrupted.
21  *              Alan Cox        :       Sorted out a proper draft version of
22  *                                      file descriptor passing hacked up from
23  *                                      Mike Shaver's work.
24  *              Marty Leisner   :       Fixes to fd passing
25  *              Nick Nevin      :       recvmsg bugfix.
26  *              Alan Cox        :       Started proper garbage collector
27  *              Heiko Eißfeldt  :       Missing verify_area check
28  *              Alan Cox        :       Started POSIXisms
29  *              Andreas Schwab  :       Replace inode by dentry for proper
30  *                                      reference counting
31  *              Kirk Petersen   :       Made this a module
32  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
33  *                                      Lots of bug fixes.
34  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
35  *                                      by above two patches.
36  *           Andrea Arcangeli   :       If possible we block in connect(2)
37  *                                      if the max backlog of the listen socket
38  *                                      has been reached. This won't break
39  *                                      old apps and it will avoid a huge number
40  *                                      of socks being hashed (this is for unix_gc()
41  *                                      performance reasons).
42  *                                      Security fix that limits the max
43  *                                      number of socks to 2*max_files and
44  *                                      the number of skb queueable in the
45  *                                      dgram receiver.
46  *              Artur Skawina   :       Hash function optimizations
47  *           Alexey Kuznetsov   :       Full scale SMP. Lots of bugs are introduced 8)
48  *            Malcolm Beattie   :       Set peercred for socketpair
49  *           Michal Ostrowski   :       Module initialization cleanup.
50  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
51  *                                      the core infrastructure is doing that
52  *                                      for all net proto families now (2.5.69+)
53  *
54  *
55  * Known differences from reference BSD that was tested:
56  *
57  *      [TO FIX]
58  *      ECONNREFUSED is not returned from one end of a connected() socket to the
59  *              other the moment one end closes.
60  *      fstat() doesn't return st_dev=0, and gives the blksize as the high water mark
61  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
62  *      [NOT TO FIX]
63  *      accept() returns a path name even if the connecting socket has closed
64  *              in the meantime (BSD loses the path and gives up).
65  *      accept() returns 0 length path for an unbound connector. BSD returns 16
66  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
68  *      BSD af_unix's connect apparently forgets to block properly.
69  *              (need to check this with the POSIX spec in detail)
70  *
71  * Differences from 2.0.0-11-... (ANK)
72  *      Bug fixes and improvements.
73  *              - client shutdown killed server socket.
74  *              - removed all useless cli/sti pairs.
75  *
76  *      Semantic changes/extensions.
77  *              - generic control message passing.
78  *              - SCM_CREDENTIALS control message.
79  *              - "Abstract" (not FS based) socket bindings.
80  *                Abstract names are sequences of bytes (not zero terminated)
81  *                started by 0, so that this name space does not intersect
82  *                with BSD names.
83  */
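/*
 * Illustrative userspace sketch (not part of this file): binding a socket
 * into the abstract namespace described above.  The name "example" and the
 * variable names are arbitrary; error handling is omitted.
 *
 *      #include <stddef.h>
 *      #include <string.h>
 *      #include <sys/socket.h>
 *      #include <sys/un.h>
 *
 *      struct sockaddr_un sun;
 *      int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *      memset(&sun, 0, sizeof(sun));
 *      sun.sun_family = AF_UNIX;
 *      sun.sun_path[0] = '\0';                  // leading zero => abstract name
 *      memcpy(sun.sun_path + 1, "example", 7);  // raw bytes, not a C string
 *      bind(fd, (struct sockaddr *)&sun,
 *           offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 */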
84
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
95 #include <linux/un.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/sock.h>
107 #include <net/tcp_states.h>
108 #include <net/af_unix.h>
109 #include <linux/proc_fs.h>
110 #include <linux/seq_file.h>
111 #include <net/scm.h>
112 #include <linux/init.h>
113 #include <linux/poll.h>
114 #include <linux/smp_lock.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
119
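/* Default cap on datagrams queued to a receiving socket; used as the
 * sk_max_ack_backlog of newly created AF_UNIX sockets and exposed via the
 * net.unix.max_dgram_qlen sysctl. */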
120 int sysctl_unix_max_dgram_qlen = 10;
121
122 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
123 DEFINE_SPINLOCK(unix_table_lock);
124 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
125
126 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
127
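/* A pathname binding stores UNIX_HASH_SIZE in addr->hash (see unix_bind());
 * any other value means an autobound or abstract address. */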
128 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
129
130 #ifdef CONFIG_SECURITY_NETWORK
131 static void unix_get_peersec_dgram(struct sk_buff *skb)
132 {
133         int err;
134
135         err = security_socket_getpeersec_dgram(skb, UNIXSECDATA(skb),
136                                                UNIXSECLEN(skb));
137         if (err)
138                 *(UNIXSECDATA(skb)) = NULL;
139 }
140
141 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
142 {
143         scm->secdata = *UNIXSECDATA(skb);
144         scm->seclen = *UNIXSECLEN(skb);
145 }
146 #else
147 static inline void unix_get_peersec_dgram(struct sk_buff *skb)
148 { }
149
150 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
151 { }
152 #endif /* CONFIG_SECURITY_NETWORK */
153
154 /*
155  *  SMP locking strategy:
156  *    hash table is protected with spinlock unix_table_lock
157  *    each socket state is protected by separate rwlock.
158  */
159
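/* Fold a 32-bit checksum of the address bytes down to a hash table index
 * by xoring the upper bits into the low bits and masking to the table size. */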
160 static inline unsigned unix_hash_fold(unsigned hash)
161 {
162         hash ^= hash>>16;
163         hash ^= hash>>8;
164         return hash&(UNIX_HASH_SIZE-1);
165 }
166
167 #define unix_peer(sk) (unix_sk(sk)->peer)
168
169 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
170 {
171         return unix_peer(osk) == sk;
172 }
173
174 static inline int unix_may_send(struct sock *sk, struct sock *osk)
175 {
176         return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
177 }
178
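/* Take a reference on the current peer (if any) under the socket's state
 * lock, so the caller can keep using it after the lock is dropped. */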
179 static struct sock *unix_peer_get(struct sock *s)
180 {
181         struct sock *peer;
182
183         unix_state_rlock(s);
184         peer = unix_peer(s);
185         if (peer)
186                 sock_hold(peer);
187         unix_state_runlock(s);
188         return peer;
189 }
190
191 static inline void unix_release_addr(struct unix_address *addr)
192 {
193         if (atomic_dec_and_test(&addr->refcnt))
194                 kfree(addr);
195 }
196
197 /*
198  *      Check unix socket name:
199  *              - should not be zero length.
200  *              - if it does not start with a zero byte, it should be NUL terminated (FS object)
201  *              - if it starts with a zero byte, it is an abstract name.
202  */
203  
204 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
205 {
206         if (len <= sizeof(short) || len > sizeof(*sunaddr))
207                 return -EINVAL;
208         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
209                 return -EINVAL;
210         if (sunaddr->sun_path[0]) {
211                 /*
212                  * This may look like an off by one error but it is a bit more
213                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
214                  * sun_path[108] doesn't as such exist.  However, in kernel space
215                  * we are guaranteed that it is a valid memory location in our
216                  * kernel address buffer.
217                  */
218                 ((char *)sunaddr)[len]=0;
219                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
220                 return len;
221         }
222
223         *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
224         return len;
225 }
226
227 static void __unix_remove_socket(struct sock *sk)
228 {
229         sk_del_node_init(sk);
230 }
231
232 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
233 {
234         BUG_TRAP(sk_unhashed(sk));
235         sk_add_node(sk, list);
236 }
237
238 static inline void unix_remove_socket(struct sock *sk)
239 {
240         spin_lock(&unix_table_lock);
241         __unix_remove_socket(sk);
242         spin_unlock(&unix_table_lock);
243 }
244
245 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
246 {
247         spin_lock(&unix_table_lock);
248         __unix_insert_socket(list, sk);
249         spin_unlock(&unix_table_lock);
250 }
251
252 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
253                                               int len, int type, unsigned hash)
254 {
255         struct sock *s;
256         struct hlist_node *node;
257
258         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
259                 struct unix_sock *u = unix_sk(s);
260
261                 if (u->addr->len == len &&
262                     !memcmp(u->addr->name, sunname, len))
263                         goto found;
264         }
265         s = NULL;
266 found:
267         return s;
268 }
269
270 static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
271                                                    int len, int type,
272                                                    unsigned hash)
273 {
274         struct sock *s;
275
276         spin_lock(&unix_table_lock);
277         s = __unix_find_socket_byname(sunname, len, type, hash);
278         if (s)
279                 sock_hold(s);
280         spin_unlock(&unix_table_lock);
281         return s;
282 }
283
284 static struct sock *unix_find_socket_byinode(struct inode *i)
285 {
286         struct sock *s;
287         struct hlist_node *node;
288
289         spin_lock(&unix_table_lock);
290         sk_for_each(s, node,
291                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
292                 struct dentry *dentry = unix_sk(s)->dentry;
293
294                 if(dentry && dentry->d_inode == i)
295                 {
296                         sock_hold(s);
297                         goto found;
298                 }
299         }
300         s = NULL;
301 found:
302         spin_unlock(&unix_table_lock);
303         return s;
304 }
305
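/* A socket counts as writable while no more than a quarter of its send
 * buffer is taken up by skbs still in flight to the peer. */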
306 static inline int unix_writable(struct sock *sk)
307 {
308         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
309 }
310
311 static void unix_write_space(struct sock *sk)
312 {
313         read_lock(&sk->sk_callback_lock);
314         if (unix_writable(sk)) {
315                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
316                         wake_up_interruptible(sk->sk_sleep);
317                 sk_wake_async(sk, 2, POLL_OUT);
318         }
319         read_unlock(&sk->sk_callback_lock);
320 }
321
322 /* When a dgram socket disconnects (or changes its peer), we clear its receive
323  * queue of packets that arrived from the previous peer. First, this allows
324  * flow control based only on wmem_alloc; second, an sk connected to a peer
325  * may receive messages only from that peer. */
326 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
327 {
328         if (!skb_queue_empty(&sk->sk_receive_queue)) {
329                 skb_queue_purge(&sk->sk_receive_queue);
330                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
331
332                 /* If one link of a bidirectional dgram pipe is disconnected,
333                  * we signal an error. Messages are lost. Do not do this
334                  * when the peer was not connected to us.
335                  */
336                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
337                         other->sk_err = ECONNRESET;
338                         other->sk_error_report(other);
339                 }
340         }
341 }
342
343 static void unix_sock_destructor(struct sock *sk)
344 {
345         struct unix_sock *u = unix_sk(sk);
346
347         skb_queue_purge(&sk->sk_receive_queue);
348
349         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
350         BUG_TRAP(sk_unhashed(sk));
351         BUG_TRAP(!sk->sk_socket);
352         if (!sock_flag(sk, SOCK_DEAD)) {
353                 printk("Attempt to release alive unix socket: %p\n", sk);
354                 return;
355         }
356
357         if (u->addr)
358                 unix_release_addr(u->addr);
359
360         atomic_dec(&unix_nr_socks);
361 #ifdef UNIX_REFCNT_DEBUG
362         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
363 #endif
364 }
365
366 static int unix_release_sock (struct sock *sk, int embrion)
367 {
368         struct unix_sock *u = unix_sk(sk);
369         struct dentry *dentry;
370         struct vfsmount *mnt;
371         struct sock *skpair;
372         struct sk_buff *skb;
373         int state;
374
375         unix_remove_socket(sk);
376
377         /* Clear state */
378         unix_state_wlock(sk);
379         sock_orphan(sk);
380         sk->sk_shutdown = SHUTDOWN_MASK;
381         dentry       = u->dentry;
382         u->dentry    = NULL;
383         mnt          = u->mnt;
384         u->mnt       = NULL;
385         state = sk->sk_state;
386         sk->sk_state = TCP_CLOSE;
387         unix_state_wunlock(sk);
388
389         wake_up_interruptible_all(&u->peer_wait);
390
391         skpair=unix_peer(sk);
392
393         if (skpair!=NULL) {
394                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
395                         unix_state_wlock(skpair);
396                         /* No more writes */
397                         skpair->sk_shutdown = SHUTDOWN_MASK;
398                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
399                                 skpair->sk_err = ECONNRESET;
400                         unix_state_wunlock(skpair);
401                         skpair->sk_state_change(skpair);
402                         read_lock(&skpair->sk_callback_lock);
403                         sk_wake_async(skpair,1,POLL_HUP);
404                         read_unlock(&skpair->sk_callback_lock);
405                 }
406                 sock_put(skpair); /* It may now die */
407                 unix_peer(sk) = NULL;
408         }
409
410         /* Try to flush out this socket. Throw out buffers at least */
411
412         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
413                 if (state==TCP_LISTEN)
414                         unix_release_sock(skb->sk, 1);
415                 /* passed fds are erased in the kfree_skb hook        */
416                 kfree_skb(skb);
417         }
418
419         if (dentry) {
420                 dput(dentry);
421                 mntput(mnt);
422         }
423
424         sock_put(sk);
425
426         /* ---- Socket is dead now and most probably destroyed ---- */
427
428         /*
429          * Fixme: BSD difference: In BSD all sockets connected to us get
430          *        ECONNRESET and we die on the spot. In Linux we behave
431          *        like files and pipes do and wait for the last
432          *        dereference.
433          *
434          * Can't we simply set sock->err?
435          *
436          *        What does the above comment talk about? --ANK(980817)
437          */
438
439         if (atomic_read(&unix_tot_inflight))
440                 unix_gc();              /* Garbage collect fds */       
441
442         return 0;
443 }
444
445 static int unix_listen(struct socket *sock, int backlog)
446 {
447         int err;
448         struct sock *sk = sock->sk;
449         struct unix_sock *u = unix_sk(sk);
450
451         err = -EOPNOTSUPP;
452         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
453                 goto out;                       /* Only stream/seqpacket sockets accept */
454         err = -EINVAL;
455         if (!u->addr)
456                 goto out;                       /* No listens on an unbound socket */
457         unix_state_wlock(sk);
458         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
459                 goto out_unlock;
460         if (backlog > sk->sk_max_ack_backlog)
461                 wake_up_interruptible_all(&u->peer_wait);
462         sk->sk_max_ack_backlog  = backlog;
463         sk->sk_state            = TCP_LISTEN;
464         /* set credentials so connect can copy them */
465         sk->sk_peercred.pid     = current->tgid;
466         sk->sk_peercred.uid     = current->euid;
467         sk->sk_peercred.gid     = current->egid;
468         err = 0;
469
470 out_unlock:
471         unix_state_wunlock(sk);
472 out:
473         return err;
474 }
475
476 static int unix_release(struct socket *);
477 static int unix_bind(struct socket *, struct sockaddr *, int);
478 static int unix_stream_connect(struct socket *, struct sockaddr *,
479                                int addr_len, int flags);
480 static int unix_socketpair(struct socket *, struct socket *);
481 static int unix_accept(struct socket *, struct socket *, int);
482 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
483 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
484 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
485 static int unix_shutdown(struct socket *, int);
486 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
487                                struct msghdr *, size_t);
488 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
489                                struct msghdr *, size_t, int);
490 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
491                               struct msghdr *, size_t);
492 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
493                               struct msghdr *, size_t, int);
494 static int unix_dgram_connect(struct socket *, struct sockaddr *,
495                               int, int);
496 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
497                                   struct msghdr *, size_t);
498
499 static const struct proto_ops unix_stream_ops = {
500         .family =       PF_UNIX,
501         .owner =        THIS_MODULE,
502         .release =      unix_release,
503         .bind =         unix_bind,
504         .connect =      unix_stream_connect,
505         .socketpair =   unix_socketpair,
506         .accept =       unix_accept,
507         .getname =      unix_getname,
508         .poll =         unix_poll,
509         .ioctl =        unix_ioctl,
510         .listen =       unix_listen,
511         .shutdown =     unix_shutdown,
512         .setsockopt =   sock_no_setsockopt,
513         .getsockopt =   sock_no_getsockopt,
514         .sendmsg =      unix_stream_sendmsg,
515         .recvmsg =      unix_stream_recvmsg,
516         .mmap =         sock_no_mmap,
517         .sendpage =     sock_no_sendpage,
518 };
519
520 static const struct proto_ops unix_dgram_ops = {
521         .family =       PF_UNIX,
522         .owner =        THIS_MODULE,
523         .release =      unix_release,
524         .bind =         unix_bind,
525         .connect =      unix_dgram_connect,
526         .socketpair =   unix_socketpair,
527         .accept =       sock_no_accept,
528         .getname =      unix_getname,
529         .poll =         datagram_poll,
530         .ioctl =        unix_ioctl,
531         .listen =       sock_no_listen,
532         .shutdown =     unix_shutdown,
533         .setsockopt =   sock_no_setsockopt,
534         .getsockopt =   sock_no_getsockopt,
535         .sendmsg =      unix_dgram_sendmsg,
536         .recvmsg =      unix_dgram_recvmsg,
537         .mmap =         sock_no_mmap,
538         .sendpage =     sock_no_sendpage,
539 };
540
541 static const struct proto_ops unix_seqpacket_ops = {
542         .family =       PF_UNIX,
543         .owner =        THIS_MODULE,
544         .release =      unix_release,
545         .bind =         unix_bind,
546         .connect =      unix_stream_connect,
547         .socketpair =   unix_socketpair,
548         .accept =       unix_accept,
549         .getname =      unix_getname,
550         .poll =         datagram_poll,
551         .ioctl =        unix_ioctl,
552         .listen =       unix_listen,
553         .shutdown =     unix_shutdown,
554         .setsockopt =   sock_no_setsockopt,
555         .getsockopt =   sock_no_getsockopt,
556         .sendmsg =      unix_seqpacket_sendmsg,
557         .recvmsg =      unix_dgram_recvmsg,
558         .mmap =         sock_no_mmap,
559         .sendpage =     sock_no_sendpage,
560 };
561
562 static struct proto unix_proto = {
563         .name     = "UNIX",
564         .owner    = THIS_MODULE,
565         .obj_size = sizeof(struct unix_sock),
566 };
567
568 /*
569  * AF_UNIX sockets do not interact with hardware, hence they
570  * don't trigger interrupts - so it's safe for them to have
571  * bh-unsafe locking for their sk_receive_queue.lock. Split off
572  * this special lock-class by reinitializing the spinlock key:
573  */
574 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
575
576 static struct sock * unix_create1(struct socket *sock)
577 {
578         struct sock *sk = NULL;
579         struct unix_sock *u;
580
581         if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
582                 goto out;
583
584         sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
585         if (!sk)
586                 goto out;
587
588         atomic_inc(&unix_nr_socks);
589
590         sock_init_data(sock,sk);
591         lockdep_set_class(&sk->sk_receive_queue.lock,
592                                 &af_unix_sk_receive_queue_lock_key);
593
594         sk->sk_write_space      = unix_write_space;
595         sk->sk_max_ack_backlog  = sysctl_unix_max_dgram_qlen;
596         sk->sk_destruct         = unix_sock_destructor;
597         u         = unix_sk(sk);
598         u->dentry = NULL;
599         u->mnt    = NULL;
600         spin_lock_init(&u->lock);
601         atomic_set(&u->inflight, sock ? 0 : -1);
602         mutex_init(&u->readlock); /* single task reading lock */
603         init_waitqueue_head(&u->peer_wait);
604         unix_insert_socket(unix_sockets_unbound, sk);
605 out:
606         return sk;
607 }
608
609 static int unix_create(struct socket *sock, int protocol)
610 {
611         if (protocol && protocol != PF_UNIX)
612                 return -EPROTONOSUPPORT;
613
614         sock->state = SS_UNCONNECTED;
615
616         switch (sock->type) {
617         case SOCK_STREAM:
618                 sock->ops = &unix_stream_ops;
619                 break;
620                 /*
621                  *      Believe it or not, BSD has AF_UNIX, SOCK_RAW, though
622                  *      nothing uses it.
623                  */
624         case SOCK_RAW:
625                 sock->type=SOCK_DGRAM;
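                /* fall through */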
626         case SOCK_DGRAM:
627                 sock->ops = &unix_dgram_ops;
628                 break;
629         case SOCK_SEQPACKET:
630                 sock->ops = &unix_seqpacket_ops;
631                 break;
632         default:
633                 return -ESOCKTNOSUPPORT;
634         }
635
636         return unix_create1(sock) ? 0 : -ENOMEM;
637 }
638
639 static int unix_release(struct socket *sock)
640 {
641         struct sock *sk = sock->sk;
642
643         if (!sk)
644                 return 0;
645
646         sock->sk = NULL;
647
648         return unix_release_sock (sk, 0);
649 }
650
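/* Autobind: pick an unused abstract name of the form "\0xxxxx" (five hex
 * digits), retrying with the next ordinal until a free one is found.
 * Called for bind() with an empty address and for implicit binds when
 * SOCK_PASSCRED is set. */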
651 static int unix_autobind(struct socket *sock)
652 {
653         struct sock *sk = sock->sk;
654         struct unix_sock *u = unix_sk(sk);
655         static u32 ordernum = 1;
656         struct unix_address * addr;
657         int err;
658
659         mutex_lock(&u->readlock);
660
661         err = 0;
662         if (u->addr)
663                 goto out;
664
665         err = -ENOMEM;
666         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
667         if (!addr)
668                 goto out;
669
670         addr->name->sun_family = AF_UNIX;
671         atomic_set(&addr->refcnt, 1);
672
673 retry:
674         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
675         addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
676
677         spin_lock(&unix_table_lock);
678         ordernum = (ordernum+1)&0xFFFFF;
679
680         if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
681                                       addr->hash)) {
682                 spin_unlock(&unix_table_lock);
683                 /* Sanity yield. It is an unusual case, but yet... */
684                 if (!(ordernum&0xFF))
685                         yield();
686                 goto retry;
687         }
688         addr->hash ^= sk->sk_type;
689
690         __unix_remove_socket(sk);
691         u->addr = addr;
692         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
693         spin_unlock(&unix_table_lock);
694         err = 0;
695
696 out:    mutex_unlock(&u->readlock);
697         return err;
698 }
699
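/* Resolve the destination of a connect()/sendmsg().  Pathname addresses go
 * through the VFS and are matched by inode; abstract addresses are matched
 * by name in the hash table.  Returns a held sock, or NULL with *error set. */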
700 static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
701                                     int type, unsigned hash, int *error)
702 {
703         struct sock *u;
704         struct nameidata nd;
705         int err = 0;
706         
707         if (sunname->sun_path[0]) {
708                 err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
709                 if (err)
710                         goto fail;
711                 err = vfs_permission(&nd, MAY_WRITE);
712                 if (err)
713                         goto put_fail;
714
715                 err = -ECONNREFUSED;
716                 if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
717                         goto put_fail;
718                 u=unix_find_socket_byinode(nd.dentry->d_inode);
719                 if (!u)
720                         goto put_fail;
721
722                 if (u->sk_type == type)
723                         touch_atime(nd.mnt, nd.dentry);
724
725                 path_release(&nd);
726
727                 err=-EPROTOTYPE;
728                 if (u->sk_type != type) {
729                         sock_put(u);
730                         goto fail;
731                 }
732         } else {
733                 err = -ECONNREFUSED;
734                 u=unix_find_socket_byname(sunname, len, type, hash);
735                 if (u) {
736                         struct dentry *dentry;
737                         dentry = unix_sk(u)->dentry;
738                         if (dentry)
739                                 touch_atime(unix_sk(u)->mnt, dentry);
740                 } else
741                         goto fail;
742         }
743         return u;
744
745 put_fail:
746         path_release(&nd);
747 fail:
748         *error=err;
749         return NULL;
750 }
751
752
753 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
754 {
755         struct sock *sk = sock->sk;
756         struct unix_sock *u = unix_sk(sk);
757         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
758         struct dentry * dentry = NULL;
759         struct nameidata nd;
760         int err;
761         unsigned hash;
762         struct unix_address *addr;
763         struct hlist_head *list;
764
765         err = -EINVAL;
766         if (sunaddr->sun_family != AF_UNIX)
767                 goto out;
768
769         if (addr_len==sizeof(short)) {
770                 err = unix_autobind(sock);
771                 goto out;
772         }
773
774         err = unix_mkname(sunaddr, addr_len, &hash);
775         if (err < 0)
776                 goto out;
777         addr_len = err;
778
779         mutex_lock(&u->readlock);
780
781         err = -EINVAL;
782         if (u->addr)
783                 goto out_up;
784
785         err = -ENOMEM;
786         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
787         if (!addr)
788                 goto out_up;
789
790         memcpy(addr->name, sunaddr, addr_len);
791         addr->len = addr_len;
792         addr->hash = hash ^ sk->sk_type;
793         atomic_set(&addr->refcnt, 1);
794
795         if (sunaddr->sun_path[0]) {
796                 unsigned int mode;
797                 err = 0;
798                 /*
799                  * Get the parent directory, calculate the hash for last
800                  * component.
801                  */
802                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
803                 if (err)
804                         goto out_mknod_parent;
805
806                 dentry = lookup_create(&nd, 0);
807                 err = PTR_ERR(dentry);
808                 if (IS_ERR(dentry))
809                         goto out_mknod_unlock;
810
811                 /*
812                  * All right, let's create it.
813                  */
814                 mode = S_IFSOCK |
815                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
816                 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
817                 if (err)
818                         goto out_mknod_dput;
819                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
820                 dput(nd.dentry);
821                 nd.dentry = dentry;
822
823                 addr->hash = UNIX_HASH_SIZE;
824         }
825
826         spin_lock(&unix_table_lock);
827
828         if (!sunaddr->sun_path[0]) {
829                 err = -EADDRINUSE;
830                 if (__unix_find_socket_byname(sunaddr, addr_len,
831                                               sk->sk_type, hash)) {
832                         unix_release_addr(addr);
833                         goto out_unlock;
834                 }
835
836                 list = &unix_socket_table[addr->hash];
837         } else {
838                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
839                 u->dentry = nd.dentry;
840                 u->mnt    = nd.mnt;
841         }
842
843         err = 0;
844         __unix_remove_socket(sk);
845         u->addr = addr;
846         __unix_insert_socket(list, sk);
847
848 out_unlock:
849         spin_unlock(&unix_table_lock);
850 out_up:
851         mutex_unlock(&u->readlock);
852 out:
853         return err;
854
855 out_mknod_dput:
856         dput(dentry);
857 out_mknod_unlock:
858         mutex_unlock(&nd.dentry->d_inode->i_mutex);
859         path_release(&nd);
860 out_mknod_parent:
861         if (err==-EEXIST)
862                 err=-EADDRINUSE;
863         unix_release_addr(addr);
864         goto out_up;
865 }
866
867 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
868                               int alen, int flags)
869 {
870         struct sock *sk = sock->sk;
871         struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
872         struct sock *other;
873         unsigned hash;
874         int err;
875
876         if (addr->sa_family != AF_UNSPEC) {
877                 err = unix_mkname(sunaddr, alen, &hash);
878                 if (err < 0)
879                         goto out;
880                 alen = err;
881
882                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
883                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
884                         goto out;
885
886                 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
887                 if (!other)
888                         goto out;
889
890                 unix_state_wlock(sk);
891
892                 err = -EPERM;
893                 if (!unix_may_send(sk, other))
894                         goto out_unlock;
895
896                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
897                 if (err)
898                         goto out_unlock;
899
900         } else {
901                 /*
902                  *      1003.1g breaking connected state with AF_UNSPEC
903                  */
904                 other = NULL;
905                 unix_state_wlock(sk);
906         }
907
908         /*
909          * If it was connected, reconnect.
910          */
911         if (unix_peer(sk)) {
912                 struct sock *old_peer = unix_peer(sk);
913                 unix_peer(sk)=other;
914                 unix_state_wunlock(sk);
915
916                 if (other != old_peer)
917                         unix_dgram_disconnected(sk, old_peer);
918                 sock_put(old_peer);
919         } else {
920                 unix_peer(sk)=other;
921                 unix_state_wunlock(sk);
922         }
923         return 0;
924
925 out_unlock:
926         unix_state_wunlock(sk);
927         sock_put(other);
928 out:
929         return err;
930 }
931
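/* Called with other's state lock read-held.  Drops the lock and, if the
 * peer is still alive and its receive queue is over the backlog limit,
 * sleeps on its peer_wait queue for at most the remaining timeout. */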
932 static long unix_wait_for_peer(struct sock *other, long timeo)
933 {
934         struct unix_sock *u = unix_sk(other);
935         int sched;
936         DEFINE_WAIT(wait);
937
938         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
939
940         sched = !sock_flag(other, SOCK_DEAD) &&
941                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
942                 (skb_queue_len(&other->sk_receive_queue) >
943                  other->sk_max_ack_backlog);
944
945         unix_state_runlock(other);
946
947         if (sched)
948                 timeo = schedule_timeout(timeo);
949
950         finish_wait(&u->peer_wait, &wait);
951         return timeo;
952 }
953
954 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
955                                int addr_len, int flags)
956 {
957         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
958         struct sock *sk = sock->sk;
959         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
960         struct sock *newsk = NULL;
961         struct sock *other = NULL;
962         struct sk_buff *skb = NULL;
963         unsigned hash;
964         int st;
965         int err;
966         long timeo;
967
968         err = unix_mkname(sunaddr, addr_len, &hash);
969         if (err < 0)
970                 goto out;
971         addr_len = err;
972
973         if (test_bit(SOCK_PASSCRED, &sock->flags)
974                 && !u->addr && (err = unix_autobind(sock)) != 0)
975                 goto out;
976
977         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
978
979         /* First of all, allocate resources.
980            If we do it after the state is locked,
981            we will have to recheck everything again in any case.
982          */
983
984         err = -ENOMEM;
985
986         /* create new sock for complete connection */
987         newsk = unix_create1(NULL);
988         if (newsk == NULL)
989                 goto out;
990
991         /* Allocate skb for sending to listening sock */
992         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
993         if (skb == NULL)
994                 goto out;
995
996 restart:
997         /*  Find listening sock. */
998         other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
999         if (!other)
1000                 goto out;
1001
1002         /* Latch state of peer */
1003         unix_state_rlock(other);
1004
1005         /* Apparently VFS overslept socket death. Retry. */
1006         if (sock_flag(other, SOCK_DEAD)) {
1007                 unix_state_runlock(other);
1008                 sock_put(other);
1009                 goto restart;
1010         }
1011
1012         err = -ECONNREFUSED;
1013         if (other->sk_state != TCP_LISTEN)
1014                 goto out_unlock;
1015
1016         if (skb_queue_len(&other->sk_receive_queue) >
1017             other->sk_max_ack_backlog) {
1018                 err = -EAGAIN;
1019                 if (!timeo)
1020                         goto out_unlock;
1021
1022                 timeo = unix_wait_for_peer(other, timeo);
1023
1024                 err = sock_intr_errno(timeo);
1025                 if (signal_pending(current))
1026                         goto out;
1027                 sock_put(other);
1028                 goto restart;
1029         }
1030
1031         /* Latch our state.
1032
1033            This is a tricky place. We need to grab the write lock and cannot
1034            drop the lock on the peer. It is dangerous because deadlock is
1035            possible. The connect-to-self case and a simultaneous
1036            attempt to connect are eliminated by checking the socket
1037            state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1038            check this before attempting to grab the lock.
1039
1040            Well, and we have to recheck the state after the socket is locked.
1041          */
1042         st = sk->sk_state;
1043
1044         switch (st) {
1045         case TCP_CLOSE:
1046                 /* This is ok... continue with connect */
1047                 break;
1048         case TCP_ESTABLISHED:
1049                 /* Socket is already connected */
1050                 err = -EISCONN;
1051                 goto out_unlock;
1052         default:
1053                 err = -EINVAL;
1054                 goto out_unlock;
1055         }
1056
1057         unix_state_wlock_nested(sk);
1058
1059         if (sk->sk_state != st) {
1060                 unix_state_wunlock(sk);
1061                 unix_state_runlock(other);
1062                 sock_put(other);
1063                 goto restart;
1064         }
1065
1066         err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1067         if (err) {
1068                 unix_state_wunlock(sk);
1069                 goto out_unlock;
1070         }
1071
1072         /* The way is open! Quickly set all the necessary fields... */
1073
1074         sock_hold(sk);
1075         unix_peer(newsk)        = sk;
1076         newsk->sk_state         = TCP_ESTABLISHED;
1077         newsk->sk_type          = sk->sk_type;
1078         newsk->sk_peercred.pid  = current->tgid;
1079         newsk->sk_peercred.uid  = current->euid;
1080         newsk->sk_peercred.gid  = current->egid;
1081         newu = unix_sk(newsk);
1082         newsk->sk_sleep         = &newu->peer_wait;
1083         otheru = unix_sk(other);
1084
1085         /* copy address information from listening to new sock */
1086         if (otheru->addr) {
1087                 atomic_inc(&otheru->addr->refcnt);
1088                 newu->addr = otheru->addr;
1089         }
1090         if (otheru->dentry) {
1091                 newu->dentry    = dget(otheru->dentry);
1092                 newu->mnt       = mntget(otheru->mnt);
1093         }
1094
1095         /* Set credentials */
1096         sk->sk_peercred = other->sk_peercred;
1097
1098         sock->state     = SS_CONNECTED;
1099         sk->sk_state    = TCP_ESTABLISHED;
1100         sock_hold(newsk);
1101
1102         smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
1103         unix_peer(sk)   = newsk;
1104
1105         unix_state_wunlock(sk);
1106
1107         /* take ten and send info to listening sock */
1108         spin_lock(&other->sk_receive_queue.lock);
1109         __skb_queue_tail(&other->sk_receive_queue, skb);
1110         /* Undo artificially decreased inflight after embrion
1111          * is installed to listening socket. */
1112         atomic_inc(&newu->inflight);
1113         spin_unlock(&other->sk_receive_queue.lock);
1114         unix_state_runlock(other);
1115         other->sk_data_ready(other, 0);
1116         sock_put(other);
1117         return 0;
1118
1119 out_unlock:
1120         if (other)
1121                 unix_state_runlock(other);
1122
1123 out:
1124         if (skb)
1125                 kfree_skb(skb);
1126         if (newsk)
1127                 unix_release_sock(newsk, 0);
1128         if (other)
1129                 sock_put(other);
1130         return err;
1131 }
1132
1133 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1134 {
1135         struct sock *ska=socka->sk, *skb = sockb->sk;
1136
1137         /* Join our sockets back to back */
1138         sock_hold(ska);
1139         sock_hold(skb);
1140         unix_peer(ska)=skb;
1141         unix_peer(skb)=ska;
1142         ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1143         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1144         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1145
1146         if (ska->sk_type != SOCK_DGRAM) {
1147                 ska->sk_state = TCP_ESTABLISHED;
1148                 skb->sk_state = TCP_ESTABLISHED;
1149                 socka->state  = SS_CONNECTED;
1150                 sockb->state  = SS_CONNECTED;
1151         }
1152         return 0;
1153 }
1154
1155 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1156 {
1157         struct sock *sk = sock->sk;
1158         struct sock *tsk;
1159         struct sk_buff *skb;
1160         int err;
1161
1162         err = -EOPNOTSUPP;
1163         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1164                 goto out;
1165
1166         err = -EINVAL;
1167         if (sk->sk_state != TCP_LISTEN)
1168                 goto out;
1169
1170         /* If the socket state is TCP_LISTEN it cannot change (for now...),
1171          * so no locks are necessary.
1172          */
1173
1174         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1175         if (!skb) {
1176                 /* This means receive shutdown. */
1177                 if (err == 0)
1178                         err = -EINVAL;
1179                 goto out;
1180         }
1181
1182         tsk = skb->sk;
1183         skb_free_datagram(sk, skb);
1184         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1185
1186         /* attach accepted sock to socket */
1187         unix_state_wlock(tsk);
1188         newsock->state = SS_CONNECTED;
1189         sock_graft(tsk, newsock);
1190         unix_state_wunlock(tsk);
1191         return 0;
1192
1193 out:
1194         return err;
1195 }
1196
1197
1198 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1199 {
1200         struct sock *sk = sock->sk;
1201         struct unix_sock *u;
1202         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1203         int err = 0;
1204
1205         if (peer) {
1206                 sk = unix_peer_get(sk);
1207
1208                 err = -ENOTCONN;
1209                 if (!sk)
1210                         goto out;
1211                 err = 0;
1212         } else {
1213                 sock_hold(sk);
1214         }
1215
1216         u = unix_sk(sk);
1217         unix_state_rlock(sk);
1218         if (!u->addr) {
1219                 sunaddr->sun_family = AF_UNIX;
1220                 sunaddr->sun_path[0] = 0;
1221                 *uaddr_len = sizeof(short);
1222         } else {
1223                 struct unix_address *addr = u->addr;
1224
1225                 *uaddr_len = addr->len;
1226                 memcpy(sunaddr, addr->name, *uaddr_len);
1227         }
1228         unix_state_runlock(sk);
1229         sock_put(sk);
1230 out:
1231         return err;
1232 }
1233
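/* Passed file descriptors travel in the skb control block (UNIXCB(skb).fp).
 * Attaching and detaching them keeps the garbage collector's in-flight
 * counts balanced via unix_inflight()/unix_notinflight(). */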
1234 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1235 {
1236         int i;
1237
1238         scm->fp = UNIXCB(skb).fp;
1239         skb->destructor = sock_wfree;
1240         UNIXCB(skb).fp = NULL;
1241
1242         for (i=scm->fp->count-1; i>=0; i--)
1243                 unix_notinflight(scm->fp->fp[i]);
1244 }
1245
1246 static void unix_destruct_fds(struct sk_buff *skb)
1247 {
1248         struct scm_cookie scm;
1249         memset(&scm, 0, sizeof(scm));
1250         unix_detach_fds(&scm, skb);
1251
1252         /* Alas, it calls VFS */
1253         /* So fscking what? fput() has been SMP-safe since last summer */
1254         scm_destroy(&scm);
1255         sock_wfree(skb);
1256 }
1257
1258 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1259 {
1260         int i;
1261         for (i=scm->fp->count-1; i>=0; i--)
1262                 unix_inflight(scm->fp->fp[i]);
1263         UNIXCB(skb).fp = scm->fp;
1264         skb->destructor = unix_destruct_fds;
1265         scm->fp = NULL;
1266 }
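/*
 * Illustrative userspace sketch (not part of this file): sending one file
 * descriptor with SCM_RIGHTS, which is what the attach/detach helpers above
 * process on the kernel side.  sock_fd and fd_to_pass are placeholders and
 * error handling is omitted.
 *
 *      #include <string.h>
 *      #include <sys/socket.h>
 *
 *      char dummy = 'x', cbuf[CMSG_SPACE(sizeof(int))];
 *      struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
 *      struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
 *                            .msg_control = cbuf,
 *                            .msg_controllen = sizeof(cbuf) };
 *      struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
 *
 *      cm->cmsg_level = SOL_SOCKET;
 *      cm->cmsg_type  = SCM_RIGHTS;
 *      cm->cmsg_len   = CMSG_LEN(sizeof(int));
 *      memcpy(CMSG_DATA(cm), &fd_to_pass, sizeof(int));
 *      sendmsg(sock_fd, &msg, 0);
 */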
1267
1268 /*
1269  *      Send AF_UNIX data.
1270  */
1271
1272 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1273                               struct msghdr *msg, size_t len)
1274 {
1275         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1276         struct sock *sk = sock->sk;
1277         struct unix_sock *u = unix_sk(sk);
1278         struct sockaddr_un *sunaddr=msg->msg_name;
1279         struct sock *other = NULL;
1280         int namelen = 0; /* fake initializer to silence a GCC warning */
1281         int err;
1282         unsigned hash;
1283         struct sk_buff *skb;
1284         long timeo;
1285         struct scm_cookie tmp_scm;
1286
1287         if (NULL == siocb->scm)
1288                 siocb->scm = &tmp_scm;
1289         err = scm_send(sock, msg, siocb->scm);
1290         if (err < 0)
1291                 return err;
1292
1293         err = -EOPNOTSUPP;
1294         if (msg->msg_flags&MSG_OOB)
1295                 goto out;
1296
1297         if (msg->msg_namelen) {
1298                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1299                 if (err < 0)
1300                         goto out;
1301                 namelen = err;
1302         } else {
1303                 sunaddr = NULL;
1304                 err = -ENOTCONN;
1305                 other = unix_peer_get(sk);
1306                 if (!other)
1307                         goto out;
1308         }
1309
1310         if (test_bit(SOCK_PASSCRED, &sock->flags)
1311                 && !u->addr && (err = unix_autobind(sock)) != 0)
1312                 goto out;
1313
1314         err = -EMSGSIZE;
1315         if (len > sk->sk_sndbuf - 32)
1316                 goto out;
1317
1318         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1319         if (skb==NULL)
1320                 goto out;
1321
1322         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1323         if (siocb->scm->fp)
1324                 unix_attach_fds(siocb->scm, skb);
1325
1326         unix_get_peersec_dgram(skb);
1327
1328         skb->h.raw = skb->data;
1329         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1330         if (err)
1331                 goto out_free;
1332
1333         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1334
1335 restart:
1336         if (!other) {
1337                 err = -ECONNRESET;
1338                 if (sunaddr == NULL)
1339                         goto out_free;
1340
1341                 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1342                                         hash, &err);
1343                 if (other==NULL)
1344                         goto out_free;
1345         }
1346
1347         unix_state_rlock(other);
1348         err = -EPERM;
1349         if (!unix_may_send(sk, other))
1350                 goto out_unlock;
1351
1352         if (sock_flag(other, SOCK_DEAD)) {
1353                 /*
1354                  *      Check with 1003.1g - what should
1355                  *      the datagram error be here?
1356                  */
1357                 unix_state_runlock(other);
1358                 sock_put(other);
1359
1360                 err = 0;
1361                 unix_state_wlock(sk);
1362                 if (unix_peer(sk) == other) {
1363                         unix_peer(sk)=NULL;
1364                         unix_state_wunlock(sk);
1365
1366                         unix_dgram_disconnected(sk, other);
1367                         sock_put(other);
1368                         err = -ECONNREFUSED;
1369                 } else {
1370                         unix_state_wunlock(sk);
1371                 }
1372
1373                 other = NULL;
1374                 if (err)
1375                         goto out_free;
1376                 goto restart;
1377         }
1378
1379         err = -EPIPE;
1380         if (other->sk_shutdown & RCV_SHUTDOWN)
1381                 goto out_unlock;
1382
1383         if (sk->sk_type != SOCK_SEQPACKET) {
1384                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1385                 if (err)
1386                         goto out_unlock;
1387         }
1388
1389         if (unix_peer(other) != sk &&
1390             (skb_queue_len(&other->sk_receive_queue) >
1391              other->sk_max_ack_backlog)) {
1392                 if (!timeo) {
1393                         err = -EAGAIN;
1394                         goto out_unlock;
1395                 }
1396
1397                 timeo = unix_wait_for_peer(other, timeo);
1398
1399                 err = sock_intr_errno(timeo);
1400                 if (signal_pending(current))
1401                         goto out_free;
1402
1403                 goto restart;
1404         }
1405
1406         skb_queue_tail(&other->sk_receive_queue, skb);
1407         unix_state_runlock(other);
1408         other->sk_data_ready(other, len);
1409         sock_put(other);
1410         scm_destroy(siocb->scm);
1411         return len;
1412
1413 out_unlock:
1414         unix_state_runlock(other);
1415 out_free:
1416         kfree_skb(skb);
1417 out:
1418         if (other)
1419                 sock_put(other);
1420         scm_destroy(siocb->scm);
1421         return err;
1422 }
1423
1424                 
1425 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1426                                struct msghdr *msg, size_t len)
1427 {
1428         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1429         struct sock *sk = sock->sk;
1430         struct sock *other = NULL;
1431         struct sockaddr_un *sunaddr=msg->msg_name;
1432         int err,size;
1433         struct sk_buff *skb;
1434         int sent=0;
1435         struct scm_cookie tmp_scm;
1436
1437         if (NULL == siocb->scm)
1438                 siocb->scm = &tmp_scm;
1439         err = scm_send(sock, msg, siocb->scm);
1440         if (err < 0)
1441                 return err;
1442
1443         err = -EOPNOTSUPP;
1444         if (msg->msg_flags&MSG_OOB)
1445                 goto out_err;
1446
1447         if (msg->msg_namelen) {
1448                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1449                 goto out_err;
1450         } else {
1451                 sunaddr = NULL;
1452                 err = -ENOTCONN;
1453                 other = unix_peer(sk);
1454                 if (!other)
1455                         goto out_err;
1456         }
1457
1458         if (sk->sk_shutdown & SEND_SHUTDOWN)
1459                 goto pipe_err;
1460
1461         while(sent < len)
1462         {
1463                 /*
1464                  *      Optimisation for the fact that under 0.01% of X
1465                  *      messages typically need breaking up.
1466                  */
1467
1468                 size = len-sent;
1469
1470                 /* Keep two messages in the pipe so it schedules better */
1471                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1472                         size = (sk->sk_sndbuf >> 1) - 64;
1473
1474                 if (size > SKB_MAX_ALLOC)
1475                         size = SKB_MAX_ALLOC;
1476                         
1477                 /*
1478                  *      Grab a buffer
1479                  */
1480                  
1481                 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1482
1483                 if (skb==NULL)
1484                         goto out_err;
1485
1486                 /*
1487                  *      If you pass two values to sock_alloc_send_skb
1488                  *      it tries to grab the large buffer with GFP_NOFS
1489                  *      (which can fail easily), and if that fails it grabs the
1490                  *      fallback-size buffer, which is under a page and will
1491                  *      succeed. [Alan]
1492                  */
1493                 size = min_t(int, size, skb_tailroom(skb));
1494
1495                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1496                 if (siocb->scm->fp)
1497                         unix_attach_fds(siocb->scm, skb);
1498
1499                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1500                         kfree_skb(skb);
1501                         goto out_err;
1502                 }
1503
1504                 unix_state_rlock(other);
1505
1506                 if (sock_flag(other, SOCK_DEAD) ||
1507                     (other->sk_shutdown & RCV_SHUTDOWN))
1508                         goto pipe_err_free;
1509
1510                 skb_queue_tail(&other->sk_receive_queue, skb);
1511                 unix_state_runlock(other);
1512                 other->sk_data_ready(other, size);
1513                 sent+=size;
1514         }
1515
1516         scm_destroy(siocb->scm);
1517         siocb->scm = NULL;
1518
1519         return sent;
1520
1521 pipe_err_free:
1522         unix_state_runlock(other);
1523         kfree_skb(skb);
1524 pipe_err:
1525         if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1526                 send_sig(SIGPIPE,current,0);
1527         err = -EPIPE;
1528 out_err:
1529         scm_destroy(siocb->scm);
1530         siocb->scm = NULL;
1531         return sent ? : err;
1532 }
1533
1534 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1535                                   struct msghdr *msg, size_t len)
1536 {
1537         int err;
1538         struct sock *sk = sock->sk;
1539         
1540         err = sock_error(sk);
1541         if (err)
1542                 return err;
1543
1544         if (sk->sk_state != TCP_ESTABLISHED)
1545                 return -ENOTCONN;
1546
1547         if (msg->msg_namelen)
1548                 msg->msg_namelen = 0;
1549
1550         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1551 }
1552                                                                                             
1553 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1554 {
1555         struct unix_sock *u = unix_sk(sk);
1556
1557         msg->msg_namelen = 0;
1558         if (u->addr) {
1559                 msg->msg_namelen = u->addr->len;
1560                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1561         }
1562 }
1563
1564 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1565                               struct msghdr *msg, size_t size,
1566                               int flags)
1567 {
1568         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1569         struct scm_cookie tmp_scm;
1570         struct sock *sk = sock->sk;
1571         struct unix_sock *u = unix_sk(sk);
1572         int noblock = flags & MSG_DONTWAIT;
1573         struct sk_buff *skb;
1574         int err;
1575
1576         err = -EOPNOTSUPP;
1577         if (flags&MSG_OOB)
1578                 goto out;
1579
1580         msg->msg_namelen = 0;
1581
1582         mutex_lock(&u->readlock);
1583
1584         skb = skb_recv_datagram(sk, flags, noblock, &err);
1585         if (!skb)
1586                 goto out_unlock;
1587
1588         wake_up_interruptible(&u->peer_wait);
1589
1590         if (msg->msg_name)
1591                 unix_copy_addr(msg, skb->sk);
1592
1593         if (size > skb->len)
1594                 size = skb->len;
1595         else if (size < skb->len)
1596                 msg->msg_flags |= MSG_TRUNC;
1597
1598         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1599         if (err)
1600                 goto out_free;
1601
1602         if (!siocb->scm) {
1603                 siocb->scm = &tmp_scm;
1604                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1605         }
1606         siocb->scm->creds = *UNIXCREDS(skb);
1607         unix_set_secdata(siocb->scm, skb);
1608
1609         if (!(flags & MSG_PEEK))
1610         {
1611                 if (UNIXCB(skb).fp)
1612                         unix_detach_fds(siocb->scm, skb);
1613         }
1614         else 
1615         {
1616                 /* It is questionable: on PEEK we could:
1617                    - not return fds - good, but too simple 8)
1618                    - return fds, and not return them on read (the old strategy,
1619                      apparently wrong)
1620                    - clone fds (chosen here, as it is the most universal
1621                      solution)
1622
1623                    POSIX 1003.1g does not actually define this clearly
1624                    at all.  POSIX 1003.1g doesn't define a lot of things
1625                    clearly, however!
1626
1627                 */
1628                 if (UNIXCB(skb).fp)
1629                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1630         }
1631         err = size;
1632
1633         scm_recv(sock, msg, siocb->scm, flags);
1634
1635 out_free:
1636         skb_free_datagram(sk,skb);
1637 out_unlock:
1638         mutex_unlock(&u->readlock);
1639 out:
1640         return err;
1641 }
1642
1643 /*
1644  *      Sleep until data has arrived, but check for races.
1645  */
1646  
1647 static long unix_stream_data_wait(struct sock * sk, long timeo)
1648 {
1649         DEFINE_WAIT(wait);
1650
1651         unix_state_rlock(sk);
1652
1653         for (;;) {
1654                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1655
1656                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1657                     sk->sk_err ||
1658                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1659                     signal_pending(current) ||
1660                     !timeo)
1661                         break;
1662
1663                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1664                 unix_state_runlock(sk);
1665                 timeo = schedule_timeout(timeo);
1666                 unix_state_rlock(sk);
1667                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1668         }
1669
1670         finish_wait(sk->sk_sleep, &wait);
1671         unix_state_runlock(sk);
1672         return timeo;
1673 }
1674
1675
1676
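/*
 *	Stream receive.  Data from consecutive skbs is glued together for as
 *	long as it was sent under the same credentials; an skb carrying
 *	different credentials is pushed back and ends the current read, so
 *	ancillary data is never mixed across writers.
 */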
1677 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1678                                struct msghdr *msg, size_t size,
1679                                int flags)
1680 {
1681         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1682         struct scm_cookie tmp_scm;
1683         struct sock *sk = sock->sk;
1684         struct unix_sock *u = unix_sk(sk);
1685         struct sockaddr_un *sunaddr=msg->msg_name;
1686         int copied = 0;
1687         int check_creds = 0;
1688         int target;
1689         int err = 0;
1690         long timeo;
1691
1692         err = -EINVAL;
1693         if (sk->sk_state != TCP_ESTABLISHED)
1694                 goto out;
1695
1696         err = -EOPNOTSUPP;
1697         if (flags&MSG_OOB)
1698                 goto out;
1699
1700         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1701         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1702
1703         msg->msg_namelen = 0;
1704
1705         /* Lock the socket to prevent the queue from being reordered
1706          * while we sleep in memcpy_toiovec()
1707          */
1708
1709         if (!siocb->scm) {
1710                 siocb->scm = &tmp_scm;
1711                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1712         }
1713
1714         mutex_lock(&u->readlock);
1715
1716         do
1717         {
1718                 int chunk;
1719                 struct sk_buff *skb;
1720
1721                 skb = skb_dequeue(&sk->sk_receive_queue);
1722                 if (skb==NULL)
1723                 {
1724                         if (copied >= target)
1725                                 break;
1726
1727                         /*
1728                          *      POSIX 1003.1g mandates this order.
1729                          */
1730                          
1731                         if ((err = sock_error(sk)) != 0)
1732                                 break;
1733                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1734                                 break;
1735                         err = -EAGAIN;
1736                         if (!timeo)
1737                                 break;
1738                         mutex_unlock(&u->readlock);
1739
1740                         timeo = unix_stream_data_wait(sk, timeo);
1741
1742                         if (signal_pending(current)) {
1743                                 err = sock_intr_errno(timeo);
1744                                 goto out;
1745                         }
1746                         mutex_lock(&u->readlock);
1747                         continue;
1748                 }
1749
1750                 if (check_creds) {
1751                         /* Never glue messages from different writers */
1752                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1753                                 skb_queue_head(&sk->sk_receive_queue, skb);
1754                                 break;
1755                         }
1756                 } else {
1757                         /* Copy credentials */
1758                         siocb->scm->creds = *UNIXCREDS(skb);
1759                         check_creds = 1;
1760                 }
1761
1762                 /* Copy address just once */
1763                 if (sunaddr)
1764                 {
1765                         unix_copy_addr(msg, skb->sk);
1766                         sunaddr = NULL;
1767                 }
1768
1769                 chunk = min_t(unsigned int, skb->len, size);
1770                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1771                         skb_queue_head(&sk->sk_receive_queue, skb);
1772                         if (copied == 0)
1773                                 copied = -EFAULT;
1774                         break;
1775                 }
1776                 copied += chunk;
1777                 size -= chunk;
1778
1779                 /* Mark read part of skb as used */
1780                 if (!(flags & MSG_PEEK))
1781                 {
1782                         skb_pull(skb, chunk);
1783
1784                         if (UNIXCB(skb).fp)
1785                                 unix_detach_fds(siocb->scm, skb);
1786
1787                         /* Put the skb back if we didn't use it up. */
1788                         if (skb->len)
1789                         {
1790                                 skb_queue_head(&sk->sk_receive_queue, skb);
1791                                 break;
1792                         }
1793
1794                         kfree_skb(skb);
1795
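                        /*
                         * Stop once we have picked up passed fds so that they
                         * are delivered together with the data read so far and
                         * are never combined with fds from a later skb.
                         */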
1796                         if (siocb->scm->fp)
1797                                 break;
1798                 }
1799                 else
1800                 {
1801                         /* It is questionable, see note in unix_dgram_recvmsg.
1802                          */
1803                         if (UNIXCB(skb).fp)
1804                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1805
1806                         /* put message back and return */
1807                         skb_queue_head(&sk->sk_receive_queue, skb);
1808                         break;
1809                 }
1810         } while (size);
1811
1812         mutex_unlock(&u->readlock);
1813         scm_recv(sock, msg, siocb->scm, flags);
1814 out:
1815         return copied ? : err;
1816 }
1817
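/*
 *	shutdown(2).  'mode' arrives as SHUT_RD/SHUT_WR/SHUT_RDWR (0..2);
 *	adding one and masking converts it into the internal RCV_SHUTDOWN/
 *	SEND_SHUTDOWN bits.  For connected stream and seqpacket sockets the
 *	mirrored bits are also set on the peer, which is then woken up.
 */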
1818 static int unix_shutdown(struct socket *sock, int mode)
1819 {
1820         struct sock *sk = sock->sk;
1821         struct sock *other;
1822
1823         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1824
1825         if (mode) {
1826                 unix_state_wlock(sk);
1827                 sk->sk_shutdown |= mode;
1828                 other=unix_peer(sk);
1829                 if (other)
1830                         sock_hold(other);
1831                 unix_state_wunlock(sk);
1832                 sk->sk_state_change(sk);
1833
1834                 if (other &&
1835                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1836
1837                         int peer_mode = 0;
1838
1839                         if (mode&RCV_SHUTDOWN)
1840                                 peer_mode |= SEND_SHUTDOWN;
1841                         if (mode&SEND_SHUTDOWN)
1842                                 peer_mode |= RCV_SHUTDOWN;
1843                         unix_state_wlock(other);
1844                         other->sk_shutdown |= peer_mode;
1845                         unix_state_wunlock(other);
1846                         other->sk_state_change(other);
1847                         read_lock(&other->sk_callback_lock);
1848                         if (peer_mode == SHUTDOWN_MASK)
1849                                 sk_wake_async(other,1,POLL_HUP);
1850                         else if (peer_mode & RCV_SHUTDOWN)
1851                                 sk_wake_async(other,1,POLL_IN);
1852                         read_unlock(&other->sk_callback_lock);
1853                 }
1854                 if (other)
1855                         sock_put(other);
1856         }
1857         return 0;
1858 }
1859
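/*
 *	ioctl(2) support.  SIOCOUTQ reports the number of bytes queued in the
 *	send buffer; SIOCINQ reports bytes available for reading - the whole
 *	receive queue for stream/seqpacket sockets, only the first queued
 *	datagram for datagram sockets.
 *
 *	Illustrative userspace sketch (not part of this file; "fd" is a
 *	placeholder for an AF_UNIX socket):
 *
 *		int avail = 0;
 *
 *		if (ioctl(fd, SIOCINQ, &avail) == 0)
 *			printf("%d bytes readable\n", avail);
 */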
1860 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1861 {
1862         struct sock *sk = sock->sk;
1863         long amount=0;
1864         int err;
1865
1866         switch(cmd)
1867         {
1868                 case SIOCOUTQ:
1869                         amount = atomic_read(&sk->sk_wmem_alloc);
1870                         err = put_user(amount, (int __user *)arg);
1871                         break;
1872                 case SIOCINQ:
1873                 {
1874                         struct sk_buff *skb;
1875
1876                         if (sk->sk_state == TCP_LISTEN) {
1877                                 err = -EINVAL;
1878                                 break;
1879                         }
1880
1881                         spin_lock(&sk->sk_receive_queue.lock);
1882                         if (sk->sk_type == SOCK_STREAM ||
1883                             sk->sk_type == SOCK_SEQPACKET) {
1884                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1885                                         amount += skb->len;
1886                         } else {
1887                                 skb = skb_peek(&sk->sk_receive_queue);
1888                                 if (skb)
1889                                         amount=skb->len;
1890                         }
1891                         spin_unlock(&sk->sk_receive_queue.lock);
1892                         err = put_user(amount, (int __user *)arg);
1893                         break;
1894                 }
1895
1896                 default:
1897                         err = -ENOIOCTLCMD;
1898                         break;
1899         }
1900         return err;
1901 }
1902
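/*
 *	poll(2)/select(2) support: build the event mask from the socket
 *	error state, shutdown bits, receive queue and send buffer space.
 */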
1903 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1904 {
1905         struct sock *sk = sock->sk;
1906         unsigned int mask;
1907
1908         poll_wait(file, sk->sk_sleep, wait);
1909         mask = 0;
1910
1911         /* exceptional events? */
1912         if (sk->sk_err)
1913                 mask |= POLLERR;
1914         if (sk->sk_shutdown == SHUTDOWN_MASK)
1915                 mask |= POLLHUP;
1916         if (sk->sk_shutdown & RCV_SHUTDOWN)
1917                 mask |= POLLRDHUP;
1918
1919         /* readable? */
1920         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1921             (sk->sk_shutdown & RCV_SHUTDOWN))
1922                 mask |= POLLIN | POLLRDNORM;
1923
1924         /* Connection-based sockets need to check for termination and startup */
1925         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1926                 mask |= POLLHUP;
1927
1928         /*
1929          * We also mark the socket writable when the other side has shut
1930          * down the connection; this prevents stuck sockets.
1931          */
1932         if (unix_writable(sk))
1933                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1934
1935         return mask;
1936 }
1937
1938
1939 #ifdef CONFIG_PROC_FS
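/*
 *	/proc/net/unix: a seq_file walk over the global unix socket table.
 *	The iterator state is a single hash-slot index allocated in
 *	unix_seq_open(), and unix_table_lock is held from start to stop.
 */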
1940 static struct sock *unix_seq_idx(int *iter, loff_t pos)
1941 {
1942         loff_t off = 0;
1943         struct sock *s;
1944
1945         for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1946                 if (off == pos) 
1947                         return s;
1948                 ++off;
1949         }
1950         return NULL;
1951 }
1952
1953
1954 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1955 {
1956         spin_lock(&unix_table_lock);
1957         return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1958 }
1959
1960 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1961 {
1962         ++*pos;
1963
1964         if (v == (void *)1) 
1965                 return first_unix_socket(seq->private);
1966         return next_unix_socket(seq->private, v);
1967 }
1968
1969 static void unix_seq_stop(struct seq_file *seq, void *v)
1970 {
1971         spin_unlock(&unix_table_lock);
1972 }
1973
1974 static int unix_seq_show(struct seq_file *seq, void *v)
1975 {
1976         
1977         if (v == (void *)1)
1978                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
1979                          "Inode Path\n");
1980         else {
1981                 struct sock *s = v;
1982                 struct unix_sock *u = unix_sk(s);
1983                 unix_state_rlock(s);
1984
1985                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1986                         s,
1987                         atomic_read(&s->sk_refcnt),
1988                         0,
1989                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1990                         s->sk_type,
1991                         s->sk_socket ?
1992                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1993                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1994                         sock_i_ino(s));
1995
1996                 if (u->addr) {
1997                         int i, len;
1998                         seq_putc(seq, ' ');
1999
2000                         i = 0;
2001                         len = u->addr->len - sizeof(short);
2002                         if (!UNIX_ABSTRACT(s))
2003                                 len--;
2004                         else {
2005                                 seq_putc(seq, '@');
2006                                 i++;
2007                         }
2008                         for ( ; i < len; i++)
2009                                 seq_putc(seq, u->addr->name->sun_path[i]);
2010                 }
2011                 unix_state_runlock(s);
2012                 seq_putc(seq, '\n');
2013         }
2014
2015         return 0;
2016 }
2017
2018 static struct seq_operations unix_seq_ops = {
2019         .start  = unix_seq_start,
2020         .next   = unix_seq_next,
2021         .stop   = unix_seq_stop,
2022         .show   = unix_seq_show,
2023 };
2024
2025
2026 static int unix_seq_open(struct inode *inode, struct file *file)
2027 {
2028         struct seq_file *seq;
2029         int rc = -ENOMEM;
2030         int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2031
2032         if (!iter)
2033                 goto out;
2034
2035         rc = seq_open(file, &unix_seq_ops);
2036         if (rc)
2037                 goto out_kfree;
2038
2039         seq          = file->private_data;
2040         seq->private = iter;
2041         *iter = 0;
2042 out:
2043         return rc;
2044 out_kfree:
2045         kfree(iter);
2046         goto out;
2047 }
2048
2049 static struct file_operations unix_seq_fops = {
2050         .owner          = THIS_MODULE,
2051         .open           = unix_seq_open,
2052         .read           = seq_read,
2053         .llseek         = seq_lseek,
2054         .release        = seq_release_private,
2055 };
2056
2057 #endif
2058
2059 static struct net_proto_family unix_family_ops = {
2060         .family = PF_UNIX,
2061         .create = unix_create,
2062         .owner  = THIS_MODULE,
2063 };
2064
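/*
 *	Module initialisation: check that our per-skb control block fits in
 *	skb->cb, register the protocol and the PF_UNIX socket family, then
 *	create the /proc and sysctl entries.
 */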
2065 static int __init af_unix_init(void)
2066 {
2067         int rc = -1;
2068         struct sk_buff *dummy_skb;
2069
2070         if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2071                 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2072                 goto out;
2073         }
2074
2075         rc = proto_register(&unix_proto, 1);
2076         if (rc != 0) {
2077                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2078                        __FUNCTION__);
2079                 goto out;
2080         }
2081
2082         sock_register(&unix_family_ops);
2083 #ifdef CONFIG_PROC_FS
2084         proc_net_fops_create("unix", 0, &unix_seq_fops);
2085 #endif
2086         unix_sysctl_register();
2087 out:
2088         return rc;
2089 }
2090
2091 static void __exit af_unix_exit(void)
2092 {
2093         sock_unregister(PF_UNIX);
2094         unix_sysctl_unregister();
2095         proc_net_remove("unix");
2096         proto_unregister(&unix_proto);
2097 }
2098
2099 module_init(af_unix_init);
2100 module_exit(af_unix_exit);
2101
2102 MODULE_LICENSE("GPL");
2103 MODULE_ALIAS_NETPROTO(PF_UNIX);