net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Fixes:
  12  *              Linus Torvalds  :       Assorted bug cures.
  13  *              Niibe Yutaka    :       async I/O support.
  14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15  *              Alan Cox        :       Limit size of allocated blocks.
  16  *              Alan Cox        :       Fixed the stupid socketpair bug.
  17  *              Alan Cox        :       BSD compatibility fine tuning.
  18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19  *              Alan Cox        :       Sorted out a proper draft version of
  20  *                                      file descriptor passing hacked up from
  21  *                                      Mike Shaver's work.
  22  *              Marty Leisner   :       Fixes to fd passing
  23  *              Nick Nevin      :       recvmsg bugfix.
  24  *              Alan Cox        :       Started proper garbage collector
  25  *              Heiko EiBfeldt  :       Missing verify_area check
  26  *              Alan Cox        :       Started POSIXisms
  27  *              Andreas Schwab  :       Replace inode by dentry for proper
  28  *                                      reference counting
  29  *              Kirk Petersen   :       Made this a module
  30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31  *                                      Lots of bug fixes.
   32  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
   33  *                                      by above two patches.
   34  *           Andrea Arcangeli   :       If possible we block in connect(2)
   35  *                                      if the max backlog of the listen socket
   36  *                                      has been reached. This won't break
   37  *                                      old apps and it will avoid a huge number
   38  *                                      of socks being hashed (this is for
   39  *                                      unix_gc() performance reasons).
  40  *                                      Security fix that limits the max
  41  *                                      number of socks to 2*max_files and
  42  *                                      the number of skb queueable in the
  43  *                                      dgram receiver.
  44  *              Artur Skawina   :       Hash function optimizations
  45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46  *            Malcolm Beattie   :       Set peercred for socketpair
  47  *           Michal Ostrowski   :       Module initialization cleanup.
  48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49  *                                      the core infrastructure is doing that
  50  *                                      for all net proto families now (2.5.69+)
  51  *
  52  *
  53  * Known differences from reference BSD that was tested:
  54  *
  55  *      [TO FIX]
  56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57  *              other the moment one end closes.
  58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60  *      [NOT TO FIX]
  61  *      accept() returns a path name even if the connecting socket has closed
  62  *              in the meantime (BSD loses the path and gives up).
  63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66  *      BSD af_unix apparently has connect forgetting to block properly.
  67  *              (need to check this with the POSIX spec in detail)
  68  *
  69  * Differences from 2.0.0-11-... (ANK)
  70  *      Bug fixes and improvements.
  71  *              - client shutdown killed server socket.
  72  *              - removed all useless cli/sti pairs.
  73  *
  74  *      Semantic changes/extensions.
  75  *              - generic control message passing.
  76  *              - SCM_CREDENTIALS control message.
  77  *              - "Abstract" (not FS based) socket bindings.
  78  *                Abstract names are sequences of bytes (not zero terminated)
  79  *                started by 0, so that this name space does not intersect
  80  *                with BSD names.
  81  */
  82
  83 #include <linux/module.h>
  84 #include <linux/kernel.h>
  85 #include <linux/signal.h>
  86 #include <linux/sched.h>
  87 #include <linux/errno.h>
  88 #include <linux/string.h>
  89 #include <linux/stat.h>
  90 #include <linux/dcache.h>
  91 #include <linux/namei.h>
  92 #include <linux/socket.h>
  93 #include <linux/un.h>
  94 #include <linux/fcntl.h>
  95 #include <linux/termios.h>
  96 #include <linux/sockios.h>
  97 #include <linux/net.h>
  98 #include <linux/in.h>
  99 #include <linux/fs.h>
 100 #include <linux/slab.h>
 101 #include <asm/uaccess.h>
 102 #include <linux/skbuff.h>
 103 #include <linux/netdevice.h>
 104 #include <net/net_namespace.h>
 105 #include <net/sock.h>
 106 #include <net/tcp_states.h>
 107 #include <net/af_unix.h>
 108 #include <linux/proc_fs.h>
 109 #include <linux/seq_file.h>
 110 #include <net/scm.h>
 111 #include <linux/init.h>
 112 #include <linux/poll.h>
 113 #include <linux/rtnetlink.h>
 114 #include <linux/mount.h>
 115 #include <net/checksum.h>
 116 #include <linux/security.h>
 117
/*
 * File-wide state for AF_UNIX sockets.
 *
 * unix_socket_table holds UNIX_HASH_SIZE hashed buckets plus one extra
 * bucket at index UNIX_HASH_SIZE (unix_sockets_unbound), which is where
 * unix_create1() places newly created sockets.  The table is protected by
 * unix_table_lock.  unix_nr_socks is incremented in unix_create1() and
 * decremented in unix_sock_destructor().
 *
 * UNIX_ABSTRACT(sk) dereferences unix_sk(sk)->addr, so it must only be
 * used on sockets that already have an address.
 * NOTE(review): it relies on non-abstract addresses storing
 * hash == UNIX_HASH_SIZE; that assignment is not visible here -- confirm.
 */
  118 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
  119 static DEFINE_SPINLOCK(unix_table_lock);
  120 static atomic_long_t unix_nr_socks;
  121
  122 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
  123
  124 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
 125
 126 #ifdef CONFIG_SECURITY_NETWORK
 127 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 128 {
 129         memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
 130 }
 131
 132 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 133 {
 134         scm->secid = *UNIXSID(skb);
 135 }
 136 #else
 137 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 138 { }
 139
 140 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 141 { }
 142 #endif /* CONFIG_SECURITY_NETWORK */
 143
 144 /*
 145  *  SMP locking strategy:
 146  *    hash table is protected with spinlock unix_table_lock
 147  *    each socket state is protected by separate spin lock.
 148  */
 149
 150 static inline unsigned unix_hash_fold(__wsum n)
 151 {
 152         unsigned hash = (__force unsigned)n;
 153         hash ^= hash>>16;
 154         hash ^= hash>>8;
 155         return hash&(UNIX_HASH_SIZE-1);
 156 }
 157
 158 #define unix_peer(sk) (unix_sk(sk)->peer)
 159
 160 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 161 {
 162         return unix_peer(osk) == sk;
 163 }
 164
 165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 166 {
 167         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 168 }
 169
 170 static inline int unix_recvq_full(struct sock const *sk)
 171 {
 172         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 173 }
 174
 175 static struct sock *unix_peer_get(struct sock *s)
 176 {
 177         struct sock *peer;
 178
 179         unix_state_lock(s);
 180         peer = unix_peer(s);
 181         if (peer)
 182                 sock_hold(peer);
 183         unix_state_unlock(s);
 184         return peer;
 185 }
 186
 187 static inline void unix_release_addr(struct unix_address *addr)
 188 {
 189         if (atomic_dec_and_test(&addr->refcnt))
 190                 kfree(addr);
 191 }
 192
 193 /*
 194  *      Check unix socket name:
 195  *              - should be not zero length.
 196  *              - if started by not zero, should be NULL terminated (FS object)
 197  *              - if started by zero, it is abstract name.
 198  */
 199
 200 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
 201 {
 202         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 203                 return -EINVAL;
 204         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 205                 return -EINVAL;
 206         if (sunaddr->sun_path[0]) {
 207                 /*
 208                  * This may look like an off by one error but it is a bit more
 209                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 210                  * sun_path[108] doesn't as such exist.  However in kernel space
 211                  * we are guaranteed that it is a valid memory location in our
 212                  * kernel address buffer.
 213                  */
 214                 ((char *)sunaddr)[len] = 0;
 215                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 216                 return len;
 217         }
 218
 219         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 220         return len;
 221 }
 222
 223 static void __unix_remove_socket(struct sock *sk)
 224 {
 225         sk_del_node_init(sk);
 226 }
 227
 228 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 229 {
 230         WARN_ON(!sk_unhashed(sk));
 231         sk_add_node(sk, list);
 232 }
 233
 234 static inline void unix_remove_socket(struct sock *sk)
 235 {
 236         spin_lock(&unix_table_lock);
 237         __unix_remove_socket(sk);
 238         spin_unlock(&unix_table_lock);
 239 }
 240
 241 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 242 {
 243         spin_lock(&unix_table_lock);
 244         __unix_insert_socket(list, sk);
 245         spin_unlock(&unix_table_lock);
 246 }
 247
 248 static struct sock *__unix_find_socket_byname(struct net *net,
 249                                               struct sockaddr_un *sunname,
 250                                               int len, int type, unsigned hash)
 251 {
 252         struct sock *s;
 253         struct hlist_node *node;
 254
 255         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
 256                 struct unix_sock *u = unix_sk(s);
 257
 258                 if (!net_eq(sock_net(s), net))
 259                         continue;
 260
 261                 if (u->addr->len == len &&
 262                     !memcmp(u->addr->name, sunname, len))
 263                         goto found;
 264         }
 265         s = NULL;
 266 found:
 267         return s;
 268 }
 269
 270 static inline struct sock *unix_find_socket_byname(struct net *net,
 271                                                    struct sockaddr_un *sunname,
 272                                                    int len, int type,
 273                                                    unsigned hash)
 274 {
 275         struct sock *s;
 276
 277         spin_lock(&unix_table_lock);
 278         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 279         if (s)
 280                 sock_hold(s);
 281         spin_unlock(&unix_table_lock);
 282         return s;
 283 }
 284
 285 static struct sock *unix_find_socket_byinode(struct inode *i)
 286 {
 287         struct sock *s;
 288         struct hlist_node *node;
 289
 290         spin_lock(&unix_table_lock);
 291         sk_for_each(s, node,
 292                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 293                 struct dentry *dentry = unix_sk(s)->dentry;
 294
 295                 if (dentry && dentry->d_inode == i) {
 296                         sock_hold(s);
 297                         goto found;
 298                 }
 299         }
 300         s = NULL;
 301 found:
 302         spin_unlock(&unix_table_lock);
 303         return s;
 304 }
 305
 306 static inline int unix_writable(struct sock *sk)
 307 {
 308         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 309 }
 310
 311 static void unix_write_space(struct sock *sk)
 312 {
 313         struct socket_wq *wq;
 314
 315         rcu_read_lock();
 316         if (unix_writable(sk)) {
 317                 wq = rcu_dereference(sk->sk_wq);
 318                 if (wq_has_sleeper(wq))
 319                         wake_up_interruptible_sync_poll(&wq->wait,
 320                                 POLLOUT | POLLWRNORM | POLLWRBAND);
 321                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 322         }
 323         rcu_read_unlock();
 324 }
 325
 326 /* When dgram socket disconnects (or changes its peer), we clear its receive
 327  * queue of packets arrived from previous peer. First, it allows to do
 328  * flow control based only on wmem_alloc; second, sk connected to peer
 329  * may receive messages only from that peer. */
 330 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 331 {
 332         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 333                 skb_queue_purge(&sk->sk_receive_queue);
 334                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 335
 336                 /* If one link of bidirectional dgram pipe is disconnected,
 337                  * we signal error. Messages are lost. Do not make this,
 338                  * when peer was not connected to us.
 339                  */
 340                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 341                         other->sk_err = ECONNRESET;
 342                         other->sk_error_report(other);
 343                 }
 344         }
 345 }
 346
 347 static void unix_sock_destructor(struct sock *sk)
 348 {
 349         struct unix_sock *u = unix_sk(sk);
 350
 351         skb_queue_purge(&sk->sk_receive_queue);
 352
 353         WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 354         WARN_ON(!sk_unhashed(sk));
 355         WARN_ON(sk->sk_socket);
 356         if (!sock_flag(sk, SOCK_DEAD)) {
 357                 printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
 358                 return;
 359         }
 360
 361         if (u->addr)
 362                 unix_release_addr(u->addr);
 363
 364         atomic_long_dec(&unix_nr_socks);
 365         local_bh_disable();
 366         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 367         local_bh_enable();
 368 #ifdef UNIX_REFCNT_DEBUG
 369         printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
 370                 atomic_long_read(&unix_nr_socks));
 371 #endif
 372 }
 373
 374 static void unix_release_sock(struct sock *sk, int embrion)
 375 {
 376         struct unix_sock *u = unix_sk(sk);
 377         struct dentry *dentry;
 378         struct vfsmount *mnt;
 379         struct sock *skpair;
 380         struct sk_buff *skb;
 381         int state;
 382
 383         unix_remove_socket(sk);
 384
 385         /* Clear state */
 386         unix_state_lock(sk);
 387         sock_orphan(sk);
 388         sk->sk_shutdown = SHUTDOWN_MASK;
 389         dentry       = u->dentry;
 390         u->dentry    = NULL;
 391         mnt          = u->mnt;
 392         u->mnt       = NULL;
 393         state = sk->sk_state;
 394         sk->sk_state = TCP_CLOSE;
 395         unix_state_unlock(sk);
 396
 397         wake_up_interruptible_all(&u->peer_wait);
 398
 399         skpair = unix_peer(sk);
 400
 401         if (skpair != NULL) {
 402                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 403                         unix_state_lock(skpair);
 404                         /* No more writes */
 405                         skpair->sk_shutdown = SHUTDOWN_MASK;
 406                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 407                                 skpair->sk_err = ECONNRESET;
 408                         unix_state_unlock(skpair);
 409                         skpair->sk_state_change(skpair);
 410                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 411                 }
 412                 sock_put(skpair); /* It may now die */
 413                 unix_peer(sk) = NULL;
 414         }
 415
 416         /* Try to flush out this socket. Throw out buffers at least */
 417
 418         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 419                 if (state == TCP_LISTEN)
 420                         unix_release_sock(skb->sk, 1);
 421                 /* passed fds are erased in the kfree_skb hook        */
 422                 kfree_skb(skb);
 423         }
 424
 425         if (dentry) {
 426                 dput(dentry);
 427                 mntput(mnt);
 428         }
 429
 430         sock_put(sk);
 431
 432         /* ---- Socket is dead now and most probably destroyed ---- */
 433
 434         /*
 435          * Fixme: BSD difference: In BSD all sockets connected to use get
 436          *        ECONNRESET and we die on the spot. In Linux we behave
 437          *        like files and pipes do and wait for the last
 438          *        dereference.
 439          *
 440          * Can't we simply set sock->err?
 441          *
 442          *        What the above comment does talk about? --ANK(980817)
 443          */
 444
 445         if (unix_tot_inflight)
 446                 unix_gc();              /* Garbage collect fds */
 447 }
 448
 449 static void init_peercred(struct sock *sk)
 450 {
 451         put_pid(sk->sk_peer_pid);
 452         if (sk->sk_peer_cred)
 453                 put_cred(sk->sk_peer_cred);
 454         sk->sk_peer_pid  = get_pid(task_tgid(current));
 455         sk->sk_peer_cred = get_current_cred();
 456 }
 457
 458 static void copy_peercred(struct sock *sk, struct sock *peersk)
 459 {
 460         put_pid(sk->sk_peer_pid);
 461         if (sk->sk_peer_cred)
 462                 put_cred(sk->sk_peer_cred);
 463         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 464         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 465 }
 466
 467 static int unix_listen(struct socket *sock, int backlog)
 468 {
 469         int err;
 470         struct sock *sk = sock->sk;
 471         struct unix_sock *u = unix_sk(sk);
 472         struct pid *old_pid = NULL;
 473         const struct cred *old_cred = NULL;
 474
 475         err = -EOPNOTSUPP;
 476         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 477                 goto out;       /* Only stream/seqpacket sockets accept */
 478         err = -EINVAL;
 479         if (!u->addr)
 480                 goto out;       /* No listens on an unbound socket */
 481         unix_state_lock(sk);
 482         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 483                 goto out_unlock;
 484         if (backlog > sk->sk_max_ack_backlog)
 485                 wake_up_interruptible_all(&u->peer_wait);
 486         sk->sk_max_ack_backlog  = backlog;
 487         sk->sk_state            = TCP_LISTEN;
 488         /* set credentials so connect can copy them */
 489         init_peercred(sk);
 490         err = 0;
 491
 492 out_unlock:
 493         unix_state_unlock(sk);
 494         put_pid(old_pid);
 495         if (old_cred)
 496                 put_cred(old_cred);
 497 out:
 498         return err;
 499 }
 500
/*
 * Forward declarations of the socket operation handlers, so that the
 * proto_ops tables that follow can reference them before their
 * definitions.
 */
  501 static int unix_release(struct socket *);
  502 static int unix_bind(struct socket *, struct sockaddr *, int);
  503 static int unix_stream_connect(struct socket *, struct sockaddr *,
  504                                int addr_len, int flags);
  505 static int unix_socketpair(struct socket *, struct socket *);
  506 static int unix_accept(struct socket *, struct socket *, int);
  507 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
  508 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
  509 static unsigned int unix_dgram_poll(struct file *, struct socket *,
  510                                     poll_table *);
  511 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
  512 static int unix_shutdown(struct socket *, int);
  513 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
  514                                struct msghdr *, size_t);
  515 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
  516                                struct msghdr *, size_t, int);
  517 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
  518                               struct msghdr *, size_t);
  519 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
  520                               struct msghdr *, size_t, int);
  521 static int unix_dgram_connect(struct socket *, struct sockaddr *,
  522                               int, int);
  523 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
  524                                   struct msghdr *, size_t);
  525 static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
  526                                   struct msghdr *, size_t, int);
 527
/*
 * Operations table that unix_create() installs for SOCK_STREAM sockets.
 * setsockopt, getsockopt, mmap and sendpage are wired to the generic
 * sock_no_* handlers.
 */
  528 static const struct proto_ops unix_stream_ops = {
  529         .family =       PF_UNIX,
  530         .owner =        THIS_MODULE,
  531         .release =      unix_release,
  532         .bind =         unix_bind,
  533         .connect =      unix_stream_connect,
  534         .socketpair =   unix_socketpair,
  535         .accept =       unix_accept,
  536         .getname =      unix_getname,
  537         .poll =         unix_poll,
  538         .ioctl =        unix_ioctl,
  539         .listen =       unix_listen,
  540         .shutdown =     unix_shutdown,
  541         .setsockopt =   sock_no_setsockopt,
  542         .getsockopt =   sock_no_getsockopt,
  543         .sendmsg =      unix_stream_sendmsg,
  544         .recvmsg =      unix_stream_recvmsg,
  545         .mmap =         sock_no_mmap,
  546         .sendpage =     sock_no_sendpage,
  547 };
 548
/*
 * Operations table that unix_create() installs for SOCK_DGRAM sockets
 * (SOCK_RAW requests are converted to SOCK_DGRAM there as well).
 * accept and listen are wired to the generic sock_no_* handlers, as are
 * setsockopt, getsockopt, mmap and sendpage.
 */
  549 static const struct proto_ops unix_dgram_ops = {
  550         .family =       PF_UNIX,
  551         .owner =        THIS_MODULE,
  552         .release =      unix_release,
  553         .bind =         unix_bind,
  554         .connect =      unix_dgram_connect,
  555         .socketpair =   unix_socketpair,
  556         .accept =       sock_no_accept,
  557         .getname =      unix_getname,
  558         .poll =         unix_dgram_poll,
  559         .ioctl =        unix_ioctl,
  560         .listen =       sock_no_listen,
  561         .shutdown =     unix_shutdown,
  562         .setsockopt =   sock_no_setsockopt,
  563         .getsockopt =   sock_no_getsockopt,
  564         .sendmsg =      unix_dgram_sendmsg,
  565         .recvmsg =      unix_dgram_recvmsg,
  566         .mmap =         sock_no_mmap,
  567         .sendpage =     sock_no_sendpage,
  568 };
 569
/*
 * Operations table that unix_create() installs for SOCK_SEQPACKET sockets.
 * It shares connect/accept/listen with the stream table, uses the dgram
 * poll handler, and has its own sendmsg/recvmsg entry points.
 */
  570 static const struct proto_ops unix_seqpacket_ops = {
  571         .family =       PF_UNIX,
  572         .owner =        THIS_MODULE,
  573         .release =      unix_release,
  574         .bind =         unix_bind,
  575         .connect =      unix_stream_connect,
  576         .socketpair =   unix_socketpair,
  577         .accept =       unix_accept,
  578         .getname =      unix_getname,
  579         .poll =         unix_dgram_poll,
  580         .ioctl =        unix_ioctl,
  581         .listen =       unix_listen,
  582         .shutdown =     unix_shutdown,
  583         .setsockopt =   sock_no_setsockopt,
  584         .getsockopt =   sock_no_getsockopt,
  585         .sendmsg =      unix_seqpacket_sendmsg,
  586         .recvmsg =      unix_seqpacket_recvmsg,
  587         .mmap =         sock_no_mmap,
  588         .sendpage =     sock_no_sendpage,
  589 };
 590
/*
 * Protocol descriptor handed to sk_alloc() in unix_create1(); obj_size
 * makes each allocated sock large enough to hold a struct unix_sock.
 */
  591 static struct proto unix_proto = {
  592         .name                   = "UNIX",
  593         .owner                  = THIS_MODULE,
  594         .obj_size               = sizeof(struct unix_sock),
  595 };
 596
  597 /*
  598  * AF_UNIX sockets do not interact with hardware, hence they
  599  * don't trigger interrupts - so it's safe for them to have
  600  * bh-unsafe locking for their sk_receive_queue.lock. Split off
  601  * this special lock-class by reinitializing the spinlock key
  602  * (applied via lockdep_set_class() in unix_create1()):
  603  */
  603 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
 604
 605 static struct sock *unix_create1(struct net *net, struct socket *sock)
 606 {
 607         struct sock *sk = NULL;
 608         struct unix_sock *u;
 609
 610         atomic_long_inc(&unix_nr_socks);
 611         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 612                 goto out;
 613
 614         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
 615         if (!sk)
 616                 goto out;
 617
 618         sock_init_data(sock, sk);
 619         lockdep_set_class(&sk->sk_receive_queue.lock,
 620                                 &af_unix_sk_receive_queue_lock_key);
 621
 622         sk->sk_write_space      = unix_write_space;
 623         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 624         sk->sk_destruct         = unix_sock_destructor;
 625         u         = unix_sk(sk);
 626         u->dentry = NULL;
 627         u->mnt    = NULL;
 628         spin_lock_init(&u->lock);
 629         atomic_long_set(&u->inflight, 0);
 630         INIT_LIST_HEAD(&u->link);
 631         mutex_init(&u->readlock); /* single task reading lock */
 632         init_waitqueue_head(&u->peer_wait);
 633         unix_insert_socket(unix_sockets_unbound, sk);
 634 out:
 635         if (sk == NULL)
 636                 atomic_long_dec(&unix_nr_socks);
 637         else {
 638                 local_bh_disable();
 639                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 640                 local_bh_enable();
 641         }
 642         return sk;
 643 }
 644
 645 static int unix_create(struct net *net, struct socket *sock, int protocol,
 646                        int kern)
 647 {
 648         if (protocol && protocol != PF_UNIX)
 649                 return -EPROTONOSUPPORT;
 650
 651         sock->state = SS_UNCONNECTED;
 652
 653         switch (sock->type) {
 654         case SOCK_STREAM:
 655                 sock->ops = &unix_stream_ops;
 656                 break;
 657                 /*
 658                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 659                  *      nothing uses it.
 660                  */
 661         case SOCK_RAW:
 662                 sock->type = SOCK_DGRAM;
 663         case SOCK_DGRAM:
 664                 sock->ops = &unix_dgram_ops;
 665                 break;
 666         case SOCK_SEQPACKET:
 667                 sock->ops = &unix_seqpacket_ops;
 668                 break;
 669         default:
 670                 return -ESOCKTNOSUPPORT;
 671         }
 672
 673         return unix_create1(net, sock) ? 0 : -ENOMEM;
 674 }
 675
 676 static int unix_release(struct socket *sock)
 677 {
 678         struct sock *sk = sock->sk;
 679
 680         if (!sk)
 681                 return 0;
 682
 683         unix_release_sock(sk, 0);
 684         sock->sk = NULL;
 685
 686         return 0;
 687 }
 688
 689 static int unix_autobind(struct socket *sock)
 690 {
 691         struct sock *sk = sock->sk;
 692         struct net *net = sock_net(sk);
 693         struct unix_sock *u = unix_sk(sk);
 694         static u32 ordernum = 1;
 695         struct unix_address *addr;
 696         int err;
 697         unsigned int retries = 0;
 698
 699         mutex_lock(&u->readlock);
 700
 701         err = 0;
 702         if (u->addr)
 703                 goto out;
 704
 705         err = -ENOMEM;
 706         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 707         if (!addr)
 708                 goto out;
 709
 710         addr->name->sun_family = AF_UNIX;
 711         atomic_set(&addr->refcnt, 1);
 712
 713 retry:
 714         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 715         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 716
 717         spin_lock(&unix_table_lock);
 718         ordernum = (ordernum+1)&0xFFFFF;
 719
 720         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 721                                       addr->hash)) {
 722                 spin_unlock(&unix_table_lock);
 723                 /*
 724                  * __unix_find_socket_byname() may take long time if many names
 725                  * are already in use.
 726                  */
 727                 cond_resched();
 728                 /* Give up if all names seems to be in use. */
 729                 if (retries++ == 0xFFFFF) {
 730                         err = -ENOSPC;
 731                         kfree(addr);
 732                         goto out;
 733                 }
 734                 goto retry;
 735         }
 736         addr->hash ^= sk->sk_type;
 737
 738         __unix_remove_socket(sk);
 739         u->addr = addr;
 740         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 741         spin_unlock(&unix_table_lock);
 742         err = 0;
 743
 744 out:    mutex_unlock(&u->readlock);
 745         return err;
 746 }
 747
 748 static struct sock *unix_find_other(struct net *net,
 749                                     struct sockaddr_un *sunname, int len,
 750                                     int type, unsigned hash, int *error)
 751 {
 752         struct sock *u;
 753         struct path path;
 754         int err = 0;
 755
 756         if (sunname->sun_path[0]) {
 757                 struct inode *inode;
 758                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 759                 if (err)
 760                         goto fail;
 761                 inode = path.dentry->d_inode;
 762                 err = inode_permission(inode, MAY_WRITE);
 763                 if (err)
 764                         goto put_fail;
 765
 766                 err = -ECONNREFUSED;
 767                 if (!S_ISSOCK(inode->i_mode))
 768                         goto put_fail;
 769                 u = unix_find_socket_byinode(inode);
 770                 if (!u)
 771                         goto put_fail;
 772
 773                 if (u->sk_type == type)
 774                         touch_atime(path.mnt, path.dentry);
 775
 776                 path_put(&path);
 777
 778                 err = -EPROTOTYPE;
 779                 if (u->sk_type != type) {
 780                         sock_put(u);
 781                         goto fail;
 782                 }
 783         } else {
 784                 err = -ECONNREFUSED;
 785                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 786                 if (u) {
 787                         struct dentry *dentry;
 788                         dentry = unix_sk(u)->dentry;
 789                         if (dentry)
 790                                 touch_atime(unix_sk(u)->mnt, dentry);
 791                 } else
 792                         goto fail;
 793         }
 794         return u;
 795
 796 put_fail:
 797         path_put(&path);
 798 fail:
 799         *error = err;
 800         return NULL;
 801 }
 802
 803
 804 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 805 {
 806         struct sock *sk = sock->sk;
 807         struct net *net = sock_net(sk);
 808         struct unix_sock *u = unix_sk(sk);
 809         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 810         char *sun_path = sunaddr->sun_path;
 811         struct dentry *dentry = NULL;
 812         struct path path;
 813         int err;
 814         unsigned hash;
 815         struct unix_address *addr;
 816         struct hlist_head *list;
 817
 818         err = -EINVAL;
 819         if (sunaddr->sun_family != AF_UNIX)
 820                 goto out;
 821
 822         if (addr_len == sizeof(short)) {
 823                 err = unix_autobind(sock);
 824                 goto out;
 825         }
 826
 827         err = unix_mkname(sunaddr, addr_len, &hash);
 828         if (err < 0)
 829                 goto out;
 830         addr_len = err;
 831
 832         mutex_lock(&u->readlock);
 833
 834         err = -EINVAL;
 835         if (u->addr)
 836                 goto out_up;
 837
 838         err = -ENOMEM;
 839         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 840         if (!addr)
 841                 goto out_up;
 842
 843         memcpy(addr->name, sunaddr, addr_len);
 844         addr->len = addr_len;
 845         addr->hash = hash ^ sk->sk_type;
 846         atomic_set(&addr->refcnt, 1);
 847
 848         if (sun_path[0]) {
 849                 unsigned int mode;
 850                 err = 0;
 851                 /*
 852                  * Get the parent directory, calculate the hash for last
 853                  * component.
 854                  */
 855                 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 856                 err = PTR_ERR(dentry);
 857                 if (IS_ERR(dentry))
 858                         goto out_mknod_parent;
 859
 860                 /*
 861                  * All right, let's create it.
 862                  */
 863                 mode = S_IFSOCK |
 864                        (SOCK_INODE(sock)->i_mode & ~current_umask());
 865                 err = mnt_want_write(path.mnt);
 866                 if (err)
 867                         goto out_mknod_dput;
 868                 err = security_path_mknod(&path, dentry, mode, 0);
 869                 if (err)
 870                         goto out_mknod_drop_write;
 871                 err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
 872 out_mknod_drop_write:
 873                 mnt_drop_write(path.mnt);
 874                 if (err)
 875                         goto out_mknod_dput;
 876                 mutex_unlock(&path.dentry->d_inode->i_mutex);
 877                 dput(path.dentry);
 878                 path.dentry = dentry;
 879
 880                 addr->hash = UNIX_HASH_SIZE;
 881         }
 882
 883         spin_lock(&unix_table_lock);
 884
 885         if (!sun_path[0]) {
 886                 err = -EADDRINUSE;
 887                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
 888                                               sk->sk_type, hash)) {
 889                         unix_release_addr(addr);
 890                         goto out_unlock;
 891                 }
 892
 893                 list = &unix_socket_table[addr->hash];
 894         } else {
 895                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
 896                 u->dentry = path.dentry;
 897                 u->mnt    = path.mnt;
 898         }
 899
 900         err = 0;
 901         __unix_remove_socket(sk);
 902         u->addr = addr;
 903         __unix_insert_socket(list, sk);
 904
 905 out_unlock:
 906         spin_unlock(&unix_table_lock);
 907 out_up:
 908         mutex_unlock(&u->readlock);
 909 out:
 910         return err;
 911
 912 out_mknod_dput:
 913         dput(dentry);
 914         mutex_unlock(&path.dentry->d_inode->i_mutex);
 915         path_put(&path);
 916 out_mknod_parent:
 917         if (err == -EEXIST)
 918                 err = -EADDRINUSE;
 919         unix_release_addr(addr);
 920         goto out_up;
 921 }
 922
 /*
  * Take the state locks of @sk1 and @sk2.
  *
  * When @sk2 is NULL or is the same socket as @sk1, only @sk1 is locked.
  * Otherwise the socket at the lower address is locked first and the other
  * one is taken with the nested variant, so two callers passing the same
  * pair always lock in the same order.
  */
 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
 {
         struct sock *first;
         struct sock *second;

         if (!sk2 || unlikely(sk1 == sk2)) {
                 unix_state_lock(sk1);
                 return;
         }

         first = sk1 < sk2 ? sk1 : sk2;
         second = sk1 < sk2 ? sk2 : sk1;

         unix_state_lock(first);
         unix_state_lock_nested(second);
 }
 937
 /*
  * Release what unix_state_double_lock() took for the same pair: @sk1
  * always, @sk2 only when it is a second, distinct socket.
  */
 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
 {
         unix_state_unlock(sk1);

         /* NULL or identical second socket: there was only one lock. */
         if (unlikely(sk1 == sk2) || !sk2)
                 return;

         unix_state_unlock(sk2);
 }
 947
 948 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 949                               int alen, int flags)
 950 {
 951         struct sock *sk = sock->sk;
 952         struct net *net = sock_net(sk);
 953         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
 954         struct sock *other;
 955         unsigned hash;
 956         int err;
 957
 958         if (addr->sa_family != AF_UNSPEC) {
 959                 err = unix_mkname(sunaddr, alen, &hash);
 960                 if (err < 0)
 961                         goto out;
 962                 alen = err;
 963
 964                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
 965                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
 966                         goto out;
 967
 968 restart:
 969                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
 970                 if (!other)
 971                         goto out;
 972
 973                 unix_state_double_lock(sk, other);
 974
 975                 /* Apparently VFS overslept socket death. Retry. */
 976                 if (sock_flag(other, SOCK_DEAD)) {
 977                         unix_state_double_unlock(sk, other);
 978                         sock_put(other);
 979                         goto restart;
 980                 }
 981
 982                 err = -EPERM;
 983                 if (!unix_may_send(sk, other))
 984                         goto out_unlock;
 985
 986                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
 987                 if (err)
 988                         goto out_unlock;
 989
 990         } else {
 991                 /*
 992                  *      1003.1g breaking connected state with AF_UNSPEC
 993                  */
 994                 other = NULL;
 995                 unix_state_double_lock(sk, other);
 996         }
 997
 998         /*
 999          * If it was connected, reconnect.
1000          */
1001         if (unix_peer(sk)) {
1002                 struct sock *old_peer = unix_peer(sk);
1003                 unix_peer(sk) = other;
1004                 unix_state_double_unlock(sk, other);
1005
1006                 if (other != old_peer)
1007                         unix_dgram_disconnected(sk, old_peer);
1008                 sock_put(old_peer);
1009         } else {
1010                 unix_peer(sk) = other;
1011                 unix_state_double_unlock(sk, other);
1012         }
1013         return 0;
1014
1015 out_unlock:
1016         unix_state_double_unlock(sk, other);
1017         sock_put(other);
1018 out:
1019         return err;
1020 }
1021
1022 static long unix_wait_for_peer(struct sock *other, long timeo)
1023 {
1024         struct unix_sock *u = unix_sk(other);
1025         int sched;
1026         DEFINE_WAIT(wait);
1027
1028         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1029
1030         sched = !sock_flag(other, SOCK_DEAD) &&
1031                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1032                 unix_recvq_full(other);
1033
1034         unix_state_unlock(other);
1035
1036         if (sched)
1037                 timeo = schedule_timeout(timeo);
1038
1039         finish_wait(&u->peer_wait, &wait);
1040         return timeo;
1041 }
1042
1043 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1044                                int addr_len, int flags)
1045 {
             /*
              * Connect @sock to the listening socket named by @uaddr
              * (@addr_len bytes).
              *
              * A new sock (newsk) and a 1-byte skb obtained through
              * sock_wmalloc() on newsk are allocated before any lock is taken.
              * On success newsk becomes the peer of sk and the skb is queued on
              * the listener's receive queue; unix_accept() picks the accepted
              * sock up from skb->sk (presumably newsk -- confirm against
              * sock_wmalloc()).  While the listener's queue is full the call
              * sleeps in unix_wait_for_peer() for at most the send timeout and
              * then repeats the lookup.
              *
              * Returns 0 on success, otherwise a negative errno: -ENOMEM,
              * -ECONNREFUSED, -EAGAIN, -EISCONN, -EINVAL, or whatever
              * unix_mkname(), unix_autobind(), unix_find_other(),
              * sock_intr_errno() or security_unix_stream_connect() reported.
              */
1046         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1047         struct sock *sk = sock->sk;
1048         struct net *net = sock_net(sk);
1049         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1050         struct sock *newsk = NULL;
1051         struct sock *other = NULL;
1052         struct sk_buff *skb = NULL;
1053         unsigned hash;
1054         int st;
1055         int err;
1056         long timeo;
1057
1058         err = unix_mkname(sunaddr, addr_len, &hash);
1059         if (err < 0)
1060                 goto out;
1061         addr_len = err;
1062
             /* With SOCK_PASSCRED set an unbound socket is autobound first. */
1063         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1064             (err = unix_autobind(sock)) != 0)
1065                 goto out;
1066
1067         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1068
1069         /* First of all allocate resources.
1070            If we will make it after state is locked,
1071            we will have to recheck all again in any case.
1072          */
1073
1074         err = -ENOMEM;
1075
1076         /* create new sock for complete connection */
1077         newsk = unix_create1(sock_net(sk), NULL);
1078         if (newsk == NULL)
1079                 goto out;
1080
1081         /* Allocate skb for sending to listening sock */
1082         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1083         if (skb == NULL)
1084                 goto out;
1085
1086 restart:
1087         /*  Find listening sock. */
1088         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1089         if (!other)
1090                 goto out;
1091
1092         /* Latch state of peer */
1093         unix_state_lock(other);
1094
1095         /* Apparently VFS overslept socket death. Retry. */
1096         if (sock_flag(other, SOCK_DEAD)) {
1097                 unix_state_unlock(other);
1098                 sock_put(other);
1099                 goto restart;
1100         }
1101
1102         err = -ECONNREFUSED;
1103         if (other->sk_state != TCP_LISTEN)
1104                 goto out_unlock;
1105         if (other->sk_shutdown & RCV_SHUTDOWN)
1106                 goto out_unlock;
1107
1108         if (unix_recvq_full(other)) {
1109                 err = -EAGAIN;
1110                 if (!timeo)
1111                         goto out_unlock;
1112
                     /*
                      * unix_wait_for_peer() returns with other's state lock
                      * released, so the exits below bypass out_unlock.
                      */
1113                 timeo = unix_wait_for_peer(other, timeo);
1114
1115                 err = sock_intr_errno(timeo);
1116                 if (signal_pending(current))
1117                         goto out;
1118                 sock_put(other);
1119                 goto restart;
1120         }
1121
1122         /* Latch our state.
1123
1124            It is tricky place. We need to grab our state lock and cannot
1125            drop lock on peer. It is dangerous because deadlock is
1126            possible. Connect to self case and simultaneous
1127            attempt to connect are eliminated by checking socket
1128            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1129            check this before attempt to grab lock.
1130
1131            Well, and we have to recheck the state after socket locked.
1132          */
1133         st = sk->sk_state;
1134
1135         switch (st) {
1136         case TCP_CLOSE:
1137                 /* This is ok... continue with connect */
1138                 break;
1139         case TCP_ESTABLISHED:
1140                 /* Socket is already connected */
1141                 err = -EISCONN;
1142                 goto out_unlock;
1143         default:
1144                 err = -EINVAL;
1145                 goto out_unlock;
1146         }
1147
1148         unix_state_lock_nested(sk);
1149
             /* sk changed state before its lock was taken: start over. */
1150         if (sk->sk_state != st) {
1151                 unix_state_unlock(sk);
1152                 unix_state_unlock(other);
1153                 sock_put(other);
1154                 goto restart;
1155         }
1156
1157         err = security_unix_stream_connect(sk, other, newsk);
1158         if (err) {
1159                 unix_state_unlock(sk);
1160                 goto out_unlock;
1161         }
1162
1163         /* The way is open! Quickly set all the necessary fields... */
1164
             /* newsk's peer pointer keeps its own reference on sk. */
1165         sock_hold(sk);
1166         unix_peer(newsk)        = sk;
1167         newsk->sk_state         = TCP_ESTABLISHED;
1168         newsk->sk_type          = sk->sk_type;
1169         init_peercred(newsk);
1170         newu = unix_sk(newsk);
1171         RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1172         otheru = unix_sk(other);
1173
1174         /* copy address information from listening to new sock */
1175         if (otheru->addr) {
1176                 atomic_inc(&otheru->addr->refcnt);
1177                 newu->addr = otheru->addr;
1178         }
1179         if (otheru->dentry) {
1180                 newu->dentry    = dget(otheru->dentry);
1181                 newu->mnt       = mntget(otheru->mnt);
1182         }
1183
1184         /* Set credentials */
1185         copy_peercred(sk, other);
1186
1187         sock->state     = SS_CONNECTED;
1188         sk->sk_state    = TCP_ESTABLISHED;
             /* sk's peer pointer, set just below, holds this reference. */
1189         sock_hold(newsk);
1190
1191         smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
1192         unix_peer(sk)   = newsk;
1193
1194         unix_state_unlock(sk);
1195
1196         /* take ten and send info to listening sock */
1197         spin_lock(&other->sk_receive_queue.lock);
1198         __skb_queue_tail(&other->sk_receive_queue, skb);
1199         spin_unlock(&other->sk_receive_queue.lock);
1200         unix_state_unlock(other);
1201         other->sk_data_ready(other, 0);
1202         sock_put(other);
1203         return 0;
1204
1205 out_unlock:
1206         if (other)
1207                 unix_state_unlock(other);
1208
1209 out:
             /*
              * Shared failure exit; depending on how far the function got,
              * skb, newsk and other may each still be NULL here.
              */
1210         kfree_skb(skb);
1211         if (newsk)
1212                 unix_release_sock(newsk, 0);
1213         if (other)
1214                 sock_put(other);
1215         return err;
1216 }
1217
1218 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1219 {
1220         struct sock *ska = socka->sk, *skb = sockb->sk;
1221
1222         /* Join our sockets back to back */
1223         sock_hold(ska);
1224         sock_hold(skb);
1225         unix_peer(ska) = skb;
1226         unix_peer(skb) = ska;
1227         init_peercred(ska);
1228         init_peercred(skb);
1229
1230         if (ska->sk_type != SOCK_DGRAM) {
1231                 ska->sk_state = TCP_ESTABLISHED;
1232                 skb->sk_state = TCP_ESTABLISHED;
1233                 socka->state  = SS_CONNECTED;
1234                 sockb->state  = SS_CONNECTED;
1235         }
1236         return 0;
1237 }
1238
1239 static void unix_sock_inherit_flags(const struct socket *old,
1240                                     struct socket *new)
1241 {
1242         if (test_bit(SOCK_PASSCRED, &old->flags))
1243                 set_bit(SOCK_PASSCRED, &new->flags);
1244         if (test_bit(SOCK_PASSSEC, &old->flags))
1245                 set_bit(SOCK_PASSSEC, &new->flags);
1246 }
1247
1248 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1249 {
1250         struct sock *sk = sock->sk;
1251         struct sock *tsk;
1252         struct sk_buff *skb;
1253         int err;
1254
1255         err = -EOPNOTSUPP;
1256         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1257                 goto out;
1258
1259         err = -EINVAL;
1260         if (sk->sk_state != TCP_LISTEN)
1261                 goto out;
1262
1263         /* If socket state is TCP_LISTEN it cannot change (for now...),
1264          * so that no locks are necessary.
1265          */
1266
1267         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1268         if (!skb) {
1269                 /* This means receive shutdown. */
1270                 if (err == 0)
1271                         err = -EINVAL;
1272                 goto out;
1273         }
1274
1275         tsk = skb->sk;
1276         skb_free_datagram(sk, skb);
1277         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1278
1279         /* attach accepted sock to socket */
1280         unix_state_lock(tsk);
1281         newsock->state = SS_CONNECTED;
1282         unix_sock_inherit_flags(sock, newsock);
1283         sock_graft(tsk, newsock);
1284         unix_state_unlock(tsk);
1285         return 0;
1286
1287 out:
1288         return err;
1289 }
1290
1291
1292 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1293 {
1294         struct sock *sk = sock->sk;
1295         struct unix_sock *u;
1296         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1297         int err = 0;
1298
1299         if (peer) {
1300                 sk = unix_peer_get(sk);
1301
1302                 err = -ENOTCONN;
1303                 if (!sk)
1304                         goto out;
1305                 err = 0;
1306         } else {
1307                 sock_hold(sk);
1308         }
1309
1310         u = unix_sk(sk);
1311         unix_state_lock(sk);
1312         if (!u->addr) {
1313                 sunaddr->sun_family = AF_UNIX;
1314                 sunaddr->sun_path[0] = 0;
1315                 *uaddr_len = sizeof(short);
1316         } else {
1317                 struct unix_address *addr = u->addr;
1318
1319                 *uaddr_len = addr->len;
1320                 memcpy(sunaddr, addr->name, *uaddr_len);
1321         }
1322         unix_state_unlock(sk);
1323         sock_put(sk);
1324 out:
1325         return err;
1326 }
1327
1328 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1329 {
1330         int i;
1331
1332         scm->fp = UNIXCB(skb).fp;
1333         UNIXCB(skb).fp = NULL;
1334
1335         for (i = scm->fp->count-1; i >= 0; i--)
1336                 unix_notinflight(scm->fp->fp[i]);
1337 }
1338
1339 static void unix_destruct_scm(struct sk_buff *skb)
1340 {
1341         struct scm_cookie scm;
1342         memset(&scm, 0, sizeof(scm));
1343         scm.pid  = UNIXCB(skb).pid;
1344         scm.cred = UNIXCB(skb).cred;
1345         if (UNIXCB(skb).fp)
1346                 unix_detach_fds(&scm, skb);
1347
1348         /* Alas, it calls VFS */
1349         /* So fscking what? fput() had been SMP-safe since the last Summer */
1350         scm_destroy(&scm);
1351         sock_wfree(skb);
1352 }
1353
1354 #define MAX_RECURSION_LEVEL 4
1355
1356 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1357 {
1358         int i;
1359         unsigned char max_level = 0;
1360         int unix_sock_count = 0;
1361
1362         for (i = scm->fp->count - 1; i >= 0; i--) {
1363                 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1364
1365                 if (sk) {
1366                         unix_sock_count++;
1367                         max_level = max(max_level,
1368                                         unix_sk(sk)->recursion_level);
1369                 }
1370         }
1371         if (unlikely(max_level > MAX_RECURSION_LEVEL))
1372                 return -ETOOMANYREFS;
1373
1374         /*
1375          * Need to duplicate file references for the sake of garbage
1376          * collection.  Otherwise a socket in the fps might become a
1377          * candidate for GC while the skb is not yet queued.
1378          */
1379         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1380         if (!UNIXCB(skb).fp)
1381                 return -ENOMEM;
1382
1383         if (unix_sock_count) {
1384                 for (i = scm->fp->count - 1; i >= 0; i--)
1385                         unix_inflight(scm->fp->fp[i]);
1386         }
1387         return max_level;
1388 }
1389
1390 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1391 {
1392         int err = 0;
1393
1394         UNIXCB(skb).pid  = get_pid(scm->pid);
1395         if (scm->cred)
1396                 UNIXCB(skb).cred = get_cred(scm->cred);
1397         UNIXCB(skb).fp = NULL;
1398         if (scm->fp && send_fds)
1399                 err = unix_attach_fds(scm, skb);
1400
1401         skb->destructor = unix_destruct_scm;
1402         return err;
1403 }
1404
1405 /*
1406  * Some apps rely on write() giving SCM_CREDENTIALS
1407  * We include credentials if source or destination socket
1408  * asserted SOCK_PASSCRED.
1409  */
1410 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1411                             const struct sock *other)
1412 {
1413         if (UNIXCB(skb).cred)
1414                 return;
1415         if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1416             !other->sk_socket ||
1417             test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1418                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1419                 UNIXCB(skb).cred = get_current_cred();
1420         }
1421 }
1422
1423 /*
1424  *      Send AF_UNIX data.
1425  */
1426
1427 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1428                               struct msghdr *msg, size_t len)
1429 {
             /*
              * Send one datagram of @len bytes.  The destination is the address
              * in msg->msg_name when msg->msg_namelen is non-zero, otherwise the
              * connected peer.
              *
              * Returns @len on success -- also when the receiver's socket filter
              * drops the packet -- or a negative errno.  Every exit taken after
              * scm_send() succeeded passes through scm_destroy().
              */
1430         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1431         struct sock *sk = sock->sk;
1432         struct net *net = sock_net(sk);
1433         struct unix_sock *u = unix_sk(sk);
1434         struct sockaddr_un *sunaddr = msg->msg_name;
1435         struct sock *other = NULL;
1436         int namelen = 0; /* fake GCC */
1437         int err;
1438         unsigned hash;
1439         struct sk_buff *skb;
1440         long timeo;
1441         struct scm_cookie tmp_scm;
1442         int max_level;
1443
             /* Fall back to an on-stack cookie if the iocb carries none. */
1444         if (NULL == siocb->scm)
1445                 siocb->scm = &tmp_scm;
1446         wait_for_unix_gc();
1447         err = scm_send(sock, msg, siocb->scm, false);
1448         if (err < 0)
1449                 return err;
1450
1451         err = -EOPNOTSUPP;
1452         if (msg->msg_flags&MSG_OOB)
1453                 goto out;
1454
1455         if (msg->msg_namelen) {
                     /* Explicit destination; it is looked up at restart below. */
1456                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1457                 if (err < 0)
1458                         goto out;
1459                 namelen = err;
1460         } else {
                     /* No address given: send to the connected peer, if any. */
1461                 sunaddr = NULL;
1462                 err = -ENOTCONN;
1463                 other = unix_peer_get(sk);
1464                 if (!other)
1465                         goto out;
1466         }
1467
             /* With SOCK_PASSCRED set an unbound sender is autobound first. */
1468         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1469             && (err = unix_autobind(sock)) != 0)
1470                 goto out;
1471
1472         err = -EMSGSIZE;
1473         if (len > sk->sk_sndbuf - 32)
1474                 goto out;
1475
1476         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1477         if (skb == NULL)
1478                 goto out;
1479
1480         err = unix_scm_to_skb(siocb->scm, skb, true);
1481         if (err < 0)
1482                 goto out_free;
             /*
              * err is now 0 or the recursion level unix_attach_fds() reported
              * for the passed sockets; the receiver is recorded one level deeper.
              */
1483         max_level = err + 1;
1484         unix_get_secdata(siocb->scm, skb);
1485
1486         skb_reset_transport_header(skb);
1487         err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1488         if (err)
1489                 goto out_free;
1490
1491         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1492
1493 restart:
             /*
              * other is NULL on the first pass with an explicit address, and
              * again after a dead receiver was dropped further down.
              */
1494         if (!other) {
1495                 err = -ECONNRESET;
1496                 if (sunaddr == NULL)
1497                         goto out_free;
1498
1499                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1500                                         hash, &err);
1501                 if (other == NULL)
1502                         goto out_free;
1503         }
1504
1505         if (sk_filter(other, skb) < 0) {
1506                 /* Toss the packet but do not return any error to the sender */
1507                 err = len;
1508                 goto out_free;
1509         }
1510
1511         unix_state_lock(other);
1512         err = -EPERM;
1513         if (!unix_may_send(sk, other))
1514                 goto out_unlock;
1515
1516         if (sock_flag(other, SOCK_DEAD)) {
1517                 /*
1518                  *      Check with 1003.1g - what should
1519                  *      datagram error
1520                  */
1521                 unix_state_unlock(other);
                     /* Drop the reference obtained by the lookup / unix_peer_get(). */
1522                 sock_put(other);
1523
1524                 err = 0;
1525                 unix_state_lock(sk);
1526                 if (unix_peer(sk) == other) {
                             /*
                              * The dead sock was our connected peer: disconnect
                              * and drop the reference the peer pointer was
                              * holding (cf. unix_dgram_connect()).
                              */
1527                         unix_peer(sk) = NULL;
1528                         unix_state_unlock(sk);
1529
1530                         unix_dgram_disconnected(sk, other);
1531                         sock_put(other);
1532                         err = -ECONNREFUSED;
1533                 } else {
1534                         unix_state_unlock(sk);
1535                 }
1536
                     /* Cleared so that out: does not put the reference again. */
1537                 other = NULL;
1538                 if (err)
1539                         goto out_free;
1540                 goto restart;
1541         }
1542
1543         err = -EPIPE;
1544         if (other->sk_shutdown & RCV_SHUTDOWN)
1545                 goto out_unlock;
1546
1547         if (sk->sk_type != SOCK_SEQPACKET) {
1548                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1549                 if (err)
1550                         goto out_unlock;
1551         }
1552
             /* A full queue does not block the send when other's peer is sk. */
1553         if (unix_peer(other) != sk && unix_recvq_full(other)) {
1554                 if (!timeo) {
1555                         err = -EAGAIN;
1556                         goto out_unlock;
1557                 }
1558
                     /*
                      * unix_wait_for_peer() returns with other's state lock
                      * released, hence out_free rather than out_unlock below.
                      */
1559                 timeo = unix_wait_for_peer(other, timeo);
1560
1561                 err = sock_intr_errno(timeo);
1562                 if (signal_pending(current))
1563                         goto out_free;
1564
1565                 goto restart;
1566         }
1567
1568         if (sock_flag(other, SOCK_RCVTSTAMP))
1569                 __net_timestamp(skb);
1570         maybe_add_creds(skb, sock, other);
1571         skb_queue_tail(&other->sk_receive_queue, skb);
1572         if (max_level > unix_sk(other)->recursion_level)
1573                 unix_sk(other)->recursion_level = max_level;
1574         unix_state_unlock(other);
1575         other->sk_data_ready(other, len);
1576         sock_put(other);
1577         scm_destroy(siocb->scm);
1578         return len;
1579
1580 out_unlock:
1581         unix_state_unlock(other);
1582 out_free:
1583         kfree_skb(skb);
1584 out:
1585         if (other)
1586                 sock_put(other);
1587         scm_destroy(siocb->scm);
1588         return err;
1589 }
1590
1591
1592 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1593                                struct msghdr *msg, size_t len)
1594 {
             /*
              * Send @len bytes to the connected peer, split over as many skbs as
              * needed.  A chunk is at most half of sk_sndbuf minus 64 bytes, never
              * more than SKB_MAX_ALLOC, and is further clipped to the tailroom of
              * the skb actually obtained.
              *
              * Returns the number of bytes queued if that is non-zero, otherwise
              * a negative errno.  When the result is -EPIPE, SIGPIPE is sent to
              * the current task as well, unless MSG_NOSIGNAL was given.
              */
1595         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1596         struct sock *sk = sock->sk;
1597         struct sock *other = NULL;
1598         int err, size;
1599         struct sk_buff *skb;
1600         int sent = 0;
1601         struct scm_cookie tmp_scm;
1602         bool fds_sent = false;
1603         int max_level;
1604
             /* Fall back to an on-stack cookie if the iocb carries none. */
1605         if (NULL == siocb->scm)
1606                 siocb->scm = &tmp_scm;
1607         wait_for_unix_gc();
1608         err = scm_send(sock, msg, siocb->scm, false);
1609         if (err < 0)
1610                 return err;
1611
1612         err = -EOPNOTSUPP;
1613         if (msg->msg_flags&MSG_OOB)
1614                 goto out_err;
1615
1616         if (msg->msg_namelen) {
                     /* An explicit destination address is never accepted here. */
1617                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1618                 goto out_err;
1619         } else {
1620                 err = -ENOTCONN;
                     /*
                      * Plain unix_peer(): no reference is taken on the peer, and
                      * accordingly this function never calls sock_put() on it.
                      */
1621                 other = unix_peer(sk);
1622                 if (!other)
1623                         goto out_err;
1624         }
1625
1626         if (sk->sk_shutdown & SEND_SHUTDOWN)
1627                 goto pipe_err;
1628
1629         while (sent < len) {
1630                 /*
1631                  *      Optimisation for the fact that under 0.01% of X
1632                  *      messages typically need breaking up.
1633                  */
1634
1635                 size = len-sent;
1636
1637                 /* Keep two messages in the pipe so it schedules better */
1638                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1639                         size = (sk->sk_sndbuf >> 1) - 64;
1640
1641                 if (size > SKB_MAX_ALLOC)
1642                         size = SKB_MAX_ALLOC;
1643
1644                 /*
1645                  *      Grab a buffer
1646                  */
1647
1648                 skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1649                                           &err);
1650
1651                 if (skb == NULL)
1652                         goto out_err;
1653
1654                 /*
1655                  *      If you pass two values to the sock_alloc_send_skb
1656                  *      it tries to grab the large buffer with GFP_NOFS
1657                  *      (which can fail easily), and if it fails grab the
1658                  *      fallback size buffer which is under a page and will
1659                  *      succeed. [Alan]
1660                  */
1661                 size = min_t(int, size, skb_tailroom(skb));
1662
1663
1664                 /* Only send the fds in the first buffer */
1665                 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1666                 if (err < 0) {
1667                         kfree_skb(skb);
1668                         goto out_err;
1669                 }
                     /*
                      * err is now 0 or the recursion level unix_attach_fds()
                      * reported; the receiver is recorded one level deeper.
                      */
1670                 max_level = err + 1;
1671                 fds_sent = true;
1672
1673                 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1674                 if (err) {
1675                         kfree_skb(skb);
1676                         goto out_err;
1677                 }
1678
1679                 unix_state_lock(other);
1680
                     /* Peer gone or no longer receiving: report a broken pipe. */
1681                 if (sock_flag(other, SOCK_DEAD) ||
1682                     (other->sk_shutdown & RCV_SHUTDOWN))
1683                         goto pipe_err_free;
1684
1685                 maybe_add_creds(skb, sock, other);
1686                 skb_queue_tail(&other->sk_receive_queue, skb);
1687                 if (max_level > unix_sk(other)->recursion_level)
1688                         unix_sk(other)->recursion_level = max_level;
1689                 unix_state_unlock(other);
1690                 other->sk_data_ready(other, size);
1691                 sent += size;
1692         }
1693
1694         scm_destroy(siocb->scm);
1695         siocb->scm = NULL;
1696
1697         return sent;
1698
1699 pipe_err_free:
1700         unix_state_unlock(other);
1701         kfree_skb(skb);
1702 pipe_err:
             /* No signal once part of the data has already been queued. */
1703         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1704                 send_sig(SIGPIPE, current, 0);
1705         err = -EPIPE;
1706 out_err:
1707         scm_destroy(siocb->scm);
1708         siocb->scm = NULL;
             /* A partial send is reported as a byte count, not as the error. */
1709         return sent ? : err;
1710 }
1711
1712 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1713                                   struct msghdr *msg, size_t len)
1714 {
1715         int err;
1716         struct sock *sk = sock->sk;
1717
1718         err = sock_error(sk);
1719         if (err)
1720                 return err;
1721
1722         if (sk->sk_state != TCP_ESTABLISHED)
1723                 return -ENOTCONN;
1724
1725         if (msg->msg_namelen)
1726                 msg->msg_namelen = 0;
1727
1728         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1729 }
1730
1731 static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1732                               struct msghdr *msg, size_t size,
1733                               int flags)
1734 {
1735         struct sock *sk = sock->sk;
1736
1737         if (sk->sk_state != TCP_ESTABLISHED)
1738                 return -ENOTCONN;
1739
1740         return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1741 }
1742
1743 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1744 {
1745         struct unix_sock *u = unix_sk(sk);
1746
1747         msg->msg_namelen = 0;
1748         if (u->addr) {
1749                 msg->msg_namelen = u->addr->len;
1750                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1751         }
1752 }
1753
1754 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1755                               struct msghdr *msg, size_t size,
1756                               int flags)
1757 {
1758         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1759         struct scm_cookie tmp_scm;
1760         struct sock *sk = sock->sk;
1761         struct unix_sock *u = unix_sk(sk);
1762         int noblock = flags & MSG_DONTWAIT;
1763         struct sk_buff *skb;
1764         int err;
1765
1766         err = -EOPNOTSUPP;
1767         if (flags&MSG_OOB)
1768                 goto out;
1769
1770         msg->msg_namelen = 0;
1771
1772         err = mutex_lock_interruptible(&u->readlock);
1773         if (err) {
1774                 err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
1775                 goto out;
1776         }
1777
1778         skb = skb_recv_datagram(sk, flags, noblock, &err);
1779         if (!skb) {
1780                 unix_state_lock(sk);
1781                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1782                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1783                     (sk->sk_shutdown & RCV_SHUTDOWN))
1784                         err = 0;
1785                 unix_state_unlock(sk);
1786                 goto out_unlock;
1787         }
1788
1789         wake_up_interruptible_sync_poll(&u->peer_wait,
1790                                         POLLOUT | POLLWRNORM | POLLWRBAND);
1791
1792         if (msg->msg_name)
1793                 unix_copy_addr(msg, skb->sk);
1794
1795         if (size > skb->len)
1796                 size = skb->len;
1797         else if (size < skb->len)
1798                 msg->msg_flags |= MSG_TRUNC;
1799
1800         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1801         if (err)
1802                 goto out_free;
1803
1804         if (sock_flag(sk, SOCK_RCVTSTAMP))
1805                 __sock_recv_timestamp(msg, sk, skb);
1806
1807         if (!siocb->scm) {
1808                 siocb->scm = &tmp_scm;
1809                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1810         }
1811         scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1812         unix_set_secdata(siocb->scm, skb);
1813
1814         if (!(flags & MSG_PEEK)) {
1815                 if (UNIXCB(skb).fp)
1816                         unix_detach_fds(siocb->scm, skb);
1817         } else {
1818                 /* It is questionable: on PEEK we could:
1819                    - do not return fds - good, but too simple 8)
1820                    - return fds, and do not return them on read (old strategy,
1821                      apparently wrong)
1822                    - clone fds (I chose it for now, it is the most universal
1823                      solution)
1824
1825                    POSIX 1003.1g does not actually define this clearly
1826                    at all. POSIX 1003.1g doesn't define a lot of things
1827                    clearly however!
1828
1829                 */
1830                 if (UNIXCB(skb).fp)
1831                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1832         }
1833         err = size;
1834
1835         scm_recv(sock, msg, siocb->scm, flags);
1836
1837 out_free:
1838         skb_free_datagram(sk, skb);
1839 out_unlock:
1840         mutex_unlock(&u->readlock);
1841 out:
1842         return err;
1843 }
1844
1845 /*
1846  *      Sleep until data has arrive. But check for races..
1847  */
1848
1849 static long unix_stream_data_wait(struct sock *sk, long timeo)
1850 {
1851         DEFINE_WAIT(wait);
1852
1853         unix_state_lock(sk);
1854
1855         for (;;) {
1856                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1857
1858                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1859                     sk->sk_err ||
1860                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1861                     signal_pending(current) ||
1862                     !timeo)
1863                         break;
1864
1865                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1866                 unix_state_unlock(sk);
1867                 timeo = schedule_timeout(timeo);
1868                 unix_state_lock(sk);
1869                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1870         }
1871
1872         finish_wait(sk_sleep(sk), &wait);
1873         unix_state_unlock(sk);
1874         return timeo;
1875 }
1876
1877
1878
1879 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1880                                struct msghdr *msg, size_t size,
1881                                int flags)
1882 {
1883         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1884         struct scm_cookie tmp_scm;
1885         struct sock *sk = sock->sk;
1886         struct unix_sock *u = unix_sk(sk);
1887         struct sockaddr_un *sunaddr = msg->msg_name;
1888         int copied = 0;
1889         int check_creds = 0;
1890         int target;
1891         int err = 0;
1892         long timeo;
1893
1894         err = -EINVAL;
1895         if (sk->sk_state != TCP_ESTABLISHED)
1896                 goto out;
1897
1898         err = -EOPNOTSUPP;
1899         if (flags&MSG_OOB)
1900                 goto out;
1901
1902         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1903         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1904
1905         msg->msg_namelen = 0;
1906
1907         /* Lock the socket to prevent queue disordering
1908          * while sleeps in memcpy_tomsg
1909          */
1910
1911         if (!siocb->scm) {
1912                 siocb->scm = &tmp_scm;
1913                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1914         }
1915
1916         err = mutex_lock_interruptible(&u->readlock);
1917         if (err) {
1918                 err = sock_intr_errno(timeo);
1919                 goto out;
1920         }
1921
1922         do {
1923                 int chunk;
1924                 struct sk_buff *skb;
1925
1926                 unix_state_lock(sk);
1927                 skb = skb_peek(&sk->sk_receive_queue);
1928                 if (skb == NULL) {
1929                         unix_sk(sk)->recursion_level = 0;
1930                         if (copied >= target)
1931                                 goto unlock;
1932
1933                         /*
1934                          *      POSIX 1003.1g mandates this order.
1935                          */
1936
1937                         err = sock_error(sk);
1938                         if (err)
1939                                 goto unlock;
1940                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1941                                 goto unlock;
1942
1943                         unix_state_unlock(sk);
1944                         err = -EAGAIN;
1945                         if (!timeo)
1946                                 break;
1947                         mutex_unlock(&u->readlock);
1948
1949                         timeo = unix_stream_data_wait(sk, timeo);
1950
1951                         if (signal_pending(current)
1952                             ||  mutex_lock_interruptible(&u->readlock)) {
1953                                 err = sock_intr_errno(timeo);
1954                                 goto out;
1955                         }
1956
1957                         continue;
1958  unlock:
1959                         unix_state_unlock(sk);
1960                         break;
1961                 }
1962                 unix_state_unlock(sk);
1963
1964                 if (check_creds) {
1965                         /* Never glue messages from different writers */
1966                         if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
1967                             (UNIXCB(skb).cred != siocb->scm->cred))
1968                                 break;
1969                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
1970                         /* Copy credentials */
1971                         scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1972                         check_creds = 1;
1973                 }
1974
1975                 /* Copy address just once */
1976                 if (sunaddr) {
1977                         unix_copy_addr(msg, skb->sk);
1978                         sunaddr = NULL;
1979                 }
1980
1981                 chunk = min_t(unsigned int, skb->len, size);
1982                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1983                         if (copied == 0)
1984                                 copied = -EFAULT;
1985                         break;
1986                 }
1987                 copied += chunk;
1988                 size -= chunk;
1989
1990                 /* Mark read part of skb as used */
1991                 if (!(flags & MSG_PEEK)) {
1992                         skb_pull(skb, chunk);
1993
1994                         if (UNIXCB(skb).fp)
1995                                 unix_detach_fds(siocb->scm, skb);
1996
1997                         if (skb->len)
1998                                 break;
1999
2000                         skb_unlink(skb, &sk->sk_receive_queue);
2001                         consume_skb(skb);
2002
2003                         if (siocb->scm->fp)
2004                                 break;
2005                 } else {
2006                         /* It is questionable, see note in unix_dgram_recvmsg.
2007                          */
2008                         if (UNIXCB(skb).fp)
2009                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2010
2011                         break;
2012                 }
2013         } while (size);
2014
2015         mutex_unlock(&u->readlock);
2016         scm_recv(sock, msg, siocb->scm, flags);
2017 out:
2018         return copied ? : err;
2019 }
2020
2021 static int unix_shutdown(struct socket *sock, int mode)
2022 {
2023         struct sock *sk = sock->sk;
2024         struct sock *other;
2025
2026         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
2027
2028         if (!mode)
2029                 return 0;
2030
2031         unix_state_lock(sk);
2032         sk->sk_shutdown |= mode;
2033         other = unix_peer(sk);
2034         if (other)
2035                 sock_hold(other);
2036         unix_state_unlock(sk);
2037         sk->sk_state_change(sk);
2038
2039         if (other &&
2040                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2041
2042                 int peer_mode = 0;
2043
2044                 if (mode&RCV_SHUTDOWN)
2045                         peer_mode |= SEND_SHUTDOWN;
2046                 if (mode&SEND_SHUTDOWN)
2047                         peer_mode |= RCV_SHUTDOWN;
2048                 unix_state_lock(other);
2049                 other->sk_shutdown |= peer_mode;
2050                 unix_state_unlock(other);
2051                 other->sk_state_change(other);
2052                 if (peer_mode == SHUTDOWN_MASK)
2053                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2054                 else if (peer_mode & RCV_SHUTDOWN)
2055                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2056         }
2057         if (other)
2058                 sock_put(other);
2059
2060         return 0;
2061 }
2062
2063 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2064 {
2065         struct sock *sk = sock->sk;
2066         long amount = 0;
2067         int err;
2068
2069         switch (cmd) {
2070         case SIOCOUTQ:
2071                 amount = sk_wmem_alloc_get(sk);
2072                 err = put_user(amount, (int __user *)arg);
2073                 break;
2074         case SIOCINQ:
2075                 {
2076                         struct sk_buff *skb;
2077
2078                         if (sk->sk_state == TCP_LISTEN) {
2079                                 err = -EINVAL;
2080                                 break;
2081                         }
2082
2083                         spin_lock(&sk->sk_receive_queue.lock);
2084                         if (sk->sk_type == SOCK_STREAM ||
2085                             sk->sk_type == SOCK_SEQPACKET) {
2086                                 skb_queue_walk(&sk->sk_receive_queue, skb)
2087                                         amount += skb->len;
2088                         } else {
2089                                 skb = skb_peek(&sk->sk_receive_queue);
2090                                 if (skb)
2091                                         amount = skb->len;
2092                         }
2093                         spin_unlock(&sk->sk_receive_queue.lock);
2094                         err = put_user(amount, (int __user *)arg);
2095                         break;
2096                 }
2097
2098         default:
2099                 err = -ENOIOCTLCMD;
2100                 break;
2101         }
2102         return err;
2103 }
2104
2105 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2106 {
2107         struct sock *sk = sock->sk;
2108         unsigned int mask;
2109
2110         sock_poll_wait(file, sk_sleep(sk), wait);
2111         mask = 0;
2112
2113         /* exceptional events? */
2114         if (sk->sk_err)
2115                 mask |= POLLERR;
2116         if (sk->sk_shutdown == SHUTDOWN_MASK)
2117                 mask |= POLLHUP;
2118         if (sk->sk_shutdown & RCV_SHUTDOWN)
2119                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2120
2121         /* readable? */
2122         if (!skb_queue_empty(&sk->sk_receive_queue))
2123                 mask |= POLLIN | POLLRDNORM;
2124
2125         /* Connection-based need to check for termination and startup */
2126         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2127             sk->sk_state == TCP_CLOSE)
2128                 mask |= POLLHUP;
2129
2130         /*
2131          * we set writable also when the other side has shut down the
2132          * connection. This prevents stuck sockets.
2133          */
2134         if (unix_writable(sk))
2135                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2136
2137         return mask;
2138 }
2139
2140 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2141                                     poll_table *wait)
2142 {
2143         struct sock *sk = sock->sk, *other;
2144         unsigned int mask, writable;
2145
2146         sock_poll_wait(file, sk_sleep(sk), wait);
2147         mask = 0;
2148
2149         /* exceptional events? */
2150         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2151                 mask |= POLLERR;
2152         if (sk->sk_shutdown & RCV_SHUTDOWN)
2153                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2154         if (sk->sk_shutdown == SHUTDOWN_MASK)
2155                 mask |= POLLHUP;
2156
2157         /* readable? */
2158         if (!skb_queue_empty(&sk->sk_receive_queue))
2159                 mask |= POLLIN | POLLRDNORM;
2160
2161         /* Connection-based need to check for termination and startup */
2162         if (sk->sk_type == SOCK_SEQPACKET) {
2163                 if (sk->sk_state == TCP_CLOSE)
2164                         mask |= POLLHUP;
2165                 /* connection hasn't started yet? */
2166                 if (sk->sk_state == TCP_SYN_SENT)
2167                         return mask;
2168         }
2169
2170         /* No write status requested, avoid expensive OUT tests. */
2171         if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT)))
2172                 return mask;
2173
2174         writable = unix_writable(sk);
2175         other = unix_peer_get(sk);
2176         if (other) {
2177                 if (unix_peer(other) != sk) {
2178                         sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2179                         if (unix_recvq_full(other))
2180                                 writable = 0;
2181                 }
2182                 sock_put(other);
2183         }
2184
2185         if (writable)
2186                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2187         else
2188                 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2189
2190         return mask;
2191 }
2192
2193 #ifdef CONFIG_PROC_FS
2194 static struct sock *first_unix_socket(int *i)
2195 {
2196         for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2197                 if (!hlist_empty(&unix_socket_table[*i]))
2198                         return __sk_head(&unix_socket_table[*i]);
2199         }
2200         return NULL;
2201 }
2202
2203 static struct sock *next_unix_socket(int *i, struct sock *s)
2204 {
2205         struct sock *next = sk_next(s);
2206         /* More in this chain? */
2207         if (next)
2208                 return next;
2209         /* Look for next non-empty chain. */
2210         for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2211                 if (!hlist_empty(&unix_socket_table[*i]))
2212                         return __sk_head(&unix_socket_table[*i]);
2213         }
2214         return NULL;
2215 }
2216
/* Per-open iteration state for the /proc "unix" seq_file. */
struct unix_iter_state {
	/* NOTE(review): presumably has to stay the first member for the
	 * seq_file net helpers - confirm before reordering. */
	struct seq_net_private p;
	/* Index of the unix_socket_table chain currently being walked. */
	int i;
};
2221
2222 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2223 {
2224         struct unix_iter_state *iter = seq->private;
2225         loff_t off = 0;
2226         struct sock *s;
2227
2228         for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2229                 if (sock_net(s) != seq_file_net(seq))
2230                         continue;
2231                 if (off == pos)
2232                         return s;
2233                 ++off;
2234         }
2235         return NULL;
2236 }
2237
2238 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2239         __acquires(unix_table_lock)
2240 {
2241         spin_lock(&unix_table_lock);
2242         return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2243 }
2244
2245 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2246 {
2247         struct unix_iter_state *iter = seq->private;
2248         struct sock *sk = v;
2249         ++*pos;
2250
2251         if (v == SEQ_START_TOKEN)
2252                 sk = first_unix_socket(&iter->i);
2253         else
2254                 sk = next_unix_socket(&iter->i, sk);
2255         while (sk && (sock_net(sk) != seq_file_net(seq)))
2256                 sk = next_unix_socket(&iter->i, sk);
2257         return sk;
2258 }
2259
/* seq_file ->stop: drop the unix_table_lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2265
2266 static int unix_seq_show(struct seq_file *seq, void *v)
2267 {
2268
2269         if (v == SEQ_START_TOKEN)
2270                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2271                          "Inode Path\n");
2272         else {
2273                 struct sock *s = v;
2274                 struct unix_sock *u = unix_sk(s);
2275                 unix_state_lock(s);
2276
2277                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2278                         s,
2279                         atomic_read(&s->sk_refcnt),
2280                         0,
2281                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2282                         s->sk_type,
2283                         s->sk_socket ?
2284                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2285                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2286                         sock_i_ino(s));
2287
2288                 if (u->addr) {
2289                         int i, len;
2290                         seq_putc(seq, ' ');
2291
2292                         i = 0;
2293                         len = u->addr->len - sizeof(short);
2294                         if (!UNIX_ABSTRACT(s))
2295                                 len--;
2296                         else {
2297                                 seq_putc(seq, '@');
2298                                 i++;
2299                         }
2300                         for ( ; i < len; i++)
2301                                 seq_putc(seq, u->addr->name->sun_path[i]);
2302                 }
2303                 unix_state_unlock(s);
2304                 seq_putc(seq, '\n');
2305         }
2306
2307         return 0;
2308 }
2309
2310 static const struct seq_operations unix_seq_ops = {
2311         .start  = unix_seq_start,
2312         .next   = unix_seq_next,
2313         .stop   = unix_seq_stop,
2314         .show   = unix_seq_show,
2315 };
2316
2317 static int unix_seq_open(struct inode *inode, struct file *file)
2318 {
2319         return seq_open_net(inode, file, &unix_seq_ops,
2320                             sizeof(struct unix_iter_state));
2321 }
2322
2323 static const struct file_operations unix_seq_fops = {
2324         .owner          = THIS_MODULE,
2325         .open           = unix_seq_open,
2326         .read           = seq_read,
2327         .llseek         = seq_lseek,
2328         .release        = seq_release_net,
2329 };
2330
2331 #endif
2332
2333 static const struct net_proto_family unix_family_ops = {
2334         .family = PF_UNIX,
2335         .create = unix_create,
2336         .owner  = THIS_MODULE,
2337 };
2338
2339
2340 static int __net_init unix_net_init(struct net *net)
2341 {
2342         int error = -ENOMEM;
2343
2344         net->unx.sysctl_max_dgram_qlen = 10;
2345         if (unix_sysctl_register(net))
2346                 goto out;
2347
2348 #ifdef CONFIG_PROC_FS
2349         if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2350                 unix_sysctl_unregister(net);
2351                 goto out;
2352         }
2353 #endif
2354         error = 0;
2355 out:
2356         return error;
2357 }
2358
/*
 * Per-namespace teardown: unregister the sysctl table and remove the
 * /proc "unix" entry set up by unix_net_init().
 */
static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}
2364
2365 static struct pernet_operations unix_net_ops = {
2366         .init = unix_net_init,
2367         .exit = unix_net_exit,
2368 };
2369
2370 static int __init af_unix_init(void)
2371 {
2372         int rc = -1;
2373         struct sk_buff *dummy_skb;
2374
2375         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2376
2377         rc = proto_register(&unix_proto, 1);
2378         if (rc != 0) {
2379                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2380                        __func__);
2381                 goto out;
2382         }
2383
2384         sock_register(&unix_family_ops);
2385         register_pernet_subsys(&unix_net_ops);
2386 out:
2387         return rc;
2388 }
2389
/*
 * Module unload: unregister the PF_UNIX family, the unix protocol and
 * the per-namespace operations registered by af_unix_init().
 */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2396
/*
 * Run earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries to use a
 * UNIX socket, but later than subsys_initcall() because we depend on
 * things initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);