net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Fixes:
  12  *              Linus Torvalds  :       Assorted bug cures.
  13  *              Niibe Yutaka    :       async I/O support.
  14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15  *              Alan Cox        :       Limit size of allocated blocks.
  16  *              Alan Cox        :       Fixed the stupid socketpair bug.
  17  *              Alan Cox        :       BSD compatibility fine tuning.
  18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19  *              Alan Cox        :       Sorted out a proper draft version of
  20  *                                      file descriptor passing hacked up from
  21  *                                      Mike Shaver's work.
  22  *              Marty Leisner   :       Fixes to fd passing
  23  *              Nick Nevin      :       recvmsg bugfix.
  24  *              Alan Cox        :       Started proper garbage collector
  25  *              Heiko EiBfeldt  :       Missing verify_area check
  26  *              Alan Cox        :       Started POSIXisms
  27  *              Andreas Schwab  :       Replace inode by dentry for proper
  28  *                                      reference counting
  29  *              Kirk Petersen   :       Made this a module
  30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31  *                                      Lots of bug fixes.
  32  *           Alexey Kuznetosv   :       Repaired (I hope) bugs introduces
  33  *                                      by above two patches.
  34  *           Andrea Arcangeli   :       If possible we block in connect(2)
  35  *                                      if the max backlog of the listen socket
  36  *                                      is been reached. This won't break
  37  *                                      old apps and it will avoid huge amount
  38  *                                      of socks hashed (this for unix_gc()
  39  *                                      performances reasons).
  40  *                                      Security fix that limits the max
  41  *                                      number of socks to 2*max_files and
  42  *                                      the number of skb queueable in the
  43  *                                      dgram receiver.
  44  *              Artur Skawina   :       Hash function optimizations
  45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46  *            Malcolm Beattie   :       Set peercred for socketpair
  47  *           Michal Ostrowski   :       Module initialization cleanup.
  48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49  *                                      the core infrastructure is doing that
  50  *                                      for all net proto families now (2.5.69+)
  51  *
  52  *
  53  * Known differences from reference BSD that was tested:
  54  *
  55  *      [TO FIX]
  56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57  *              other the moment one end closes.
  58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60  *      [NOT TO FIX]
  61  *      accept() returns a path name even if the connecting socket has closed
  62  *              in the meantime (BSD loses the path and gives up).
  63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66  *      BSD af_unix apparently has connect forgetting to block properly.
  67  *              (need to check this with the POSIX spec in detail)
  68  *
  69  * Differences from 2.0.0-11-... (ANK)
  70  *      Bug fixes and improvements.
  71  *              - client shutdown killed server socket.
  72  *              - removed all useless cli/sti pairs.
  73  *
  74  *      Semantic changes/extensions.
  75  *              - generic control message passing.
  76  *              - SCM_CREDENTIALS control message.
  77  *              - "Abstract" (not FS based) socket bindings.
  78  *                Abstract names are sequences of bytes (not zero terminated)
  79  *                started by 0, so that this name space does not intersect
  80  *                with BSD names.
  81  */
  82
  83 #include <linux/module.h>
  84 #include <linux/kernel.h>
  85 #include <linux/signal.h>
  86 #include <linux/sched.h>
  87 #include <linux/errno.h>
  88 #include <linux/string.h>
  89 #include <linux/stat.h>
  90 #include <linux/dcache.h>
  91 #include <linux/namei.h>
  92 #include <linux/socket.h>
  93 #include <linux/un.h>
  94 #include <linux/fcntl.h>
  95 #include <linux/termios.h>
  96 #include <linux/sockios.h>
  97 #include <linux/net.h>
  98 #include <linux/in.h>
  99 #include <linux/fs.h>
 100 #include <linux/slab.h>
 101 #include <asm/uaccess.h>
 102 #include <linux/skbuff.h>
 103 #include <linux/netdevice.h>
 104 #include <net/net_namespace.h>
 105 #include <net/sock.h>
 106 #include <net/tcp_states.h>
 107 #include <net/af_unix.h>
 108 #include <linux/proc_fs.h>
 109 #include <linux/seq_file.h>
 110 #include <net/scm.h>
 111 #include <linux/init.h>
 112 #include <linux/poll.h>
 113 #include <linux/rtnetlink.h>
 114 #include <linux/mount.h>
 115 #include <net/checksum.h>
 116 #include <linux/security.h>
 117
 118 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
 119 static DEFINE_SPINLOCK(unix_table_lock);
 120 static atomic_long_t unix_nr_socks;
 121
 122 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
 123
 124 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
 125
 126 #ifdef CONFIG_SECURITY_NETWORK
 127 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 128 {
 129         memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
 130 }
 131
 132 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 133 {
 134         scm->secid = *UNIXSID(skb);
 135 }
 136 #else
 137 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 138 { }
 139
 140 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 141 { }
 142 #endif /* CONFIG_SECURITY_NETWORK */
 143
 144 /*
 145  *  SMP locking strategy:
 146  *    hash table is protected with spinlock unix_table_lock
 147  *    each socket state is protected by separate spin lock.
 148  */
 149
 150 static inline unsigned unix_hash_fold(__wsum n)
 151 {
 152         unsigned int hash = (__force unsigned int)csum_fold(n);
 153
 154         hash ^= hash>>8;
 155         return hash&(UNIX_HASH_SIZE-1);
 156 }
 157
 158 #define unix_peer(sk) (unix_sk(sk)->peer)
 159
 160 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 161 {
 162         return unix_peer(osk) == sk;
 163 }
 164
 165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 166 {
 167         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 168 }
 169
 170 static inline int unix_recvq_full(struct sock const *sk)
 171 {
 172         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 173 }
 174
 175 static struct sock *unix_peer_get(struct sock *s)
 176 {
 177         struct sock *peer;
 178
 179         unix_state_lock(s);
 180         peer = unix_peer(s);
 181         if (peer)
 182                 sock_hold(peer);
 183         unix_state_unlock(s);
 184         return peer;
 185 }
 186
 187 static inline void unix_release_addr(struct unix_address *addr)
 188 {
 189         if (atomic_dec_and_test(&addr->refcnt))
 190                 kfree(addr);
 191 }
 192
 193 /*
 194  *      Check unix socket name:
 195  *              - should be not zero length.
 196  *              - if started by not zero, should be NULL terminated (FS object)
 197  *              - if started by zero, it is abstract name.
 198  */
 199
 200 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
 201 {
 202         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 203                 return -EINVAL;
 204         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 205                 return -EINVAL;
 206         if (sunaddr->sun_path[0]) {
 207                 /*
 208                  * This may look like an off by one error but it is a bit more
 209                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 210                  * sun_path[108] doesn't as such exist.  However in kernel space
 211                  * we are guaranteed that it is a valid memory location in our
 212                  * kernel address buffer.
 213                  */
 214                 ((char *)sunaddr)[len] = 0;
 215                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 216                 return len;
 217         }
 218
 219         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 220         return len;
 221 }
 222
 223 static void __unix_remove_socket(struct sock *sk)
 224 {
 225         sk_del_node_init(sk);
 226 }
 227
 228 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 229 {
 230         WARN_ON(!sk_unhashed(sk));
 231         sk_add_node(sk, list);
 232 }
 233
 234 static inline void unix_remove_socket(struct sock *sk)
 235 {
 236         spin_lock(&unix_table_lock);
 237         __unix_remove_socket(sk);
 238         spin_unlock(&unix_table_lock);
 239 }
 240
 241 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 242 {
 243         spin_lock(&unix_table_lock);
 244         __unix_insert_socket(list, sk);
 245         spin_unlock(&unix_table_lock);
 246 }
 247
 248 static struct sock *__unix_find_socket_byname(struct net *net,
 249                                               struct sockaddr_un *sunname,
 250                                               int len, int type, unsigned hash)
 251 {
 252         struct sock *s;
 253         struct hlist_node *node;
 254
 255         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
 256                 struct unix_sock *u = unix_sk(s);
 257
 258                 if (!net_eq(sock_net(s), net))
 259                         continue;
 260
 261                 if (u->addr->len == len &&
 262                     !memcmp(u->addr->name, sunname, len))
 263                         goto found;
 264         }
 265         s = NULL;
 266 found:
 267         return s;
 268 }
 269
 270 static inline struct sock *unix_find_socket_byname(struct net *net,
 271                                                    struct sockaddr_un *sunname,
 272                                                    int len, int type,
 273                                                    unsigned hash)
 274 {
 275         struct sock *s;
 276
 277         spin_lock(&unix_table_lock);
 278         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 279         if (s)
 280                 sock_hold(s);
 281         spin_unlock(&unix_table_lock);
 282         return s;
 283 }
 284
 285 static struct sock *unix_find_socket_byinode(struct inode *i)
 286 {
 287         struct sock *s;
 288         struct hlist_node *node;
 289
 290         spin_lock(&unix_table_lock);
 291         sk_for_each(s, node,
 292                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 293                 struct dentry *dentry = unix_sk(s)->dentry;
 294
 295                 if (dentry && dentry->d_inode == i) {
 296                         sock_hold(s);
 297                         goto found;
 298                 }
 299         }
 300         s = NULL;
 301 found:
 302         spin_unlock(&unix_table_lock);
 303         return s;
 304 }
 305
 306 static inline int unix_writable(struct sock *sk)
 307 {
 308         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 309 }
 310
 311 static void unix_write_space(struct sock *sk)
 312 {
 313         struct socket_wq *wq;
 314
 315         rcu_read_lock();
 316         if (unix_writable(sk)) {
 317                 wq = rcu_dereference(sk->sk_wq);
 318                 if (wq_has_sleeper(wq))
 319                         wake_up_interruptible_sync_poll(&wq->wait,
 320                                 POLLOUT | POLLWRNORM | POLLWRBAND);
 321                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 322         }
 323         rcu_read_unlock();
 324 }
 325
 326 /* When dgram socket disconnects (or changes its peer), we clear its receive
 327  * queue of packets arrived from previous peer. First, it allows to do
 328  * flow control based only on wmem_alloc; second, sk connected to peer
 329  * may receive messages only from that peer. */
 330 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 331 {
 332         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 333                 skb_queue_purge(&sk->sk_receive_queue);
 334                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 335
 336                 /* If one link of bidirectional dgram pipe is disconnected,
 337                  * we signal error. Messages are lost. Do not make this,
 338                  * when peer was not connected to us.
 339                  */
 340                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 341                         other->sk_err = ECONNRESET;
 342                         other->sk_error_report(other);
 343                 }
 344         }
 345 }
 346
 347 static void unix_sock_destructor(struct sock *sk)
 348 {
 349         struct unix_sock *u = unix_sk(sk);
 350
 351         skb_queue_purge(&sk->sk_receive_queue);
 352
 353         WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 354         WARN_ON(!sk_unhashed(sk));
 355         WARN_ON(sk->sk_socket);
 356         if (!sock_flag(sk, SOCK_DEAD)) {
 357                 printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
 358                 return;
 359         }
 360
 361         if (u->addr)
 362                 unix_release_addr(u->addr);
 363
 364         atomic_long_dec(&unix_nr_socks);
 365         local_bh_disable();
 366         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 367         local_bh_enable();
 368 #ifdef UNIX_REFCNT_DEBUG
 369         printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
 370                 atomic_long_read(&unix_nr_socks));
 371 #endif
 372 }
 373
 374 static void unix_release_sock(struct sock *sk, int embrion)
 375 {
 376         struct unix_sock *u = unix_sk(sk);
 377         struct dentry *dentry;
 378         struct vfsmount *mnt;
 379         struct sock *skpair;
 380         struct sk_buff *skb;
 381         int state;
 382
 383         unix_remove_socket(sk);
 384
 385         /* Clear state */
 386         unix_state_lock(sk);
 387         sock_orphan(sk);
 388         sk->sk_shutdown = SHUTDOWN_MASK;
 389         dentry       = u->dentry;
 390         u->dentry    = NULL;
 391         mnt          = u->mnt;
 392         u->mnt       = NULL;
 393         state = sk->sk_state;
 394         sk->sk_state = TCP_CLOSE;
 395         unix_state_unlock(sk);
 396
 397         wake_up_interruptible_all(&u->peer_wait);
 398
 399         skpair = unix_peer(sk);
 400
 401         if (skpair != NULL) {
 402                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 403                         unix_state_lock(skpair);
 404                         /* No more writes */
 405                         skpair->sk_shutdown = SHUTDOWN_MASK;
 406                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 407                                 skpair->sk_err = ECONNRESET;
 408                         unix_state_unlock(skpair);
 409                         skpair->sk_state_change(skpair);
 410                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 411                 }
 412                 sock_put(skpair); /* It may now die */
 413                 unix_peer(sk) = NULL;
 414         }
 415
 416         /* Try to flush out this socket. Throw out buffers at least */
 417
 418         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 419                 if (state == TCP_LISTEN)
 420                         unix_release_sock(skb->sk, 1);
 421                 /* passed fds are erased in the kfree_skb hook        */
 422                 kfree_skb(skb);
 423         }
 424
 425         if (dentry) {
 426                 dput(dentry);
 427                 mntput(mnt);
 428         }
 429
 430         sock_put(sk);
 431
 432         /* ---- Socket is dead now and most probably destroyed ---- */
 433
 434         /*
 435          * Fixme: BSD difference: In BSD all sockets connected to use get
 436          *        ECONNRESET and we die on the spot. In Linux we behave
 437          *        like files and pipes do and wait for the last
 438          *        dereference.
 439          *
 440          * Can't we simply set sock->err?
 441          *
 442          *        What the above comment does talk about? --ANK(980817)
 443          */
 444
 445         if (unix_tot_inflight)
 446                 unix_gc();              /* Garbage collect fds */
 447 }
 448
 449 static void init_peercred(struct sock *sk)
 450 {
 451         put_pid(sk->sk_peer_pid);
 452         if (sk->sk_peer_cred)
 453                 put_cred(sk->sk_peer_cred);
 454         sk->sk_peer_pid  = get_pid(task_tgid(current));
 455         sk->sk_peer_cred = get_current_cred();
 456 }
 457
 458 static void copy_peercred(struct sock *sk, struct sock *peersk)
 459 {
 460         put_pid(sk->sk_peer_pid);
 461         if (sk->sk_peer_cred)
 462                 put_cred(sk->sk_peer_cred);
 463         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 464         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 465 }
 466
 467 static int unix_listen(struct socket *sock, int backlog)
 468 {
 469         int err;
 470         struct sock *sk = sock->sk;
 471         struct unix_sock *u = unix_sk(sk);
 472         struct pid *old_pid = NULL;
 473         const struct cred *old_cred = NULL;
 474
 475         err = -EOPNOTSUPP;
 476         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 477                 goto out;       /* Only stream/seqpacket sockets accept */
 478         err = -EINVAL;
 479         if (!u->addr)
 480                 goto out;       /* No listens on an unbound socket */
 481         unix_state_lock(sk);
 482         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 483                 goto out_unlock;
 484         if (backlog > sk->sk_max_ack_backlog)
 485                 wake_up_interruptible_all(&u->peer_wait);
 486         sk->sk_max_ack_backlog  = backlog;
 487         sk->sk_state            = TCP_LISTEN;
 488         /* set credentials so connect can copy them */
 489         init_peercred(sk);
 490         err = 0;
 491
 492 out_unlock:
 493         unix_state_unlock(sk);
 494         put_pid(old_pid);
 495         if (old_cred)
 496                 put_cred(old_cred);
 497 out:
 498         return err;
 499 }
 500
 501 static int unix_release(struct socket *);
 502 static int unix_bind(struct socket *, struct sockaddr *, int);
 503 static int unix_stream_connect(struct socket *, struct sockaddr *,
 504                                int addr_len, int flags);
 505 static int unix_socketpair(struct socket *, struct socket *);
 506 static int unix_accept(struct socket *, struct socket *, int);
 507 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 508 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 509 static unsigned int unix_dgram_poll(struct file *, struct socket *,
 510                                     poll_table *);
 511 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 512 static int unix_shutdown(struct socket *, int);
 513 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
 514                                struct msghdr *, size_t);
 515 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
 516                                struct msghdr *, size_t, int);
 517 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
 518                               struct msghdr *, size_t);
 519 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
 520                               struct msghdr *, size_t, int);
 521 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 522                               int, int);
 523 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
 524                                   struct msghdr *, size_t);
 525 static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
 526                                   struct msghdr *, size_t, int);
 527
 528 static const struct proto_ops unix_stream_ops = {
 529         .family =       PF_UNIX,
 530         .owner =        THIS_MODULE,
 531         .release =      unix_release,
 532         .bind =         unix_bind,
 533         .connect =      unix_stream_connect,
 534         .socketpair =   unix_socketpair,
 535         .accept =       unix_accept,
 536         .getname =      unix_getname,
 537         .poll =         unix_poll,
 538         .ioctl =        unix_ioctl,
 539         .listen =       unix_listen,
 540         .shutdown =     unix_shutdown,
 541         .setsockopt =   sock_no_setsockopt,
 542         .getsockopt =   sock_no_getsockopt,
 543         .sendmsg =      unix_stream_sendmsg,
 544         .recvmsg =      unix_stream_recvmsg,
 545         .mmap =         sock_no_mmap,
 546         .sendpage =     sock_no_sendpage,
 547 };
 548
 549 static const struct proto_ops unix_dgram_ops = {
 550         .family =       PF_UNIX,
 551         .owner =        THIS_MODULE,
 552         .release =      unix_release,
 553         .bind =         unix_bind,
 554         .connect =      unix_dgram_connect,
 555         .socketpair =   unix_socketpair,
 556         .accept =       sock_no_accept,
 557         .getname =      unix_getname,
 558         .poll =         unix_dgram_poll,
 559         .ioctl =        unix_ioctl,
 560         .listen =       sock_no_listen,
 561         .shutdown =     unix_shutdown,
 562         .setsockopt =   sock_no_setsockopt,
 563         .getsockopt =   sock_no_getsockopt,
 564         .sendmsg =      unix_dgram_sendmsg,
 565         .recvmsg =      unix_dgram_recvmsg,
 566         .mmap =         sock_no_mmap,
 567         .sendpage =     sock_no_sendpage,
 568 };
 569
 570 static const struct proto_ops unix_seqpacket_ops = {
 571         .family =       PF_UNIX,
 572         .owner =        THIS_MODULE,
 573         .release =      unix_release,
 574         .bind =         unix_bind,
 575         .connect =      unix_stream_connect,
 576         .socketpair =   unix_socketpair,
 577         .accept =       unix_accept,
 578         .getname =      unix_getname,
 579         .poll =         unix_dgram_poll,
 580         .ioctl =        unix_ioctl,
 581         .listen =       unix_listen,
 582         .shutdown =     unix_shutdown,
 583         .setsockopt =   sock_no_setsockopt,
 584         .getsockopt =   sock_no_getsockopt,
 585         .sendmsg =      unix_seqpacket_sendmsg,
 586         .recvmsg =      unix_seqpacket_recvmsg,
 587         .mmap =         sock_no_mmap,
 588         .sendpage =     sock_no_sendpage,
 589 };
 590
 591 static struct proto unix_proto = {
 592         .name                   = "UNIX",
 593         .owner                  = THIS_MODULE,
 594         .obj_size               = sizeof(struct unix_sock),
 595 };
 596
 597 /*
 598  * AF_UNIX sockets do not interact with hardware, hence they
 599  * dont trigger interrupts - so it's safe for them to have
 600  * bh-unsafe locking for their sk_receive_queue.lock. Split off
 601  * this special lock-class by reinitializing the spinlock key:
 602  */
 603 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
 604
 605 static struct sock *unix_create1(struct net *net, struct socket *sock)
 606 {
 607         struct sock *sk = NULL;
 608         struct unix_sock *u;
 609
 610         atomic_long_inc(&unix_nr_socks);
 611         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 612                 goto out;
 613
 614         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
 615         if (!sk)
 616                 goto out;
 617
 618         sock_init_data(sock, sk);
 619         lockdep_set_class(&sk->sk_receive_queue.lock,
 620                                 &af_unix_sk_receive_queue_lock_key);
 621
 622         sk->sk_write_space      = unix_write_space;
 623         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 624         sk->sk_destruct         = unix_sock_destructor;
 625         u         = unix_sk(sk);
 626         u->dentry = NULL;
 627         u->mnt    = NULL;
 628         spin_lock_init(&u->lock);
 629         atomic_long_set(&u->inflight, 0);
 630         INIT_LIST_HEAD(&u->link);
 631         mutex_init(&u->readlock); /* single task reading lock */
 632         init_waitqueue_head(&u->peer_wait);
 633         unix_insert_socket(unix_sockets_unbound, sk);
 634 out:
 635         if (sk == NULL)
 636                 atomic_long_dec(&unix_nr_socks);
 637         else {
 638                 local_bh_disable();
 639                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 640                 local_bh_enable();
 641         }
 642         return sk;
 643 }
 644
 645 static int unix_create(struct net *net, struct socket *sock, int protocol,
 646                        int kern)
 647 {
 648         if (protocol && protocol != PF_UNIX)
 649                 return -EPROTONOSUPPORT;
 650
 651         sock->state = SS_UNCONNECTED;
 652
 653         switch (sock->type) {
 654         case SOCK_STREAM:
 655                 sock->ops = &unix_stream_ops;
 656                 break;
 657                 /*
 658                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 659                  *      nothing uses it.
 660                  */
 661         case SOCK_RAW:
 662                 sock->type = SOCK_DGRAM;
 663         case SOCK_DGRAM:
 664                 sock->ops = &unix_dgram_ops;
 665                 break;
 666         case SOCK_SEQPACKET:
 667                 sock->ops = &unix_seqpacket_ops;
 668                 break;
 669         default:
 670                 return -ESOCKTNOSUPPORT;
 671         }
 672
 673         return unix_create1(net, sock) ? 0 : -ENOMEM;
 674 }
 675
 676 static int unix_release(struct socket *sock)
 677 {
 678         struct sock *sk = sock->sk;
 679
 680         if (!sk)
 681                 return 0;
 682
 683         unix_release_sock(sk, 0);
 684         sock->sk = NULL;
 685
 686         return 0;
 687 }
 688
 689 static int unix_autobind(struct socket *sock)
 690 {
 691         struct sock *sk = sock->sk;
 692         struct net *net = sock_net(sk);
 693         struct unix_sock *u = unix_sk(sk);
 694         static u32 ordernum = 1;
 695         struct unix_address *addr;
 696         int err;
 697         unsigned int retries = 0;
 698
 699         err = mutex_lock_interruptible(&u->readlock);
 700         if (err)
 701                 return err;
 702
 703         err = 0;
 704         if (u->addr)
 705                 goto out;
 706
 707         err = -ENOMEM;
 708         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 709         if (!addr)
 710                 goto out;
 711
 712         addr->name->sun_family = AF_UNIX;
 713         atomic_set(&addr->refcnt, 1);
 714
 715 retry:
 716         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 717         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 718
 719         spin_lock(&unix_table_lock);
 720         ordernum = (ordernum+1)&0xFFFFF;
 721
 722         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 723                                       addr->hash)) {
 724                 spin_unlock(&unix_table_lock);
 725                 /*
 726                  * __unix_find_socket_byname() may take long time if many names
 727                  * are already in use.
 728                  */
 729                 cond_resched();
 730                 /* Give up if all names seems to be in use. */
 731                 if (retries++ == 0xFFFFF) {
 732                         err = -ENOSPC;
 733                         kfree(addr);
 734                         goto out;
 735                 }
 736                 goto retry;
 737         }
 738         addr->hash ^= sk->sk_type;
 739
 740         __unix_remove_socket(sk);
 741         u->addr = addr;
 742         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 743         spin_unlock(&unix_table_lock);
 744         err = 0;
 745
 746 out:    mutex_unlock(&u->readlock);
 747         return err;
 748 }
 749
 750 static struct sock *unix_find_other(struct net *net,
 751                                     struct sockaddr_un *sunname, int len,
 752                                     int type, unsigned hash, int *error)
 753 {
 754         struct sock *u;
 755         struct path path;
 756         int err = 0;
 757
 758         if (sunname->sun_path[0]) {
 759                 struct inode *inode;
 760                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 761                 if (err)
 762                         goto fail;
 763                 inode = path.dentry->d_inode;
 764                 err = inode_permission(inode, MAY_WRITE);
 765                 if (err)
 766                         goto put_fail;
 767
 768                 err = -ECONNREFUSED;
 769                 if (!S_ISSOCK(inode->i_mode))
 770                         goto put_fail;
 771                 u = unix_find_socket_byinode(inode);
 772                 if (!u)
 773                         goto put_fail;
 774
 775                 if (u->sk_type == type)
 776                         touch_atime(path.mnt, path.dentry);
 777
 778                 path_put(&path);
 779
 780                 err = -EPROTOTYPE;
 781                 if (u->sk_type != type) {
 782                         sock_put(u);
 783                         goto fail;
 784                 }
 785         } else {
 786                 err = -ECONNREFUSED;
 787                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 788                 if (u) {
 789                         struct dentry *dentry;
 790                         dentry = unix_sk(u)->dentry;
 791                         if (dentry)
 792                                 touch_atime(unix_sk(u)->mnt, dentry);
 793                 } else
 794                         goto fail;
 795         }
 796         return u;
 797
 798 put_fail:
 799         path_put(&path);
 800 fail:
 801         *error = err;
 802         return NULL;
 803 }
 804
 805
 806 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 807 {
 808         struct sock *sk = sock->sk;
 809         struct net *net = sock_net(sk);
 810         struct unix_sock *u = unix_sk(sk);
 811         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 812         char *sun_path = sunaddr->sun_path;
 813         struct dentry *dentry = NULL;
 814         struct path path;
 815         int err;
 816         unsigned hash;
 817         struct unix_address *addr;
 818         struct hlist_head *list;
 819
 820         err = -EINVAL;
 821         if (sunaddr->sun_family != AF_UNIX)
 822                 goto out;
 823
 824         if (addr_len == sizeof(short)) {
 825                 err = unix_autobind(sock);
 826                 goto out;
 827         }
 828
 829         err = unix_mkname(sunaddr, addr_len, &hash);
 830         if (err < 0)
 831                 goto out;
 832         addr_len = err;
 833
 834         err = mutex_lock_interruptible(&u->readlock);
 835         if (err)
 836                 goto out;
 837
 838         err = -EINVAL;
 839         if (u->addr)
 840                 goto out_up;
 841
 842         err = -ENOMEM;
 843         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 844         if (!addr)
 845                 goto out_up;
 846
 847         memcpy(addr->name, sunaddr, addr_len);
 848         addr->len = addr_len;
 849         addr->hash = hash ^ sk->sk_type;
 850         atomic_set(&addr->refcnt, 1);
 851
 852         if (sun_path[0]) {
 853                 unsigned int mode;
 854                 err = 0;
 855                 /*
 856                  * Get the parent directory, calculate the hash for last
 857                  * component.
 858                  */
 859                 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 860                 err = PTR_ERR(dentry);
 861                 if (IS_ERR(dentry))
 862                         goto out_mknod_parent;
 863
 864                 /*
 865                  * All right, let's create it.
 866                  */
 867                 mode = S_IFSOCK |
 868                        (SOCK_INODE(sock)->i_mode & ~current_umask());
 869                 err = mnt_want_write(path.mnt);
 870                 if (err)
 871                         goto out_mknod_dput;
 872                 err = security_path_mknod(&path, dentry, mode, 0);
 873                 if (err)
 874                         goto out_mknod_drop_write;
 875                 err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
 876 out_mknod_drop_write:
 877                 mnt_drop_write(path.mnt);
 878                 if (err)
 879                         goto out_mknod_dput;
 880                 mutex_unlock(&path.dentry->d_inode->i_mutex);
 881                 dput(path.dentry);
 882                 path.dentry = dentry;
 883
 884                 addr->hash = UNIX_HASH_SIZE;
 885         }
 886
 887         spin_lock(&unix_table_lock);
 888
 889         if (!sun_path[0]) {
 890                 err = -EADDRINUSE;
 891                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
 892                                               sk->sk_type, hash)) {
 893                         unix_release_addr(addr);
 894                         goto out_unlock;
 895                 }
 896
 897                 list = &unix_socket_table[addr->hash];
 898         } else {
 899                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
 900                 u->dentry = path.dentry;
 901                 u->mnt    = path.mnt;
 902         }
 903
 904         err = 0;
 905         __unix_remove_socket(sk);
 906         u->addr = addr;
 907         __unix_insert_socket(list, sk);
 908
 909 out_unlock:
 910         spin_unlock(&unix_table_lock);
 911 out_up:
 912         mutex_unlock(&u->readlock);
 913 out:
 914         return err;
 915
 916 out_mknod_dput:
 917         dput(dentry);
 918         mutex_unlock(&path.dentry->d_inode->i_mutex);
 919         path_put(&path);
 920 out_mknod_parent:
 921         if (err == -EEXIST)
 922                 err = -EADDRINUSE;
 923         unix_release_addr(addr);
 924         goto out_up;
 925 }
 926
 927 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
 928 {
 929         if (unlikely(sk1 == sk2) || !sk2) {
 930                 unix_state_lock(sk1);
 931                 return;
 932         }
 933         if (sk1 < sk2) {
 934                 unix_state_lock(sk1);
 935                 unix_state_lock_nested(sk2);
 936         } else {
 937                 unix_state_lock(sk2);
 938                 unix_state_lock_nested(sk1);
 939         }
 940 }
 941
 942 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
 943 {
 944         if (unlikely(sk1 == sk2) || !sk2) {
 945                 unix_state_unlock(sk1);
 946                 return;
 947         }
 948         unix_state_unlock(sk1);
 949         unix_state_unlock(sk2);
 950 }
 951
 952 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 953                               int alen, int flags)
 954 {
 955         struct sock *sk = sock->sk;
 956         struct net *net = sock_net(sk);
 957         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
 958         struct sock *other;
 959         unsigned hash;
 960         int err;
 961
 962         if (addr->sa_family != AF_UNSPEC) {
 963                 err = unix_mkname(sunaddr, alen, &hash);
 964                 if (err < 0)
 965                         goto out;
 966                 alen = err;
 967
 968                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
 969                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
 970                         goto out;
 971
 972 restart:
 973                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
 974                 if (!other)
 975                         goto out;
 976
 977                 unix_state_double_lock(sk, other);
 978
 979                 /* Apparently VFS overslept socket death. Retry. */
 980                 if (sock_flag(other, SOCK_DEAD)) {
 981                         unix_state_double_unlock(sk, other);
 982                         sock_put(other);
 983                         goto restart;
 984                 }
 985
 986                 err = -EPERM;
 987                 if (!unix_may_send(sk, other))
 988                         goto out_unlock;
 989
 990                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
 991                 if (err)
 992                         goto out_unlock;
 993
 994         } else {
 995                 /*
 996                  *      1003.1g breaking connected state with AF_UNSPEC
 997                  */
 998                 other = NULL;
 999                 unix_state_double_lock(sk, other);
1000         }
1001
1002         /*
1003          * If it was connected, reconnect.
1004          */
1005         if (unix_peer(sk)) {
1006                 struct sock *old_peer = unix_peer(sk);
1007                 unix_peer(sk) = other;
1008                 unix_state_double_unlock(sk, other);
1009
1010                 if (other != old_peer)
1011                         unix_dgram_disconnected(sk, old_peer);
1012                 sock_put(old_peer);
1013         } else {
1014                 unix_peer(sk) = other;
1015                 unix_state_double_unlock(sk, other);
1016         }
1017         return 0;
1018
1019 out_unlock:
1020         unix_state_double_unlock(sk, other);
1021         sock_put(other);
1022 out:
1023         return err;
1024 }
1025
1026 static long unix_wait_for_peer(struct sock *other, long timeo)
1027 {
1028         struct unix_sock *u = unix_sk(other);
1029         int sched;
1030         DEFINE_WAIT(wait);
1031
1032         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1033
1034         sched = !sock_flag(other, SOCK_DEAD) &&
1035                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1036                 unix_recvq_full(other);
1037
1038         unix_state_unlock(other);
1039
1040         if (sched)
1041                 timeo = schedule_timeout(timeo);
1042
1043         finish_wait(&u->peer_wait, &wait);
1044         return timeo;
1045 }
1046
1047 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1048                                int addr_len, int flags)
1049 {
1050         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1051         struct sock *sk = sock->sk;
1052         struct net *net = sock_net(sk);
1053         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1054         struct sock *newsk = NULL;
1055         struct sock *other = NULL;
1056         struct sk_buff *skb = NULL;
1057         unsigned hash;
1058         int st;
1059         int err;
1060         long timeo;
1061
1062         err = unix_mkname(sunaddr, addr_len, &hash);
1063         if (err < 0)
1064                 goto out;
1065         addr_len = err;
1066
1067         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1068             (err = unix_autobind(sock)) != 0)
1069                 goto out;
1070
1071         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1072
1073         /* First of all allocate resources.
1074            If we will make it after state is locked,
1075            we will have to recheck all again in any case.
1076          */
1077
1078         err = -ENOMEM;
1079
1080         /* create new sock for complete connection */
1081         newsk = unix_create1(sock_net(sk), NULL);
1082         if (newsk == NULL)
1083                 goto out;
1084
1085         /* Allocate skb for sending to listening sock */
1086         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1087         if (skb == NULL)
1088                 goto out;
1089
1090 restart:
1091         /*  Find listening sock. */
1092         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1093         if (!other)
1094                 goto out;
1095
1096         /* Latch state of peer */
1097         unix_state_lock(other);
1098
1099         /* Apparently VFS overslept socket death. Retry. */
1100         if (sock_flag(other, SOCK_DEAD)) {
1101                 unix_state_unlock(other);
1102                 sock_put(other);
1103                 goto restart;
1104         }
1105
1106         err = -ECONNREFUSED;
1107         if (other->sk_state != TCP_LISTEN)
1108                 goto out_unlock;
1109         if (other->sk_shutdown & RCV_SHUTDOWN)
1110                 goto out_unlock;
1111
1112         if (unix_recvq_full(other)) {
1113                 err = -EAGAIN;
1114                 if (!timeo)
1115                         goto out_unlock;
1116
1117                 timeo = unix_wait_for_peer(other, timeo);
1118
1119                 err = sock_intr_errno(timeo);
1120                 if (signal_pending(current))
1121                         goto out;
1122                 sock_put(other);
1123                 goto restart;
1124         }
1125
1126         /* Latch our state.
1127
1128            It is tricky place. We need to grab our state lock and cannot
1129            drop lock on peer. It is dangerous because deadlock is
1130            possible. Connect to self case and simultaneous
1131            attempt to connect are eliminated by checking socket
1132            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1133            check this before attempt to grab lock.
1134
1135            Well, and we have to recheck the state after socket locked.
1136          */
1137         st = sk->sk_state;
1138
1139         switch (st) {
1140         case TCP_CLOSE:
1141                 /* This is ok... continue with connect */
1142                 break;
1143         case TCP_ESTABLISHED:
1144                 /* Socket is already connected */
1145                 err = -EISCONN;
1146                 goto out_unlock;
1147         default:
1148                 err = -EINVAL;
1149                 goto out_unlock;
1150         }
1151
1152         unix_state_lock_nested(sk);
1153
1154         if (sk->sk_state != st) {
1155                 unix_state_unlock(sk);
1156                 unix_state_unlock(other);
1157                 sock_put(other);
1158                 goto restart;
1159         }
1160
1161         err = security_unix_stream_connect(sk, other, newsk);
1162         if (err) {
1163                 unix_state_unlock(sk);
1164                 goto out_unlock;
1165         }
1166
1167         /* The way is open! Fastly set all the necessary fields... */
1168
1169         sock_hold(sk);
1170         unix_peer(newsk)        = sk;
1171         newsk->sk_state         = TCP_ESTABLISHED;
1172         newsk->sk_type          = sk->sk_type;
1173         init_peercred(newsk);
1174         newu = unix_sk(newsk);
1175         RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1176         otheru = unix_sk(other);
1177
1178         /* copy address information from listening to new sock*/
1179         if (otheru->addr) {
1180                 atomic_inc(&otheru->addr->refcnt);
1181                 newu->addr = otheru->addr;
1182         }
1183         if (otheru->dentry) {
1184                 newu->dentry    = dget(otheru->dentry);
1185                 newu->mnt       = mntget(otheru->mnt);
1186         }
1187
1188         /* Set credentials */
1189         copy_peercred(sk, other);
1190
1191         sock->state     = SS_CONNECTED;
1192         sk->sk_state    = TCP_ESTABLISHED;
1193         sock_hold(newsk);
1194
1195         smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
1196         unix_peer(sk)   = newsk;
1197
1198         unix_state_unlock(sk);
1199
1200         /* take ten and and send info to listening sock */
1201         spin_lock(&other->sk_receive_queue.lock);
1202         __skb_queue_tail(&other->sk_receive_queue, skb);
1203         spin_unlock(&other->sk_receive_queue.lock);
1204         unix_state_unlock(other);
1205         other->sk_data_ready(other, 0);
1206         sock_put(other);
1207         return 0;
1208
1209 out_unlock:
1210         if (other)
1211                 unix_state_unlock(other);
1212
1213 out:
1214         kfree_skb(skb);
1215         if (newsk)
1216                 unix_release_sock(newsk, 0);
1217         if (other)
1218                 sock_put(other);
1219         return err;
1220 }
1221
1222 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1223 {
1224         struct sock *ska = socka->sk, *skb = sockb->sk;
1225
1226         /* Join our sockets back to back */
1227         sock_hold(ska);
1228         sock_hold(skb);
1229         unix_peer(ska) = skb;
1230         unix_peer(skb) = ska;
1231         init_peercred(ska);
1232         init_peercred(skb);
1233
1234         if (ska->sk_type != SOCK_DGRAM) {
1235                 ska->sk_state = TCP_ESTABLISHED;
1236                 skb->sk_state = TCP_ESTABLISHED;
1237                 socka->state  = SS_CONNECTED;
1238                 sockb->state  = SS_CONNECTED;
1239         }
1240         return 0;
1241 }
1242
1243 static void unix_sock_inherit_flags(const struct socket *old,
1244                                     struct socket *new)
1245 {
1246         if (test_bit(SOCK_PASSCRED, &old->flags))
1247                 set_bit(SOCK_PASSCRED, &new->flags);
1248         if (test_bit(SOCK_PASSSEC, &old->flags))
1249                 set_bit(SOCK_PASSSEC, &new->flags);
1250 }
1251
1252 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1253 {
1254         struct sock *sk = sock->sk;
1255         struct sock *tsk;
1256         struct sk_buff *skb;
1257         int err;
1258
1259         err = -EOPNOTSUPP;
1260         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1261                 goto out;
1262
1263         err = -EINVAL;
1264         if (sk->sk_state != TCP_LISTEN)
1265                 goto out;
1266
1267         /* If socket state is TCP_LISTEN it cannot change (for now...),
1268          * so that no locks are necessary.
1269          */
1270
1271         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1272         if (!skb) {
1273                 /* This means receive shutdown. */
1274                 if (err == 0)
1275                         err = -EINVAL;
1276                 goto out;
1277         }
1278
1279         tsk = skb->sk;
1280         skb_free_datagram(sk, skb);
1281         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1282
1283         /* attach accepted sock to socket */
1284         unix_state_lock(tsk);
1285         newsock->state = SS_CONNECTED;
1286         unix_sock_inherit_flags(sock, newsock);
1287         sock_graft(tsk, newsock);
1288         unix_state_unlock(tsk);
1289         return 0;
1290
1291 out:
1292         return err;
1293 }
1294
1295
1296 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1297 {
1298         struct sock *sk = sock->sk;
1299         struct unix_sock *u;
1300         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1301         int err = 0;
1302
1303         if (peer) {
1304                 sk = unix_peer_get(sk);
1305
1306                 err = -ENOTCONN;
1307                 if (!sk)
1308                         goto out;
1309                 err = 0;
1310         } else {
1311                 sock_hold(sk);
1312         }
1313
1314         u = unix_sk(sk);
1315         unix_state_lock(sk);
1316         if (!u->addr) {
1317                 sunaddr->sun_family = AF_UNIX;
1318                 sunaddr->sun_path[0] = 0;
1319                 *uaddr_len = sizeof(short);
1320         } else {
1321                 struct unix_address *addr = u->addr;
1322
1323                 *uaddr_len = addr->len;
1324                 memcpy(sunaddr, addr->name, *uaddr_len);
1325         }
1326         unix_state_unlock(sk);
1327         sock_put(sk);
1328 out:
1329         return err;
1330 }
1331
1332 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1333 {
1334         int i;
1335
1336         scm->fp = UNIXCB(skb).fp;
1337         UNIXCB(skb).fp = NULL;
1338
1339         for (i = scm->fp->count-1; i >= 0; i--)
1340                 unix_notinflight(scm->fp->fp[i]);
1341 }
1342
1343 static void unix_destruct_scm(struct sk_buff *skb)
1344 {
1345         struct scm_cookie scm;
1346         memset(&scm, 0, sizeof(scm));
1347         scm.pid  = UNIXCB(skb).pid;
1348         scm.cred = UNIXCB(skb).cred;
1349         if (UNIXCB(skb).fp)
1350                 unix_detach_fds(&scm, skb);
1351
1352         /* Alas, it calls VFS */
1353         /* So fscking what? fput() had been SMP-safe since the last Summer */
1354         scm_destroy(&scm);
1355         sock_wfree(skb);
1356 }
1357
1358 #define MAX_RECURSION_LEVEL 4
1359
1360 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1361 {
1362         int i;
1363         unsigned char max_level = 0;
1364         int unix_sock_count = 0;
1365
1366         for (i = scm->fp->count - 1; i >= 0; i--) {
1367                 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1368
1369                 if (sk) {
1370                         unix_sock_count++;
1371                         max_level = max(max_level,
1372                                         unix_sk(sk)->recursion_level);
1373                 }
1374         }
1375         if (unlikely(max_level > MAX_RECURSION_LEVEL))
1376                 return -ETOOMANYREFS;
1377
1378         /*
1379          * Need to duplicate file references for the sake of garbage
1380          * collection.  Otherwise a socket in the fps might become a
1381          * candidate for GC while the skb is not yet queued.
1382          */
1383         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1384         if (!UNIXCB(skb).fp)
1385                 return -ENOMEM;
1386
1387         if (unix_sock_count) {
1388                 for (i = scm->fp->count - 1; i >= 0; i--)
1389                         unix_inflight(scm->fp->fp[i]);
1390         }
1391         return max_level;
1392 }
1393
1394 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1395 {
1396         int err = 0;
1397
1398         UNIXCB(skb).pid  = get_pid(scm->pid);
1399         if (scm->cred)
1400                 UNIXCB(skb).cred = get_cred(scm->cred);
1401         UNIXCB(skb).fp = NULL;
1402         if (scm->fp && send_fds)
1403                 err = unix_attach_fds(scm, skb);
1404
1405         skb->destructor = unix_destruct_scm;
1406         return err;
1407 }
1408
1409 /*
1410  * Some apps rely on write() giving SCM_CREDENTIALS
1411  * We include credentials if source or destination socket
1412  * asserted SOCK_PASSCRED.
1413  */
1414 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1415                             const struct sock *other)
1416 {
1417         if (UNIXCB(skb).cred)
1418                 return;
1419         if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1420             !other->sk_socket ||
1421             test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1422                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1423                 UNIXCB(skb).cred = get_current_cred();
1424         }
1425 }
1426
1427 /*
1428  *      Send AF_UNIX data.
1429  */
1430
1431 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1432                               struct msghdr *msg, size_t len)
1433 {
1434         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1435         struct sock *sk = sock->sk;
1436         struct net *net = sock_net(sk);
1437         struct unix_sock *u = unix_sk(sk);
1438         struct sockaddr_un *sunaddr = msg->msg_name;
1439         struct sock *other = NULL;
1440         int namelen = 0; /* fake GCC */
1441         int err;
1442         unsigned hash;
1443         struct sk_buff *skb;
1444         long timeo;
1445         struct scm_cookie tmp_scm;
1446         int max_level;
1447
1448         if (NULL == siocb->scm)
1449                 siocb->scm = &tmp_scm;
1450         wait_for_unix_gc();
1451         err = scm_send(sock, msg, siocb->scm, false);
1452         if (err < 0)
1453                 return err;
1454
1455         err = -EOPNOTSUPP;
1456         if (msg->msg_flags&MSG_OOB)
1457                 goto out;
1458
1459         if (msg->msg_namelen) {
1460                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1461                 if (err < 0)
1462                         goto out;
1463                 namelen = err;
1464         } else {
1465                 sunaddr = NULL;
1466                 err = -ENOTCONN;
1467                 other = unix_peer_get(sk);
1468                 if (!other)
1469                         goto out;
1470         }
1471
1472         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1473             && (err = unix_autobind(sock)) != 0)
1474                 goto out;
1475
1476         err = -EMSGSIZE;
1477         if (len > sk->sk_sndbuf - 32)
1478                 goto out;
1479
1480         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1481         if (skb == NULL)
1482                 goto out;
1483
1484         err = unix_scm_to_skb(siocb->scm, skb, true);
1485         if (err < 0)
1486                 goto out_free;
1487         max_level = err + 1;
1488         unix_get_secdata(siocb->scm, skb);
1489
1490         skb_reset_transport_header(skb);
1491         err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1492         if (err)
1493                 goto out_free;
1494
1495         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1496
1497 restart:
1498         if (!other) {
1499                 err = -ECONNRESET;
1500                 if (sunaddr == NULL)
1501                         goto out_free;
1502
1503                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1504                                         hash, &err);
1505                 if (other == NULL)
1506                         goto out_free;
1507         }
1508
1509         if (sk_filter(other, skb) < 0) {
1510                 /* Toss the packet but do not return any error to the sender */
1511                 err = len;
1512                 goto out_free;
1513         }
1514
1515         unix_state_lock(other);
1516         err = -EPERM;
1517         if (!unix_may_send(sk, other))
1518                 goto out_unlock;
1519
1520         if (sock_flag(other, SOCK_DEAD)) {
1521                 /*
1522                  *      Check with 1003.1g - what should
1523                  *      datagram error
1524                  */
1525                 unix_state_unlock(other);
1526                 sock_put(other);
1527
1528                 err = 0;
1529                 unix_state_lock(sk);
1530                 if (unix_peer(sk) == other) {
1531                         unix_peer(sk) = NULL;
1532                         unix_state_unlock(sk);
1533
1534                         unix_dgram_disconnected(sk, other);
1535                         sock_put(other);
1536                         err = -ECONNREFUSED;
1537                 } else {
1538                         unix_state_unlock(sk);
1539                 }
1540
1541                 other = NULL;
1542                 if (err)
1543                         goto out_free;
1544                 goto restart;
1545         }
1546
1547         err = -EPIPE;
1548         if (other->sk_shutdown & RCV_SHUTDOWN)
1549                 goto out_unlock;
1550
1551         if (sk->sk_type != SOCK_SEQPACKET) {
1552                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1553                 if (err)
1554                         goto out_unlock;
1555         }
1556
1557         if (unix_peer(other) != sk && unix_recvq_full(other)) {
1558                 if (!timeo) {
1559                         err = -EAGAIN;
1560                         goto out_unlock;
1561                 }
1562
1563                 timeo = unix_wait_for_peer(other, timeo);
1564
1565                 err = sock_intr_errno(timeo);
1566                 if (signal_pending(current))
1567                         goto out_free;
1568
1569                 goto restart;
1570         }
1571
1572         if (sock_flag(other, SOCK_RCVTSTAMP))
1573                 __net_timestamp(skb);
1574         maybe_add_creds(skb, sock, other);
1575         skb_queue_tail(&other->sk_receive_queue, skb);
1576         if (max_level > unix_sk(other)->recursion_level)
1577                 unix_sk(other)->recursion_level = max_level;
1578         unix_state_unlock(other);
1579         other->sk_data_ready(other, len);
1580         sock_put(other);
1581         scm_destroy(siocb->scm);
1582         return len;
1583
1584 out_unlock:
1585         unix_state_unlock(other);
1586 out_free:
1587         kfree_skb(skb);
1588 out:
1589         if (other)
1590                 sock_put(other);
1591         scm_destroy(siocb->scm);
1592         return err;
1593 }
1594
1595
1596 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1597                                struct msghdr *msg, size_t len)
1598 {
1599         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1600         struct sock *sk = sock->sk;
1601         struct sock *other = NULL;
1602         int err, size;
1603         struct sk_buff *skb;
1604         int sent = 0;
1605         struct scm_cookie tmp_scm;
1606         bool fds_sent = false;
1607         int max_level;
1608
1609         if (NULL == siocb->scm)
1610                 siocb->scm = &tmp_scm;
1611         wait_for_unix_gc();
1612         err = scm_send(sock, msg, siocb->scm, false);
1613         if (err < 0)
1614                 return err;
1615
1616         err = -EOPNOTSUPP;
1617         if (msg->msg_flags&MSG_OOB)
1618                 goto out_err;
1619
1620         if (msg->msg_namelen) {
1621                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1622                 goto out_err;
1623         } else {
1624                 err = -ENOTCONN;
1625                 other = unix_peer(sk);
1626                 if (!other)
1627                         goto out_err;
1628         }
1629
1630         if (sk->sk_shutdown & SEND_SHUTDOWN)
1631                 goto pipe_err;
1632
1633         while (sent < len) {
1634                 /*
1635                  *      Optimisation for the fact that under 0.01% of X
1636                  *      messages typically need breaking up.
1637                  */
1638
1639                 size = len-sent;
1640
1641                 /* Keep two messages in the pipe so it schedules better */
1642                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1643                         size = (sk->sk_sndbuf >> 1) - 64;
1644
1645                 if (size > SKB_MAX_ALLOC)
1646                         size = SKB_MAX_ALLOC;
1647
1648                 /*
1649                  *      Grab a buffer
1650                  */
1651
1652                 skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1653                                           &err);
1654
1655                 if (skb == NULL)
1656                         goto out_err;
1657
1658                 /*
1659                  *      If you pass two values to the sock_alloc_send_skb
1660                  *      it tries to grab the large buffer with GFP_NOFS
1661                  *      (which can fail easily), and if it fails grab the
1662                  *      fallback size buffer which is under a page and will
1663                  *      succeed. [Alan]
1664                  */
1665                 size = min_t(int, size, skb_tailroom(skb));
1666
1667
1668                 /* Only send the fds in the first buffer */
1669                 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1670                 if (err < 0) {
1671                         kfree_skb(skb);
1672                         goto out_err;
1673                 }
1674                 max_level = err + 1;
1675                 fds_sent = true;
1676
1677                 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1678                 if (err) {
1679                         kfree_skb(skb);
1680                         goto out_err;
1681                 }
1682
1683                 unix_state_lock(other);
1684
1685                 if (sock_flag(other, SOCK_DEAD) ||
1686                     (other->sk_shutdown & RCV_SHUTDOWN))
1687                         goto pipe_err_free;
1688
1689                 maybe_add_creds(skb, sock, other);
1690                 skb_queue_tail(&other->sk_receive_queue, skb);
1691                 if (max_level > unix_sk(other)->recursion_level)
1692                         unix_sk(other)->recursion_level = max_level;
1693                 unix_state_unlock(other);
1694                 other->sk_data_ready(other, size);
1695                 sent += size;
1696         }
1697
1698         scm_destroy(siocb->scm);
1699         siocb->scm = NULL;
1700
1701         return sent;
1702
1703 pipe_err_free:
1704         unix_state_unlock(other);
1705         kfree_skb(skb);
1706 pipe_err:
1707         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1708                 send_sig(SIGPIPE, current, 0);
1709         err = -EPIPE;
1710 out_err:
1711         scm_destroy(siocb->scm);
1712         siocb->scm = NULL;
1713         return sent ? : err;
1714 }
1715
1716 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1717                                   struct msghdr *msg, size_t len)
1718 {
1719         int err;
1720         struct sock *sk = sock->sk;
1721
1722         err = sock_error(sk);
1723         if (err)
1724                 return err;
1725
1726         if (sk->sk_state != TCP_ESTABLISHED)
1727                 return -ENOTCONN;
1728
1729         if (msg->msg_namelen)
1730                 msg->msg_namelen = 0;
1731
1732         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1733 }
1734
1735 static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1736                               struct msghdr *msg, size_t size,
1737                               int flags)
1738 {
1739         struct sock *sk = sock->sk;
1740
1741         if (sk->sk_state != TCP_ESTABLISHED)
1742                 return -ENOTCONN;
1743
1744         return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1745 }
1746
1747 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1748 {
1749         struct unix_sock *u = unix_sk(sk);
1750
1751         if (u->addr) {
1752                 msg->msg_namelen = u->addr->len;
1753                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1754         }
1755 }
1756
1757 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1758                               struct msghdr *msg, size_t size,
1759                               int flags)
1760 {
1761         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1762         struct scm_cookie tmp_scm;
1763         struct sock *sk = sock->sk;
1764         struct unix_sock *u = unix_sk(sk);
1765         int noblock = flags & MSG_DONTWAIT;
1766         struct sk_buff *skb;
1767         int err;
1768
1769         err = -EOPNOTSUPP;
1770         if (flags&MSG_OOB)
1771                 goto out;
1772
1773         err = mutex_lock_interruptible(&u->readlock);
1774         if (unlikely(err)) {
1775                 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
1776                  * sk_rcvtimeo is not honored by mutex_lock_interruptible()
1777                  */
1778                 err = noblock ? -EAGAIN : -ERESTARTSYS;
1779                 goto out;
1780         }
1781
1782         skb = skb_recv_datagram(sk, flags, noblock, &err);
1783         if (!skb) {
1784                 unix_state_lock(sk);
1785                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1786                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1787                     (sk->sk_shutdown & RCV_SHUTDOWN))
1788                         err = 0;
1789                 unix_state_unlock(sk);
1790                 goto out_unlock;
1791         }
1792
1793         wake_up_interruptible_sync_poll(&u->peer_wait,
1794                                         POLLOUT | POLLWRNORM | POLLWRBAND);
1795
1796         if (msg->msg_name)
1797                 unix_copy_addr(msg, skb->sk);
1798
1799         if (size > skb->len)
1800                 size = skb->len;
1801         else if (size < skb->len)
1802                 msg->msg_flags |= MSG_TRUNC;
1803
1804         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1805         if (err)
1806                 goto out_free;
1807
1808         if (sock_flag(sk, SOCK_RCVTSTAMP))
1809                 __sock_recv_timestamp(msg, sk, skb);
1810
1811         if (!siocb->scm) {
1812                 siocb->scm = &tmp_scm;
1813                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1814         }
1815         scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1816         unix_set_secdata(siocb->scm, skb);
1817
1818         if (!(flags & MSG_PEEK)) {
1819                 if (UNIXCB(skb).fp)
1820                         unix_detach_fds(siocb->scm, skb);
1821         } else {
1822                 /* It is questionable: on PEEK we could:
1823                    - do not return fds - good, but too simple 8)
1824                    - return fds, and do not return them on read (old strategy,
1825                      apparently wrong)
1826                    - clone fds (I chose it for now, it is the most universal
1827                      solution)
1828
1829                    POSIX 1003.1g does not actually define this clearly
1830                    at all. POSIX 1003.1g doesn't define a lot of things
1831                    clearly however!
1832
1833                 */
1834                 if (UNIXCB(skb).fp)
1835                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1836         }
1837         err = size;
1838
1839         scm_recv(sock, msg, siocb->scm, flags);
1840
1841 out_free:
1842         skb_free_datagram(sk, skb);
1843 out_unlock:
1844         mutex_unlock(&u->readlock);
1845 out:
1846         return err;
1847 }
1848
1849 /*
1850  *      Sleep until data has arrive. But check for races..
1851  */
1852
1853 static long unix_stream_data_wait(struct sock *sk, long timeo)
1854 {
1855         DEFINE_WAIT(wait);
1856
1857         unix_state_lock(sk);
1858
1859         for (;;) {
1860                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1861
1862                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1863                     sk->sk_err ||
1864                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1865                     signal_pending(current) ||
1866                     !timeo)
1867                         break;
1868
1869                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1870                 unix_state_unlock(sk);
1871                 timeo = schedule_timeout(timeo);
1872                 unix_state_lock(sk);
1873
1874                 if (sock_flag(sk, SOCK_DEAD))
1875                         break;
1876
1877                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1878         }
1879
1880         finish_wait(sk_sleep(sk), &wait);
1881         unix_state_unlock(sk);
1882         return timeo;
1883 }
1884
1885
1886
1887 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1888                                struct msghdr *msg, size_t size,
1889                                int flags)
1890 {
1891         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1892         struct scm_cookie tmp_scm;
1893         struct sock *sk = sock->sk;
1894         struct unix_sock *u = unix_sk(sk);
1895         struct sockaddr_un *sunaddr = msg->msg_name;
1896         int copied = 0;
1897         int noblock = flags & MSG_DONTWAIT;
1898         int check_creds = 0;
1899         int target;
1900         int err = 0;
1901         long timeo;
1902
1903         err = -EINVAL;
1904         if (sk->sk_state != TCP_ESTABLISHED)
1905                 goto out;
1906
1907         err = -EOPNOTSUPP;
1908         if (flags&MSG_OOB)
1909                 goto out;
1910
1911         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1912         timeo = sock_rcvtimeo(sk, noblock);
1913
1914         /* Lock the socket to prevent queue disordering
1915          * while sleeps in memcpy_tomsg
1916          */
1917
1918         if (!siocb->scm) {
1919                 siocb->scm = &tmp_scm;
1920                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1921         }
1922
1923         err = mutex_lock_interruptible(&u->readlock);
1924         if (unlikely(err)) {
1925                 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
1926                  * sk_rcvtimeo is not honored by mutex_lock_interruptible()
1927                  */
1928                 err = noblock ? -EAGAIN : -ERESTARTSYS;
1929                 goto out;
1930         }
1931
1932         do {
1933                 int chunk;
1934                 struct sk_buff *skb;
1935
1936                 unix_state_lock(sk);
1937                 if (sock_flag(sk, SOCK_DEAD)) {
1938                         err = -ECONNRESET;
1939                         goto unlock;
1940                 }
1941                 skb = skb_peek(&sk->sk_receive_queue);
1942                 if (skb == NULL) {
1943                         unix_sk(sk)->recursion_level = 0;
1944                         if (copied >= target)
1945                                 goto unlock;
1946
1947                         /*
1948                          *      POSIX 1003.1g mandates this order.
1949                          */
1950
1951                         err = sock_error(sk);
1952                         if (err)
1953                                 goto unlock;
1954                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1955                                 goto unlock;
1956
1957                         unix_state_unlock(sk);
1958                         err = -EAGAIN;
1959                         if (!timeo)
1960                                 break;
1961                         mutex_unlock(&u->readlock);
1962
1963                         timeo = unix_stream_data_wait(sk, timeo);
1964
1965                         if (signal_pending(current)
1966                             ||  mutex_lock_interruptible(&u->readlock)) {
1967                                 err = sock_intr_errno(timeo);
1968                                 goto out;
1969                         }
1970
1971                         continue;
1972  unlock:
1973                         unix_state_unlock(sk);
1974                         break;
1975                 }
1976                 unix_state_unlock(sk);
1977
1978                 if (check_creds) {
1979                         /* Never glue messages from different writers */
1980                         if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
1981                             (UNIXCB(skb).cred != siocb->scm->cred))
1982                                 break;
1983                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
1984                         /* Copy credentials */
1985                         scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1986                         check_creds = 1;
1987                 }
1988
1989                 /* Copy address just once */
1990                 if (sunaddr) {
1991                         unix_copy_addr(msg, skb->sk);
1992                         sunaddr = NULL;
1993                 }
1994
1995                 chunk = min_t(unsigned int, skb->len, size);
1996                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1997                         if (copied == 0)
1998                                 copied = -EFAULT;
1999                         break;
2000                 }
2001                 copied += chunk;
2002                 size -= chunk;
2003
2004                 /* Mark read part of skb as used */
2005                 if (!(flags & MSG_PEEK)) {
2006                         skb_pull(skb, chunk);
2007
2008                         if (UNIXCB(skb).fp)
2009                                 unix_detach_fds(siocb->scm, skb);
2010
2011                         if (skb->len)
2012                                 break;
2013
2014                         skb_unlink(skb, &sk->sk_receive_queue);
2015                         consume_skb(skb);
2016
2017                         if (siocb->scm->fp)
2018                                 break;
2019                 } else {
2020                         /* It is questionable, see note in unix_dgram_recvmsg.
2021                          */
2022                         if (UNIXCB(skb).fp)
2023                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2024
2025                         break;
2026                 }
2027         } while (size);
2028
2029         mutex_unlock(&u->readlock);
2030         scm_recv(sock, msg, siocb->scm, flags);
2031 out:
2032         return copied ? : err;
2033 }
2034
2035 static int unix_shutdown(struct socket *sock, int mode)
2036 {
2037         struct sock *sk = sock->sk;
2038         struct sock *other;
2039
2040         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
2041
2042         if (!mode)
2043                 return 0;
2044
2045         unix_state_lock(sk);
2046         sk->sk_shutdown |= mode;
2047         other = unix_peer(sk);
2048         if (other)
2049                 sock_hold(other);
2050         unix_state_unlock(sk);
2051         sk->sk_state_change(sk);
2052
2053         if (other &&
2054                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2055
2056                 int peer_mode = 0;
2057
2058                 if (mode&RCV_SHUTDOWN)
2059                         peer_mode |= SEND_SHUTDOWN;
2060                 if (mode&SEND_SHUTDOWN)
2061                         peer_mode |= RCV_SHUTDOWN;
2062                 unix_state_lock(other);
2063                 other->sk_shutdown |= peer_mode;
2064                 unix_state_unlock(other);
2065                 other->sk_state_change(other);
2066                 if (peer_mode == SHUTDOWN_MASK)
2067                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2068                 else if (peer_mode & RCV_SHUTDOWN)
2069                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2070         }
2071         if (other)
2072                 sock_put(other);
2073
2074         return 0;
2075 }
2076
2077 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2078 {
2079         struct sock *sk = sock->sk;
2080         long amount = 0;
2081         int err;
2082
2083         switch (cmd) {
2084         case SIOCOUTQ:
2085                 amount = sk_wmem_alloc_get(sk);
2086                 err = put_user(amount, (int __user *)arg);
2087                 break;
2088         case SIOCINQ:
2089                 {
2090                         struct sk_buff *skb;
2091
2092                         if (sk->sk_state == TCP_LISTEN) {
2093                                 err = -EINVAL;
2094                                 break;
2095                         }
2096
2097                         spin_lock(&sk->sk_receive_queue.lock);
2098                         if (sk->sk_type == SOCK_STREAM ||
2099                             sk->sk_type == SOCK_SEQPACKET) {
2100                                 skb_queue_walk(&sk->sk_receive_queue, skb)
2101                                         amount += skb->len;
2102                         } else {
2103                                 skb = skb_peek(&sk->sk_receive_queue);
2104                                 if (skb)
2105                                         amount = skb->len;
2106                         }
2107                         spin_unlock(&sk->sk_receive_queue.lock);
2108                         err = put_user(amount, (int __user *)arg);
2109                         break;
2110                 }
2111
2112         default:
2113                 err = -ENOIOCTLCMD;
2114                 break;
2115         }
2116         return err;
2117 }
2118
2119 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2120 {
2121         struct sock *sk = sock->sk;
2122         unsigned int mask;
2123
2124         sock_poll_wait(file, sk_sleep(sk), wait);
2125         mask = 0;
2126
2127         /* exceptional events? */
2128         if (sk->sk_err)
2129                 mask |= POLLERR;
2130         if (sk->sk_shutdown == SHUTDOWN_MASK)
2131                 mask |= POLLHUP;
2132         if (sk->sk_shutdown & RCV_SHUTDOWN)
2133                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2134
2135         /* readable? */
2136         if (!skb_queue_empty(&sk->sk_receive_queue))
2137                 mask |= POLLIN | POLLRDNORM;
2138
2139         /* Connection-based need to check for termination and startup */
2140         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2141             sk->sk_state == TCP_CLOSE)
2142                 mask |= POLLHUP;
2143
2144         /*
2145          * we set writable also when the other side has shut down the
2146          * connection. This prevents stuck sockets.
2147          */
2148         if (unix_writable(sk))
2149                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2150
2151         return mask;
2152 }
2153
2154 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2155                                     poll_table *wait)
2156 {
2157         struct sock *sk = sock->sk, *other;
2158         unsigned int mask, writable;
2159
2160         sock_poll_wait(file, sk_sleep(sk), wait);
2161         mask = 0;
2162
2163         /* exceptional events? */
2164         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2165                 mask |= POLLERR;
2166         if (sk->sk_shutdown & RCV_SHUTDOWN)
2167                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2168         if (sk->sk_shutdown == SHUTDOWN_MASK)
2169                 mask |= POLLHUP;
2170
2171         /* readable? */
2172         if (!skb_queue_empty(&sk->sk_receive_queue))
2173                 mask |= POLLIN | POLLRDNORM;
2174
2175         /* Connection-based need to check for termination and startup */
2176         if (sk->sk_type == SOCK_SEQPACKET) {
2177                 if (sk->sk_state == TCP_CLOSE)
2178                         mask |= POLLHUP;
2179                 /* connection hasn't started yet? */
2180                 if (sk->sk_state == TCP_SYN_SENT)
2181                         return mask;
2182         }
2183
2184         /* No write status requested, avoid expensive OUT tests. */
2185         if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT)))
2186                 return mask;
2187
2188         writable = unix_writable(sk);
2189         other = unix_peer_get(sk);
2190         if (other) {
2191                 if (unix_peer(other) != sk) {
2192                         sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2193                         if (unix_recvq_full(other))
2194                                 writable = 0;
2195                 }
2196                 sock_put(other);
2197         }
2198
2199         if (writable)
2200                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2201         else
2202                 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2203
2204         return mask;
2205 }
2206
2207 #ifdef CONFIG_PROC_FS
2208 static struct sock *first_unix_socket(int *i)
2209 {
2210         for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2211                 if (!hlist_empty(&unix_socket_table[*i]))
2212                         return __sk_head(&unix_socket_table[*i]);
2213         }
2214         return NULL;
2215 }
2216
2217 static struct sock *next_unix_socket(int *i, struct sock *s)
2218 {
2219         struct sock *next = sk_next(s);
2220         /* More in this chain? */
2221         if (next)
2222                 return next;
2223         /* Look for next non-empty chain. */
2224         for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2225                 if (!hlist_empty(&unix_socket_table[*i]))
2226                         return __sk_head(&unix_socket_table[*i]);
2227         }
2228         return NULL;
2229 }
2230
2231 struct unix_iter_state {
2232         struct seq_net_private p;
2233         int i;
2234 };
2235
2236 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2237 {
2238         struct unix_iter_state *iter = seq->private;
2239         loff_t off = 0;
2240         struct sock *s;
2241
2242         for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2243                 if (sock_net(s) != seq_file_net(seq))
2244                         continue;
2245                 if (off == pos)
2246                         return s;
2247                 ++off;
2248         }
2249         return NULL;
2250 }
2251
2252 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2253         __acquires(unix_table_lock)
2254 {
2255         spin_lock(&unix_table_lock);
2256         return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2257 }
2258
2259 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2260 {
2261         struct unix_iter_state *iter = seq->private;
2262         struct sock *sk = v;
2263         ++*pos;
2264
2265         if (v == SEQ_START_TOKEN)
2266                 sk = first_unix_socket(&iter->i);
2267         else
2268                 sk = next_unix_socket(&iter->i, sk);
2269         while (sk && (sock_net(sk) != seq_file_net(seq)))
2270                 sk = next_unix_socket(&iter->i, sk);
2271         return sk;
2272 }
2273
2274 static void unix_seq_stop(struct seq_file *seq, void *v)
2275         __releases(unix_table_lock)
2276 {
2277         spin_unlock(&unix_table_lock);
2278 }
2279
2280 static int unix_seq_show(struct seq_file *seq, void *v)
2281 {
2282
2283         if (v == SEQ_START_TOKEN)
2284                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2285                          "Inode Path\n");
2286         else {
2287                 struct sock *s = v;
2288                 struct unix_sock *u = unix_sk(s);
2289                 unix_state_lock(s);
2290
2291                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2292                         s,
2293                         atomic_read(&s->sk_refcnt),
2294                         0,
2295                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2296                         s->sk_type,
2297                         s->sk_socket ?
2298                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2299                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2300                         sock_i_ino(s));
2301
2302                 if (u->addr) {
2303                         int i, len;
2304                         seq_putc(seq, ' ');
2305
2306                         i = 0;
2307                         len = u->addr->len - sizeof(short);
2308                         if (!UNIX_ABSTRACT(s))
2309                                 len--;
2310                         else {
2311                                 seq_putc(seq, '@');
2312                                 i++;
2313                         }
2314                         for ( ; i < len; i++)
2315                                 seq_putc(seq, u->addr->name->sun_path[i]);
2316                 }
2317                 unix_state_unlock(s);
2318                 seq_putc(seq, '\n');
2319         }
2320
2321         return 0;
2322 }
2323
2324 static const struct seq_operations unix_seq_ops = {
2325         .start  = unix_seq_start,
2326         .next   = unix_seq_next,
2327         .stop   = unix_seq_stop,
2328         .show   = unix_seq_show,
2329 };
2330
2331 static int unix_seq_open(struct inode *inode, struct file *file)
2332 {
2333         return seq_open_net(inode, file, &unix_seq_ops,
2334                             sizeof(struct unix_iter_state));
2335 }
2336
2337 static const struct file_operations unix_seq_fops = {
2338         .owner          = THIS_MODULE,
2339         .open           = unix_seq_open,
2340         .read           = seq_read,
2341         .llseek         = seq_lseek,
2342         .release        = seq_release_net,
2343 };
2344
2345 #endif
2346
2347 static const struct net_proto_family unix_family_ops = {
2348         .family = PF_UNIX,
2349         .create = unix_create,
2350         .owner  = THIS_MODULE,
2351 };
2352
2353
2354 static int __net_init unix_net_init(struct net *net)
2355 {
2356         int error = -ENOMEM;
2357
2358         net->unx.sysctl_max_dgram_qlen = 10;
2359         if (unix_sysctl_register(net))
2360                 goto out;
2361
2362 #ifdef CONFIG_PROC_FS
2363         if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2364                 unix_sysctl_unregister(net);
2365                 goto out;
2366         }
2367 #endif
2368         error = 0;
2369 out:
2370         return error;
2371 }
2372
2373 static void __net_exit unix_net_exit(struct net *net)
2374 {
2375         unix_sysctl_unregister(net);
2376         proc_net_remove(net, "unix");
2377 }
2378
2379 static struct pernet_operations unix_net_ops = {
2380         .init = unix_net_init,
2381         .exit = unix_net_exit,
2382 };
2383
2384 static int __init af_unix_init(void)
2385 {
2386         int rc = -1;
2387         struct sk_buff *dummy_skb;
2388
2389         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2390
2391         rc = proto_register(&unix_proto, 1);
2392         if (rc != 0) {
2393                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2394                        __func__);
2395                 goto out;
2396         }
2397
2398         sock_register(&unix_family_ops);
2399         register_pernet_subsys(&unix_net_ops);
2400 out:
2401         return rc;
2402 }
2403
2404 static void __exit af_unix_exit(void)
2405 {
2406         sock_unregister(PF_UNIX);
2407         proto_unregister(&unix_proto);
2408         unregister_pernet_subsys(&unix_net_ops);
2409 }
2410
2411 /* Earlier than device_initcall() so that other drivers invoking
2412    request_module() don't end up in a loop when modprobe tries
2413    to use a UNIX socket. But later than subsys_initcall() because
2414    we depend on stuff initialised there */
2415 fs_initcall(af_unix_init);
2416 module_exit(af_unix_exit);
2417
2418 MODULE_LICENSE("GPL");
2419 MODULE_ALIAS_NETPROTO(PF_UNIX);