/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing.
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector.
 *		Heiko Eißfeldt	:	Missing verify_area check.
 *		Alan Cox	:	Started POSIXisms.
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting.
 *		Kirk Petersen	:	Made this a module.
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it avoids hashing a huge
 *					number of socks (for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations.
 *	     Alexey Kuznetsov	:	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair.
 *	     Michal Ostrowski	:	Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+).
 *
 * Known differences from the reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with a 0 byte, so that this name space does not
 *		  intersect with BSD names.
 */
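/*
 * Illustrative userspace sketch (not part of this file; the name
 * "\0example" is hypothetical): binding an abstract-namespace socket.
 * Note the leading 0 byte and that the passed address length counts
 * only the bytes actually used, since abstract names are not NUL
 * terminated:
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	int main(void)
 *	{
 *		struct sockaddr_un a;
 *		int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *		memset(&a, 0, sizeof(a));
 *		a.sun_family = AF_UNIX;
 *		a.sun_path[0] = '\0';                  // abstract marker
 *		memcpy(a.sun_path + 1, "example", 7);  // no trailing NUL
 *		return bind(fd, (struct sockaddr *)&a,
 *			    offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 *	}
 */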
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>

static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
static DEFINE_SPINLOCK(unix_table_lock);
static atomic_long_t unix_nr_socks;

#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */

static inline unsigned unix_hash_fold(__wsum n)
{
	unsigned hash = (__force unsigned)n;

	hash ^= hash>>16;
	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- it must not be zero length.
 *		- if it starts with a non-zero byte, it must be NUL
 *		  terminated (an FS object).
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist. However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
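/*
 * Illustrative userspace sketch (not part of this file; the path is
 * hypothetical): because unix_mkname() NUL-terminates at 'len' itself,
 * a filesystem name may legally arrive without a trailing NUL as long
 * as the passed address length stops at the bytes actually used:
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	const char *p = "/tmp/demo.sock";
 *
 *	memcpy(a.sun_path, p, strlen(p));	// no NUL copied
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + strlen(p));
 */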
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned hash)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;
	struct hlist_node *node;

	spin_lock(&unix_table_lock);
	sk_for_each(s, node,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->dentry;

		if (dentry && dentry->d_inode == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

static inline int unix_writable(struct sock *sk)
{
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				POLLOUT | POLLWRNORM | POLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows
 * flow control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is
		 * disconnected, we signal error. Messages are lost.
		 * Do not do this when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	dentry	  = u->dentry;
	u->dentry = NULL;
	mnt	  = u->mnt;
	u->mnt	  = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}
static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;
	const struct cred *old_cred = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
	if (old_cred)
		put_cred(old_cred);
out:
	return err;
}
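/*
 * Illustrative userspace sketch (not part of this file; the path is
 * hypothetical): the usual stream-server sequence that exercises
 * unix_bind(), unix_listen() and unix_accept():
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX,
 *				 .sun_path   = "/tmp/demo.sock" };
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *	int cfd;
 *
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));
 *	listen(fd, 16);
 *	cfd = accept(fd, NULL, NULL);	// blocks until a connect(2)
 */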
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t, int);
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};
/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
			  &af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->dentry = NULL;
	u->mnt	  = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	unix_insert_socket(unix_sockets_unbound, sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock) ? 0 : -ENOMEM;
}
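/*
 * Illustrative userspace sketch (not part of this file): as the switch
 * above shows, socket(AF_UNIX, SOCK_RAW, 0) quietly degrades to a
 * datagram socket instead of failing:
 *
 *	int fd = socket(AF_UNIX, SOCK_RAW, 0);	// behaves as SOCK_DGRAM
 */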
static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	mutex_lock(&u->readlock);

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}
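/*
 * Illustrative userspace sketch (not part of this file): autobind is
 * triggered by bind() with only sun_family, and the kernel picks a
 * unique abstract name of five hex digits that getsockname() can read
 * back:
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	socklen_t alen = sizeof(a);
 *	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *
 *	bind(fd, (struct sockaddr *)&a, sizeof(sa_family_t));
 *	getsockname(fd, (struct sockaddr *)&a, &alen);
 *	// a.sun_path[0] is '\0', followed by e.g. "00014"
 */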
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = path.dentry->d_inode;
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(path.mnt, path.dentry);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->dentry;
			if (dentry)
				touch_atime(unix_sk(u)->mnt, dentry);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	struct dentry *dentry = NULL;
	struct path path;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	mutex_lock(&u->readlock);

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		umode_t mode;
		err = 0;
		/*
		 * Get the parent directory, calculate the hash for last
		 * component.
		 */
		dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_parent;

		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = mnt_want_write(path.mnt);
		if (err)
			goto out_mknod_dput;
		err = security_path_mknod(&path, dentry, mode, 0);
		if (err)
			goto out_mknod_drop_write;
		err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
out_mknod_drop_write:
		mnt_drop_write(path.mnt);
		if (err)
			goto out_mknod_dput;
		mutex_unlock(&path.dentry->d_inode->i_mutex);
		dput(path.dentry);
		path.dentry = dentry;

		addr->hash = UNIX_HASH_SIZE;
	}

	spin_lock(&unix_table_lock);

	if (!sun_path[0]) {
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->dentry = path.dentry;
		u->mnt	  = path.mnt;
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;

out_mknod_dput:
	dput(dentry);
	mutex_unlock(&path.dentry->d_inode->i_mutex);
	path_put(&path);
out_mknod_parent:
	if (err == -EEXIST)
		err = -EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
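/*
 * Illustrative userspace sketch (not part of this file): a connected
 * datagram socket is dissolved from its peer by connecting to
 * AF_UNSPEC, which takes the reconnect branch above with other ==
 * NULL; the old peer may then see ECONNRESET via
 * unix_dgram_disconnected():
 *
 *	struct sockaddr sa = { .sa_family = AF_UNSPEC };
 *
 *	connect(fd, &sa, sizeof(sa));	// disconnect
 */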
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we allocate after the state is locked,
	   we will have to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. It is dangerous because a deadlock is
	   possible. The connect-to-self case and simultaneous
	   connect attempts are eliminated by checking socket
	   state: other is TCP_LISTEN, and if sk is TCP_LISTEN we
	   check this before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}
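/*
 * Illustrative userspace sketch (not part of this file): socketpair()
 * is the usual way to obtain a pre-connected pair; with SOCK_SEQPACKET
 * both ends start in TCP_ESTABLISHED as set above and record
 * boundaries are preserved:
 *
 *	int sv[2];
 *
 *	if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv) == 0)
 *		write(sv[0], "ping", 4);  // read(sv[1], ...) returns one
 *					  // 4-byte record
 */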
static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}

static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}
static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid  = UNIXCB(skb).pid;
	scm.cred = UNIXCB(skb).cred;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

#define MAX_RECURSION_LEVEL 4

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;
	unsigned char max_level = 0;
	int unix_sock_count = 0;

	for (i = scm->fp->count - 1; i >= 0; i--) {
		struct sock *sk = unix_get_socket(scm->fp->fp[i]);

		if (sk) {
			unix_sock_count++;
			max_level = max(max_level,
					unix_sk(sk)->recursion_level);
		}
	}
	if (unlikely(max_level > MAX_RECURSION_LEVEL))
		return -ETOOMANYREFS;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection. Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	if (unix_sock_count) {
		for (i = scm->fp->count - 1; i >= 0; i--)
			unix_inflight(scm->fp->fp[i]);
	}
	return max_level;
}
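/*
 * Illustrative userspace sketch (not part of this file; sock_fd and
 * fd_to_pass are hypothetical, already-open descriptors): passing a
 * file descriptor with SCM_RIGHTS. On the kernel side above the file
 * references are duplicated and any AF_UNIX sockets among them are
 * marked in-flight for the garbage collector:
 *
 *	char cbuf[CMSG_SPACE(sizeof(int))] = {0}, dummy = 'x';
 *	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
 *	struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *			     .msg_control = cbuf,
 *			     .msg_controllen = sizeof(cbuf) };
 *	struct cmsghdr *c = CMSG_FIRSTHDR(&mh);
 *
 *	c->cmsg_level = SOL_SOCKET;
 *	c->cmsg_type  = SCM_RIGHTS;
 *	c->cmsg_len   = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(c), &fd_to_pass, sizeof(int));
 *	sendmsg(sock_fd, &mh, 0);
 */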
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	if (scm->cred)
		UNIXCB(skb).cred = get_cred(scm->cred);
	UNIXCB(skb).fp = NULL;
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).cred)
		return;
	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
	    !other->sk_socket ||
	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		UNIXCB(skb).cred = get_current_cred();
	}
}
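/*
 * Illustrative userspace sketch (not part of this file): a receiver
 * that sets SO_PASSCRED gets an SCM_CREDENTIALS cmsg carrying a
 * struct ucred (pid/uid/gid of the writer) even when the sender used
 * plain write(), which is exactly what maybe_add_creds() arranges:
 *
 *	int one = 1;
 *	char data[64];
 *	char cbuf[CMSG_SPACE(sizeof(struct ucred))];
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *			     .msg_control = cbuf,
 *			     .msg_controllen = sizeof(cbuf) };
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
 *	recvmsg(fd, &mh, 0);	// then walk CMSG_FIRSTHDR(&mh) etc.
 */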
/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
			      struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie tmp_scm;
	int max_level;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(siocb->scm, skb, true);
	if (err < 0)
		goto out_free;
	max_level = err + 1;
	unix_get_secdata(siocb->scm, skb);

	skb_reset_transport_header(skb);
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 *	Check with 1003.1g - what should a
		 *	datagram error return?
		 */
		unix_state_unlock(other);
		sock_put(other);

		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	if (max_level > unix_sk(other)->recursion_level)
		unix_sk(other)->recursion_level = max_level;
	unix_state_unlock(other);
	other->sk_data_ready(other, len);
	sock_put(other);
	scm_destroy(siocb->scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(siocb->scm);
	return err;
}
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie tmp_scm;
	bool fds_sent = false;
	int max_level;

	if (NULL == siocb->scm)
		siocb->scm = &tmp_scm;
	wait_for_unix_gc();
	err = scm_send(sock, msg, siocb->scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		/*
		 *	Optimisation for the fact that under 0.01% of X
		 *	messages typically need breaking up.
		 */

		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > ((sk->sk_sndbuf >> 1) - 64))
			size = (sk->sk_sndbuf >> 1) - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
					  &err);

		if (skb == NULL)
			goto out_err;

		/*
		 *	If you pass two values to the sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grab the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		size = min_t(int, size, skb_tailroom(skb));

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		max_level = err + 1;
		fds_sent = true;

		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		if (max_level > unix_sk(other)->recursion_level)
			unix_sk(other)->recursion_level = max_level;
		unix_state_unlock(other);
		other->sk_data_ready(other, size);
		sent += size;
	}

	scm_destroy(siocb->scm);
	siocb->scm = NULL;

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(siocb->scm);
	siocb->scm = NULL;
	return sent ? : err;
}
static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
				  struct msghdr *msg, size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(kiocb, sock, msg, len);
}

static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
				  struct msghdr *msg, size_t size,
				  int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
}

static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	msg->msg_namelen = 0;
	if (u->addr) {
		msg->msg_namelen = u->addr->len;
		memcpy(msg->msg_name, u->addr->name, u->addr->len);
	}
}

static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	err = mutex_lock_interruptible(&u->readlock);
	if (err) {
		err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
		goto out;
	}

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	wake_up_interruptible_sync_poll(&u->peer_wait,
					POLLOUT | POLLWRNORM | POLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len)
		size = skb->len;
	else if (size < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}
	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
	unix_set_secdata(siocb->scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(siocb->scm, skb);
	} else {
		/* It is questionable: on PEEK we could:
		   - not return fds - good, but too simple 8)
		   - return fds, and not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!
		*/
		if (UNIXCB(skb).fp)
			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = size;

	scm_recv(sock, msg, siocb->scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}
/*
 *	Sleep until data has arrived. But check for races..
 */

static long unix_stream_data_wait(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		if (!skb_queue_empty(&sk->sk_receive_queue) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}

static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t size,
			       int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
	struct scm_cookie tmp_scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = msg->msg_name;
	int copied = 0;
	int check_creds = 0;
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_tomsg
	 */

	if (!siocb->scm) {
		siocb->scm = &tmp_scm;
		memset(&tmp_scm, 0, sizeof(tmp_scm));
	}

	err = mutex_lock_interruptible(&u->readlock);
	if (err) {
		err = sock_intr_errno(timeo);
		goto out;
	}

	do {
		int chunk;
		struct sk_buff *skb;

		unix_state_lock(sk);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb == NULL) {
			unix_sk(sk)->recursion_level = 0;
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			err = -EAGAIN;
			if (!timeo)
				break;
			mutex_unlock(&u->readlock);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)
			    ||  mutex_lock_interruptible(&u->readlock)) {
				err = sock_intr_errno(timeo);
				goto out;
			}

			continue;
 unlock:
			unix_state_unlock(sk);
			break;
		}
		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
			    (UNIXCB(skb).cred != siocb->scm->cred))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr) {
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(siocb->scm, skb);

			if (skb->len)
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (siocb->scm->fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			break;
		}
	} while (size);

	mutex_unlock(&u->readlock);
	scm_recv(sock, msg, siocb->scm, flags);
out:
	return copied ? : err;
}
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);

	if (!mode)
		return 0;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}

static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = sk_wmem_alloc_get(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		{
			struct sk_buff *skb;

			if (sk->sk_state == TCP_LISTEN) {
				err = -EINVAL;
				break;
			}

			spin_lock(&sk->sk_receive_queue.lock);
			if (sk->sk_type == SOCK_STREAM ||
			    sk->sk_type == SOCK_SEQPACKET) {
				skb_queue_walk(&sk->sk_receive_queue, skb)
					amount += skb->len;
			} else {
				skb = skb_peek(&sk->sk_receive_queue);
				if (skb)
					amount = skb->len;
			}
			spin_unlock(&sk->sk_receive_queue.lock);
			err = put_user(amount, (int __user *)arg);
			break;
		}

	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
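/*
 * Illustrative userspace sketch (not part of this file): SIOCINQ and
 * SIOCOUTQ report queued byte counts; as implemented above, on a
 * stream socket SIOCINQ sums the whole receive queue while on a
 * datagram socket it is the size of the next packet only:
 *
 *	int inq = 0, outq = 0;
 *
 *	ioctl(fd, SIOCINQ, &inq);	// a.k.a. FIONREAD
 *	ioctl(fd, SIOCOUTQ, &outq);	// a.k.a. TIOCOUTQ
 */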
static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= POLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= POLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;

	return mask;
}

static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT)))
		return mask;

	writable = unix_writable(sk);
	other = unix_peer_get(sk);
	if (other) {
		if (unix_peer(other) != sk) {
			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
			if (unix_recvq_full(other))
				writable = 0;
		}
		sock_put(other);
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}
#ifdef CONFIG_PROC_FS
static struct sock *first_unix_socket(int *i)
{
	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

static struct sock *next_unix_socket(int *i, struct sock *s)
{
	struct sock *next = sk_next(s);
	/* More in this chain? */
	if (next)
		return next;
	/* Look for next non-empty chain. */
	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
		if (!hlist_empty(&unix_socket_table[*i]))
			return __sk_head(&unix_socket_table[*i]);
	}
	return NULL;
}

struct unix_iter_state {
	struct seq_net_private p;
	int i;
};

static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
{
	struct unix_iter_state *iter = seq->private;
	loff_t off = 0;
	struct sock *s;

	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
		if (sock_net(s) != seq_file_net(seq))
			continue;
		if (off == pos)
			return s;
		++off;
	}
	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);
	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct unix_iter_state *iter = seq->private;
	struct sock *sk = v;
	++*pos;

	if (v == SEQ_START_TOKEN)
		sk = first_unix_socket(&iter->i);
	else
		sk = next_unix_socket(&iter->i, sk);
	while (sk && (sock_net(sk) != seq_file_net(seq)))
		sk = next_unix_socket(&iter->i, sk);
	return sk;
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i]);
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};

static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct unix_iter_state));
}

static const struct file_operations unix_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= unix_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};

static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
static int __init af_unix_init(void)
{
	int rc = -1;
	struct sk_buff *dummy_skb;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
		       __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */

fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);