X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Funix%2Faf_unix.c;h=8d0b803bb7ca543436b69c446c102b9d7c4a8aee;hb=f63b2b3204ea962e9cc34a223771ec973694f8bf;hp=b595a3d8679f016a7b0936683170a69cc86c7905;hpb=3172f8fe1ca1d432b196efad453c0ceb89302075;p=pandora-kernel.git diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index b595a3d8679f..8d0b803bb7ca 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -149,8 +149,8 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) static inline unsigned unix_hash_fold(__wsum n) { - unsigned hash = (__force unsigned)n; - hash ^= hash>>16; + unsigned int hash = (__force unsigned int)csum_fold(n); + hash ^= hash>>8; return hash&(UNIX_HASH_SIZE-1); } @@ -303,6 +303,118 @@ found: return s; } +/* Support code for asymmetrically connected dgram sockets + * + * If a datagram socket is connected to a socket not itself connected + * to the first socket (eg, /dev/log), clients may only enqueue more + * messages if the present receive queue of the server socket is not + * "too large". This means there's a second writeability condition + * poll and sendmsg need to test. The dgram recv code will do a wake + * up on the peer_wait wait queue of a socket upon reception of a + * datagram which needs to be propagated to sleeping would-be writers + * since these might not have sent anything so far. This can't be + * accomplished via poll_wait because the lifetime of the server + * socket might be less than that of its clients if these break their + * association with it or if the server socket is closed while clients + * are still connected to it and there's no way to inform "a polling + * implementation" that it should let go of a certain wait queue + * + * In order to propagate a wake up, a wait_queue_t of the client + * socket is enqueued on the peer_wait queue of the server socket + * whose wake function does a wake_up on the ordinary client socket + * wait queue. This connection is established whenever a write (or + * poll for write) hit the flow control condition and broken when the + * association to the server socket is dissolved or after a wake up + * was relayed. + */ + +static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags, + void *key) +{ + struct unix_sock *u; + wait_queue_head_t *u_sleep; + + u = container_of(q, struct unix_sock, peer_wake); + + __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, + q); + u->peer_wake.private = NULL; + + /* relaying can only happen while the wq still exists */ + u_sleep = sk_sleep(&u->sk); + if (u_sleep) + wake_up_interruptible_poll(u_sleep, key); + + return 0; +} + +static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) +{ + struct unix_sock *u, *u_other; + int rc; + + u = unix_sk(sk); + u_other = unix_sk(other); + rc = 0; + spin_lock(&u_other->peer_wait.lock); + + if (!u->peer_wake.private) { + u->peer_wake.private = other; + __add_wait_queue(&u_other->peer_wait, &u->peer_wake); + + rc = 1; + } + + spin_unlock(&u_other->peer_wait.lock); + return rc; +} + +static void unix_dgram_peer_wake_disconnect(struct sock *sk, + struct sock *other) +{ + struct unix_sock *u, *u_other; + + u = unix_sk(sk); + u_other = unix_sk(other); + spin_lock(&u_other->peer_wait.lock); + + if (u->peer_wake.private == other) { + __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); + u->peer_wake.private = NULL; + } + + spin_unlock(&u_other->peer_wait.lock); +} + +static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, + struct sock *other) +{ + unix_dgram_peer_wake_disconnect(sk, other); + wake_up_interruptible_poll(sk_sleep(sk), + POLLOUT | + POLLWRNORM | + POLLWRBAND); +} + +/* preconditions: + * - unix_peer(sk) == other + * - association is stable + */ +static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) +{ + int connected; + + connected = unix_dgram_peer_wake_connect(sk, other); + + if (unix_recvq_full(other)) + return 1; + + if (connected) + unix_dgram_peer_wake_disconnect(sk, other); + + return 0; +} + static inline int unix_writable(struct sock *sk) { return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; @@ -371,7 +483,7 @@ static void unix_sock_destructor(struct sock *sk) #endif } -static int unix_release_sock(struct sock *sk, int embrion) +static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); struct dentry *dentry; @@ -409,6 +521,8 @@ static int unix_release_sock(struct sock *sk, int embrion) skpair->sk_state_change(skpair); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); } + + unix_dgram_peer_wake_disconnect(sk, skpair); sock_put(skpair); /* It may now die */ unix_peer(sk) = NULL; } @@ -444,8 +558,6 @@ static int unix_release_sock(struct sock *sk, int embrion) if (unix_tot_inflight) unix_gc(); /* Garbage collect fds */ - - return 0; } static void init_peercred(struct sock *sk) @@ -632,6 +744,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); + init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); unix_insert_socket(unix_sockets_unbound, sk); out: if (sk == NULL) @@ -682,9 +795,10 @@ static int unix_release(struct socket *sock) if (!sk) return 0; + unix_release_sock(sk, 0); sock->sk = NULL; - return unix_release_sock(sk, 0); + return 0; } static int unix_autobind(struct socket *sock) @@ -697,7 +811,9 @@ static int unix_autobind(struct socket *sock) int err; unsigned int retries = 0; - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) + return err; err = 0; if (u->addr) @@ -830,7 +946,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; addr_len = err; - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) + goto out; err = -EINVAL; if (u->addr) @@ -1002,6 +1120,8 @@ restart: if (unix_peer(sk)) { struct sock *old_peer = unix_peer(sk); unix_peer(sk) = other; + unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); + unix_state_double_unlock(sk, other); if (other != old_peer) @@ -1237,6 +1357,15 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) return 0; } +static void unix_sock_inherit_flags(const struct socket *old, + struct socket *new) +{ + if (test_bit(SOCK_PASSCRED, &old->flags)) + set_bit(SOCK_PASSCRED, &new->flags); + if (test_bit(SOCK_PASSSEC, &old->flags)) + set_bit(SOCK_PASSSEC, &new->flags); +} + static int unix_accept(struct socket *sock, struct socket *newsock, int flags) { struct sock *sk = sock->sk; @@ -1271,6 +1400,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags) /* attach accepted sock to socket */ unix_state_lock(tsk); newsock->state = SS_CONNECTED; + unix_sock_inherit_flags(sock, newsock); sock_graft(tsk, newsock); unix_state_unlock(tsk); return 0; @@ -1324,7 +1454,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) UNIXCB(skb).fp = NULL; for (i = scm->fp->count-1; i >= 0; i--) - unix_notinflight(scm->fp->fp[i]); + unix_notinflight(scm->fp->user, scm->fp->fp[i]); } static void unix_destruct_scm(struct sk_buff *skb) @@ -1342,6 +1472,21 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +/* + * The "user->unix_inflight" variable is protected by the garbage + * collection lock, and we just read it locklessly here. If you go + * over the limit, there might be a tiny race in actually noticing + * it across threads. Tough. + */ +static inline bool too_many_unix_fds(struct task_struct *p) +{ + struct user_struct *user = current_user(); + + if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); + return false; +} + #define MAX_RECURSION_LEVEL 4 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) @@ -1350,6 +1495,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) unsigned char max_level = 0; int unix_sock_count = 0; + if (too_many_unix_fds(current)) + return -ETOOMANYREFS; + for (i = scm->fp->count - 1; i >= 0; i--) { struct sock *sk = unix_get_socket(scm->fp->fp[i]); @@ -1371,10 +1519,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - if (unix_sock_count) { - for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - } + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->user, scm->fp->fp[i]); return max_level; } @@ -1431,11 +1577,12 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, long timeo; struct scm_cookie tmp_scm; int max_level; + int sk_locked; if (NULL == siocb->scm) siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, false); if (err < 0) return err; @@ -1499,12 +1646,14 @@ restart: goto out_free; } + sk_locked = 0; unix_state_lock(other); +restart_locked: err = -EPERM; if (!unix_may_send(sk, other)) goto out_unlock; - if (sock_flag(other, SOCK_DEAD)) { + if (unlikely(sock_flag(other, SOCK_DEAD))) { /* * Check with 1003.1g - what should * datagram error @@ -1512,10 +1661,14 @@ restart: unix_state_unlock(other); sock_put(other); + if (!sk_locked) + unix_state_lock(sk); + err = 0; - unix_state_lock(sk); if (unix_peer(sk) == other) { unix_peer(sk) = NULL; + unix_dgram_peer_wake_disconnect_wakeup(sk, other); + unix_state_unlock(sk); unix_dgram_disconnected(sk, other); @@ -1541,21 +1694,43 @@ restart: goto out_unlock; } - if (unix_peer(other) != sk && unix_recvq_full(other)) { - if (!timeo) { - err = -EAGAIN; - goto out_unlock; + /* other == sk && unix_peer(other) != sk if + * - unix_peer(sk) == NULL, destination address bound to sk + * - unix_peer(sk) == sk by time of get but disconnected before lock + */ + if (other != sk && + unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { + if (timeo) { + timeo = unix_wait_for_peer(other, timeo); + + err = sock_intr_errno(timeo); + if (signal_pending(current)) + goto out_free; + + goto restart; } - timeo = unix_wait_for_peer(other, timeo); + if (!sk_locked) { + unix_state_unlock(other); + unix_state_double_lock(sk, other); + } - err = sock_intr_errno(timeo); - if (signal_pending(current)) - goto out_free; + if (unix_peer(sk) != other || + unix_dgram_peer_wake_me(sk, other)) { + err = -EAGAIN; + sk_locked = 1; + goto out_unlock; + } - goto restart; + if (!sk_locked) { + sk_locked = 1; + goto restart_locked; + } } + if (unlikely(sk_locked)) + unix_state_unlock(sk); + if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); maybe_add_creds(skb, sock, other); @@ -1569,6 +1744,8 @@ restart: return len; out_unlock: + if (sk_locked) + unix_state_unlock(sk); unix_state_unlock(other); out_free: kfree_skb(skb); @@ -1596,7 +1773,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, false); if (err < 0) return err; @@ -1735,7 +1912,6 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { struct unix_sock *u = unix_sk(sk); - msg->msg_namelen = 0; if (u->addr) { msg->msg_namelen = u->addr->len; memcpy(msg->msg_name, u->addr->name, u->addr->len); @@ -1758,11 +1934,12 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags&MSG_OOB) goto out; - msg->msg_namelen = 0; - err = mutex_lock_interruptible(&u->readlock); - if (err) { - err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); + if (unlikely(err)) { + /* recvmsg() in non blocking mode is supposed to return -EAGAIN + * sk_rcvtimeo is not honored by mutex_lock_interruptible() + */ + err = noblock ? -EAGAIN : -ERESTARTSYS; goto out; } @@ -1857,6 +2034,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo) unix_state_unlock(sk); timeo = schedule_timeout(timeo); unix_state_lock(sk); + + if (sock_flag(sk, SOCK_DEAD)) + break; + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } @@ -1877,23 +2058,24 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = msg->msg_name; int copied = 0; + int noblock = flags & MSG_DONTWAIT; int check_creds = 0; int target; int err = 0; long timeo; - err = -EINVAL; - if (sk->sk_state != TCP_ESTABLISHED) + if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { + err = -EINVAL; goto out; + } - err = -EOPNOTSUPP; - if (flags&MSG_OOB) + if (unlikely(flags & MSG_OOB)) { + err = -EOPNOTSUPP; goto out; + } target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); - timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); - - msg->msg_namelen = 0; + timeo = sock_rcvtimeo(sk, noblock); /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg @@ -1904,18 +2086,18 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, memset(&tmp_scm, 0, sizeof(tmp_scm)); } - err = mutex_lock_interruptible(&u->readlock); - if (err) { - err = sock_intr_errno(timeo); - goto out; - } + mutex_lock(&u->readlock); do { int chunk; struct sk_buff *skb; unix_state_lock(sk); - skb = skb_dequeue(&sk->sk_receive_queue); + if (sock_flag(sk, SOCK_DEAD)) { + err = -ECONNRESET; + goto unlock; + } + skb = skb_peek(&sk->sk_receive_queue); if (skb == NULL) { unix_sk(sk)->recursion_level = 0; if (copied >= target) @@ -1932,19 +2114,22 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, goto unlock; unix_state_unlock(sk); - err = -EAGAIN; - if (!timeo) + if (!timeo) { + err = -EAGAIN; break; + } + mutex_unlock(&u->readlock); timeo = unix_stream_data_wait(sk, timeo); - if (signal_pending(current) - || mutex_lock_interruptible(&u->readlock)) { + if (signal_pending(current)) { err = sock_intr_errno(timeo); + scm_destroy(siocb->scm); goto out; } + mutex_lock(&u->readlock); continue; unlock: unix_state_unlock(sk); @@ -1955,12 +2140,9 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (check_creds) { /* Never glue messages from different writers */ if ((UNIXCB(skb).pid != siocb->scm->pid) || - (UNIXCB(skb).cred != siocb->scm->cred)) { - skb_queue_head(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + (UNIXCB(skb).cred != siocb->scm->cred)) break; - } - } else { + } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); check_creds = 1; @@ -1974,8 +2156,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, chunk = min_t(unsigned int, skb->len, size); if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { - skb_queue_head(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); if (copied == 0) copied = -EFAULT; break; @@ -1990,13 +2170,10 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (UNIXCB(skb).fp) unix_detach_fds(siocb->scm, skb); - /* put the skb back if we didn't use it up.. */ - if (skb->len) { - skb_queue_head(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + if (skb->len) break; - } + skb_unlink(skb, &sk->sk_receive_queue); consume_skb(skb); if (siocb->scm->fp) @@ -2007,9 +2184,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (UNIXCB(skb).fp) siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); - /* put message back and return */ - skb_queue_head(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); break; } } while (size); @@ -2174,14 +2348,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, return mask; writable = unix_writable(sk); - other = unix_peer_get(sk); - if (other) { - if (unix_peer(other) != sk) { - sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); - if (unix_recvq_full(other)) - writable = 0; - } - sock_put(other); + if (writable) { + unix_state_lock(sk); + + other = unix_peer(sk); + if (other && unix_peer(other) != sk && + unix_recvq_full(other) && + unix_dgram_peer_wake_me(sk, other)) + writable = 0; + + unix_state_unlock(sk); } if (writable)