RDS: only put sockets that have seen congestion on the poll_waitq
author: Andy Grover <andy.grover@oracle.com>
Thu, 11 Mar 2010 13:50:04 +0000 (13:50 +0000)
committer: David S. Miller <davem@davemloft.net>
Wed, 17 Mar 2010 04:16:59 +0000 (21:16 -0700)
rds_poll_waitq's listeners will be awoken if we receive a congestion
notification. Bad performance may result because *all* polled sockets
contend for this single lock. However, it should not be necessary to
wake pollers when a congestion update arrives if they have never
experienced congestion, and not putting these on the waitq will
hopefully greatly reduce contention.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/rds/af_rds.c
net/rds/rds.h
net/rds/send.c

index 853c52b..937ecda 100644 (file)
@@ -159,7 +159,8 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
 
        poll_wait(file, sk->sk_sleep, wait);
 
-       poll_wait(file, &rds_poll_waitq, wait);
+       if (rs->rs_seen_congestion)
+               poll_wait(file, &rds_poll_waitq, wait);
 
        read_lock_irqsave(&rs->rs_recv_lock, flags);
        if (!rs->rs_cong_monitor) {
@@ -181,6 +182,10 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
                mask |= (POLLOUT | POLLWRNORM);
        read_unlock_irqrestore(&rs->rs_recv_lock, flags);
 
+       /* clear state any time we wake a seen-congested socket */
+       if (mask)
+               rs->rs_seen_congestion = 0;
+
        return mask;
 }
 
index 85d6f89..4bec6e2 100644 (file)
@@ -388,6 +388,8 @@ struct rds_sock {
 
        /* flag indicating we were congested or not */
        int                     rs_congested;
+       /* seen congestion (ENOBUFS) when sending? */
+       int                     rs_seen_congestion;
 
        /* rs_lock protects all these adjacent members before the newline */
        spinlock_t              rs_lock;
index 192a480..51e2def 100644 (file)
@@ -894,8 +894,10 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
                queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
 
        ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
-       if (ret)
+       if (ret) {
+               rs->rs_seen_congestion = 1;
                goto out;
+       }
 
        while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port,
                                  dport, &queued)) {