SUNRPC: Fix tcp reconnection
authorTrond Myklebust <Trond.Myklebust@netapp.com>
Fri, 21 Aug 2009 17:37:17 +0000 (13:37 -0400)
committerGreg Kroah-Hartman <gregkh@suse.de>
Wed, 9 Sep 2009 03:17:51 +0000 (20:17 -0700)
This fixes a problem that was reported as Red Hat Bugzilla entry number
485339, in which rpciod starts looping on the TCP connection code,
rendering the NFS client unusable for 1/2 minute or so.

It is basically a backport of commit
f75e6745aa3084124ae1434fd7629853bdaf6798 (SUNRPC: Fix the problem of
EADDRNOTAVAIL syslog floods on reconnect)

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
include/linux/sunrpc/xprt.h
net/sunrpc/xprt.c
net/sunrpc/xprtsock.c

index 4d80a11..75a87fe 100644 (file)
@@ -260,6 +260,7 @@ void                        xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
 #define XPRT_BOUND             (4)
 #define XPRT_BINDING           (5)
 #define XPRT_CLOSING           (6)
+#define XPRT_CONNECTION_CLOSE  (8)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
index 99a52aa..b66be67 100644 (file)
@@ -645,10 +645,8 @@ xprt_init_autodisconnect(unsigned long data)
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
                goto out_abort;
        spin_unlock(&xprt->transport_lock);
-       if (xprt_connecting(xprt))
-               xprt_release_write(xprt, NULL);
-       else
-               queue_work(rpciod_workqueue, &xprt->task_cleanup);
+       set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+       queue_work(rpciod_workqueue, &xprt->task_cleanup);
        return;
 out_abort:
        spin_unlock(&xprt->transport_lock);
index 8f9295d..a283304 100644 (file)
@@ -748,6 +748,9 @@ out_release:
  *
  * This is used when all requests are complete; ie, no DRC state remains
  * on the server we want to save.
+ *
+ * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with
+ * xs_reset_transport() zeroing the socket from underneath a writer.
  */
 static void xs_close(struct rpc_xprt *xprt)
 {
@@ -781,6 +784,14 @@ clear_close_wait:
        xprt_disconnect_done(xprt);
 }
 
+static void xs_tcp_close(struct rpc_xprt *xprt)
+{
+       if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state))
+               xs_close(xprt);
+       else
+               xs_tcp_shutdown(xprt);
+}
+
 /**
  * xs_destroy - prepare to shutdown a transport
  * @xprt: doomed transport
@@ -1676,11 +1687,21 @@ static void xs_tcp_connect_worker4(struct work_struct *work)
                                goto out_clear;
                        case -ECONNREFUSED:
                        case -ECONNRESET:
+                       case -ENETUNREACH:
                                /* retry with existing socket, after a delay */
-                               break;
+                               goto out_clear;
                        default:
                                /* get rid of existing socket, and retry */
                                xs_tcp_shutdown(xprt);
+                               printk("%s: connect returned unhandled error %d\n",
+                                               __func__, status);
+                       case -EADDRNOTAVAIL:
+                               /* We're probably in TIME_WAIT. Get rid of existing socket,
+                                * and retry
+                                */
+                               set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+                               xprt_force_disconnect(xprt);
+                               status = -EAGAIN;
                }
        }
 out:
@@ -1735,11 +1756,21 @@ static void xs_tcp_connect_worker6(struct work_struct *work)
                                goto out_clear;
                        case -ECONNREFUSED:
                        case -ECONNRESET:
+                       case -ENETUNREACH:
                                /* retry with existing socket, after a delay */
-                               break;
+                               goto out_clear;
                        default:
                                /* get rid of existing socket, and retry */
                                xs_tcp_shutdown(xprt);
+                               printk("%s: connect returned unhandled error %d\n",
+                                               __func__, status);
+                       case -EADDRNOTAVAIL:
+                               /* We're probably in TIME_WAIT. Get rid of existing socket,
+                                * and retry
+                                */
+                               set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+                               xprt_force_disconnect(xprt);
+                               status = -EAGAIN;
                }
        }
 out:
@@ -1871,7 +1902,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
        .buf_free               = rpc_free,
        .send_request           = xs_tcp_send_request,
        .set_retrans_timeout    = xprt_set_retrans_timeout_def,
-       .close                  = xs_tcp_shutdown,
+       .close                  = xs_tcp_close,
        .destroy                = xs_destroy,
        .print_stats            = xs_tcp_print_stats,
 };