s390/dasd: fix pathgroup race
author Stefan Haberland <stefan.haberland@de.ibm.com>
Tue, 11 Sep 2012 13:10:58 +0000 (15:10 +0200)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
Mon, 17 Sep 2012 07:58:18 +0000 (09:58 +0200)
If a new path becomes available, we need to verify the path data. If it is
the first path for a device, the stop bits are removed after path verification.
If a pathgroup is established, we need to set system characteristics for
the LCU. Therefore, I/O has to be started.
If the device is stopped, the set system characteristics worker may block
the path verification worker, and the device stays blocked.

Turn on failfast for set system characteristics CQR to prevent a deadlock
with the path verification worker.

If a pathgroup is established on a device that is not in use, trigger path
verification, since we may not have been informed about a working path.

Signed-off-by: Stefan Haberland <stefan.haberland@de.ibm.com>
Reviewed-by: Stefan Weinhuber <wein@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
drivers/s390/block/dasd.c
drivers/s390/block/dasd_eckd.c

index 6498d15..0595c76 100644 (file)
@@ -2157,6 +2157,7 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible)
                    test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) &&
                    (!dasd_eer_enabled(device))) {
                        cqr->status = DASD_CQR_FAILED;
+                       cqr->intrc = -EAGAIN;
                        continue;
                }
                /* Don't try to start requests if device is stopped */
@@ -3270,6 +3271,16 @@ void dasd_generic_path_event(struct ccw_device *cdev, int *path_event)
                        dasd_schedule_device_bh(device);
                }
                if (path_event[chp] & PE_PATHGROUP_ESTABLISHED) {
+                       if (!(device->path_data.opm & eventlpm) &&
+                           !(device->path_data.tbvpm & eventlpm)) {
+                               /*
+                                * we can not establish a pathgroup on an
+                                * unavailable path, so trigger a path
+                                * verification first
+                                */
+                               device->path_data.tbvpm |= eventlpm;
+                               dasd_schedule_device_bh(device);
+                       }
                        DBF_DEV_EVENT(DBF_WARNING, device, "%s",
                                      "Pathgroup re-established\n");
                        if (device->discipline->kick_validate)
index 2fb2b9e..c48c72a 100644 (file)
@@ -1507,7 +1507,8 @@ static struct dasd_ccw_req *dasd_eckd_build_psf_ssc(struct dasd_device *device,
  * call might change behaviour of DASD devices.
  */
 static int
-dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav)
+dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav,
+                 unsigned long flags)
 {
        struct dasd_ccw_req *cqr;
        int rc;
@@ -1516,10 +1517,19 @@ dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav)
        if (IS_ERR(cqr))
                return PTR_ERR(cqr);
 
+       /*
+        * set flags e.g. turn on failfast, to prevent blocking
+        * the calling function should handle failed requests
+        */
+       cqr->flags |= flags;
+
        rc = dasd_sleep_on(cqr);
        if (!rc)
                /* trigger CIO to reprobe devices */
                css_schedule_reprobe();
+       else if (cqr->intrc == -EAGAIN)
+               rc = -EAGAIN;
+
        dasd_sfree_request(cqr, cqr->memdev);
        return rc;
 }
@@ -1527,7 +1537,8 @@ dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav)
 /*
  * Valide storage server of current device.
  */
-static void dasd_eckd_validate_server(struct dasd_device *device)
+static int dasd_eckd_validate_server(struct dasd_device *device,
+                                    unsigned long flags)
 {
        int rc;
        struct dasd_eckd_private *private;
@@ -1536,17 +1547,18 @@ static void dasd_eckd_validate_server(struct dasd_device *device)
        private = (struct dasd_eckd_private *) device->private;
        if (private->uid.type == UA_BASE_PAV_ALIAS ||
            private->uid.type == UA_HYPER_PAV_ALIAS)
-               return;
+               return 0;
        if (dasd_nopav || MACHINE_IS_VM)
                enable_pav = 0;
        else
                enable_pav = 1;
-       rc = dasd_eckd_psf_ssc(device, enable_pav);
+       rc = dasd_eckd_psf_ssc(device, enable_pav, flags);
 
        /* may be requested feature is not available on server,
         * therefore just report error and go ahead */
        DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "PSF-SSC for SSID %04x "
                        "returned rc=%d", private->uid.ssid, rc);
+       return rc;
 }
 
 /*
@@ -1556,7 +1568,13 @@ static void dasd_eckd_do_validate_server(struct work_struct *work)
 {
        struct dasd_device *device = container_of(work, struct dasd_device,
                                                  kick_validate);
-       dasd_eckd_validate_server(device);
+       if (dasd_eckd_validate_server(device, DASD_CQR_FLAGS_FAILFAST)
+           == -EAGAIN) {
+               /* schedule worker again if failed */
+               schedule_work(&device->kick_validate);
+               return;
+       }
+
        dasd_put_device(device);
 }
 
@@ -1685,7 +1703,7 @@ dasd_eckd_check_characteristics(struct dasd_device *device)
        if (rc)
                goto out_err2;
 
-       dasd_eckd_validate_server(device);
+       dasd_eckd_validate_server(device, 0);
 
        /* device may report different configuration data after LCU setup */
        rc = dasd_eckd_read_conf(device);
@@ -4153,7 +4171,7 @@ static int dasd_eckd_restore_device(struct dasd_device *device)
        rc = dasd_alias_make_device_known_to_lcu(device);
        if (rc)
                return rc;
-       dasd_eckd_validate_server(device);
+       dasd_eckd_validate_server(device, DASD_CQR_FLAGS_FAILFAST);
 
        /* RE-Read Configuration Data */
        rc = dasd_eckd_read_conf(device);