[S390] cio: Avoid machine check vs. not operational races.
authorCornelia Huck <cornelia.huck@de.ibm.com>
Fri, 12 Oct 2007 14:11:26 +0000 (16:11 +0200)
committerMartin Schwidefsky <schwidefsky@de.ibm.com>
Fri, 12 Oct 2007 14:13:02 +0000 (16:13 +0200)
There was the possibilty that an action like ccw_device_set_offline()
triggered by a device gone machine check might trigger a not oper
event. Unfortunately, this could lead to the situation that we tried
to unregister a subchannel twice: Once from the slow path evaluation,
and once via the not oper event.

Fix this by always using the same mechanism (css_schedule_eval()) for
triggering the unregister. This makes sure that unregistration will
only be done once. As an added bonus, it also simplyfies the code.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
drivers/s390/cio/device.c
drivers/s390/cio/device.h
drivers/s390/cio/device_fsm.c

index c96380d..39f02b4 100644 (file)
@@ -969,8 +969,7 @@ out:
                wake_up(&ccw_device_init_wq);
 }
 
-void
-ccw_device_call_sch_unregister(struct work_struct *work)
+static void ccw_device_call_sch_unregister(struct work_struct *work)
 {
        struct ccw_device_private *priv;
        struct ccw_device *cdev;
index b66338b..0d40896 100644 (file)
@@ -80,7 +80,6 @@ void io_subchannel_recog_done(struct ccw_device *cdev);
 int ccw_device_cancel_halt_clear(struct ccw_device *);
 
 void ccw_device_do_unreg_rereg(struct work_struct *);
-void ccw_device_call_sch_unregister(struct work_struct *);
 void ccw_device_move_to_orphanage(struct work_struct *);
 int ccw_device_is_orphan(struct ccw_device *);
 
index f772ef0..8867443 100644 (file)
@@ -544,51 +544,6 @@ ccw_device_recog_timeout(struct ccw_device *cdev, enum dev_event dev_event)
 }
 
 
-static void
-ccw_device_nopath_notify(struct work_struct *work)
-{
-       struct ccw_device_private *priv;
-       struct ccw_device *cdev;
-       struct subchannel *sch;
-       int ret;
-       unsigned long flags;
-
-       priv = container_of(work, struct ccw_device_private, kick_work);
-       cdev = priv->cdev;
-       spin_lock_irqsave(cdev->ccwlock, flags);
-       sch = to_subchannel(cdev->dev.parent);
-       /* Extra sanity. */
-       if (sch->lpm)
-               goto out_unlock;
-       if (sch->driver && sch->driver->notify) {
-               spin_unlock_irqrestore(cdev->ccwlock, flags);
-               ret = sch->driver->notify(&sch->dev, CIO_NO_PATH);
-               spin_lock_irqsave(cdev->ccwlock, flags);
-       } else
-               ret = 0;
-       if (!ret) {
-               if (get_device(&sch->dev)) {
-                       /* Driver doesn't want to keep device. */
-                       cio_disable_subchannel(sch);
-                       if (get_device(&cdev->dev)) {
-                               PREPARE_WORK(&cdev->private->kick_work,
-                                            ccw_device_call_sch_unregister);
-                               queue_work(ccw_device_work,
-                                          &cdev->private->kick_work);
-                       } else
-                               put_device(&sch->dev);
-               }
-       } else {
-               cio_disable_subchannel(sch);
-               ccw_device_set_timeout(cdev, 0);
-               cdev->private->flags.fake_irb = 0;
-               cdev->private->state = DEV_STATE_DISCONNECTED;
-               wake_up(&cdev->private->wait_q);
-       }
-out_unlock:
-       spin_unlock_irqrestore(cdev->ccwlock, flags);
-}
-
 void
 ccw_device_verify_done(struct ccw_device *cdev, int err)
 {
@@ -632,12 +587,9 @@ ccw_device_verify_done(struct ccw_device *cdev, int err)
        default:
                /* Reset oper notify indication after verify error. */
                cdev->private->flags.donotify = 0;
-               if (cdev->online) {
-                       PREPARE_WORK(&cdev->private->kick_work,
-                                    ccw_device_nopath_notify);
-                       queue_work(ccw_device_notify_work,
-                                  &cdev->private->kick_work);
-               } else
+               if (cdev->online)
+                       dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
+               else
                        ccw_device_done(cdev, DEV_STATE_NOT_OPER);
                break;
        }
@@ -691,11 +643,7 @@ ccw_device_disband_done(struct ccw_device *cdev, int err)
                break;
        default:
                cdev->private->flags.donotify = 0;
-               if (get_device(&cdev->dev)) {
-                       PREPARE_WORK(&cdev->private->kick_work,
-                                    ccw_device_call_sch_unregister);
-                       queue_work(ccw_device_work, &cdev->private->kick_work);
-               }
+               dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
                ccw_device_done(cdev, DEV_STATE_NOT_OPER);
                break;
        }
@@ -766,59 +714,16 @@ ccw_device_recog_notoper(struct ccw_device *cdev, enum dev_event dev_event)
 }
 
 /*
- * Handle not operational event while offline.
+ * Handle not operational event in non-special state.
  */
-static void
-ccw_device_offline_notoper(struct ccw_device *cdev, enum dev_event dev_event)
+static void ccw_device_generic_notoper(struct ccw_device *cdev,
+                                      enum dev_event dev_event)
 {
        struct subchannel *sch;
 
        cdev->private->state = DEV_STATE_NOT_OPER;
        sch = to_subchannel(cdev->dev.parent);
-       if (get_device(&cdev->dev)) {
-               PREPARE_WORK(&cdev->private->kick_work,
-                            ccw_device_call_sch_unregister);
-               queue_work(ccw_device_work, &cdev->private->kick_work);
-       }
-       wake_up(&cdev->private->wait_q);
-}
-
-/*
- * Handle not operational event while online.
- */
-static void
-ccw_device_online_notoper(struct ccw_device *cdev, enum dev_event dev_event)
-{
-       struct subchannel *sch;
-       int ret;
-
-       sch = to_subchannel(cdev->dev.parent);
-       if (sch->driver->notify) {
-               spin_unlock_irq(cdev->ccwlock);
-               ret = sch->driver->notify(&sch->dev,
-                                         sch->lpm ? CIO_GONE : CIO_NO_PATH);
-               spin_lock_irq(cdev->ccwlock);
-       } else
-               ret = 0;
-       if (ret) {
-               ccw_device_set_timeout(cdev, 0);
-               cdev->private->flags.fake_irb = 0;
-               cdev->private->state = DEV_STATE_DISCONNECTED;
-               wake_up(&cdev->private->wait_q);
-               return;
-       }
-       cdev->private->state = DEV_STATE_NOT_OPER;
-       cio_disable_subchannel(sch);
-       if (sch->schib.scsw.actl != 0) {
-               // FIXME: not-oper indication to device driver ?
-               ccw_device_call_handler(cdev);
-       }
-       if (get_device(&cdev->dev)) {
-               PREPARE_WORK(&cdev->private->kick_work,
-                            ccw_device_call_sch_unregister);
-               queue_work(ccw_device_work, &cdev->private->kick_work);
-       }
-       wake_up(&cdev->private->wait_q);
+       css_schedule_eval(sch->schid);
 }
 
 /*
@@ -916,18 +821,9 @@ ccw_device_online_timeout(struct ccw_device *cdev, enum dev_event dev_event)
                cdev->private->state = DEV_STATE_TIMEOUT_KILL;
                return;
        }
-       if (ret == -ENODEV) {
-               struct subchannel *sch;
-
-               sch = to_subchannel(cdev->dev.parent);
-               if (!sch->lpm) {
-                       PREPARE_WORK(&cdev->private->kick_work,
-                                    ccw_device_nopath_notify);
-                       queue_work(ccw_device_notify_work,
-                                  &cdev->private->kick_work);
-               } else
-                       dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
-       } else if (cdev->handler)
+       if (ret == -ENODEV)
+               dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
+       else if (cdev->handler)
                cdev->handler(cdev, cdev->private->intparm,
                              ERR_PTR(-ETIMEDOUT));
 }
@@ -1234,7 +1130,7 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = {
                [DEV_EVENT_VERIFY]      = ccw_device_nop,
        },
        [DEV_STATE_SENSE_PGID] = {
-               [DEV_EVENT_NOTOPER]     = ccw_device_online_notoper,
+               [DEV_EVENT_NOTOPER]     = ccw_device_generic_notoper,
                [DEV_EVENT_INTERRUPT]   = ccw_device_sense_pgid_irq,
                [DEV_EVENT_TIMEOUT]     = ccw_device_onoff_timeout,
                [DEV_EVENT_VERIFY]      = ccw_device_nop,
@@ -1246,50 +1142,50 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = {
                [DEV_EVENT_VERIFY]      = ccw_device_nop,
        },
        [DEV_STATE_OFFLINE] = {
-               [DEV_EVENT_NOTOPER]     = ccw_device_offline_notoper,
+               [DEV_EVENT_NOTOPER]     = ccw_device_generic_notoper,
                [DEV_EVENT_INTERRUPT]   = ccw_device_offline_irq,
                [DEV_EVENT_TIMEOUT]     = ccw_device_nop,
                [DEV_EVENT_VERIFY]      = ccw_device_nop,
        },
        [DEV_STATE_VERIFY] = {
-               [DEV_EVENT_NOTOPER]     = ccw_device_online_notoper,
+               [DEV_EVENT_NOTOPER]     = ccw_device_generic_notoper,
                [DEV_EVENT_INTERRUPT]   = ccw_device_verify_irq,
                [DEV_EVENT_TIMEOUT]     = ccw_device_onoff_timeout,
                [DEV_EVENT_VERIFY]      = ccw_device_delay_verify,
        },
        [DEV_STATE_ONLINE] = {
-               [DEV_EVENT_NOTOPER]     = ccw_device_online_notoper,
+               [DEV_EVENT_NOTOPER]     = ccw_device_generic_notoper,
                [DEV_EVENT_INTERRUPT]   = ccw_device_irq,
                [DEV_EVENT_TIMEOUT]     = ccw_device_online_timeout,
                [DEV_EVENT_VERIFY]      = ccw_device_online_verify,
        },
        [DEV_STATE_W4SENSE] = {
-               [DEV_EVENT_NOTOPER]     = ccw_device_online_notoper,
+               [DEV_EVENT_NOTOPER]     = ccw_device_generic_notoper,
                [DEV_EVENT_INTERRUPT]   = ccw_device_w4sense,
                [DEV_EVENT_TIMEOUT]     = ccw_device_nop,
                [DEV_EVENT_VERIFY]      = ccw_device_online_verify,
        },
        [DEV_STATE_DISBAND_PGID] = {
-               [DEV_EVENT_NOTOPER]     = ccw_device_online_notoper,
+               [DEV_EVENT_NOTOPER]     = ccw_device_generic_notoper,
                [DEV_EVENT_INTERRUPT]   = ccw_device_disband_irq,
                [DEV_EVENT_TIMEOUT]     = ccw_device_onoff_timeout,
                [DEV_EVENT_VERIFY]      = ccw_device_nop,
        },
        [DEV_STATE_BOXED] = {
-               [DEV_EVENT_NOTOPER]     = ccw_device_offline_notoper,
+               [DEV_EVENT_NOTOPER]     = ccw_device_generic_notoper,
                [DEV_EVENT_INTERRUPT]   = ccw_device_stlck_done,
                [DEV_EVENT_TIMEOUT]     = ccw_device_stlck_done,
                [DEV_EVENT_VERIFY]      = ccw_device_nop,
        },
        /* states to wait for i/o completion before doing something */
        [DEV_STATE_CLEAR_VERIFY] = {
-               [DEV_EVENT_NOTOPER]     = ccw_device_online_notoper,
+               [DEV_EVENT_NOTOPER]     = ccw_device_generic_notoper,
                [DEV_EVENT_INTERRUPT]   = ccw_device_clear_verify,
                [DEV_EVENT_TIMEOUT]     = ccw_device_nop,
                [DEV_EVENT_VERIFY]      = ccw_device_nop,
        },
        [DEV_STATE_TIMEOUT_KILL] = {
-               [DEV_EVENT_NOTOPER]     = ccw_device_online_notoper,
+               [DEV_EVENT_NOTOPER]     = ccw_device_generic_notoper,
                [DEV_EVENT_INTERRUPT]   = ccw_device_killing_irq,
                [DEV_EVENT_TIMEOUT]     = ccw_device_killing_timeout,
                [DEV_EVENT_VERIFY]      = ccw_device_nop, //FIXME