[SCSI] ibmvfc: Fix deadlock in EH
author Brian King <brking@linux.vnet.ibm.com>
Thu, 28 May 2009 21:17:28 +0000 (16:17 -0500)
committer James Bottomley <James.Bottomley@HansenPartnership.com>
Mon, 8 Jun 2009 18:07:42 +0000 (13:07 -0500)
Fixes the deadlock scenario shown below. We currently allow
queuecommand to send commands while the ibmvfc workqueue is scanning for
new rports, so we should also allow EH to function at this time.

scsi_eh_3     D 0000000000000000 12304  1279      2
Call Trace:
[c0000002f7257730] [c0000002f72577e0] 0xc0000002f72577e0 (unreliable)
[c0000002f7257900] [c0000000000118f4] .__switch_to+0x158/0x1a0
[c0000002f72579a0] [c0000000004f8b40] .schedule+0x8d4/0x9dc
[c0000002f7257b60] [c0000000004f8f08] .schedule_timeout+0xa8/0xe8
[c0000002f7257c50] [d0000000001d23e0] .ibmvfc_wait_while_resetting+0xe4/0x140 [ibmvfc]
[c0000002f7257d20] [d0000000001d3984] .ibmvfc_eh_abort_handler+0x60/0xe4 [ibmvfc]
[c0000002f7257dc0] [d000000000366714] .scsi_error_handler+0x38c/0x674 [scsi_mod]
[c0000002f7257f00] [c0000000000a7470] .kthread+0x78/0xc4
[c0000002f7257f90] [c000000000029b8c] .kernel_thread+0x4c/0x68
ibmvfc_3      D 0000000000000000 12432  1280      2
Call Trace:
[c0000002f7253540] [c0000002f72535f0] 0xc0000002f72535f0 (unreliable)
[c0000002f7253710] [c0000000000118f4] .__switch_to+0x158/0x1a0
[c0000002f72537b0] [c0000000004f8b40] .schedule+0x8d4/0x9dc
[c0000002f7253970] [c0000000004f8e98] .schedule_timeout+0x38/0xe8
[c0000002f7253a60] [c0000000004f80cc] .wait_for_common+0x138/0x220
[c0000002f7253b40] [c0000000000a2784] .flush_cpu_workqueue+0xac/0xcc
[c0000002f7253c10] [c0000000000a2960] .flush_workqueue+0x58/0xa0
[c0000002f7253ca0] [d0000000000827fc] .fc_flush_work+0x4c/0x64 [scsi_transport_fc]
[c0000002f7253d20] [d000000000082db4] .fc_remote_port_add+0x48/0x6c4 [scsi_transport_fc]
[c0000002f7253dd0] [d0000000001d7d04] .ibmvfc_work+0x820/0xa7c [ibmvfc]
[c0000002f7253f00] [c0000000000a7470] .kthread+0x78/0xc4
[c0000002f7253f90] [c000000000029b8c] .kernel_thread+0x4c/0x68
fc_wq_3       D 0000000000000000 10720  1283      2
Call Trace:
[c0000002f559ac30] [c0000002f559ace0] 0xc0000002f559ace0 (unreliable)
[c0000002f559ae00] [c0000000000118f4] .__switch_to+0x158/0x1a0
[c0000002f559aea0] [c0000000004f8b40] .schedule+0x8d4/0x9dc
[c0000002f559b060] [c0000000004f8e98] .schedule_timeout+0x38/0xe8
[c0000002f559b150] [c0000000004f80cc] .wait_for_common+0x138/0x220
[c0000002f559b230] [c0000000002721c4] .blk_execute_rq+0xb4/0x100
[c0000002f559b360] [d00000000036a1f8] .scsi_execute+0x118/0x194 [scsi_mod]
[c0000002f559b420] [d00000000036a32c] .scsi_execute_req+0xb8/0x124 [scsi_mod]
[c0000002f559b500] [d0000000000c1330] .sd_sync_cache+0x8c/0x108 [sd_mod]
[c0000002f559b5e0] [d0000000000c15b4] .sd_shutdown+0x9c/0x158 [sd_mod]
[c0000002f559b660] [d0000000000c16d0] .sd_remove+0x60/0xb4 [sd_mod]
[c0000002f559b700] [c000000000392ecc] .__device_release_driver+0xd0/0x118
[c0000002f559b7a0] [c000000000393080] .device_release_driver+0x30/0x54
[c0000002f559b830] [c000000000392108] .bus_remove_device+0x128/0x16c
[c0000002f559b8d0] [c00000000038f94c] .device_del+0x158/0x234
[c0000002f559b960] [d00000000036f078] .__scsi_remove_device+0x5c/0xd4 [scsi_mod]
[c0000002f559b9f0] [d00000000036f124] .scsi_remove_device+0x34/0x58 [scsi_mod]
[c0000002f559ba80] [d00000000036f204] .__scsi_remove_target+0xb4/0x120 [scsi_mod]
[c0000002f559bb10] [d00000000036f338] .__remove_child+0x2c/0x44 [scsi_mod]
[c0000002f559bb90] [c00000000038f11c] .device_for_each_child+0x54/0xb4
[c0000002f559bc50] [d00000000036f2e0] .scsi_remove_target+0x70/0x9c [scsi_mod]
[c0000002f559bce0] [d000000000083454] .fc_starget_delete+0x24/0x3c [scsi_transport_fc]
[c0000002f559bd70] [c0000000000a2368] .run_workqueue+0x118/0x208
[c0000002f559be30] [c0000000000a2580] .worker_thread+0x128/0x154
[c0000002f559bf00] [c0000000000a7470] .kthread+0x78/0xc4
[c0000002f559bf90] [c000000000029b8c] .kernel_thread+0x4c/0x68

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
drivers/scsi/ibmvscsi/ibmvfc.c
drivers/scsi/ibmvscsi/ibmvfc.h

index 182c8e7..76ae266 100644 (file)
@@ -431,6 +431,8 @@ static void ibmvfc_set_tgt_action(struct ibmvfc_target *tgt,
        case IBMVFC_TGT_ACTION_DEL_RPORT:
                break;
        default:
+               if (action == IBMVFC_TGT_ACTION_DEL_RPORT)
+                       tgt->add_rport = 0;
                tgt->action = action;
                break;
        }
@@ -483,7 +485,7 @@ static void ibmvfc_set_host_action(struct ibmvfc_host *vhost,
                switch (vhost->action) {
                case IBMVFC_HOST_ACTION_INIT_WAIT:
                case IBMVFC_HOST_ACTION_NONE:
-               case IBMVFC_HOST_ACTION_TGT_ADD:
+               case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
                        vhost->action = action;
                        break;
                default:
@@ -498,7 +500,6 @@ static void ibmvfc_set_host_action(struct ibmvfc_host *vhost,
        case IBMVFC_HOST_ACTION_TGT_DEL:
        case IBMVFC_HOST_ACTION_QUERY_TGTS:
        case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
-       case IBMVFC_HOST_ACTION_TGT_ADD:
        case IBMVFC_HOST_ACTION_NONE:
        default:
                vhost->action = action;
@@ -2306,7 +2307,7 @@ static int ibmvfc_scan_finished(struct Scsi_Host *shost, unsigned long time)
                done = 1;
        }
 
-       if (vhost->state != IBMVFC_NO_CRQ && vhost->action == IBMVFC_HOST_ACTION_NONE)
+       if (vhost->scan_complete)
                done = 1;
        spin_unlock_irqrestore(shost->host_lock, flags);
        return done;
@@ -2820,7 +2821,7 @@ static void ibmvfc_tgt_prli_done(struct ibmvfc_event *evt)
                                                tgt->ids.roles |= FC_PORT_ROLE_FCP_TARGET;
                                        if (parms->service_parms & IBMVFC_PRLI_INITIATOR_FUNC)
                                                tgt->ids.roles |= FC_PORT_ROLE_FCP_INITIATOR;
-                                       ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_ADD_RPORT);
+                                       tgt->add_rport = 1;
                                } else
                                        ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
                        } else if (prli_rsp[index].retry)
@@ -3660,7 +3661,6 @@ static int __ibmvfc_work_to_do(struct ibmvfc_host *vhost)
                return 1;
        case IBMVFC_HOST_ACTION_INIT:
        case IBMVFC_HOST_ACTION_ALLOC_TGTS:
-       case IBMVFC_HOST_ACTION_TGT_ADD:
        case IBMVFC_HOST_ACTION_TGT_DEL:
        case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
        case IBMVFC_HOST_ACTION_QUERY:
@@ -3715,25 +3715,26 @@ static void ibmvfc_log_ae(struct ibmvfc_host *vhost, int events)
 static void ibmvfc_tgt_add_rport(struct ibmvfc_target *tgt)
 {
        struct ibmvfc_host *vhost = tgt->vhost;
-       struct fc_rport *rport = tgt->rport;
+       struct fc_rport *rport;
        unsigned long flags;
 
-       if (rport) {
-               tgt_dbg(tgt, "Setting rport roles\n");
-               fc_remote_port_rolechg(rport, tgt->ids.roles);
-               spin_lock_irqsave(vhost->host->host_lock, flags);
-               ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE);
+       tgt_dbg(tgt, "Adding rport\n");
+       rport = fc_remote_port_add(vhost->host, 0, &tgt->ids);
+       spin_lock_irqsave(vhost->host->host_lock, flags);
+
+       if (rport && tgt->action == IBMVFC_TGT_ACTION_DEL_RPORT) {
+               tgt_dbg(tgt, "Deleting rport\n");
+               list_del(&tgt->queue);
                spin_unlock_irqrestore(vhost->host->host_lock, flags);
+               fc_remote_port_delete(rport);
+               del_timer_sync(&tgt->timer);
+               kref_put(&tgt->kref, ibmvfc_release_tgt);
                return;
        }
 
-       tgt_dbg(tgt, "Adding rport\n");
-       rport = fc_remote_port_add(vhost->host, 0, &tgt->ids);
-       spin_lock_irqsave(vhost->host->host_lock, flags);
-       tgt->rport = rport;
-       ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE);
        if (rport) {
                tgt_dbg(tgt, "rport add succeeded\n");
+               tgt->rport = rport;
                rport->maxframe_size = tgt->service_parms.common.bb_rcv_sz & 0x0fff;
                rport->supported_classes = 0;
                tgt->target_id = rport->scsi_target_id;
@@ -3811,11 +3812,21 @@ static void ibmvfc_do_work(struct ibmvfc_host *vhost)
 
                if (vhost->state == IBMVFC_INITIALIZING) {
                        if (vhost->action == IBMVFC_HOST_ACTION_TGT_DEL_FAILED) {
-                               ibmvfc_set_host_state(vhost, IBMVFC_ACTIVE);
-                               ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_TGT_ADD);
-                               vhost->init_retries = 0;
-                               spin_unlock_irqrestore(vhost->host->host_lock, flags);
-                               scsi_unblock_requests(vhost->host);
+                               if (vhost->reinit) {
+                                       vhost->reinit = 0;
+                                       scsi_block_requests(vhost->host);
+                                       ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
+                                       spin_unlock_irqrestore(vhost->host->host_lock, flags);
+                               } else {
+                                       ibmvfc_set_host_state(vhost, IBMVFC_ACTIVE);
+                                       ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE);
+                                       wake_up(&vhost->init_wait_q);
+                                       schedule_work(&vhost->rport_add_work_q);
+                                       vhost->init_retries = 0;
+                                       spin_unlock_irqrestore(vhost->host->host_lock, flags);
+                                       scsi_unblock_requests(vhost->host);
+                               }
+
                                return;
                        } else {
                                ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT);
@@ -3846,24 +3857,6 @@ static void ibmvfc_do_work(struct ibmvfc_host *vhost)
                if (!ibmvfc_dev_init_to_do(vhost))
                        ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_TGT_DEL_FAILED);
                break;
-       case IBMVFC_HOST_ACTION_TGT_ADD:
-               list_for_each_entry(tgt, &vhost->targets, queue) {
-                       if (tgt->action == IBMVFC_TGT_ACTION_ADD_RPORT) {
-                               spin_unlock_irqrestore(vhost->host->host_lock, flags);
-                               ibmvfc_tgt_add_rport(tgt);
-                               return;
-                       }
-               }
-
-               if (vhost->reinit && !ibmvfc_set_host_state(vhost, IBMVFC_INITIALIZING)) {
-                       vhost->reinit = 0;
-                       scsi_block_requests(vhost->host);
-                       ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
-               } else {
-                       ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE);
-                       wake_up(&vhost->init_wait_q);
-               }
-               break;
        default:
                break;
        };
@@ -4092,6 +4085,56 @@ nomem:
        return -ENOMEM;
 }
 
+/**
+ * ibmvfc_rport_add_thread - Worker thread for rport adds
+ * @work:      work struct
+ *
+ **/
+static void ibmvfc_rport_add_thread(struct work_struct *work)
+{
+       struct ibmvfc_host *vhost = container_of(work, struct ibmvfc_host,
+                                                rport_add_work_q);
+       struct ibmvfc_target *tgt;
+       struct fc_rport *rport;
+       unsigned long flags;
+       int did_work;
+
+       ENTER;
+       spin_lock_irqsave(vhost->host->host_lock, flags);
+       do {
+               did_work = 0;
+               if (vhost->state != IBMVFC_ACTIVE)
+                       break;
+
+               list_for_each_entry(tgt, &vhost->targets, queue) {
+                       if (tgt->add_rport) {
+                               did_work = 1;
+                               tgt->add_rport = 0;
+                               kref_get(&tgt->kref);
+                               rport = tgt->rport;
+                               if (!rport) {
+                                       spin_unlock_irqrestore(vhost->host->host_lock, flags);
+                                       ibmvfc_tgt_add_rport(tgt);
+                               } else if (get_device(&rport->dev)) {
+                                       spin_unlock_irqrestore(vhost->host->host_lock, flags);
+                                       tgt_dbg(tgt, "Setting rport roles\n");
+                                       fc_remote_port_rolechg(rport, tgt->ids.roles);
+                                       put_device(&rport->dev);
+                               }
+
+                               kref_put(&tgt->kref, ibmvfc_release_tgt);
+                               spin_lock_irqsave(vhost->host->host_lock, flags);
+                               break;
+                       }
+               }
+       } while(did_work);
+
+       if (vhost->state == IBMVFC_ACTIVE)
+               vhost->scan_complete = 1;
+       spin_unlock_irqrestore(vhost->host->host_lock, flags);
+       LEAVE;
+}
+
 /**
  * ibmvfc_probe - Adapter hot plug add entry point
  * @vdev:      vio device struct
@@ -4135,6 +4178,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
        strcpy(vhost->partition_name, "UNKNOWN");
        init_waitqueue_head(&vhost->work_wait_q);
        init_waitqueue_head(&vhost->init_wait_q);
+       INIT_WORK(&vhost->rport_add_work_q, ibmvfc_rport_add_thread);
 
        if ((rc = ibmvfc_alloc_mem(vhost)))
                goto free_scsi_host;
index 4dac356..3a6a725 100644 (file)
@@ -575,7 +575,6 @@ enum ibmvfc_target_action {
        IBMVFC_TGT_ACTION_NONE = 0,
        IBMVFC_TGT_ACTION_INIT,
        IBMVFC_TGT_ACTION_INIT_WAIT,
-       IBMVFC_TGT_ACTION_ADD_RPORT,
        IBMVFC_TGT_ACTION_DEL_RPORT,
 };
 
@@ -588,6 +587,7 @@ struct ibmvfc_target {
        int target_id;
        enum ibmvfc_target_action action;
        int need_login;
+       int add_rport;
        int init_retries;
        u32 cancel_key;
        struct ibmvfc_service_parms service_parms;
@@ -635,7 +635,6 @@ enum ibmvfc_host_action {
        IBMVFC_HOST_ACTION_ALLOC_TGTS,
        IBMVFC_HOST_ACTION_TGT_INIT,
        IBMVFC_HOST_ACTION_TGT_DEL_FAILED,
-       IBMVFC_HOST_ACTION_TGT_ADD,
 };
 
 enum ibmvfc_host_state {
@@ -682,6 +681,7 @@ struct ibmvfc_host {
        int client_migrated;
        int reinit;
        int delay_init;
+       int scan_complete;
        int events_to_log;
 #define IBMVFC_AE_LINKUP       0x0001
 #define IBMVFC_AE_LINKDOWN     0x0002
@@ -692,6 +692,7 @@ struct ibmvfc_host {
        void (*job_step) (struct ibmvfc_host *);
        struct task_struct *work_thread;
        struct tasklet_struct tasklet;
+       struct work_struct rport_add_work_q;
        wait_queue_head_t init_wait_q;
        wait_queue_head_t work_wait_q;
 };