[SCSI] aacraid: add user initiated reset
author Salyzyn, Mark <mark_salyzyn@adaptec.com>
Tue, 12 Jun 2007 13:33:54 +0000 (09:33 -0400)
committer James Bottomley <jejb@mulgrave.il.steeleye.com>
Sun, 17 Jun 2007 20:00:47 +0000 (15:00 -0500)
Add the ability for an application to issue a hardware reset to the
adapter via sysfs. Typical uses include restarting the adapter after it
has been flashed. Bumped revision number for the driver and added a
feature to periodically check the adapter's health (check_interval),
update the adapter's concept of time (update_interval) and block
checking/resetting of the adapter (check_reset).

Signed-off-by: Mark Salyzyn <aacraid@adaptec.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
drivers/scsi/aacraid/aachba.c
drivers/scsi/aacraid/aacraid.h
drivers/scsi/aacraid/commsup.c
drivers/scsi/aacraid/linit.c
drivers/scsi/aacraid/rx.c

index ef11c18..b3081b1 100644 (file)
@@ -169,6 +169,18 @@ int acbsize = -1;
 module_param(acbsize, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size. Valid values are 512, 2048, 4096 and 8192. Default is to use suggestion from Firmware.");
 
+int update_interval = 30 * 60; /* seconds; default is 30 minutes */
+module_param(update_interval, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(update_interval, "Interval in seconds between time sync updates issued to adapter.");
+
+int check_interval = 24 * 60 * 60; /* seconds; default is 24 hours */
+module_param(check_interval, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(check_interval, "Interval in seconds between adapter health checks.");
+
+int check_reset = 1; /* zero makes aac_check_health() skip the adapter reset */
+module_param(check_reset, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(check_reset, "If adapter fails health check, reset the adapter.");
+
 int expose_physicals = -1;
 module_param(expose_physicals, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(expose_physicals, "Expose physical components of the arrays. -1=protect 0=off, 1=on");
@@ -1197,6 +1209,12 @@ int aac_get_adapter_info(struct aac_dev* dev)
                          (int)sizeof(dev->supplement_adapter_info.VpdInfo.Tsid),
                          dev->supplement_adapter_info.VpdInfo.Tsid);
                }
+               if (!check_reset ||
+                 (dev->supplement_adapter_info.SupportedOptions2 &
+                 le32_to_cpu(AAC_OPTION_IGNORE_RESET))) {
+                       printk(KERN_INFO "%s%d: Reset Adapter Ignored\n",
+                         dev->name, dev->id);
+               }
        }
 
        dev->nondasd_support = 0;
index fdbedb1..8abe4f9 100644 (file)
@@ -12,8 +12,8 @@
  *----------------------------------------------------------------------------*/
 
 #ifndef AAC_DRIVER_BUILD
-# define AAC_DRIVER_BUILD 2437
-# define AAC_DRIVER_BRANCH "-mh4"
+# define AAC_DRIVER_BUILD 2447
+# define AAC_DRIVER_BRANCH "-ms"
 #endif
 #define MAXIMUM_NUM_CONTAINERS 32
 
@@ -860,10 +860,12 @@ struct aac_supplement_adapter_info
        __le32  FlashFirmwareBootBuild;
        u8      MfgPcbaSerialNo[12];
        u8      MfgWWNName[8];
-       __le32  MoreFeatureBits;
+       __le32  SupportedOptions2;
        __le32  ReservedGrowth[1];
 };
 #define AAC_FEATURE_FALCON     0x00000010
+#define AAC_OPTION_MU_RESET    0x00000001
+#define AAC_OPTION_IGNORE_RESET        0x00000002
 #define AAC_SIS_VERSION_V3     3
 #define AAC_SIS_SLOT_UNKNOWN   0xFF
 
@@ -1260,6 +1262,19 @@ struct aac_synchronize_reply {
        u8              data[16];
 };
 
+#define CT_PAUSE_IO    65
+#define CT_RELEASE_IO  66
+struct aac_pause {     /* ContainerCommand payload; sent with type CT_PAUSE_IO */
+       __le32          command;        /* VM_ContainerConfig */
+       __le32          type;           /* CT_PAUSE_IO */
+       __le32          timeout;        /* 10ms ticks */
+       __le32          min;
+       __le32          noRescan;
+       __le32          parm3;
+       __le32          parm4;
+       __le32          count;  /* sizeof(((struct aac_pause_reply *)NULL)->data) */
+};
+
 struct aac_srb
 {
        __le32          function;
@@ -1816,6 +1831,7 @@ int aac_queue_get(struct aac_dev * dev, u32 * index, u32 qid, struct hw_fib * hw
 unsigned int aac_response_normal(struct aac_queue * q);
 unsigned int aac_command_normal(struct aac_queue * q);
 unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index);
+int aac_reset_adapter(struct aac_dev * dev, int forced);
 int aac_check_health(struct aac_dev * dev);
 int aac_command_thread(void *data);
 int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context *fibctx);
@@ -1835,3 +1851,6 @@ extern int aif_timeout;
 extern int expose_physicals;
 extern int aac_reset_devices;
 extern int aac_commit;
+extern int update_interval;
+extern int check_interval;
+extern int check_reset;
index 9aca57e..d510839 100644 (file)
@@ -1021,7 +1021,7 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr)
 
 }
 
-static int _aac_reset_adapter(struct aac_dev *aac)
+static int _aac_reset_adapter(struct aac_dev *aac, int forced)
 {
        int index, quirks;
        int retval;
@@ -1029,25 +1029,32 @@ static int _aac_reset_adapter(struct aac_dev *aac)
        struct scsi_device *dev;
        struct scsi_cmnd *command;
        struct scsi_cmnd *command_list;
+       int jafo = 0;
 
        /*
         * Assumptions:
-        *      - host is locked.
+        *      - host is locked, unless called by the aacraid thread.
+        *        (a matter of convenience, due to legacy issues surrounding
+        *        eh_host_adapter_reset).
         *      - in_reset is asserted, so no new i/o is getting to the
         *        card.
-        *      - The card is dead.
+        *      - The card is dead, or will be very shortly ;-/ so no new
+        *        commands are completing in the interrupt service.
         */
        host = aac->scsi_host_ptr;
        scsi_block_requests(host);
        aac_adapter_disable_int(aac);
-       spin_unlock_irq(host->host_lock);
-       kthread_stop(aac->thread);
+       if (aac->thread->pid != current->pid) {
+               spin_unlock_irq(host->host_lock);
+               kthread_stop(aac->thread);
+               jafo = 1;
+       }
 
        /*
         *      If a positive health, means in a known DEAD PANIC
         * state and the adapter could be reset to `try again'.
         */
-       retval = aac_adapter_restart(aac, aac_adapter_check_health(aac));
+       retval = aac_adapter_restart(aac, forced ? 0 : aac_adapter_check_health(aac));
 
        if (retval)
                goto out;
@@ -1104,10 +1111,12 @@ static int _aac_reset_adapter(struct aac_dev *aac)
        if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT)
                if ((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK)))
                        goto out;
-       aac->thread = kthread_run(aac_command_thread, aac, aac->name);
-       if (IS_ERR(aac->thread)) {
-               retval = PTR_ERR(aac->thread);
-               goto out;
+       if (jafo) {
+               aac->thread = kthread_run(aac_command_thread, aac, aac->name);
+               if (IS_ERR(aac->thread)) {
+                       retval = PTR_ERR(aac->thread);
+                       goto out;
+               }
        }
        (void)aac_get_adapter_info(aac);
        quirks = aac_get_driver_ident(index)->quirks;
@@ -1150,7 +1159,98 @@ static int _aac_reset_adapter(struct aac_dev *aac)
 out:
        aac->in_reset = 0;
        scsi_unblock_requests(host);
-       spin_lock_irq(host->host_lock);
+       if (jafo) {
+               spin_lock_irq(host->host_lock);
+       }
+       return retval;
+}
+
+int aac_reset_adapter(struct aac_dev * aac, int forced)
+{
+       unsigned long flagv = 0;
+       int retval;
+       struct Scsi_Host * host;
+
+       if (spin_trylock_irqsave(&aac->fib_lock, flagv) == 0)
+               return -EBUSY;
+       /* Only one reset at a time: in_reset is tested and set under fib_lock. */
+       if (aac->in_reset) {
+               spin_unlock_irqrestore(&aac->fib_lock, flagv);
+               return -EBUSY;
+       }
+       aac->in_reset = 1;
+       spin_unlock_irqrestore(&aac->fib_lock, flagv);
+
+       /*
+        * Unless forced >= 2, wait up to 60 seconds for every command
+        * still owned by the firmware on this host to complete. Not
+        * strictly necessary, but it makes us a good storage citizen.
+        */
+       host = aac->scsi_host_ptr;
+       scsi_block_requests(host);
+       if (forced < 2) for (retval = 60; retval; --retval) {
+               struct scsi_device * dev;
+               struct scsi_cmnd * command;
+               int active = 0;
+
+               __shost_for_each_device(dev, host) {
+                       spin_lock_irqsave(&dev->list_lock, flagv);
+                       list_for_each_entry(command, &dev->cmd_list, list) {
+                               if (command->SCp.phase == AAC_OWNER_FIRMWARE) {
+                                       active++;
+                                       break;
+                               }
+                       }
+                       spin_unlock_irqrestore(&dev->list_lock, flagv);
+                       if (active)
+                               break;
+
+               }
+               /*
+                * We can exit if all the commands are complete
+                */
+               if (active == 0)
+                       break;
+               ssleep(1);
+       }
+
+       /* Quiesce build, flush cache, write through mode */
+       aac_send_shutdown(aac);
+       spin_lock_irqsave(host->host_lock, flagv);
+       retval = _aac_reset_adapter(aac, forced);
+       spin_unlock_irqrestore(host->host_lock, flagv);
+
+       if (retval == -ENODEV) {
+               /* Unwind aac_send_shutdown(): IOP_RESET unsupported/disabled */
+               struct fib * fibctx = aac_fib_alloc(aac);
+               if (fibctx) {
+                       struct aac_pause *cmd;
+                       int status;
+
+                       aac_fib_init(fibctx);
+
+                       cmd = (struct aac_pause *) fib_data(fibctx);
+
+                       cmd->command = cpu_to_le32(VM_ContainerConfig);
+                       cmd->type = cpu_to_le32(CT_PAUSE_IO);
+                       cmd->timeout = cpu_to_le32(1);
+                       cmd->min = cpu_to_le32(1);
+                       cmd->noRescan = cpu_to_le32(1);
+                       cmd->count = cpu_to_le32(0);
+
+                       status = aac_fib_send(ContainerCommand,
+                         fibctx,
+                         sizeof(struct aac_pause),
+                         FsaNormal,
+                         -2 /* Timeout silently */, 1,
+                         NULL, NULL);
+
+                       if (status >= 0)
+                               aac_fib_complete(fibctx);
+                       aac_fib_free(fibctx);
+               }
+       }
+
        return retval;
 }
 
@@ -1270,10 +1370,15 @@ int aac_check_health(struct aac_dev * aac)
 
        printk(KERN_ERR "%s: Host adapter BLINK LED 0x%x\n", aac->name, BlinkLED);
 
+       if (!check_reset || (aac->supplement_adapter_info.SupportedOptions2 &
+         le32_to_cpu(AAC_OPTION_IGNORE_RESET)))
+               goto out;
        host = aac->scsi_host_ptr;
-       spin_lock_irqsave(host->host_lock, flagv);
-       BlinkLED = _aac_reset_adapter(aac);
-       spin_unlock_irqrestore(host->host_lock, flagv);
+       if (aac->thread->pid != current->pid)
+               spin_lock_irqsave(host->host_lock, flagv);
+       BlinkLED = _aac_reset_adapter(aac, 0);
+       if (aac->thread->pid != current->pid)
+               spin_unlock_irqrestore(host->host_lock, flagv);
        return BlinkLED;
 
 out:
@@ -1300,6 +1405,9 @@ int aac_command_thread(void *data)
        struct aac_fib_context *fibctx;
        unsigned long flags;
        DECLARE_WAITQUEUE(wait, current);
+       unsigned long next_jiffies = jiffies + HZ;
+       unsigned long next_check_jiffies = next_jiffies;
+       long difference = HZ;
 
        /*
         *      We can only have one thread per adapter for AIF's.
@@ -1368,7 +1476,7 @@ int aac_command_thread(void *data)
                                     cpu_to_le32(AifCmdJobProgress))) {
                                        aac_handle_aif(dev, fib);
                                }
-                               
+
                                time_now = jiffies/HZ;
 
                                /*
@@ -1507,11 +1615,79 @@ int aac_command_thread(void *data)
                 *      There are no more AIF's
                 */
                spin_unlock_irqrestore(dev->queues->queue[HostNormCmdQueue].lock, flags);
-               schedule();
+
+               /*
+                *      Background activity
+                */
+               if ((time_before(next_check_jiffies,next_jiffies))
+                && ((difference = next_check_jiffies - jiffies) <= 0)) {
+                       next_check_jiffies = next_jiffies;
+                       if (aac_check_health(dev) == 0) {
+                               difference = ((long)(unsigned)check_interval)
+                                          * HZ;
+                               next_check_jiffies = jiffies + difference;
+                       } else if (!dev->queues)
+                               break;
+               }
+               if (!time_before(next_check_jiffies,next_jiffies)
+                && ((difference = next_jiffies - jiffies) <= 0)) {
+                       struct timeval now;
+                       int ret;
+
+                       /* Don't even try to talk to adapter if it's sick */
+                       ret = aac_check_health(dev);
+                       if (!ret && !dev->queues)
+                               break;
+                       next_check_jiffies = jiffies
+                                          + ((long)(unsigned)check_interval)
+                                          * HZ;
+                       do_gettimeofday(&now);
+
+                       /* Synchronize our watches */
+                       if (((1000000 - (1000000 / HZ)) > now.tv_usec)
+                        && (now.tv_usec > (1000000 / HZ)))
+                               difference = (((1000000 - now.tv_usec) * HZ)
+                                 + 500000) / 1000000;
+                       else if (ret == 0) {
+                               struct fib *fibptr;
+
+                               if ((fibptr = aac_fib_alloc(dev))) {
+                                       u32 * info;
+
+                                       aac_fib_init(fibptr);
+
+                                       info = (u32 *) fib_data(fibptr);
+                                       if (now.tv_usec > 500000)
+                                               ++now.tv_sec;
+
+                                       *info = cpu_to_le32(now.tv_sec);
+
+                                       (void)aac_fib_send(SendHostTime,
+                                               fibptr,
+                                               sizeof(*info),
+                                               FsaNormal,
+                                               1, 1,
+                                               NULL,
+                                               NULL);
+                                       aac_fib_complete(fibptr);
+                                       aac_fib_free(fibptr);
+                               }
+                               difference = (long)(unsigned)update_interval*HZ;
+                       } else {
+                               /* retry shortly */
+                               difference = 10 * HZ;
+                       }
+                       next_jiffies = jiffies + difference;
+                       if (time_before(next_check_jiffies,next_jiffies))
+                               difference = next_check_jiffies - jiffies;
+               }
+               if (difference <= 0)
+                       difference = 1;
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(difference);
 
                if (kthread_should_stop())
                        break;
-               set_current_state(TASK_INTERRUPTIBLE);
        }
        if (dev->queues)
                remove_wait_queue(&dev->queues->queue[HostNormCmdQueue].cmdready, &wait);
index 6f92d07..f8c2aaf 100644 (file)
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/dma-mapping.h>
 #include <linux/syscalls.h>
 #include <linux/delay.h>
-#include <linux/smp_lock.h>
 #include <linux/kthread.h>
 #include <asm/semaphore.h>
 
@@ -581,6 +579,14 @@ static int aac_eh_reset(struct scsi_cmnd* cmd)
                ssleep(1);
        }
        printk(KERN_ERR "%s: SCSI bus appears hung\n", AAC_DRIVERNAME);
+       /*
+        * This adapter needs a blind reset, only do so for Adapters that
+        * support a register, instead of a commanded, reset.
+        */
+       if ((aac->supplement_adapter_info.SupportedOptions2 &
+         le32_to_cpu(AAC_OPTION_MU_RESET|AAC_OPTION_IGNORE_RESET)) ==
+         le32_to_cpu(AAC_OPTION_MU_RESET))
+               aac_reset_adapter(aac, 2); /* Bypass wait for command quiesce */
        return SUCCESS; /* Cause an immediate retry of the command with a ten second delay after successful tur */
 }
 
@@ -788,6 +794,31 @@ static ssize_t aac_show_max_id(struct class_device *class_dev, char *buf)
          class_to_shost(class_dev)->max_id);
 }
 
+static ssize_t aac_store_reset_adapter(struct class_device *class_dev,
+               const char *buf, size_t count)
+{
+       int status;
+
+       /* Privileged; a leading '!' in buf passes forced=1 to the reset. */
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
+       status = aac_reset_adapter((struct aac_dev*)class_to_shost(class_dev)->hostdata, buf[0] == '!');
+       /* Success consumes the whole write; failure hands back the errno. */
+       return (status >= 0) ? (int)count : status;
+}
+
+/* sysfs show for reset_host: adapter health as hex, -EBUSY while resetting. */
+static ssize_t aac_show_reset_adapter(struct class_device *class_dev,
+               char *buf)
+{
+       struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata;
+       int tmp = aac_adapter_check_health(dev);
+
+       /* A zero health result while in_reset is set is reported as busy. */
+       if ((tmp == 0) && dev->in_reset)
+               tmp = -EBUSY;
+       /* sysfs values are newline-terminated so `cat` output ends cleanly. */
+       return snprintf(buf, PAGE_SIZE, "0x%x\n", tmp);
+}
 
 static struct class_device_attribute aac_model = {
        .attr = {
@@ -845,6 +876,14 @@ static struct class_device_attribute aac_max_id = {
        },
        .show = aac_show_max_id,
 };
+static struct class_device_attribute aac_reset = {
+       .attr = {
+               .name = "reset_host",   /* sysfs file name */
+               .mode = S_IWUSR|S_IRUGO,        /* owner may write, everyone may read */
+       },
+       .store = aac_store_reset_adapter,       /* write: request an adapter reset */
+       .show = aac_show_reset_adapter, /* read: health value, or -EBUSY during a reset */
+};
 
 static struct class_device_attribute *aac_attrs[] = {
        &aac_model,
@@ -855,6 +894,7 @@ static struct class_device_attribute *aac_attrs[] = {
        &aac_serial_number,
        &aac_max_channel,
        &aac_max_id,
+       &aac_reset,
        NULL
 };
 
@@ -1118,7 +1158,7 @@ static int __init aac_init(void)
 {
        int error;
        
-       printk(KERN_INFO "Adaptec %s driver (%s)\n",
+       printk(KERN_INFO "Adaptec %s driver %s\n",
          AAC_DRIVERNAME, aac_driver_version);
 
        error = pci_register_driver(&aac_pci_driver);
index ae978a3..ebc65b9 100644 (file)
@@ -464,21 +464,24 @@ static int aac_rx_restart_adapter(struct aac_dev *dev, int bled)
 {
        u32 var;
 
-       if (bled)
-               printk(KERN_ERR "%s%d: adapter kernel panic'd %x.\n",
-                       dev->name, dev->id, bled);
-       else {
-               bled = aac_adapter_sync_cmd(dev, IOP_RESET_ALWAYS,
-                 0, 0, 0, 0, 0, 0, &var, NULL, NULL, NULL, NULL);
-               if (!bled && (var != 0x00000001))
-                       bled = -EINVAL;
-       }
-       if (bled && (bled != -ETIMEDOUT))
-               bled = aac_adapter_sync_cmd(dev, IOP_RESET,
-                 0, 0, 0, 0, 0, 0, &var, NULL, NULL, NULL, NULL);
+       if (!(dev->supplement_adapter_info.SupportedOptions2 &
+         le32_to_cpu(AAC_OPTION_MU_RESET)) || (bled >= 0) || (bled == -2)) {
+               if (bled)
+                       printk(KERN_ERR "%s%d: adapter kernel panic'd %x.\n",
+                               dev->name, dev->id, bled);
+               else {
+                       bled = aac_adapter_sync_cmd(dev, IOP_RESET_ALWAYS,
+                         0, 0, 0, 0, 0, 0, &var, NULL, NULL, NULL, NULL);
+                       if (!bled && (var != 0x00000001))
+                               bled = -EINVAL;
+               }
+               if (bled && (bled != -ETIMEDOUT))
+                       bled = aac_adapter_sync_cmd(dev, IOP_RESET,
+                         0, 0, 0, 0, 0, 0, &var, NULL, NULL, NULL, NULL);
 
-       if (bled && (bled != -ETIMEDOUT))
-               return -EINVAL;
+               if (bled && (bled != -ETIMEDOUT))
+                       return -EINVAL;
+       }
        if (bled || (var == 0x3803000F)) { /* USE_OTHER_METHOD */
                rx_writel(dev, MUnit.reserved2, 3);
                msleep(5000); /* Delay 5 seconds */
@@ -596,7 +599,7 @@ int _aac_rx_init(struct aac_dev *dev)
                }
                msleep(1);
        }
-       if (restart)
+       if (restart && aac_commit)
                aac_commit = 1;
        /*
         *      Fill in the common function dispatch table.