libata: improve EH internal command timeout handling
[pandora-kernel.git] / drivers / ata / libata-eh.c
index 61dcd00..d5f03a6 100644 (file)
@@ -66,15 +66,19 @@ enum {
        ATA_ECAT_DUBIOUS_TOUT_HSM       = 6,
        ATA_ECAT_DUBIOUS_UNK_DEV        = 7,
        ATA_ECAT_NR                     = 8,
-};
 
-/* Waiting in ->prereset can never be reliable.  It's sometimes nice
- * to wait there but it can't be depended upon; otherwise, we wouldn't
- * be resetting.  Just give it enough time for most drives to spin up.
- */
-enum {
-       ATA_EH_PRERESET_TIMEOUT         = 10 * HZ,
-       ATA_EH_FASTDRAIN_INTERVAL       = 3 * HZ,
+       ATA_EH_CMD_DFL_TIMEOUT          =  5000,
+
+       /* always put at least this amount of time between resets */
+       ATA_EH_RESET_COOL_DOWN          =  5000,
+
+       /* Waiting in ->prereset can never be reliable.  It's
+        * sometimes nice to wait there but it can't be depended upon;
+        * otherwise, we wouldn't be resetting.  Just give it enough
+        * time for most drives to spin up.
+        */
+       ATA_EH_PRERESET_TIMEOUT         = 10000,
+       ATA_EH_FASTDRAIN_INTERVAL       =  3000,
 };
 
 /* The following table determines how we sequence resets.  Each entry
@@ -84,13 +88,60 @@ enum {
  * are mostly for error handling, hotplug and retarded devices.
  */
 static const unsigned long ata_eh_reset_timeouts[] = {
-       10 * HZ,        /* most drives spin up by 10sec */
-       10 * HZ,        /* > 99% working drives spin up before 20sec */
-       35 * HZ,        /* give > 30 secs of idleness for retarded devices */
-       5 * HZ,         /* and sweet one last chance */
-       /* > 1 min has elapsed, give up */
+       10000,  /* most drives spin up by 10sec */
+       10000,  /* > 99% working drives spin up before 20sec */
+       35000,  /* give > 30 secs of idleness for retarded devices */
+        5000,  /* and sweet one last chance */
+       ULONG_MAX, /* > 1 min has elapsed, give up */
+};
+
+static const unsigned long ata_eh_identify_timeouts[] = {
+        5000,  /* covers > 99% of successes and not too boring on failures */
+       10000,  /* combined time till here is enough even for media access */
+       30000,  /* for true idiots */
+       ULONG_MAX,
+};
+
+static const unsigned long ata_eh_other_timeouts[] = {
+        5000,  /* same rationale as identify timeout */
+       10000,  /* ditto */
+       /* but no merciful 30sec for other commands, it just isn't worth it */
+       ULONG_MAX,
 };
 
+struct ata_eh_cmd_timeout_ent {
+       const u8                *commands;
+       const unsigned long     *timeouts;
+};
+
+/* The following table determines timeouts to use for EH internal
+ * commands.  Each table entry is a command class and matches the
+ * commands the entry applies to and the timeout table to use.
+ *
+ * On the retry after a command timed out, the next timeout value from
+ * the table is used.  If the table doesn't contain further entries,
+ * the last value is used.
+ *
+ * ehc->cmd_timeout_idx keeps track of which timeout to use per
+ * command class, so if SET_FEATURES times out on the first try, the
+ * next try will use the second timeout value only for that class.
+ */
+#define CMDS(cmds...)  (const u8 []){ cmds, 0 }
+static const struct ata_eh_cmd_timeout_ent
+ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
+       { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
+         .timeouts = ata_eh_identify_timeouts, },
+       { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
+         .timeouts = ata_eh_other_timeouts, },
+       { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
+         .timeouts = ata_eh_other_timeouts, },
+       { .commands = CMDS(ATA_CMD_SET_FEATURES),
+         .timeouts = ata_eh_other_timeouts, },
+       { .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
+         .timeouts = ata_eh_other_timeouts, },
+};
+#undef CMDS
+
 static void __ata_port_freeze(struct ata_port *ap);
 #ifdef CONFIG_PM
 static void ata_eh_handle_port_suspend(struct ata_port *ap);
@@ -236,6 +287,73 @@ void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
 
 #endif /* CONFIG_PCI */
 
+static int ata_lookup_timeout_table(u8 cmd)
+{
+       int i;
+
+       for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
+               const u8 *cur;
+
+               for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
+                       if (*cur == cmd)
+                               return i;
+       }
+
+       return -1;
+}
+
+/**
+ *     ata_internal_cmd_timeout - determine timeout for an internal command
+ *     @dev: target device
+ *     @cmd: internal command to be issued
+ *
+ *     Determine timeout for internal command @cmd for @dev.
+ *
+ *     LOCKING:
+ *     EH context.
+ *
+ *     RETURNS:
+ *     Determined timeout.
+ */
+unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
+{
+       struct ata_eh_context *ehc = &dev->link->eh_context;
+       int ent = ata_lookup_timeout_table(cmd);
+       int idx;
+
+       if (ent < 0)
+               return ATA_EH_CMD_DFL_TIMEOUT;
+
+       idx = ehc->cmd_timeout_idx[dev->devno][ent];
+       return ata_eh_cmd_timeout_table[ent].timeouts[idx];
+}
+
+/**
+ *     ata_internal_cmd_timed_out - notification for internal command timeout
+ *     @dev: target device
+ *     @cmd: internal command which timed out
+ *
+ *     Notify EH that internal command @cmd for @dev timed out.  This
+ *     function should be called only for commands whose timeouts are
+ *     determined using ata_internal_cmd_timeout().
+ *
+ *     LOCKING:
+ *     EH context.
+ */
+void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
+{
+       struct ata_eh_context *ehc = &dev->link->eh_context;
+       int ent = ata_lookup_timeout_table(cmd);
+       int idx;
+
+       if (ent < 0)
+               return;
+
+       idx = ehc->cmd_timeout_idx[dev->devno][ent];
+       if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
+               ehc->cmd_timeout_idx[dev->devno][ent]++;
+}
+
 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
                             unsigned int err_mask)
 {
@@ -486,6 +604,9 @@ void ata_scsi_error(struct Scsi_Host *host)
                                if (ata_ncq_enabled(dev))
                                        ehc->saved_ncq_enabled |= 1 << devno;
                        }
+
+                       /* set last reset timestamp to some time in the past */
+                       ehc->last_reset = jiffies - 60 * HZ;
                }
 
                ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
@@ -641,7 +762,7 @@ void ata_eh_fastdrain_timerfn(unsigned long arg)
                /* some qcs have finished, give it another chance */
                ap->fastdrain_cnt = cnt;
                ap->fastdrain_timer.expires =
-                       jiffies + ATA_EH_FASTDRAIN_INTERVAL;
+                       ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
                add_timer(&ap->fastdrain_timer);
        }
 
@@ -681,7 +802,8 @@ static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
 
        /* activate fast drain */
        ap->fastdrain_cnt = cnt;
-       ap->fastdrain_timer.expires = jiffies + ATA_EH_FASTDRAIN_INTERVAL;
+       ap->fastdrain_timer.expires =
+               ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
        add_timer(&ap->fastdrain_timer);
 }
 
@@ -1308,12 +1430,7 @@ static void ata_eh_analyze_serror(struct ata_link *link)
        unsigned int err_mask = 0, action = 0;
        u32 hotplug_mask;
 
-       if (serror & SERR_PERSISTENT) {
-               err_mask |= AC_ERR_ATA_BUS;
-               action |= ATA_EH_RESET;
-       }
-       if (serror &
-           (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {
+       if (serror & (SERR_PERSISTENT | SERR_DATA)) {
                err_mask |= AC_ERR_ATA_BUS;
                action |= ATA_EH_RESET;
        }
@@ -1357,7 +1474,7 @@ static void ata_eh_analyze_serror(struct ata_link *link)
  *     LOCKING:
  *     Kernel thread context (may sleep).
  */
-static void ata_eh_analyze_ncq_error(struct ata_link *link)
+void ata_eh_analyze_ncq_error(struct ata_link *link)
 {
        struct ata_port *ap = link->ap;
        struct ata_eh_context *ehc = &link->eh_context;
@@ -2047,19 +2164,11 @@ static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
                        unsigned int *classes, unsigned long deadline)
 {
        struct ata_device *dev;
-       int rc;
 
        ata_link_for_each_dev(dev, link)
                classes[dev->devno] = ATA_DEV_UNKNOWN;
 
-       rc = reset(link, classes, deadline);
-
-       /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */
-       ata_link_for_each_dev(dev, link)
-               if (classes[dev->devno] == ATA_DEV_UNKNOWN)
-                       classes[dev->devno] = ATA_DEV_NONE;
-
-       return rc;
+       return reset(link, classes, deadline);
 }
 
 static int ata_eh_followup_srst_needed(struct ata_link *link,
@@ -2084,26 +2193,36 @@ int ata_eh_reset(struct ata_link *link, int classify,
                 ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
                 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
 {
-       const int max_tries = ARRAY_SIZE(ata_eh_reset_timeouts);
        struct ata_port *ap = link->ap;
        struct ata_eh_context *ehc = &link->eh_context;
        unsigned int *classes = ehc->classes;
        unsigned int lflags = link->flags;
        int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
-       int try = 0;
+       int max_tries = 0, try = 0;
        struct ata_device *dev;
        unsigned long deadline, now;
        ata_reset_fn_t reset;
        unsigned long flags;
        u32 sstatus;
-       int rc;
+       int nr_known, rc;
+
+       /*
+        * Prepare to reset
+        */
+       while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX)
+               max_tries++;
+
+       now = jiffies;
+       deadline = ata_deadline(ehc->last_reset, ATA_EH_RESET_COOL_DOWN);
+       if (time_before(now, deadline))
+               schedule_timeout_uninterruptible(deadline - now);
 
-       /* about to reset */
        spin_lock_irqsave(ap->lock, flags);
        ap->pflags |= ATA_PFLAG_RESETTING;
        spin_unlock_irqrestore(ap->lock, flags);
 
        ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
+       ehc->last_reset = jiffies;
 
        ata_link_for_each_dev(dev, link) {
                /* If we issue an SRST then an ATA drive (not ATAPI)
@@ -2124,16 +2243,8 @@ int ata_eh_reset(struct ata_link *link, int classify,
                        ap->ops->set_piomode(ap, dev);
        }
 
-       if (!softreset && !hardreset) {
-               if (verbose)
-                       ata_link_printk(link, KERN_INFO, "no reset method "
-                                       "available, skipping reset\n");
-               if (!(lflags & ATA_LFLAG_ASSUME_CLASS))
-                       lflags |= ATA_LFLAG_ASSUME_ATA;
-               goto done;
-       }
-
        /* prefer hardreset */
+       reset = NULL;
        ehc->i.action &= ~ATA_EH_RESET;
        if (hardreset) {
                reset = hardreset;
@@ -2141,15 +2252,11 @@ int ata_eh_reset(struct ata_link *link, int classify,
        } else if (softreset) {
                reset = softreset;
                ehc->i.action = ATA_EH_SOFTRESET;
-       } else {
-               ata_link_printk(link, KERN_ERR, "BUG: no reset method, "
-                               "please report to linux-ide@vger.kernel.org\n");
-               dump_stack();
-               return -EINVAL;
        }
 
        if (prereset) {
-               rc = prereset(link, jiffies + ATA_EH_PRERESET_TIMEOUT);
+               rc = prereset(link,
+                             ata_deadline(jiffies, ATA_EH_PRERESET_TIMEOUT));
                if (rc) {
                        if (rc == -ENOENT) {
                                ata_link_printk(link, KERN_DEBUG,
@@ -2165,55 +2272,72 @@ int ata_eh_reset(struct ata_link *link, int classify,
                                        "prereset failed (errno=%d)\n", rc);
                        goto out;
                }
-       }
 
-       /* prereset() might have cleared ATA_EH_RESET */
-       if (!(ehc->i.action & ATA_EH_RESET)) {
-               /* prereset told us not to reset, bang classes and return */
-               ata_link_for_each_dev(dev, link)
-                       classes[dev->devno] = ATA_DEV_NONE;
-               rc = 0;
-               goto out;
+               /* prereset() might have cleared ATA_EH_RESET.  If so,
+                * bang classes and return.
+                */
+               if (reset && !(ehc->i.action & ATA_EH_RESET)) {
+                       ata_link_for_each_dev(dev, link)
+                               classes[dev->devno] = ATA_DEV_NONE;
+                       rc = 0;
+                       goto out;
+               }
        }
 
  retry:
-       deadline = jiffies + ata_eh_reset_timeouts[try++];
+       /*
+        * Perform reset
+        */
+       ehc->last_reset = jiffies;
+       if (ata_is_host_link(link))
+               ata_eh_freeze_port(ap);
 
-       /* shut up during boot probing */
-       if (verbose)
-               ata_link_printk(link, KERN_INFO, "%s resetting link\n",
-                               reset == softreset ? "soft" : "hard");
+       deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]);
 
-       /* mark that this EH session started with reset */
-       if (reset == hardreset)
-               ehc->i.flags |= ATA_EHI_DID_HARDRESET;
-       else
-               ehc->i.flags |= ATA_EHI_DID_SOFTRESET;
+       if (reset) {
+               if (verbose)
+                       ata_link_printk(link, KERN_INFO, "%s resetting link\n",
+                                       reset == softreset ? "soft" : "hard");
 
-       rc = ata_do_reset(link, reset, classes, deadline);
+               /* mark that this EH session started with reset */
+               if (reset == hardreset)
+                       ehc->i.flags |= ATA_EHI_DID_HARDRESET;
+               else
+                       ehc->i.flags |= ATA_EHI_DID_SOFTRESET;
 
-       if (reset == hardreset &&
-           ata_eh_followup_srst_needed(link, rc, classify, classes)) {
-               /* okay, let's do follow-up softreset */
-               reset = softreset;
+               rc = ata_do_reset(link, reset, classes, deadline);
 
-               if (!reset) {
-                       ata_link_printk(link, KERN_ERR,
-                                       "follow-up softreset required "
-                                       "but no softreset avaliable\n");
-                       rc = -EINVAL;
-                       goto fail;
+               if (reset == hardreset &&
+                   ata_eh_followup_srst_needed(link, rc, classify, classes)) {
+                       /* okay, let's do follow-up softreset */
+                       reset = softreset;
+
+                       if (!reset) {
+                               ata_link_printk(link, KERN_ERR,
+                                               "follow-up softreset required "
+                                               "but no softreset avaliable\n");
+                               rc = -EINVAL;
+                               goto fail;
+                       }
+
+                       ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
+                       rc = ata_do_reset(link, reset, classes, deadline);
                }
 
-               ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
-               rc = ata_do_reset(link, reset, classes, deadline);
+               /* -EAGAIN can happen if we skipped followup SRST */
+               if (rc && rc != -EAGAIN)
+                       goto fail;
+       } else {
+               if (verbose)
+                       ata_link_printk(link, KERN_INFO, "no reset method "
+                                       "available, skipping reset\n");
+               if (!(lflags & ATA_LFLAG_ASSUME_CLASS))
+                       lflags |= ATA_LFLAG_ASSUME_ATA;
        }
 
-       /* -EAGAIN can happen if we skipped followup SRST */
-       if (rc && rc != -EAGAIN)
-               goto fail;
-
- done:
+       /*
+        * Post-reset processing
+        */
        ata_link_for_each_dev(dev, link) {
                /* After the reset, the device state is PIO 0 and the
                 * controller state is undefined.  Reset also wakes up
@@ -2236,11 +2360,56 @@ int ata_eh_reset(struct ata_link *link, int classify,
        if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0)
                link->sata_spd = (sstatus >> 4) & 0xf;
 
+       /* thaw the port */
+       if (ata_is_host_link(link))
+               ata_eh_thaw_port(ap);
+
+       /* postreset() should clear hardware SError.  Although SError
+        * is cleared during link resume, clearing SError here is
+        * necessary as some PHYs raise hotplug events after SRST.
+        * This introduces race condition where hotplug occurs between
+        * reset and here.  This race is mediated by cross checking
+        * link onlineness and classification result later.
+        */
        if (postreset)
                postreset(link, classes);
 
+       /* clear cached SError */
+       spin_lock_irqsave(link->ap->lock, flags);
+       link->eh_info.serror = 0;
+       spin_unlock_irqrestore(link->ap->lock, flags);
+
+       /* Make sure onlineness and classification result correspond.
+        * Hotplug could have happened during reset and some
+        * controllers fail to wait while a drive is spinning up after
+        * being hotplugged causing misdetection.  By cross checking
+        * link onlineness and classification result, those conditions
+        * can be reliably detected and retried.
+        */
+       nr_known = 0;
+       ata_link_for_each_dev(dev, link) {
+               /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */
+               if (classes[dev->devno] == ATA_DEV_UNKNOWN)
+                       classes[dev->devno] = ATA_DEV_NONE;
+               else
+                       nr_known++;
+       }
+
+       if (classify && !nr_known && ata_link_online(link)) {
+               if (try < max_tries) {
+                       ata_link_printk(link, KERN_WARNING, "link online but "
+                                      "device misclassified, retrying\n");
+                       rc = -EAGAIN;
+                       goto fail;
+               }
+               ata_link_printk(link, KERN_WARNING,
+                              "link online but device misclassified, "
+                              "device detection might fail\n");
+       }
+
        /* reset successful, schedule revalidation */
        ata_eh_done(link, NULL, ATA_EH_RESET);
+       ehc->last_reset = jiffies;
        ehc->i.action |= ATA_EH_REVALIDATE;
 
        rc = 0;
@@ -2267,9 +2436,9 @@ int ata_eh_reset(struct ata_link *link, int classify,
        if (time_before(now, deadline)) {
                unsigned long delta = deadline - now;
 
-               ata_link_printk(link, KERN_WARNING, "reset failed "
-                               "(errno=%d), retrying in %u secs\n",
-                               rc, (jiffies_to_msecs(delta) + 999) / 1000);
+               ata_link_printk(link, KERN_WARNING,
+                       "reset failed (errno=%d), retrying in %u secs\n",
+                       rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));
 
                while (delta)
                        delta = schedule_timeout_uninterruptible(delta);
@@ -2547,8 +2716,11 @@ static int ata_eh_handle_dev_fail(struct ata_device *dev, int err)
                        ata_eh_detach_dev(dev);
 
                /* schedule probe if necessary */
-               if (ata_eh_schedule_probe(dev))
+               if (ata_eh_schedule_probe(dev)) {
                        ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
+                       memset(ehc->cmd_timeout_idx[dev->devno], 0,
+                              sizeof(ehc->cmd_timeout_idx[dev->devno]));
+               }
 
                return 1;
        } else {
@@ -2586,8 +2758,8 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 {
        struct ata_link *link;
        struct ata_device *dev;
-       int nr_failed_devs, nr_disabled_devs;
-       int reset, rc;
+       int nr_failed_devs;
+       int rc;
        unsigned long flags;
 
        DPRINTK("ENTER\n");
@@ -2629,8 +2801,6 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
  retry:
        rc = 0;
        nr_failed_devs = 0;
-       nr_disabled_devs = 0;
-       reset = 0;
 
        /* if UNLOADING, finish immediately */
        if (ap->pflags & ATA_PFLAG_UNLOADING)
@@ -2644,40 +2814,24 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
                if (ata_eh_skip_recovery(link))
                        ehc->i.action = 0;
 
-               /* do we need to reset? */
-               if (ehc->i.action & ATA_EH_RESET)
-                       reset = 1;
-
                ata_link_for_each_dev(dev, link)
                        ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
        }
 
        /* reset */
-       if (reset) {
-               /* if PMP is attached, this function only deals with
-                * downstream links, port should stay thawed.
-                */
-               if (!sata_pmp_attached(ap))
-                       ata_eh_freeze_port(ap);
-
-               ata_port_for_each_link(link, ap) {
-                       struct ata_eh_context *ehc = &link->eh_context;
+       ata_port_for_each_link(link, ap) {
+               struct ata_eh_context *ehc = &link->eh_context;
 
-                       if (!(ehc->i.action & ATA_EH_RESET))
-                               continue;
+               if (!(ehc->i.action & ATA_EH_RESET))
+                       continue;
 
-                       rc = ata_eh_reset(link, ata_link_nr_vacant(link),
-                                         prereset, softreset, hardreset,
-                                         postreset);
-                       if (rc) {
-                               ata_link_printk(link, KERN_ERR,
-                                               "reset failed, giving up\n");
-                               goto out;
-                       }
+               rc = ata_eh_reset(link, ata_link_nr_vacant(link),
+                                 prereset, softreset, hardreset, postreset);
+               if (rc) {
+                       ata_link_printk(link, KERN_ERR,
+                                       "reset failed, giving up\n");
+                       goto out;
                }
-
-               if (!sata_pmp_attached(ap))
-                       ata_eh_thaw_port(ap);
        }
 
        /* the rest */
@@ -2713,8 +2867,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 
 dev_fail:
                nr_failed_devs++;
-               if (ata_eh_handle_dev_fail(dev, rc))
-                       nr_disabled_devs++;
+               ata_eh_handle_dev_fail(dev, rc);
 
                if (ap->pflags & ATA_PFLAG_FROZEN) {
                        /* PMP reset requires working host port.
@@ -2726,18 +2879,8 @@ dev_fail:
                }
        }
 
-       if (nr_failed_devs) {
-               if (nr_failed_devs != nr_disabled_devs) {
-                       ata_port_printk(ap, KERN_WARNING, "failed to recover "
-                                       "some devices, retrying in 5 secs\n");
-                       ssleep(5);
-               } else {
-                       /* no device left to recover, repeat fast */
-                       msleep(500);
-               }
-
+       if (nr_failed_devs)
                goto retry;
-       }
 
  out:
        if (rc && r_failed_link)