genirq: enable polling for disabled screaming irqs
author Eric W. Biederman <ebiederm@xmission.com>
Thu, 10 Jul 2008 21:48:54 +0000 (14:48 -0700)
committer Ingo Molnar <mingo@elte.hu>
Fri, 18 Jul 2008 17:21:13 +0000 (19:21 +0200)
When we disable a screaming irq we never see it again.  If the irq
line is shared or if the driver half works this is a real pain.  So
periodically poll the handlers for screaming interrupts.

I use a timer instead of the classic irq poll technique of working off
the timer interrupt because when we use the local apic timers
note_interrupt is never called (bug?).  Further on a system with
dynamic ticks the timer interrupt might not even fire unless there is
a timer telling it it needs to.

I forced this case on my test system with an e1000 nic and my ssh
session remained responsive despite the interrupt handler only being
called every 10th of a second.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
kernel/irq/spurious.c

index c66d3f1..19fe9d6 100644 (file)
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
 #include <linux/moduleparam.h>
+#include <linux/timer.h>
 
 static int irqfixup __read_mostly;
 
+#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10) /* poll period in jiffies: HZ/10, i.e. a tenth of a second */
+static void poll_spurious_irqs(unsigned long dummy);
+static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0); /* armed via mod_timer() when an irq is disabled as spurious; callback is poll_spurious_irqs() */
+
 /*
  * Recovery handler for misrouted interrupts.
  */
-static int misrouted_irq(int irq)
+static int try_one_irq(int irq, struct irq_desc *desc) /*
+ * Run the IRQF_SHARED handlers registered on @desc, passing them @irq.
+ *
+ * Returns 1 if any of those handlers returned IRQ_HANDLED, otherwise 0.
+ * If the line is already IRQ_INPROGRESS nothing is run: a shared line is
+ * marked IRQ_PENDING instead and 0 is returned.
+ *
+ * NOTE(review): desc->lock is taken with a plain spin_lock(), and on the
+ * normal path local_irq_disable() is called without a matching enable in
+ * this function; callers presumably have to provide the right interrupt
+ * state around the call -- confirm for every call site.
+ */
 {
-       int i;
+       struct irqaction *action;
        int ok = 0;
        int work = 0;   /* Did we do work for a real IRQ */
 
-       for (i = 1; i < NR_IRQS; i++) {
-               struct irq_desc *desc = irq_desc + i;
-               struct irqaction *action;
-
-               if (i == irq)   /* Already tried */
-                       continue;
-
-               spin_lock(&desc->lock);
-               /* Already running on another processor */
-               if (desc->status & IRQ_INPROGRESS) {
-                       /*
-                        * Already running: If it is shared get the other
-                        * CPU to go looking for our mystery interrupt too
-                        */
-                       if (desc->action && (desc->action->flags & IRQF_SHARED))
-                               desc->status |= IRQ_PENDING;
-                       spin_unlock(&desc->lock);
-                       continue;
-               }
-               /* Honour the normal IRQ locking */
-               desc->status |= IRQ_INPROGRESS;
-               action = desc->action;
+       spin_lock(&desc->lock);
+       /* Already running on another processor */
+       if (desc->status & IRQ_INPROGRESS) {
+               /*
+                * Already running: If it is shared get the other
+                * CPU to go looking for our mystery interrupt too
+                */
+               if (desc->action && (desc->action->flags & IRQF_SHARED))
+                       desc->status |= IRQ_PENDING;
                spin_unlock(&desc->lock);
+               return ok; /* no handler was run on this path, so ok is still 0 */
+       }
+       /* Honour the normal IRQ locking */
+       desc->status |= IRQ_INPROGRESS;
+       action = desc->action;
+       spin_unlock(&desc->lock);
 
-               while (action) {
-                       /* Only shared IRQ handlers are safe to call */
-                       if (action->flags & IRQF_SHARED) {
-                               if (action->handler(i, action->dev_id) ==
-                                               IRQ_HANDLED)
-                                       ok = 1;
-                       }
-                       action = action->next;
+       while (action) {
+               /* Only shared IRQ handlers are safe to call */
+               if (action->flags & IRQF_SHARED) {
+                       if (action->handler(irq, action->dev_id) ==
+                               IRQ_HANDLED)
+                               ok = 1;
                }
-               local_irq_disable();
-               /* Now clean up the flags */
-               spin_lock(&desc->lock);
-               action = desc->action;
+               action = action->next;
+       }
+       local_irq_disable();
+       /* Now clean up the flags */
+       spin_lock(&desc->lock);
+       action = desc->action;
 
+       /*
+        * While we were looking for a fixup someone queued a real
+        * IRQ clashing with our walk:
+        */
+       while ((desc->status & IRQ_PENDING) && action) {
                /*
-                * While we were looking for a fixup someone queued a real
-                * IRQ clashing with our walk:
-                */
-               while ((desc->status & IRQ_PENDING) && action) {
-                       /*
-                        * Perform real IRQ processing for the IRQ we deferred
-                        */
-                       work = 1;
-                       spin_unlock(&desc->lock);
-                       handle_IRQ_event(i, action);
-                       spin_lock(&desc->lock);
-                       desc->status &= ~IRQ_PENDING;
-               }
-               desc->status &= ~IRQ_INPROGRESS;
-               /*
-                * If we did actual work for the real IRQ line we must let the
-                * IRQ controller clean up too
+                * Perform real IRQ processing for the IRQ we deferred
                 */
-               if (work && desc->chip && desc->chip->end)
-                       desc->chip->end(i);
+               work = 1;
                spin_unlock(&desc->lock);
+               handle_IRQ_event(irq, action); /* its result is not folded into ok */
+               spin_lock(&desc->lock);
+               desc->status &= ~IRQ_PENDING;
+       }
+       desc->status &= ~IRQ_INPROGRESS;
+       /*
+        * If we did actual work for the real IRQ line we must let the
+        * IRQ controller clean up too
+        */
+       if (work && desc->chip && desc->chip->end)
+               desc->chip->end(irq);
+       spin_unlock(&desc->lock);
+
+       return ok;
+}
+
+static int misrouted_irq(int irq) /*
+ * Offer the interrupt to every other irq line: walks lines 1 through
+ * NR_IRQS - 1, skipping @irq itself, and calls try_one_irq() on each.
+ *
+ * Returns 1 if at least one try_one_irq() call returned nonzero (a shared
+ * handler on some other line returned IRQ_HANDLED), otherwise 0.
+ */
+{
+       int i;
+       int ok = 0;
+
+       for (i = 1; i < NR_IRQS; i++) {
+               struct irq_desc *desc = irq_desc + i;
+
+               if (i == irq)   /* Already tried */
+                       continue;
+
+               if (try_one_irq(i, desc))
+                       ok = 1;
        }
        /* So the caller can adjust the irq error counts */
        return ok;
 }
 
+/*
+ * Timer callback: re-poll the handlers of every irq line that has been
+ * shut off as spurious (IRQ_SPURIOUS_DISABLED set in desc->status), then
+ * re-arm the timer so the poll repeats every POLL_SPURIOUS_IRQ_INTERVAL
+ * jiffies.
+ *
+ * @dummy: timer data argument, unused.
+ */
+static void poll_spurious_irqs(unsigned long dummy)
+{
+       int i;
+       for (i = 1; i < NR_IRQS; i++) {
+               struct irq_desc *desc = irq_desc + i;
+               unsigned int status;
+
+               /* Racy but it doesn't matter */
+               status = desc->status;
+               barrier();
+               if (!(status & IRQ_SPURIOUS_DISABLED))
+                       continue;
+
+               /*
+                * try_one_irq() takes desc->lock with a plain spin_lock()
+                * and calls local_irq_disable() without re-enabling.  Unlike
+                * the misrouted_irq() path, a timer callback is entered with
+                * interrupts enabled, so disable them around the call (the
+                * lock is also taken from hardirq context) and restore them
+                * afterwards instead of leaking the disabled state.
+                */
+               local_irq_disable();
+               try_one_irq(i, desc);
+               local_irq_enable();
+       }
+
+       mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
+}
+
 /*
  * If 99,900 of the previous 100,000 interrupts have not been handled
  * then assume that the IRQ is stuck in some manner. Drop a diagnostic
@@ -212,6 +246,8 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
                desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED;
                desc->depth++;
                desc->chip->disable(irq);
+
+               mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
        }
        desc->irqs_unhandled = 0;
 }