[pandora-kernel.git] kernel/rcutree.c
index 7e2f297..1aa52af 100644
@@ -38,7 +38,7 @@
 #include <linux/nmi.h>
 #include <linux/atomic.h>
 #include <linux/bitops.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/completion.h>
 #include <linux/moduleparam.h>
 #include <linux/percpu.h>
@@ -131,8 +131,6 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
 static void invoke_rcu_core(void);
 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
 
-#define RCU_KTHREAD_PRIO 1     /* RT priority for per-CPU kthreads. */
-
 /*
  * Track the rcutorture test sequence number and the update version
  * number within a given test.  The rcutorture_testseq is incremented
@@ -204,13 +202,13 @@ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 };
 #endif /* #ifdef CONFIG_NO_HZ */
 
-static int blimit = 10;                /* Maximum callbacks per rcu_do_batch. */
-static int qhimark = 10000;    /* If this many pending, ignore blimit. */
-static int qlowmark = 100;     /* Once only this many pending, use blimit. */
+static long blimit = 10;       /* Maximum callbacks per rcu_do_batch. */
+static long qhimark = 10000;   /* If this many pending, ignore blimit. */
+static long qlowmark = 100;    /* Once only this many pending, use blimit. */
 
-module_param(blimit, int, 0);
-module_param(qhimark, int, 0);
-module_param(qlowmark, int, 0);
+module_param(blimit, long, 0);
+module_param(qhimark, long, 0);
+module_param(qlowmark, long, 0);
 
 int rcu_cpu_stall_suppress __read_mostly;
 module_param(rcu_cpu_stall_suppress, int, 0644);
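The three batching knobs above are widened from int to long, and their module_param() declarations are updated to match, since module_param()'s type argument must agree with the variable's C type. A minimal sketch of declaring a long-typed module parameter in the same way (the module and parameter names here are hypothetical, not part of this patch):

#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>

/* Hypothetical tuning knob, analogous to blimit above. */
static long demo_blimit = 10;
module_param(demo_blimit, long, 0444);	/* readable via /sys/module/<mod>/parameters/ */
MODULE_PARM_DESC(demo_blimit, "Example long-typed batch limit");

static int __init demo_init(void)
{
	pr_info("demo_blimit = %ld\n", demo_blimit);
	return 0;
}
module_init(demo_init);

static void __exit demo_exit(void)
{
}
module_exit(demo_exit);
MODULE_LICENSE("GPL");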
@@ -294,7 +292,9 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
 static int
 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 {
-       return *rdp->nxttail[RCU_DONE_TAIL] && !rcu_gp_in_progress(rsp);
+       return *rdp->nxttail[RCU_DONE_TAIL +
+                            ACCESS_ONCE(rsp->completed) != rdp->completed] &&
+              !rcu_gp_in_progress(rsp);
 }
 
 /*
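The new return expression indexes ->nxttail[] with either the RCU_DONE_TAIL segment or the one after it, depending on whether this CPU has caught up with the latest ->completed value. Note that C's + binds more tightly than !=, so the expression parses as (RCU_DONE_TAIL + rsp->completed) != rdp->completed; because RCU_DONE_TAIL is 0 (kernel/rcutree.h), this still yields the intended 0-or-1 index. A standalone sketch of the two readings (plain userspace C, for illustration only):

#include <stdio.h>

#define RCU_DONE_TAIL 0	/* value as in kernel/rcutree.h */

int main(void)
{
	unsigned long rsp_completed = 5, rdp_completed = 4;

	/* Intended reading: base segment plus 1 if ->completed differs. */
	int intended = RCU_DONE_TAIL + (rsp_completed != rdp_completed);

	/* Actual parse: + binds tighter than !=; RCU_DONE_TAIL == 0
	 * makes the two readings agree. */
	int as_parsed = (RCU_DONE_TAIL + rsp_completed) != rdp_completed;

	printf("intended=%d as_parsed=%d\n", intended, as_parsed);	/* both 1 */
	return 0;
}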
@@ -373,13 +373,6 @@ void rcu_enter_nohz(void)
        smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
        WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
        local_irq_restore(flags);
-
-       /* If the interrupt queued a callback, get out of dyntick mode. */
-       if (in_irq() &&
-           (__get_cpu_var(rcu_sched_data).nxtlist ||
-            __get_cpu_var(rcu_bh_data).nxtlist ||
-            rcu_preempt_needs_cpu(smp_processor_id())))
-               set_need_resched();
 }
 
 /*
@@ -495,11 +488,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
  */
 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 {
-       unsigned long curr;
-       unsigned long snap;
+       unsigned int curr;
+       unsigned int snap;
 
-       curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks);
-       snap = (unsigned long)rdp->dynticks_snap;
+       curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
+       snap = (unsigned int)rdp->dynticks_snap;
 
        /*
         * If the CPU passed through or entered a dynticks idle phase with
@@ -509,7 +502,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
         * read-side critical section that started before the beginning
         * of the current RCU grace period.
         */
-       if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) {
+       if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
                trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti");
                rdp->dynticks_fqs++;
                return 1;
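Narrowing curr and snap to unsigned int matches the 32-bit atomic_t dynticks counter, so the ordering test must tolerate 32-bit wraparound, hence UINT_CMP_GE in place of ULONG_CMP_GE. A sketch of such a wraparound-tolerant comparison, assuming UINT_CMP_GE mirrors the existing ULONG_CMP_GE helper in include/linux/rcupdate.h:

#include <limits.h>

/* Assumed to follow the ULONG_CMP_GE pattern from <linux/rcupdate.h>:
 * "a >= b" modulo wraparound, valid while the two counters stay within
 * UINT_MAX / 2 of each other. */
#define UINT_CMP_GE(a, b)	(UINT_MAX / 2 >= (unsigned int)((a) - (b)))
#define UINT_CMP_LT(a, b)	(UINT_MAX / 2 <  (unsigned int)((a) - (b)))

/* Example: UINT_CMP_GE(1U, UINT_MAX) is true, because the counter has
 * advanced by 2 even though it wrapped past zero. */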
@@ -552,6 +545,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
        int cpu;
        long delta;
        unsigned long flags;
+       int ndetected;
        struct rcu_node *rnp = rcu_get_root(rsp);
 
        /* Only let one CPU complain about others per time interval. */
@@ -568,7 +562,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
         * Now rat on any tasks that got kicked up to the root rcu_node
         * due to CPU offlining.
         */
-       rcu_print_task_stall(rnp);
+       ndetected = rcu_print_task_stall(rnp);
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
        /*
@@ -580,17 +574,22 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
               rsp->name);
        rcu_for_each_leaf_node(rsp, rnp) {
                raw_spin_lock_irqsave(&rnp->lock, flags);
-               rcu_print_task_stall(rnp);
+               ndetected += rcu_print_task_stall(rnp);
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
                if (rnp->qsmask == 0)
                        continue;
                for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
-                       if (rnp->qsmask & (1UL << cpu))
+                       if (rnp->qsmask & (1UL << cpu)) {
                                printk(" %d", rnp->grplo + cpu);
+                               ndetected++;
+                       }
        }
        printk("} (detected by %d, t=%ld jiffies)\n",
               smp_processor_id(), (long)(jiffies - rsp->gp_start));
-       trigger_all_cpu_backtrace();
+       if (ndetected == 0)
+               printk(KERN_ERR "INFO: Stall ended before state dump start\n");
+       else if (!trigger_all_cpu_backtrace())
+               dump_stack();
 
        /* If so configured, complain about tasks blocking the grace period. */
 
@@ -611,7 +610,8 @@ static void print_cpu_stall(struct rcu_state *rsp)
         */
        printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n",
               rsp->name, smp_processor_id(), jiffies - rsp->gp_start);
-       trigger_all_cpu_backtrace();
+       if (!trigger_all_cpu_backtrace())
+               dump_stack();
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
        if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
@@ -843,26 +843,25 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
        struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
        struct rcu_node *rnp = rcu_get_root(rsp);
 
-       if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) {
-               if (cpu_needs_another_gp(rsp, rdp))
-                       rsp->fqs_need_gp = 1;
-               if (rnp->completed == rsp->completed) {
-                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-                       return;
-               }
-               raw_spin_unlock(&rnp->lock);     /* irqs remain disabled. */
+       if (!rcu_scheduler_fully_active ||
+           !cpu_needs_another_gp(rsp, rdp)) {
+               /*
+                * Either the scheduler hasn't yet spawned the first
+                * non-idle task or this CPU does not need another
+                * grace period.  Either way, don't start a new grace
+                * period.
+                */
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               return;
+       }
 
+       if (rsp->fqs_active) {
                /*
-                * Propagate new ->completed value to rcu_node structures
-                * so that other CPUs don't have to wait until the start
-                * of the next grace period to process their callbacks.
+                * This CPU needs a grace period, but force_quiescent_state()
+                * is running.  Tell it to start one on this CPU's behalf.
                 */
-               rcu_for_each_node_breadth_first(rsp, rnp) {
-                       raw_spin_lock(&rnp->lock); /* irqs already disabled. */
-                       rnp->completed = rsp->completed;
-                       raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-               }
-               local_irq_restore(flags);
+               rsp->fqs_need_gp = 1;
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
                return;
        }
 
@@ -946,6 +945,8 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
        __releases(rcu_get_root(rsp)->lock)
 {
        unsigned long gp_duration;
+       struct rcu_node *rnp = rcu_get_root(rsp);
+       struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 
        WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
 
@@ -957,7 +958,40 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
        gp_duration = jiffies - rsp->gp_start;
        if (gp_duration > rsp->gp_max)
                rsp->gp_max = gp_duration;
-       rsp->completed = rsp->gpnum;
+
+       /*
+        * We know the grace period is complete, but to everyone else
+        * it appears to still be ongoing.  But it is also the case
+        * that to everyone else it looks like there is nothing that
+        * they can do to advance the grace period.  It is therefore
+        * safe for us to drop the lock in order to mark the grace
+        * period as completed in all of the rcu_node structures.
+        *
+        * But if this CPU needs another grace period, it will take
+        * care of this while initializing the next grace period.
+        * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL
+        * because the callbacks have not yet been advanced: Those
+        * callbacks are waiting on the grace period that just now
+        * completed.
+        */
+       if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) {
+               raw_spin_unlock(&rnp->lock);     /* irqs remain disabled. */
+
+               /*
+                * Propagate new ->completed value to rcu_node structures
+                * so that other CPUs don't have to wait until the start
+                * of the next grace period to process their callbacks.
+                */
+               rcu_for_each_node_breadth_first(rsp, rnp) {
+                       raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+                       rnp->completed = rsp->gpnum;
+                       raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+               }
+               rnp = rcu_get_root(rsp);
+               raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+       }
+
+       rsp->completed = rsp->gpnum;  /* Declare the grace period complete. */
        trace_rcu_grace_period(rsp->name, rsp->completed, "end");
        rsp->signaled = RCU_GP_IDLE;
        rcu_start_gp(rsp, flags);  /* releases root node's rnp->lock. */
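In the hunk above, *rdp->nxttail[RCU_WAIT_TAIL] == NULL means this CPU has no callbacks beyond those waiting on the grace period that just completed, so it will not be starting another grace period itself; in that case the new ->completed value is propagated through the rcu_node tree here rather than during the next grace-period initialization. For reference, the per-CPU ->nxttail[] segments involved are defined in kernel/rcutree.h roughly as follows (reproduced as a sketch, not part of this patch):

/* Segments of the per-CPU callback list (see kernel/rcutree.h):
 *   nxttail[RCU_DONE_TAIL]       -- end of callbacks whose grace period
 *                                    has completed; ready to invoke.
 *   nxttail[RCU_WAIT_TAIL]       -- end of callbacks waiting on the
 *                                    current grace period.
 *   nxttail[RCU_NEXT_READY_TAIL] -- end of callbacks waiting on the
 *                                    next grace period.
 *   nxttail[RCU_NEXT_TAIL]       -- end of callbacks not yet associated
 *                                    with any grace period.
 */
#define RCU_DONE_TAIL		0
#define RCU_WAIT_TAIL		1
#define RCU_NEXT_READY_TAIL	2
#define RCU_NEXT_TAIL		3
#define RCU_NEXT_SIZE		4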
@@ -1226,7 +1260,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 {
        unsigned long flags;
        struct rcu_head *next, *list, **tail;
-       int bl, count;
+       long bl, count;
 
        /* If no callbacks are ready, just return.*/
        if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
@@ -1527,9 +1561,6 @@ static void rcu_process_callbacks(struct softirq_action *unused)
                                &__get_cpu_var(rcu_sched_data));
        __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
        rcu_preempt_process_callbacks();
-
-       /* If we are last CPU on way to dyntick-idle mode, accelerate it. */
-       rcu_needs_cpu_flush();
        trace_rcu_utilization("End RCU core");
 }
 
@@ -1712,7 +1743,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
        check_cpu_stall(rsp, rdp);
 
        /* Is the RCU core waiting for a quiescent state from this CPU? */
-       if (rdp->qs_pending && !rdp->passed_quiesce) {
+       if (rcu_scheduler_fully_active &&
+           rdp->qs_pending && !rdp->passed_quiesce) {
 
                /*
                 * If force_quiescent_state() coming soon and this CPU
@@ -1907,8 +1939,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 
        /* Set up local state, ensuring consistent view of global state. */
        raw_spin_lock_irqsave(&rnp->lock, flags);
-       rdp->passed_quiesce = 0;  /* We could be racing with new GP, */
-       rdp->qs_pending = 1;     /*  so set up to respond to current GP. */
        rdp->beenonline = 1;     /* We have now been online. */
        rdp->preemptible = preemptible;
        rdp->qlen_last_fqs_check = 0;
@@ -1933,8 +1963,15 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
                rnp->qsmaskinit |= mask;
                mask = rnp->grpmask;
                if (rnp == rdp->mynode) {
-                       rdp->gpnum = rnp->completed; /* if GP in progress... */
+                       /*
+                        * If there is a grace period in progress, we will
+                        * set up to wait for it next time we run the
+                        * RCU core code.
+                        */
+                       rdp->gpnum = rnp->completed;
                        rdp->completed = rnp->completed;
+                       rdp->passed_quiesce = 0;
+                       rdp->qs_pending = 0;
                        rdp->passed_quiesce_gpnum = rnp->gpnum - 1;
                        trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl");
                }