rcu: Kick rcuo kthreads after their CPU goes offline
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index dc52dc3..07bf4aa 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -152,19 +152,6 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active);
  */
 static int rcu_scheduler_fully_active __read_mostly;
 
-#ifdef CONFIG_RCU_BOOST
-
-/*
- * Control variables for per-CPU and per-rcu_node kthreads.  These
- * handle all flavors of RCU.
- */
-static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
-DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
-DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
-DEFINE_PER_CPU(char, rcu_cpu_has_work);
-
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
 static void invoke_rcu_core(void);
 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
@@ -197,22 +184,24 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
  * one since the start of the grace period, this just sets a flag.
  * The caller must have disabled preemption.
  */
-void rcu_sched_qs(int cpu)
+void rcu_sched_qs(void)
 {
-       struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
-
-       if (rdp->passed_quiesce == 0)
-               trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
-       rdp->passed_quiesce = 1;
+       if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) {
+               trace_rcu_grace_period(TPS("rcu_sched"),
+                                      __this_cpu_read(rcu_sched_data.gpnum),
+                                      TPS("cpuqs"));
+               __this_cpu_write(rcu_sched_data.passed_quiesce, 1);
+       }
 }
 
-void rcu_bh_qs(int cpu)
+void rcu_bh_qs(void)
 {
-       struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
-
-       if (rdp->passed_quiesce == 0)
-               trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
-       rdp->passed_quiesce = 1;
+       if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) {
+               trace_rcu_grace_period(TPS("rcu_bh"),
+                                      __this_cpu_read(rcu_bh_data.gpnum),
+                                      TPS("cpuqs"));
+               __this_cpu_write(rcu_bh_data.passed_quiesce, 1);
+       }
 }
 
 static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
@@ -287,7 +276,7 @@ static void rcu_momentary_dyntick_idle(void)
 void rcu_note_context_switch(int cpu)
 {
        trace_rcu_utilization(TPS("Start context switch"));
-       rcu_sched_qs(cpu);
+       rcu_sched_qs();
        rcu_preempt_note_context_switch(cpu);
        if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
                rcu_momentary_dyntick_idle();
@@ -535,6 +524,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
        atomic_inc(&rdtp->dynticks);
        smp_mb__after_atomic();  /* Force ordering with next sojourn. */
        WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+       rcu_dynticks_task_enter();
 
        /*
         * It is illegal to enter an extended quiescent state while
@@ -651,6 +641,7 @@ void rcu_irq_exit(void)
 static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
                               int user)
 {
+       rcu_dynticks_task_exit();
        smp_mb__before_atomic();  /* Force ordering w/previous sojourn. */
        atomic_inc(&rdtp->dynticks);
        /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
@@ -1656,7 +1647,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
                                            rnp->level, rnp->grplo,
                                            rnp->grphi, rnp->qsmask);
                raw_spin_unlock_irq(&rnp->lock);
-               cond_resched();
+               cond_resched_rcu_qs();
        }
 
        mutex_unlock(&rsp->onoff_mutex);
@@ -1746,7 +1737,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
                /* smp_mb() provided by prior unlock-lock pair. */
                nocb += rcu_future_gp_cleanup(rsp, rnp);
                raw_spin_unlock_irq(&rnp->lock);
-               cond_resched();
+               cond_resched_rcu_qs();
        }
        rnp = rcu_get_root(rsp);
        raw_spin_lock_irq(&rnp->lock);
@@ -1795,8 +1786,8 @@ static int __noreturn rcu_gp_kthread(void *arg)
                        /* Locking provides needed memory barrier. */
                        if (rcu_gp_init(rsp))
                                break;
-                       cond_resched();
-                       flush_signals(current);
+                       cond_resched_rcu_qs();
+                       WARN_ON(signal_pending(current));
                        trace_rcu_grace_period(rsp->name,
                                               ACCESS_ONCE(rsp->gpnum),
                                               TPS("reqwaitsig"));
@@ -1838,11 +1829,11 @@ static int __noreturn rcu_gp_kthread(void *arg)
                                trace_rcu_grace_period(rsp->name,
                                                       ACCESS_ONCE(rsp->gpnum),
                                                       TPS("fqsend"));
-                               cond_resched();
+                               cond_resched_rcu_qs();
                        } else {
                                /* Deal with stray signal. */
-                               cond_resched();
-                               flush_signals(current);
+                               cond_resched_rcu_qs();
+                               WARN_ON(signal_pending(current));
                                trace_rcu_grace_period(rsp->name,
                                                       ACCESS_ONCE(rsp->gpnum),
                                                       TPS("fqswaitsig"));
@@ -1938,7 +1929,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 {
        WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
        raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
-       wake_up(&rsp->gp_wq);  /* Memory barrier implied by wake_up() path. */
+       rcu_gp_kthread_wake(rsp);
 }
 
 /*
@@ -2220,8 +2211,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
        /* Adjust any no-longer-needed kthreads. */
        rcu_boost_kthread_setaffinity(rnp, -1);
 
-       /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
-
        /* Exclude any attempts to start a new grace period. */
        mutex_lock(&rsp->onoff_mutex);
        raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
@@ -2403,8 +2392,8 @@ void rcu_check_callbacks(int cpu, int user)
                 * at least not while the corresponding CPU is online.
                 */
 
-               rcu_sched_qs(cpu);
-               rcu_bh_qs(cpu);
+               rcu_sched_qs();
+               rcu_bh_qs();
 
        } else if (!in_softirq()) {
 
@@ -2415,11 +2404,13 @@ void rcu_check_callbacks(int cpu, int user)
                 * critical section, so note it.
                 */
 
-               rcu_bh_qs(cpu);
+               rcu_bh_qs();
        }
        rcu_preempt_check_callbacks(cpu);
        if (rcu_pending(cpu))
                invoke_rcu_core();
+       if (user)
+               rcu_note_voluntary_context_switch(current);
        trace_rcu_utilization(TPS("End scheduler-tick"));
 }
 
@@ -2442,7 +2433,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
        struct rcu_node *rnp;
 
        rcu_for_each_leaf_node(rsp, rnp) {
-               cond_resched();
+               cond_resched_rcu_qs();
                mask = 0;
                raw_spin_lock_irqsave(&rnp->lock, flags);
                smp_mb__after_unlock_lock();
@@ -2518,7 +2509,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
        ACCESS_ONCE(rsp->gp_flags) =
                ACCESS_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS;
        raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
-       wake_up(&rsp->gp_wq);  /* Memory barrier implied by wake_up() path. */
+       rcu_gp_kthread_wake(rsp);
 }
 
 /*
@@ -2936,11 +2927,6 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
  * restructure your code to batch your updates, and then use a single
  * synchronize_sched() instead.
  *
- * Note that it is illegal to call this function while holding any lock
- * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
- * to call this function from a CPU-hotplug notifier.  Failing to observe
- * these restriction will result in deadlock.
- *
  * This implementation can be thought of as an application of ticket
  * locking to RCU, with sync_sched_expedited_started and
  * sync_sched_expedited_done taking on the roles of the halves
@@ -2964,6 +2950,9 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
  */
 void synchronize_sched_expedited(void)
 {
+       cpumask_var_t cm;
+       bool cma = false;
+       int cpu;
        long firstsnap, s, snap;
        int trycount = 0;
        struct rcu_state *rsp = &rcu_sched_state;
@@ -2990,14 +2979,34 @@ void synchronize_sched_expedited(void)
         */
        snap = atomic_long_inc_return(&rsp->expedited_start);
        firstsnap = snap;
-       get_online_cpus();
+       if (!try_get_online_cpus()) {
+               /* CPU hotplug operation in flight, fall back to normal GP. */
+               wait_rcu_gp(call_rcu_sched);
+               atomic_long_inc(&rsp->expedited_normal);
+               return;
+       }
        WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
 
+       /* Offline CPUs, idle CPUs, and any CPU we run on are quiescent. */
+       cma = zalloc_cpumask_var(&cm, GFP_KERNEL);
+       if (cma) {
+               cpumask_copy(cm, cpu_online_mask);
+               cpumask_clear_cpu(raw_smp_processor_id(), cm);
+               for_each_cpu(cpu, cm) {
+                       struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+                       if (!(atomic_add_return(0, &rdtp->dynticks) & 0x1))
+                               cpumask_clear_cpu(cpu, cm);
+               }
+               if (cpumask_weight(cm) == 0)
+                       goto all_cpus_idle;
+       }
+
        /*
         * Each pass through the following loop attempts to force a
         * context switch on each CPU.
         */
-       while (try_stop_cpus(cpu_online_mask,
+       while (try_stop_cpus(cma ? cm : cpu_online_mask,
                             synchronize_sched_expedited_cpu_stop,
                             NULL) == -EAGAIN) {
                put_online_cpus();
@@ -3009,6 +3018,7 @@ void synchronize_sched_expedited(void)
                        /* ensure test happens before caller kfree */
                        smp_mb__before_atomic(); /* ^^^ */
                        atomic_long_inc(&rsp->expedited_workdone1);
+                       free_cpumask_var(cm);
                        return;
                }
 
@@ -3018,6 +3028,7 @@ void synchronize_sched_expedited(void)
                } else {
                        wait_rcu_gp(call_rcu_sched);
                        atomic_long_inc(&rsp->expedited_normal);
+                       free_cpumask_var(cm);
                        return;
                }
 
@@ -3027,6 +3038,7 @@ void synchronize_sched_expedited(void)
                        /* ensure test happens before caller kfree */
                        smp_mb__before_atomic(); /* ^^^ */
                        atomic_long_inc(&rsp->expedited_workdone2);
+                       free_cpumask_var(cm);
                        return;
                }
 
@@ -3037,12 +3049,21 @@ void synchronize_sched_expedited(void)
                 * and they started after our first try, so their grace
                 * period works for us.
                 */
-               get_online_cpus();
+               if (!try_get_online_cpus()) {
+                       /* CPU hotplug operation in flight, use normal GP. */
+                       wait_rcu_gp(call_rcu_sched);
+                       atomic_long_inc(&rsp->expedited_normal);
+                       free_cpumask_var(cm);
+                       return;
+               }
                snap = atomic_long_read(&rsp->expedited_start);
                smp_mb(); /* ensure read is before try_stop_cpus(). */
        }
        atomic_long_inc(&rsp->expedited_stoppedcpus);
 
+all_cpus_idle:
+       free_cpumask_var(cm);
+
        /*
         * Everyone up to our most recent fetch is covered by our grace
         * period.  Update the counter, but only if our work is still
@@ -3290,11 +3311,16 @@ static void _rcu_barrier(struct rcu_state *rsp)
                        continue;
                rdp = per_cpu_ptr(rsp->rda, cpu);
                if (rcu_is_nocb_cpu(cpu)) {
-                       _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
-                                          rsp->n_barrier_done);
-                       atomic_inc(&rsp->barrier_cpu_count);
-                       __call_rcu(&rdp->barrier_head, rcu_barrier_callback,
-                                  rsp, cpu, 0);
+                       if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {
+                               _rcu_barrier_trace(rsp, "OfflineNoCB", cpu,
+                                                  rsp->n_barrier_done);
+                       } else {
+                               _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
+                                                  rsp->n_barrier_done);
+                               atomic_inc(&rsp->barrier_cpu_count);
+                               __call_rcu(&rdp->barrier_head,
+                                          rcu_barrier_callback, rsp, cpu, 0);
+                       }
                } else if (ACCESS_ONCE(rdp->qlen)) {
                        _rcu_barrier_trace(rsp, "OnlineQ", cpu,
                                           rsp->n_barrier_done);
@@ -3453,6 +3479,7 @@ static int rcu_cpu_notify(struct notifier_block *self,
        case CPU_UP_PREPARE_FROZEN:
                rcu_prepare_cpu(cpu);
                rcu_prepare_kthreads(cpu);
+               rcu_spawn_all_nocb_kthreads(cpu);
                break;
        case CPU_ONLINE:
        case CPU_DOWN_FAILED:
@@ -3470,8 +3497,10 @@ static int rcu_cpu_notify(struct notifier_block *self,
        case CPU_DEAD_FROZEN:
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
-               for_each_rcu_flavor(rsp)
+               for_each_rcu_flavor(rsp) {
                        rcu_cleanup_dead_cpu(cpu, rsp);
+                       do_nocb_deferred_wakeup(per_cpu_ptr(rsp->rda, cpu));
+               }
                break;
        default:
                break;
@@ -3500,7 +3529,7 @@ static int rcu_pm_notify(struct notifier_block *self,
 }
 
 /*
- * Spawn the kthread that handles this RCU flavor's grace periods.
+ * Spawn the kthreads that handle each RCU flavor's grace periods.
  */
 static int __init rcu_spawn_gp_kthread(void)
 {
@@ -3509,6 +3538,7 @@ static int __init rcu_spawn_gp_kthread(void)
        struct rcu_state *rsp;
        struct task_struct *t;
 
+       rcu_scheduler_fully_active = 1;
        for_each_rcu_flavor(rsp) {
                t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name);
                BUG_ON(IS_ERR(t));
@@ -3516,8 +3546,9 @@ static int __init rcu_spawn_gp_kthread(void)
                raw_spin_lock_irqsave(&rnp->lock, flags);
                rsp->gp_kthread = t;
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               rcu_spawn_nocb_kthreads(rsp);
        }
+       rcu_spawn_nocb_kthreads();
+       rcu_spawn_boost_kthreads();
        return 0;
 }
 early_initcall(rcu_spawn_gp_kthread);
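
Note on the new synchronize_sched_expedited() fast path above: each CPU's
->dynticks counter is incremented on every transition into or out of
dynticks-idle, so an even value means the CPU is idle (already in an extended
quiescent state) and an odd value means it is non-idle. The patch uses that to
drop idle CPUs from the mask passed to try_stop_cpus(), and to skip the
stop-machine step entirely (the all_cpus_idle label) when every other CPU is
idle. The following is a minimal, self-contained userspace sketch of just that
filtering step; the array, mask, and function names here are illustrative
stand-ins, not kernel APIs.

/*
 * Userspace sketch (not kernel code) of the even/odd dynticks filtering
 * done by the patched synchronize_sched_expedited(): CPUs whose counter
 * is even are dynticks-idle, hence already quiescent, and need not be
 * forced through a context switch.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

/* Stand-in for each CPU's rcu_dynticks->dynticks counter. */
static _Atomic long dynticks[NR_CPUS];

/* Stand-in for a cpumask: true means this CPU still needs attention. */
static bool needs_qs[NR_CPUS];

/* Even counter value => CPU is dynticks-idle => already quiescent. */
static bool cpu_is_idle(int cpu)
{
	return !(atomic_load(&dynticks[cpu]) & 0x1);
}

int main(void)
{
	int cpu, remaining = 0;

	/* CPUs 0 and 2 are "non-idle" (odd); CPUs 1 and 3 are "idle" (even). */
	atomic_store(&dynticks[0], 1);
	atomic_store(&dynticks[1], 2);
	atomic_store(&dynticks[2], 5);
	atomic_store(&dynticks[3], 8);

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		needs_qs[cpu] = true;

	/* The filtering step: drop idle CPUs from the mask. */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (cpu_is_idle(cpu))
			needs_qs[cpu] = false;
		if (needs_qs[cpu])
			remaining++;
	}

	if (remaining == 0)
		printf("all other CPUs idle, skip stop-machine entirely\n");
	else
		printf("%d CPU(s) still need a forced quiescent state\n",
		       remaining);
	return 0;
}

In the actual patch the same decision is made with a cpumask: if the mask
empties, control jumps to all_cpus_idle and try_stop_cpus() is never invoked;
otherwise try_stop_cpus() runs only on the reduced mask (or on
cpu_online_mask when the mask allocation failed).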