rcu: Kick rcuo kthreads after their CPU goes offline
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1b70cb6..07bf4aa 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -79,9 +79,18 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
  * the tracing userspace tools to be able to decipher the string
  * address to the matching string.
  */
-#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
+#ifdef CONFIG_TRACING
+# define DEFINE_RCU_TPS(sname) \
 static char sname##_varname[] = #sname; \
-static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \
+static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname;
+# define RCU_STATE_NAME(sname) sname##_varname
+#else
+# define DEFINE_RCU_TPS(sname)
+# define RCU_STATE_NAME(sname) __stringify(sname)
+#endif
+
+#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
+DEFINE_RCU_TPS(sname) \
 struct rcu_state sname##_state = { \
        .level = { &sname##_state.node[0] }, \
        .call = cr, \
@@ -93,7 +102,7 @@ struct rcu_state sname##_state = { \
        .orphan_donetail = &sname##_state.orphan_donelist, \
        .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
        .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
-       .name = sname##_varname, \
+       .name = RCU_STATE_NAME(sname), \
        .abbr = sabbr, \
 }; \
 DEFINE_PER_CPU(struct rcu_data, sname##_data)
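
The two hunks above stop emitting the __tracepoint_string variables when CONFIG_TRACING is off; those strings exist only so tracing user space can resolve addresses back to names, so without tracing the state name can degrade to a plain stringified literal. A minimal user-space sketch of the same macro shape (the simplified rcu_state and the example flavor name are illustrative, and the kernel-only __used/__tracepoint_string attributes are omitted):

  #include <stdio.h>

  #define __stringify_1(x)  #x
  #define __stringify(x)    __stringify_1(x)

  #define CONFIG_TRACING 1          /* drop this line to exercise the !TRACING path */

  #ifdef CONFIG_TRACING
  # define DEFINE_RCU_TPS(sname) \
          static char sname##_varname[] = #sname;
  # define RCU_STATE_NAME(sname) sname##_varname
  #else
  # define DEFINE_RCU_TPS(sname)
  # define RCU_STATE_NAME(sname) __stringify(sname)
  #endif

  struct rcu_state { const char *name; };       /* heavily simplified */

  #define RCU_STATE_INITIALIZER(sname) \
  DEFINE_RCU_TPS(sname) \
  struct rcu_state sname##_state = { .name = RCU_STATE_NAME(sname) };

  RCU_STATE_INITIALIZER(rcu_sched)

  int main(void)
  {
          /* Prints "rcu_sched" whether or not CONFIG_TRACING is defined. */
          printf("%s\n", rcu_sched_state.name);
          return 0;
  }
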
@@ -143,19 +152,6 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active);
  */
 static int rcu_scheduler_fully_active __read_mostly;
 
-#ifdef CONFIG_RCU_BOOST
-
-/*
- * Control variables for per-CPU and per-rcu_node kthreads.  These
- * handle all flavors of RCU.
- */
-static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
-DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
-DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
-DEFINE_PER_CPU(char, rcu_cpu_has_work);
-
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
 static void invoke_rcu_core(void);
 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
@@ -188,22 +184,24 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
  * one since the start of the grace period, this just sets a flag.
  * The caller must have disabled preemption.
  */
-void rcu_sched_qs(int cpu)
+void rcu_sched_qs(void)
 {
-       struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
-
-       if (rdp->passed_quiesce == 0)
-               trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
-       rdp->passed_quiesce = 1;
+       if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) {
+               trace_rcu_grace_period(TPS("rcu_sched"),
+                                      __this_cpu_read(rcu_sched_data.gpnum),
+                                      TPS("cpuqs"));
+               __this_cpu_write(rcu_sched_data.passed_quiesce, 1);
+       }
 }
 
-void rcu_bh_qs(int cpu)
+void rcu_bh_qs(void)
 {
-       struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
-
-       if (rdp->passed_quiesce == 0)
-               trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
-       rdp->passed_quiesce = 1;
+       if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) {
+               trace_rcu_grace_period(TPS("rcu_bh"),
+                                      __this_cpu_read(rcu_bh_data.gpnum),
+                                      TPS("cpuqs"));
+               __this_cpu_write(rcu_bh_data.passed_quiesce, 1);
+       }
 }
 
 static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
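
The rcu_sched_qs()/rcu_bh_qs() conversion above drops the cpu argument: every caller runs on the CPU being reported and has preemption disabled, so the per-CPU data can be reached through the this-CPU accessors instead of per_cpu(). A hedged kernel-context fragment showing the general before/after shape (example_flag and both helper functions are invented for illustration):

  #include <linux/percpu.h>

  static DEFINE_PER_CPU(int, example_flag);

  /* Before: the caller had to name the CPU, even though it was always the
   * one it was running on. */
  static void report_example_old(int cpu)
  {
          if (!per_cpu(example_flag, cpu))
                  per_cpu(example_flag, cpu) = 1;
  }

  /* After: the caller guarantees it is on the CPU in question with
   * preemption disabled, so the argument goes away and the cheaper
   * this-CPU accessors apply. */
  static void report_example_new(void)
  {
          if (!__this_cpu_read(example_flag))
                  __this_cpu_write(example_flag, 1);
  }
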
@@ -278,7 +276,7 @@ static void rcu_momentary_dyntick_idle(void)
 void rcu_note_context_switch(int cpu)
 {
        trace_rcu_utilization(TPS("Start context switch"));
-       rcu_sched_qs(cpu);
+       rcu_sched_qs();
        rcu_preempt_note_context_switch(cpu);
        if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
                rcu_momentary_dyntick_idle();
@@ -526,6 +524,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
        atomic_inc(&rdtp->dynticks);
        smp_mb__after_atomic();  /* Force ordering with next sojourn. */
        WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+       rcu_dynticks_task_enter();
 
        /*
         * It is illegal to enter an extended quiescent state while
@@ -642,6 +641,7 @@ void rcu_irq_exit(void)
 static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
                               int user)
 {
+       rcu_dynticks_task_exit();
        smp_mb__before_atomic();  /* Force ordering w/previous sojourn. */
        atomic_inc(&rdtp->dynticks);
        /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
@@ -819,7 +819,7 @@ bool notrace __rcu_is_watching(void)
  */
 bool notrace rcu_is_watching(void)
 {
-       int ret;
+       bool ret;
 
        preempt_disable();
        ret = __rcu_is_watching();
@@ -1647,7 +1647,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
                                            rnp->level, rnp->grplo,
                                            rnp->grphi, rnp->qsmask);
                raw_spin_unlock_irq(&rnp->lock);
-               cond_resched();
+               cond_resched_rcu_qs();
        }
 
        mutex_unlock(&rsp->onoff_mutex);
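
Here and in the later hunks, cond_resched_rcu_qs() replaces bare cond_resched() in RCU's own long-running loops: the grace-period kthread should report a voluntary-context-switch quiescent state to RCU-tasks even when cond_resched() finds nothing else to run. Roughly along these lines (see include/linux/rcupdate.h for the real macro; the _sketch suffix marks this as illustrative, not the kernel's definition):

  /* Illustrative only; the kernel's cond_resched_rcu_qs() lives in
   * include/linux/rcupdate.h. */
  #define cond_resched_rcu_qs_sketch() \
  do { \
          if (!cond_resched()) \
                  rcu_note_voluntary_context_switch(current); \
  } while (0)
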
@@ -1668,7 +1668,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
        if (fqs_state == RCU_SAVE_DYNTICK) {
                /* Collect dyntick-idle snapshots. */
                if (is_sysidle_rcu_state(rsp)) {
-                       isidle = 1;
+                       isidle = true;
                        maxj = jiffies - ULONG_MAX / 4;
                }
                force_qs_rnp(rsp, dyntick_save_progress_counter,
@@ -1677,14 +1677,15 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
                fqs_state = RCU_FORCE_QS;
        } else {
                /* Handle dyntick-idle and offline CPUs. */
-               isidle = 0;
+               isidle = false;
                force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
        }
        /* Clear flag to prevent immediate re-entry. */
        if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
                raw_spin_lock_irq(&rnp->lock);
                smp_mb__after_unlock_lock();
-               ACCESS_ONCE(rsp->gp_flags) &= ~RCU_GP_FLAG_FQS;
+               ACCESS_ONCE(rsp->gp_flags) =
+                       ACCESS_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS;
                raw_spin_unlock_irq(&rnp->lock);
        }
        return fqs_state;
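
The gp_flags hunk above (and the matching one in force_quiescent_state() further down) splits "ACCESS_ONCE(x) &= ~flag" into an explicit marked load followed by a marked store, so that each ACCESS_ONCE() covers either a load or a store, never a compound read-modify-write. A stand-alone sketch of the pattern, with ACCESS_ONCE() defined locally the way the kernel of this era defined it (relies on the GCC typeof extension, as the kernel does):

  #include <stdio.h>

  #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

  #define RCU_GP_FLAG_FQS 0x2

  static unsigned long gp_flags = RCU_GP_FLAG_FQS;

  int main(void)
  {
          /* Old form (one volatile-qualified read-modify-write):
           *      ACCESS_ONCE(gp_flags) &= ~RCU_GP_FLAG_FQS;
           * New form (one marked load, then one marked store): */
          ACCESS_ONCE(gp_flags) = ACCESS_ONCE(gp_flags) & ~RCU_GP_FLAG_FQS;

          printf("gp_flags = %#lx\n", gp_flags);  /* prints 0 */
          return 0;
  }
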
@@ -1736,7 +1737,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
                /* smp_mb() provided by prior unlock-lock pair. */
                nocb += rcu_future_gp_cleanup(rsp, rnp);
                raw_spin_unlock_irq(&rnp->lock);
-               cond_resched();
+               cond_resched_rcu_qs();
        }
        rnp = rcu_get_root(rsp);
        raw_spin_lock_irq(&rnp->lock);
@@ -1785,8 +1786,8 @@ static int __noreturn rcu_gp_kthread(void *arg)
                        /* Locking provides needed memory barrier. */
                        if (rcu_gp_init(rsp))
                                break;
-                       cond_resched();
-                       flush_signals(current);
+                       cond_resched_rcu_qs();
+                       WARN_ON(signal_pending(current));
                        trace_rcu_grace_period(rsp->name,
                                               ACCESS_ONCE(rsp->gpnum),
                                               TPS("reqwaitsig"));
@@ -1828,11 +1829,11 @@ static int __noreturn rcu_gp_kthread(void *arg)
                                trace_rcu_grace_period(rsp->name,
                                                       ACCESS_ONCE(rsp->gpnum),
                                                       TPS("fqsend"));
-                               cond_resched();
+                               cond_resched_rcu_qs();
                        } else {
                                /* Deal with stray signal. */
-                               cond_resched();
-                               flush_signals(current);
+                               cond_resched_rcu_qs();
+                               WARN_ON(signal_pending(current));
                                trace_rcu_grace_period(rsp->name,
                                                       ACCESS_ONCE(rsp->gpnum),
                                                       TPS("fqswaitsig"));
@@ -1928,7 +1929,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 {
        WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
        raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
-       wake_up(&rsp->gp_wq);  /* Memory barrier implied by wake_up() path. */
+       rcu_gp_kthread_wake(rsp);
 }
 
 /*
@@ -2210,8 +2211,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
        /* Adjust any no-longer-needed kthreads. */
        rcu_boost_kthread_setaffinity(rnp, -1);
 
-       /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
-
        /* Exclude any attempts to start a new grace period. */
        mutex_lock(&rsp->onoff_mutex);
        raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
@@ -2393,8 +2392,8 @@ void rcu_check_callbacks(int cpu, int user)
                 * at least not while the corresponding CPU is online.
                 */
 
-               rcu_sched_qs(cpu);
-               rcu_bh_qs(cpu);
+               rcu_sched_qs();
+               rcu_bh_qs();
 
        } else if (!in_softirq()) {
 
@@ -2405,11 +2404,13 @@ void rcu_check_callbacks(int cpu, int user)
                 * critical section, so note it.
                 */
 
-               rcu_bh_qs(cpu);
+               rcu_bh_qs();
        }
        rcu_preempt_check_callbacks(cpu);
        if (rcu_pending(cpu))
                invoke_rcu_core();
+       if (user)
+               rcu_note_voluntary_context_switch(current);
        trace_rcu_utilization(TPS("End scheduler-tick"));
 }
 
@@ -2432,7 +2433,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
        struct rcu_node *rnp;
 
        rcu_for_each_leaf_node(rsp, rnp) {
-               cond_resched();
+               cond_resched_rcu_qs();
                mask = 0;
                raw_spin_lock_irqsave(&rnp->lock, flags);
                smp_mb__after_unlock_lock();
@@ -2449,7 +2450,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
                for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
                        if ((rnp->qsmask & bit) != 0) {
                                if ((rnp->qsmaskinit & bit) != 0)
-                                       *isidle = 0;
+                                       *isidle = false;
                                if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
                                        mask |= bit;
                        }
@@ -2505,9 +2506,10 @@ static void force_quiescent_state(struct rcu_state *rsp)
                raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
                return;  /* Someone beat us to it. */
        }
-       ACCESS_ONCE(rsp->gp_flags) |= RCU_GP_FLAG_FQS;
+       ACCESS_ONCE(rsp->gp_flags) =
+               ACCESS_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS;
        raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
-       wake_up(&rsp->gp_wq);  /* Memory barrier implied by wake_up() path. */
+       rcu_gp_kthread_wake(rsp);
 }
 
 /*
@@ -2925,11 +2927,6 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
  * restructure your code to batch your updates, and then use a single
  * synchronize_sched() instead.
  *
- * Note that it is illegal to call this function while holding any lock
- * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
- * to call this function from a CPU-hotplug notifier.  Failing to observe
- * these restriction will result in deadlock.
- *
  * This implementation can be thought of as an application of ticket
  * locking to RCU, with sync_sched_expedited_started and
  * sync_sched_expedited_done taking on the roles of the halves
@@ -2953,6 +2950,9 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
  */
 void synchronize_sched_expedited(void)
 {
+       cpumask_var_t cm;
+       bool cma = false;
+       int cpu;
        long firstsnap, s, snap;
        int trycount = 0;
        struct rcu_state *rsp = &rcu_sched_state;
@@ -2979,14 +2979,34 @@ void synchronize_sched_expedited(void)
         */
        snap = atomic_long_inc_return(&rsp->expedited_start);
        firstsnap = snap;
-       get_online_cpus();
+       if (!try_get_online_cpus()) {
+               /* CPU hotplug operation in flight, fall back to normal GP. */
+               wait_rcu_gp(call_rcu_sched);
+               atomic_long_inc(&rsp->expedited_normal);
+               return;
+       }
        WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
 
+       /* Offline CPUs, idle CPUs, and any CPU we run on are quiescent. */
+       cma = zalloc_cpumask_var(&cm, GFP_KERNEL);
+       if (cma) {
+               cpumask_copy(cm, cpu_online_mask);
+               cpumask_clear_cpu(raw_smp_processor_id(), cm);
+               for_each_cpu(cpu, cm) {
+                       struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+                       if (!(atomic_add_return(0, &rdtp->dynticks) & 0x1))
+                               cpumask_clear_cpu(cpu, cm);
+               }
+               if (cpumask_weight(cm) == 0)
+                       goto all_cpus_idle;
+       }
+
        /*
         * Each pass through the following loop attempts to force a
         * context switch on each CPU.
         */
-       while (try_stop_cpus(cpu_online_mask,
+       while (try_stop_cpus(cma ? cm : cpu_online_mask,
                             synchronize_sched_expedited_cpu_stop,
                             NULL) == -EAGAIN) {
                put_online_cpus();
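
The new prologue above trims the expedited work in two ways: if a CPU-hotplug operation is in flight, try_get_online_cpus() fails and the code falls back to a normal grace period; otherwise it builds a cpumask that excludes the current CPU and every CPU whose dynticks counter is even, since an even value means that CPU is in an extended quiescent state (idle or nohz_full user space) and needs no stop-machine visit. A toy model of that even/odd test (NR_CPUS and the snapshot array are invented for illustration):

  #include <stdbool.h>
  #include <stdio.h>

  #define NR_CPUS 4

  /* Toy snapshots of per-CPU rdtp->dynticks: the counter is incremented on
   * every transition into and out of an extended quiescent state, so an odd
   * value means "currently non-idle".  Here CPUs 1 and 3 are idle. */
  static const long dynticks_snap[NR_CPUS] = { 3, 4, 7, 10 };

  int main(void)
  {
          int cpu;

          for (cpu = 0; cpu < NR_CPUS; cpu++) {
                  bool must_stop = dynticks_snap[cpu] & 0x1;

                  printf("cpu%d: %s\n", cpu, must_stop ?
                         "needs try_stop_cpus()" : "already quiescent, skipped");
          }
          return 0;
  }
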
@@ -2998,6 +3018,7 @@ void synchronize_sched_expedited(void)
                        /* ensure test happens before caller kfree */
                        smp_mb__before_atomic(); /* ^^^ */
                        atomic_long_inc(&rsp->expedited_workdone1);
+                       free_cpumask_var(cm);
                        return;
                }
 
@@ -3007,6 +3028,7 @@ void synchronize_sched_expedited(void)
                } else {
                        wait_rcu_gp(call_rcu_sched);
                        atomic_long_inc(&rsp->expedited_normal);
+                       free_cpumask_var(cm);
                        return;
                }
 
@@ -3016,6 +3038,7 @@ void synchronize_sched_expedited(void)
                        /* ensure test happens before caller kfree */
                        smp_mb__before_atomic(); /* ^^^ */
                        atomic_long_inc(&rsp->expedited_workdone2);
+                       free_cpumask_var(cm);
                        return;
                }
 
@@ -3026,12 +3049,21 @@ void synchronize_sched_expedited(void)
                 * and they started after our first try, so their grace
                 * period works for us.
                 */
-               get_online_cpus();
+               if (!try_get_online_cpus()) {
+                       /* CPU hotplug operation in flight, use normal GP. */
+                       wait_rcu_gp(call_rcu_sched);
+                       atomic_long_inc(&rsp->expedited_normal);
+                       free_cpumask_var(cm);
+                       return;
+               }
                snap = atomic_long_read(&rsp->expedited_start);
                smp_mb(); /* ensure read is before try_stop_cpus(). */
        }
        atomic_long_inc(&rsp->expedited_stoppedcpus);
 
+all_cpus_idle:
+       free_cpumask_var(cm);
+
        /*
         * Everyone up to our most recent fetch is covered by our grace
         * period.  Update the counter, but only if our work is still
@@ -3279,11 +3311,16 @@ static void _rcu_barrier(struct rcu_state *rsp)
                        continue;
                rdp = per_cpu_ptr(rsp->rda, cpu);
                if (rcu_is_nocb_cpu(cpu)) {
-                       _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
-                                          rsp->n_barrier_done);
-                       atomic_inc(&rsp->barrier_cpu_count);
-                       __call_rcu(&rdp->barrier_head, rcu_barrier_callback,
-                                  rsp, cpu, 0);
+                       if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {
+                               _rcu_barrier_trace(rsp, "OfflineNoCB", cpu,
+                                                  rsp->n_barrier_done);
+                       } else {
+                               _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
+                                                  rsp->n_barrier_done);
+                               atomic_inc(&rsp->barrier_cpu_count);
+                               __call_rcu(&rdp->barrier_head,
+                                          rcu_barrier_callback, rsp, cpu, 0);
+                       }
                } else if (ACCESS_ONCE(rdp->qlen)) {
                        _rcu_barrier_trace(rsp, "OnlineQ", cpu,
                                           rsp->n_barrier_done);
@@ -3442,6 +3479,7 @@ static int rcu_cpu_notify(struct notifier_block *self,
        case CPU_UP_PREPARE_FROZEN:
                rcu_prepare_cpu(cpu);
                rcu_prepare_kthreads(cpu);
+               rcu_spawn_all_nocb_kthreads(cpu);
                break;
        case CPU_ONLINE:
        case CPU_DOWN_FAILED:
@@ -3459,8 +3497,10 @@ static int rcu_cpu_notify(struct notifier_block *self,
        case CPU_DEAD_FROZEN:
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
-               for_each_rcu_flavor(rsp)
+               for_each_rcu_flavor(rsp) {
                        rcu_cleanup_dead_cpu(cpu, rsp);
+                       do_nocb_deferred_wakeup(per_cpu_ptr(rsp->rda, cpu));
+               }
                break;
        default:
                break;
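
This hunk is the change the subject line refers to: a no-CBs (rcuo) CPU can go offline with a deferred kthread wakeup still pending, and without the added do_nocb_deferred_wakeup() call in the CPU_DEAD path that wakeup, and the callbacks behind it, could be delayed indefinitely. A minimal sketch of the deferred-wakeup idea, not the kernel's actual do_nocb_deferred_wakeup() internals (all names below are invented):

  #include <stdbool.h>

  struct offload_state {
          bool wakeup_deferred;     /* set where an immediate wakeup was unsafe */
  };

  /* Stands in for waking the per-CPU rcuo kthread (wake_up() in real code). */
  static void wake_offload_kthread(struct offload_state *s)
  {
          (void)s;
  }

  /* Called from a context where waking is safe -- after this patch, also
   * from the CPU_DEAD notifier for the CPU that just went away. */
  static void do_deferred_wakeup(struct offload_state *s)
  {
          if (s->wakeup_deferred) {
                  s->wakeup_deferred = false;
                  wake_offload_kthread(s);
          }
  }

  int main(void)
  {
          struct offload_state s = { .wakeup_deferred = true };

          do_deferred_wakeup(&s);   /* the pending wakeup is not lost */
          return 0;
  }
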
@@ -3489,7 +3529,7 @@ static int rcu_pm_notify(struct notifier_block *self,
 }
 
 /*
- * Spawn the kthread that handles this RCU flavor's grace periods.
+ * Spawn the kthreads that handle each RCU flavor's grace periods.
  */
 static int __init rcu_spawn_gp_kthread(void)
 {
@@ -3498,6 +3538,7 @@ static int __init rcu_spawn_gp_kthread(void)
        struct rcu_state *rsp;
        struct task_struct *t;
 
+       rcu_scheduler_fully_active = 1;
        for_each_rcu_flavor(rsp) {
                t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name);
                BUG_ON(IS_ERR(t));
@@ -3505,8 +3546,9 @@ static int __init rcu_spawn_gp_kthread(void)
                raw_spin_lock_irqsave(&rnp->lock, flags);
                rsp->gp_kthread = t;
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               rcu_spawn_nocb_kthreads(rsp);
        }
+       rcu_spawn_nocb_kthreads();
+       rcu_spawn_boost_kthreads();
        return 0;
 }
 early_initcall(rcu_spawn_gp_kthread);