kernel/rcutree_plugin.h

   1 /*
   2  * Read-Copy Update mechanism for mutual exclusion (tree-based version)
   3  * Internal non-public definitions that provide either classic
   4  * or preemptable semantics.
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  19  *
  20  * Copyright Red Hat, 2009
  21  * Copyright IBM Corporation, 2009
  22  *
  23  * Author: Ingo Molnar <mingo@elte.hu>
  24  *         Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  25  */
  26
  27
  28 #ifdef CONFIG_TREE_PREEMPT_RCU
  29
  30 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
     /*
      * rcu_preempt_state above is the rcu_state instance that the
      * preemptable-RCU functions in this section operate on.  The per-CPU
      * rcu_data defined below is its per-CPU counterpart, reached through
      * per_cpu() and __get_cpu_var() in the functions that follow.
      */
  31 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
  32
  33 /*
  34  * Tell them what RCU they are running.
      *
      * Prints a single KERN_INFO line.  This is the variant compiled when
      * CONFIG_TREE_PREEMPT_RCU is defined.
  35  */
  36 static inline void rcu_bootup_announce(void)
  37 {
  38         printk(KERN_INFO
  39                "Experimental preemptable hierarchical RCU implementation.\n");
  40 }
  41
  42 /*
  43  * Return the number of RCU-preempt batches processed thus far
  44  * for debug and statistics.
      *
      * The value is rcu_preempt_state.completed, read here without taking
      * any lock.
  45  */
  46 long rcu_batches_completed_preempt(void)
  47 {
  48         return rcu_preempt_state.completed;
  49 }
  50 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
  51
  52 /*
  53  * Return the number of RCU batches processed thus far for debug & stats.
      *
      * When CONFIG_TREE_PREEMPT_RCU is defined, this is the RCU-preempt
      * count returned by rcu_batches_completed_preempt().
  54  */
  55 long rcu_batches_completed(void)
  56 {
  57         return rcu_batches_completed_preempt();
  58 }
  59 EXPORT_SYMBOL_GPL(rcu_batches_completed);
  60
  61 /*
  62  * Record a preemptable-RCU quiescent state for the specified CPU.  Note
  63  * that this just means that the task currently running on the CPU is
  64  * not in a quiescent state.  There might be any number of tasks blocked
  65  * while in an RCU read-side critical section.
      *
      * Only the CPU's rcu_preempt_data is updated here, and no lock is
      * taken in this function.  rcu_preempt_qs() calls this even when the
      * current task is inside a read-side critical section; in that case
      * the task carries RCU_READ_UNLOCK_BLOCKED, i.e. it has been queued
      * on an rcu_node's blocked_tasks[] list.
  66  */
  67 static void rcu_preempt_qs_record(int cpu)
  68 {
  69         struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
  70         rdp->passed_quiesc = 1;
             /* Snapshot ->completed as of the moment the state was recorded. */
  71         rdp->passed_quiesc_completed = rdp->completed;
  72 }
  73
  74 /*
  75  * We have entered the scheduler or are between softirqs in ksoftirqd.
  76  * If we are in an RCU read-side critical section, we need to reflect
  77  * that in the state of the rcu_node structure corresponding to this CPU.
  78  * Caller must disable hardirqs.
      *
      * A task that is inside a read-side critical section and not yet marked
      * RCU_READ_UNLOCK_BLOCKED is queued on one of the two blocked_tasks[]
      * lists of this CPU's rcu_node.  In every case a quiescent state is then
      * recorded for the CPU and the task's NEED_QS/GOT_QS bits are cleared.
  79  */
  80 static void rcu_preempt_qs(int cpu)
  81 {
  82         struct task_struct *t = current;
  83         int phase;
  84         struct rcu_data *rdp;
  85         struct rcu_node *rnp;
  86
             /* Queue the task at most once per critical section. */
  87         if (t->rcu_read_lock_nesting &&
  88             (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
  89
  90                 /* Possibly blocking in an RCU read-side critical section. */
  91                 rdp = rcu_preempt_state.rda[cpu];
  92                 rnp = rdp->mynode;
                     /* Plain spin_lock(): the caller already disabled irqs. */
  93                 spin_lock(&rnp->lock);
  94                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
  95                 t->rcu_blocked_node = rnp;
  96
  97                 /*
  98                  * If this CPU has already checked in, then this task
  99                  * will hold up the next grace period rather than the
 100                  * current grace period.  Queue the task accordingly.
 101                  * If the task is queued for the current grace period
 102                  * (i.e., this CPU has not yet passed through a quiescent
 103                  * state for the current grace period), then as long
 104                  * as that task remains queued, the current grace period
 105                  * cannot end.
                      *
                      * The index computed below is (rnp->gpnum & 0x1), the
                      * current grace period's list, when this CPU's bit is
                      * still set in rnp->qsmask, and the other list when the
                      * bit has already been cleared.
 106                  */
 107                 phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1);
 108                 list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
 109                 smp_mb();  /* Ensure later ctxt swtch seen after above. */
 110                 spin_unlock(&rnp->lock);
 111         }
 112
 113         /*
 114          * Either we were not in an RCU read-side critical section to
 115          * begin with, or we have now recorded that critical section
 116          * globally.  Either way, we can now note a quiescent state
 117          * for this CPU.  Again, if we were in an RCU read-side critical
 118          * section, and if that critical section was blocking the current
 119          * grace period, then the fact that the task has been enqueued
 120          * means that we continue to block the current grace period.
 121          */
 122         rcu_preempt_qs_record(cpu);
 123         t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS |
 124                                         RCU_READ_UNLOCK_GOT_QS);
 125 }
 126
 127 /*
 128  * Tree-preemptable RCU implementation for rcu_read_lock().
 129  * Just increment ->rcu_read_lock_nesting, shared state will be updated
 130  * if we block.
      *
      * Only the current task's nesting counter is touched here; no lock is
      * acquired.
 131  */
 132 void __rcu_read_lock(void)
 133 {
 134         ACCESS_ONCE(current->rcu_read_lock_nesting)++;
 135         barrier();  /* needed if we ever invoke rcu_read_lock in rcutree.c */
 136 }
 137 EXPORT_SYMBOL_GPL(__rcu_read_lock);
 138
     /*
      * Handle the special cases flagged in t->rcu_read_unlock_special.
      * __rcu_read_unlock() invokes this when the nesting count drops to zero
      * and that field is non-zero.  Two things may happen here:
      *
      *  - a requested quiescent state is acknowledged, by replacing
      *    RCU_READ_UNLOCK_NEED_QS with RCU_READ_UNLOCK_GOT_QS;
      *  - a task that blocked inside its read-side critical section
      *    (RCU_READ_UNLOCK_BLOCKED) is removed from the rcu_node list it was
      *    queued on, and if it was the last entry on the current grace
      *    period's list while rnp->qsmask is zero, the quiescent state is
      *    reported via cpu_quiet_msk_finish() or cpu_quiet_msk().
      *
      * Returns immediately, changing nothing, when called from NMI context.
      */
 139 static void rcu_read_unlock_special(struct task_struct *t)
 140 {
 141         int empty;
 142         unsigned long flags;
 143         unsigned long mask;
 144         struct rcu_node *rnp;
 145         int special;
 146
 147         /* NMI handlers cannot block and cannot safely manipulate state. */
 148         if (in_nmi())
 149                 return;
 150
 151         local_irq_save(flags);
 152
 153         /*
 154          * If RCU core is waiting for this CPU to exit critical section,
 155          * let it know that we have done so.
              *
              * "special" is a snapshot; the tests below use the snapshot while
              * the updates are applied to t->rcu_read_unlock_special itself.
 156          */
 157         special = t->rcu_read_unlock_special;
 158         if (special & RCU_READ_UNLOCK_NEED_QS) {
 159                 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 160                 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS;
 161         }
 162
 163         /* Hardware IRQ handlers cannot block. */
             /* Returns before the cleanup below, leaving the BLOCKED bit as is. */
 164         if (in_irq()) {
 165                 local_irq_restore(flags);
 166                 return;
 167         }
 168
 169         /* Clean up if blocked during RCU read-side critical section. */
 170         if (special & RCU_READ_UNLOCK_BLOCKED) {
 171                 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
 172
 173                 /*
 174                  * Remove this task from the list it blocked on.  The
 175                  * task can migrate while we acquire the lock, but at
 176                  * most one time.  So at most two passes through loop.
                      *
                      * The loop re-reads t->rcu_blocked_node after taking the
                      * lock and retries if it changed in the meantime.
 177                  */
 178                 for (;;) {
 179                         rnp = t->rcu_blocked_node;
 180                         spin_lock(&rnp->lock);
 181                         if (rnp == t->rcu_blocked_node)
 182                                 break;
 183                         spin_unlock(&rnp->lock);
 184                 }
                     /* Was the current grace period's list empty before removal? */
 185                 empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
 186                 list_del_init(&t->rcu_node_entry);
 187                 t->rcu_blocked_node = NULL;
 188
 189                 /*
 190                  * If this was the last task on the current list, and if
 191                  * we aren't waiting on any CPUs, report the quiescent state.
 192                  * Note that both cpu_quiet_msk_finish() and cpu_quiet_msk()
 193                  * drop rnp->lock and restore irq.
                      *
                      * "!empty" followed by list_empty() means the list went
                      * from non-empty to empty as a result of the removal above.
 194                  */
 195                 if (!empty && rnp->qsmask == 0 &&
 196                     list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) {
 197                         t->rcu_read_unlock_special &=
 198                                 ~(RCU_READ_UNLOCK_NEED_QS |
 199                                   RCU_READ_UNLOCK_GOT_QS);
 200                         if (rnp->parent == NULL) {
 201                                 /* Only one rcu_node in the tree. */
 202                                 cpu_quiet_msk_finish(&rcu_preempt_state, flags);
 203                                 return;
 204                         }
 205                         /* Report up the rest of the hierarchy. */
                             /* Save this node's bit, then switch to the parent's lock. */
 206                         mask = rnp->grpmask;
 207                         spin_unlock_irqrestore(&rnp->lock, flags);
 208                         rnp = rnp->parent;
 209                         spin_lock_irqsave(&rnp->lock, flags);
 210                         cpu_quiet_msk(mask, &rcu_preempt_state, rnp, flags);
 211                         return;
 212                 }
                     /* Nothing to report: drop the lock; irqs are restored below. */
 213                 spin_unlock(&rnp->lock);
 214         }
 215         local_irq_restore(flags);
 216 }
 217
 218 /*
 219  * Tree-preemptable RCU implementation for rcu_read_unlock().
 220  * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
 221  * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
 222  * invoke rcu_read_unlock_special() to clean up after a context switch
 223  * in an RCU read-side critical section and other special cases.
 224  */
 225 void __rcu_read_unlock(void)
 226 {
 227         struct task_struct *t = current;
 228
 229         barrier();  /* needed if we ever invoke rcu_read_unlock in rcutree.c */
             /*
              * The decrement is the first operand of the &&, so it always
              * happens; ->rcu_read_unlock_special is examined only when the
              * count has reached zero.
              */
 230         if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
 231             unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
 232                 rcu_read_unlock_special(t);
 233 }
 234 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
 235
 236 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 237
 238 /*
 239  * Scan the current list of tasks blocked within RCU read-side critical
 240  * sections, printing out the tid of each.
      *
      * The list scanned is rnp->blocked_tasks[rnp->gpnum & 0x1].  Each task
      * is printed as " P<pid>" with no trailing newline.  A lockless
      * list_empty() check avoids taking rnp->lock when there appears to be
      * nothing to print.
 241  */
 242 static void rcu_print_task_stall(struct rcu_node *rnp)
 243 {
 244         unsigned long flags;
 245         struct list_head *lp;
 246         int phase = rnp->gpnum & 0x1;
 247         struct task_struct *t;
 248
 249         if (!list_empty(&rnp->blocked_tasks[phase])) {
 250                 spin_lock_irqsave(&rnp->lock, flags);
 251                 phase = rnp->gpnum & 0x1; /* re-read under lock. */
 252                 lp = &rnp->blocked_tasks[phase];
 253                 list_for_each_entry(t, lp, rcu_node_entry)
 254                         printk(" P%d", t->pid);
 255                 spin_unlock_irqrestore(&rnp->lock, flags);
 256         }
 257 }
 258
 259 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 260
 261 /*
 262  * Check for preempted RCU readers for the specified rcu_node structure.
 263  * If the caller needs a reliable answer, it must hold the rcu_node's
 264  * ->lock.
      *
      * Returns non-zero if rnp->blocked_tasks[rnp->gpnum & 0x1] is
      * non-empty, and zero otherwise.
 265  */
 266 static int rcu_preempted_readers(struct rcu_node *rnp)
 267 {
 268         return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
 269 }
 270
 271 #ifdef CONFIG_HOTPLUG_CPU
 272
 273 /*
 274  * Handle tasklist migration for case in which all CPUs covered by the
 275  * specified rcu_node have gone offline.  Move them up to the root
 276  * rcu_node.  The reason for not just moving them to the immediate
 277  * parent is to remove the need for rcu_read_unlock_special() to
 278  * make more than two attempts to acquire the target rcu_node's lock.
 279  *
 280  * The caller must hold rnp->lock with irqs disabled.
      *
      * If rnp is already the root rcu_node, a one-time warning is issued and
      * nothing is moved.
 281  */
 282 static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
 283                                       struct rcu_node *rnp)
 284 {
 285         int i;
 286         struct list_head *lp;
 287         struct list_head *lp_root;
 288         struct rcu_node *rnp_root = rcu_get_root(rsp);
 289         struct task_struct *tp;
 290
 291         if (rnp == rnp_root) {
 292                 WARN_ONCE(1, "Last CPU thought to be offlined?");
 293                 return;  /* Shouldn't happen: at least one CPU online. */
 294         }
 295
 296         /*
 297          * Move tasks up to root rcu_node.  Rely on the fact that the
 298          * root rcu_node can be at most one ahead of the rest of the
 299          * rcu_nodes in terms of ->gpnum value.  This fact allows us to
 300          * move the blocked_tasks[] array directly, element by element.
              *
              * Each task is moved to the root list with the same index, and
              * its ->rcu_blocked_node is updated while the root lock is held.
              * The root lock is taken and released once per task.
 301          */
 302         for (i = 0; i < 2; i++) {
 303                 lp = &rnp->blocked_tasks[i];
 304                 lp_root = &rnp_root->blocked_tasks[i];
 305                 while (!list_empty(lp)) {
 306                         tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
 307                         spin_lock(&rnp_root->lock); /* irqs already disabled */
 308                         list_del(&tp->rcu_node_entry);
 309                         tp->rcu_blocked_node = rnp_root;
 310                         list_add(&tp->rcu_node_entry, lp_root);
 311                         spin_unlock(&rnp_root->lock); /* irqs remain disabled */
 312                 }
 313         }
 314 }
 315
 316 /*
 317  * Do CPU-offline processing for preemptable RCU.
      *
      * Delegates to __rcu_offline_cpu(), passing the preemptable-RCU state.
 318  */
 319 static void rcu_preempt_offline_cpu(int cpu)
 320 {
 321         __rcu_offline_cpu(cpu, &rcu_preempt_state);
 322 }
 323
 324 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 325
 326 /*
 327  * Check for a quiescent state from the current CPU.  When a task blocks,
 328  * the task is recorded in the corresponding CPU's rcu_node structure,
 329  * which is checked elsewhere.
 330  *
 331  * Caller must disable hard irqs.
 332  */
 333 static void rcu_preempt_check_callbacks(int cpu)
 334 {
 335         struct task_struct *t = current;
 336
             /* Not inside a read-side critical section: record a quiescent state. */
 337         if (t->rcu_read_lock_nesting == 0) {
 338                 t->rcu_read_unlock_special &=
 339                         ~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS);
 340                 rcu_preempt_qs_record(cpu);
 341                 return;
 342         }
             /*
              * The current task is inside a read-side critical section.  When
              * ->qs_pending is set (presumably meaning a quiescent state is
              * still wanted from this CPU; confirm in rcutree.c), either
              * consume a GOT_QS flag left by rcu_read_unlock_special(), or set
              * NEED_QS so that a later rcu_read_unlock_special() call will
              * set GOT_QS.
              */
 343         if (per_cpu(rcu_preempt_data, cpu).qs_pending) {
 344                 if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) {
 345                         rcu_preempt_qs_record(cpu);
 346                         t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS;
 347                 } else if (!(t->rcu_read_unlock_special &
 348                              RCU_READ_UNLOCK_NEED_QS)) {
 349                         t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 350                 }
 351         }
 352 }
 353
 354 /*
 355  * Process callbacks for preemptable RCU.
      *
      * Delegates to __rcu_process_callbacks(), passing the preemptable-RCU
      * state and the rcu_preempt_data of the CPU this runs on.
 356  */
 357 static void rcu_preempt_process_callbacks(void)
 358 {
 359         __rcu_process_callbacks(&rcu_preempt_state,
 360                                 &__get_cpu_var(rcu_preempt_data));
 361 }
 362
 363 /*
 364  * Queue a preemptable-RCU callback for invocation after a grace period.
      *
      * head and func are handed unchanged to __call_rcu() together with the
      * preemptable-RCU state.
 365  */
 366 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 367 {
 368         __call_rcu(head, func, &rcu_preempt_state);
 369 }
 370 EXPORT_SYMBOL_GPL(call_rcu);
 371
 372 /*
 373  * Check to see if there is any immediate preemptable-RCU-related work
 374  * to be done.
      *
      * Returns whatever __rcu_pending() reports for the specified CPU's
      * rcu_preempt_data.
 375  */
 376 static int rcu_preempt_pending(int cpu)
 377 {
 378         return __rcu_pending(&rcu_preempt_state,
 379                              &per_cpu(rcu_preempt_data, cpu));
 380 }
 381
 382 /*
 383  * Does preemptable RCU need the CPU to stay out of dynticks mode?
      *
      * Returns 1 if the specified CPU's rcu_preempt_data has a non-zero
      * ->nxtlist (presumably meaning callbacks are queued; confirm against
      * struct rcu_data), and 0 otherwise.
 384  */
 385 static int rcu_preempt_needs_cpu(int cpu)
 386 {
 387         return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
 388 }
 389
 390 /*
 391  * Initialize preemptable RCU's per-CPU data.
      *
      * Delegates to rcu_init_percpu_data().  NOTE(review): the meaning of
      * the final argument (1) is defined by that function; confirm there.
 392  */
 393 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 394 {
 395         rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
 396 }
 397
 398 /*
 399  * Check for a task exiting while in a preemptable-RCU read-side
 400  * critical section, clean up if so.  No need to issue warnings,
 401  * as debug_check_no_locks_held() already does this if lockdep
 402  * is enabled.
 403  */
 404 void exit_rcu(void)
 405 {
 406         struct task_struct *t = current;
 407
 408         if (t->rcu_read_lock_nesting == 0)
 409                 return;
             /*
              * Force the nesting count to 1 so that a single rcu_read_unlock()
              * is treated as the outermost one (assuming rcu_read_unlock()
              * ends up in __rcu_read_unlock(); confirm in rcupdate.h).
              */
 410         t->rcu_read_lock_nesting = 1;
 411         rcu_read_unlock();
 412 }
 413
 414 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 415
 416 /*
 417  * Tell them what RCU they are running.
      *
      * Prints a single KERN_INFO line.  This is the variant compiled when
      * CONFIG_TREE_PREEMPT_RCU is not defined.
 418  */
 419 static inline void rcu_bootup_announce(void)
 420 {
 421         printk(KERN_INFO "Hierarchical RCU implementation.\n");
 422 }
 423
 424 /*
 425  * Return the number of RCU batches processed thus far for debug & stats.
      *
      * Without CONFIG_TREE_PREEMPT_RCU, this is the value returned by
      * rcu_batches_completed_sched().
 426  */
 427 long rcu_batches_completed(void)
 428 {
 429         return rcu_batches_completed_sched();
 430 }
 431 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 432
 433 /*
 434  * Because preemptable RCU does not exist, we never have to check for
 435  * CPUs being in quiescent states.
      *
      * Empty stub compiled when CONFIG_TREE_PREEMPT_RCU is not defined.
 436  */
 437 static void rcu_preempt_qs(int cpu)
 438 {
 439 }
 440
 441 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 442
 443 /*
 444  * Because preemptable RCU does not exist, we never have to check for
 445  * tasks blocked within RCU read-side critical sections.
      *
      * Empty stub compiled when CONFIG_TREE_PREEMPT_RCU is not defined;
      * it prints nothing.
 446  */
 447 static void rcu_print_task_stall(struct rcu_node *rnp)
 448 {
 449 }
 450
 451 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 452
 453 /*
 454  * Because preemptable RCU does not exist, there are never any preempted
 455  * RCU readers.
      *
      * Always returns 0; rnp is not examined.
 456  */
 457 static int rcu_preempted_readers(struct rcu_node *rnp)
 458 {
 459         return 0;
 460 }
 461
 462 #ifdef CONFIG_HOTPLUG_CPU
 463
 464 /*
 465  * Because preemptable RCU does not exist, it never needs to migrate
 466  * tasks that were blocked within RCU read-side critical sections.
      *
      * Empty stub compiled when CONFIG_TREE_PREEMPT_RCU is not defined;
      * neither argument is used.
 467  */
 468 static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
 469                                       struct rcu_node *rnp)
 470 {
 471 }
 472
 473 /*
 474  * Because preemptable RCU does not exist, it never needs CPU-offline
 475  * processing.
      *
      * Empty stub compiled when CONFIG_TREE_PREEMPT_RCU is not defined.
 476  */
 477 static void rcu_preempt_offline_cpu(int cpu)
 478 {
 479 }
 480
 481 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 482
 483 /*
 484  * Because preemptable RCU does not exist, it never has any callbacks
 485  * to check.
      *
      * NOTE(review): the CONFIG_TREE_PREEMPT_RCU version of this function
      * is defined static, but this stub is not.  Confirm whether the
      * difference in linkage is intended.
 486  */
 487 void rcu_preempt_check_callbacks(int cpu)
 488 {
 489 }
 490
 491 /*
 492  * Because preemptable RCU does not exist, it never has any callbacks
 493  * to process.
      *
      * NOTE(review): the CONFIG_TREE_PREEMPT_RCU version of this function
      * is defined static, but this stub is not.  Confirm whether the
      * difference in linkage is intended.
 494  */
 495 void rcu_preempt_process_callbacks(void)
 496 {
 497 }
 498
 499 /*
 500  * In classic RCU, call_rcu() is just call_rcu_sched().
      *
      * head and func are passed through to call_rcu_sched() unchanged.
 501  */
 502 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 503 {
 504         call_rcu_sched(head, func);
 505 }
 506 EXPORT_SYMBOL_GPL(call_rcu);
 507
 508 /*
 509  * Because preemptable RCU does not exist, it never has any work to do.
      *
      * Always returns 0; cpu is not examined.
 510  */
 511 static int rcu_preempt_pending(int cpu)
 512 {
 513         return 0;
 514 }
 515
 516 /*
 517  * Because preemptable RCU does not exist, it never needs any CPU.
      *
      * Always returns 0; cpu is not examined.
 518  */
 519 static int rcu_preempt_needs_cpu(int cpu)
 520 {
 521         return 0;
 522 }
 523
 524 /*
 525  * Because preemptable RCU does not exist, there is no per-CPU
 526  * data to initialize.
      *
      * Empty stub compiled when CONFIG_TREE_PREEMPT_RCU is not defined.
 527  */
 528 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 529 {
 530 }
 531
 532 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */