1 /*
2  *      linux/kernel/softirq.c
3  *
4  *      Copyright (C) 1992 Linus Torvalds
5  *
6  *      Distribute under GPLv2.
7  *
8  *      Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  *
10  *      Remote softirq infrastructure is by Jens Axboe.
11  */
12
13 #include <linux/export.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/interrupt.h>
16 #include <linux/init.h>
17 #include <linux/mm.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/ftrace.h>
25 #include <linux/smp.h>
26 #include <linux/tick.h>
27
28 #define CREATE_TRACE_POINTS
29 #include <trace/events/irq.h>
30
31 #include <asm/irq.h>
32 /*
33    - No shared variables, all the data are CPU local.
34    - If a softirq needs serialization, let it serialize itself
35      by its own spinlocks.
36    - Even if a softirq is serialized, only the local cpu is marked for
37      execution. Hence, we get a sort of weak cpu binding.
38      Though it is still not clear whether this will result in better
39      locality or not.
40
41    Examples:
42    - NET RX softirq. It is multithreaded and does not require
43      any global serialization.
44    - NET TX softirq. It kicks software netdevice queues, hence
45      it is logically serialized per device, but this serialization
46      is invisible to common code.
47    - Tasklets: each tasklet is serialized with respect to itself.
48  */
49
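/*
 * Editor's sketch (illustrative, not part of the original file): what
 * "serialize itself by its own spinlocks" means in practice.  The same
 * softirq action can run concurrently on several CPUs, so any state it
 * shares across CPUs needs its own lock.  example_lock, example_count and
 * example_action are hypothetical names:
 *
 *	static DEFINE_SPINLOCK(example_lock);
 *	static unsigned long example_count;
 *
 *	static void example_action(struct softirq_action *h)
 *	{
 *		spin_lock(&example_lock);
 *		example_count++;
 *		spin_unlock(&example_lock);
 *	}
 *
 * Handlers run with hardirqs enabled, so a lock that is also taken from
 * hardirq context would need the _irqsave variants instead.
 */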
50 #ifndef __ARCH_IRQ_STAT
51 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
52 EXPORT_SYMBOL(irq_stat);
53 #endif
54
55 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
56
57 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
58
59 char *softirq_to_name[NR_SOFTIRQS] = {
60         "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
61         "TASKLET", "SCHED", "HRTIMER", "RCU"
62 };
63
64 /*
65  * We cannot loop indefinitely here to avoid userspace starvation,
66  * but we also don't want to introduce a worst case 1/HZ latency
67  * to the pending events, so let the scheduler balance
68  * the softirq load for us.
69  */
70 static void wakeup_softirqd(void)
71 {
72         /* Interrupts are disabled: no need to stop preemption */
73         struct task_struct *tsk = __this_cpu_read(ksoftirqd);
74
75         if (tsk && tsk->state != TASK_RUNNING)
76                 wake_up_process(tsk);
77 }
78
79 /*
80  * preempt_count and SOFTIRQ_OFFSET usage:
81  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
82  *   softirq processing.
83  * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
84  *   on local_bh_disable or local_bh_enable.
85  * This lets us distinguish between whether we are currently processing
86  * softirq and whether we just have bh disabled.
87  */
88
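/*
 * Editor's note (illustrative): this split is what lets in_softirq() and
 * in_serving_softirq() from linux/hardirq.h answer two different questions.
 * A hedged sketch, with a hypothetical helper name:
 *
 *	static void example_report_context(void)
 *	{
 *		if (in_serving_softirq())
 *			pr_info("inside a softirq handler\n");
 *		else if (in_softirq())
 *			pr_info("softirqs only disabled via local_bh_disable()\n");
 *		else
 *			pr_info("no softirq involvement on this CPU\n");
 *	}
 */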
89 /*
90  * This one is for softirq.c-internal use,
91  * where hardirqs are disabled legitimately:
92  */
93 #ifdef CONFIG_TRACE_IRQFLAGS
94 static void __local_bh_disable(unsigned long ip, unsigned int cnt)
95 {
96         unsigned long flags;
97
98         WARN_ON_ONCE(in_irq());
99
100         raw_local_irq_save(flags);
101         /*
102          * The preempt tracer hooks into add_preempt_count and will break
103          * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
104          * is set and before current->softirq_enabled is cleared.
105          * We must manually increment preempt_count here and manually
106          * call the trace_preempt_off later.
107          */
108         preempt_count() += cnt;
109         /*
110          * Were softirqs turned off above:
111          */
112         if (softirq_count() == cnt)
113                 trace_softirqs_off(ip);
114         raw_local_irq_restore(flags);
115
116         if (preempt_count() == cnt)
117                 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
118 }
119 #else /* !CONFIG_TRACE_IRQFLAGS */
120 static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
121 {
122         add_preempt_count(cnt);
123         barrier();
124 }
125 #endif /* CONFIG_TRACE_IRQFLAGS */
126
127 void local_bh_disable(void)
128 {
129         __local_bh_disable((unsigned long)__builtin_return_address(0),
130                                 SOFTIRQ_DISABLE_OFFSET);
131 }
132
133 EXPORT_SYMBOL(local_bh_disable);
134
135 static void __local_bh_enable(unsigned int cnt)
136 {
137         WARN_ON_ONCE(in_irq());
138         WARN_ON_ONCE(!irqs_disabled());
139
140         if (softirq_count() == cnt)
141                 trace_softirqs_on((unsigned long)__builtin_return_address(0));
142         sub_preempt_count(cnt);
143 }
144
145 /*
146  * Special-case - softirqs can safely be enabled in
147  * cond_resched_softirq(), or by __do_softirq(),
148  * without processing still-pending softirqs:
149  */
150 void _local_bh_enable(void)
151 {
152         __local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
153 }
154
155 EXPORT_SYMBOL(_local_bh_enable);
156
157 static inline void _local_bh_enable_ip(unsigned long ip)
158 {
159         WARN_ON_ONCE(in_irq() || irqs_disabled());
160 #ifdef CONFIG_TRACE_IRQFLAGS
161         local_irq_disable();
162 #endif
163         /*
164          * Are softirqs going to be turned on now:
165          */
166         if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
167                 trace_softirqs_on(ip);
168         /*
169          * Keep preemption disabled until we are done with
170          * softirq processing:
171          */
172         sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);
173
174         if (unlikely(!in_interrupt() && local_softirq_pending()))
175                 do_softirq();
176
177         dec_preempt_count();
178 #ifdef CONFIG_TRACE_IRQFLAGS
179         local_irq_enable();
180 #endif
181         preempt_check_resched();
182 }
183
184 void local_bh_enable(void)
185 {
186         _local_bh_enable_ip((unsigned long)__builtin_return_address(0));
187 }
188 EXPORT_SYMBOL(local_bh_enable);
189
190 void local_bh_enable_ip(unsigned long ip)
191 {
192         _local_bh_enable_ip(ip);
193 }
194 EXPORT_SYMBOL(local_bh_enable_ip);
195
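/*
 * Editor's sketch (illustrative): the exported pair above is typically used
 * from process context to fence off a section that shares per-CPU or
 * locally-consumed data with a softirq or tasklet; local_bh_enable() also
 * runs any softirqs that became pending meanwhile.  example_data and
 * example_update are hypothetical:
 *
 *	static DEFINE_PER_CPU(unsigned long, example_data);
 *
 *	static void example_update(void)
 *	{
 *		local_bh_disable();
 *		__this_cpu_inc(example_data);
 *		local_bh_enable();
 *	}
 *
 * For data that other CPUs can also reach, the combined
 * spin_lock_bh()/spin_unlock_bh() helpers are the usual choice.
 */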
196 /*
197  * We restart softirq processing for at most 2 ms,
198  * and only if need_resched() is not set.
199  *
200  * These limits have been established via experimentation.
201  * The two things to balance are latency and fairness -
202  * we want to handle softirqs as soon as possible, but they
203  * should not be able to lock up the box.
204  */
205 #define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
206
207 asmlinkage void __do_softirq(void)
208 {
209         struct softirq_action *h;
210         __u32 pending;
211         unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
212         int cpu;
213
214         pending = local_softirq_pending();
215         account_system_vtime(current);
216
217         __local_bh_disable((unsigned long)__builtin_return_address(0),
218                                 SOFTIRQ_OFFSET);
219         lockdep_softirq_enter();
220
221         cpu = smp_processor_id();
222 restart:
223         /* Reset the pending bitmask before enabling irqs */
224         set_softirq_pending(0);
225
226         local_irq_enable();
227
228         h = softirq_vec;
229
230         do {
231                 if (pending & 1) {
232                         unsigned int vec_nr = h - softirq_vec;
233                         int prev_count = preempt_count();
234
235                         kstat_incr_softirqs_this_cpu(vec_nr);
236
237                         trace_softirq_entry(vec_nr);
238                         h->action(h);
239                         trace_softirq_exit(vec_nr);
240                         if (unlikely(prev_count != preempt_count())) {
241                                 printk(KERN_ERR "huh, entered softirq %u %s %p "
242                                        "with preempt_count %08x,"
243                                        " exited with %08x?\n", vec_nr,
244                                        softirq_to_name[vec_nr], h->action,
245                                        prev_count, preempt_count());
246                                 preempt_count() = prev_count;
247                         }
248
249                         rcu_bh_qs(cpu);
250                 }
251                 h++;
252                 pending >>= 1;
253         } while (pending);
254
255         local_irq_disable();
256
257         pending = local_softirq_pending();
258         if (pending) {
259                 if (time_before(jiffies, end) && !need_resched())
260                         goto restart;
261
262                 wakeup_softirqd();
263         }
264
265         lockdep_softirq_exit();
266
267         account_system_vtime(current);
268         __local_bh_enable(SOFTIRQ_OFFSET);
269 }
270
271 #ifndef __ARCH_HAS_DO_SOFTIRQ
272
273 asmlinkage void do_softirq(void)
274 {
275         __u32 pending;
276         unsigned long flags;
277
278         if (in_interrupt())
279                 return;
280
281         local_irq_save(flags);
282
283         pending = local_softirq_pending();
284
285         if (pending)
286                 __do_softirq();
287
288         local_irq_restore(flags);
289 }
290
291 #endif
292
293 /*
294  * Enter an interrupt context.
295  */
296 void irq_enter(void)
297 {
298         int cpu = smp_processor_id();
299
300         rcu_irq_enter();
301         if (idle_cpu(cpu) && !in_interrupt()) {
302                 /*
303                  * Prevent raise_softirq from needlessly waking up ksoftirqd
304                  * here, as softirq will be serviced on return from interrupt.
305                  */
306                 local_bh_disable();
307                 tick_check_idle(cpu);
308                 _local_bh_enable();
309         }
310
311         __irq_enter();
312 }
313
314 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
315 static inline void invoke_softirq(void)
316 {
317         if (!force_irqthreads)
318                 __do_softirq();
319         else {
320                 __local_bh_disable((unsigned long)__builtin_return_address(0),
321                                 SOFTIRQ_OFFSET);
322                 wakeup_softirqd();
323                 __local_bh_enable(SOFTIRQ_OFFSET);
324         }
325 }
326 #else
327 static inline void invoke_softirq(void)
328 {
329         if (!force_irqthreads)
330                 do_softirq();
331         else {
332                 __local_bh_disable((unsigned long)__builtin_return_address(0),
333                                 SOFTIRQ_OFFSET);
334                 wakeup_softirqd();
335                 __local_bh_enable(SOFTIRQ_OFFSET);
336         }
337 }
338 #endif
339
340 /*
341  * Exit an interrupt context. Process softirqs if needed and possible:
342  */
343 void irq_exit(void)
344 {
345         account_system_vtime(current);
346         trace_hardirq_exit();
347         sub_preempt_count(IRQ_EXIT_OFFSET);
348         if (!in_interrupt() && local_softirq_pending())
349                 invoke_softirq();
350
351         rcu_irq_exit();
352 #ifdef CONFIG_NO_HZ
353         /* Make sure that timer wheel updates are propagated */
354         if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
355                 tick_nohz_stop_sched_tick(0);
356 #endif
357         preempt_enable_no_resched();
358 }
359
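/*
 * Editor's sketch (illustrative): a typical producer for this exit path is
 * a hardirq handler that defers its heavy lifting into softirq context,
 * e.g. through a tasklet; invoke_softirq() above then runs it on the way
 * out of the interrupt (or hands it to ksoftirqd).  example_irq and
 * example_tasklet are hypothetical:
 *
 *	static void example_tasklet_fn(unsigned long data);
 *	static DECLARE_TASKLET(example_tasklet, example_tasklet_fn, 0);
 *
 *	static irqreturn_t example_irq(int irq, void *dev_id)
 *	{
 *		tasklet_schedule(&example_tasklet);
 *		return IRQ_HANDLED;
 *	}
 */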
360 /*
361  * This function must run with irqs disabled!
362  */
363 inline void raise_softirq_irqoff(unsigned int nr)
364 {
365         __raise_softirq_irqoff(nr);
366
367         /*
368          * If we're in an interrupt or softirq, we're done
369          * (this also catches softirq-disabled code). We will
370          * actually run the softirq once we return from
371          * the irq or softirq.
372          *
373          * Otherwise we wake up ksoftirqd to make sure we
374          * schedule the softirq soon.
375          */
376         if (!in_interrupt())
377                 wakeup_softirqd();
378 }
379
380 void raise_softirq(unsigned int nr)
381 {
382         unsigned long flags;
383
384         local_irq_save(flags);
385         raise_softirq_irqoff(nr);
386         local_irq_restore(flags);
387 }
388
389 void open_softirq(int nr, void (*action)(struct softirq_action *))
390 {
391         softirq_vec[nr].action = action;
392 }
393
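/*
 * Editor's sketch (illustrative): how a subsystem hooks into the vector.
 * Softirq numbers come from the fixed enum in linux/interrupt.h, so this
 * only works for a slot the subsystem already owns; new code normally
 * uses tasklets instead of claiming one.  EXAMPLE_SOFTIRQ and
 * example_action are hypothetical:
 *
 *	During init:
 *		open_softirq(EXAMPLE_SOFTIRQ, example_action);
 *
 *	Later, typically from a hardirq handler:
 *		raise_softirq(EXAMPLE_SOFTIRQ);
 *	or, if interrupts are already disabled:
 *		raise_softirq_irqoff(EXAMPLE_SOFTIRQ);
 */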
394 /*
395  * Tasklets
396  */
397 struct tasklet_head
398 {
399         struct tasklet_struct *head;
400         struct tasklet_struct **tail;
401 };
402
403 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
404 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
405
406 void __tasklet_schedule(struct tasklet_struct *t)
407 {
408         unsigned long flags;
409
410         local_irq_save(flags);
411         t->next = NULL;
412         *__this_cpu_read(tasklet_vec.tail) = t;
413         __this_cpu_write(tasklet_vec.tail, &(t->next));
414         raise_softirq_irqoff(TASKLET_SOFTIRQ);
415         local_irq_restore(flags);
416 }
417
418 EXPORT_SYMBOL(__tasklet_schedule);
419
420 void __tasklet_hi_schedule(struct tasklet_struct *t)
421 {
422         unsigned long flags;
423
424         local_irq_save(flags);
425         t->next = NULL;
426         *__this_cpu_read(tasklet_hi_vec.tail) = t;
427         __this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
428         raise_softirq_irqoff(HI_SOFTIRQ);
429         local_irq_restore(flags);
430 }
431
432 EXPORT_SYMBOL(__tasklet_hi_schedule);
433
434 void __tasklet_hi_schedule_first(struct tasklet_struct *t)
435 {
436         BUG_ON(!irqs_disabled());
437
438         t->next = __this_cpu_read(tasklet_hi_vec.head);
439         __this_cpu_write(tasklet_hi_vec.head, t);
440         __raise_softirq_irqoff(HI_SOFTIRQ);
441 }
442
443 EXPORT_SYMBOL(__tasklet_hi_schedule_first);
444
445 static void tasklet_action(struct softirq_action *a)
446 {
447         struct tasklet_struct *list;
448
449         local_irq_disable();
450         list = __this_cpu_read(tasklet_vec.head);
451         __this_cpu_write(tasklet_vec.head, NULL);
452         __this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
453         local_irq_enable();
454
455         while (list) {
456                 struct tasklet_struct *t = list;
457
458                 list = list->next;
459
460                 if (tasklet_trylock(t)) {
461                         if (!atomic_read(&t->count)) {
462                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
463                                         BUG();
464                                 t->func(t->data);
465                                 tasklet_unlock(t);
466                                 continue;
467                         }
468                         tasklet_unlock(t);
469                 }
470
471                 local_irq_disable();
472                 t->next = NULL;
473                 *__this_cpu_read(tasklet_vec.tail) = t;
474                 __this_cpu_write(tasklet_vec.tail, &(t->next));
475                 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
476                 local_irq_enable();
477         }
478 }
479
480 static void tasklet_hi_action(struct softirq_action *a)
481 {
482         struct tasklet_struct *list;
483
484         local_irq_disable();
485         list = __this_cpu_read(tasklet_hi_vec.head);
486         __this_cpu_write(tasklet_hi_vec.head, NULL);
487         __this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
488         local_irq_enable();
489
490         while (list) {
491                 struct tasklet_struct *t = list;
492
493                 list = list->next;
494
495                 if (tasklet_trylock(t)) {
496                         if (!atomic_read(&t->count)) {
497                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
498                                         BUG();
499                                 t->func(t->data);
500                                 tasklet_unlock(t);
501                                 continue;
502                         }
503                         tasklet_unlock(t);
504                 }
505
506                 local_irq_disable();
507                 t->next = NULL;
508                 *__this_cpu_read(tasklet_hi_vec.tail) = t;
509                 __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
510                 __raise_softirq_irqoff(HI_SOFTIRQ);
511                 local_irq_enable();
512         }
513 }
514
515
516 void tasklet_init(struct tasklet_struct *t,
517                   void (*func)(unsigned long), unsigned long data)
518 {
519         t->next = NULL;
520         t->state = 0;
521         atomic_set(&t->count, 0);
522         t->func = func;
523         t->data = data;
524 }
525
526 EXPORT_SYMBOL(tasklet_init);
527
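/*
 * Editor's sketch (illustrative): the dynamic form above, next to the
 * static DECLARE_TASKLET() form from linux/interrupt.h.  struct my_dev and
 * my_rx_work are hypothetical:
 *
 *	struct my_dev {
 *		struct tasklet_struct rx_tasklet;
 *	};
 *
 *	static void my_rx_work(unsigned long data)
 *	{
 *		struct my_dev *dev = (struct my_dev *)data;
 *
 *		(runs in softirq context, never concurrently with itself)
 *	}
 *
 *	tasklet_init(&dev->rx_tasklet, my_rx_work, (unsigned long)dev);
 *
 * and from the interrupt handler:
 *
 *	tasklet_schedule(&dev->rx_tasklet);
 */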
528 void tasklet_kill(struct tasklet_struct *t)
529 {
530         if (in_interrupt())
531                 printk(KERN_WARNING "Attempt to kill tasklet from interrupt\n");
532
533         while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
534                 do {
535                         yield();
536                 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
537         }
538         tasklet_unlock_wait(t);
539         clear_bit(TASKLET_STATE_SCHED, &t->state);
540 }
541
542 EXPORT_SYMBOL(tasklet_kill);
543
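/*
 * Editor's note (illustrative): the usual teardown order in a driver
 * remove path, continuing the hypothetical my_dev example above (assuming
 * it also records its irq number) - stop the source of new schedules
 * first, then wait out any pending or running instance:
 *
 *	free_irq(dev->irq, dev);
 *	tasklet_kill(&dev->rx_tasklet);
 */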
544 /*
545  * tasklet_hrtimer
546  */
547
548 /*
549  * The trampoline is called when the hrtimer expires. It schedules a tasklet
550  * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
551  * hrtimer callback, but from softirq context.
552  */
553 static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
554 {
555         struct tasklet_hrtimer *ttimer =
556                 container_of(timer, struct tasklet_hrtimer, timer);
557
558         tasklet_hi_schedule(&ttimer->tasklet);
559         return HRTIMER_NORESTART;
560 }
561
562 /*
563  * Helper function which calls the hrtimer callback from
564  * tasklet/softirq context
565  */
566 static void __tasklet_hrtimer_trampoline(unsigned long data)
567 {
568         struct tasklet_hrtimer *ttimer = (void *)data;
569         enum hrtimer_restart restart;
570
571         restart = ttimer->function(&ttimer->timer);
572         if (restart != HRTIMER_NORESTART)
573                 hrtimer_restart(&ttimer->timer);
574 }
575
576 /**
577  * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
578  * @ttimer:      tasklet_hrtimer which is initialized
579  * @function:    hrtimer callback function which gets called from softirq context
580  * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
581  * @mode:        hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
582  */
583 void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
584                           enum hrtimer_restart (*function)(struct hrtimer *),
585                           clockid_t which_clock, enum hrtimer_mode mode)
586 {
587         hrtimer_init(&ttimer->timer, which_clock, mode);
588         ttimer->timer.function = __hrtimer_tasklet_trampoline;
589         tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
590                      (unsigned long)ttimer);
591         ttimer->function = function;
592 }
593 EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
594
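/*
 * Editor's sketch (illustrative): pairing the init above with
 * tasklet_hrtimer_start() from linux/interrupt.h.  The callback runs from
 * HI_SOFTIRQ context via the trampolines above, so it may take _bh locks;
 * my_timer and my_poll are hypothetical:
 *
 *	static struct tasklet_hrtimer my_timer;
 *
 *	static enum hrtimer_restart my_poll(struct hrtimer *t)
 *	{
 *		hrtimer_forward_now(t, ktime_set(0, 10 * NSEC_PER_MSEC));
 *		return HRTIMER_RESTART;
 *	}
 *
 *	tasklet_hrtimer_init(&my_timer, my_poll,
 *			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *	tasklet_hrtimer_start(&my_timer, ktime_set(0, 10 * NSEC_PER_MSEC),
 *			      HRTIMER_MODE_REL);
 */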
595 /*
596  * Remote softirq bits
597  */
598
599 DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
600 EXPORT_PER_CPU_SYMBOL(softirq_work_list);
601
602 static void __local_trigger(struct call_single_data *cp, int softirq)
603 {
604         struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
605
606         list_add_tail(&cp->list, head);
607
608         /* Trigger the softirq only if the list was previously empty.  */
609         if (head->next == &cp->list)
610                 raise_softirq_irqoff(softirq);
611 }
612
613 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
614 static void remote_softirq_receive(void *data)
615 {
616         struct call_single_data *cp = data;
617         unsigned long flags;
618         int softirq;
619
620         softirq = cp->priv;
621
622         local_irq_save(flags);
623         __local_trigger(cp, softirq);
624         local_irq_restore(flags);
625 }
626
627 static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
628 {
629         if (cpu_online(cpu)) {
630                 cp->func = remote_softirq_receive;
631                 cp->info = cp;
632                 cp->flags = 0;
633                 cp->priv = softirq;
634
635                 __smp_call_function_single(cpu, cp, 0);
636                 return 0;
637         }
638         return 1;
639 }
640 #else /* CONFIG_USE_GENERIC_SMP_HELPERS */
641 static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
642 {
643         return 1;
644 }
645 #endif
646
647 /**
648  * __send_remote_softirq - try to schedule softirq work on a remote cpu
649  * @cp: private SMP call function data area
650  * @cpu: the remote cpu
651  * @this_cpu: the currently executing cpu
652  * @softirq: the softirq for the work
653  *
654  * Attempt to schedule softirq work on a remote cpu.  If this cannot be
655  * done, the work is instead queued up on the local cpu.
656  *
657  * Interrupts must be disabled.
658  */
659 void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
660 {
661         if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
662                 __local_trigger(cp, softirq);
663 }
664 EXPORT_SYMBOL(__send_remote_softirq);
665
666 /**
667  * send_remote_softirq - try to schedule softirq work on a remote cpu
668  * @cp: private SMP call function data area
669  * @cpu: the remote cpu
670  * @softirq: the softirq for the work
671  *
672  * Like __send_remote_softirq except that disabling interrupts and
673  * computing the current cpu is done for the caller.
674  */
675 void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
676 {
677         unsigned long flags;
678         int this_cpu;
679
680         local_irq_save(flags);
681         this_cpu = smp_processor_id();
682         __send_remote_softirq(cp, cpu, this_cpu, softirq);
683         local_irq_restore(flags);
684 }
685 EXPORT_SYMBOL(send_remote_softirq);
686
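/*
 * Editor's sketch (hedged, illustrative): a user of this interface embeds
 * the call_single_data in its own request and has the matching softirq
 * action drain the per-cpu softirq_work_list[] that __local_trigger()
 * fills.  struct my_req, my_action and EXAMPLE_SOFTIRQ are hypothetical,
 * the latter standing in for whatever softirq number the caller owns:
 *
 *	struct my_req {
 *		struct call_single_data csd;
 *	};
 *
 *	send_remote_softirq(&req->csd, target_cpu, EXAMPLE_SOFTIRQ);
 *
 *	static void my_action(struct softirq_action *h)
 *	{
 *		LIST_HEAD(local);
 *
 *		local_irq_disable();
 *		list_splice_init(&__get_cpu_var(softirq_work_list[EXAMPLE_SOFTIRQ]),
 *				 &local);
 *		local_irq_enable();
 *
 *		(walk &local, container_of() each entry back to struct my_req)
 *	}
 */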
687 static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
688                                                unsigned long action, void *hcpu)
689 {
690         /*
691          * If a CPU goes away, splice its entries to the current CPU
692          * and trigger a run of the softirq
693          */
694         if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
695                 int cpu = (unsigned long) hcpu;
696                 int i;
697
698                 local_irq_disable();
699                 for (i = 0; i < NR_SOFTIRQS; i++) {
700                         struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
701                         struct list_head *local_head;
702
703                         if (list_empty(head))
704                                 continue;
705
706                         local_head = &__get_cpu_var(softirq_work_list[i]);
707                         list_splice_init(head, local_head);
708                         raise_softirq_irqoff(i);
709                 }
710                 local_irq_enable();
711         }
712
713         return NOTIFY_OK;
714 }
715
716 static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
717         .notifier_call  = remote_softirq_cpu_notify,
718 };
719
720 void __init softirq_init(void)
721 {
722         int cpu;
723
724         for_each_possible_cpu(cpu) {
725                 int i;
726
727                 per_cpu(tasklet_vec, cpu).tail =
728                         &per_cpu(tasklet_vec, cpu).head;
729                 per_cpu(tasklet_hi_vec, cpu).tail =
730                         &per_cpu(tasklet_hi_vec, cpu).head;
731                 for (i = 0; i < NR_SOFTIRQS; i++)
732                         INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
733         }
734
735         register_hotcpu_notifier(&remote_softirq_cpu_notifier);
736
737         open_softirq(TASKLET_SOFTIRQ, tasklet_action);
738         open_softirq(HI_SOFTIRQ, tasklet_hi_action);
739 }
740
741 static int run_ksoftirqd(void * __bind_cpu)
742 {
743         set_current_state(TASK_INTERRUPTIBLE);
744
745         while (!kthread_should_stop()) {
746                 preempt_disable();
747                 if (!local_softirq_pending()) {
748                         preempt_enable_no_resched();
749                         schedule();
750                         preempt_disable();
751                 }
752
753                 __set_current_state(TASK_RUNNING);
754
755                 while (local_softirq_pending()) {
756                         /* Preempt disable stops cpu going offline.
757                            If already offline, we'll be on wrong CPU:
758                            don't process */
759                         if (cpu_is_offline((long)__bind_cpu))
760                                 goto wait_to_die;
761                         local_irq_disable();
762                         if (local_softirq_pending())
763                                 __do_softirq();
764                         local_irq_enable();
765                         preempt_enable_no_resched();
766                         cond_resched();
767                         preempt_disable();
768                         rcu_note_context_switch((long)__bind_cpu);
769                 }
770                 preempt_enable();
771                 set_current_state(TASK_INTERRUPTIBLE);
772         }
773         __set_current_state(TASK_RUNNING);
774         return 0;
775
776 wait_to_die:
777         preempt_enable();
778         /* Wait for kthread_stop */
779         set_current_state(TASK_INTERRUPTIBLE);
780         while (!kthread_should_stop()) {
781                 schedule();
782                 set_current_state(TASK_INTERRUPTIBLE);
783         }
784         __set_current_state(TASK_RUNNING);
785         return 0;
786 }
787
788 #ifdef CONFIG_HOTPLUG_CPU
789 /*
790  * tasklet_kill_immediate is called to remove a tasklet which can already be
791  * scheduled for execution on @cpu.
792  *
793  * Unlike tasklet_kill, this function removes the tasklet
794  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
795  *
796  * When this function is called, @cpu must be in the CPU_DEAD state.
797  */
798 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
799 {
800         struct tasklet_struct **i;
801
802         BUG_ON(cpu_online(cpu));
803         BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
804
805         if (!test_bit(TASKLET_STATE_SCHED, &t->state))
806                 return;
807
808         /* CPU is dead, so no lock needed. */
809         for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
810                 if (*i == t) {
811                         *i = t->next;
812                         /* If this was the tail element, move the tail ptr */
813                         if (*i == NULL)
814                                 per_cpu(tasklet_vec, cpu).tail = i;
815                         return;
816                 }
817         }
818         BUG();
819 }
820
821 static void takeover_tasklets(unsigned int cpu)
822 {
823         /* CPU is dead, so no lock needed. */
824         local_irq_disable();
825
826         /* Find end, append list for that CPU. */
827         if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
828                 *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
829                 this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
830                 per_cpu(tasklet_vec, cpu).head = NULL;
831                 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
832         }
833         raise_softirq_irqoff(TASKLET_SOFTIRQ);
834
835         if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
836                 *__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
837                 __this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
838                 per_cpu(tasklet_hi_vec, cpu).head = NULL;
839                 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
840         }
841         raise_softirq_irqoff(HI_SOFTIRQ);
842
843         local_irq_enable();
844 }
845 #endif /* CONFIG_HOTPLUG_CPU */
846
847 static int __cpuinit cpu_callback(struct notifier_block *nfb,
848                                   unsigned long action,
849                                   void *hcpu)
850 {
851         int hotcpu = (unsigned long)hcpu;
852         struct task_struct *p;
853
854         switch (action) {
855         case CPU_UP_PREPARE:
856         case CPU_UP_PREPARE_FROZEN:
857                 p = kthread_create_on_node(run_ksoftirqd,
858                                            hcpu,
859                                            cpu_to_node(hotcpu),
860                                            "ksoftirqd/%d", hotcpu);
861                 if (IS_ERR(p)) {
862                         printk(KERN_ERR "ksoftirqd for %i failed\n", hotcpu);
863                         return notifier_from_errno(PTR_ERR(p));
864                 }
865                 kthread_bind(p, hotcpu);
866                 per_cpu(ksoftirqd, hotcpu) = p;
867                 break;
868         case CPU_ONLINE:
869         case CPU_ONLINE_FROZEN:
870                 wake_up_process(per_cpu(ksoftirqd, hotcpu));
871                 break;
872 #ifdef CONFIG_HOTPLUG_CPU
873         case CPU_UP_CANCELED:
874         case CPU_UP_CANCELED_FROZEN:
875                 if (!per_cpu(ksoftirqd, hotcpu))
876                         break;
877                 /* Unbind so it can run.  Fall thru. */
878                 kthread_bind(per_cpu(ksoftirqd, hotcpu),
879                              cpumask_any(cpu_online_mask));
880         case CPU_DEAD:
881         case CPU_DEAD_FROZEN: {
882                 static const struct sched_param param = {
883                         .sched_priority = MAX_RT_PRIO-1
884                 };
885
886                 p = per_cpu(ksoftirqd, hotcpu);
887                 per_cpu(ksoftirqd, hotcpu) = NULL;
888                 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
889                 kthread_stop(p);
890                 takeover_tasklets(hotcpu);
891                 break;
892         }
893 #endif /* CONFIG_HOTPLUG_CPU */
894         }
895         return NOTIFY_OK;
896 }
897
898 static struct notifier_block __cpuinitdata cpu_nfb = {
899         .notifier_call = cpu_callback
900 };
901
902 static __init int spawn_ksoftirqd(void)
903 {
904         void *cpu = (void *)(long)smp_processor_id();
905         int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
906
907         BUG_ON(err != NOTIFY_OK);
908         cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
909         register_cpu_notifier(&cpu_nfb);
910         return 0;
911 }
912 early_initcall(spawn_ksoftirqd);
913
914 /*
915  * [ These __weak aliases are kept in a separate compilation unit, so that
916  *   GCC does not inline them incorrectly. ]
917  */
918
919 int __init __weak early_irq_init(void)
920 {
921         return 0;
922 }
923
924 #ifdef CONFIG_GENERIC_HARDIRQS
925 int __init __weak arch_probe_nr_irqs(void)
926 {
927         return NR_IRQS_LEGACY;
928 }
929
930 int __init __weak arch_early_irq_init(void)
931 {
932         return 0;
933 }
934 #endif