/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
#include <linux/dmi.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
#include <asm/debugreg.h>

asmlinkage extern void ret_from_fork(void);

DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
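
/*
 * Illustrative sketch (not part of the original file): how a client might
 * hook the chain exported above. The callback and notifier_block names are
 * hypothetical; IDLE_START/IDLE_END and the register/unregister helpers are
 * the real interface.
 */
#if 0
static int example_idle_notify(struct notifier_block *nb,
			       unsigned long action, void *unused)
{
	if (action == IDLE_START)
		;	/* this CPU is entering idle: quiesce per-cpu work */
	else if (action == IDLE_END)
		;	/* this CPU left idle: resume normal operation */
	return NOTIFY_OK;
}

static struct notifier_block example_idle_nb = {
	.notifier_call = example_idle_notify,
};

/*
 * idle_notifier_register(&example_idle_nb);
 * ...
 * idle_notifier_unregister(&example_idle_nb);
 */
#endif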

void enter_idle(void)
{
	percpu_write(is_idle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
	BUG();
}
#endif

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;

	/*
	 * If we're the non-boot CPU, nothing set the stack canary up
	 * for us. CPU0 already has it initialized but no harm in
	 * doing it again. This is a good place for updating it, as
	 * we won't ever return from this function (so the invalid
	 * canaries already on the stack won't ever trigger).
	 */
	boot_init_stack_canary();

	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		while (!need_resched()) {

			rmb();

			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}
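
/*
 * Illustrative sketch (not part of the original file): pm_idle, called in
 * the inner loop above, is a function pointer selected at boot. A minimal
 * hlt-based routine, modelled loosely on default_idle(), might look like
 * this; the name example_halt_idle is hypothetical.
 */
#if 0
static void example_halt_idle(void)
{
	if (!need_resched())
		safe_halt();		/* sti; hlt - wakes on the next interrupt */
	else
		local_irq_enable();	/* honor the irqs-off contract above */
}
/* pm_idle = example_halt_idle; */
#endif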

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;
	const char *board;

	printk(KERN_INFO "\n");
	print_modules();
	board = dmi_get_system_info(DMI_PRODUCT_NAME);
	if (!board)
		board = "";
	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version, board);
	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	show_registers(regs);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;

	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
	struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;
	p->thread.io_bitmap_ptr = NULL;

	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);

	err = -ENOMEM;
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}

	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
	p->thread.ds_ctx = NULL;

	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
	p->thread.debugctlmsr = 0;

	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	return err;
}
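
/*
 * Illustrative sketch (not part of the original file): the CLONE_SETTLS
 * branch above takes the child's FS base from childregs->r8 because the
 * tls argument is the fifth parameter of the raw clone(2) syscall, and the
 * x86-64 syscall ABI passes the fifth argument in %r8. A hypothetical
 * userspace caller exercising that path (names and sizes are arbitrary;
 * real thread libraries build a proper TCB instead of a bare buffer):
 */
#if 0
#define _GNU_SOURCE
#include <sched.h>
#include <stdlib.h>

static char example_tls_block[4096];

static int example_child(void *arg)
{
	return 0;	/* %fs base now points at example_tls_block */
}

static void example_spawn_with_tls(void)
{
	char *stack = malloc(64 * 1024);

	/* glibc clone(): the variadic tail is parent_tid, tls, child_tid */
	clone(example_child, stack + 64 * 1024,
	      CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
	      CLONE_THREAD | CLONE_SETTLS,
	      NULL, NULL, example_tls_block, NULL);
}
#endif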

static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);
	regs->ip	= new_ip;
	regs->sp	= new_sp;
	percpu_write(old_rsp, new_sp);
	regs->cs	= _cs;
	regs->ss	= _ss;
	regs->flags	= X86_EFLAGS_IF;
	set_fs(USER_DS);
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}

#ifdef CONFIG_IA32_EMULATION
void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER32_CS, __USER32_DS, __USER32_DS);
}
#endif

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer is not supported either.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;
	bool preload_fpu;

	/*
	 * If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 */
	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;

	/* we're going to use this soon, after a few expensive things */
	if (preload_fpu)
		prefetch(next->xstate);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/* Make sure cpu is ready for new context */
	if (preload_fpu)
		clts();

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_end_context_switch(next_p);

	/*
	 * Switch FS and GS.
	 *
	 * Segment register != 0 always requires a reload. Also
	 * reload when it has changed. When prev process used 64bit
	 * base always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 * clear 64bit base, since overloaded base is always
		 * mapped to the Null selector
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = percpu_read(old_rsp);
	percpu_write(old_rsp, next->usersp);
	percpu_write(current_task, next_p);

	percpu_write(kernel_stack,
		  (unsigned long)task_stack_page(next_p) +
		  THREAD_SIZE - KERNEL_STACK_OFFSET);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/*
	 * Preload the FPU context, now that we've determined that the
	 * task is likely to be using it.
	 */
	if (preload_fpu)
		__math_state_restore();

	return prev_p;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: this overwrites the user's setup. Should have two bits,
	   but 64bit processes have always behaved this way, so it's not
	   too bad. The main problem is just that 32bit children are
	   affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp >= (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}
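
/*
 * Illustrative sketch (not part of the original file): the frame-pointer
 * walk above is what /proc/<pid>/wchan reports for a sleeping task. A
 * hypothetical userspace reader:
 */
#if 0
#include <stdio.h>

static void example_print_wchan(int pid)
{
	char path[64], sym[128];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%d/wchan", pid);
	f = fopen(path, "r");
	if (f) {
		if (fgets(sym, sizeof(sym), f))
			printf("pid %d is blocked in %s\n", pid, sym);
		fclose(f);
	}
}
#endif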

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				loadsegment(fs, 0);
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
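
/*
 * Illustrative sketch (not part of the original file): sys_arch_prctl is
 * reachable from userspace through syscall(2). A hypothetical round trip
 * that sets and reads back an %fs base (note: overriding %fs clobbers the
 * C library's TLS, so treat this purely as an illustration):
 */
#if 0
#include <asm/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static void example_fs_base(void)
{
	unsigned long base = 0x12345000UL;	/* <= 0xffffffff: GDT path */
	unsigned long readback;

	syscall(SYS_arch_prctl, ARCH_SET_FS, base);
	syscall(SYS_arch_prctl, ARCH_GET_FS, &readback);
	/* readback == base */
}
#endif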

unsigned long KSTK_ESP(struct task_struct *task)
{
	return (test_tsk_thread_flag(task, TIF_IA32)) ?
			(task_pt_regs(task)->sp) : ((task)->thread.usersp);
}