/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals, or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: the architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like the partial stack frame, but with all registers saved.
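 *
 * As a rough sketch of that top-of-stack layout (field names follow the
 * pt_regs offsets generated into asm-offsets.h; highest address first):
 *
 *      SS
 *      RSP
 *      EFLAGS
 *      CS
 *      RIP
 *      ORIG_RAX        (error code or syscall number)
 *      ...             (partial/full frame registers below)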
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers.
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers are
 *   not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQS_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
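 *
 * For example (a minimal sketch; the real uses appear near the end of this
 * file), a trap that pushes no error code is wired up with zeroentry:
 *
 *      ENTRY(divide_error)
 *              zeroentry do_divide_error
 *      END(divide_error)
 */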
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/irqflags.h>
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
        bt $9,EFLAGS-\offset(%rsp)      /* interrupts off? */
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
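/*
 * For instance, ptregscall_common below wraps its C call in exactly this
 * pattern (a sketch, eliding the SAVE_REST/RESTORE_REST bookkeeping):
 *
 *      FIXUP_TOP_OF_STACK %r11         # make pt_regs->ss/rsp/cs/eflags valid
 *      call *%rax                      # handler address loaded by PTREGSCALL
 *      RESTORE_TOP_OF_STACK %r11       # sync sysret state with any pt_regs changes
 */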
/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
        movq %gs:pda_oldrsp,\tmp
        movq $__USER_DS,SS(%rsp)
        movq $__USER_CS,CS(%rsp)
        movq R11(%rsp),\tmp     /* get eflags */
        movq \tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
        movq RSP-\offset(%rsp),\tmp
        movq \tmp,%gs:pda_oldrsp
        movq EFLAGS-\offset(%rsp),\tmp
        movq \tmp,R11-\offset(%rsp)
.endm

.macro FAKE_STACK_FRAME child_rip
        /* push in order ss, rsp, eflags, cs, rip */
        xorl %eax, %eax
        pushq %rax              /* ss */
        CFI_ADJUST_CFA_OFFSET 8
        /*CFI_REL_OFFSET ss,0*/
        pushq %rax              /* rsp */
        CFI_ADJUST_CFA_OFFSET 8
        pushq $(1<<9)           /* eflags - interrupts on */
        CFI_ADJUST_CFA_OFFSET 8
        /*CFI_REL_OFFSET rflags,0*/
        pushq $__KERNEL_CS      /* cs */
        CFI_ADJUST_CFA_OFFSET 8
        /*CFI_REL_OFFSET cs,0*/
        pushq \child_rip        /* rip */
        CFI_ADJUST_CFA_OFFSET 8
        pushq %rax              /* orig rax */
        CFI_ADJUST_CFA_OFFSET 8
.endm

.macro UNFAKE_STACK_FRAME
        addq $8*6, %rsp
        CFI_ADJUST_CFA_OFFSET -(6*8)
.endm

.macro CFI_DEFAULT_STACK start=1
        CFI_DEF_CFA_OFFSET SS+8
        CFI_REL_OFFSET r15,R15
        CFI_REL_OFFSET r14,R14
        CFI_REL_OFFSET r13,R13
        CFI_REL_OFFSET r12,R12
        CFI_REL_OFFSET rbp,RBP
        CFI_REL_OFFSET rbx,RBX
        CFI_REL_OFFSET r11,R11
        CFI_REL_OFFSET r10,R10
        CFI_REL_OFFSET r9,R9
        CFI_REL_OFFSET r8,R8
        CFI_REL_OFFSET rax,RAX
        CFI_REL_OFFSET rcx,RCX
        CFI_REL_OFFSET rdx,RDX
        CFI_REL_OFFSET rsi,RSI
        CFI_REL_OFFSET rdi,RDI
        CFI_REL_OFFSET rip,RIP
        /*CFI_REL_OFFSET cs,CS*/
        /*CFI_REL_OFFSET rflags,EFLAGS*/
        CFI_REL_OFFSET rsp,RSP
        /*CFI_REL_OFFSET ss,SS*/
.endm
/*
 * A newly forked process directly context switches into this.
 */
ENTRY(ret_from_fork)
        push kernel_eflags(%rip)
        CFI_ADJUST_CFA_OFFSET 8         /* a 64-bit push moves %rsp by 8 */
        popf                            # reset kernel eflags
        CFI_ADJUST_CFA_OFFSET -8
        GET_THREAD_INFO(%rcx)
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
        testl $3,CS-ARGOFFSET(%rsp)     # from kernel_thread?
        je int_ret_from_sys_call
        testl $_TIF_IA32,threadinfo_flags(%rcx)
        jnz int_ret_from_sys_call
        RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
        jmp ret_from_sys_call
        call syscall_trace_leave
        GET_THREAD_INFO(%rcx)
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 *
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3 (--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX: if we had a free scratch register we could save RSP into the stack
 * frame and report it properly in ps. Unfortunately we don't have one.
 *
 * When the user can change the frame, always force IRET, because it deals
 * with non-canonical addresses better. SYSRET has trouble with them due to
 * bugs in both AMD and Intel CPUs.
 */
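/*
 * For illustration (a user-space sketch, not part of this file; buf and len
 * are placeholders): a write(1, buf, len) call arrives here with
 *
 *      movq $1,%rax            # __NR_write
 *      movq $1,%rdi            # fd (arg0)
 *      leaq buf(%rip),%rsi     # buf (arg1)
 *      movq $len,%rdx          # count (arg2)
 *      syscall                 # rcx <- rip, r11 <- eflags, then entry below
 */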
ENTRY(system_call)
        CFI_DEF_CFA rsp,PDA_STACKOFFSET
        /*CFI_REGISTER rflags,r11*/
        movq %rsp,%gs:pda_oldrsp
        movq %gs:pda_kernelstack,%rsp
        /*
         * No need to follow this irqs off/on section - it's straightforward
         * and short:
         */
        movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
        movq %rcx,RIP-ARGOFFSET(%rsp)
        CFI_REL_OFFSET rip,RIP-ARGOFFSET
        GET_THREAD_INFO(%rcx)
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
        cmpq $__NR_syscall_max,%rax
        call *sys_call_table(,%rax,8)   # XXX: rip relative
        movq %rax,RAX-ARGOFFSET(%rsp)

/*
 * Syscall return path ending with SYSRET (fast path).
 * Has incomplete stack frame and undefined top of stack.
 */
        .globl ret_from_sys_call
ret_from_sys_call:
        movl $_TIF_ALLWORK_MASK,%edi
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%edx
        /*
         * sysretq will re-enable interrupts:
         */
        movq RIP-ARGOFFSET(%rsp),%rcx
        RESTORE_ARGS 0,-ARG_SKIP,1
        /*CFI_REGISTER rflags,r11*/
        movq %gs:pda_oldrsp,%rsp

        /* Handle reschedules */
        /* edx: work, edi: workmask */
        bt $TIF_NEED_RESCHED,%edx
        CFI_ADJUST_CFA_OFFSET 8
        CFI_ADJUST_CFA_OFFSET -8

        /* Handle a signal */
        testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        /* Really a signal */
        /* edx: work flags (arg3) */
        leaq do_notify_resume(%rip),%rax
        leaq -ARGOFFSET(%rsp),%rdi      # &pt_regs -> arg1
        xorl %esi,%esi                  # oldset -> arg2
        call ptregscall_common
1:      movl $_TIF_NEED_RESCHED,%edi
        /*
         * Use IRET because the user could have changed the frame. This
         * works because ptregscall_common has called FIXUP_TOP_OF_STACK.
         */

badsys:
        movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
        jmp ret_from_sys_call

        /* Do syscall tracing */
tracesys:
        movq $-ENOSYS,RAX(%rsp)
        FIXUP_TOP_OF_STACK %rdi
        call syscall_trace_enter
        LOAD_ARGS ARGOFFSET     /* reload args from stack in case ptrace changed them */
        cmpq $__NR_syscall_max,%rax
        movq %r10,%rcx          /* fixup for C */
        call *sys_call_table(,%rax,8)
1:      movq %rax,RAX-ARGOFFSET(%rsp)
        /* Use IRET because the user could have changed the frame */
        jmp int_ret_from_sys_call

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
        CFI_DEF_CFA rsp,SS+8-ARGOFFSET
        /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
        CFI_REL_OFFSET rsp,RSP-ARGOFFSET
        /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
        /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
        CFI_REL_OFFSET rip,RIP-ARGOFFSET
        CFI_REL_OFFSET rdx,RDX-ARGOFFSET
        CFI_REL_OFFSET rcx,RCX-ARGOFFSET
        CFI_REL_OFFSET rax,RAX-ARGOFFSET
        CFI_REL_OFFSET rdi,RDI-ARGOFFSET
        CFI_REL_OFFSET rsi,RSI-ARGOFFSET
        CFI_REL_OFFSET r8,R8-ARGOFFSET
        CFI_REL_OFFSET r9,R9-ARGOFFSET
        CFI_REL_OFFSET r10,R10-ARGOFFSET
        CFI_REL_OFFSET r11,R11-ARGOFFSET
        testl $3,CS-ARGOFFSET(%rsp)
        je retint_restore_args
        movl $_TIF_ALLWORK_MASK,%edi
        /* edi: mask to check */
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%edx
        andl $~TS_COMPAT,threadinfo_status(%rcx)

        /* Either reschedule or signal or syscall exit tracking needed. */
        /* First do a reschedule test. */
        /* edx: work, edi: workmask */
        bt $TIF_NEED_RESCHED,%edx
        CFI_ADJUST_CFA_OFFSET 8
        CFI_ADJUST_CFA_OFFSET -8

        /* handle signals and tracing -- both require a full stack frame */
        /* Check for syscall exit trace */
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
        CFI_ADJUST_CFA_OFFSET 8
        leaq 8(%rsp),%rdi       # &ptregs -> arg1
        call syscall_trace_leave
        CFI_ADJUST_CFA_OFFSET -8
        andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi

        testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
        movq %rsp,%rdi          # &ptregs -> arg1
        xorl %esi,%esi          # oldset -> arg2
        call do_notify_resume
1:      movl $_TIF_NEED_RESCHED,%edi
END(int_ret_from_sys_call)
/*
 * Certain special system calls need to save a full stack frame.
 */
.macro PTREGSCALL label,func,arg
        leaq \func(%rip),%rax
        leaq -ARGOFFSET+8(%rsp),\arg    /* 8 for return address */
        jmp ptregscall_common
.endm
        PTREGSCALL stub_clone, sys_clone, %r8
        PTREGSCALL stub_fork, sys_fork, %rdi
        PTREGSCALL stub_vfork, sys_vfork, %rdi
        PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
        PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
        PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
        CFI_ADJUST_CFA_OFFSET -8
        CFI_REGISTER rip, r11
        CFI_REGISTER rip, r15
        FIXUP_TOP_OF_STACK %r11
        RESTORE_TOP_OF_STACK %r11
        CFI_REGISTER rip, r11
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rip, 0
END(ptregscall_common)

ENTRY(stub_execve)
        CFI_ADJUST_CFA_OFFSET -8
        CFI_REGISTER rip, r11
        FIXUP_TOP_OF_STACK %r11
        RESTORE_TOP_OF_STACK %r11
        jmp int_ret_from_sys_call

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
        CFI_ADJUST_CFA_OFFSET -8
        FIXUP_TOP_OF_STACK %r11
        call sys_rt_sigreturn
        movq %rax,RAX(%rsp)     # fixme, this could be done at the higher layer
        jmp int_ret_from_sys_call
END(stub_rt_sigreturn)

/*
 * initial frame state for interrupts and exceptions
 */
.macro _frame ref
        CFI_DEF_CFA rsp,SS+8-\ref
        /*CFI_REL_OFFSET ss,SS-\ref*/
        CFI_REL_OFFSET rsp,RSP-\ref
        /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
        /*CFI_REL_OFFSET cs,CS-\ref*/
        CFI_REL_OFFSET rip,RIP-\ref
.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/*
 * initial frame state for exceptions with error code (and interrupts with
 * vector already pushed)
 */
#define XCPT_FRAME _frame ORIG_RAX
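/*
 * For reference (an architectural note, not from the original source): the
 * CPU pushes an error code for #DF, #TS, #NP, #SS, #GP, #PF and #AC, so
 * their entry points use XCPT_FRAME; all other vectors push no error code
 * and use INTR_FRAME.
 */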
/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
.macro interrupt func
        leaq -ARGOFFSET(%rsp),%rdi      # arg1 for handler
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rbp, 0
        CFI_DEF_CFA_REGISTER rbp
        /*
         * irqcount is used to check if a CPU is already on an interrupt
         * stack or not. While this is essentially redundant with
         * preempt_count it is a little cheaper to use a separate counter
         * in the PDA (short of moving irq_enter into assembly, which
         * would be too much work).
         */
1:      incl %gs:pda_irqcount
        cmoveq %gs:pda_irqstackptr,%rsp
        push %rbp               # backlink for old unwinder
        CFI_ADJUST_CFA_OFFSET 8
        /*
         * We entered an interrupt context - irqs are off:
         */
.endm

ENTRY(common_interrupt)
        /* 0(%rsp): oldrsp-ARGOFFSET */
        decl %gs:pda_irqcount
        CFI_DEF_CFA_REGISTER rsp
        CFI_ADJUST_CFA_OFFSET -8
        GET_THREAD_INFO(%rcx)
        testl $3,CS-ARGOFFSET(%rsp)
        /* Interrupt came from user space */
        /*
         * Has a correct top of stack, but a partial stack frame.
         * %rcx: thread info. Interrupts are off.
         */
retint_with_reschedule:
        movl $_TIF_WORK_MASK,%edi
        movl threadinfo_flags(%rcx),%edx
        /*
         * The iretq could re-enable interrupts:
         */
        /*
         * The iretq could re-enable interrupts:
         */
        .section __ex_table,"a"
        .quad iret_label,bad_iret
        /* force a signal here? this matches i386 behaviour */
        /* running with kernel gs */
        movq $11,%rdi           /* SIGSEGV */

        /* edi: workmask, edx: work */
        bt $TIF_NEED_RESCHED,%edx
        CFI_ADJUST_CFA_OFFSET 8
        CFI_ADJUST_CFA_OFFSET -8
        GET_THREAD_INFO(%rcx)

        testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        movq $-1,ORIG_RAX(%rsp)
        xorl %esi,%esi          # oldset
        movq %rsp,%rdi          # &pt_regs
        call do_notify_resume
        movl $_TIF_NEED_RESCHED,%edi
        GET_THREAD_INFO(%rcx)

#ifdef CONFIG_PREEMPT
        /* Returning to kernel space. Check if we need preemption. */
        /* rcx: threadinfo. interrupts off. */
        cmpl $0,threadinfo_preempt_count(%rcx)
        jnz retint_restore_args
        bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
        jnc retint_restore_args
        bt $9,EFLAGS-ARGOFFSET(%rsp)    /* interrupts off? */
        jnc retint_restore_args
        call preempt_schedule_irq
END(common_interrupt)

.macro apicinterrupt num,func
        CFI_ADJUST_CFA_OFFSET 8
.endm

ENTRY(thermal_interrupt)
        apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
        apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

ENTRY(reschedule_interrupt)
        apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
        apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
.endm

ENTRY(call_function_interrupt)
        apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)

ENTRY(apic_timer_interrupt)
        apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(error_interrupt)
        apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
        apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)

/*
 * Exception entry points.
 */
.macro zeroentry sym
        pushq $0        /* push error code/oldrax */
        CFI_ADJUST_CFA_OFFSET 8
        pushq %rax      /* push real oldrax to the rdi slot */
        CFI_ADJUST_CFA_OFFSET 8
.endm

.macro errorentry sym
        CFI_ADJUST_CFA_OFFSET 8
.endm

        /* error code is on the stack already */
        /* handle NMI-like exceptions that can happen everywhere */
.macro paranoidentry sym, ist=0, irqtrace=1
        movl $MSR_GS_BASE,%ecx
        movq %gs:pda_data_offset, %rbp
        movq ORIG_RAX(%rsp),%rsi
        movq $-1,ORIG_RAX(%rsp)
        subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
        addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endm

/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard flags at once, atomically)
 */
.macro paranoidexit trace=1
        /* ebx: no swapgs flag */
        testl %ebx,%ebx                 /* swapgs needed? */
        jnz paranoid_restore\trace
        jnz paranoid_userspace\trace
paranoid_swapgs\trace:
paranoid_restore\trace:
paranoid_userspace\trace:
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%ebx
        andl $_TIF_WORK_MASK,%ebx
        jz paranoid_swapgs\trace
        movq %rsp,%rdi                  /* &pt_regs */
        movq %rax,%rsp                  /* switch stack for scheduling */
        testl $_TIF_NEED_RESCHED,%ebx
        jnz paranoid_schedule\trace
        movl %ebx,%edx                  /* arg3: thread flags */
        xorl %esi,%esi                  /* arg2: oldset */
        movq %rsp,%rdi                  /* arg1: &pt_regs */
        call do_notify_resume
        jmp paranoid_userspace\trace
paranoid_schedule\trace:
        jmp paranoid_userspace\trace
.endm

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
KPROBE_ENTRY(error_entry)
        /* rdi slot contains rax, oldrax contains error code */
        CFI_ADJUST_CFA_OFFSET (14*8)
        CFI_REL_OFFSET rsi,RSI
        movq 14*8(%rsp),%rsi    /* load rax from rdi slot */
        CFI_REL_OFFSET rdx,RDX
        CFI_REL_OFFSET rcx,RCX
        movq %rsi,10*8(%rsp)    /* store rax */
        CFI_REL_OFFSET rax,RAX
        CFI_REL_OFFSET r10,R10
        CFI_REL_OFFSET r11,R11
        CFI_REL_OFFSET rbx,RBX
        CFI_REL_OFFSET rbp,RBP
        CFI_REL_OFFSET r12,R12
        CFI_REL_OFFSET r13,R13
        CFI_REL_OFFSET r14,R14
        CFI_REL_OFFSET r15,R15
        movq ORIG_RAX(%rsp),%rsi        /* get error code */
        movq $-1,ORIG_RAX(%rsp)
        /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
        GET_THREAD_INFO(%rcx)
        movl threadinfo_flags(%rcx),%edx
        movl $_TIF_WORK_MASK,%edi
        /*
         * The iret might restore flags:
         */
        /*
         * There are two places in the kernel that can potentially fault with
         * usergs. Handle them here. The exception handlers after iret run
         * with kernel gs again, so don't set the user space flag.
         * B-stepping K8s sometimes report a truncated RIP for IRET
         * exceptions returning to compat mode. Check for these here too.
         */
        leaq iret_label(%rip),%rbp
        movl %ebp,%ebp          /* zero extend */
        cmpq $gs_change,RIP(%rsp)
KPROBE_END(error_entry)

/*
 * Reload gs selector with exception handling.
 * edi: new selector
 */
ENTRY(load_gs_index)
        CFI_ADJUST_CFA_OFFSET 8
2:      mfence          /* workaround */
        CFI_ADJUST_CFA_OFFSET -8
ENDPROC(load_gs_index)

        .section __ex_table,"a"
        .quad gs_change,bad_gs

        /* running with kernel gs */
        swapgs                  /* switch back to user gs */

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *      extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 *
 * asm input arguments:
 *      rdi: fn, rsi: arg, rdx: flags
 */
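/*
 * Usage sketch (hypothetical, for illustration only):
 *
 *      static int my_worker(void *arg)
 *      {
 *              ...
 *              return 0;
 *      }
 *
 *      pid = kernel_thread(my_worker, NULL, CLONE_FS | CLONE_FILES);
 *
 * The child begins executing in child_rip below, which calls fn(arg) and
 * then do_exit().
 */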
ENTRY(kernel_thread)
        FAKE_STACK_FRAME $child_rip
        # rdi: flags, rsi: usp, rdx: will be &pt_regs
        orq kernel_thread_flags(%rip),%rdi
        /* terminate stack in child */
        /*
         * It isn't worth it to check for a reschedule here, so internally
         * to the x86_64 port you can rely on kernel_thread() not
         * rescheduling the child before returning; this avoids the need
         * for hacks, for example to fork off the per-CPU idle tasks.
         * [Hopefully no generic code relies on the reschedule -AK]
         */
ENDPROC(kernel_thread)

child_rip:
        pushq $0                # fake return address
        /*
         * Here we are in the child and the registers are set as they were
         * at kernel_thread() invocation in the parent.
         */
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *      extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *      rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *      extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *      rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
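/*
 * Usage sketch (hypothetical, for illustration only): a kernel thread that
 * execs a user-space helper would do roughly
 *
 *      static char *argv[] = { "/sbin/helper", NULL };
 *      static char *envp[] = { "HOME=/", NULL };
 *      kernel_execve("/sbin/helper", argv, envp);
 *
 * On success this returns to user mode via int_ret_from_sys_call instead of
 * returning to the caller; on failure it returns a negative error code.
 */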
ENTRY(kernel_execve)
        movq %rax, RAX(%rsp)
        je int_ret_from_sys_call
ENDPROC(kernel_execve)

KPROBE_ENTRY(page_fault)
        errorentry do_page_fault
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
        zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
        zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
        zeroentry math_state_restore
END(device_not_available)

        /* runs on exception stack */
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_debug, DEBUG_STACK

        /* runs on exception stack */
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS

        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_int3, DEBUG_STACK

        zeroentry do_overflow

        zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
        zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

        zeroentry do_reserved

        /* runs on exception stack */
        paranoidentry do_double_fault

        errorentry do_invalid_TSS

ENTRY(segment_not_present)
        errorentry do_segment_not_present
END(segment_not_present)

        /* runs on exception stack */
ENTRY(stack_segment)
        paranoidentry do_stack_segment

KPROBE_ENTRY(general_protection)
        errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
        errorentry do_alignment_check
END(alignment_check)

        zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
        zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
        /* runs on exception stack */
ENTRY(machine_check)
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_machine_check

/*
 * Call softirq on interrupt stack. Interrupts are off.
 */
ENTRY(call_softirq)
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rbp,0
        CFI_DEF_CFA_REGISTER rbp
        incl %gs:pda_irqcount
        cmove %gs:pda_irqstackptr,%rsp
        push %rbp               # backlink for old unwinder
        CFI_ADJUST_CFA_OFFSET 8
        CFI_DEF_CFA_REGISTER rsp
        CFI_ADJUST_CFA_OFFSET -8
        decl %gs:pda_irqcount
ENDPROC(call_softirq)

#ifdef CONFIG_STACK_UNWIND
ENTRY(arch_unwind_init_running)
        movq %r15, R15(%rdi)
        movq %r14, R14(%rdi)
        movq %r13, R13(%rdi)
        movq %r12, R12(%rdi)
        movq %rbp, RBP(%rdi)
        movq %rbx, RBX(%rdi)
        movq %rax, R11(%rdi)
        movq %rax, R10(%rdi)
        movq %rax, RAX(%rdi)
        movq %rax, RCX(%rdi)
        movq %rax, RDX(%rdi)
        movq %rax, RSI(%rdi)
        movq %rax, RDI(%rdi)
        movq %rax, ORIG_RAX(%rdi)
        movq %rcx, RIP(%rdi)
        movq $__KERNEL_CS, CS(%rdi)
        movq %rax, EFLAGS(%rdi)
        movq %rcx, RSP(%rdi)
        movq $__KERNEL_DS, SS(%rdi)
ENDPROC(arch_unwind_init_running)