2 * linux/arch/i386/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
8 * entry.S contains the system-call and fault low-level handling routines.
9 * This also contains the timer-interrupt handler, as well as all interrupts
10 * and faults that can result in a task-switch.
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after a timer-interrupt and after each system call.
15 * I changed all the .align's to 4 (16 byte alignment), as that's faster
18 * Stack layout in 'ret_from_system_call':
19 * ptrace needs to have all regs on the stack.
20 * if the order here is changed, it needs to be
21 * updated in fork.c:copy_process, signal.c:do_signal,
22 * ptrace.c and ptrace.h
40 * "current" is in register %ebx during any slow entries.
43 #include <linux/linkage.h>
44 #include <asm/thread_info.h>
45 #include <asm/irqflags.h>
46 #include <asm/errno.h>
47 #include <asm/segment.h>
51 #include <asm/percpu.h>
52 #include <asm/dwarf2.h>
53 #include "irq_vectors.h"
55 #define nr_syscalls ((syscall_table_size)/4)
80 /* These are replacements for paravirtualization */
81 #define DISABLE_INTERRUPTS cli
82 #define ENABLE_INTERRUPTS sti
83 #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
84 #define INTERRUPT_RETURN iret
85 #define GET_CR0_INTO_EAX movl %cr0, %eax
88 #define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
91 #define resume_kernel restore_nocheck
94 .macro TRACE_IRQS_IRET
95 #ifdef CONFIG_TRACE_IRQFLAGS
96 testl $IF_MASK,EFLAGS(%esp) # interrupts off?
104 #define resume_userspace_sig check_userspace
106 #define resume_userspace_sig resume_userspace
112 CFI_ADJUST_CFA_OFFSET 4;\
113 /*CFI_REL_OFFSET es, 0;*/\
115 CFI_ADJUST_CFA_OFFSET 4;\
116 /*CFI_REL_OFFSET ds, 0;*/\
118 CFI_ADJUST_CFA_OFFSET 4;\
119 CFI_REL_OFFSET eax, 0;\
121 CFI_ADJUST_CFA_OFFSET 4;\
122 CFI_REL_OFFSET ebp, 0;\
124 CFI_ADJUST_CFA_OFFSET 4;\
125 CFI_REL_OFFSET edi, 0;\
127 CFI_ADJUST_CFA_OFFSET 4;\
128 CFI_REL_OFFSET esi, 0;\
130 CFI_ADJUST_CFA_OFFSET 4;\
131 CFI_REL_OFFSET edx, 0;\
133 CFI_ADJUST_CFA_OFFSET 4;\
134 CFI_REL_OFFSET ecx, 0;\
136 CFI_ADJUST_CFA_OFFSET 4;\
137 CFI_REL_OFFSET ebx, 0;\
138 movl $(__USER_DS), %edx; \
142 #define RESTORE_INT_REGS \
144 CFI_ADJUST_CFA_OFFSET -4;\
147 CFI_ADJUST_CFA_OFFSET -4;\
150 CFI_ADJUST_CFA_OFFSET -4;\
153 CFI_ADJUST_CFA_OFFSET -4;\
156 CFI_ADJUST_CFA_OFFSET -4;\
159 CFI_ADJUST_CFA_OFFSET -4;\
162 CFI_ADJUST_CFA_OFFSET -4;\
165 #define RESTORE_REGS \
168 CFI_ADJUST_CFA_OFFSET -4;\
171 CFI_ADJUST_CFA_OFFSET -4;\
173 .section .fixup,"ax"; \
179 .section __ex_table,"a";\
185 #define RING0_INT_FRAME \
186 CFI_STARTPROC simple;\
188 CFI_DEF_CFA esp, 3*4;\
189 /*CFI_OFFSET cs, -2*4;*/\
192 #define RING0_EC_FRAME \
193 CFI_STARTPROC simple;\
195 CFI_DEF_CFA esp, 4*4;\
196 /*CFI_OFFSET cs, -2*4;*/\
199 #define RING0_PTREGS_FRAME \
200 CFI_STARTPROC simple;\
202 CFI_DEF_CFA esp, OLDESP-EBX;\
203 /*CFI_OFFSET cs, CS-OLDESP;*/\
204 CFI_OFFSET eip, EIP-OLDESP;\
205 /*CFI_OFFSET es, ES-OLDESP;*/\
206 /*CFI_OFFSET ds, DS-OLDESP;*/\
207 CFI_OFFSET eax, EAX-OLDESP;\
208 CFI_OFFSET ebp, EBP-OLDESP;\
209 CFI_OFFSET edi, EDI-OLDESP;\
210 CFI_OFFSET esi, ESI-OLDESP;\
211 CFI_OFFSET edx, EDX-OLDESP;\
212 CFI_OFFSET ecx, ECX-OLDESP;\
213 CFI_OFFSET ebx, EBX-OLDESP
218 CFI_ADJUST_CFA_OFFSET 4
220 GET_THREAD_INFO(%ebp)
222 CFI_ADJUST_CFA_OFFSET -4
223 pushl $0x0202 # Reset kernel eflags
224 CFI_ADJUST_CFA_OFFSET 4
226 CFI_ADJUST_CFA_OFFSET -4
231 * Return to user mode is not as complex as all this looks,
232 * but we want the default path for a system call return to
233 * go as quickly as possible which is why some of this is
234 * less clear than it otherwise should be.
237 # userspace resumption stub bypassing syscall exit tracing
243 GET_THREAD_INFO(%ebp)
245 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
247 andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
249 jb resume_kernel # not returning to v8086 or userspace
250 ENTRY(resume_userspace)
251 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
252 # setting need_resched or sigpending
253 # between sampling and the iret
254 movl TI_flags(%ebp), %ecx
255 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
256 # int/exception return?
260 #ifdef CONFIG_PREEMPT
263 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
266 movl TI_flags(%ebp), %ecx # need_resched set ?
267 testb $_TIF_NEED_RESCHED, %cl
269 testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
271 call preempt_schedule_irq
276 /* SYSENTER_RETURN points to after the "sysenter" instruction in
277 the vsyscall page. See vsyscall-sysenter.S, which defines the symbol. */
279 # sysenter call handler stub
280 ENTRY(sysenter_entry)
284 CFI_REGISTER esp, ebp
285 movl TSS_sysenter_esp0(%esp),%esp
288 * No need to follow this irqs on/off section: the syscall
289 * disabled irqs and here we enable it straight after entry:
293 CFI_ADJUST_CFA_OFFSET 4
294 /*CFI_REL_OFFSET ss, 0*/
296 CFI_ADJUST_CFA_OFFSET 4
297 CFI_REL_OFFSET esp, 0
299 CFI_ADJUST_CFA_OFFSET 4
301 CFI_ADJUST_CFA_OFFSET 4
302 /*CFI_REL_OFFSET cs, 0*/
304 * Push current_thread_info()->sysenter_return to the stack.
305 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
306 * pushed above; +8 corresponds to copy_thread's esp0 setting.
308 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
309 CFI_ADJUST_CFA_OFFSET 4
310 CFI_REL_OFFSET eip, 0
313 * Load the potential sixth argument from user stack.
314 * Careful about security.
316 cmpl $__PAGE_OFFSET-3,%ebp
319 .section __ex_table,"a"
321 .long 1b,syscall_fault
325 CFI_ADJUST_CFA_OFFSET 4
327 GET_THREAD_INFO(%ebp)
329 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
330 testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
331 jnz syscall_trace_entry
332 cmpl $(nr_syscalls), %eax
334 call *sys_call_table(,%eax,4)
338 movl TI_flags(%ebp), %ecx
339 testw $_TIF_ALLWORK_MASK, %cx
340 jne syscall_exit_work
341 /* if something modifies registers it must also disable sysexit */
343 movl OLDESP(%esp), %ecx
346 ENABLE_INTERRUPTS_SYSEXIT
350 # system call handler stub
352 RING0_INT_FRAME # can't unwind into user space anyway
353 pushl %eax # save orig_eax
354 CFI_ADJUST_CFA_OFFSET 4
356 GET_THREAD_INFO(%ebp)
357 testl $TF_MASK,EFLAGS(%esp)
359 orl $_TIF_SINGLESTEP,TI_flags(%ebp)
361 # system call tracing in operation / emulation
362 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
363 testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
364 jnz syscall_trace_entry
365 cmpl $(nr_syscalls), %eax
368 call *sys_call_table(,%eax,4)
369 movl %eax,EAX(%esp) # store the return value
371 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
372 # setting need_resched or sigpending
373 # between sampling and the iret
375 movl TI_flags(%ebp), %ecx
376 testw $_TIF_ALLWORK_MASK, %cx # current->work
377 jne syscall_exit_work
380 movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
381 # Warning: OLDSS(%esp) contains the wrong/random values if we
382 # are returning to the kernel.
383 # See comments in process.c:copy_thread() for details.
384 movb OLDSS(%esp), %ah
386 andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
387 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
389 je ldt_ss # returning to user-space with LDT SS
392 restore_nocheck_notrace:
395 CFI_ADJUST_CFA_OFFSET -4
401 pushl $0 # no error code
405 .section __ex_table,"a"
412 larl OLDSS(%esp), %eax
414 testl $0x00400000, %eax # returning to 32bit stack?
415 jnz restore_nocheck # allright, normal return
416 /* If returning to userspace with 16bit stack,
417 * try to fix the higher word of ESP, as the CPU
419 * This is an "official" bug of all the x86-compatible
420 * CPUs, which we can try to work around to make
421 * dosemu and wine happy. */
422 movl OLDESP(%esp), %eax
424 call patch_espfix_desc
426 CFI_ADJUST_CFA_OFFSET 4
428 CFI_ADJUST_CFA_OFFSET 4
432 CFI_ADJUST_CFA_OFFSET -8
436 # perform work that needs to be done immediately before resumption
438 RING0_PTREGS_FRAME # can't unwind into user space anyway
440 testb $_TIF_NEED_RESCHED, %cl
444 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
445 # setting need_resched or sigpending
446 # between sampling and the iret
448 movl TI_flags(%ebp), %ecx
449 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
450 # than syscall tracing?
452 testb $_TIF_NEED_RESCHED, %cl
455 work_notifysig: # deal with pending signals and
456 # notify-resume requests
457 testl $VM_MASK, EFLAGS(%esp)
459 jne work_notifysig_v86 # returning to kernel-space or
462 call do_notify_resume
463 jmp resume_userspace_sig
468 pushl %ecx # save ti_flags for do_notify_resume
469 CFI_ADJUST_CFA_OFFSET 4
470 call save_v86_state # %eax contains pt_regs pointer
472 CFI_ADJUST_CFA_OFFSET -4
475 call do_notify_resume
476 jmp resume_userspace_sig
479 # perform syscall exit tracing
482 movl $-ENOSYS,EAX(%esp)
485 call do_syscall_trace
487 jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
488 # so must skip actual syscall
489 movl ORIG_EAX(%esp), %eax
490 cmpl $(nr_syscalls), %eax
494 # perform syscall exit tracing
497 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
500 ENABLE_INTERRUPTS # could let do_syscall_trace() call
504 call do_syscall_trace
508 RING0_INT_FRAME # can't unwind into user space anyway
510 pushl %eax # save orig_eax
511 CFI_ADJUST_CFA_OFFSET 4
513 GET_THREAD_INFO(%ebp)
514 movl $-EFAULT,EAX(%esp)
518 movl $-ENOSYS,EAX(%esp)
522 #define FIXUP_ESPFIX_STACK \
523 /* since we are on a wrong stack, we can't make it a C code :( */ \
524 GET_THREAD_INFO(%ebp); \
525 movl TI_cpu(%ebp), %ebx; /* %ebx = this task's CPU number */ \
526 PER_CPU(cpu_gdt_descr, %ebx); /* presumably %ebx = &this CPU's gdt_descr — confirm PER_CPU semantics */ \
527 movl GDS_address(%ebx), %ebx; /* NOTE(review): 'GDS_address' looks like a typo for GDT_address (GDT base field) — confirm against asm-offsets */ \
528 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); /* %eax = espfix segment base out of the GDT */ \
530 pushl $__KERNEL_DS; \
531 CFI_ADJUST_CFA_OFFSET 4; \
533 CFI_ADJUST_CFA_OFFSET 4; \
535 CFI_ADJUST_CFA_OFFSET -8;
536 #define UNWIND_ESPFIX_STACK \
538 /* see if on espfix stack */ \
539 cmpw $__ESPFIX_SS, %ax; \
541 movl $__KERNEL_DS, %eax; \
544 /* switch to normal stack */ \
545 FIXUP_ESPFIX_STACK; \
549 * Build the entry stubs and pointer table with
550 * some assembler magic.
557 ENTRY(irq_entries_start)
562 CFI_ADJUST_CFA_OFFSET -4
565 CFI_ADJUST_CFA_OFFSET 4
574 * the CPU automatically disables interrupts when executing an IRQ vector,
575 * so IRQ-flags tracing has to follow that:
586 #define BUILD_INTERRUPT(name, nr) \
590 CFI_ADJUST_CFA_OFFSET 4; \
598 /* The include is where all of the SMP etc. interrupts come from */
599 #include "entry_arch.h"
601 KPROBE_ENTRY(page_fault)
604 CFI_ADJUST_CFA_OFFSET 4
608 CFI_ADJUST_CFA_OFFSET 4
609 /*CFI_REL_OFFSET ds, 0*/
611 CFI_ADJUST_CFA_OFFSET 4
612 CFI_REL_OFFSET eax, 0
614 CFI_ADJUST_CFA_OFFSET 4
615 CFI_REL_OFFSET ebp, 0
617 CFI_ADJUST_CFA_OFFSET 4
618 CFI_REL_OFFSET edi, 0
620 CFI_ADJUST_CFA_OFFSET 4
621 CFI_REL_OFFSET esi, 0
623 CFI_ADJUST_CFA_OFFSET 4
624 CFI_REL_OFFSET edx, 0
626 CFI_ADJUST_CFA_OFFSET 4
627 CFI_REL_OFFSET ecx, 0
629 CFI_ADJUST_CFA_OFFSET 4
630 CFI_REL_OFFSET ebx, 0
633 CFI_ADJUST_CFA_OFFSET 4
634 /*CFI_REL_OFFSET es, 0*/
637 CFI_ADJUST_CFA_OFFSET -4
638 /*CFI_REGISTER es, ecx*/
639 movl ES(%esp), %edi # get the function address
640 movl ORIG_EAX(%esp), %edx # get the error code
641 movl $-1, ORIG_EAX(%esp)
643 /*CFI_REL_OFFSET es, ES*/
644 movl $(__USER_DS), %ecx
647 movl %esp,%eax # pt_regs pointer
649 jmp ret_from_exception
651 KPROBE_END(page_fault)
653 ENTRY(coprocessor_error)
656 CFI_ADJUST_CFA_OFFSET 4
657 pushl $do_coprocessor_error
658 CFI_ADJUST_CFA_OFFSET 4
662 ENTRY(simd_coprocessor_error)
665 CFI_ADJUST_CFA_OFFSET 4
666 pushl $do_simd_coprocessor_error
667 CFI_ADJUST_CFA_OFFSET 4
671 ENTRY(device_not_available)
673 pushl $-1 # mark this as an int
674 CFI_ADJUST_CFA_OFFSET 4
677 testl $0x4, %eax # EM (math emulation bit)
678 jne device_not_available_emulate
680 call math_state_restore
681 jmp ret_from_exception
682 device_not_available_emulate:
683 pushl $0 # temporary storage for ORIG_EIP
684 CFI_ADJUST_CFA_OFFSET 4
687 CFI_ADJUST_CFA_OFFSET -4
688 jmp ret_from_exception
692 * Debug traps and NMI can happen at the one SYSENTER instruction
693 * that sets up the real kernel stack. Check here, since we can't
694 * allow the wrong stack to be used.
696 * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
697 * already pushed 3 words if it hits on the sysenter instruction:
698 * eflags, cs and eip.
700 * We just load the right stack, and push the three (known) values
701 * by hand onto the new stack - while updating the return eip past
702 * the instruction that would have done it for sysenter.
704 #define FIX_STACK(offset, ok, label) \
705 cmpw $__KERNEL_CS,4(%esp); \
708 movl TSS_sysenter_esp0+offset(%esp),%esp; \
709 CFI_DEF_CFA esp, 0; \
712 CFI_ADJUST_CFA_OFFSET 4; \
713 pushl $__KERNEL_CS; \
714 CFI_ADJUST_CFA_OFFSET 4; \
715 pushl $sysenter_past_esp; \
716 CFI_ADJUST_CFA_OFFSET 4; \
717 CFI_REL_OFFSET eip, 0
721 cmpl $sysenter_entry,(%esp)
722 jne debug_stack_correct
723 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
725 pushl $-1 # mark this as an int
726 CFI_ADJUST_CFA_OFFSET 4
728 xorl %edx,%edx # error code 0
729 movl %esp,%eax # pt_regs pointer
731 jmp ret_from_exception
736 * NMI is doubly nasty. It can happen _while_ we're handling
737 * a debug fault, and the debug fault hasn't yet been able to
738 * clear up the stack. So we first check whether we got an
739 * NMI on the sysenter entry path, but after that we need to
740 * check whether we got an NMI on the debug path where the debug
741 * fault happened on the sysenter path.
746 CFI_ADJUST_CFA_OFFSET 4
748 cmpw $__ESPFIX_SS, %ax
750 CFI_ADJUST_CFA_OFFSET -4
752 cmpl $sysenter_entry,(%esp)
755 CFI_ADJUST_CFA_OFFSET 4
757 /* Do not access memory above the end of our stack page,
758 * it might not exist.
760 andl $(THREAD_SIZE-1),%eax
761 cmpl $(THREAD_SIZE-20),%eax
763 CFI_ADJUST_CFA_OFFSET -4
764 jae nmi_stack_correct
765 cmpl $sysenter_entry,12(%esp)
766 je nmi_debug_stack_check
768 /* We have a RING0_INT_FRAME here */
770 CFI_ADJUST_CFA_OFFSET 4
772 xorl %edx,%edx # zero error code
773 movl %esp,%eax # pt_regs pointer
775 jmp restore_nocheck_notrace
780 FIX_STACK(12,nmi_stack_correct, 1)
781 jmp nmi_stack_correct
783 nmi_debug_stack_check:
784 /* We have a RING0_INT_FRAME here */
785 cmpw $__KERNEL_CS,16(%esp)
786 jne nmi_stack_correct
789 cmpl $debug_esp_fix_insn,(%esp)
791 FIX_STACK(24,nmi_stack_correct, 1)
792 jmp nmi_stack_correct
795 /* We have a RING0_INT_FRAME here.
797 * create the pointer to lss back
800 CFI_ADJUST_CFA_OFFSET 4
802 CFI_ADJUST_CFA_OFFSET 4
804 /* copy the iret frame of 12 bytes */
807 CFI_ADJUST_CFA_OFFSET 4
810 CFI_ADJUST_CFA_OFFSET 4
812 FIXUP_ESPFIX_STACK # %eax == %esp
813 xorl %edx,%edx # zero error code
816 lss 12+4(%esp), %esp # back to espfix stack
817 CFI_ADJUST_CFA_OFFSET -24
820 .section __ex_table,"a"
828 pushl $-1 # mark this as an int
829 CFI_ADJUST_CFA_OFFSET 4
831 xorl %edx,%edx # zero error code
832 movl %esp,%eax # pt_regs pointer
834 jmp ret_from_exception
841 CFI_ADJUST_CFA_OFFSET 4
843 CFI_ADJUST_CFA_OFFSET 4
850 CFI_ADJUST_CFA_OFFSET 4
852 CFI_ADJUST_CFA_OFFSET 4
859 CFI_ADJUST_CFA_OFFSET 4
861 CFI_ADJUST_CFA_OFFSET 4
865 ENTRY(coprocessor_segment_overrun)
868 CFI_ADJUST_CFA_OFFSET 4
869 pushl $do_coprocessor_segment_overrun
870 CFI_ADJUST_CFA_OFFSET 4
876 pushl $do_invalid_TSS
877 CFI_ADJUST_CFA_OFFSET 4
881 ENTRY(segment_not_present)
883 pushl $do_segment_not_present
884 CFI_ADJUST_CFA_OFFSET 4
890 pushl $do_stack_segment
891 CFI_ADJUST_CFA_OFFSET 4
895 KPROBE_ENTRY(general_protection)
897 pushl $do_general_protection
898 CFI_ADJUST_CFA_OFFSET 4
901 KPROBE_END(general_protection)
903 ENTRY(alignment_check)
905 pushl $do_alignment_check
906 CFI_ADJUST_CFA_OFFSET 4
912 pushl $0 # no error code
913 CFI_ADJUST_CFA_OFFSET 4
914 pushl $do_divide_error
915 CFI_ADJUST_CFA_OFFSET 4
919 #ifdef CONFIG_X86_MCE
923 CFI_ADJUST_CFA_OFFSET 4
924 pushl machine_check_vector
925 CFI_ADJUST_CFA_OFFSET 4
930 ENTRY(spurious_interrupt_bug)
933 CFI_ADJUST_CFA_OFFSET 4
934 pushl $do_spurious_interrupt_bug
935 CFI_ADJUST_CFA_OFFSET 4
939 #ifdef CONFIG_STACK_UNWIND
940 ENTRY(arch_unwind_init_running)
953 movl $__USER_DS, DS(%edx)
954 movl $__USER_DS, ES(%edx)
955 movl %ebx, ORIG_EAX(%edx)
958 movl $__KERNEL_CS, CS(%edx)
959 movl %ebx, EFLAGS(%edx)
960 movl %eax, OLDESP(%edx)
964 movl $__KERNEL_DS, OLDSS(%edx)
967 ENDPROC(arch_unwind_init_running)
970 ENTRY(kernel_thread_helper)
971 pushl $0 # fake return address for unwinder
975 CFI_ADJUST_CFA_OFFSET 4
978 CFI_ADJUST_CFA_OFFSET 4
981 ENDPROC(kernel_thread_helper)
984 #include "syscall_table.S"
986 syscall_table_size=(.-sys_call_table)