#include <asm/ftrace.h>
#include <asm/percpu.h>
#include <asm/pgtable_types.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeature.h>
+#include <asm/kaiser.h>
+#include <asm/nospec-branch.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
movq 8(%rbp), %rsi
subq $MCOUNT_INSN_SIZE, %rdi
- call *ftrace_trace_function
-
+ movq ftrace_trace_function, %r8
+ CALL_NOSPEC %r8
MCOUNT_RESTORE_FRAME
jmp ftrace_stub
movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $24, %rsp
- jmp *%rdi
+ JMP_NOSPEC %rdi
#endif
testl $3, CS(%rdi)
je 1f
SWAPGS
+ SWITCH_KERNEL_CR3
/*
* irq_count is used to check if a CPU is already on an interrupt stack
* or not. While this is essentially redundant with preempt_count it is
/* save complete stack frame */
.pushsection .kprobes.text, "ax"
+/*
+ * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit
+ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit
+ * ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit
+ * ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit
+ *
+ * Read as a bit mask: bit 0 set means "skip swapgs on exit", bit 1 set
+ * means "switch back to the user CR3 on exit". %ebx is zeroed only on
+ * the path that executes SWAPGS below, and bit 1 is set only on the path
+ * that actually rewrites %cr3 below.
+ * NOTE(review): the instructions that produce the sign flag consumed by
+ * the js, and the initial value of %ebx, are not visible in this excerpt.
+ */
ENTRY(save_paranoid)
XCPT_FRAME 1 RDI+8
cld
js 1f /* negative -> in kernel */
SWAPGS
xorl %ebx,%ebx
-1: ret
+1:
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * We might have come in between a swapgs and a SWITCH_KERNEL_CR3
+ * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit.
+ * Do a conditional SWITCH_KERNEL_CR3: this could safely be done
+ * unconditionally, but we need to find out whether the reverse
+ * should be done on return (conveyed to paranoid_exit in %ebx).
+ *
+ * Without X86_FEATURE_KAISER the first ALTERNATIVE is a jump to 2:,
+ * so %cr3 is neither read nor written and bit 1 of %ebx stays clear.
+ * %rax is used as scratch for the CR3 value.
+ */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ testl $KAISER_SHADOW_PGD_OFFSET, %eax /* bit clear: leave CR3 alone */
+ jz 2f
+ orl $2, %ebx /* tell paranoid_exit to SWITCH_USER_CR3 */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ /* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
+ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ movq %rax, %cr3 /* CR3 with the ASID and shadow-PGD bits cleared */
+2:
+#endif
+ ret
CFI_ENDPROC
END(save_paranoid)
.popsection
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
+ SWITCH_KERNEL_CR3_NO_STACK
/*
* A hypervisor implementation might want to use a label
* after the swapgs, so that it can do the swapgs
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
jnz tracesys
system_call_fastpath:
- cmpq $__NR_syscall_max,%rax
- ja badsys
+ cmpq $NR_syscalls, %rax /* CF set iff %rax < NR_syscalls (unsigned) */
+ jae badsys /* taken when %rax >= NR_syscalls */
+ sbb %rcx, %rcx /* array_index_mask_nospec(): %rcx = -CF, all ones when in range, else 0 */
+ and %rcx, %rax /* index unchanged when in range, forced to 0 whenever CF was clear */
movq %r10,%rcx
+#ifdef CONFIG_RETPOLINE
+ movq sys_call_table(, %rax, 8), %rax /* load the handler address, 8 bytes per table entry */
+ call __x86_indirect_thunk_rax /* call through the thunk instead of an indirect call via memory */
+#else
call *sys_call_table(,%rax,8) # XXX: rip relative
+#endif
movq %rax,RAX-ARGOFFSET(%rsp)
/*
* Syscall return path ending with SYSRET (fast path)
CFI_REGISTER rip,rcx
RESTORE_ARGS 1,-ARG_SKIP,0
/*CFI_REGISTER rflags,r11*/
+ /*
+ * This opens a window where we have a user CR3, but are
+ * running in the kernel. This makes using the CS
+ * register useless for telling whether or not we need to
+ * switch CR3 in NMIs. Normal interrupts are OK because
+ * they are off here.
+ */
+ SWITCH_USER_CR3
movq PER_CPU_VAR(old_rsp), %rsp
USERGS_SYSRET64
*/
LOAD_ARGS ARGOFFSET, 1
RESTORE_REST
- cmpq $__NR_syscall_max,%rax
- ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
+ cmpq $NR_syscalls, %rax /* CF set iff %rax < NR_syscalls (unsigned) */
+ jae int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
+ sbb %rcx, %rcx /* array_index_mask_nospec(): %rcx = -CF, all ones when in range, else 0 */
+ and %rcx, %rax /* index unchanged when in range, forced to 0 whenever CF was clear */
movq %r10,%rcx /* fixup for C */
+#ifdef CONFIG_RETPOLINE
+ movq sys_call_table(, %rax, 8), %rax /* load the handler address, 8 bytes per table entry */
+ call __x86_indirect_thunk_rax /* call through the thunk instead of an indirect call via memory */
+#else
call *sys_call_table(,%rax,8)
+#endif
movq %rax,RAX-ARGOFFSET(%rsp)
/* Use IRET because user could have changed frame */
*/
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_IRETQ
+ /*
+ * This opens a window where we have a user CR3, but are
+ * running in the kernel. This makes using the CS
+ * register useless for telling whether or not we need to
+ * switch CR3 in NMIs. Normal interrupts are OK because
+ * they are off here.
+ */
+ SWITCH_USER_CR3
SWAPGS
jmp restore_args
pushq_cfi %rax
pushq_cfi %rdi
SWAPGS
+ SWITCH_KERNEL_CR3
movq PER_CPU_VAR(espfix_waddr),%rdi
movq %rax,(0*8)(%rdi) /* RAX */
movq (2*8)(%rsp),%rax /* RIP */
andl $0xffff0000,%eax
popq_cfi %rdi
orq PER_CPU_VAR(espfix_stack),%rax
+ SWITCH_USER_CR3
SWAPGS
movq %rax,%rsp
popq_cfi %rax
* Here we are in the child and the registers are set as they were
* at kernel_thread() invocation in the parent.
*/
- call *%rsi
+ CALL_NOSPEC %rsi
# exit
mov %eax, %edi
call do_exit
.pushsection .kprobes.text, "ax"
paranoidzeroentry_ist debug do_debug DEBUG_STACK
-paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
+zeroentry int3 do_int3
errorentry stack_segment do_stack_segment
#ifdef CONFIG_XEN
zeroentry xen_debug do_debug
errorentry async_page_fault do_async_page_fault
#endif
#ifdef CONFIG_X86_MCE
-paranoidzeroentry machine_check *machine_check_vector(%rip)
+paranoidzeroentry machine_check do_mce
#endif
/*
* is fundamentally NMI-unsafe. (we cannot change the soft and
* hard flags at once, atomically)
*/
-
- /* ebx: no swapgs flag */
+/*
+ * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3
+ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3
+ * ebx=2: needs both swapgs and SWITCH_USER_CR3
+ * ebx=3: needs SWITCH_USER_CR3 but not swapgs
+ *
+ * Read as a bit mask: bit 0 set means "skip swapgs", bit 1 set means
+ * "SWITCH_USER_CR3 required"; the two bits are tested independently
+ * below. The incoming value is parked in %r12 because the
+ * paranoid_userspace path loads the thread-info flags into %ebx and can
+ * branch back to paranoid_kernel, where %rbx is reloaded from %r12.
+ */
ENTRY(paranoid_exit)
DEFAULT_FRAME
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- testl %ebx,%ebx /* swapgs needed? */
- jnz paranoid_restore
- testl $3,CS(%rsp)
- jnz paranoid_userspace
-paranoid_swapgs:
+ movq %rbx, %r12 /* paranoid_userspace uses %ebx */
+ testl $3, CS(%rsp) /* low two bits of the saved CS nonzero? */
+ jnz paranoid_userspace
+paranoid_kernel:
+ movq %r12, %rbx /* restore after paranoid_userspace, which jumps here when no _TIF_WORK_MASK bit is set */
TRACE_IRQS_IRETQ 0
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /* No ALTERNATIVE for X86_FEATURE_KAISER: save_paranoid sets %ebx */
+ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
+ jz paranoid_exit_no_switch
+ SWITCH_USER_CR3
+paranoid_exit_no_switch:
+#endif
+ testl $1, %ebx /* swapgs needed? (bit 0 set means no) */
+ jnz paranoid_exit_no_swapgs
SWAPGS_UNSAFE_STACK
+paranoid_exit_no_swapgs:
RESTORE_ALL 8
- jmp irq_return
-paranoid_restore:
- TRACE_IRQS_IRETQ 0
- RESTORE_ALL 8
- jmp irq_return
+ jmp irq_return
+
paranoid_userspace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
- jz paranoid_swapgs
+ jz paranoid_kernel
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */
movq_cfi r13, R13+8
movq_cfi r14, R14+8
movq_cfi r15, R15+8
+ /*
+ * error_entry() always returns with a kernel gsbase and
+ * CR3. We must also have a kernel CR3/gsbase before
+ * calling TRACE_IRQS_*. Just unconditionally switch to
+ * the kernel CR3 here.
+ */
+ SWITCH_KERNEL_CR3
xorl %ebx,%ebx
testl $3,CS+8(%rsp)
je error_kernelspace
call do_nmi
#ifdef CONFIG_TRACE_IRQFLAGS
/* paranoidexit; without TRACE_IRQS_OFF */
- /* ebx: no swapgs flag */
+ /* ebx: no-swapgs and kaiser-switch-cr3 flag (bit 0 = skip swapgs, bit 1 = SWITCH_USER_CR3 needed) */
DISABLE_INTERRUPTS(CLBR_NONE)
- testl %ebx,%ebx /* swapgs needed? */
- jnz nmi_restore
- testl $3,CS(%rsp)
- jnz nmi_userspace
-nmi_swapgs:
+ movq %rbx, %r12 /* nmi_userspace uses %ebx */
+ testl $3, CS(%rsp) /* low two bits of the saved CS nonzero? */
+ jnz nmi_userspace
+nmi_kernel:
+ movq %r12, %rbx /* restore after nmi_userspace, which jumps here when no _TIF_WORK_MASK bit is set */
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /* No ALTERNATIVE for X86_FEATURE_KAISER: save_paranoid sets %ebx */
+ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
+ jz nmi_exit_no_switch
+ SWITCH_USER_CR3
+nmi_exit_no_switch:
+#endif
+ testl $1, %ebx /* swapgs needed? (bit 0 set means no) */
+ jnz nmi_exit_no_swapgs
SWAPGS_UNSAFE_STACK
-nmi_restore:
+nmi_exit_no_swapgs:
RESTORE_ALL 8
- jmp irq_return
+ jmp irq_return
+
nmi_userspace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
- jz nmi_swapgs
+ jz nmi_kernel
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */