#include <asm/ftrace.h>
#include <asm/percpu.h>
#include <asm/pgtable_types.h>
+#include <asm/kaiser.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
testl $3, CS(%rdi)
je 1f
SWAPGS
+ SWITCH_KERNEL_CR3
/*
* irq_count is used to check if a CPU is already on an interrupt stack
* or not. While this is essentially redundant with preempt_count it is
/* save complete stack frame */
.pushsection .kprobes.text, "ax"
+/*
+ * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit
+ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit
+ * ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit
+ * ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit
+ */
ENTRY(save_paranoid)
XCPT_FRAME 1 RDI+8
cld
js 1f /* negative -> in kernel */
SWAPGS
xorl %ebx,%ebx
-1: ret
+1:
+#ifdef CONFIG_KAISER
+ /*
+ * We might have come in between a swapgs and a SWITCH_KERNEL_CR3
+ * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit.
+ * Do a conditional SWITCH_KERNEL_CR3: this could safely be done
+ * unconditionally, but we need to find out whether the reverse
+ * should be done on return (conveyed to paranoid_exit in %ebx).
+ */
+ movq %cr3, %rax			/* snapshot live CR3 to see which pgd is active */
+ testl $KAISER_SHADOW_PGD_OFFSET, %eax	/* shadow (user) pgd currently loaded? */
+ jz 2f					/* no: already on kernel CR3, nothing to undo later */
+ orl $2, %ebx				/* bit 1: tell paranoid_exit to SWITCH_USER_CR3 on the way out */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax /* strip ASID bits + shadow offset -> kernel pgd */
+ orq x86_cr3_pcid_noflush, %rax	/* re-add NOFLUSH bit if set at boot (avoids TLB flush) — set up elsewhere */
+ movq %rax, %cr3			/* switch to kernel page tables */
+2:
+#endif
+ ret
CFI_ENDPROC
END(save_paranoid)
.popsection
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
+ SWITCH_KERNEL_CR3_NO_STACK
/*
* A hypervisor implementation might want to use a label
* after the swapgs, so that it can do the swapgs
CFI_REGISTER rip,rcx
RESTORE_ARGS 1,-ARG_SKIP,0
/*CFI_REGISTER rflags,r11*/
+ /*
+ * This opens a window where we have a user CR3, but are
+ * running in the kernel. This makes using the CS
+ * register useless for telling whether or not we need to
+ * switch CR3 in NMIs. Normal interrupts are OK because
+ * they are off here.
+ */
+ SWITCH_USER_CR3
movq PER_CPU_VAR(old_rsp), %rsp
USERGS_SYSRET64
*/
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_IRETQ
+ /*
+ * This opens a window where we have a user CR3, but are
+ * running in the kernel. This makes using the CS
+ * register useless for telling whether or not we need to
+ * switch CR3 in NMIs. Normal interrupts are OK because
+ * they are off here.
+ */
+ SWITCH_USER_CR3
SWAPGS
jmp restore_args
pushq_cfi %rax
pushq_cfi %rdi
SWAPGS
+ SWITCH_KERNEL_CR3 /* PER_CPU_VAR accesses below need kernel CR3 */
movq PER_CPU_VAR(espfix_waddr),%rdi
movq %rax,(0*8)(%rdi) /* RAX */
movq (2*8)(%rsp),%rax /* RIP */
andl $0xffff0000,%eax
popq_cfi %rdi
orq PER_CPU_VAR(espfix_stack),%rax
+ SWITCH_USER_CR3 /* undo: back to user CR3 before the SWAPGS on the return path */
SWAPGS
movq %rax,%rsp
popq_cfi %rax
* is fundamentally NMI-unsafe. (we cannot change the soft and
* hard flags at once, atomically)
*/
-
- /* ebx: no swapgs flag */
+/*
+ * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3
+ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3
+ * ebx=2: needs both swapgs and SWITCH_USER_CR3
+ * ebx=3: needs SWITCH_USER_CR3 but not swapgs
+ */
ENTRY(paranoid_exit)
DEFAULT_FRAME
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- testl %ebx,%ebx /* swapgs needed? */
- jnz paranoid_restore
- testl $3,CS(%rsp)
- jnz paranoid_userspace
-paranoid_swapgs:
+ movq %rbx, %r12 /* paranoid_userspace uses %ebx */
+ testl $3, CS(%rsp)
+ jnz paranoid_userspace
+paranoid_kernel:
+ movq %r12, %rbx /* restore after paranoid_userspace */
TRACE_IRQS_IRETQ 0
+#ifdef CONFIG_KAISER
+ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
+ jz paranoid_exit_no_switch /* bit 1 clear: entered on kernel CR3, leave it */
+ SWITCH_USER_CR3
+paranoid_exit_no_switch:
+#endif
+ testl $1, %ebx /* swapgs needed? */
+ jnz paranoid_exit_no_swapgs /* bit 0 set: leave gs as-is */
SWAPGS_UNSAFE_STACK
+paranoid_exit_no_swapgs:
RESTORE_ALL 8
- jmp irq_return
-paranoid_restore:
- TRACE_IRQS_IRETQ 0
- RESTORE_ALL 8
- jmp irq_return
+ jmp irq_return
+
paranoid_userspace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
- jz paranoid_swapgs
+ jz paranoid_kernel
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */
movq_cfi r13, R13+8
movq_cfi r14, R14+8
movq_cfi r15, R15+8
+ /*
+ * error_entry() always returns with a kernel gsbase and
+ * CR3. We must also have a kernel CR3/gsbase before
+ * calling TRACE_IRQS_*. Just unconditionally switch to
+ * the kernel CR3 here.
+ */
+ SWITCH_KERNEL_CR3
xorl %ebx,%ebx
testl $3,CS+8(%rsp)
je error_kernelspace
call do_nmi
#ifdef CONFIG_TRACE_IRQFLAGS
/* paranoidexit; without TRACE_IRQS_OFF */
- /* ebx: no swapgs flag */
+ /* ebx: no-swapgs and kaiser-switch-cr3 flag (same encoding as paranoid_exit) */
DISABLE_INTERRUPTS(CLBR_NONE)
- testl %ebx,%ebx /* swapgs needed? */
- jnz nmi_restore
- testl $3,CS(%rsp)
- jnz nmi_userspace
-nmi_swapgs:
+ movq %rbx, %r12 /* nmi_userspace uses %ebx */
+ testl $3, CS(%rsp)
+ jnz nmi_userspace
+nmi_kernel:
+ movq %r12, %rbx /* restore after nmi_userspace */
+#ifdef CONFIG_KAISER
+ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
+ jz nmi_exit_no_switch /* bit 1 clear: entered on kernel CR3, leave it */
+ SWITCH_USER_CR3
+nmi_exit_no_switch:
+#endif
+ testl $1, %ebx /* swapgs needed? */
+ jnz nmi_exit_no_swapgs /* bit 0 set: leave gs as-is */
SWAPGS_UNSAFE_STACK
-nmi_restore:
+nmi_exit_no_swapgs:
RESTORE_ALL 8
- jmp irq_return
+ jmp irq_return
+
nmi_userspace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
- jz nmi_swapgs
+ jz nmi_kernel
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */