+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
/*
* Zero a page.
* rdi page
*/
- .globl clear_page
- .p2align 4
-clear_page:
+/*
+ * clear_page_c - string-instruction variant of clear_page.
+ * In:    rdi = page to clear (4096 bytes)
+ * Out:   nothing
+ * Clobb: rax (zeroed), rcx (counted down to 0), rdi (advanced), flags
+ * Stores 4096/8 zero quadwords with a single "rep stosq".
+ * NOTE(review): relies on the direction flag being clear on entry - confirm
+ * against the kernel calling convention.
+ * Not exported; clear_page is redirected here via the alternatives record
+ * at the end of this file.
+ */
+ ALIGN
+clear_page_c:
+ CFI_STARTPROC
movl $4096/8,%ecx
xorl %eax,%eax
- rep
- stosq
+ rep stosq
+ ret
+ CFI_ENDPROC
+/* ENDPROC must name this routine's own label so its symbol type/size are set */
+ENDPROC(clear_page_c)
+
+ENTRY(clear_page) /* zero the 4096-byte page at rdi using an unrolled store loop */
+ CFI_STARTPROC
+ xorl %eax,%eax /* rax = 0: the value written by every movq below */
+ movl $4096/64,%ecx /* ecx = number of 64-byte chunks in the page */
+ .p2align 4 /* align the loop head to a 16-byte boundary */
+.Lloop:
+ decl %ecx /* sets ZF for the jnz below; movq and leaq leave the flags untouched */
+#define PUT(x) movq %rax,x*8(%rdi) /* store a zero quadword at index x of the current chunk */
+ movq %rax,(%rdi) /* quadword 0 of the chunk */
+ PUT(1)
+ PUT(2)
+ PUT(3)
+ PUT(4)
+ PUT(5)
+ PUT(6)
+ PUT(7)
+ leaq 64(%rdi),%rdi /* advance rdi to the next chunk; lea (not add) so ZF from decl survives */
+ jnz .Lloop /* repeat until ecx has counted down to zero */
+ nop /* NOTE(review): purpose not evident from this file - presumably keeps ret from directly following the branch; confirm */
ret
+ CFI_ENDPROC
+.Lclear_page_end: /* end marker; the .altinstructions record below uses it to compute this routine's length */
+ENDPROC(clear_page)
+
+ /* Some CPUs run faster using the string instructions.
+ It is also a lot simpler. Use this when possible */
+
+#include <asm/cpufeature.h>
+
+ .section .altinstr_replacement,"ax" /* replacement bytes: a 2-byte short jmp to clear_page_c */
+1: .byte 0xeb /* opcode of jmp <disp8> */
+ .byte (clear_page_c - clear_page) - (2f - 1b) /* disp8 = target - clear_page - length of this jmp, i.e. correct when the jmp sits at clear_page; must fit in a signed byte */
+2:
+ .previous
+ .section .altinstructions,"a" /* NOTE(review): record layout presumably matches struct alt_instr - confirm */
+ .align 8
+ .quad clear_page /* address of the original code */
+ .quad 1b /* address of the replacement bytes above */
+ .byte X86_FEATURE_REP_GOOD /* CPU feature bit this record is keyed on */
+ .byte .Lclear_page_end - clear_page /* length of clear_page in bytes */
+ .byte 2b - 1b /* length of the replacement in bytes (the 2-byte jmp) */
+ .previous