x86, mem: clear_page_64.S: Support clear_page() with enhanced REP MOVSB/STOSB
authorFenghua Yu <fenghua.yu@intel.com>
Tue, 17 May 2011 22:29:14 +0000 (15:29 -0700)
committerH. Peter Anvin <hpa@linux.intel.com>
Tue, 17 May 2011 22:40:27 +0000 (15:40 -0700)
Intel processors are adding enhancements to REP MOVSB/STOSB and the use of
REP MOVSB/STOSB for optimal memcpy/memset or similar functions is recommended.
Enhancement availability is indicated by CPUID.7.0.EBX[9] (Enhanced REP MOVSB/
STOSB).

Support clear_page() with rep stosb for processor supporting enhanced REP MOVSB
/STOSB. On processors supporting enhanced REP MOVSB/STOSB, the alternative
clear_page_c_e function using enhanced REP STOSB overrides the original function
and the fast string function.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-6-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
arch/x86/lib/clear_page_64.S

index aa4326b..f2145cf 100644 (file)
@@ -1,5 +1,6 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/alternative-asm.h>
 
 /*
  * Zero a page.        
@@ -14,6 +15,15 @@ ENTRY(clear_page_c)
        CFI_ENDPROC
 ENDPROC(clear_page_c)
 
+ENTRY(clear_page_c_e)
+       CFI_STARTPROC
+       movl $4096,%ecx
+       xorl %eax,%eax
+       rep stosb
+       ret
+       CFI_ENDPROC
+ENDPROC(clear_page_c_e)
+
 ENTRY(clear_page)
        CFI_STARTPROC
        xorl   %eax,%eax
@@ -38,21 +48,26 @@ ENTRY(clear_page)
 .Lclear_page_end:
 ENDPROC(clear_page)
 
-       /* Some CPUs run faster using the string instructions.
-          It is also a lot simpler. Use this when possible */
+       /*
+        * Some CPUs support enhanced REP MOVSB/STOSB instructions.
+        * It is recommended to use this when possible.
+        * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
+        * Otherwise, use original function.
+        *
+        */
 
 #include <asm/cpufeature.h>
 
        .section .altinstr_replacement,"ax"
 1:     .byte 0xeb                                      /* jmp <disp8> */
        .byte (clear_page_c - clear_page) - (2f - 1b)   /* offset */
-2:
+2:     .byte 0xeb                                      /* jmp <disp8> */
+       .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */
+3:
        .previous
        .section .altinstructions,"a"
-       .align 8
-       .quad clear_page
-       .quad 1b
-       .word X86_FEATURE_REP_GOOD
-       .byte .Lclear_page_end - clear_page
-       .byte 2b - 1b
+       altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
+                            .Lclear_page_end-clear_page, 2b-1b
+       altinstruction_entry clear_page,2b,X86_FEATURE_ERMS,   \
+                            .Lclear_page_end-clear_page,3b-2b
        .previous