Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 5 Aug 2011 16:42:36 +0000 (06:42 -1000)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 5 Aug 2011 16:42:36 +0000 (06:42 -1000)
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc: Size mondo queues more sanely.
  sparc: Access kernel TSB using physical addressing when possible.
  sparc: Fix __atomic_add_unless() return value.
  sparc: use kbuild-generic support for true asm-generic header files
  sparc: Use popc when possible for ffs/__ffs/ffz.
  sparc: Set reboot-cmd using reboot data hypervisor call if available.
  sparc: Add some missing hypervisor API groups.
  sparc: Use hweight64() in popc emulation.
  sparc: Use popc if possible for hweight routines.
  sparc: Minor tweaks to Niagara page copy/clear.
  sparc: Sanitize cpu feature detection and reporting.

30 files changed:
arch/sparc/include/asm/Kbuild
arch/sparc/include/asm/bitops_64.h
arch/sparc/include/asm/div64.h [deleted file]
arch/sparc/include/asm/elf_64.h
arch/sparc/include/asm/hypervisor.h
arch/sparc/include/asm/irq_regs.h [deleted file]
arch/sparc/include/asm/local.h [deleted file]
arch/sparc/include/asm/local64.h [deleted file]
arch/sparc/include/asm/tsb.h
arch/sparc/kernel/cpu.c
arch/sparc/kernel/ds.c
arch/sparc/kernel/entry.h
arch/sparc/kernel/head_64.S
arch/sparc/kernel/hvapi.c
arch/sparc/kernel/hvcalls.S
arch/sparc/kernel/kernel.h
arch/sparc/kernel/ktlb.S
arch/sparc/kernel/mdesc.c
arch/sparc/kernel/setup_64.c
arch/sparc/kernel/sparc_ksyms_64.c
arch/sparc/kernel/sstate.c
arch/sparc/kernel/unaligned_64.c
arch/sparc/kernel/vmlinux.lds.S
arch/sparc/lib/Makefile
arch/sparc/lib/NG2page.S [deleted file]
arch/sparc/lib/NGpage.S
arch/sparc/lib/atomic32.c
arch/sparc/lib/ffs.S [new file with mode: 0644]
arch/sparc/lib/hweight.S [new file with mode: 0644]
arch/sparc/mm/init_64.c

index 3c93f08..2c2e388 100644 (file)
@@ -16,3 +16,8 @@ header-y += traps.h
 header-y += uctx.h
 header-y += utrap.h
 header-y += watchdog.h
+
+generic-y += div64.h
+generic-y += local64.h
+generic-y += irq_regs.h
+generic-y += local.h
index 325e295..29011cc 100644 (file)
@@ -26,61 +26,28 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr);
 #define smp_mb__before_clear_bit()     barrier()
 #define smp_mb__after_clear_bit()      barrier()
 
-#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/fls.h>
 #include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #ifdef __KERNEL__
 
+extern int ffs(int x);
+extern unsigned long __ffs(unsigned long);
+
+#include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/ffs.h>
 
 /*
  * hweightN: returns the hamming weight (i.e. the number
  * of bits set) of a N-bit word
  */
 
-#ifdef ULTRA_HAS_POPULATION_COUNT
-
-static inline unsigned int __arch_hweight64(unsigned long w)
-{
-       unsigned int res;
-
-       __asm__ ("popc %1,%0" : "=r" (res) : "r" (w));
-       return res;
-}
-
-static inline unsigned int __arch_hweight32(unsigned int w)
-{
-       unsigned int res;
-
-       __asm__ ("popc %1,%0" : "=r" (res) : "r" (w & 0xffffffff));
-       return res;
-}
+extern unsigned long __arch_hweight64(__u64 w);
+extern unsigned int __arch_hweight32(unsigned int w);
+extern unsigned int __arch_hweight16(unsigned int w);
+extern unsigned int __arch_hweight8(unsigned int w);
 
-static inline unsigned int __arch_hweight16(unsigned int w)
-{
-       unsigned int res;
-
-       __asm__ ("popc %1,%0" : "=r" (res) : "r" (w & 0xffff));
-       return res;
-}
-
-static inline unsigned int __arch_hweight8(unsigned int w)
-{
-       unsigned int res;
-
-       __asm__ ("popc %1,%0" : "=r" (res) : "r" (w & 0xff));
-       return res;
-}
-
-#else
-
-#include <asm-generic/bitops/arch_hweight.h>
-
-#endif
 #include <asm-generic/bitops/const_hweight.h>
 #include <asm-generic/bitops/lock.h>
 #endif /* __KERNEL__ */
diff --git a/arch/sparc/include/asm/div64.h b/arch/sparc/include/asm/div64.h
deleted file mode 100644 (file)
index 6cd978c..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
index 64f7a00..7df8b7f 100644 (file)
 #define R_SPARC_6              45
 
 /* Bits present in AT_HWCAP, primarily for Sparc32.  */
-
-#define HWCAP_SPARC_FLUSH       1    /* CPU supports flush instruction. */
-#define HWCAP_SPARC_STBAR       2
-#define HWCAP_SPARC_SWAP        4
-#define HWCAP_SPARC_MULDIV      8
-#define HWCAP_SPARC_V9         16
-#define HWCAP_SPARC_ULTRA3     32
-#define HWCAP_SPARC_BLKINIT    64
-#define HWCAP_SPARC_N2         128
+#define HWCAP_SPARC_FLUSH       0x00000001
+#define HWCAP_SPARC_STBAR       0x00000002
+#define HWCAP_SPARC_SWAP        0x00000004
+#define HWCAP_SPARC_MULDIV      0x00000008
+#define HWCAP_SPARC_V9         0x00000010
+#define HWCAP_SPARC_ULTRA3     0x00000020
+#define HWCAP_SPARC_BLKINIT    0x00000040
+#define HWCAP_SPARC_N2         0x00000080
+
+/* Solaris compatible AT_HWCAP bits. */
+#define AV_SPARC_MUL32         0x00000100 /* 32x32 multiply is efficient */
+#define AV_SPARC_DIV32         0x00000200 /* 32x32 divide is efficient */
+#define AV_SPARC_FSMULD                0x00000400 /* 'fsmuld' is efficient */
+#define AV_SPARC_V8PLUS                0x00000800 /* v9 insn available to 32bit */
+#define AV_SPARC_POPC          0x00001000 /* 'popc' is efficient */
+#define AV_SPARC_VIS           0x00002000 /* VIS insns available */
+#define AV_SPARC_VIS2          0x00004000 /* VIS2 insns available */
+#define AV_SPARC_ASI_BLK_INIT  0x00008000 /* block init ASIs available */
+#define AV_SPARC_FMAF          0x00010000 /* fused multiply-add */
+#define AV_SPARC_VIS3          0x00020000 /* VIS3 insns available */
+#define AV_SPARC_HPC           0x00040000 /* HPC insns available */
+#define AV_SPARC_RANDOM                0x00080000 /* 'random' insn available */
+#define AV_SPARC_TRANS         0x00100000 /* transaction insns available */
+#define AV_SPARC_FJFMAU                0x00200000 /* unfused multiply-add */
+#define AV_SPARC_IMA           0x00400000 /* integer multiply-add */
+#define AV_SPARC_ASI_CACHE_SPARING \
+                               0x00800000 /* cache sparing ASIs available */
 
 #define CORE_DUMP_USE_REGSET
 
@@ -162,33 +180,8 @@ typedef struct {
 #define ELF_ET_DYN_BASE                0x0000010000000000UL
 #define COMPAT_ELF_ET_DYN_BASE 0x0000000070000000UL
 
-
-/* This yields a mask that user programs can use to figure out what
-   instruction set this cpu supports.  */
-
-/* On Ultra, we support all of the v8 capabilities. */
-static inline unsigned int sparc64_elf_hwcap(void)
-{
-       unsigned int cap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR |
-                           HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV |
-                           HWCAP_SPARC_V9);
-
-       if (tlb_type == cheetah || tlb_type == cheetah_plus)
-               cap |= HWCAP_SPARC_ULTRA3;
-       else if (tlb_type == hypervisor) {
-               if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 ||
-                   sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
-                   sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
-                       cap |= HWCAP_SPARC_BLKINIT;
-               if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
-                   sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
-                       cap |= HWCAP_SPARC_N2;
-       }
-
-       return cap;
-}
-
-#define ELF_HWCAP      sparc64_elf_hwcap()
+extern unsigned long sparc64_elf_hwcap;
+#define ELF_HWCAP      sparc64_elf_hwcap
 
 /* This yields a string that ld.so will use to load implementation
    specific libraries for optimization.  This is more specific in
index 7a5f80d..015a761 100644 (file)
@@ -2927,6 +2927,13 @@ extern unsigned long sun4v_ncs_request(unsigned long request,
 #define HV_FAST_FIRE_GET_PERFREG       0x120
 #define HV_FAST_FIRE_SET_PERFREG       0x121
 
+#define HV_FAST_REBOOT_DATA_SET                0x172
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_reboot_data_set(unsigned long ra,
+                                          unsigned long len);
+#endif
+
 /* Function numbers for HV_CORE_TRAP.  */
 #define HV_CORE_SET_VER                        0x00
 #define HV_CORE_PUTCHAR                        0x01
@@ -2940,11 +2947,17 @@ extern unsigned long sun4v_ncs_request(unsigned long request,
 #define HV_GRP_CORE                    0x0001
 #define HV_GRP_INTR                    0x0002
 #define HV_GRP_SOFT_STATE              0x0003
+#define HV_GRP_TM                      0x0080
 #define HV_GRP_PCI                     0x0100
 #define HV_GRP_LDOM                    0x0101
 #define HV_GRP_SVC_CHAN                        0x0102
 #define HV_GRP_NCS                     0x0103
 #define HV_GRP_RNG                     0x0104
+#define HV_GRP_PBOOT                   0x0105
+#define HV_GRP_TPM                     0x0107
+#define HV_GRP_SDIO                    0x0108
+#define HV_GRP_SDIO_ERR                        0x0109
+#define HV_GRP_REBOOT_DATA             0x0110
 #define HV_GRP_NIAG_PERF               0x0200
 #define HV_GRP_FIRE_PERF               0x0201
 #define HV_GRP_N2_CPU                  0x0202
diff --git a/arch/sparc/include/asm/irq_regs.h b/arch/sparc/include/asm/irq_regs.h
deleted file mode 100644 (file)
index 3dd9c0b..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/sparc/include/asm/local.h b/arch/sparc/include/asm/local.h
deleted file mode 100644 (file)
index bc80815..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _SPARC_LOCAL_H
-#define _SPARC_LOCAL_H
-
-#include <asm-generic/local.h>
-
-#endif
diff --git a/arch/sparc/include/asm/local64.h b/arch/sparc/include/asm/local64.h
deleted file mode 100644 (file)
index 36c93b5..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
index 83c571d..1a8afd1 100644 (file)
@@ -133,29 +133,6 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
        sub     TSB, 0x8, TSB;   \
        TSB_STORE(TSB, TAG);
 
-#define KTSB_LOAD_QUAD(TSB, REG) \
-       ldda            [TSB] ASI_NUCLEUS_QUAD_LDD, REG;
-
-#define KTSB_STORE(ADDR, VAL) \
-       stxa            VAL, [ADDR] ASI_N;
-
-#define KTSB_LOCK_TAG(TSB, REG1, REG2) \
-99:    lduwa   [TSB] ASI_N, REG1;      \
-       sethi   %hi(TSB_TAG_LOCK_HIGH), REG2;\
-       andcc   REG1, REG2, %g0;        \
-       bne,pn  %icc, 99b;              \
-        nop;                           \
-       casa    [TSB] ASI_N, REG1, REG2;\
-       cmp     REG1, REG2;             \
-       bne,pn  %icc, 99b;              \
-        nop;                           \
-
-#define KTSB_WRITE(TSB, TTE, TAG) \
-       add     TSB, 0x8, TSB;   \
-       stxa    TTE, [TSB] ASI_N;     \
-       sub     TSB, 0x8, TSB;   \
-       stxa    TAG, [TSB] ASI_N;
-
        /* Do a kernel page table walk.  Leaves physical PTE pointer in
         * REG1.  Jumps to FAIL_LABEL on early page table walk termination.
         * VADDR will not be clobbered, but REG2 will.
@@ -239,6 +216,8 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
        (KERNEL_TSB_SIZE_BYTES / 16)
 #define KERNEL_TSB4M_NENTRIES  4096
 
+#define KTSB_PHYS_SHIFT                15
+
        /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
         * on TSB hit.  REG1, REG2, REG3, and REG4 are used as temporaries
         * and the found TTE will be left in REG1.  REG3 and REG4 must
@@ -247,13 +226,22 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
         * VADDR and TAG will be preserved and not clobbered by this macro.
         */
 #define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
-       sethi           %hi(swapper_tsb), REG1; \
+661:   sethi           %hi(swapper_tsb), REG1;                 \
        or              REG1, %lo(swapper_tsb), REG1; \
+       .section        .swapper_tsb_phys_patch, "ax"; \
+       .word           661b; \
+       .previous; \
+661:   nop; \
+       .section        .tsb_ldquad_phys_patch, "ax"; \
+       .word           661b; \
+       sllx            REG1, KTSB_PHYS_SHIFT, REG1; \
+       sllx            REG1, KTSB_PHYS_SHIFT, REG1; \
+       .previous; \
        srlx            VADDR, PAGE_SHIFT, REG2; \
        and             REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \
        sllx            REG2, 4, REG2; \
        add             REG1, REG2, REG2; \
-       KTSB_LOAD_QUAD(REG2, REG3); \
+       TSB_LOAD_QUAD(REG2, REG3); \
        cmp             REG3, TAG; \
        be,a,pt         %xcc, OK_LABEL; \
         mov            REG4, REG1;
@@ -263,12 +251,21 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
         * we can make use of that for the index computation.
         */
 #define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
-       sethi           %hi(swapper_4m_tsb), REG1; \
+661:   sethi           %hi(swapper_4m_tsb), REG1;           \
        or              REG1, %lo(swapper_4m_tsb), REG1; \
+       .section        .swapper_4m_tsb_phys_patch, "ax"; \
+       .word           661b; \
+       .previous; \
+661:   nop; \
+       .section        .tsb_ldquad_phys_patch, "ax"; \
+       .word           661b; \
+       sllx            REG1, KTSB_PHYS_SHIFT, REG1; \
+       sllx            REG1, KTSB_PHYS_SHIFT, REG1; \
+       .previous; \
        and             TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \
        sllx            REG2, 4, REG2; \
        add             REG1, REG2, REG2; \
-       KTSB_LOAD_QUAD(REG2, REG3); \
+       TSB_LOAD_QUAD(REG2, REG3); \
        cmp             REG3, TAG; \
        be,a,pt         %xcc, OK_LABEL; \
         mov            REG4, REG1;
index 17cf290..9810fd8 100644 (file)
@@ -396,6 +396,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
                   , cpu_data(0).clock_tick
 #endif
                );
+       cpucap_info(m);
 #ifdef CONFIG_SMP
        smp_bogo(m);
 #endif
index dd1342c..490e541 100644 (file)
 #include <linux/reboot.h>
 #include <linux/cpu.h>
 
+#include <asm/hypervisor.h>
 #include <asm/ldc.h>
 #include <asm/vio.h>
 #include <asm/mdesc.h>
 #include <asm/head.h>
 #include <asm/irq.h>
 
+#include "kernel.h"
+
 #define DRV_MODULE_NAME                "ds"
 #define PFX DRV_MODULE_NAME    ": "
 #define DRV_MODULE_VERSION     "1.0"
@@ -828,18 +831,32 @@ void ldom_set_var(const char *var, const char *value)
        }
 }
 
+static char full_boot_str[256] __attribute__((aligned(32)));
+static int reboot_data_supported;
+
 void ldom_reboot(const char *boot_command)
 {
        /* Don't bother with any of this if the boot_command
         * is empty.
         */
        if (boot_command && strlen(boot_command)) {
-               char full_boot_str[256];
+               unsigned long len;
 
                strcpy(full_boot_str, "boot ");
                strcpy(full_boot_str + strlen("boot "), boot_command);
+               len = strlen(full_boot_str);
 
-               ldom_set_var("reboot-command", full_boot_str);
+               if (reboot_data_supported) {
+                       unsigned long ra = kimage_addr_to_ra(full_boot_str);
+                       unsigned long hv_ret;
+
+                       hv_ret = sun4v_reboot_data_set(ra, len);
+                       if (hv_ret != HV_EOK)
+                               pr_err("SUN4V: Unable to set reboot data "
+                                      "hv_ret=%lu\n", hv_ret);
+               } else {
+                       ldom_set_var("reboot-command", full_boot_str);
+               }
        }
        sun4v_mach_sir();
 }
@@ -1237,6 +1254,15 @@ static struct vio_driver ds_driver = {
 
 static int __init ds_init(void)
 {
+       unsigned long hv_ret, major, minor;
+
+       hv_ret = sun4v_get_version(HV_GRP_REBOOT_DATA, &major, &minor);
+       if (hv_ret == HV_EOK) {
+               pr_info("SUN4V: Reboot data supported (maj=%lu,min=%lu).\n",
+                       major, minor);
+               reboot_data_supported = 1;
+       }
+
        kthread_run(ds_thread, NULL, "kldomd");
 
        return vio_register_driver(&ds_driver);
index d1f1361..e27f8ea 100644 (file)
@@ -42,6 +42,20 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
 extern void fpload(unsigned long *fpregs, unsigned long *fsr);
 
 #else /* CONFIG_SPARC32 */
+struct popc_3insn_patch_entry {
+       unsigned int    addr;
+       unsigned int    insns[3];
+};
+extern struct popc_3insn_patch_entry __popc_3insn_patch,
+       __popc_3insn_patch_end;
+
+struct popc_6insn_patch_entry {
+       unsigned int    addr;
+       unsigned int    insns[6];
+};
+extern struct popc_6insn_patch_entry __popc_6insn_patch,
+       __popc_6insn_patch_end;
+
 extern void __init per_cpu_patch(void);
 extern void __init sun4v_patch(void);
 extern void __init boot_cpu_id_too_large(int cpu);
index c752603..0eac1b2 100644 (file)
@@ -559,7 +559,7 @@ niagara2_patch:
         nop
        call    niagara_patch_bzero
         nop
-       call    niagara2_patch_pageops
+       call    niagara_patch_pageops
         nop
 
        ba,a,pt %xcc, 80f
index d306e64..c2d055d 100644 (file)
@@ -28,11 +28,17 @@ static struct api_info api_table[] = {
        { .group = HV_GRP_CORE,         .flags = FLAG_PRE_API   },
        { .group = HV_GRP_INTR,                                 },
        { .group = HV_GRP_SOFT_STATE,                           },
+       { .group = HV_GRP_TM,                                   },
        { .group = HV_GRP_PCI,          .flags = FLAG_PRE_API   },
        { .group = HV_GRP_LDOM,                                 },
        { .group = HV_GRP_SVC_CHAN,     .flags = FLAG_PRE_API   },
        { .group = HV_GRP_NCS,          .flags = FLAG_PRE_API   },
        { .group = HV_GRP_RNG,                                  },
+       { .group = HV_GRP_PBOOT,                                },
+       { .group = HV_GRP_TPM,                                  },
+       { .group = HV_GRP_SDIO,                                 },
+       { .group = HV_GRP_SDIO_ERR,                             },
+       { .group = HV_GRP_REBOOT_DATA,                          },
        { .group = HV_GRP_NIAG_PERF,    .flags = FLAG_PRE_API   },
        { .group = HV_GRP_FIRE_PERF,                            },
        { .group = HV_GRP_N2_CPU,                               },
index 8a5f35f..58d60de 100644 (file)
@@ -798,3 +798,10 @@ ENTRY(sun4v_niagara2_setperf)
        retl
         nop
 ENDPROC(sun4v_niagara2_setperf)
+
+ENTRY(sun4v_reboot_data_set)
+       mov     HV_FAST_REBOOT_DATA_SET, %o5
+       ta      HV_FAST_TRAP
+       retl
+        nop
+ENDPROC(sun4v_reboot_data_set)
index 6f6544c..fd6c36b 100644 (file)
@@ -4,12 +4,27 @@
 #include <linux/interrupt.h>
 
 #include <asm/traps.h>
+#include <asm/head.h>
+#include <asm/io.h>
 
 /* cpu.c */
 extern const char *sparc_pmu_type;
 extern unsigned int fsr_storage;
 extern int ncpus_probed;
 
+#ifdef CONFIG_SPARC64
+/* setup_64.c */
+struct seq_file;
+extern void cpucap_info(struct seq_file *);
+
+static inline unsigned long kimage_addr_to_ra(const char *p)
+{
+       unsigned long val = (unsigned long) p;
+
+       return kern_base + (val - KERNBASE);
+}
+#endif
+
 #ifdef CONFIG_SPARC32
 /* cpu.c */
 extern void cpu_probe(void);
index 1d36147..79f3103 100644 (file)
@@ -47,16 +47,16 @@ kvmap_itlb_tsb_miss:
 kvmap_itlb_vmalloc_addr:
        KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
 
-       KTSB_LOCK_TAG(%g1, %g2, %g7)
+       TSB_LOCK_TAG(%g1, %g2, %g7)
 
        /* Load and check PTE.  */
        ldxa            [%g5] ASI_PHYS_USE_EC, %g5
        mov             1, %g7
        sllx            %g7, TSB_TAG_INVALID_BIT, %g7
        brgez,a,pn      %g5, kvmap_itlb_longpath
-        KTSB_STORE(%g1, %g7)
+        TSB_STORE(%g1, %g7)
 
-       KTSB_WRITE(%g1, %g5, %g6)
+       TSB_WRITE(%g1, %g5, %g6)
 
        /* fallthrough to TLB load */
 
@@ -102,9 +102,9 @@ kvmap_itlb_longpath:
 kvmap_itlb_obp:
        OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath)
 
-       KTSB_LOCK_TAG(%g1, %g2, %g7)
+       TSB_LOCK_TAG(%g1, %g2, %g7)
 
-       KTSB_WRITE(%g1, %g5, %g6)
+       TSB_WRITE(%g1, %g5, %g6)
 
        ba,pt           %xcc, kvmap_itlb_load
         nop
@@ -112,17 +112,17 @@ kvmap_itlb_obp:
 kvmap_dtlb_obp:
        OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath)
 
-       KTSB_LOCK_TAG(%g1, %g2, %g7)
+       TSB_LOCK_TAG(%g1, %g2, %g7)
 
-       KTSB_WRITE(%g1, %g5, %g6)
+       TSB_WRITE(%g1, %g5, %g6)
 
        ba,pt           %xcc, kvmap_dtlb_load
         nop
 
        .align          32
 kvmap_dtlb_tsb4m_load:
-       KTSB_LOCK_TAG(%g1, %g2, %g7)
-       KTSB_WRITE(%g1, %g5, %g6)
+       TSB_LOCK_TAG(%g1, %g2, %g7)
+       TSB_WRITE(%g1, %g5, %g6)
        ba,pt           %xcc, kvmap_dtlb_load
         nop
 
@@ -222,16 +222,16 @@ kvmap_linear_patch:
 kvmap_dtlb_vmalloc_addr:
        KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
 
-       KTSB_LOCK_TAG(%g1, %g2, %g7)
+       TSB_LOCK_TAG(%g1, %g2, %g7)
 
        /* Load and check PTE.  */
        ldxa            [%g5] ASI_PHYS_USE_EC, %g5
        mov             1, %g7
        sllx            %g7, TSB_TAG_INVALID_BIT, %g7
        brgez,a,pn      %g5, kvmap_dtlb_longpath
-        KTSB_STORE(%g1, %g7)
+        TSB_STORE(%g1, %g7)
 
-       KTSB_WRITE(%g1, %g5, %g6)
+       TSB_WRITE(%g1, %g5, %g6)
 
        /* fallthrough to TLB load */
 
index 42f28c7..acaebb6 100644 (file)
@@ -508,6 +508,8 @@ const char *mdesc_node_name(struct mdesc_handle *hp, u64 node)
 }
 EXPORT_SYMBOL(mdesc_node_name);
 
+static u64 max_cpus = 64;
+
 static void __init report_platform_properties(void)
 {
        struct mdesc_handle *hp = mdesc_grab();
@@ -543,8 +545,10 @@ static void __init report_platform_properties(void)
        if (v)
                printk("PLATFORM: watchdog-max-timeout [%llu ms]\n", *v);
        v = mdesc_get_property(hp, pn, "max-cpus", NULL);
-       if (v)
-               printk("PLATFORM: max-cpus [%llu]\n", *v);
+       if (v) {
+               max_cpus = *v;
+               printk("PLATFORM: max-cpus [%llu]\n", max_cpus);
+       }
 
 #ifdef CONFIG_SMP
        {
@@ -715,7 +719,7 @@ static void __cpuinit set_proc_ids(struct mdesc_handle *hp)
 }
 
 static void __cpuinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
-                                        unsigned char def)
+                                        unsigned long def, unsigned long max)
 {
        u64 val;
 
@@ -726,6 +730,9 @@ static void __cpuinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
        if (!val || val >= 64)
                goto use_default;
 
+       if (val > max)
+               val = max;
+
        *mask = ((1U << val) * 64U) - 1U;
        return;
 
@@ -736,19 +743,28 @@ use_default:
 static void __cpuinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
                                     struct trap_per_cpu *tb)
 {
+       static int printed;
        const u64 *val;
 
        val = mdesc_get_property(hp, mp, "q-cpu-mondo-#bits", NULL);
-       get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7);
+       get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7, ilog2(max_cpus * 2));
 
        val = mdesc_get_property(hp, mp, "q-dev-mondo-#bits", NULL);
-       get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7);
+       get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7, 8);
 
        val = mdesc_get_property(hp, mp, "q-resumable-#bits", NULL);
-       get_one_mondo_bits(val, &tb->resum_qmask, 6);
+       get_one_mondo_bits(val, &tb->resum_qmask, 6, 7);
 
        val = mdesc_get_property(hp, mp, "q-nonresumable-#bits", NULL);
-       get_one_mondo_bits(val, &tb->nonresum_qmask, 2);
+       get_one_mondo_bits(val, &tb->nonresum_qmask, 2, 2);
+       if (!printed++) {
+               pr_info("SUN4V: Mondo queue sizes "
+                       "[cpu(%u) dev(%u) r(%u) nr(%u)]\n",
+                       tb->cpu_mondo_qmask + 1,
+                       tb->dev_mondo_qmask + 1,
+                       tb->resum_qmask + 1,
+                       tb->nonresum_qmask + 1);
+       }
 }
 
 static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handle *, u64, int, void *), void *arg, cpumask_t *mask)
index c4dd099..3e9daea 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/interrupt.h>
 #include <linux/cpu.h>
 #include <linux/initrd.h>
+#include <linux/module.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
@@ -46,6 +47,8 @@
 #include <asm/mmu.h>
 #include <asm/ns87303.h>
 #include <asm/btext.h>
+#include <asm/elf.h>
+#include <asm/mdesc.h>
 
 #ifdef CONFIG_IP_PNP
 #include <net/ipconfig.h>
@@ -269,6 +272,40 @@ void __init sun4v_patch(void)
        sun4v_hvapi_init();
 }
 
+static void __init popc_patch(void)
+{
+       struct popc_3insn_patch_entry *p3;
+       struct popc_6insn_patch_entry *p6;
+
+       p3 = &__popc_3insn_patch;
+       while (p3 < &__popc_3insn_patch_end) {
+               unsigned long i, addr = p3->addr;
+
+               for (i = 0; i < 3; i++) {
+                       *(unsigned int *) (addr +  (i * 4)) = p3->insns[i];
+                       wmb();
+                       __asm__ __volatile__("flush     %0"
+                                            : : "r" (addr +  (i * 4)));
+               }
+
+               p3++;
+       }
+
+       p6 = &__popc_6insn_patch;
+       while (p6 < &__popc_6insn_patch_end) {
+               unsigned long i, addr = p6->addr;
+
+               for (i = 0; i < 6; i++) {
+                       *(unsigned int *) (addr +  (i * 4)) = p6->insns[i];
+                       wmb();
+                       __asm__ __volatile__("flush     %0"
+                                            : : "r" (addr +  (i * 4)));
+               }
+
+               p6++;
+       }
+}
+
 #ifdef CONFIG_SMP
 void __init boot_cpu_id_too_large(int cpu)
 {
@@ -278,6 +315,154 @@ void __init boot_cpu_id_too_large(int cpu)
 }
 #endif
 
+/* On Ultra, we support all of the v8 capabilities. */
+unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR |
+                                  HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV |
+                                  HWCAP_SPARC_V9);
+EXPORT_SYMBOL(sparc64_elf_hwcap);
+
+static const char *hwcaps[] = {
+       "flush", "stbar", "swap", "muldiv", "v9",
+       "ultra3", "blkinit", "n2",
+
+       /* These strings are as they appear in the machine description
+        * 'hwcap-list' property for cpu nodes.
+        */
+       "mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2",
+       "ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau",
+       "ima", "cspare",
+};
+
+void cpucap_info(struct seq_file *m)
+{
+       unsigned long caps = sparc64_elf_hwcap;
+       int i, printed = 0;
+
+       seq_puts(m, "cpucaps\t\t: ");
+       for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
+               unsigned long bit = 1UL << i;
+               if (caps & bit) {
+                       seq_printf(m, "%s%s",
+                                  printed ? "," : "", hwcaps[i]);
+                       printed++;
+               }
+       }
+       seq_putc(m, '\n');
+}
+
+static void __init report_hwcaps(unsigned long caps)
+{
+       int i, printed = 0;
+
+       printk(KERN_INFO "CPU CAPS: [");
+       for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
+               unsigned long bit = 1UL << i;
+               if (caps & bit) {
+                       printk(KERN_CONT "%s%s",
+                              printed ? "," : "", hwcaps[i]);
+                       if (++printed == 8) {
+                               printk(KERN_CONT "]\n");
+                               printk(KERN_INFO "CPU CAPS: [");
+                               printed = 0;
+                       }
+               }
+       }
+       printk(KERN_CONT "]\n");
+}
+
+static unsigned long __init mdesc_cpu_hwcap_list(void)
+{
+       struct mdesc_handle *hp;
+       unsigned long caps = 0;
+       const char *prop;
+       int len;
+       u64 pn;
+
+       hp = mdesc_grab();
+       if (!hp)
+               return 0;
+
+       pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "cpu");
+       if (pn == MDESC_NODE_NULL)
+               goto out;
+
+       prop = mdesc_get_property(hp, pn, "hwcap-list", &len);
+       if (!prop)
+               goto out;
+
+       while (len) {
+               int i, plen;
+
+               for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
+                       unsigned long bit = 1UL << i;
+
+                       if (!strcmp(prop, hwcaps[i])) {
+                               caps |= bit;
+                               break;
+                       }
+               }
+
+               plen = strlen(prop) + 1;
+               prop += plen;
+               len -= plen;
+       }
+
+out:
+       mdesc_release(hp);
+       return caps;
+}
+
+/* This yields a mask that user programs can use to figure out what
+ * instruction set this cpu supports.
+ */
+static void __init init_sparc64_elf_hwcap(void)
+{
+       unsigned long cap = sparc64_elf_hwcap;
+       unsigned long mdesc_caps;
+
+       if (tlb_type == cheetah || tlb_type == cheetah_plus)
+               cap |= HWCAP_SPARC_ULTRA3;
+       else if (tlb_type == hypervisor) {
+               if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 ||
+                   sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+                   sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
+                       cap |= HWCAP_SPARC_BLKINIT;
+               if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+                   sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
+                       cap |= HWCAP_SPARC_N2;
+       }
+
+       cap |= (AV_SPARC_MUL32 | AV_SPARC_DIV32 | AV_SPARC_V8PLUS);
+
+       mdesc_caps = mdesc_cpu_hwcap_list();
+       if (!mdesc_caps) {
+               if (tlb_type == spitfire)
+                       cap |= AV_SPARC_VIS;
+               if (tlb_type == cheetah || tlb_type == cheetah_plus)
+                       cap |= AV_SPARC_VIS | AV_SPARC_VIS2;
+               if (tlb_type == cheetah_plus)
+                       cap |= AV_SPARC_POPC;
+               if (tlb_type == hypervisor) {
+                       if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1)
+                               cap |= AV_SPARC_ASI_BLK_INIT;
+                       if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+                           sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
+                               cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 |
+                                       AV_SPARC_ASI_BLK_INIT |
+                                       AV_SPARC_POPC);
+                       if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
+                               cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC |
+                                       AV_SPARC_FMAF);
+               }
+       }
+       sparc64_elf_hwcap = cap | mdesc_caps;
+
+       report_hwcaps(sparc64_elf_hwcap);
+
+       if (sparc64_elf_hwcap & AV_SPARC_POPC)
+               popc_patch();
+}
+
 void __init setup_arch(char **cmdline_p)
 {
        /* Initialize PROM console and command line. */
@@ -337,6 +522,7 @@ void __init setup_arch(char **cmdline_p)
        init_cur_cpu_trap(current_thread_info());
 
        paging_init();
+       init_sparc64_elf_hwcap();
 }
 
 extern int stop_a_enabled;
index 372ad59..83b47ab 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/init.h>
+#include <linux/bitops.h>
 
 #include <asm/system.h>
 #include <asm/cpudata.h>
@@ -38,5 +39,15 @@ EXPORT_SYMBOL(sun4v_niagara_setperf);
 EXPORT_SYMBOL(sun4v_niagara2_getperf);
 EXPORT_SYMBOL(sun4v_niagara2_setperf);
 
+/* from hweight.S */
+EXPORT_SYMBOL(__arch_hweight8);
+EXPORT_SYMBOL(__arch_hweight16);
+EXPORT_SYMBOL(__arch_hweight32);
+EXPORT_SYMBOL(__arch_hweight64);
+
+/* from ffs_ffz.S */
+EXPORT_SYMBOL(ffs);
+EXPORT_SYMBOL(__ffs);
+
 /* Exporting a symbol from /init/main.c */
 EXPORT_SYMBOL(saved_command_line);
index 8cdbe59..c59af54 100644 (file)
 #include <asm/head.h>
 #include <asm/io.h>
 
-static int hv_supports_soft_state;
-
-static unsigned long kimage_addr_to_ra(const char *p)
-{
-       unsigned long val = (unsigned long) p;
+#include "kernel.h"
 
-       return kern_base + (val - KERNBASE);
-}
+static int hv_supports_soft_state;
 
 static void do_set_sstate(unsigned long state, const char *msg)
 {
index 35cff16..76e4ac1 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/bitops.h>
 #include <linux/perf_event.h>
 #include <linux/ratelimit.h>
+#include <linux/bitops.h>
 #include <asm/fpumacro.h>
 
 enum direction {
@@ -373,16 +374,11 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
        }
 }
 
-static char popc_helper[] = {
-0, 1, 1, 2, 1, 2, 2, 3,
-1, 2, 2, 3, 2, 3, 3, 4, 
-};
-
 int handle_popc(u32 insn, struct pt_regs *regs)
 {
-       u64 value;
-       int ret, i, rd = ((insn >> 25) & 0x1f);
        int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
+       int ret, rd = ((insn >> 25) & 0x1f);
+       u64 value;
                                
        perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
        if (insn & 0x2000) {
@@ -392,10 +388,7 @@ int handle_popc(u32 insn, struct pt_regs *regs)
                maybe_flush_windows(0, insn & 0x1f, rd, from_kernel);
                value = fetch_reg(insn & 0x1f, regs);
        }
-       for (ret = 0, i = 0; i < 16; i++) {
-               ret += popc_helper[value & 0xf];
-               value >>= 4;
-       }
+       ret = hweight64(value);
        if (rd < 16) {
                if (rd)
                        regs->u_regs[rd] = ret;
index c022075..0e16056 100644 (file)
@@ -107,7 +107,26 @@ SECTIONS
                *(.sun4v_2insn_patch)
                __sun4v_2insn_patch_end = .;
        }
-
+       .swapper_tsb_phys_patch : {
+               __swapper_tsb_phys_patch = .;
+               *(.swapper_tsb_phys_patch)
+               __swapper_tsb_phys_patch_end = .;
+       }
+       .swapper_4m_tsb_phys_patch : {
+               __swapper_4m_tsb_phys_patch = .;
+               *(.swapper_4m_tsb_phys_patch)
+               __swapper_4m_tsb_phys_patch_end = .;
+       }
+       .popc_3insn_patch : {
+               __popc_3insn_patch = .;
+               *(.popc_3insn_patch)
+               __popc_3insn_patch_end = .;
+       }
+       .popc_6insn_patch : {
+               __popc_6insn_patch = .;
+               *(.popc_6insn_patch)
+               __popc_6insn_patch_end = .;
+       }
        PERCPU_SECTION(SMP_CACHE_BYTES)
 
        . = ALIGN(PAGE_SIZE);
index 7f01b8f..a3fc437 100644 (file)
@@ -31,13 +31,13 @@ lib-$(CONFIG_SPARC64) += NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o
 lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o
 
 lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o
-lib-$(CONFIG_SPARC64) +=  NG2patch.o NG2page.o
+lib-$(CONFIG_SPARC64) +=  NG2patch.o
 
 lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
 lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
 
 lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o
-lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o
+lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
 
 obj-y                 += iomap.o
 obj-$(CONFIG_SPARC32) += atomic32.o
diff --git a/arch/sparc/lib/NG2page.S b/arch/sparc/lib/NG2page.S
deleted file mode 100644 (file)
index 73b6b7c..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-/* NG2page.S: Niagara-2 optimized clear and copy page.
- *
- * Copyright (C) 2007 (davem@davemloft.net)
- */
-
-#include <asm/asi.h>
-#include <asm/page.h>
-#include <asm/visasm.h>
-
-       .text
-       .align  32
-
-       /* This is heavily simplified from the sun4u variants
-        * because Niagara-2 does not have any D-cache aliasing issues.
-        */
-NG2copy_user_page:     /* %o0=dest, %o1=src, %o2=vaddr */
-       prefetch        [%o1 + 0x00], #one_read
-       prefetch        [%o1 + 0x40], #one_read
-       VISEntryHalf
-       set             PAGE_SIZE, %g7
-       sub             %o0, %o1, %g3
-1:     stxa            %g0, [%o1 + %g3] ASI_BLK_INIT_QUAD_LDD_P
-       subcc           %g7, 64, %g7
-       ldda            [%o1] ASI_BLK_P, %f0
-       stda            %f0, [%o1 + %g3] ASI_BLK_P
-       add             %o1, 64, %o1
-       bne,pt          %xcc, 1b
-        prefetch       [%o1 + 0x40], #one_read
-       membar          #Sync
-       VISExitHalf
-       retl
-        nop
-
-#define BRANCH_ALWAYS  0x10680000
-#define NOP            0x01000000
-#define NG_DO_PATCH(OLD, NEW)  \
-       sethi   %hi(NEW), %g1; \
-       or      %g1, %lo(NEW), %g1; \
-       sethi   %hi(OLD), %g2; \
-       or      %g2, %lo(OLD), %g2; \
-       sub     %g1, %g2, %g1; \
-       sethi   %hi(BRANCH_ALWAYS), %g3; \
-       sll     %g1, 11, %g1; \
-       srl     %g1, 11 + 2, %g1; \
-       or      %g3, %lo(BRANCH_ALWAYS), %g3; \
-       or      %g3, %g1, %g3; \
-       stw     %g3, [%g2]; \
-       sethi   %hi(NOP), %g3; \
-       or      %g3, %lo(NOP), %g3; \
-       stw     %g3, [%g2 + 0x4]; \
-       flush   %g2;
-
-       .globl  niagara2_patch_pageops
-       .type   niagara2_patch_pageops,#function
-niagara2_patch_pageops:
-       NG_DO_PATCH(copy_user_page, NG2copy_user_page)
-       NG_DO_PATCH(_clear_page, NGclear_page)
-       NG_DO_PATCH(clear_user_page, NGclear_user_page)
-       retl
-        nop
-       .size   niagara2_patch_pageops,.-niagara2_patch_pageops
index 428920d..b9e790b 100644 (file)
         */
 
 NGcopy_user_page:      /* %o0=dest, %o1=src, %o2=vaddr */
-       prefetch        [%o1 + 0x00], #one_read
-       mov             8, %g1
-       mov             16, %g2
-       mov             24, %g3
+       save            %sp, -192, %sp
+       rd              %asi, %g3
+       wr              %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
        set             PAGE_SIZE, %g7
+       prefetch        [%i1 + 0x00], #one_read
+       prefetch        [%i1 + 0x40], #one_read
 
-1:     ldda            [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2
-       ldda            [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4
-       prefetch        [%o1 + 0x40], #one_read
-       add             %o1, 32, %o1
-       stxa            %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
-       stxa            %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
-       ldda            [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2
-       stxa            %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
-       stxa            %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
-       ldda            [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4
-       add             %o1, 32, %o1
-       add             %o0, 32, %o0
-       stxa            %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
-       stxa            %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
-       stxa            %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
-       stxa            %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
-       subcc           %g7, 64, %g7
+1:     prefetch        [%i1 + 0x80], #one_read
+       prefetch        [%i1 + 0xc0], #one_read
+       ldda            [%i1 + 0x00] %asi, %o2
+       ldda            [%i1 + 0x10] %asi, %o4
+       ldda            [%i1 + 0x20] %asi, %l2
+       ldda            [%i1 + 0x30] %asi, %l4
+       stxa            %o2, [%i0 + 0x00] %asi
+       stxa            %o3, [%i0 + 0x08] %asi
+       stxa            %o4, [%i0 + 0x10] %asi
+       stxa            %o5, [%i0 + 0x18] %asi
+       stxa            %l2, [%i0 + 0x20] %asi
+       stxa            %l3, [%i0 + 0x28] %asi
+       stxa            %l4, [%i0 + 0x30] %asi
+       stxa            %l5, [%i0 + 0x38] %asi
+       ldda            [%i1 + 0x40] %asi, %o2
+       ldda            [%i1 + 0x50] %asi, %o4
+       ldda            [%i1 + 0x60] %asi, %l2
+       ldda            [%i1 + 0x70] %asi, %l4
+       stxa            %o2, [%i0 + 0x40] %asi
+       stxa            %o3, [%i0 + 0x48] %asi
+       stxa            %o4, [%i0 + 0x50] %asi
+       stxa            %o5, [%i0 + 0x58] %asi
+       stxa            %l2, [%i0 + 0x60] %asi
+       stxa            %l3, [%i0 + 0x68] %asi
+       stxa            %l4, [%i0 + 0x70] %asi
+       stxa            %l5, [%i0 + 0x78] %asi
+       add             %i1, 128, %i1
+       subcc           %g7, 128, %g7
        bne,pt          %xcc, 1b
-        add            %o0, 32, %o0
+        add            %i0, 128, %i0
+       wr              %g3, 0x0, %asi
        membar          #Sync
-       retl
-        nop
+       ret
+        restore
 
-       .globl          NGclear_page, NGclear_user_page
+       .align          32
 NGclear_page:          /* %o0=dest */
 NGclear_user_page:     /* %o0=dest, %o1=vaddr */
-       mov             8, %g1
-       mov             16, %g2
-       mov             24, %g3
+       rd              %asi, %g3
+       wr              %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
        set             PAGE_SIZE, %g7
 
-1:     stxa            %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
-       stxa            %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
-       stxa            %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
-       stxa            %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
-       add             %o0, 32, %o0
-       stxa            %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
-       stxa            %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
-       stxa            %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
-       stxa            %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
-       subcc           %g7, 64, %g7
+1:     stxa            %g0, [%o0 + 0x00] %asi
+       stxa            %g0, [%o0 + 0x08] %asi
+       stxa            %g0, [%o0 + 0x10] %asi
+       stxa            %g0, [%o0 + 0x18] %asi
+       stxa            %g0, [%o0 + 0x20] %asi
+       stxa            %g0, [%o0 + 0x28] %asi
+       stxa            %g0, [%o0 + 0x30] %asi
+       stxa            %g0, [%o0 + 0x38] %asi
+       stxa            %g0, [%o0 + 0x40] %asi
+       stxa            %g0, [%o0 + 0x48] %asi
+       stxa            %g0, [%o0 + 0x50] %asi
+       stxa            %g0, [%o0 + 0x58] %asi
+       stxa            %g0, [%o0 + 0x60] %asi
+       stxa            %g0, [%o0 + 0x68] %asi
+       stxa            %g0, [%o0 + 0x70] %asi
+       stxa            %g0, [%o0 + 0x78] %asi
+       stxa            %g0, [%o0 + 0x80] %asi
+       stxa            %g0, [%o0 + 0x88] %asi
+       stxa            %g0, [%o0 + 0x90] %asi
+       stxa            %g0, [%o0 + 0x98] %asi
+       stxa            %g0, [%o0 + 0xa0] %asi
+       stxa            %g0, [%o0 + 0xa8] %asi
+       stxa            %g0, [%o0 + 0xb0] %asi
+       stxa            %g0, [%o0 + 0xb8] %asi
+       stxa            %g0, [%o0 + 0xc0] %asi
+       stxa            %g0, [%o0 + 0xc8] %asi
+       stxa            %g0, [%o0 + 0xd0] %asi
+       stxa            %g0, [%o0 + 0xd8] %asi
+       stxa            %g0, [%o0 + 0xe0] %asi
+       stxa            %g0, [%o0 + 0xe8] %asi
+       stxa            %g0, [%o0 + 0xf0] %asi
+       stxa            %g0, [%o0 + 0xf8] %asi
+       subcc           %g7, 256, %g7
        bne,pt          %xcc, 1b
-        add            %o0, 32, %o0
+        add            %o0, 256, %o0
+       wr              %g3, 0x0, %asi
        membar          #Sync
        retl
         nop
index 8600eb2..1d32b54 100644 (file)
@@ -65,7 +65,7 @@ int __atomic_add_unless(atomic_t *v, int a, int u)
        if (ret != u)
                v->counter += a;
        spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
-       return ret != u;
+       return ret;
 }
 EXPORT_SYMBOL(__atomic_add_unless);
 
diff --git a/arch/sparc/lib/ffs.S b/arch/sparc/lib/ffs.S
new file mode 100644 (file)
index 0000000..b39389f
--- /dev/null
@@ -0,0 +1,84 @@
+#include <linux/linkage.h>
+
+       .register       %g2,#scratch
+
+       .text
+       .align  32
+
+ENTRY(ffs)
+       brnz,pt %o0, 1f
+        mov    1, %o1
+       retl
+        clr    %o0
+       nop
+       nop
+ENTRY(__ffs)
+       sllx    %o0, 32, %g1            /* 1  */
+       srlx    %o0, 32, %g2
+
+       clr     %o1                     /* 2  */
+       movrz   %g1, %g2, %o0
+
+       movrz   %g1, 32, %o1            /* 3  */
+1:     clr     %o2
+
+       sllx    %o0, (64 - 16), %g1     /* 4  */
+       srlx    %o0, 16, %g2
+
+       movrz   %g1, %g2, %o0           /* 5  */
+       clr     %o3
+
+       movrz   %g1, 16, %o2            /* 6  */
+       clr     %o4
+
+       and     %o0, 0xff, %g1          /* 7  */
+       srlx    %o0, 8, %g2
+
+       movrz   %g1, %g2, %o0           /* 8  */
+       clr     %o5
+
+       movrz   %g1, 8, %o3             /* 9  */
+       add     %o2, %o1, %o2
+
+       and     %o0, 0xf, %g1           /* 10 */
+       srlx    %o0, 4, %g2
+
+       movrz   %g1, %g2, %o0           /* 11 */
+       add     %o2, %o3, %o2
+
+       movrz   %g1, 4, %o4             /* 12 */
+
+       and     %o0, 0x3, %g1           /* 13 */
+       srlx    %o0, 2, %g2
+
+       movrz   %g1, %g2, %o0           /* 14 */
+       add     %o2, %o4, %o2
+
+       movrz   %g1, 2, %o5             /* 15 */
+
+       and     %o0, 0x1, %g1           /* 16 */
+
+       add     %o2, %o5, %o2           /* 17 */
+       xor     %g1, 0x1, %g1
+
+       retl                            /* 18 */
+        add    %o2, %g1, %o0
+ENDPROC(ffs)
+ENDPROC(__ffs)
+
+       .section        .popc_6insn_patch, "ax"
+       .word           ffs
+       brz,pn  %o0, 98f
+        neg    %o0, %g1
+       xnor    %o0, %g1, %o1
+       popc    %o1, %o0
+98:    retl
+        nop
+       .word           __ffs
+       neg     %o0, %g1
+       xnor    %o0, %g1, %o1
+       popc    %o1, %o0
+       retl
+        sub    %o0, 1, %o0
+       nop
+       .previous
diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S
new file mode 100644 (file)
index 0000000..95414e0
--- /dev/null
@@ -0,0 +1,51 @@
+#include <linux/linkage.h>
+
+       .text
+       .align  32
+ENTRY(__arch_hweight8)
+       ba,pt   %xcc, __sw_hweight8
+        nop
+       nop
+ENDPROC(__arch_hweight8)
+       .section        .popc_3insn_patch, "ax"
+       .word           __arch_hweight8
+       sllx            %o0, 64-8, %g1
+       retl
+        popc           %g1, %o0
+       .previous
+
+ENTRY(__arch_hweight16)
+       ba,pt   %xcc, __sw_hweight16
+        nop
+       nop
+ENDPROC(__arch_hweight16)
+       .section        .popc_3insn_patch, "ax"
+       .word           __arch_hweight16
+       sllx            %o0, 64-16, %g1
+       retl
+        popc           %g1, %o0
+       .previous
+
+ENTRY(__arch_hweight32)
+       ba,pt   %xcc, __sw_hweight32
+        nop
+       nop
+ENDPROC(__arch_hweight32)
+       .section        .popc_3insn_patch, "ax"
+       .word           __arch_hweight32
+       sllx            %o0, 64-32, %g1
+       retl
+        popc           %g1, %o0
+       .previous
+
+ENTRY(__arch_hweight64)
+       ba,pt   %xcc, __sw_hweight64
+        nop
+       nop
+ENDPROC(__arch_hweight64)
+       .section        .popc_3insn_patch, "ax"
+       .word           __arch_hweight64
+       retl
+        popc           %o0, %o0
+       nop
+       .previous
index 3fd8e18..adfac23 100644 (file)
@@ -1597,6 +1597,42 @@ static void __init tsb_phys_patch(void)
 static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR];
 extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
 
+static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa)
+{
+       pa >>= KTSB_PHYS_SHIFT;
+
+       while (start < end) {
+               unsigned int *ia = (unsigned int *)(unsigned long)*start;
+
+               ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10);
+               __asm__ __volatile__("flush     %0" : : "r" (ia));
+
+               ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff);
+               __asm__ __volatile__("flush     %0" : : "r" (ia + 1));
+
+               start++;
+       }
+}
+
+static void ktsb_phys_patch(void)
+{
+       extern unsigned int __swapper_tsb_phys_patch;
+       extern unsigned int __swapper_tsb_phys_patch_end;
+       extern unsigned int __swapper_4m_tsb_phys_patch;
+       extern unsigned int __swapper_4m_tsb_phys_patch_end;
+       unsigned long ktsb_pa;
+
+       ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE);
+       patch_one_ktsb_phys(&__swapper_tsb_phys_patch,
+                           &__swapper_tsb_phys_patch_end, ktsb_pa);
+#ifndef CONFIG_DEBUG_PAGEALLOC
+       ktsb_pa = (kern_base +
+                  ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
+       patch_one_ktsb_phys(&__swapper_4m_tsb_phys_patch,
+                           &__swapper_4m_tsb_phys_patch_end, ktsb_pa);
+#endif
+}
+
 static void __init sun4v_ktsb_init(void)
 {
        unsigned long ktsb_pa;
@@ -1716,8 +1752,10 @@ void __init paging_init(void)
                sun4u_pgprot_init();
 
        if (tlb_type == cheetah_plus ||
-           tlb_type == hypervisor)
+           tlb_type == hypervisor) {
                tsb_phys_patch();
+               ktsb_phys_patch();
+       }
 
        if (tlb_type == hypervisor) {
                sun4v_patch_tlb_handlers();