Pull percpu-dtc into release branch
authorTony Luck <tony.luck@intel.com>
Mon, 30 Apr 2007 20:56:00 +0000 (13:56 -0700)
committerTony Luck <tony.luck@intel.com>
Mon, 30 Apr 2007 20:56:00 +0000 (13:56 -0700)
12 files changed:
arch/ia64/kernel/entry.S
arch/ia64/kernel/ivt.S
arch/ia64/kernel/mca_asm.S
arch/ia64/kernel/patch.c
arch/ia64/kernel/setup.c
arch/ia64/kernel/vmlinux.lds.S
arch/ia64/mm/init.c
include/asm-ia64/asmmacro.h
include/asm-ia64/kregs.h
include/asm-ia64/patch.h
include/asm-ia64/processor.h
include/asm-ia64/sections.h

index e7873ee..55fd2d5 100644 (file)
@@ -767,7 +767,7 @@ ENTRY(ia64_leave_syscall)
        ld8.fill r15=[r3]                       // M0|1 restore r15
        mov b6=r18                              // I0   restore b6
 
-       addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
+       LOAD_PHYS_STACK_REG_SIZE(r17)
        mov f9=f0                                       // F    clear f9
 (pKStk) br.cond.dpnt.many skip_rbs_switch              // B
 
@@ -775,7 +775,6 @@ ENTRY(ia64_leave_syscall)
        shr.u r18=r19,16                // I0|1 get byte size of existing "dirty" partition
        cover                           // B    add current frame into dirty partition & set cr.ifs
        ;;
-(pUStk) ld4 r17=[r17]                  // M0|1 r17 = cpu_data->phys_stacked_size_p8
        mov r19=ar.bsp                  // M2   get new backing store pointer
        mov f10=f0                      // F    clear f10
 
@@ -953,9 +952,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
        shr.u r18=r19,16        // get byte size of existing "dirty" partition
        ;;
        mov r16=ar.bsp          // get existing backing store pointer
-       addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
-       ;;
-       ld4 r17=[r17]           // r17 = cpu_data->phys_stacked_size_p8
+       LOAD_PHYS_STACK_REG_SIZE(r17)
 (pKStk)        br.cond.dpnt skip_rbs_switch
 
        /*
index 6b7fcbd..34f44d8 100644 (file)
@@ -374,6 +374,7 @@ ENTRY(alt_dtlb_miss)
        movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
        mov r21=cr.ipsr
        mov r31=pr
+       mov r24=PERCPU_ADDR
        ;;
 #ifdef CONFIG_DISABLE_VHPT
        shr.u r22=r16,61                        // get the region number into r21
@@ -386,22 +387,30 @@ ENTRY(alt_dtlb_miss)
 (p8)   mov r29=b0                              // save b0
 (p8)   br.cond.dptk dtlb_fault
 #endif
+       cmp.ge p10,p11=r16,r24                  // access to per_cpu_data?
+       tbit.z p12,p0=r16,61                    // access to region 6?
+       mov r25=PERCPU_PAGE_SHIFT << 2
+       mov r26=PERCPU_PAGE_SIZE
+       nop.m 0
+       nop.b 0
+       ;;
+(p10)  mov r19=IA64_KR(PER_CPU_DATA)
+(p11)  and r19=r19,r16                         // clear non-ppn fields
        extr.u r23=r21,IA64_PSR_CPL0_BIT,2      // extract psr.cpl
        and r22=IA64_ISR_CODE_MASK,r20          // get the isr.code field
        tbit.nz p6,p7=r20,IA64_ISR_SP_BIT       // is speculation bit on?
-       shr.u r18=r16,57                        // move address bit 61 to bit 4
-       and r19=r19,r16                         // clear ed, reserved bits, and PTE control bits
        tbit.nz p9,p0=r20,IA64_ISR_NA_BIT       // is non-access bit on?
        ;;
-       andcm r18=0x10,r18      // bit 4=~address-bit(61)
+(p10)  sub r19=r19,r26
+(p10)  mov cr.itir=r25
        cmp.ne p8,p0=r0,r23
 (p9)   cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22  // check isr.code field
+(p12)  dep r17=-1,r17,4,1                      // set ma=UC for region 6 addr
 (p8)   br.cond.spnt page_fault
 
        dep r21=-1,r21,IA64_PSR_ED_BIT,1
-       or r19=r19,r17          // insert PTE control bits into r19
        ;;
-       or r19=r19,r18          // set bit 4 (uncached) if the access was to region 6
+       or r19=r19,r17          // insert PTE control bits into r19
 (p6)   mov cr.ipsr=r21
        ;;
 (p7)   itc.d r19               // insert the TLB entry
index c6b607c..8c9c26a 100644 (file)
@@ -101,14 +101,6 @@ ia64_do_tlb_purge:
        ;;
        srlz.d
        ;;
-       // 2. Purge DTR for PERCPU data.
-       movl r16=PERCPU_ADDR
-       mov r18=PERCPU_PAGE_SHIFT<<2
-       ;;
-       ptr.d r16,r18
-       ;;
-       srlz.d
-       ;;
        // 3. Purge ITR for PAL code.
        GET_THIS_PADDR(r2, ia64_mca_pal_base)
        ;;
@@ -196,22 +188,6 @@ ia64_reload_tr:
        srlz.i
        srlz.d
        ;;
-       // 2. Reload DTR register for PERCPU data.
-       GET_THIS_PADDR(r2, ia64_mca_per_cpu_pte)
-       ;;
-       movl r16=PERCPU_ADDR            // vaddr
-       movl r18=PERCPU_PAGE_SHIFT<<2
-       ;;
-       mov cr.itir=r18
-       mov cr.ifa=r16
-       ;;
-       ld8 r18=[r2]                    // load per-CPU PTE
-       mov r16=IA64_TR_PERCPU_DATA;
-       ;;
-       itr.d dtr[r16]=r18
-       ;;
-       srlz.d
-       ;;
        // 3. Reload ITR for PAL code.
        GET_THIS_PADDR(r2, ia64_mca_pal_pte)
        ;;
index bc11bb0..e796e29 100644 (file)
@@ -195,3 +195,23 @@ ia64_patch_gate (void)
        ia64_patch_vtop(START(vtop), END(vtop));
        ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
 }
+
+void ia64_patch_phys_stack_reg(unsigned long val)
+{
+       s32 * offp = (s32 *) __start___phys_stack_reg_patchlist;
+       s32 * end = (s32 *) __end___phys_stack_reg_patchlist;
+       u64 ip, mask, imm;
+
+       /* see instruction format A4: adds r1 = imm13, r3 */
+       mask = (0x3fUL << 27) | (0x7f << 13);
+       imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13;
+
+       while (offp < end) {
+               ip = (u64) offp + *offp;
+               ia64_patch(ip, mask, imm);
+               ia64_fc(ip);
+               ++offp;
+       }
+       ia64_sync_i();
+       ia64_srlz_i();
+}
index dc7dd76..6e19da1 100644 (file)
@@ -75,7 +75,6 @@ extern void ia64_setup_printk_clock(void);
 
 DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
 DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
-DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
 unsigned long ia64_cycles_per_usec;
 struct ia64_boot_param *ia64_boot_param;
 struct screen_info screen_info;
@@ -869,6 +868,7 @@ void __cpuinit
 cpu_init (void)
 {
        extern void __cpuinit ia64_mmu_init (void *);
+       static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG;
        unsigned long num_phys_stacked;
        pal_vm_info_2_u_t vmi;
        unsigned int max_ctx;
@@ -982,7 +982,10 @@ cpu_init (void)
                num_phys_stacked = 96;
        }
        /* size of physical stacked register partition plus 8 bytes: */
-       __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
+       if (num_phys_stacked > max_num_phys_stacked) {
+               ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8);
+               max_num_phys_stacked = num_phys_stacked;
+       }
        platform_cpu_init();
        pm_idle = default_idle;
 }
index 25dd55e..6923826 100644 (file)
@@ -78,6 +78,13 @@ SECTIONS
          __stop___mca_table = .;
        }
 
+  .data.patch.phys_stack_reg : AT(ADDR(.data.patch.phys_stack_reg) - LOAD_OFFSET)
+       {
+         __start___phys_stack_reg_patchlist = .;
+         *(.data.patch.phys_stack_reg)
+         __end___phys_stack_reg_patchlist = .;
+       }
+
   /* Global data */
   _data = .;
 
index 4f36987..5b70241 100644 (file)
@@ -355,7 +355,7 @@ setup_gate (void)
 void __devinit
 ia64_mmu_init (void *my_cpu_data)
 {
-       unsigned long psr, pta, impl_va_bits;
+       unsigned long pta, impl_va_bits;
        extern void __devinit tlb_init (void);
 
 #ifdef CONFIG_DISABLE_VHPT
@@ -364,15 +364,6 @@ ia64_mmu_init (void *my_cpu_data)
 #      define VHPT_ENABLE_BIT  1
 #endif
 
-       /* Pin mapping for percpu area into TLB */
-       psr = ia64_clear_ic();
-       ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
-                pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
-                PERCPU_PAGE_SHIFT);
-
-       ia64_set_psr(psr);
-       ia64_srlz_i();
-
        /*
         * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
         * address space.  The IA-64 architecture guarantees that at least 50 bits of
index c22b465..c1642fd 100644 (file)
@@ -103,6 +103,16 @@ name:
 # define FSYS_RETURN   br.ret.sptk.many b6
 #endif
 
+/*
+ * If physical stack register size is different from DEF_NUM_STACK_REG,
+ * dynamically patch the kernel for correct size.
+ */
+       .section ".data.patch.phys_stack_reg", "a"
+       .previous
+#define LOAD_PHYS_STACK_REG_SIZE(reg)                  \
+[1:]   adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0;        \
+       .xdata4 ".data.patch.phys_stack_reg", 1b-.
+
 /*
  * Up until early 2004, use of .align within a function caused bad unwind info.
  * TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into nothing
index 221b5cb..7e55a58 100644 (file)
@@ -29,8 +29,7 @@
  */
 #define IA64_TR_KERNEL         0       /* itr0, dtr0: maps kernel image (code & data) */
 #define IA64_TR_PALCODE                1       /* itr1: maps PALcode as required by EFI */
-#define IA64_TR_PERCPU_DATA    1       /* dtr1: percpu data */
-#define IA64_TR_CURRENT_STACK  2       /* dtr2: maps kernel's memory- & register-stacks */
+#define IA64_TR_CURRENT_STACK  1       /* dtr1: maps kernel's memory- & register-stacks */
 
 /* Processor status register bits: */
 #define IA64_PSR_BE_BIT                1
index 4797f35..a715430 100644 (file)
@@ -20,6 +20,7 @@ extern void ia64_patch_imm60 (u64 insn_addr, u64 val);                /* patch "brl" w/ip-rel
 
 extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end);
 extern void ia64_patch_vtop (unsigned long start, unsigned long end);
+extern void ia64_patch_phys_stack_reg(unsigned long val);
 extern void ia64_patch_gate (void);
 
 #endif /* _ASM_IA64_PATCH_H */
index 4f4ee1c..db81ba4 100644 (file)
@@ -19,6 +19,7 @@
 #include <asm/ptrace.h>
 #include <asm/ustack.h>
 
+#define IA64_NUM_PHYS_STACK_REG        96
 #define IA64_NUM_DBG_REGS      8
 
 #define DEFAULT_MAP_BASE       __IA64_UL_CONST(0x2000000000000000)
index e9eb7f6..dc42a35 100644 (file)
@@ -11,6 +11,7 @@
 extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
 extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
 extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
+extern char __start___phys_stack_reg_patchlist[], __end___phys_stack_reg_patchlist[];
 extern char __start_gate_section[];
 extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[];
 extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[];