Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 12 Oct 2014 00:36:34 +0000 (20:36 -0400)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 12 Oct 2014 00:36:34 +0000 (20:36 -0400)
Pull sparc updates from David Miller:

 1) Move to 4-level page tables on sparc64 and support up to 53-bits of
    physical addressing.  Kernel static image BSS size reduced by
    several megabytes.

 2) M6/M7 cpu support, from Allan Pais.

 3) Move to sparse IRQs, handle hypervisor TLB call errors more
    gracefully, and add T5 perf_event support.  From Bob Picco.

 4) Recognize cdroms and compute geometry from capacity in virtual disk
    driver, also from Allan Pais.

 5) Fix memset() return value on sparc32, from Andreas Larsson.

 6) Respect gfp flags in dma_alloc_coherent on sparc32, from Daniel
    Hellstrom.

 7) Fix handling of compound pages in virtual disk driver, from Dwight
    Engen.

 8) Fix lockdep warnings in LDC layer by moving IRQ requesting to
    ldc_alloc() from ldc_bind().

 9) Increase boot string length to 1024 bytes, from Dave Kleikamp.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc: (31 commits)
  sparc64: Fix lockdep warnings on reboot on Ultra-5
  sparc64: Increase size of boot string to 1024 bytes
  sparc64: Kill unnecessary tables and increase MAX_BANKS.
  sparc64: sparse irq
  sparc64: Adjust vmalloc region size based upon available virtual address bits.
  sparc64: Increase MAX_PHYS_ADDRESS_BITS to 53.
  sparc64: Use kernel page tables for vmemmap.
  sparc64: Fix physical memory management regressions with large max_phys_bits.
  sparc64: Adjust KTSB assembler to support larger physical addresses.
  sparc64: Define VA hole at run time, rather than at compile time.
  sparc64: Switch to 4-level page tables.
  sparc64: Fix reversed start/end in flush_tlb_kernel_range()
  sparc64: Add vio_set_intr() to enable/disable Rx interrupts
  vio: fix reuse of vio_dring slot
  sunvdc: limit each sg segment to a page
  sunvdc: compute vdisk geometry from capacity
  sunvdc: add cdrom and v1.1 protocol support
  sparc: VIO protocol version 1.6
  sparc64: Fix hibernation code refrence to PAGE_OFFSET.
  sparc64: Move request_irq() from ldc_bind() to ldc_alloc()
  ...

41 files changed:
arch/sparc/Kconfig
arch/sparc/include/asm/dma-mapping.h
arch/sparc/include/asm/hypervisor.h
arch/sparc/include/asm/irq_64.h
arch/sparc/include/asm/ldc.h
arch/sparc/include/asm/page_64.h
arch/sparc/include/asm/pgalloc_64.h
arch/sparc/include/asm/pgtable_64.h
arch/sparc/include/asm/spitfire.h
arch/sparc/include/asm/thread_info_64.h
arch/sparc/include/asm/tsb.h
arch/sparc/include/asm/vio.h
arch/sparc/kernel/cpu.c
arch/sparc/kernel/cpumap.c
arch/sparc/kernel/ds.c
arch/sparc/kernel/head_64.S
arch/sparc/kernel/hvapi.c
arch/sparc/kernel/hvcalls.S
arch/sparc/kernel/ioport.c
arch/sparc/kernel/irq_64.c
arch/sparc/kernel/ktlb.S
arch/sparc/kernel/ldc.c
arch/sparc/kernel/leon_kernel.c
arch/sparc/kernel/pcr.c
arch/sparc/kernel/perf_event.c
arch/sparc/kernel/setup_64.c
arch/sparc/kernel/smp_64.c
arch/sparc/kernel/sun4v_tlb_miss.S
arch/sparc/kernel/traps_64.c
arch/sparc/kernel/vio.c
arch/sparc/kernel/viohs.c
arch/sparc/kernel/vmlinux.lds.S
arch/sparc/lib/memset.S
arch/sparc/mm/fault_64.c
arch/sparc/mm/init_64.c
arch/sparc/mm/init_64.h
arch/sparc/power/hibernate_asm.S
arch/sparc/prom/bootstr_64.c
arch/sparc/prom/p1275.c
drivers/block/sunvdc.c
drivers/net/ethernet/sun/sunvnet.c

index a537816..96ac69c 100644 (file)
@@ -67,6 +67,7 @@ config SPARC64
        select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_CONTEXT_TRACKING
        select HAVE_DEBUG_KMEMLEAK
+       select SPARSE_IRQ
        select RTC_DRV_CMOS
        select RTC_DRV_BQ4802
        select RTC_DRV_SUN4V
index 1ee0271..5b1b52a 100644 (file)
@@ -20,10 +20,12 @@ extern struct bus_type pci_bus_type;
 
 static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 {
-#if defined(CONFIG_SPARC32) && defined(CONFIG_PCI)
+#ifdef CONFIG_SPARC_LEON
        if (sparc_cpu_model == sparc_leon)
                return leon_dma_ops;
-       else if (dev->bus == &pci_bus_type)
+#endif
+#if defined(CONFIG_SPARC32) && defined(CONFIG_PCI)
+       if (dev->bus == &pci_bus_type)
                return &pci32_dma_ops;
 #endif
        return dma_ops;
index 94b39ca..4f6725f 100644 (file)
@@ -2947,6 +2947,16 @@ unsigned long sun4v_vt_set_perfreg(unsigned long reg_num,
                                   unsigned long reg_val);
 #endif
 
+#define        HV_FAST_T5_GET_PERFREG          0x1a8
+#define        HV_FAST_T5_SET_PERFREG          0x1a9
+
+#ifndef        __ASSEMBLY__
+unsigned long sun4v_t5_get_perfreg(unsigned long reg_num,
+                                  unsigned long *reg_val);
+unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
+                                  unsigned long reg_val);
+#endif
+
 /* Function numbers for HV_CORE_TRAP.  */
 #define HV_CORE_SET_VER                        0x00
 #define HV_CORE_PUTCHAR                        0x01
@@ -2978,6 +2988,7 @@ unsigned long sun4v_vt_set_perfreg(unsigned long reg_num,
 #define HV_GRP_VF_CPU                  0x0205
 #define HV_GRP_KT_CPU                  0x0209
 #define HV_GRP_VT_CPU                  0x020c
+#define HV_GRP_T5_CPU                  0x0211
 #define HV_GRP_DIAG                    0x0300
 
 #ifndef __ASSEMBLY__
index 91d2193..3f70f90 100644 (file)
@@ -37,7 +37,7 @@
  *
  * ino_bucket->irq allocation is made during {sun4v_,}build_irq().
  */
-#define NR_IRQS    255
+#define NR_IRQS                (2048)
 
 void irq_install_pre_handler(int irq,
                             void (*func)(unsigned int, void *, void *),
@@ -57,11 +57,8 @@ unsigned int sun4u_build_msi(u32 portid, unsigned int *irq_p,
                             unsigned long iclr_base);
 void sun4u_destroy_msi(unsigned int irq);
 
-unsigned char irq_alloc(unsigned int dev_handle,
-                       unsigned int dev_ino);
-#ifdef CONFIG_PCI_MSI
+unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino);
 void irq_free(unsigned int irq);
-#endif
 
 void __init init_IRQ(void);
 void fixup_irqs(void);
index c8c67f6..58ab64d 100644 (file)
@@ -53,13 +53,14 @@ struct ldc_channel;
 /* Allocate state for a channel.  */
 struct ldc_channel *ldc_alloc(unsigned long id,
                              const struct ldc_channel_config *cfgp,
-                             void *event_arg);
+                             void *event_arg,
+                             const char *name);
 
 /* Shut down and free state for a channel.  */
 void ldc_free(struct ldc_channel *lp);
 
 /* Register TX and RX queues of the link with the hypervisor.  */
-int ldc_bind(struct ldc_channel *lp, const char *name);
+int ldc_bind(struct ldc_channel *lp);
 
 /* For non-RAW protocols we need to complete a handshake before
  * communication can proceed.  ldc_connect() does that, if the
index bf10998..8c2a8c9 100644 (file)
@@ -57,18 +57,21 @@ void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *topa
 typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long iopte; } iopte_t;
 typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
 typedef struct { unsigned long pgd; } pgd_t;
 typedef struct { unsigned long pgprot; } pgprot_t;
 
 #define pte_val(x)     ((x).pte)
 #define iopte_val(x)   ((x).iopte)
 #define pmd_val(x)      ((x).pmd)
+#define pud_val(x)      ((x).pud)
 #define pgd_val(x)     ((x).pgd)
 #define pgprot_val(x)  ((x).pgprot)
 
 #define __pte(x)       ((pte_t) { (x) } )
 #define __iopte(x)     ((iopte_t) { (x) } )
 #define __pmd(x)        ((pmd_t) { (x) } )
+#define __pud(x)        ((pud_t) { (x) } )
 #define __pgd(x)       ((pgd_t) { (x) } )
 #define __pgprot(x)    ((pgprot_t) { (x) } )
 
@@ -77,18 +80,21 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 typedef unsigned long pte_t;
 typedef unsigned long iopte_t;
 typedef unsigned long pmd_t;
+typedef unsigned long pud_t;
 typedef unsigned long pgd_t;
 typedef unsigned long pgprot_t;
 
 #define pte_val(x)     (x)
 #define iopte_val(x)   (x)
 #define pmd_val(x)      (x)
+#define pud_val(x)      (x)
 #define pgd_val(x)     (x)
 #define pgprot_val(x)  (x)
 
 #define __pte(x)       (x)
 #define __iopte(x)     (x)
 #define __pmd(x)        (x)
+#define __pud(x)        (x)
 #define __pgd(x)       (x)
 #define __pgprot(x)    (x)
 
@@ -96,21 +102,14 @@ typedef unsigned long pgprot_t;
 
 typedef pte_t *pgtable_t;
 
-/* These two values define the virtual address space range in which we
- * must forbid 64-bit user processes from making mappings.  It used to
- * represent precisely the virtual address space hole present in most
- * early sparc64 chips including UltraSPARC-I.  But now it also is
- * further constrained by the limits of our page tables, which is
- * 43-bits of virtual address.
- */
-#define SPARC64_VA_HOLE_TOP    _AC(0xfffffc0000000000,UL)
-#define SPARC64_VA_HOLE_BOTTOM _AC(0x0000040000000000,UL)
+extern unsigned long sparc64_va_hole_top;
+extern unsigned long sparc64_va_hole_bottom;
 
 /* The next two defines specify the actual exclusion region we
  * enforce, wherein we use a 4GB red zone on each side of the VA hole.
  */
-#define VA_EXCLUDE_START (SPARC64_VA_HOLE_BOTTOM - (1UL << 32UL))
-#define VA_EXCLUDE_END   (SPARC64_VA_HOLE_TOP + (1UL << 32UL))
+#define VA_EXCLUDE_START (sparc64_va_hole_bottom - (1UL << 32UL))
+#define VA_EXCLUDE_END   (sparc64_va_hole_top + (1UL << 32UL))
 
 #define TASK_UNMAPPED_BASE     (test_thread_flag(TIF_32BIT) ? \
                                 _AC(0x0000000070000000,UL) : \
@@ -118,20 +117,16 @@ typedef pte_t *pgtable_t;
 
 #include <asm-generic/memory_model.h>
 
-#define PAGE_OFFSET_BY_BITS(X) (-(_AC(1,UL) << (X)))
 extern unsigned long PAGE_OFFSET;
 
 #endif /* !(__ASSEMBLY__) */
 
-/* The maximum number of physical memory address bits we support, this
- * is used to size various tables used to manage kernel TLB misses and
- * also the sparsemem code.
+/* The maximum number of physical memory address bits we support.  The
+ * largest value we can support is whatever "KPGD_SHIFT + KPTE_BITS"
+ * evaluates to.
  */
-#define MAX_PHYS_ADDRESS_BITS  47
+#define MAX_PHYS_ADDRESS_BITS  53
 
-/* These two shift counts are used when indexing sparc64_valid_addr_bitmap
- * and kpte_linear_bitmap.
- */
 #define ILOG2_4MB              22
 #define ILOG2_256MB            28
 
index 39a7ac4..5e31871 100644 (file)
 
 extern struct kmem_cache *pgtable_cache;
 
+static inline void __pgd_populate(pgd_t *pgd, pud_t *pud)
+{
+       pgd_set(pgd, pud);
+}
+
+#define pgd_populate(MM, PGD, PUD)     __pgd_populate(PGD, PUD)
+
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
        return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
@@ -25,7 +32,23 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
        kmem_cache_free(pgtable_cache, pgd);
 }
 
-#define pud_populate(MM, PUD, PMD)     pud_set(PUD, PMD)
+static inline void __pud_populate(pud_t *pud, pmd_t *pmd)
+{
+       pud_set(pud, pmd);
+}
+
+#define pud_populate(MM, PUD, PMD)     __pud_populate(PUD, PMD)
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+       return kmem_cache_alloc(pgtable_cache,
+                               GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+       kmem_cache_free(pgtable_cache, pud);
+}
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -91,4 +114,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pte_t *pte,
 #define __pmd_free_tlb(tlb, pmd, addr)               \
        pgtable_free_tlb(tlb, pmd, false)
 
+#define __pud_free_tlb(tlb, pud, addr)               \
+       pgtable_free_tlb(tlb, pud, false)
+
 #endif /* _SPARC64_PGALLOC_H */
index 3770bf5..bfeb626 100644 (file)
@@ -20,8 +20,6 @@
 #include <asm/page.h>
 #include <asm/processor.h>
 
-#include <asm-generic/pgtable-nopud.h>
-
 /* The kernel image occupies 0x4000000 to 0x6000000 (4MB --> 96MB).
  * The page copy blockops can use 0x6000000 to 0x8000000.
  * The 8K TSB is mapped in the 0x8000000 to 0x8400000 range.
 #define LOW_OBP_ADDRESS                _AC(0x00000000f0000000,UL)
 #define HI_OBP_ADDRESS         _AC(0x0000000100000000,UL)
 #define VMALLOC_START          _AC(0x0000000100000000,UL)
-#define VMALLOC_END            _AC(0x0000010000000000,UL)
-#define VMEMMAP_BASE           _AC(0x0000010000000000,UL)
-
-#define vmemmap                        ((struct page *)VMEMMAP_BASE)
+#define VMEMMAP_BASE           VMALLOC_END
 
 /* PMD_SHIFT determines the size of the area a second-level page
  * table can map
 #define PMD_MASK       (~(PMD_SIZE-1))
 #define PMD_BITS       (PAGE_SHIFT - 3)
 
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT    (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS)
+/* PUD_SHIFT determines the size of the area a third-level page
+ * table can map
+ */
+#define PUD_SHIFT      (PMD_SHIFT + PMD_BITS)
+#define PUD_SIZE       (_AC(1,UL) << PUD_SHIFT)
+#define PUD_MASK       (~(PUD_SIZE-1))
+#define PUD_BITS       (PAGE_SHIFT - 3)
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT    (PUD_SHIFT + PUD_BITS)
 #define PGDIR_SIZE     (_AC(1,UL) << PGDIR_SHIFT)
 #define PGDIR_MASK     (~(PGDIR_SIZE-1))
 #define PGDIR_BITS     (PAGE_SHIFT - 3)
 
-#if (PGDIR_SHIFT + PGDIR_BITS) != 43
+#if (MAX_PHYS_ADDRESS_BITS > PGDIR_SHIFT + PGDIR_BITS)
+#error MAX_PHYS_ADDRESS_BITS exceeds what kernel page tables can support
+#endif
+
+#if (PGDIR_SHIFT + PGDIR_BITS) != 53
 #error Page table parameters do not cover virtual address space properly.
 #endif
 
 
 #ifndef __ASSEMBLY__
 
-#include <linux/sched.h>
-
-extern unsigned long sparc64_valid_addr_bitmap[];
+extern unsigned long VMALLOC_END;
 
-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-static inline bool __kern_addr_valid(unsigned long paddr)
-{
-       if ((paddr >> MAX_PHYS_ADDRESS_BITS) != 0UL)
-               return false;
-       return test_bit(paddr >> ILOG2_4MB, sparc64_valid_addr_bitmap);
-}
+#define vmemmap                        ((struct page *)VMEMMAP_BASE)
 
-static inline bool kern_addr_valid(unsigned long addr)
-{
-       unsigned long paddr = __pa(addr);
+#include <linux/sched.h>
 
-       return __kern_addr_valid(paddr);
-}
+bool kern_addr_valid(unsigned long addr);
 
 /* Entries per page directory level. */
 #define PTRS_PER_PTE   (1UL << (PAGE_SHIFT-3))
 #define PTRS_PER_PMD   (1UL << PMD_BITS)
+#define PTRS_PER_PUD   (1UL << PUD_BITS)
 #define PTRS_PER_PGD   (1UL << PGDIR_BITS)
 
 /* Kernel has a separate 44bit address space. */
@@ -101,6 +98,9 @@ static inline bool kern_addr_valid(unsigned long addr)
 #define pmd_ERROR(e)                                                   \
        pr_err("%s:%d: bad pmd %p(%016lx) seen at (%pS)\n",             \
               __FILE__, __LINE__, &(e), pmd_val(e), __builtin_return_address(0))
+#define pud_ERROR(e)                                                   \
+       pr_err("%s:%d: bad pud %p(%016lx) seen at (%pS)\n",             \
+              __FILE__, __LINE__, &(e), pud_val(e), __builtin_return_address(0))
 #define pgd_ERROR(e)                                                   \
        pr_err("%s:%d: bad pgd %p(%016lx) seen at (%pS)\n",             \
               __FILE__, __LINE__, &(e), pgd_val(e), __builtin_return_address(0))
@@ -112,6 +112,7 @@ static inline bool kern_addr_valid(unsigned long addr)
 #define _PAGE_R                  _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/
 #define _PAGE_SPECIAL     _AC(0x0200000000000000,UL) /* Special page         */
 #define _PAGE_PMD_HUGE    _AC(0x0100000000000000,UL) /* Huge page            */
+#define _PAGE_PUD_HUGE    _PAGE_PMD_HUGE
 
 /* Advertise support for _PAGE_SPECIAL */
 #define __HAVE_ARCH_PTE_SPECIAL
@@ -658,26 +659,26 @@ static inline unsigned long pmd_large(pmd_t pmd)
        return pte_val(pte) & _PAGE_PMD_HUGE;
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline unsigned long pmd_young(pmd_t pmd)
+static inline unsigned long pmd_pfn(pmd_t pmd)
 {
        pte_t pte = __pte(pmd_val(pmd));
 
-       return pte_young(pte);
+       return pte_pfn(pte);
 }
 
-static inline unsigned long pmd_write(pmd_t pmd)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline unsigned long pmd_young(pmd_t pmd)
 {
        pte_t pte = __pte(pmd_val(pmd));
 
-       return pte_write(pte);
+       return pte_young(pte);
 }
 
-static inline unsigned long pmd_pfn(pmd_t pmd)
+static inline unsigned long pmd_write(pmd_t pmd)
 {
        pte_t pte = __pte(pmd_val(pmd));
 
-       return pte_pfn(pte);
+       return pte_write(pte);
 }
 
 static inline unsigned long pmd_trans_huge(pmd_t pmd)
@@ -771,13 +772,15 @@ static inline int pmd_present(pmd_t pmd)
  * the top bits outside of the range of any physical address size we
  * support are clear as well.  We also validate the physical itself.
  */
-#define pmd_bad(pmd)                   ((pmd_val(pmd) & ~PAGE_MASK) || \
-                                        !__kern_addr_valid(pmd_val(pmd)))
+#define pmd_bad(pmd)                   (pmd_val(pmd) & ~PAGE_MASK)
 
 #define pud_none(pud)                  (!pud_val(pud))
 
-#define pud_bad(pud)                   ((pud_val(pud) & ~PAGE_MASK) || \
-                                        !__kern_addr_valid(pud_val(pud)))
+#define pud_bad(pud)                   (pud_val(pud) & ~PAGE_MASK)
+
+#define pgd_none(pgd)                  (!pgd_val(pgd))
+
+#define pgd_bad(pgd)                   (pgd_val(pgd) & ~PAGE_MASK)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void set_pmd_at(struct mm_struct *mm, unsigned long addr,
@@ -815,10 +818,31 @@ static inline unsigned long __pmd_page(pmd_t pmd)
 #define pmd_clear(pmdp)                        (pmd_val(*(pmdp)) = 0UL)
 #define pud_present(pud)               (pud_val(pud) != 0U)
 #define pud_clear(pudp)                        (pud_val(*(pudp)) = 0UL)
+#define pgd_page_vaddr(pgd)            \
+       ((unsigned long) __va(pgd_val(pgd)))
+#define pgd_present(pgd)               (pgd_val(pgd) != 0U)
+#define pgd_clear(pgdp)                        (pgd_val(*(pgdp)) = 0UL)
+
+static inline unsigned long pud_large(pud_t pud)
+{
+       pte_t pte = __pte(pud_val(pud));
+
+       return pte_val(pte) & _PAGE_PMD_HUGE;
+}
+
+static inline unsigned long pud_pfn(pud_t pud)
+{
+       pte_t pte = __pte(pud_val(pud));
+
+       return pte_pfn(pte);
+}
 
 /* Same in both SUN4V and SUN4U.  */
 #define pte_none(pte)                  (!pte_val(pte))
 
+#define pgd_set(pgdp, pudp)    \
+       (pgd_val(*(pgdp)) = (__pa((unsigned long) (pudp))))
+
 /* to find an entry in a page-table-directory. */
 #define pgd_index(address)     (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
 #define pgd_offset(mm, address)        ((mm)->pgd + pgd_index(address))
@@ -826,6 +850,11 @@ static inline unsigned long __pmd_page(pmd_t pmd)
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
+/* Find an entry in the third-level page table.. */
+#define pud_index(address)     (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+#define pud_offset(pgdp, address)      \
+       ((pud_t *) pgd_page_vaddr(*(pgdp)) + pud_index(address))
+
 /* Find an entry in the second-level page table.. */
 #define pmd_offset(pudp, address)      \
        ((pmd_t *) pud_page_vaddr(*(pudp)) + \
@@ -898,7 +927,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 #endif
 
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-extern pmd_t swapper_low_pmd_dir[PTRS_PER_PMD];
 
 void paging_init(void);
 unsigned long find_ecache_flush_span(unsigned long size);
index 3fc5869..56f9338 100644 (file)
@@ -45,6 +45,8 @@
 #define SUN4V_CHIP_NIAGARA3    0x03
 #define SUN4V_CHIP_NIAGARA4    0x04
 #define SUN4V_CHIP_NIAGARA5    0x05
+#define SUN4V_CHIP_SPARC_M6    0x06
+#define SUN4V_CHIP_SPARC_M7    0x07
 #define SUN4V_CHIP_SPARC64X    0x8a
 #define SUN4V_CHIP_UNKNOWN     0xff
 
index a5f01ac..f85dc85 100644 (file)
@@ -102,6 +102,7 @@ struct thread_info {
 #define FAULT_CODE_ITLB                0x04    /* Miss happened in I-TLB          */
 #define FAULT_CODE_WINFIXUP    0x08    /* Miss happened during spill/fill */
 #define FAULT_CODE_BLKCOMMIT   0x10    /* Use blk-commit ASI in copy_page */
+#define        FAULT_CODE_BAD_RA       0x20    /* Bad RA for sun4v                */
 
 #if PAGE_SHIFT == 13
 #define THREAD_SIZE (2*PAGE_SIZE)
index 90916f9..ecb49cf 100644 (file)
@@ -133,9 +133,24 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
        sub     TSB, 0x8, TSB;   \
        TSB_STORE(TSB, TAG);
 
-       /* Do a kernel page table walk.  Leaves physical PTE pointer in
-        * REG1.  Jumps to FAIL_LABEL on early page table walk termination.
-        * VADDR will not be clobbered, but REG2 will.
+       /* Do a kernel page table walk.  Leaves valid PTE value in
+        * REG1.  Jumps to FAIL_LABEL on early page table walk
+        * termination.  VADDR will not be clobbered, but REG2 will.
+        *
+        * There are two masks we must apply to propagate bits from
+        * the virtual address into the PTE physical address field
+        * when dealing with huge pages.  This is because the page
+        * table boundaries do not match the huge page size(s) the
+        * hardware supports.
+        *
+        * In these cases we propagate the bits that are below the
+        * page table level where we saw the huge page mapping, but
+        * are still within the relevant physical bits for the huge
+        * page size in question.  So for PMD mappings (which fall on
+        * bit 23, for 8MB per PMD) we must propagate bit 22 for a
+        * 4MB huge page.  For huge PUDs (which fall on bit 33, for
+        * 8GB per PUD), we have to accommodate 256MB and 2GB huge
+        * pages.  So for those we propagate bits 32 to 28.
         */
 #define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL)       \
        sethi           %hi(swapper_pg_dir), REG1; \
@@ -145,15 +160,40 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
        andn            REG2, 0x7, REG2; \
        ldx             [REG1 + REG2], REG1; \
        brz,pn          REG1, FAIL_LABEL; \
-        sllx           VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+        sllx           VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \
        srlx            REG2, 64 - PAGE_SHIFT, REG2; \
        andn            REG2, 0x7, REG2; \
        ldxa            [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
        brz,pn          REG1, FAIL_LABEL; \
-        sllx           VADDR, 64 - PMD_SHIFT, REG2; \
+       sethi           %uhi(_PAGE_PUD_HUGE), REG2; \
+       brz,pn          REG1, FAIL_LABEL; \
+        sllx           REG2, 32, REG2; \
+       andcc           REG1, REG2, %g0; \
+       sethi           %hi(0xf8000000), REG2; \
+       bne,pt          %xcc, 697f; \
+        sllx           REG2, 1, REG2; \
+       sllx            VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
        srlx            REG2, 64 - PAGE_SHIFT, REG2; \
        andn            REG2, 0x7, REG2; \
-       add             REG1, REG2, REG1;
+       ldxa            [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+       sethi           %uhi(_PAGE_PMD_HUGE), REG2; \
+       brz,pn          REG1, FAIL_LABEL; \
+        sllx           REG2, 32, REG2; \
+       andcc           REG1, REG2, %g0; \
+       be,pn           %xcc, 698f; \
+        sethi          %hi(0x400000), REG2; \
+697:   brgez,pn        REG1, FAIL_LABEL; \
+        andn           REG1, REG2, REG1; \
+       and             VADDR, REG2, REG2; \
+       ba,pt           %xcc, 699f; \
+        or             REG1, REG2, REG1; \
+698:   sllx            VADDR, 64 - PMD_SHIFT, REG2; \
+       srlx            REG2, 64 - PAGE_SHIFT, REG2; \
+       andn            REG2, 0x7, REG2; \
+       ldxa            [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+       brgez,pn        REG1, FAIL_LABEL; \
+        nop; \
+699:
 
        /* PMD has been loaded into REG1, interpret the value, seeing
         * if it is a HUGE PMD or a normal one.  If it is not valid
@@ -197,6 +237,11 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
        srlx            REG2, 64 - PAGE_SHIFT, REG2; \
        andn            REG2, 0x7, REG2; \
        ldxa            [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \
+       brz,pn          REG1, FAIL_LABEL; \
+        sllx           VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \
+       srlx            REG2, 64 - PAGE_SHIFT, REG2; \
+       andn            REG2, 0x7, REG2; \
+       ldxa            [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
        brz,pn          REG1, FAIL_LABEL; \
         sllx           VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
        srlx            REG2, 64 - PAGE_SHIFT, REG2; \
@@ -246,8 +291,6 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
        (KERNEL_TSB_SIZE_BYTES / 16)
 #define KERNEL_TSB4M_NENTRIES  4096
 
-#define KTSB_PHYS_SHIFT                15
-
        /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
         * on TSB hit.  REG1, REG2, REG3, and REG4 are used as temporaries
         * and the found TTE will be left in REG1.  REG3 and REG4 must
@@ -256,17 +299,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
         * VADDR and TAG will be preserved and not clobbered by this macro.
         */
 #define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
-661:   sethi           %hi(swapper_tsb), REG1;                 \
-       or              REG1, %lo(swapper_tsb), REG1; \
+661:   sethi           %uhi(swapper_tsb), REG1; \
+       sethi           %hi(swapper_tsb), REG2; \
+       or              REG1, %ulo(swapper_tsb), REG1; \
+       or              REG2, %lo(swapper_tsb), REG2; \
        .section        .swapper_tsb_phys_patch, "ax"; \
        .word           661b; \
        .previous; \
-661:   nop; \
-       .section        .tsb_ldquad_phys_patch, "ax"; \
-       .word           661b; \
-       sllx            REG1, KTSB_PHYS_SHIFT, REG1; \
-       sllx            REG1, KTSB_PHYS_SHIFT, REG1; \
-       .previous; \
+       sllx            REG1, 32, REG1; \
+       or              REG1, REG2, REG1; \
        srlx            VADDR, PAGE_SHIFT, REG2; \
        and             REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \
        sllx            REG2, 4, REG2; \
@@ -281,17 +322,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
         * we can make use of that for the index computation.
         */
 #define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
-661:   sethi           %hi(swapper_4m_tsb), REG1;           \
-       or              REG1, %lo(swapper_4m_tsb), REG1; \
+661:   sethi           %uhi(swapper_4m_tsb), REG1; \
+       sethi           %hi(swapper_4m_tsb), REG2; \
+       or              REG1, %ulo(swapper_4m_tsb), REG1; \
+       or              REG2, %lo(swapper_4m_tsb), REG2; \
        .section        .swapper_4m_tsb_phys_patch, "ax"; \
        .word           661b; \
        .previous; \
-661:   nop; \
-       .section        .tsb_ldquad_phys_patch, "ax"; \
-       .word           661b; \
-       sllx            REG1, KTSB_PHYS_SHIFT, REG1; \
-       sllx            REG1, KTSB_PHYS_SHIFT, REG1; \
-       .previous; \
+       sllx            REG1, 32, REG1; \
+       or              REG1, REG2, REG1; \
        and             TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \
        sllx            REG2, 4, REG2; \
        add             REG1, REG2, REG2; \
index 6b135a8..d758c8d 100644 (file)
@@ -121,12 +121,18 @@ struct vio_disk_attr_info {
        u8                      vdisk_type;
 #define VD_DISK_TYPE_SLICE     0x01 /* Slice in block device   */
 #define VD_DISK_TYPE_DISK      0x02 /* Entire block device     */
-       u16                     resv1;
+       u8                      vdisk_mtype;            /* v1.1 */
+#define VD_MEDIA_TYPE_FIXED    0x01 /* Fixed device */
+#define VD_MEDIA_TYPE_CD       0x02 /* CD Device    */
+#define VD_MEDIA_TYPE_DVD      0x03 /* DVD Device   */
+       u8                      resv1;
        u32                     vdisk_block_size;
        u64                     operations;
-       u64                     vdisk_size;
+       u64                     vdisk_size;             /* v1.1 */
        u64                     max_xfer_size;
-       u64                     resv2[2];
+       u32                     phys_block_size;        /* v1.2 */
+       u32                     resv2;
+       u64                     resv3[1];
 };
 
 struct vio_disk_desc {
@@ -272,7 +278,7 @@ static inline u32 vio_dring_avail(struct vio_dring_state *dr,
                                  unsigned int ring_size)
 {
        return (dr->pending -
-               ((dr->prod - dr->cons) & (ring_size - 1)));
+               ((dr->prod - dr->cons) & (ring_size - 1)) - 1);
 }
 
 #define VIO_MAX_TYPE_LEN       32
@@ -292,6 +298,7 @@ struct vio_dev {
 
        unsigned int            tx_irq;
        unsigned int            rx_irq;
+       u64                     rx_ino;
 
        struct device           dev;
 };
@@ -447,5 +454,6 @@ int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev,
                    char *name);
 
 void vio_port_up(struct vio_driver_state *vio);
+int vio_set_intr(unsigned long dev_ino, int state);
 
 #endif /* _SPARC64_VIO_H */
index 82a3a71..dfad8b1 100644 (file)
@@ -494,6 +494,18 @@ static void __init sun4v_cpu_probe(void)
                sparc_pmu_type = "niagara5";
                break;
 
+       case SUN4V_CHIP_SPARC_M6:
+               sparc_cpu_type = "SPARC-M6";
+               sparc_fpu_type = "SPARC-M6 integrated FPU";
+               sparc_pmu_type = "sparc-m6";
+               break;
+
+       case SUN4V_CHIP_SPARC_M7:
+               sparc_cpu_type = "SPARC-M7";
+               sparc_fpu_type = "SPARC-M7 integrated FPU";
+               sparc_pmu_type = "sparc-m7";
+               break;
+
        case SUN4V_CHIP_SPARC64X:
                sparc_cpu_type = "SPARC64-X";
                sparc_fpu_type = "SPARC64-X integrated FPU";
index de1c844..e69ec0e 100644 (file)
@@ -326,6 +326,8 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
        case SUN4V_CHIP_NIAGARA3:
        case SUN4V_CHIP_NIAGARA4:
        case SUN4V_CHIP_NIAGARA5:
+       case SUN4V_CHIP_SPARC_M6:
+       case SUN4V_CHIP_SPARC_M7:
        case SUN4V_CHIP_SPARC64X:
                rover_inc_table = niagara_iterate_method;
                break;
index dff60ab..f87a55d 100644 (file)
@@ -1200,14 +1200,14 @@ static int ds_probe(struct vio_dev *vdev, const struct vio_device_id *id)
        ds_cfg.tx_irq = vdev->tx_irq;
        ds_cfg.rx_irq = vdev->rx_irq;
 
-       lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp);
+       lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp, "DS");
        if (IS_ERR(lp)) {
                err = PTR_ERR(lp);
                goto out_free_ds_states;
        }
        dp->lp = lp;
 
-       err = ldc_bind(lp, "DS");
+       err = ldc_bind(lp);
        if (err)
                goto out_free_ldc;
 
index 452f04f..4fdeb80 100644 (file)
@@ -427,6 +427,12 @@ sun4v_chip_type:
        cmp     %g2, '5'
        be,pt   %xcc, 5f
         mov    SUN4V_CHIP_NIAGARA5, %g4
+       cmp     %g2, '6'
+       be,pt   %xcc, 5f
+        mov    SUN4V_CHIP_SPARC_M6, %g4
+       cmp     %g2, '7'
+       be,pt   %xcc, 5f
+        mov    SUN4V_CHIP_SPARC_M7, %g4
        ba,pt   %xcc, 49f
         nop
 
@@ -583,6 +589,12 @@ niagara_tlb_fixup:
        be,pt   %xcc, niagara4_patch
         nop
        cmp     %g1, SUN4V_CHIP_NIAGARA5
+       be,pt   %xcc, niagara4_patch
+        nop
+       cmp     %g1, SUN4V_CHIP_SPARC_M6
+       be,pt   %xcc, niagara4_patch
+        nop
+       cmp     %g1, SUN4V_CHIP_SPARC_M7
        be,pt   %xcc, niagara4_patch
         nop
 
index c0a2de0..5c55145 100644 (file)
@@ -46,6 +46,7 @@ static struct api_info api_table[] = {
        { .group = HV_GRP_VF_CPU,                               },
        { .group = HV_GRP_KT_CPU,                               },
        { .group = HV_GRP_VT_CPU,                               },
+       { .group = HV_GRP_T5_CPU,                               },
        { .group = HV_GRP_DIAG,         .flags = FLAG_PRE_API   },
 };
 
index f3ab509..caedf83 100644 (file)
@@ -821,3 +821,19 @@ ENTRY(sun4v_vt_set_perfreg)
        retl
         nop
 ENDPROC(sun4v_vt_set_perfreg)
+
+ENTRY(sun4v_t5_get_perfreg)
+       mov     %o1, %o4
+       mov     HV_FAST_T5_GET_PERFREG, %o5
+       ta      HV_FAST_TRAP
+       stx     %o1, [%o4]
+       retl
+        nop
+ENDPROC(sun4v_t5_get_perfreg)
+
+ENTRY(sun4v_t5_set_perfreg)
+       mov     HV_FAST_T5_SET_PERFREG, %o5
+       ta      HV_FAST_TRAP
+       retl
+        nop
+ENDPROC(sun4v_t5_set_perfreg)
index 7f08ec8..28fed53 100644 (file)
@@ -278,7 +278,8 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len,
        }
 
        order = get_order(len_total);
-       if ((va = __get_free_pages(GFP_KERNEL|__GFP_COMP, order)) == 0)
+       va = __get_free_pages(gfp, order);
+       if (va == 0)
                goto err_nopages;
 
        if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL)
@@ -443,7 +444,7 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len,
        }
 
        order = get_order(len_total);
-       va = (void *) __get_free_pages(GFP_KERNEL, order);
+       va = (void *) __get_free_pages(gfp, order);
        if (va == NULL) {
                printk("pci_alloc_consistent: no %ld pages\n", len_total>>PAGE_SHIFT);
                goto err_nopages;
index 666193f..4033c23 100644 (file)
@@ -47,8 +47,6 @@
 #include "cpumap.h"
 #include "kstack.h"
 
-#define NUM_IVECS      (IMAP_INR + 1)
-
 struct ino_bucket *ivector_table;
 unsigned long ivector_table_pa;
 
@@ -107,55 +105,196 @@ static void bucket_set_irq(unsigned long bucket_pa, unsigned int irq)
 
 #define irq_work_pa(__cpu)     &(trap_block[(__cpu)].irq_worklist_pa)
 
-static struct {
-       unsigned int dev_handle;
-       unsigned int dev_ino;
-       unsigned int in_use;
-} irq_table[NR_IRQS];
-static DEFINE_SPINLOCK(irq_alloc_lock);
+static unsigned long hvirq_major __initdata;
+static int __init early_hvirq_major(char *p)
+{
+       int rc = kstrtoul(p, 10, &hvirq_major);
+
+       return rc;
+}
+early_param("hvirq", early_hvirq_major);
+
+static int hv_irq_version;
+
+/* Major version 2.0 of HV_GRP_INTR added support for the VIRQ cookie
+ * based interfaces, but:
+ *
+ * 1) Several OSs, Solaris and Linux included, use them even when only
+ *    negotiating version 1.0 (or failing to negotiate at all).  So the
+ *    hypervisor has a workaround that provides the VIRQ interfaces even
+ *    when only version 1.0 of the API is in use.
+ *
+ * 2) Second, and more importantly, with major version 2.0 these VIRQ
+ *    interfaces only were actually hooked up for LDC interrupts, even
+ *    though the Hypervisor specification clearly stated:
+ *
+ *     The new interrupt API functions will be available to a guest
+ *     when it negotiates version 2.0 in the interrupt API group 0x2. When
+ *     a guest negotiates version 2.0, all interrupt sources will only
+ *     support using the cookie interface, and any attempt to use the
+ *     version 1.0 interrupt APIs numbered 0xa0 to 0xa6 will result in the
+ *     ENOTSUPPORTED error being returned.
+ *
+ *   with an emphasis on "all interrupt sources".
+ *
+ * To correct this, major version 3.0 was created which does actually
+ * support VIRQs for all interrupt sources (not just LDC devices).  So
+ * if we want to move completely over to the cookie based VIRQs we must
+ * negotiate major version 3.0 or later of HV_GRP_INTR.
+ */
+static bool sun4v_cookie_only_virqs(void)
+{
+       if (hv_irq_version >= 3)
+               return true;
+       return false;
+}
 
-unsigned char irq_alloc(unsigned int dev_handle, unsigned int dev_ino)
+static void __init irq_init_hv(void)
 {
-       unsigned long flags;
-       unsigned char ent;
+       unsigned long hv_error, major, minor = 0;
+
+       if (tlb_type != hypervisor)
+               return;
 
-       BUILD_BUG_ON(NR_IRQS >= 256);
+       if (hvirq_major)
+               major = hvirq_major;
+       else
+               major = 3;
 
-       spin_lock_irqsave(&irq_alloc_lock, flags);
+       hv_error = sun4v_hvapi_register(HV_GRP_INTR, major, &minor);
+       if (!hv_error)
+               hv_irq_version = major;
+       else
+               hv_irq_version = 1;
 
-       for (ent = 1; ent < NR_IRQS; ent++) {
-               if (!irq_table[ent].in_use)
+       pr_info("SUN4V: Using IRQ API major %d, cookie only virqs %s\n",
+               hv_irq_version,
+               sun4v_cookie_only_virqs() ? "enabled" : "disabled");
+}
+
+/* This function is for the timer interrupt.*/
+int __init arch_probe_nr_irqs(void)
+{
+       return 1;
+}
+
+#define DEFAULT_NUM_IVECS      (0xfffU)
+static unsigned int nr_ivec = DEFAULT_NUM_IVECS;
+#define NUM_IVECS (nr_ivec)
+
+static unsigned int __init size_nr_ivec(void)
+{
+       if (tlb_type == hypervisor) {
+               switch (sun4v_chip_type) {
+               /* Athena's devhandle|devino is large.*/
+               case SUN4V_CHIP_SPARC64X:
+                       nr_ivec = 0xffff;
                        break;
+               }
        }
-       if (ent >= NR_IRQS) {
-               printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
-               ent = 0;
-       } else {
-               irq_table[ent].dev_handle = dev_handle;
-               irq_table[ent].dev_ino = dev_ino;
-               irq_table[ent].in_use = 1;
-       }
+       return nr_ivec;
+}
+
+struct irq_handler_data {
+       union {
+               struct {
+                       unsigned int dev_handle;
+                       unsigned int dev_ino;
+               };
+               unsigned long sysino;
+       };
+       struct ino_bucket bucket;
+       unsigned long   iclr;
+       unsigned long   imap;
+};
+
+static inline unsigned int irq_data_to_handle(struct irq_data *data)
+{
+       struct irq_handler_data *ihd = data->handler_data;
+
+       return ihd->dev_handle;
+}
+
+static inline unsigned int irq_data_to_ino(struct irq_data *data)
+{
+       struct irq_handler_data *ihd = data->handler_data;
 
-       spin_unlock_irqrestore(&irq_alloc_lock, flags);
+       return ihd->dev_ino;
+}
+
+static inline unsigned long irq_data_to_sysino(struct irq_data *data)
+{
+       struct irq_handler_data *ihd = data->handler_data;
 
-       return ent;
+       return ihd->sysino;
 }
 
-#ifdef CONFIG_PCI_MSI
 void irq_free(unsigned int irq)
 {
-       unsigned long flags;
+       void *data = irq_get_handler_data(irq);
 
-       if (irq >= NR_IRQS)
-               return;
+       kfree(data);
+       irq_set_handler_data(irq, NULL);
+       irq_free_descs(irq, 1);
+}
 
-       spin_lock_irqsave(&irq_alloc_lock, flags);
+unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino)
+{
+       int irq;
 
-       irq_table[irq].in_use = 0;
+       irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL);
+       if (irq <= 0)
+               goto out;
 
-       spin_unlock_irqrestore(&irq_alloc_lock, flags);
+       return irq;
+out:
+       return 0;
+}
+
+static unsigned int cookie_exists(u32 devhandle, unsigned int devino)
+{
+       unsigned long hv_err, cookie;
+       struct ino_bucket *bucket;
+       unsigned int irq = 0U;
+
+       hv_err = sun4v_vintr_get_cookie(devhandle, devino, &cookie);
+       if (hv_err) {
+               pr_err("HV get cookie failed hv_err = %ld\n", hv_err);
+               goto out;
+       }
+
+       if (cookie & ((1UL << 63UL))) {
+               cookie = ~cookie;
+               bucket = (struct ino_bucket *) __va(cookie);
+               irq = bucket->__irq;
+       }
+out:
+       return irq;
+}
+
+static unsigned int sysino_exists(u32 devhandle, unsigned int devino)
+{
+       unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino);
+       struct ino_bucket *bucket;
+       unsigned int irq;
+
+       bucket = &ivector_table[sysino];
+       irq = bucket_get_irq(__pa(bucket));
+
+       return irq;
+}
+
+void ack_bad_irq(unsigned int irq)
+{
+       pr_crit("BAD IRQ ack %d\n", irq);
+}
+
+void irq_install_pre_handler(int irq,
+                            void (*func)(unsigned int, void *, void *),
+                            void *arg1, void *arg2)
+{
+       pr_warn("IRQ pre handler NOT supported.\n");
 }
-#endif
 
 /*
  * /proc/interrupts printing:
@@ -206,15 +345,6 @@ static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
        return tid;
 }
 
-struct irq_handler_data {
-       unsigned long   iclr;
-       unsigned long   imap;
-
-       void            (*pre_handler)(unsigned int, void *, void *);
-       void            *arg1;
-       void            *arg2;
-};
-
 #ifdef CONFIG_SMP
 static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity)
 {
@@ -316,8 +446,8 @@ static void sun4u_irq_eoi(struct irq_data *data)
 
 static void sun4v_irq_enable(struct irq_data *data)
 {
-       unsigned int ino = irq_table[data->irq].dev_ino;
        unsigned long cpuid = irq_choose_cpu(data->irq, data->affinity);
+       unsigned int ino = irq_data_to_sysino(data);
        int err;
 
        err = sun4v_intr_settarget(ino, cpuid);
@@ -337,8 +467,8 @@ static void sun4v_irq_enable(struct irq_data *data)
 static int sun4v_set_affinity(struct irq_data *data,
                               const struct cpumask *mask, bool force)
 {
-       unsigned int ino = irq_table[data->irq].dev_ino;
        unsigned long cpuid = irq_choose_cpu(data->irq, mask);
+       unsigned int ino = irq_data_to_sysino(data);
        int err;
 
        err = sun4v_intr_settarget(ino, cpuid);
@@ -351,7 +481,7 @@ static int sun4v_set_affinity(struct irq_data *data,
 
 static void sun4v_irq_disable(struct irq_data *data)
 {
-       unsigned int ino = irq_table[data->irq].dev_ino;
+       unsigned int ino = irq_data_to_sysino(data);
        int err;
 
        err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED);
@@ -362,7 +492,7 @@ static void sun4v_irq_disable(struct irq_data *data)
 
 static void sun4v_irq_eoi(struct irq_data *data)
 {
-       unsigned int ino = irq_table[data->irq].dev_ino;
+       unsigned int ino = irq_data_to_sysino(data);
        int err;
 
        err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
@@ -373,14 +503,13 @@ static void sun4v_irq_eoi(struct irq_data *data)
 
 static void sun4v_virq_enable(struct irq_data *data)
 {
-       unsigned long cpuid, dev_handle, dev_ino;
+       unsigned long dev_handle = irq_data_to_handle(data);
+       unsigned long dev_ino = irq_data_to_ino(data);
+       unsigned long cpuid;
        int err;
 
        cpuid = irq_choose_cpu(data->irq, data->affinity);
 
-       dev_handle = irq_table[data->irq].dev_handle;
-       dev_ino = irq_table[data->irq].dev_ino;
-
        err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
        if (err != HV_EOK)
                printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
@@ -403,14 +532,13 @@ static void sun4v_virq_enable(struct irq_data *data)
 static int sun4v_virt_set_affinity(struct irq_data *data,
                                    const struct cpumask *mask, bool force)
 {
-       unsigned long cpuid, dev_handle, dev_ino;
+       unsigned long dev_handle = irq_data_to_handle(data);
+       unsigned long dev_ino = irq_data_to_ino(data);
+       unsigned long cpuid;
        int err;
 
        cpuid = irq_choose_cpu(data->irq, mask);
 
-       dev_handle = irq_table[data->irq].dev_handle;
-       dev_ino = irq_table[data->irq].dev_ino;
-
        err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
        if (err != HV_EOK)
                printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
@@ -422,11 +550,10 @@ static int sun4v_virt_set_affinity(struct irq_data *data,
 
 static void sun4v_virq_disable(struct irq_data *data)
 {
-       unsigned long dev_handle, dev_ino;
+       unsigned long dev_handle = irq_data_to_handle(data);
+       unsigned long dev_ino = irq_data_to_ino(data);
        int err;
 
-       dev_handle = irq_table[data->irq].dev_handle;
-       dev_ino = irq_table[data->irq].dev_ino;
 
        err = sun4v_vintr_set_valid(dev_handle, dev_ino,
                                    HV_INTR_DISABLED);
@@ -438,12 +565,10 @@ static void sun4v_virq_disable(struct irq_data *data)
 
 static void sun4v_virq_eoi(struct irq_data *data)
 {
-       unsigned long dev_handle, dev_ino;
+       unsigned long dev_handle = irq_data_to_handle(data);
+       unsigned long dev_ino = irq_data_to_ino(data);
        int err;
 
-       dev_handle = irq_table[data->irq].dev_handle;
-       dev_ino = irq_table[data->irq].dev_ino;
-
        err = sun4v_vintr_set_state(dev_handle, dev_ino,
                                    HV_INTR_STATE_IDLE);
        if (err != HV_EOK)
@@ -479,31 +604,10 @@ static struct irq_chip sun4v_virq = {
        .flags                  = IRQCHIP_EOI_IF_HANDLED,
 };
 
-static void pre_flow_handler(struct irq_data *d)
-{
-       struct irq_handler_data *handler_data = irq_data_get_irq_handler_data(d);
-       unsigned int ino = irq_table[d->irq].dev_ino;
-
-       handler_data->pre_handler(ino, handler_data->arg1, handler_data->arg2);
-}
-
-void irq_install_pre_handler(int irq,
-                            void (*func)(unsigned int, void *, void *),
-                            void *arg1, void *arg2)
-{
-       struct irq_handler_data *handler_data = irq_get_handler_data(irq);
-
-       handler_data->pre_handler = func;
-       handler_data->arg1 = arg1;
-       handler_data->arg2 = arg2;
-
-       __irq_set_preflow_handler(irq, pre_flow_handler);
-}
-
 unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap)
 {
-       struct ino_bucket *bucket;
        struct irq_handler_data *handler_data;
+       struct ino_bucket *bucket;
        unsigned int irq;
        int ino;
 
@@ -537,119 +641,166 @@ out:
        return irq;
 }
 
-static unsigned int sun4v_build_common(unsigned long sysino,
-                                      struct irq_chip *chip)
+static unsigned int sun4v_build_common(u32 devhandle, unsigned int devino,
+               void (*handler_data_init)(struct irq_handler_data *data,
+               u32 devhandle, unsigned int devino),
+               struct irq_chip *chip)
 {
-       struct ino_bucket *bucket;
-       struct irq_handler_data *handler_data;
+       struct irq_handler_data *data;
        unsigned int irq;
 
-       BUG_ON(tlb_type != hypervisor);
+       irq = irq_alloc(devhandle, devino);
+       if (!irq)
+               goto out;
 
-       bucket = &ivector_table[sysino];
-       irq = bucket_get_irq(__pa(bucket));
-       if (!irq) {
-               irq = irq_alloc(0, sysino);
-               bucket_set_irq(__pa(bucket), irq);
-               irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq,
-                                             "IVEC");
+       data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+       if (unlikely(!data)) {
+               pr_err("IRQ handler data allocation failed.\n");
+               irq_free(irq);
+               irq = 0;
+               goto out;
        }
 
-       handler_data = irq_get_handler_data(irq);
-       if (unlikely(handler_data))
-               goto out;
+       irq_set_handler_data(irq, data);
+       handler_data_init(data, devhandle, devino);
+       irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, "IVEC");
+       data->imap = ~0UL;
+       data->iclr = ~0UL;
+out:
+       return irq;
+}
 
-       handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
-       if (unlikely(!handler_data)) {
-               prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
-               prom_halt();
-       }
-       irq_set_handler_data(irq, handler_data);
+static unsigned long cookie_assign(unsigned int irq, u32 devhandle,
+               unsigned int devino)
+{
+       struct irq_handler_data *ihd = irq_get_handler_data(irq);
+       unsigned long hv_error, cookie;
 
-       /* Catch accidental accesses to these things.  IMAP/ICLR handling
-        * is done by hypervisor calls on sun4v platforms, not by direct
-        * register accesses.
+       /* handler_irq needs to find the irq. cookie is seen signed in
+        * sun4v_dev_mondo and treated as a non ivector_table delivery.
         */
-       handler_data->imap = ~0UL;
-       handler_data->iclr = ~0UL;
+       ihd->bucket.__irq = irq;
+       cookie = ~__pa(&ihd->bucket);
 
-out:
-       return irq;
+       hv_error = sun4v_vintr_set_cookie(devhandle, devino, cookie);
+       if (hv_error)
+               pr_err("HV vintr set cookie failed = %ld\n", hv_error);
+
+       return hv_error;
 }
 
-unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
+static void cookie_handler_data(struct irq_handler_data *data,
+                               u32 devhandle, unsigned int devino)
 {
-       unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino);
+       data->dev_handle = devhandle;
+       data->dev_ino = devino;
+}
 
-       return sun4v_build_common(sysino, &sun4v_irq);
+static unsigned int cookie_build_irq(u32 devhandle, unsigned int devino,
+                                    struct irq_chip *chip)
+{
+       unsigned long hv_error;
+       unsigned int irq;
+
+       irq = sun4v_build_common(devhandle, devino, cookie_handler_data, chip);
+
+       hv_error = cookie_assign(irq, devhandle, devino);
+       if (hv_error) {
+               irq_free(irq);
+               irq = 0;
+       }
+
+       return irq;
 }
 
-unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
+static unsigned int sun4v_build_cookie(u32 devhandle, unsigned int devino)
 {
-       struct irq_handler_data *handler_data;
-       unsigned long hv_err, cookie;
-       struct ino_bucket *bucket;
        unsigned int irq;
 
-       bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC);
-       if (unlikely(!bucket))
-               return 0;
+       irq = cookie_exists(devhandle, devino);
+       if (irq)
+               goto out;
 
-       /* The only reference we store to the IRQ bucket is
-        * by physical address which kmemleak can't see, tell
-        * it that this object explicitly is not a leak and
-        * should be scanned.
-        */
-       kmemleak_not_leak(bucket);
+       irq = cookie_build_irq(devhandle, devino, &sun4v_virq);
 
-       __flush_dcache_range((unsigned long) bucket,
-                            ((unsigned long) bucket +
-                             sizeof(struct ino_bucket)));
+out:
+       return irq;
+}
 
-       irq = irq_alloc(devhandle, devino);
+static void sysino_set_bucket(unsigned int irq)
+{
+       struct irq_handler_data *ihd = irq_get_handler_data(irq);
+       struct ino_bucket *bucket;
+       unsigned long sysino;
+
+       sysino = sun4v_devino_to_sysino(ihd->dev_handle, ihd->dev_ino);
+       BUG_ON(sysino >= nr_ivec);
+       bucket = &ivector_table[sysino];
        bucket_set_irq(__pa(bucket), irq);
+}
 
-       irq_set_chip_and_handler_name(irq, &sun4v_virq, handle_fasteoi_irq,
-                                     "IVEC");
+static void sysino_handler_data(struct irq_handler_data *data,
+                               u32 devhandle, unsigned int devino)
+{
+       unsigned long sysino;
 
-       handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
-       if (unlikely(!handler_data))
-               return 0;
+       sysino = sun4v_devino_to_sysino(devhandle, devino);
+       data->sysino = sysino;
+}
 
-       /* In order to make the LDC channel startup sequence easier,
-        * especially wrt. locking, we do not let request_irq() enable
-        * the interrupt.
-        */
-       irq_set_status_flags(irq, IRQ_NOAUTOEN);
-       irq_set_handler_data(irq, handler_data);
+static unsigned int sysino_build_irq(u32 devhandle, unsigned int devino,
+                                    struct irq_chip *chip)
+{
+       unsigned int irq;
 
-       /* Catch accidental accesses to these things.  IMAP/ICLR handling
-        * is done by hypervisor calls on sun4v platforms, not by direct
-        * register accesses.
-        */
-       handler_data->imap = ~0UL;
-       handler_data->iclr = ~0UL;
+       irq = sun4v_build_common(devhandle, devino, sysino_handler_data, chip);
+       if (!irq)
+               goto out;
 
-       cookie = ~__pa(bucket);
-       hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie);
-       if (hv_err) {
-               prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] "
-                           "err=%lu\n", devhandle, devino, hv_err);
-               prom_halt();
-       }
+       sysino_set_bucket(irq);
+out:
+       return irq;
+}
 
+static int sun4v_build_sysino(u32 devhandle, unsigned int devino)
+{
+       int irq;
+
+       irq = sysino_exists(devhandle, devino);
+       if (irq)
+               goto out;
+
+       irq = sysino_build_irq(devhandle, devino, &sun4v_irq);
+out:
        return irq;
 }
 
-void ack_bad_irq(unsigned int irq)
+unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
 {
-       unsigned int ino = irq_table[irq].dev_ino;
+       unsigned int irq;
 
-       if (!ino)
-               ino = 0xdeadbeef;
+       if (sun4v_cookie_only_virqs())
+               irq = sun4v_build_cookie(devhandle, devino);
+       else
+               irq = sun4v_build_sysino(devhandle, devino);
 
-       printk(KERN_CRIT "Unexpected IRQ from ino[%x] irq[%u]\n",
-              ino, irq);
+       return irq;
+}
+
+unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
+{
+       int irq;
+
+       irq = cookie_build_irq(devhandle, devino, &sun4v_virq);
+       if (!irq)
+               goto out;
+
+       /* As in the original function, do not let request_irq() enable the
+        * interrupt: it eases LDC channel startup, especially wrt. locking. */
+       irq_set_status_flags(irq, IRQ_NOAUTOEN);
+
+out:
+       return irq;
 }
 
 void *hardirq_stack[NR_CPUS];
@@ -720,9 +871,12 @@ void fixup_irqs(void)
 
        for (irq = 0; irq < NR_IRQS; irq++) {
                struct irq_desc *desc = irq_to_desc(irq);
-               struct irq_data *data = irq_desc_get_irq_data(desc);
+               struct irq_data *data;
                unsigned long flags;
 
+               if (!desc)
+                       continue;
+               data = irq_desc_get_irq_data(desc);
                raw_spin_lock_irqsave(&desc->lock, flags);
                if (desc->action && !irqd_is_per_cpu(data)) {
                        if (data->chip->irq_set_affinity)
@@ -922,16 +1076,22 @@ static struct irqaction timer_irq_action = {
        .name = "timer",
 };
 
-/* Only invoked on boot processor. */
-void __init init_IRQ(void)
+static void __init irq_ivector_init(void)
 {
-       unsigned long size;
+       unsigned long size, order;
+       unsigned int ivecs;
 
-       map_prom_timers();
-       kill_prom_timer();
+       /* If we are doing cookie only VIRQs then we do not need the ivector
+        * table to process interrupts.
+        */
+       if (sun4v_cookie_only_virqs())
+               return;
 
-       size = sizeof(struct ino_bucket) * NUM_IVECS;
-       ivector_table = kzalloc(size, GFP_KERNEL);
+       ivecs = size_nr_ivec();
+       size = sizeof(struct ino_bucket) * ivecs;
+       order = get_order(size);
+       ivector_table = (struct ino_bucket *)
+               __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
        if (!ivector_table) {
                prom_printf("Fatal error, cannot allocate ivector_table\n");
                prom_halt();
@@ -940,6 +1100,15 @@ void __init init_IRQ(void)
                             ((unsigned long) ivector_table) + size);
 
        ivector_table_pa = __pa(ivector_table);
+}
+
+/* Only invoked on boot processor. */
+void __init init_IRQ(void)
+{
+       irq_init_hv();
+       irq_ivector_init();
+       map_prom_timers();
+       kill_prom_timer();
 
        if (tlb_type == hypervisor)
                sun4v_init_mondo_queues();
index 605d492..ef0d8e9 100644 (file)
@@ -47,14 +47,6 @@ kvmap_itlb_vmalloc_addr:
        KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
 
        TSB_LOCK_TAG(%g1, %g2, %g7)
-
-       /* Load and check PTE.  */
-       ldxa            [%g5] ASI_PHYS_USE_EC, %g5
-       mov             1, %g7
-       sllx            %g7, TSB_TAG_INVALID_BIT, %g7
-       brgez,a,pn      %g5, kvmap_itlb_longpath
-        TSB_STORE(%g1, %g7)
-
        TSB_WRITE(%g1, %g5, %g6)
 
        /* fallthrough to TLB load */
@@ -118,6 +110,12 @@ kvmap_dtlb_obp:
        ba,pt           %xcc, kvmap_dtlb_load
         nop
 
+kvmap_linear_early:
+       sethi           %hi(kern_linear_pte_xor), %g7
+       ldx             [%g7 + %lo(kern_linear_pte_xor)], %g2
+       ba,pt           %xcc, kvmap_dtlb_tsb4m_load
+        xor            %g2, %g4, %g5
+
        .align          32
 kvmap_dtlb_tsb4m_load:
        TSB_LOCK_TAG(%g1, %g2, %g7)
@@ -146,105 +144,17 @@ kvmap_dtlb_4v:
        /* Correct TAG_TARGET is already in %g6, check 4mb TSB.  */
        KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
 #endif
-       /* TSB entry address left in %g1, lookup linear PTE.
-        * Must preserve %g1 and %g6 (TAG).
-        */
-kvmap_dtlb_tsb4m_miss:
-       /* Clear the PAGE_OFFSET top virtual bits, shift
-        * down to get PFN, and make sure PFN is in range.
-        */
-661:   sllx            %g4, 0, %g5
-       .section        .page_offset_shift_patch, "ax"
-       .word           661b
-       .previous
-
-       /* Check to see if we know about valid memory at the 4MB
-        * chunk this physical address will reside within.
+       /* Linear mapping TSB lookup failed.  Fallthrough to kernel
+        * page table based lookup.
         */
-661:   srlx            %g5, MAX_PHYS_ADDRESS_BITS, %g2
-       .section        .page_offset_shift_patch, "ax"
-       .word           661b
-       .previous
-
-       brnz,pn         %g2, kvmap_dtlb_longpath
-        nop
-
-       /* This unconditional branch and delay-slot nop gets patched
-        * by the sethi sequence once the bitmap is properly setup.
-        */
-       .globl          valid_addr_bitmap_insn
-valid_addr_bitmap_insn:
-       ba,pt           %xcc, 2f
-        nop
-       .subsection     2
-       .globl          valid_addr_bitmap_patch
-valid_addr_bitmap_patch:
-       sethi           %hi(sparc64_valid_addr_bitmap), %g7
-       or              %g7, %lo(sparc64_valid_addr_bitmap), %g7
-       .previous
-
-661:   srlx            %g5, ILOG2_4MB, %g2
-       .section        .page_offset_shift_patch, "ax"
-       .word           661b
-       .previous
-
-       srlx            %g2, 6, %g5
-       and             %g2, 63, %g2
-       sllx            %g5, 3, %g5
-       ldx             [%g7 + %g5], %g5
-       mov             1, %g7
-       sllx            %g7, %g2, %g7
-       andcc           %g5, %g7, %g0
-       be,pn           %xcc, kvmap_dtlb_longpath
-
-2:      sethi          %hi(kpte_linear_bitmap), %g2
-
-       /* Get the 256MB physical address index. */
-661:   sllx            %g4, 0, %g5
-       .section        .page_offset_shift_patch, "ax"
-       .word           661b
-       .previous
-
-       or              %g2, %lo(kpte_linear_bitmap), %g2
-
-661:   srlx            %g5, ILOG2_256MB, %g5
-       .section        .page_offset_shift_patch, "ax"
-       .word           661b
-       .previous
-
-       and             %g5, (32 - 1), %g7
-
-       /* Divide by 32 to get the offset into the bitmask.  */
-       srlx            %g5, 5, %g5
-       add             %g7, %g7, %g7
-       sllx            %g5, 3, %g5
-
-       /* kern_linear_pte_xor[(mask >> shift) & 3)] */
-       ldx             [%g2 + %g5], %g2
-       srlx            %g2, %g7, %g7
-       sethi           %hi(kern_linear_pte_xor), %g5
-       and             %g7, 3, %g7
-       or              %g5, %lo(kern_linear_pte_xor), %g5
-       sllx            %g7, 3, %g7
-       ldx             [%g5 + %g7], %g2
-
        .globl          kvmap_linear_patch
 kvmap_linear_patch:
-       ba,pt           %xcc, kvmap_dtlb_tsb4m_load
-        xor            %g2, %g4, %g5
+       ba,a,pt         %xcc, kvmap_linear_early
 
 kvmap_dtlb_vmalloc_addr:
        KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
 
        TSB_LOCK_TAG(%g1, %g2, %g7)
-
-       /* Load and check PTE.  */
-       ldxa            [%g5] ASI_PHYS_USE_EC, %g5
-       mov             1, %g7
-       sllx            %g7, TSB_TAG_INVALID_BIT, %g7
-       brgez,a,pn      %g5, kvmap_dtlb_longpath
-        TSB_STORE(%g1, %g7)
-
        TSB_WRITE(%g1, %g5, %g6)
 
        /* fallthrough to TLB load */
@@ -276,13 +186,8 @@ kvmap_dtlb_load:
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 kvmap_vmemmap:
-       sub             %g4, %g5, %g5
-       srlx            %g5, ILOG2_4MB, %g5
-       sethi           %hi(vmemmap_table), %g1
-       sllx            %g5, 3, %g5
-       or              %g1, %lo(vmemmap_table), %g1
-       ba,pt           %xcc, kvmap_dtlb_load
-        ldx            [%g1 + %g5], %g5
+       KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
+       ba,a,pt         %xcc, kvmap_dtlb_load
 #endif
 
 kvmap_dtlb_nonlinear:
@@ -294,8 +199,8 @@ kvmap_dtlb_nonlinear:
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
        /* Do not use the TSB for vmemmap.  */
-       mov             (VMEMMAP_BASE >> 40), %g5
-       sllx            %g5, 40, %g5
+       sethi           %hi(VMEMMAP_BASE), %g5
+       ldx             [%g5 + %lo(VMEMMAP_BASE)], %g5
        cmp             %g4,%g5
        bgeu,pn         %xcc, kvmap_vmemmap
         nop
@@ -307,8 +212,8 @@ kvmap_dtlb_tsbmiss:
        sethi           %hi(MODULES_VADDR), %g5
        cmp             %g4, %g5
        blu,pn          %xcc, kvmap_dtlb_longpath
-        mov            (VMALLOC_END >> 40), %g5
-       sllx            %g5, 40, %g5
+        sethi          %hi(VMALLOC_END), %g5
+       ldx             [%g5 + %lo(VMALLOC_END)], %g5
        cmp             %g4, %g5
        bgeu,pn         %xcc, kvmap_dtlb_longpath
         nop
index 0af28b9..4310332 100644 (file)
@@ -1078,7 +1078,8 @@ static void ldc_iommu_release(struct ldc_channel *lp)
 
 struct ldc_channel *ldc_alloc(unsigned long id,
                              const struct ldc_channel_config *cfgp,
-                             void *event_arg)
+                             void *event_arg,
+                             const char *name)
 {
        struct ldc_channel *lp;
        const struct ldc_mode_ops *mops;
@@ -1093,6 +1094,8 @@ struct ldc_channel *ldc_alloc(unsigned long id,
        err = -EINVAL;
        if (!cfgp)
                goto out_err;
+       if (!name)
+               goto out_err;
 
        switch (cfgp->mode) {
        case LDC_MODE_RAW:
@@ -1185,6 +1188,21 @@ struct ldc_channel *ldc_alloc(unsigned long id,
 
        INIT_HLIST_HEAD(&lp->mh_list);
 
+       snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
+       snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
+
+       err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
+                         lp->rx_irq_name, lp);
+       if (err)
+               goto out_free_txq;
+
+       err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
+                         lp->tx_irq_name, lp);
+       if (err) {
+               free_irq(lp->cfg.rx_irq, lp);
+               goto out_free_txq;
+       }
+
        return lp;
 
 out_free_txq:
@@ -1237,31 +1255,14 @@ EXPORT_SYMBOL(ldc_free);
  * state.  This does not initiate a handshake, ldc_connect() does
  * that.
  */
-int ldc_bind(struct ldc_channel *lp, const char *name)
+int ldc_bind(struct ldc_channel *lp)
 {
        unsigned long hv_err, flags;
        int err = -EINVAL;
 
-       if (!name ||
-           (lp->state != LDC_STATE_INIT))
+       if (lp->state != LDC_STATE_INIT)
                return -EINVAL;
 
-       snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
-       snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
-
-       err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
-                         lp->rx_irq_name, lp);
-       if (err)
-               return err;
-
-       err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
-                         lp->tx_irq_name, lp);
-       if (err) {
-               free_irq(lp->cfg.rx_irq, lp);
-               return err;
-       }
-
-
        spin_lock_irqsave(&lp->lock, flags);
 
        enable_irq(lp->cfg.rx_irq);
index 683c4af..9bbb8f2 100644 (file)
@@ -37,6 +37,7 @@ unsigned long amba_system_id;
 static DEFINE_SPINLOCK(leon_irq_lock);
 
 static unsigned long leon3_gptimer_idx; /* Timer Index (0..6) within Timer Core */
+static unsigned long leon3_gptimer_ackmask; /* For clearing pending bit */
 unsigned long leon3_gptimer_irq; /* interrupt controller irq number */
 unsigned int sparc_leon_eirq;
 #define LEON_IMASK(cpu) (&leon3_irqctrl_regs->mask[cpu])
@@ -260,11 +261,19 @@ void leon_update_virq_handling(unsigned int virq,
 
 static u32 leon_cycles_offset(void)
 {
-       u32 rld, val, off;
+       u32 rld, val, ctrl, off;
+
        rld = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].rld);
        val = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val);
-       off = rld - val;
-       return rld - val;
+       /* When the control register reports a pending interrupt, re-read
+        * the counter value and add one extra reload period to the offset
+        * (2 * rld - val instead of rld - val).
+        */
+       ctrl = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl);
+       if (LEON3_GPTIMER_CTRL_ISPENDING(ctrl)) {
+               val = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val);
+               off = 2 * rld - val;
+       } else {
+               off = rld - val;
+       }
+
+       return off;
 }
 
 #ifdef CONFIG_SMP
@@ -302,6 +311,7 @@ void __init leon_init_timers(void)
        int ampopts;
        int err;
        u32 config;
+       u32 ctrl;
 
        sparc_config.get_cycles_offset = leon_cycles_offset;
        sparc_config.cs_period = 1000000 / HZ;
@@ -374,6 +384,16 @@ void __init leon_init_timers(void)
        if (!(leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq))
                goto bad;
 
+       ctrl = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl);
+       LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
+                             ctrl | LEON3_GPTIMER_CTRL_PENDING);
+       ctrl = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl);
+
+       if ((ctrl & LEON3_GPTIMER_CTRL_PENDING) != 0)
+               leon3_gptimer_ackmask = ~LEON3_GPTIMER_CTRL_PENDING;
+       else
+               leon3_gptimer_ackmask = ~0;
+
        LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0);
        LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].rld,
                                (((1000000 / HZ) - 1)));
@@ -452,6 +472,11 @@ bad:
 
 static void leon_clear_clock_irq(void)
 {
+       u32 ctrl;
+
+       /* Write the timer control register back masked with
+        * leon3_gptimer_ackmask, which leon_init_timers() sets to either
+        * ~LEON3_GPTIMER_CTRL_PENDING or ~0.
+        */
+       ctrl = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl);
+       LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
+                             ctrl & leon3_gptimer_ackmask);
 }
 
 static void leon_load_profile_irq(int cpu, unsigned int limit)
index 269af58..7e967c8 100644 (file)
@@ -191,12 +191,41 @@ static const struct pcr_ops n4_pcr_ops = {
        .pcr_nmi_disable        = PCR_N4_PICNPT,
 };
 
+/* Read performance register reg_num via sun4v_t5_get_perfreg().  The
+ * call's return status is deliberately discarded; whatever it stored in
+ * val is returned.
+ */
+static u64 n5_pcr_read(unsigned long reg_num)
+{
+       unsigned long val;
+
+       (void) sun4v_t5_get_perfreg(reg_num, &val);
+
+       return val;
+}
+
+/* Write val to performance register reg_num via sun4v_t5_set_perfreg().
+ * The call's return status is deliberately discarded.
+ */
+static void n5_pcr_write(unsigned long reg_num, u64 val)
+{
+       (void) sun4v_t5_set_perfreg(reg_num, val);
+}
+
+/* pcr_ops installed by setup_sun4v_pcr_ops() for SUN4V_CHIP_NIAGARA5.
+ * PCR reads/writes go through the n5 helpers; PIC access, the NMI PICL
+ * value and the PCR_N4_* bit definitions are shared with n4_pcr_ops.
+ * NOTE(review): the meaning of the value 26 placed in the SL field is not
+ * visible here -- confirm against the T5 performance counter documentation.
+ */
+static const struct pcr_ops n5_pcr_ops = {
+       .read_pcr               = n5_pcr_read,
+       .write_pcr              = n5_pcr_write,
+       .read_pic               = n4_pic_read,
+       .write_pic              = n4_pic_write,
+       .nmi_picl_value         = n4_picl_value,
+       .pcr_nmi_enable         = (PCR_N4_PICNPT | PCR_N4_STRACE |
+                                  PCR_N4_UTRACE | PCR_N4_TOE |
+                                  (26 << PCR_N4_SL_SHIFT)),
+       .pcr_nmi_disable        = PCR_N4_PICNPT,
+};
+
+
 static unsigned long perf_hsvc_group;
 static unsigned long perf_hsvc_major;
 static unsigned long perf_hsvc_minor;
 
 static int __init register_perf_hsvc(void)
 {
+       unsigned long hverror;
+
        if (tlb_type == hypervisor) {
                switch (sun4v_chip_type) {
                case SUN4V_CHIP_NIAGARA1:
@@ -215,6 +244,10 @@ static int __init register_perf_hsvc(void)
                        perf_hsvc_group = HV_GRP_VT_CPU;
                        break;
 
+               case SUN4V_CHIP_NIAGARA5:
+                       perf_hsvc_group = HV_GRP_T5_CPU;
+                       break;
+
                default:
                        return -ENODEV;
                }
@@ -222,10 +255,12 @@ static int __init register_perf_hsvc(void)
 
                perf_hsvc_major = 1;
                perf_hsvc_minor = 0;
-               if (sun4v_hvapi_register(perf_hsvc_group,
-                                        perf_hsvc_major,
-                                        &perf_hsvc_minor)) {
-                       printk("perfmon: Could not register hvapi.\n");
+               hverror = sun4v_hvapi_register(perf_hsvc_group,
+                                              perf_hsvc_major,
+                                              &perf_hsvc_minor);
+               if (hverror) {
+                       pr_err("perfmon: Could not register hvapi(0x%lx).\n",
+                              hverror);
                        return -ENODEV;
                }
        }
@@ -254,6 +289,10 @@ static int __init setup_sun4v_pcr_ops(void)
                pcr_ops = &n4_pcr_ops;
                break;
 
+       case SUN4V_CHIP_NIAGARA5:
+               pcr_ops = &n5_pcr_ops;
+               break;
+
        default:
                ret = -ENODEV;
                break;
index d35c490..c9759ad 100644 (file)
@@ -1662,7 +1662,8 @@ static bool __init supported_pmu(void)
                sparc_pmu = &niagara2_pmu;
                return true;
        }
-       if (!strcmp(sparc_pmu_type, "niagara4")) {
+       if (!strcmp(sparc_pmu_type, "niagara4") ||
+           !strcmp(sparc_pmu_type, "niagara5")) {
                sparc_pmu = &niagara4_pmu;
                return true;
        }
index 3fdb455..e629b83 100644 (file)
@@ -141,21 +141,9 @@ static void __init boot_flags_init(char *commands)
                                process_switch(*commands++);
                        continue;
                }
-               if (!strncmp(commands, "mem=", 4)) {
-                       /*
-                        * "mem=XXX[kKmM]" overrides the PROM-reported
-                        * memory size.
-                        */
-                       cmdline_memory_size = simple_strtoul(commands + 4,
-                                                            &commands, 0);
-                       if (*commands == 'K' || *commands == 'k') {
-                               cmdline_memory_size <<= 10;
-                               commands++;
-                       } else if (*commands=='M' || *commands=='m') {
-                               cmdline_memory_size <<= 20;
-                               commands++;
-                       }
-               }
+               if (!strncmp(commands, "mem=", 4))
+                       cmdline_memory_size = memparse(commands + 4, &commands);
+
                while (*commands && *commands != ' ')
                        commands++;
        }
@@ -500,12 +488,16 @@ static void __init init_sparc64_elf_hwcap(void)
                    sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
                    sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
                    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+                   sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+                   sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
                    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
                        cap |= HWCAP_SPARC_BLKINIT;
                if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
                    sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
                    sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
                    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+                   sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+                   sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
                    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
                        cap |= HWCAP_SPARC_N2;
        }
@@ -533,6 +525,8 @@ static void __init init_sparc64_elf_hwcap(void)
                            sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
                            sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
                            sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+                           sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+                           sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
                            sun4v_chip_type == SUN4V_CHIP_SPARC64X)
                                cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 |
                                        AV_SPARC_ASI_BLK_INIT |
@@ -540,6 +534,8 @@ static void __init init_sparc64_elf_hwcap(void)
                        if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
                            sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
                            sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+                           sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+                           sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
                            sun4v_chip_type == SUN4V_CHIP_SPARC64X)
                                cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC |
                                        AV_SPARC_FMAF);
index f7ba875..c9300bf 100644 (file)
@@ -1467,6 +1467,13 @@ static void __init pcpu_populate_pte(unsigned long addr)
        pud_t *pud;
        pmd_t *pmd;
 
+       if (pgd_none(*pgd)) {
+               pud_t *new;
+
+               new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+               pgd_populate(&init_mm, pgd, new);
+       }
+
        pud = pud_offset(pgd, addr);
        if (pud_none(*pud)) {
                pmd_t *new;
index e0c09bf..6179e19 100644 (file)
@@ -195,6 +195,11 @@ sun4v_tsb_miss_common:
         ldx    [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7
 
 sun4v_itlb_error:
+       rdpr    %tl, %g1
+       cmp     %g1, 1
+       ble,pt  %icc, sun4v_bad_ra
+        or     %g0, FAULT_CODE_BAD_RA | FAULT_CODE_ITLB, %g1
+
        sethi   %hi(sun4v_err_itlb_vaddr), %g1
        stx     %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)]
        sethi   %hi(sun4v_err_itlb_ctx), %g1
@@ -206,15 +211,10 @@ sun4v_itlb_error:
        sethi   %hi(sun4v_err_itlb_error), %g1
        stx     %o0, [%g1 + %lo(sun4v_err_itlb_error)]
 
+       sethi   %hi(1f), %g7
        rdpr    %tl, %g4
-       cmp     %g4, 1
-       ble,pt  %icc, 1f
-        sethi  %hi(2f), %g7
        ba,pt   %xcc, etraptl1
-        or     %g7, %lo(2f), %g7
-
-1:     ba,pt   %xcc, etrap
-2:      or     %g7, %lo(2b), %g7
+1:      or     %g7, %lo(1f), %g7
        mov     %l4, %o1
        call    sun4v_itlb_error_report
         add    %sp, PTREGS_OFF, %o0
@@ -222,6 +222,11 @@ sun4v_itlb_error:
        /* NOTREACHED */
 
 sun4v_dtlb_error:
+       rdpr    %tl, %g1
+       cmp     %g1, 1
+       ble,pt  %icc, sun4v_bad_ra
+        or     %g0, FAULT_CODE_BAD_RA | FAULT_CODE_DTLB, %g1
+
        sethi   %hi(sun4v_err_dtlb_vaddr), %g1
        stx     %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)]
        sethi   %hi(sun4v_err_dtlb_ctx), %g1
@@ -233,21 +238,23 @@ sun4v_dtlb_error:
        sethi   %hi(sun4v_err_dtlb_error), %g1
        stx     %o0, [%g1 + %lo(sun4v_err_dtlb_error)]
 
+       sethi   %hi(1f), %g7
        rdpr    %tl, %g4
-       cmp     %g4, 1
-       ble,pt  %icc, 1f
-        sethi  %hi(2f), %g7
        ba,pt   %xcc, etraptl1
-        or     %g7, %lo(2f), %g7
-
-1:     ba,pt   %xcc, etrap
-2:      or     %g7, %lo(2b), %g7
+1:      or     %g7, %lo(1f), %g7
        mov     %l4, %o1
        call    sun4v_dtlb_error_report
         add    %sp, PTREGS_OFF, %o0
 
        /* NOTREACHED */
 
+sun4v_bad_ra:
+       /* Reached from sun4v_itlb_error/sun4v_dtlb_error when %tl <= 1,
+        * with FAULT_CODE_BAD_RA | FAULT_CODE_{ITLB,DTLB} in %g1.
+        * Copy %g4 into %g5 and, in the branch delay slot, the fault code
+        * from %g1 into %g4, then continue at sparc64_realfault_common.
+        */
+       or      %g0, %g4, %g5
+       ba,pt   %xcc, sparc64_realfault_common
+        or     %g1, %g0, %g4
+
+       /* NOTREACHED */
+
        /* Instruction Access Exception, tl0. */
 sun4v_iacc:
        ldxa    [%g0] ASI_SCRATCHPAD, %g2
index fb6640e..981a769 100644 (file)
@@ -2104,6 +2104,11 @@ void sun4v_nonresum_overflow(struct pt_regs *regs)
        atomic_inc(&sun4v_nonresum_oflow_cnt);
 }
 
+/* Common tail of sun4v_itlb_error_report() and sun4v_dtlb_error_report():
+ * pass the trap registers to die_if_kernel() with a "TLB/TSB error" message.
+ */
+static void sun4v_tlb_error(struct pt_regs *regs)
+{
+       die_if_kernel("TLB/TSB error", regs);
+}
+
 unsigned long sun4v_err_itlb_vaddr;
 unsigned long sun4v_err_itlb_ctx;
 unsigned long sun4v_err_itlb_pte;
@@ -2111,8 +2116,7 @@ unsigned long sun4v_err_itlb_error;
 
 void sun4v_itlb_error_report(struct pt_regs *regs, int tl)
 {
-       if (tl > 1)
-               dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+       dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
 
        printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n",
               regs->tpc, tl);
@@ -2125,7 +2129,7 @@ void sun4v_itlb_error_report(struct pt_regs *regs, int tl)
               sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx,
               sun4v_err_itlb_pte, sun4v_err_itlb_error);
 
-       prom_halt();
+       sun4v_tlb_error(regs);
 }
 
 unsigned long sun4v_err_dtlb_vaddr;
@@ -2135,8 +2139,7 @@ unsigned long sun4v_err_dtlb_error;
 
 void sun4v_dtlb_error_report(struct pt_regs *regs, int tl)
 {
-       if (tl > 1)
-               dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+       dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
 
        printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n",
               regs->tpc, tl);
@@ -2149,7 +2152,7 @@ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl)
               sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx,
               sun4v_err_dtlb_pte, sun4v_err_dtlb_error);
 
-       prom_halt();
+       sun4v_tlb_error(regs);
 }
 
 void hypervisor_tlbop_error(unsigned long err, unsigned long op)
index 8647fcc..cb5789c 100644 (file)
@@ -180,8 +180,10 @@ static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp,
                        vdev->tx_irq = sun4v_build_virq(cdev_cfg_handle, *irq);
 
                irq = mdesc_get_property(hp, target, "rx-ino", NULL);
-               if (irq)
+               if (irq) {
                        vdev->rx_irq = sun4v_build_virq(cdev_cfg_handle, *irq);
+                       vdev->rx_ino = *irq;
+               }
 
                chan_id = mdesc_get_property(hp, target, "id", NULL);
                if (chan_id)
@@ -189,6 +191,15 @@ static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp,
        }
 }
 
+/* Forward a valid-state change for interrupt source dev_ino to
+ * sun4v_vintr_set_valid() under cdev_cfg_handle and return its result.
+ */
+int vio_set_intr(unsigned long dev_ino, int state)
+{
+       return sun4v_vintr_set_valid(cdev_cfg_handle, dev_ino, state);
+}
+EXPORT_SYMBOL(vio_set_intr);
+
 static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp,
                                      struct device *parent)
 {
index 7ef081a..526fcb5 100644 (file)
@@ -724,7 +724,7 @@ int vio_ldc_alloc(struct vio_driver_state *vio,
        cfg.tx_irq = vio->vdev->tx_irq;
        cfg.rx_irq = vio->vdev->rx_irq;
 
-       lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg);
+       lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg, vio->name);
        if (IS_ERR(lp))
                return PTR_ERR(lp);
 
@@ -756,7 +756,7 @@ void vio_port_up(struct vio_driver_state *vio)
 
        err = 0;
        if (state == LDC_STATE_INIT) {
-               err = ldc_bind(vio->lp, vio->name);
+               err = ldc_bind(vio->lp);
                if (err)
                        printk(KERN_WARNING "%s: Port %lu bind failed, "
                               "err=%d\n",
index 932ff90..0924305 100644 (file)
@@ -35,8 +35,9 @@ jiffies = jiffies_64;
 
 SECTIONS
 {
-       /* swapper_low_pmd_dir is sparc64 only */
-       swapper_low_pmd_dir = 0x0000000000402000;
+#ifdef CONFIG_SPARC64
+       swapper_pg_dir = 0x0000000000402000;
+#endif
        . = INITIAL_ADDRESS;
        .text TEXTSTART :
        {
@@ -122,11 +123,6 @@ SECTIONS
                *(.swapper_4m_tsb_phys_patch)
                __swapper_4m_tsb_phys_patch_end = .;
        }
-       .page_offset_shift_patch : {
-               __page_offset_shift_patch = .;
-               *(.page_offset_shift_patch)
-               __page_offset_shift_patch_end = .;
-       }
        .popc_3insn_patch : {
                __popc_3insn_patch = .;
                *(.popc_3insn_patch)
index 99c017b..f75e690 100644 (file)
@@ -3,8 +3,9 @@
  * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  *
- * Returns 0, if ok, and number of bytes not yet set if exception
- * occurs and we were called as clear_user.
+ * Calls to memset return the initial %o0. Calls to bzero return 0, if ok, and
+ * the number of bytes not yet set if an exception occurs and we were called
+ * as clear_user.
  */
 
 #include <asm/ptrace.h>
@@ -65,6 +66,8 @@ __bzero_begin:
        .globl  __memset_start, __memset_end
 __memset_start:
 memset:
+       mov     %o0, %g1
+       mov     1, %g4
        and     %o1, 0xff, %g3
        sll     %g3, 8, %g2
        or      %g3, %g2, %g3
@@ -89,6 +92,7 @@ memset:
         sub    %o0, %o2, %o0
 
 __bzero:
+       clr     %g4
        mov     %g0, %g3
 1:
        cmp     %o1, 7
@@ -151,8 +155,8 @@ __bzero:
        bne,a   8f
         EX(stb %g3, [%o0], and %o1, 1)
 8:
-       retl
-        clr    %o0
+       b       0f
+        nop
 7:
        be      13b
         orcc   %o1, 0, %g0
@@ -164,6 +168,12 @@ __bzero:
        bne     8b
         EX(stb %g3, [%o0 - 1], add %o1, 1)
 0:
+       andcc   %g4, 1, %g0
+       be      5f
+        nop
+       retl
+        mov    %g1, %o0
+5:
        retl
         clr    %o0
 __memset_end:
index 587cd05..18fcd71 100644 (file)
@@ -346,6 +346,9 @@ retry:
                down_read(&mm->mmap_sem);
        }
 
+       if (fault_code & FAULT_CODE_BAD_RA)
+               goto do_sigbus;
+
        vma = find_vma(mm, address);
        if (!vma)
                goto bad_area;
index 98ac8e8..2d91c62 100644 (file)
@@ -75,7 +75,6 @@ unsigned long kern_linear_pte_xor[4] __read_mostly;
  * 'cpu' properties, but we need to have this table setup before the
  * MDESC is initialized.
  */
-unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
 
 #ifndef CONFIG_DEBUG_PAGEALLOC
 /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
@@ -84,10 +83,11 @@ unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
  */
 extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
 #endif
+extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
 
 static unsigned long cpu_pgsz_mask;
 
-#define MAX_BANKS      32
+#define MAX_BANKS      1024
 
 static struct linux_prom64_registers pavail[MAX_BANKS];
 static int pavail_ents;
@@ -165,10 +165,6 @@ static void __init read_obp_memory(const char *property,
             cmp_p64, NULL);
 }
 
-unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES /
-                                       sizeof(unsigned long)];
-EXPORT_SYMBOL(sparc64_valid_addr_bitmap);
-
 /* Kernel physical address base and size in bytes.  */
 unsigned long kern_base __read_mostly;
 unsigned long kern_size __read_mostly;
@@ -840,7 +836,10 @@ static int find_node(unsigned long addr)
                if ((addr & p->mask) == p->val)
                        return i;
        }
-       return -1;
+       /* The following condition has been observed on LDOM guests.*/
+       WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node"
+               " rule. Some physical memory will be owned by node 0.");
+       return 0;
 }
 
 static u64 memblock_nid_range(u64 start, u64 end, int *nid)
@@ -1366,9 +1365,144 @@ static unsigned long __init bootmem_init(unsigned long phys_base)
 static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
 static int pall_ents __initdata;
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
+static unsigned long max_phys_bits = 40;
+
+/* Report whether kernel virtual address addr is backed by a valid page.
+ *
+ * Addresses with the top bit set are handled as linear-mapping addresses
+ * and validated through their physical address.  Addresses inside the
+ * kernel image (KERNBASE up to _end) are always valid.  Anything else is
+ * resolved by walking the kernel page tables, honouring huge PUD and PMD
+ * entries, and the resulting pfn is checked with pfn_valid().
+ */
+bool kern_addr_valid(unsigned long addr)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       if ((long)addr < 0L) {
+               unsigned long pa = __pa(addr);
+
+               /* Range-check the physical address, not addr: addr has its
+                * top bit set in this branch, so addr >> max_phys_bits can
+                * never be zero and every linear address would be rejected.
+                */
+               if ((pa >> max_phys_bits) != 0UL)
+                       return false;
+
+               return pfn_valid(pa >> PAGE_SHIFT);
+       }
+
+       if (addr >= (unsigned long) KERNBASE &&
+           addr < (unsigned long)&_end)
+               return true;
+
+       pgd = pgd_offset_k(addr);
+       if (pgd_none(*pgd))
+               return false;
+
+       pud = pud_offset(pgd, addr);
+       if (pud_none(*pud))
+               return false;
+
+       if (pud_large(*pud))
+               return pfn_valid(pud_pfn(*pud));
+
+       pmd = pmd_offset(pud, addr);
+       if (pmd_none(*pmd))
+               return false;
+
+       if (pmd_large(*pmd))
+               return pfn_valid(pmd_pfn(*pmd));
+
+       pte = pte_offset_kernel(pmd, addr);
+       if (pte_none(*pte))
+               return false;
+
+       return pfn_valid(pte_pfn(*pte));
+}
+EXPORT_SYMBOL(kern_addr_valid);
+
+/* Fill huge PUD entries for the linear mapping starting at vstart.
+ *
+ * When vstart is not 16GB aligned, or less than 16GB remains before vend,
+ * a single PUD is written using kern_linear_pte_xor[2].  Otherwise
+ * consecutive PUDs covering 16GB are written using kern_linear_pte_xor[3].
+ * Returns the first virtual address not yet mapped.
+ */
+static unsigned long __ref kernel_map_hugepud(unsigned long vstart,
+                                             unsigned long vend,
+                                             pud_t *pud)
+{
+       const unsigned long mask16gb = (1UL << 34) - 1UL;
+       u64 pte_val = vstart;
+
+       /* Each PUD is 8GB */
+       if ((vstart & mask16gb) ||
+           (vend - vstart <= mask16gb)) {
+               pte_val ^= kern_linear_pte_xor[2];
+               pud_val(*pud) = pte_val | _PAGE_PUD_HUGE;
+
+               return vstart + PUD_SIZE;
+       }
+
+       pte_val ^= kern_linear_pte_xor[3];
+       pte_val |= _PAGE_PUD_HUGE;
+
+       /* vend is reused as the end of the 16GB run being filled. */
+       vend = vstart + mask16gb + 1UL;
+       while (vstart < vend) {
+               pud_val(*pud) = pte_val;
+
+               pte_val += PUD_SIZE;
+               vstart += PUD_SIZE;
+               pud++;
+       }
+       return vstart;
+}
+
+/* True when huge mappings are allowed (guard), vstart is PUD aligned and
+ * at least PUD_SIZE bytes remain before vend.
+ */
+static bool kernel_can_map_hugepud(unsigned long vstart, unsigned long vend,
+                                  bool guard)
+{
+       return guard && !(vstart & ~PUD_MASK) && (vend - vstart) >= PUD_SIZE;
+}
+
+/* Fill huge PMD entries for the linear mapping starting at vstart.
+ *
+ * The kern_linear_pte_xor[] index is chosen from alignment and remaining
+ * length: [0] for a single PMD when vstart is not 256MB aligned or less
+ * than 256MB remains, [1] for a 256MB run when vstart is not 2GB aligned
+ * or less than 2GB remains, and [2] for a 2GB run otherwise.
+ * Returns the first virtual address not yet mapped.
+ */
+static unsigned long __ref kernel_map_hugepmd(unsigned long vstart,
+                                             unsigned long vend,
+                                             pmd_t *pmd)
+{
+       const unsigned long mask256mb = (1UL << 28) - 1UL;
+       const unsigned long mask2gb = (1UL << 31) - 1UL;
+       u64 pte_val = vstart;
+
+       /* Each PMD is 8MB */
+       if ((vstart & mask256mb) ||
+           (vend - vstart <= mask256mb)) {
+               pte_val ^= kern_linear_pte_xor[0];
+               pmd_val(*pmd) = pte_val | _PAGE_PMD_HUGE;
+
+               return vstart + PMD_SIZE;
+       }
+
+       /* vend is reused below as the end of the run being filled. */
+       if ((vstart & mask2gb) ||
+           (vend - vstart <= mask2gb)) {
+               pte_val ^= kern_linear_pte_xor[1];
+               pte_val |= _PAGE_PMD_HUGE;
+               vend = vstart + mask256mb + 1UL;
+       } else {
+               pte_val ^= kern_linear_pte_xor[2];
+               pte_val |= _PAGE_PMD_HUGE;
+               vend = vstart + mask2gb + 1UL;
+       }
+
+       while (vstart < vend) {
+               pmd_val(*pmd) = pte_val;
+
+               pte_val += PMD_SIZE;
+               vstart += PMD_SIZE;
+               pmd++;
+       }
+
+       return vstart;
+}
+
+/* True when huge mappings are allowed (guard), vstart is PMD aligned and
+ * at least PMD_SIZE bytes remain before vend.
+ */
+static bool kernel_can_map_hugepmd(unsigned long vstart, unsigned long vend,
+                                  bool guard)
+{
+       return guard && !(vstart & ~PMD_MASK) && (vend - vstart) >= PMD_SIZE;
+}
+
 static unsigned long __ref kernel_map_range(unsigned long pstart,
-                                           unsigned long pend, pgprot_t prot)
+                                           unsigned long pend, pgprot_t prot,
+                                           bool use_huge)
 {
        unsigned long vstart = PAGE_OFFSET + pstart;
        unsigned long vend = PAGE_OFFSET + pend;
@@ -1387,19 +1521,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
                pmd_t *pmd;
                pte_t *pte;
 
+               if (pgd_none(*pgd)) {
+                       pud_t *new;
+
+                       new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+                       alloc_bytes += PAGE_SIZE;
+                       pgd_populate(&init_mm, pgd, new);
+               }
                pud = pud_offset(pgd, vstart);
                if (pud_none(*pud)) {
                        pmd_t *new;
 
+                       if (kernel_can_map_hugepud(vstart, vend, use_huge)) {
+                               vstart = kernel_map_hugepud(vstart, vend, pud);
+                               continue;
+                       }
                        new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
                        alloc_bytes += PAGE_SIZE;
                        pud_populate(&init_mm, pud, new);
                }
 
                pmd = pmd_offset(pud, vstart);
-               if (!pmd_present(*pmd)) {
+               if (pmd_none(*pmd)) {
                        pte_t *new;
 
+                       if (kernel_can_map_hugepmd(vstart, vend, use_huge)) {
+                               vstart = kernel_map_hugepmd(vstart, vend, pmd);
+                               continue;
+                       }
                        new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
                        alloc_bytes += PAGE_SIZE;
                        pmd_populate_kernel(&init_mm, pmd, new);
@@ -1422,100 +1571,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
        return alloc_bytes;
 }
 
-extern unsigned int kvmap_linear_patch[1];
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
-static void __init kpte_set_val(unsigned long index, unsigned long val)
-{
-       unsigned long *ptr = kpte_linear_bitmap;
-
-       val <<= ((index % (BITS_PER_LONG / 2)) * 2);
-       ptr += (index / (BITS_PER_LONG / 2));
-
-       *ptr |= val;
-}
-
-static const unsigned long kpte_shift_min = 28; /* 256MB */
-static const unsigned long kpte_shift_max = 34; /* 16GB */
-static const unsigned long kpte_shift_incr = 3;
-
-static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end,
-                                          unsigned long shift)
+static void __init flush_all_kernel_tsbs(void)
 {
-       unsigned long size = (1UL << shift);
-       unsigned long mask = (size - 1UL);
-       unsigned long remains = end - start;
-       unsigned long val;
-
-       if (remains < size || (start & mask))
-               return start;
-
-       /* VAL maps:
-        *
-        *      shift 28 --> kern_linear_pte_xor index 1
-        *      shift 31 --> kern_linear_pte_xor index 2
-        *      shift 34 --> kern_linear_pte_xor index 3
-        */
-       val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1;
-
-       remains &= ~mask;
-       if (shift != kpte_shift_max)
-               remains = size;
-
-       while (remains) {
-               unsigned long index = start >> kpte_shift_min;
+       int i;
 
-               kpte_set_val(index, val);
+       for (i = 0; i < KERNEL_TSB_NENTRIES; i++) {
+               struct tsb *ent = &swapper_tsb[i];
 
-               start += 1UL << kpte_shift_min;
-               remains -= 1UL << kpte_shift_min;
+               ent->tag = (1UL << TSB_TAG_INVALID_BIT);
        }
+#ifndef CONFIG_DEBUG_PAGEALLOC
+       for (i = 0; i < KERNEL_TSB4M_NENTRIES; i++) {
+               struct tsb *ent = &swapper_4m_tsb[i];
 
-       return start;
-}
-
-static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
-{
-       unsigned long smallest_size, smallest_mask;
-       unsigned long s;
-
-       smallest_size = (1UL << kpte_shift_min);
-       smallest_mask = (smallest_size - 1UL);
-
-       while (start < end) {
-               unsigned long orig_start = start;
-
-               for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) {
-                       start = kpte_mark_using_shift(start, end, s);
-
-                       if (start != orig_start)
-                               break;
-               }
-
-               if (start == orig_start)
-                       start = (start + smallest_size) & ~smallest_mask;
+               ent->tag = (1UL << TSB_TAG_INVALID_BIT);
        }
+#endif
 }
 
-static void __init init_kpte_bitmap(void)
-{
-       unsigned long i;
-
-       for (i = 0; i < pall_ents; i++) {
-               unsigned long phys_start, phys_end;
-
-               phys_start = pall[i].phys_addr;
-               phys_end = phys_start + pall[i].reg_size;
-
-               mark_kpte_bitmap(phys_start, phys_end);
-       }
-}
+extern unsigned int kvmap_linear_patch[1];
 
 static void __init kernel_physical_mapping_init(void)
 {
-#ifdef CONFIG_DEBUG_PAGEALLOC
        unsigned long i, mem_alloced = 0UL;
+       bool use_huge = true;
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+       use_huge = false;
+#endif
        for (i = 0; i < pall_ents; i++) {
                unsigned long phys_start, phys_end;
 
@@ -1523,7 +1606,7 @@ static void __init kernel_physical_mapping_init(void)
                phys_end = phys_start + pall[i].reg_size;
 
                mem_alloced += kernel_map_range(phys_start, phys_end,
-                                               PAGE_KERNEL);
+                                               PAGE_KERNEL, use_huge);
        }
 
        printk("Allocated %ld bytes for kernel page tables.\n",
@@ -1532,8 +1615,9 @@ static void __init kernel_physical_mapping_init(void)
        kvmap_linear_patch[0] = 0x01000000; /* nop */
        flushi(&kvmap_linear_patch[0]);
 
+       flush_all_kernel_tsbs();
+
        __flush_tlb_all();
-#endif
 }
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
@@ -1543,7 +1627,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
        unsigned long phys_end = phys_start + (numpages * PAGE_SIZE);
 
        kernel_map_range(phys_start, phys_end,
-                        (enable ? PAGE_KERNEL : __pgprot(0)));
+                        (enable ? PAGE_KERNEL : __pgprot(0)), false);
 
        flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
                               PAGE_OFFSET + phys_end);
@@ -1571,76 +1655,56 @@ unsigned long __init find_ecache_flush_span(unsigned long size)
 unsigned long PAGE_OFFSET;
 EXPORT_SYMBOL(PAGE_OFFSET);
 
-static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits)
-{
-       unsigned long final_shift;
-       unsigned int val = *insn;
-       unsigned int cnt;
-
-       /* We are patching in ilog2(max_supported_phys_address), and
-        * we are doing so in a manner similar to a relocation addend.
-        * That is, we are adding the shift value to whatever value
-        * is in the shift instruction count field already.
-        */
-       cnt = (val & 0x3f);
-       val &= ~0x3f;
-
-       /* If we are trying to shift >= 64 bits, clear the destination
-        * register.  This can happen when phys_bits ends up being equal
-        * to MAX_PHYS_ADDRESS_BITS.
-        */
-       final_shift = (cnt + (64 - phys_bits));
-       if (final_shift >= 64) {
-               unsigned int rd = (val >> 25) & 0x1f;
-
-               val = 0x80100000 | (rd << 25);
-       } else {
-               val |= final_shift;
-       }
-       *insn = val;
-
-       __asm__ __volatile__("flush     %0"
-                            : /* no outputs */
-                            : "r" (insn));
-}
-
-static void __init page_offset_shift_patch(unsigned long phys_bits)
-{
-       extern unsigned int __page_offset_shift_patch;
-       extern unsigned int __page_offset_shift_patch_end;
-       unsigned int *p;
-
-       p = &__page_offset_shift_patch;
-       while (p < &__page_offset_shift_patch_end) {
-               unsigned int *insn = (unsigned int *)(unsigned long)*p;
+unsigned long VMALLOC_END   = 0x0000010000000000UL;
+EXPORT_SYMBOL(VMALLOC_END);
 
-               page_offset_shift_patch_one(insn, phys_bits);
-
-               p++;
-       }
-}
+unsigned long sparc64_va_hole_top =    0xfffff80000000000UL;
+unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL;
 
 static void __init setup_page_offset(void)
 {
-       unsigned long max_phys_bits = 40;
-
        if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+               /* Cheetah/Panther support a full 64-bit virtual
+                * address, so we can use all that our page tables
+                * support.
+                */
+               sparc64_va_hole_top =    0xfff0000000000000UL;
+               sparc64_va_hole_bottom = 0x0010000000000000UL;
+
                max_phys_bits = 42;
        } else if (tlb_type == hypervisor) {
                switch (sun4v_chip_type) {
                case SUN4V_CHIP_NIAGARA1:
                case SUN4V_CHIP_NIAGARA2:
+                       /* T1 and T2 support 48-bit virtual addresses.  */
+                       sparc64_va_hole_top =    0xffff800000000000UL;
+                       sparc64_va_hole_bottom = 0x0000800000000000UL;
+
                        max_phys_bits = 39;
                        break;
                case SUN4V_CHIP_NIAGARA3:
+                       /* T3 supports 48-bit virtual addresses.  */
+                       sparc64_va_hole_top =    0xffff800000000000UL;
+                       sparc64_va_hole_bottom = 0x0000800000000000UL;
+
                        max_phys_bits = 43;
                        break;
                case SUN4V_CHIP_NIAGARA4:
                case SUN4V_CHIP_NIAGARA5:
                case SUN4V_CHIP_SPARC64X:
-               default:
+               case SUN4V_CHIP_SPARC_M6:
+                       /* T4 and later support 52-bit virtual addresses.  */
+                       sparc64_va_hole_top =    0xfff8000000000000UL;
+                       sparc64_va_hole_bottom = 0x0008000000000000UL;
                        max_phys_bits = 47;
                        break;
+               case SUN4V_CHIP_SPARC_M7:
+               default:
+                       /* M7 and later support 52-bit virtual addresses.  */
+                       sparc64_va_hole_top =    0xfff8000000000000UL;
+                       sparc64_va_hole_bottom = 0x0008000000000000UL;
+                       max_phys_bits = 49;
+                       break;
                }
        }
 
@@ -1650,12 +1714,16 @@ static void __init setup_page_offset(void)
                prom_halt();
        }
 
-       PAGE_OFFSET = PAGE_OFFSET_BY_BITS(max_phys_bits);
+       PAGE_OFFSET = sparc64_va_hole_top;
+       VMALLOC_END = ((sparc64_va_hole_bottom >> 1) +
+                      (sparc64_va_hole_bottom >> 2));
 
-       pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
+       pr_info("MM: PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
                PAGE_OFFSET, max_phys_bits);
-
-       page_offset_shift_patch(max_phys_bits);
+       pr_info("MM: VMALLOC [0x%016lx --> 0x%016lx]\n",
+               VMALLOC_START, VMALLOC_END);
+       pr_info("MM: VMEMMAP [0x%016lx --> 0x%016lx]\n",
+               VMEMMAP_BASE, VMEMMAP_BASE << 1);
 }
 
 static void __init tsb_phys_patch(void)
@@ -1700,21 +1768,42 @@ static void __init tsb_phys_patch(void)
 #define NUM_KTSB_DESCR 1
 #endif
 static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR];
-extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+
+/* The swapper TSBs are loaded with a base sequence of:
+ *
+ *     sethi   %uhi(SYMBOL), REG1
+ *     sethi   %hi(SYMBOL), REG2
+ *     or      REG1, %ulo(SYMBOL), REG1
+ *     or      REG2, %lo(SYMBOL), REG2
+ *     sllx    REG1, 32, REG1
+ *     or      REG1, REG2, REG1
+ *
+ * When we use physical addressing for the TSB accesses, we patch the
+ * first four instructions in the above sequence.
+ */
 
 static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa)
 {
-       pa >>= KTSB_PHYS_SHIFT;
+       unsigned long high_bits, low_bits;
+
+       high_bits = (pa >> 32) & 0xffffffff;
+       low_bits = (pa >> 0) & 0xffffffff;
 
        while (start < end) {
                unsigned int *ia = (unsigned int *)(unsigned long)*start;
 
-               ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10);
+               ia[0] = (ia[0] & ~0x3fffff) | (high_bits >> 10);
                __asm__ __volatile__("flush     %0" : : "r" (ia));
 
-               ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff);
+               ia[1] = (ia[1] & ~0x3fffff) | (low_bits >> 10);
                __asm__ __volatile__("flush     %0" : : "r" (ia + 1));
 
+               ia[2] = (ia[2] & ~0x1fff) | (high_bits & 0x3ff);
+               __asm__ __volatile__("flush     %0" : : "r" (ia + 2));
+
+               ia[3] = (ia[3] & ~0x1fff) | (low_bits & 0x3ff);
+               __asm__ __volatile__("flush     %0" : : "r" (ia + 3));
+
                start++;
        }
 }
@@ -1853,11 +1942,56 @@ static void __init sun4v_linear_pte_xor_finalize(void)
 /* paging_init() sets up the page tables */
 
 static unsigned long last_valid_pfn;
-pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
 static void sun4u_pgprot_init(void);
 static void sun4v_pgprot_init(void);
 
+static phys_addr_t __init available_memory(void)
+{
+       phys_addr_t available = 0ULL;
+       phys_addr_t pa_start, pa_end;
+       u64 i;
+
+       for_each_free_mem_range(i, NUMA_NO_NODE, &pa_start, &pa_end, NULL)
+               available = available + (pa_end  - pa_start);
+
+       return available;
+}
+
+/* We need to exclude reserved regions. This exclusion will include
+ * vmlinux and initrd. To be more precise the initrd size could be used to
+ * compute a new lower limit because it is freed later during initialization.
+ */
+static void __init reduce_memory(phys_addr_t limit_ram)
+{
+       phys_addr_t avail_ram = available_memory();
+       phys_addr_t pa_start, pa_end;
+       u64 i;
+
+       if (limit_ram >= avail_ram)
+               return;
+
+       for_each_free_mem_range(i, NUMA_NO_NODE, &pa_start, &pa_end, NULL) {
+               phys_addr_t region_size = pa_end - pa_start;
+               phys_addr_t clip_start = pa_start;
+
+               avail_ram = avail_ram - region_size;
+               /* Are we consuming too much? */
+               if (avail_ram < limit_ram) {
+                       phys_addr_t give_back = limit_ram - avail_ram;
+
+                       region_size = region_size - give_back;
+                       clip_start = clip_start + give_back;
+               }
+
+               memblock_remove(clip_start, region_size);
+
+               if (avail_ram <= limit_ram)
+                       break;
+               i = 0UL;
+       }
+}
+
 void __init paging_init(void)
 {
        unsigned long end_pfn, shift, phys_base;
@@ -1937,7 +2071,8 @@ void __init paging_init(void)
 
        find_ramdisk(phys_base);
 
-       memblock_enforce_memory_limit(cmdline_memory_size);
+       if (cmdline_memory_size)
+               reduce_memory(cmdline_memory_size);
 
        memblock_allow_resize();
        memblock_dump_all();
@@ -1956,16 +2091,10 @@ void __init paging_init(void)
         */
        init_mm.pgd += ((shift) / (sizeof(pgd_t)));
        
-       memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir));
+       memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
 
-       /* Now can init the kernel/bad page tables. */
-       pud_set(pud_offset(&swapper_pg_dir[0], 0),
-               swapper_low_pmd_dir + (shift / sizeof(pgd_t)));
-       
        inherit_prom_mappings();
        
-       init_kpte_bitmap();
-
        /* Ok, we can use our TLB miss and window trap handlers safely.  */
        setup_tba();
 
@@ -2072,70 +2201,6 @@ int page_in_phys_avail(unsigned long paddr)
        return 0;
 }
 
-static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
-static int pavail_rescan_ents __initdata;
-
-/* Certain OBP calls, such as fetching "available" properties, can
- * claim physical memory.  So, along with initializing the valid
- * address bitmap, what we do here is refetch the physical available
- * memory list again, and make sure it provides at least as much
- * memory as 'pavail' does.
- */
-static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap)
-{
-       int i;
-
-       read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents);
-
-       for (i = 0; i < pavail_ents; i++) {
-               unsigned long old_start, old_end;
-
-               old_start = pavail[i].phys_addr;
-               old_end = old_start + pavail[i].reg_size;
-               while (old_start < old_end) {
-                       int n;
-
-                       for (n = 0; n < pavail_rescan_ents; n++) {
-                               unsigned long new_start, new_end;
-
-                               new_start = pavail_rescan[n].phys_addr;
-                               new_end = new_start +
-                                       pavail_rescan[n].reg_size;
-
-                               if (new_start <= old_start &&
-                                   new_end >= (old_start + PAGE_SIZE)) {
-                                       set_bit(old_start >> ILOG2_4MB, bitmap);
-                                       goto do_next_page;
-                               }
-                       }
-
-                       prom_printf("mem_init: Lost memory in pavail\n");
-                       prom_printf("mem_init: OLD start[%lx] size[%lx]\n",
-                                   pavail[i].phys_addr,
-                                   pavail[i].reg_size);
-                       prom_printf("mem_init: NEW start[%lx] size[%lx]\n",
-                                   pavail_rescan[i].phys_addr,
-                                   pavail_rescan[i].reg_size);
-                       prom_printf("mem_init: Cannot continue, aborting.\n");
-                       prom_halt();
-
-               do_next_page:
-                       old_start += PAGE_SIZE;
-               }
-       }
-}
-
-static void __init patch_tlb_miss_handler_bitmap(void)
-{
-       extern unsigned int valid_addr_bitmap_insn[];
-       extern unsigned int valid_addr_bitmap_patch[];
-
-       valid_addr_bitmap_insn[1] = valid_addr_bitmap_patch[1];
-       mb();
-       valid_addr_bitmap_insn[0] = valid_addr_bitmap_patch[0];
-       flushi(&valid_addr_bitmap_insn[0]);
-}
-
 static void __init register_page_bootmem_info(void)
 {
 #ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -2148,18 +2213,6 @@ static void __init register_page_bootmem_info(void)
 }
 void __init mem_init(void)
 {
-       unsigned long addr, last;
-
-       addr = PAGE_OFFSET + kern_base;
-       last = PAGE_ALIGN(kern_size) + addr;
-       while (addr < last) {
-               set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap);
-               addr += PAGE_SIZE;
-       }
-
-       setup_valid_addr_bitmap_from_pavail(sparc64_valid_addr_bitmap);
-       patch_tlb_miss_handler_bitmap();
-
        high_memory = __va(last_valid_pfn << PAGE_SHIFT);
 
        register_page_bootmem_info();
@@ -2249,18 +2302,9 @@ unsigned long _PAGE_CACHE __read_mostly;
 EXPORT_SYMBOL(_PAGE_CACHE);
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-unsigned long vmemmap_table[VMEMMAP_SIZE];
-
-static long __meminitdata addr_start, addr_end;
-static int __meminitdata node_start;
-
 int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
                               int node)
 {
-       unsigned long phys_start = (vstart - VMEMMAP_BASE);
-       unsigned long phys_end = (vend - VMEMMAP_BASE);
-       unsigned long addr = phys_start & VMEMMAP_CHUNK_MASK;
-       unsigned long end = VMEMMAP_ALIGN(phys_end);
        unsigned long pte_base;
 
        pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U |
@@ -2271,47 +2315,52 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
                            _PAGE_CP_4V | _PAGE_CV_4V |
                            _PAGE_P_4V | _PAGE_W_4V);
 
-       for (; addr < end; addr += VMEMMAP_CHUNK) {
-               unsigned long *vmem_pp =
-                       vmemmap_table + (addr >> VMEMMAP_CHUNK_SHIFT);
-               void *block;
+       pte_base |= _PAGE_PMD_HUGE;
 
-               if (!(*vmem_pp & _PAGE_VALID)) {
-                       block = vmemmap_alloc_block(1UL << ILOG2_4MB, node);
-                       if (!block)
+       vstart = vstart & PMD_MASK;
+       vend = ALIGN(vend, PMD_SIZE);
+       for (; vstart < vend; vstart += PMD_SIZE) {
+               pgd_t *pgd = pgd_offset_k(vstart);
+               unsigned long pte;
+               pud_t *pud;
+               pmd_t *pmd;
+
+               if (pgd_none(*pgd)) {
+                       pud_t *new = vmemmap_alloc_block(PAGE_SIZE, node);
+
+                       if (!new)
                                return -ENOMEM;
+                       pgd_populate(&init_mm, pgd, new);
+               }
 
-                       *vmem_pp = pte_base | __pa(block);
+               pud = pud_offset(pgd, vstart);
+               if (pud_none(*pud)) {
+                       pmd_t *new = vmemmap_alloc_block(PAGE_SIZE, node);
 
-                       /* check to see if we have contiguous blocks */
-                       if (addr_end != addr || node_start != node) {
-                               if (addr_start)
-                                       printk(KERN_DEBUG " [%lx-%lx] on node %d\n",
-                                              addr_start, addr_end-1, node_start);
-                               addr_start = addr;
-                               node_start = node;
-                       }
-                       addr_end = addr + VMEMMAP_CHUNK;
+                       if (!new)
+                               return -ENOMEM;
+                       pud_populate(&init_mm, pud, new);
                }
-       }
-       return 0;
-}
 
-void __meminit vmemmap_populate_print_last(void)
-{
-       if (addr_start) {
-               printk(KERN_DEBUG " [%lx-%lx] on node %d\n",
-                      addr_start, addr_end-1, node_start);
-               addr_start = 0;
-               addr_end = 0;
-               node_start = 0;
+               pmd = pmd_offset(pud, vstart);
+
+               pte = pmd_val(*pmd);
+               if (!(pte & _PAGE_VALID)) {
+                       void *block = vmemmap_alloc_block(PMD_SIZE, node);
+
+                       if (!block)
+                               return -ENOMEM;
+
+                       pmd_val(*pmd) = pte_base | __pa(block);
+               }
        }
+
+       return 0;
 }
 
 void vmemmap_free(unsigned long start, unsigned long end)
 {
 }
-
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 static void prot_init_common(unsigned long page_none,
@@ -2787,8 +2836,8 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
                        do_flush_tlb_kernel_range(start, LOW_OBP_ADDRESS);
                }
                if (end > HI_OBP_ADDRESS) {
-                       flush_tsb_kernel_range(end, HI_OBP_ADDRESS);
-                       do_flush_tlb_kernel_range(end, HI_OBP_ADDRESS);
+                       flush_tsb_kernel_range(HI_OBP_ADDRESS, end);
+                       do_flush_tlb_kernel_range(HI_OBP_ADDRESS, end);
                }
        } else {
                flush_tsb_kernel_range(start, end);
index 0668b36..a4c0960 100644 (file)
@@ -8,15 +8,8 @@
  */
 
 #define MAX_PHYS_ADDRESS       (1UL << MAX_PHYS_ADDRESS_BITS)
-#define KPTE_BITMAP_CHUNK_SZ           (256UL * 1024UL * 1024UL)
-#define KPTE_BITMAP_BYTES      \
-       ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4)
-#define VALID_ADDR_BITMAP_CHUNK_SZ     (4UL * 1024UL * 1024UL)
-#define VALID_ADDR_BITMAP_BYTES        \
-       ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8)
 
 extern unsigned long kern_linear_pte_xor[4];
-extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
 extern unsigned int sparc64_highest_unlocked_tlb_ent;
 extern unsigned long sparc64_kern_pri_context;
 extern unsigned long sparc64_kern_pri_nuc_bits;
@@ -38,15 +31,4 @@ extern unsigned long kern_locked_tte_data;
 
 void prom_world(int enter);
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-#define VMEMMAP_CHUNK_SHIFT    22
-#define VMEMMAP_CHUNK          (1UL << VMEMMAP_CHUNK_SHIFT)
-#define VMEMMAP_CHUNK_MASK     ~(VMEMMAP_CHUNK - 1UL)
-#define VMEMMAP_ALIGN(x)       (((x)+VMEMMAP_CHUNK-1UL)&VMEMMAP_CHUNK_MASK)
-
-#define VMEMMAP_SIZE   ((((1UL << MAX_PHYSADDR_BITS) >> PAGE_SHIFT) * \
-                         sizeof(struct page)) >> VMEMMAP_CHUNK_SHIFT)
-extern unsigned long vmemmap_table[VMEMMAP_SIZE];
-#endif
-
 #endif /* _SPARC64_MM_INIT_H */
index 7994216..d7d9017 100644 (file)
@@ -54,8 +54,8 @@ ENTRY(swsusp_arch_resume)
         nop
 
        /* Write PAGE_OFFSET to %g7 */
-       sethi   %uhi(PAGE_OFFSET), %g7
-       sllx    %g7, 32, %g7
+       sethi   %hi(PAGE_OFFSET), %g7
+       ldx     [%g7 + %lo(PAGE_OFFSET)], %g7
 
        setuw   (PAGE_SIZE-8), %g3
 
index ab9ccc6..7149e77 100644 (file)
  *          the .bss section or it will break things.
  */
 
-#define BARG_LEN  256
+/* We limit BARG_LEN to 1024 because this is the size of the
+ * 'barg_out' command line buffer in the SILO bootloader.
+ */
+#define BARG_LEN 1024
 struct {
        int bootstr_len;
        int bootstr_valid;
index e58b817..b2340f0 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/smp.h>
 #include <linux/string.h>
 #include <linux/spinlock.h>
+#include <linux/irqflags.h>
 
 #include <asm/openprom.h>
 #include <asm/oplib.h>
@@ -36,8 +37,8 @@ void p1275_cmd_direct(unsigned long *args)
 {
        unsigned long flags;
 
-       raw_local_save_flags(flags);
-       raw_local_irq_restore((unsigned long)PIL_NMI);
+       local_save_flags(flags);
+       local_irq_restore((unsigned long)PIL_NMI);
        raw_spin_lock(&prom_entry_lock);
 
        prom_world(1);
@@ -45,7 +46,7 @@ void p1275_cmd_direct(unsigned long *args)
        prom_world(0);
 
        raw_spin_unlock(&prom_entry_lock);
-       raw_local_irq_restore(flags);
+       local_irq_restore(flags);
 }
 
 void prom_cif_init(void *cif_handler, void *cif_stack)
index 5814deb..756b8ec 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
 #include <linux/genhd.h>
+#include <linux/cdrom.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
@@ -22,8 +23,8 @@
 
 #define DRV_MODULE_NAME                "sunvdc"
 #define PFX DRV_MODULE_NAME    ": "
-#define DRV_MODULE_VERSION     "1.0"
-#define DRV_MODULE_RELDATE     "June 25, 2007"
+#define DRV_MODULE_VERSION     "1.1"
+#define DRV_MODULE_RELDATE     "February 13, 2013"
 
 static char version[] =
        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -32,7 +33,7 @@ MODULE_DESCRIPTION("Sun LDOM virtual disk client driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
 
-#define VDC_TX_RING_SIZE       256
+#define VDC_TX_RING_SIZE       512
 
 #define WAITING_FOR_LINK_UP    0x01
 #define WAITING_FOR_TX_SPACE   0x02
@@ -65,10 +66,10 @@ struct vdc_port {
        u64                     operations;
        u32                     vdisk_size;
        u8                      vdisk_type;
+       u8                      vdisk_mtype;
 
        char                    disk_name[32];
 
-       struct vio_disk_geom    geom;
        struct vio_disk_vtoc    label;
 };
 
@@ -79,9 +80,16 @@ static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
 
 /* Ordered from largest major to lowest */
 static struct vio_version vdc_versions[] = {
+       { .major = 1, .minor = 1 },
        { .major = 1, .minor = 0 },
 };
 
+static inline int vdc_version_supported(struct vdc_port *port,
+                                       u16 major, u16 minor)
+{
+       return port->vio.ver.major == major && port->vio.ver.minor >= minor;
+}
+
 #define VDCBLK_NAME    "vdisk"
 static int vdc_major;
 #define PARTITION_SHIFT        3
@@ -94,18 +102,54 @@ static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr)
 static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
        struct gendisk *disk = bdev->bd_disk;
-       struct vdc_port *port = disk->private_data;
+       sector_t nsect = get_capacity(disk);
+       sector_t cylinders = nsect;
 
-       geo->heads = (u8) port->geom.num_hd;
-       geo->sectors = (u8) port->geom.num_sec;
-       geo->cylinders = port->geom.num_cyl;
+       geo->heads = 0xff;
+       geo->sectors = 0x3f;
+       sector_div(cylinders, geo->heads * geo->sectors);
+       geo->cylinders = cylinders;
+       if ((sector_t)(geo->cylinders + 1) * geo->heads * geo->sectors < nsect)
+               geo->cylinders = 0xffff;
 
        return 0;
 }
 
+/* Add ioctl/CDROM_GET_CAPABILITY to support cdrom_id in udev
+ * when vdisk_mtype is VD_MEDIA_TYPE_CD or VD_MEDIA_TYPE_DVD.
+ * Needed to be able to install inside an ldom from an iso image.
+ */
+static int vdc_ioctl(struct block_device *bdev, fmode_t mode,
+                    unsigned command, unsigned long argument)
+{
+       int i;
+       struct gendisk *disk;
+
+       switch (command) {
+       case CDROMMULTISESSION:
+               pr_debug(PFX "Multisession CDs not supported\n");
+               for (i = 0; i < sizeof(struct cdrom_multisession); i++)
+                       if (put_user(0, (char __user *)(argument + i)))
+                               return -EFAULT;
+               return 0;
+
+       case CDROM_GET_CAPABILITY:
+               disk = bdev->bd_disk;
+
+               if (bdev->bd_disk && (disk->flags & GENHD_FL_CD))
+                       return 0;
+               return -EINVAL;
+
+       default:
+               pr_debug(PFX "ioctl %08x not supported\n", command);
+               return -EINVAL;
+       }
+}
+
 static const struct block_device_operations vdc_fops = {
        .owner          = THIS_MODULE,
        .getgeo         = vdc_getgeo,
+       .ioctl          = vdc_ioctl,
 };
 
 static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
@@ -165,9 +209,9 @@ static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
        struct vio_disk_attr_info *pkt = arg;
 
        viodbg(HS, "GOT ATTR stype[0x%x] ops[%llx] disk_size[%llu] disk_type[%x] "
-              "xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
+              "mtype[0x%x] xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
               pkt->tag.stype, pkt->operations,
-              pkt->vdisk_size, pkt->vdisk_type,
+              pkt->vdisk_size, pkt->vdisk_type, pkt->vdisk_mtype,
               pkt->xfer_mode, pkt->vdisk_block_size,
               pkt->max_xfer_size);
 
@@ -192,8 +236,11 @@ static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
                }
 
                port->operations = pkt->operations;
-               port->vdisk_size = pkt->vdisk_size;
                port->vdisk_type = pkt->vdisk_type;
+               if (vdc_version_supported(port, 1, 1)) {
+                       port->vdisk_size = pkt->vdisk_size;
+                       port->vdisk_mtype = pkt->vdisk_mtype;
+               }
                if (pkt->max_xfer_size < port->max_xfer_size)
                        port->max_xfer_size = pkt->max_xfer_size;
                port->vdisk_block_size = pkt->vdisk_block_size;
@@ -236,7 +283,9 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
 
        __blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
 
-       if (blk_queue_stopped(port->disk->queue))
+       /* restart blk queue when ring is half emptied */
+       if (blk_queue_stopped(port->disk->queue) &&
+           vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50)
                blk_start_queue(port->disk->queue);
 }
 
@@ -388,12 +437,6 @@ static int __send_request(struct request *req)
        for (i = 0; i < nsg; i++)
                len += sg[i].length;
 
-       if (unlikely(vdc_tx_dring_avail(dr) < 1)) {
-               blk_stop_queue(port->disk->queue);
-               err = -ENOMEM;
-               goto out;
-       }
-
        desc = vio_dring_cur(dr);
 
        err = ldc_map_sg(port->vio.lp, sg, nsg,
@@ -433,21 +476,32 @@ static int __send_request(struct request *req)
                port->req_id++;
                dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1);
        }
-out:
 
        return err;
 }
 
-static void do_vdc_request(struct request_queue *q)
+static void do_vdc_request(struct request_queue *rq)
 {
-       while (1) {
-               struct request *req = blk_fetch_request(q);
+       struct request *req;
 
-               if (!req)
-                       break;
+       while ((req = blk_peek_request(rq)) != NULL) {
+               struct vdc_port *port;
+               struct vio_dring_state *dr;
+
+               port = req->rq_disk->private_data;
+               dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+               if (unlikely(vdc_tx_dring_avail(dr) < 1))
+                       goto wait;
+
+               blk_start_request(req);
 
-               if (__send_request(req) < 0)
-                       __blk_end_request_all(req, -EIO);
+               if (__send_request(req) < 0) {
+                       blk_requeue_request(rq, req);
+wait:
+                       /* Avoid pointless unplugs. */
+                       blk_stop_queue(rq);
+                       break;
+               }
        }
 }
 
@@ -663,18 +717,27 @@ static int probe_disk(struct vdc_port *port)
                return err;
        }
 
-       err = generic_request(port, VD_OP_GET_DISKGEOM,
-                             &port->geom, sizeof(port->geom));
-       if (err < 0) {
-               printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
-                      "error %d\n", err);
-               return err;
+       if (vdc_version_supported(port, 1, 1)) {
+               /* vdisk_size should be set during the handshake, if it wasn't
+                * then the underlying disk is reserved by another system
+                */
+               if (port->vdisk_size == -1)
+                       return -ENODEV;
+       } else {
+               struct vio_disk_geom geom;
+
+               err = generic_request(port, VD_OP_GET_DISKGEOM,
+                                     &geom, sizeof(geom));
+               if (err < 0) {
+                       printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
+                              "error %d\n", err);
+                       return err;
+               }
+               port->vdisk_size = ((u64)geom.num_cyl *
+                                   (u64)geom.num_hd *
+                                   (u64)geom.num_sec);
        }
 
-       port->vdisk_size = ((u64)port->geom.num_cyl *
-                           (u64)port->geom.num_hd *
-                           (u64)port->geom.num_sec);
-
        q = blk_init_queue(do_vdc_request, &port->vio.lock);
        if (!q) {
                printk(KERN_ERR PFX "%s: Could not allocate queue.\n",
@@ -691,6 +754,10 @@ static int probe_disk(struct vdc_port *port)
 
        port->disk = g;
 
+       /* Each segment in a request is up to an aligned page in size. */
+       blk_queue_segment_boundary(q, PAGE_SIZE - 1);
+       blk_queue_max_segment_size(q, PAGE_SIZE);
+
        blk_queue_max_segments(q, port->ring_cookies);
        blk_queue_max_hw_sectors(q, port->max_xfer_size);
        g->major = vdc_major;
@@ -704,9 +771,32 @@ static int probe_disk(struct vdc_port *port)
 
        set_capacity(g, port->vdisk_size);
 
-       printk(KERN_INFO PFX "%s: %u sectors (%u MB)\n",
+       if (vdc_version_supported(port, 1, 1)) {
+               switch (port->vdisk_mtype) {
+               case VD_MEDIA_TYPE_CD:
+                       pr_info(PFX "Virtual CDROM %s\n", port->disk_name);
+                       g->flags |= GENHD_FL_CD;
+                       g->flags |= GENHD_FL_REMOVABLE;
+                       set_disk_ro(g, 1);
+                       break;
+
+               case VD_MEDIA_TYPE_DVD:
+                       pr_info(PFX "Virtual DVD %s\n", port->disk_name);
+                       g->flags |= GENHD_FL_CD;
+                       g->flags |= GENHD_FL_REMOVABLE;
+                       set_disk_ro(g, 1);
+                       break;
+
+               case VD_MEDIA_TYPE_FIXED:
+                       pr_info(PFX "Virtual Hard disk %s\n", port->disk_name);
+                       break;
+               }
+       }
+
+       pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n",
               g->disk_name,
-              port->vdisk_size, (port->vdisk_size >> (20 - 9)));
+              port->vdisk_size, (port->vdisk_size >> (20 - 9)),
+              port->vio.ver.major, port->vio.ver.minor);
 
        add_disk(g);
 
@@ -765,6 +855,7 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
        else
                snprintf(port->disk_name, sizeof(port->disk_name),
                         VDCBLK_NAME "%c", 'a' + ((int)vdev->dev_no % 26));
+       port->vdisk_size = -1;
 
        err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
                              vdc_versions, ARRAY_SIZE(vdc_versions),
index 1539672..3652afd 100644 (file)
@@ -954,7 +954,7 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
        spin_lock_irqsave(&port->vio.lock, flags);
 
        dr = &port->vio.drings[VIO_DRIVER_TX_RING];
-       if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
+       if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
                if (!netif_queue_stopped(dev)) {
                        netif_stop_queue(dev);
 
@@ -1049,7 +1049,7 @@ ldc_start_done:
        dev->stats.tx_bytes += port->tx_bufs[txi].skb->len;
 
        dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
-       if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
+       if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
                netif_stop_queue(dev);
                if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
                        netif_wake_queue(dev);