extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
unsigned long prot_val);
-extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
+extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
/*
* early_ioremap() and early_iounmap() are for temporary early boot-time
* mappings, before the real ioremap() is functional.
* A boot-time mapping is currently limited to at most 16 pages.
*/
extern void early_ioremap_init(void);
-extern void early_ioremap_clear(void);
extern void early_ioremap_reset(void);
extern void __iomem *early_ioremap(unsigned long offset, unsigned long size);
extern void __iomem *early_memremap(unsigned long offset, unsigned long size);
void (*flush_tlb_user)(void);
void (*flush_tlb_kernel)(void);
void (*flush_tlb_single)(unsigned long addr);
-    void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
+    void (*flush_tlb_others)(const struct cpumask *cpus,
+                             struct mm_struct *mm,
unsigned long va);
/* Hooks for allocating and freeing a pagetable top-level */
pte_t *ptep, pte_t pte);
pteval_t (*pte_val)(pte_t);
-    pteval_t (*pte_flags)(pte_t);
pte_t (*make_pte)(pteval_t pte);
pgdval_t (*pgd_val)(pgd_t);
PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
}
-static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
+static inline void flush_tlb_others(const struct cpumask *cpumask,
+                                    struct mm_struct *mm,
unsigned long va)
{
-    PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
+    PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
}
static inline int paravirt_pgd_alloc(struct mm_struct *mm)
return ret;
}
-static inline pteval_t pte_flags(pte_t pte)
-{
-    pteval_t ret;
-
-    if (sizeof(pteval_t) > sizeof(long))
-        ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags,
-                         pte.pte, (u64)pte.pte >> 32);
-    else
-        ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
-                         pte.pte);
-
-#ifdef CONFIG_PARAVIRT_DEBUG
-    BUG_ON(ret & PTE_PFN_MASK);
-#endif
-    return ret;
-}
-
static inline pgd_t __pgd(pgdval_t val)
{
pgdval_t ret;
void _paravirt_nop(void);
#define paravirt_nop ((void *)_paravirt_nop)
-void paravirt_use_bytelocks(void);
-
#ifdef CONFIG_SMP
static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
char pad0;
#else
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
-    int x86_tlbsize;
+    int             x86_tlbsize;
__u8 x86_virt_bits;
__u8 x86_phys_bits;
#endif
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(struct orig_ist, orig_ist);
+
+union irq_stack_union {
+    char irq_stack[IRQ_STACK_SIZE];
+    /*
+     * GCC hardcodes the stack canary as %gs:40.  Since the
+     * irq_stack is the object at %gs:0, we reserve the bottom
+     * 48 bytes of the irq stack for the canary.
+     */
+    struct {
+        char gs_base[40];
+        unsigned long stack_canary;
+    };
+};
+
+DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
+DECLARE_PER_CPU(char *, irq_stack_ptr);
#endif
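The union above only works because GCC's -fstack-protector reads the canary from the fixed address %gs:40, so stack_canary must land at byte offset 40 of the per-cpu object. A minimal sketch of a compile-time check, illustrative only and not part of the patch (assumes C11 and an IRQ_STACK_SIZE of 16384):

    #include <stddef.h>

    union irq_stack_union_sketch {
        char irq_stack[16384];          /* assumed IRQ_STACK_SIZE */
        struct {
            char gs_base[40];
            unsigned long stack_canary;
        };
    };

    /* GCC emits %gs:40 for the canary, so the offset must be exactly 40. */
    _Static_assert(offsetof(union irq_stack_union_sketch, stack_canary) == 40,
                   "stack canary must sit at %gs:40");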
extern void print_cpu_info(struct cpuinfo_x86 *);
extern void cpu_set_gdt(int);
extern void switch_to_new_gdt(void);
extern void cpu_init(void);
-extern void init_gdt(int cpu);
static inline unsigned long get_debugctlmsr(void)
{
#ifndef _ASM_X86_SETUP_H
#define _ASM_X86_SETUP_H
+#ifdef __KERNEL__
+
#define COMMAND_LINE_SIZE 2048
#ifndef __ASSEMBLY__
/* Interrupt control for vSMPowered x86_64 systems */
void vsmp_init(void);
-
void setup_bios_corruption_check(void);
-
#ifdef CONFIG_X86_VISWS
extern void visws_early_detect(void);
extern int is_visws_box(void);
void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name);
void (*mpc_oem_pci_bus)(struct mpc_bus *m);
void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable,
-                unsigned short oemsize);
+                  unsigned short oemsize);
int (*setup_ioapic_ids)(void);
int (*update_genapic)(void);
};
#endif
#endif /* __ASSEMBLY__ */
-#ifdef __KERNEL__
-
#ifdef __i386__
#include <linux/pfn.h>
extern unsigned long init_pg_tables_end;
#else
-void __init x86_64_init_pda(void);
void __init x86_64_start_kernel(char *real_mode);
void __init x86_64_start_reservations(char *real_mode_data);
return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
}
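In the contention check above, tmp packs the head (next ticket to be served) in the low TICKET_SHIFT bits and the tail (next ticket to hand out) above them; the lock is contended when more than one ticket separates the two. A worked example, assuming TICKET_SHIFT == 8:

    /* Illustrative only, assuming TICKET_SHIFT == 8:                        */
    /* tmp = 0x0503: tail 0x05, head 0x03 -> (5 - 3) & 0xff = 2 > 1: contended */
    /* tmp = 0x0403: tail 0x04, head 0x03 -> (4 - 3) & 0xff = 1: held, no waiters */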
-#ifdef CONFIG_PARAVIRT
-/*
- * Define virtualization-friendly old-style lock byte lock, for use in
- * pv_lock_ops if desired.
- *
- * This differs from the pre-2.6.24 spinlock by always using xchgb
- * rather than decb to take the lock; this allows it to use a
- * zero-initialized lock structure.  It also maintains a 1-byte
- * contention counter, so that we can implement
- * __byte_spin_is_contended.
- */
-struct __byte_spinlock {
-    s8 lock;
-    s8 spinners;
-};
-
-static inline int __byte_spin_is_locked(raw_spinlock_t *lock)
-{
-    struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
-    return bl->lock != 0;
-}
-
-static inline int __byte_spin_is_contended(raw_spinlock_t *lock)
-{
-    struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
-    return bl->spinners != 0;
-}
-
-static inline void __byte_spin_lock(raw_spinlock_t *lock)
-{
-    struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
-    s8 val = 1;
-
-    asm("1: xchgb %1, %0\n"
-        "   test %1,%1\n"
-        "   jz 3f\n"
-        "   " LOCK_PREFIX "incb %2\n"
-        "2: rep;nop\n"
-        "   cmpb $1, %0\n"
-        "   je 2b\n"
-        "   " LOCK_PREFIX "decb %2\n"
-        "   jmp 1b\n"
-        "3:"
-        : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory");
-}
-
-static inline int __byte_spin_trylock(raw_spinlock_t *lock)
-{
-    struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
-    u8 old = 1;
-
-    asm("xchgb %1,%0"
-        : "+m" (bl->lock), "+q" (old) : : "memory");
-
-    return old == 0;
-}
-
-static inline void __byte_spin_unlock(raw_spinlock_t *lock)
-{
-    struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
-    smp_wmb();
-    bl->lock = 0;
-}
-#else  /* !CONFIG_PARAVIRT */
+#ifndef CONFIG_PARAVIRT
static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
{
return __ticket_spin_is_locked(lock);
__raw_spin_lock(lock);
}
-#endif /* CONFIG_PARAVIRT */
+#endif
static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
{
{
atomic_t *count = (atomic_t *)lock;
-    atomic_dec(count);
-    if (atomic_read(count) >= 0)
+    if (atomic_dec_return(count) >= 0)
return 1;
atomic_inc(count);
return 0;
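The old sequence could race: the decrement and the test were two separate atomic operations, so another CPU could decrement in between and both callers could mis-read the outcome. atomic_dec_return() bases the test on the value this CPU's own decrement produced. An illustrative interleaving (hypothetical CPUs A and B, count initially 1):

    /* old code:                                                        */
    /* A: atomic_dec(count)      -> count = 0  (A's decrement "won")    */
    /* B: atomic_dec(count)      -> count = -1                          */
    /* A: atomic_read(count) -1  -> A backs off although it won         */
    /* A: atomic_inc(count)      -> count = 0                           */
    /* B: atomic_read(count)  0  -> B proceeds although it lost         */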
, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
"r12", "r13", "r14", "r15"
+#ifdef CONFIG_CC_STACKPROTECTOR
+#define __switch_canary                                                   \
+    "movq %P[task_canary](%%rsi),%%r8\n\t"                                \
+    "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
+#define __switch_canary_oparam                                            \
+    , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
+#define __switch_canary_iparam                                            \
+    , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
+#else  /* CC_STACKPROTECTOR */
+#define __switch_canary
+#define __switch_canary_oparam
+#define __switch_canary_iparam
+#endif /* CC_STACKPROTECTOR */
+
/* Save restore flags to clear handle leaking NT */
#define switch_to(prev, next, last) \
-    asm volatile(SAVE_CONTEXT                                             \
+    asm volatile(SAVE_CONTEXT                                           \
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
"movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
"call __switch_to\n\t" \
".globl thread_return\n" \
"thread_return:\n\t" \
-         "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
+         "movq "__percpu_arg([current_task])",%%rsi\n\t" \
+         __switch_canary \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
-         LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
"movq %%rax,%%rdi\n\t" \
-         "jc ret_from_fork\n\t" \
+         "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
+         "jnz ret_from_fork\n\t" \
RESTORE_CONTEXT \
: "=a" (last) \
+           __switch_canary_oparam \
: [next] "S" (next), [prev] "D" (prev), \
[threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
[ti_flags] "i" (offsetof(struct thread_info, flags)), \
-           [tif_fork] "i" (TIF_FORK), \
+           [_tif_fork] "i" (_TIF_FORK), \
[thread_info] "i" (offsetof(struct task_struct, stack)), \
-           [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
+           [current_task] "m" (per_cpu_var(current_task)) \
+           __switch_canary_iparam \
: "memory", "cc" __EXTRA_CLOBBER)
#endif
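When CC_STACKPROTECTOR is enabled, __switch_canary copies the incoming task's canary into the per-cpu slot during the context switch, so %gs:40 always guards the task that is about to run. A C-level sketch of what the two movq instructions accomplish (illustrative only, using the per-cpu accessors of this series):

    /* %r8 = next->stack_canary; then store it at %gs:40 */
    percpu_write(irq_stack_union.stack_canary, next->stack_canary);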
*/
__u8 supervisor_stack[0];
#endif
+    int             uaccess_err;
};
#define INIT_THREAD_INFO(tsk) \
#else /* X86_32 */
-#include <asm/pda.h>
+#include <asm/percpu.h>
+#define KERNEL_STACK_OFFSET (5*8)
/*
* macros/functions for gaining access to the thread information structure
* preempt_count needs to be 1 initially, until the scheduler is functional.
*/
#ifndef __ASSEMBLY__
-static inline struct thread_info *current_thread_info(void)
-{
-    struct thread_info *ti;
-    ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
-    return ti;
-}
+DECLARE_PER_CPU(unsigned long, kernel_stack);
-/* do not use in interrupt context */
-static inline struct thread_info *stack_thread_info(void)
+static inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
-    asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
+    ti = (void *)(percpu_read(kernel_stack) +
+                  KERNEL_STACK_OFFSET - THREAD_SIZE);
return ti;
}
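The per-cpu kernel_stack value points KERNEL_STACK_OFFSET (5*8 = 40) bytes below the top of the current kernel stack, so adding the offset back and subtracting THREAD_SIZE lands on the thread_info at the stack's base. A worked example with assumed numbers (THREAD_SIZE = 8192, stack based at 0xffff880000010000):

    /* illustrative values only */
    kernel_stack = 0xffff880000010000UL + 8192 - 40;   /* top minus 40 */
    ti = (void *)(kernel_stack + 40 - 8192);           /* == 0xffff880000010000 */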
/* how to get the thread information struct from ASM */
#define GET_THREAD_INFO(reg) \
-    movq %gs:pda_kernelstack,reg ; \
-    subq $(THREAD_SIZE-PDA_STACKOFFSET),reg
+    movq PER_CPU_VAR(kernel_stack),reg ; \
+    subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
#endif
#include <asm/asm.h>
#include <asm/numa.h>
#include <asm/smp.h>
+#include <asm/cpu.h>
+#include <asm/cpumask.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <mach_apic.h>
#include <asm/genapic.h>
+#include <asm/uv/uv.h>
#endif
-#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/desc.h>
/* representing cpus for which sibling maps can be computed */
cpumask_var_t cpu_sibling_setup_mask;
+/* correctly size the local cpu masks */
+void __init setup_cpu_local_masks(void)
+{
+    alloc_bootmem_cpumask_var(&cpu_initialized_mask);
+    alloc_bootmem_cpumask_var(&cpu_callin_mask);
+    alloc_bootmem_cpumask_var(&cpu_callout_mask);
+    alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
+}
+
#else /* CONFIG_X86_32 */
cpumask_t cpu_callin_map;
static struct cpu_dev *this_cpu __cpuinitdata;
+DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
-/* We need valid kernel segments for data and code in long mode too
- * IRET will check the segment types  kkeil 2000/10/28
- * Also sysret mandates a special GDT layout
- */
-/* The TLS descriptors are currently at a different place compared to i386.
-   Hopefully nobody expects them at a fixed place (Wine?) */
-DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
+    /*
+     * We need valid kernel segments for data and code in long mode too
+     * IRET will check the segment types  kkeil 2000/10/28
+     * Also sysret mandates a special GDT layout
+     *
+     * The TLS descriptors are currently at a different place compared to i386.
+     * Hopefully nobody expects them at a fixed place (Wine?)
+     */
[GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
[GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
-} };
#else
-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
-    [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
-} };
+    [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
#endif
+} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
#ifdef CONFIG_X86_32
}
#endif
+/*
+ * Some CPU features depend on higher CPUID levels, which may not always
+ * be available due to CPUID level capping or broken virtualization
+ * software.  Add those features to this table to auto-disable them.
+ */
+struct cpuid_dependent_feature {
+    u32 feature;
+    u32 level;
+};
+static const struct cpuid_dependent_feature __cpuinitconst
+cpuid_dependent_features[] = {
+    { X86_FEATURE_MWAIT,    0x00000005 },
+    { X86_FEATURE_DCA,      0x00000009 },
+    { X86_FEATURE_XSAVE,    0x0000000d },
+    { 0, 0 }
+};
+
+static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
+{
+    const struct cpuid_dependent_feature *df;
+
+    for (df = cpuid_dependent_features; df->feature; df++) {
+        /*
+         * Note: cpuid_level is set to -1 if unavailable, but
+         * extended_cpuid_level is set to 0 if unavailable
+         * and the legitimate extended levels are all negative
+         * when signed; hence the weird messing around with
+         * signs here...
+         */
+        if (cpu_has(c, df->feature) &&
+            ((s32)df->level < 0 ?
+             (u32)df->level > (u32)c->extended_cpuid_level :
+             (s32)df->level > (s32)c->cpuid_level)) {
+            clear_cpu_cap(c, df->feature);
+            if (warn)
+                printk(KERN_WARNING
+                       "CPU: CPU feature %s disabled "
+                       "due to lack of CPUID level 0x%x\n",
+                       x86_cap_flags[df->feature],
+                       df->level);
+        }
+    }
+}
+
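The sign games above distinguish basic CPUID leaves from extended ones: extended levels such as 0x8000000a are negative when viewed as s32, so they are compared as u32 against extended_cpuid_level, while ordinary leaves compare signed against cpuid_level (which can legitimately be -1). Illustrative values, not taken from the patch:

    /* X86_FEATURE_XSAVE needs leaf 0xd; a CPU capped at leaf 0xa loses it: */
    /* (s32)0x0000000d >= 0 and 0xd > 0xa  ->  clear_cpu_cap(c, XSAVE)      */
    /* An extended level like 0x80000008 is negative as s32 and is          */
    /* therefore checked against c->extended_cpuid_level as u32 instead.    */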
/*
* Naming convention should be: <Name> [(<Codename>)]
* This table is only used if init_<vendor>() below doesn't set the name;
void switch_to_new_gdt(void)
{
struct desc_ptr gdt_descr;
+    int cpu = smp_processor_id();
-    gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+    gdt_descr.address = (long)get_cpu_gdt_table(cpu);
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
+    /* Reload the per-cpu base */
#ifdef CONFIG_X86_32
-    asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
+    loadsegment(fs, __KERNEL_PERCPU);
+#else
+    loadsegment(gs, 0);
+    wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
#endif
}
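Segment registers cache the descriptor they were last loaded with, so after switching to a new GDT the per-cpu segment has to be re-established from the new table: 32-bit reloads %fs with the per-cpu selector, while 64-bit has no per-cpu GDT entry and writes the base MSR directly. Sketch of the 64-bit effect (illustrative comment only):

    /* After wrmsrl(MSR_GS_BASE, gs_base), %gs:0 addresses this CPU's percpu
     * area, so %gs:40 is the stack canary inside irq_stack_union. */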
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
-    validate_pat_support(c);
-
#ifdef CONFIG_SMP
c->cpu_index = boot_cpu_id;
#endif
+    filter_cpuid_features(c, false);
}
void __init early_cpu_init(void)
* we do "generic changes."
*/
+    /* Filter out anything that depends on CPUID levels we don't have */
+    filter_cpuid_features(c, true);
+
/* If the model name is still unset, do table lookup. */
if (!c->x86_model_id[0]) {
char *p;
__setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
-struct x8664_pda **_cpu_pda __read_mostly;
-EXPORT_SYMBOL(_cpu_pda);
-
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
-static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
-
-void __cpuinit pda_init(int cpu)
-{
-    struct x8664_pda *pda = cpu_pda(cpu);
+DEFINE_PER_CPU_FIRST(union irq_stack_union,
+                     irq_stack_union) __aligned(PAGE_SIZE);
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU(char *, irq_stack_ptr);  /* will be set during per cpu init */
+#else
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+    per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+#endif
-    /* Setup up data that may be needed in __get_free_pages early */
-    loadsegment(fs, 0);
-    loadsegment(gs, 0);
-    /* Memory clobbers used to order PDA accessed */
-    mb();
-    wrmsrl(MSR_GS_BASE, pda);
-    mb();
-
-    pda->cpunumber = cpu;
-    pda->irqcount = -1;
-    pda->kernelstack = (unsigned long)stack_thread_info() -
-                       PDA_STACKOFFSET + THREAD_SIZE;
-    pda->active_mm = &init_mm;
-    pda->mmu_state = 0;
-
-    if (cpu == 0) {
-        /* others are initialized in smpboot.c */
-        pda->pcurrent = &init_task;
-        pda->irqstackptr = boot_cpu_stack;
-        pda->irqstackptr += IRQSTACKSIZE - 64;
-    } else {
-        if (!pda->irqstackptr) {
-            pda->irqstackptr = (char *)
-                __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
-            if (!pda->irqstackptr)
-                panic("cannot allocate irqstack for cpu %d",
-                      cpu);
-            pda->irqstackptr += IRQSTACKSIZE - 64;
-        }
+DEFINE_PER_CPU(unsigned long, kernel_stack) =
+    (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(kernel_stack);
-        if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
-            pda->nodenumber = cpu_to_node(cpu);
-    }
-}
+DEFINE_PER_CPU(unsigned int, irq_count) = -1;
-static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
-                                  DEBUG_STKSZ] __page_aligned_bss;
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+    [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
+    __aligned(PAGE_SIZE);
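DEFINE_PER_CPU_FIRST places irq_stack_union at the very start of the per-cpu area, which is what allows MSR_GS_BASE to point at the union so that the canary ends up at %gs:40. On UP the irq_stack_ptr can be computed statically; the 64-byte pullback mirrors the old pda->irqstackptr setup and leaves scratch space below the top of the stack. Illustrative arithmetic, assuming IRQ_STACK_SIZE = 16384:

    /* irq_stack_ptr = &irq_stack[0] + 16384 - 64: 64 bytes below the top */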
extern asmlinkage void ignore_sysret(void);
struct tss_struct *t = &per_cpu(init_tss, cpu);
struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
unsigned long v;
-    char *estacks = NULL;
struct task_struct *me;
int i;
-    /* CPU 0 is initialised in head64.c */
-    if (cpu != 0)
-        pda_init(cpu);
-    else
-        estacks = boot_exception_stacks;
+#ifdef CONFIG_NUMA
+    if (cpu != 0 && percpu_read(node_number) == 0 &&
+        cpu_to_node(cpu) != NUMA_NO_NODE)
+        percpu_write(node_number, cpu_to_node(cpu));
+#endif
me = current;
*/
switch_to_new_gdt();
+    loadsegment(fs, 0);
+
load_idt((const struct desc_ptr *)&idt_descr);
memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
* set up and load the per-CPU TSS
*/
if (!orig_ist->ist[0]) {
-        static const unsigned int order[N_EXCEPTION_STACKS] = {
-            [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
-            [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+        static const unsigned int sizes[N_EXCEPTION_STACKS] = {
+            [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+            [DEBUG_STACK - 1] = DEBUG_STKSZ
};
+        char *estacks = per_cpu(exception_stacks, cpu);
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
-            if (cpu) {
-                estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
-                if (!estacks)
-                    panic("Cannot allocate exception "
-                          "stack %ld %d\n", v, cpu);
-            }
-            estacks += PAGE_SIZE << order[v];
+            estacks += sizes[v];
orig_ist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
}
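Because estacks is advanced by sizes[v] before being stored, each IST entry records the top of its slice of the per-cpu exception_stacks array, with the DEBUG_STACK slot getting the larger DEBUG_STKSZ slice. Illustrative layout, assuming EXCEPTION_STKSZ = 4096 and DEBUG_STKSZ = 8192:

    /* each normal slot v:       ist[v] = previous top + 4096 */
    /* the DEBUG_STACK - 1 slot: ist    = previous top + 8192 */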
*/
if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
arch_kgdb_ops.correct_hw_break();
-    else {
+    else
#endif
-    /*
-     * Clear all 6 debug registers:
-     */
-
-    set_debugreg(0UL, 0);
-    set_debugreg(0UL, 1);
-    set_debugreg(0UL, 2);
-    set_debugreg(0UL, 3);
-    set_debugreg(0UL, 6);
-    set_debugreg(0UL, 7);
-#ifdef CONFIG_KGDB
-    /* If the kgdb is connected no debug regs should be altered. */
+    {
+        /*
+         * Clear all 6 debug registers:
+         */
+        set_debugreg(0UL, 0);
+        set_debugreg(0UL, 1);
+        set_debugreg(0UL, 2);
+        set_debugreg(0UL, 3);
+        set_debugreg(0UL, 6);
+        set_debugreg(0UL, 7);
}
-#endif
fpu_init();
static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
{
- if (c->x86 == 6 && c->x86_model >= 15) {
+    /* Unmask CPUID levels if masked: */
+    if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
+        u64 misc_enable;
+
+        rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+
+        if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
+            misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
+            wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+            c->cpuid_level = cpuid_eax(0);
+        }
+    }
+
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
(c->x86 == 0x6 && c->x86_model >= 0x0e))
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
}
+    /*
+     * There is a known erratum on Pentium III and Core Solo
+     * and Core Duo CPUs.
+     * " Page with PAT set to WC while associated MTRR is UC
+     *   may consolidate to UC "
+     * Because of this erratum, it is better to stick with
+     * setting WC in MTRR rather than using PAT on these CPUs.
+     *
+     * Enable PAT WC only on P4, Core 2 or later CPUs.
+     */
+    if (c->x86 == 6 && c->x86_model < 15)
+        clear_cpu_cap(c, X86_FEATURE_PAT);
}
#ifdef CONFIG_X86_32
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>
+#include <asm/percpu.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
/* %rsp:at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp offset=0
-    movq %gs:pda_oldrsp,\tmp
+    movq PER_CPU_VAR(old_rsp),\tmp
movq \tmp,RSP+\offset(%rsp)
movq $__USER_DS,SS+\offset(%rsp)
movq $__USER_CS,CS+\offset(%rsp)
.macro RESTORE_TOP_OF_STACK tmp offset=0
movq RSP+\offset(%rsp),\tmp
-    movq \tmp,%gs:pda_oldrsp
+    movq \tmp,PER_CPU_VAR(old_rsp)
movq EFLAGS+\offset(%rsp),\tmp
movq \tmp,R11+\offset(%rsp)
.endm
je 1f
SWAPGS
/*
- * irqcount is used to check if a CPU is already on an interrupt stack
+ * irq_count is used to check if a CPU is already on an interrupt stack
* or not. While this is essentially redundant with preempt_count it is
* a little cheaper to use a separate counter in the PDA (short of
* moving irq_enter into assembly, which would be too much work)
*/
-1: incl %gs:pda_irqcount
+1: incl PER_CPU_VAR(irq_count)
jne 2f
popq_cfi %rax /* move return address... */
-    mov %gs:pda_irqstackptr,%rsp
+    mov PER_CPU_VAR(irq_stack_ptr),%rsp
EMPTY_FRAME 0
pushq_cfi %rax /* ... to the new stack */
/*
ENTRY(ret_from_fork)
DEFAULT_FRAME
+    LOCK ; btr $TIF_FORK,TI_flags(%r8)
+
push kernel_eflags(%rip)
CFI_ADJUST_CFA_OFFSET 8
popf # reset kernel eflags
ENTRY(system_call)
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
-    CFI_DEF_CFA rsp,PDA_STACKOFFSET
+    CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
*/
ENTRY(system_call_after_swapgs)
-    movq %rsp,%gs:pda_oldrsp
-    movq %gs:pda_kernelstack,%rsp
+    movq %rsp,PER_CPU_VAR(old_rsp)
+    movq PER_CPU_VAR(kernel_stack),%rsp
/*
* No need to follow this irqs off/on section - it's straight
* and short:
CFI_REGISTER rip,rcx
RESTORE_ARGS 0,-ARG_SKIP,1
/*CFI_REGISTER rflags,r11*/
-    movq %gs:pda_oldrsp, %rsp
+    movq PER_CPU_VAR(old_rsp), %rsp
USERGS_SYSRET64
CFI_RESTORE_STATE
XCPT_FRAME
addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ
-    /* 0(%rsp): oldrsp-ARGOFFSET */
+    /* 0(%rsp): old_rsp-ARGOFFSET */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
-    decl %gs:pda_irqcount
+    decl PER_CPU_VAR(irq_count)
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
#endif
+#ifdef CONFIG_X86_UV
apicinterrupt UV_BAU_MESSAGE \
uv_bau_message_intr1 uv_bau_message_interrupt
+#endif
apicinterrupt LOCAL_TIMER_VECTOR \
apic_timer_interrupt smp_apic_timer_interrupt
TRACE_IRQS_OFF
movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */
-    movq %gs:pda_data_offset, %rbp
-    subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+    PER_CPU(init_tss, %rbp)
+    subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
call \do_sym
-    addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+    addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
jmp paranoid_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
END(\sym)
CFI_REL_OFFSET rbp,0
mov %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
-    incl %gs:pda_irqcount
-    cmove %gs:pda_irqstackptr,%rsp
+    incl PER_CPU_VAR(irq_count)
+    cmove PER_CPU_VAR(irq_stack_ptr),%rsp
push %rbp # backlink for old unwinder
call __do_softirq
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
-    decl %gs:pda_irqcount
+    decl PER_CPU_VAR(irq_count)
ret
CFI_ENDPROC
END(call_softirq)
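The incl/cmove pair above is the re-entrancy check in miniature: irq_count starts at -1, so the first entry increments it to 0, which sets ZF, and cmove then switches %rsp to the irq stack; nested entries leave ZF clear and stay on the stack they are already on. Equivalent pseudo-assembly (illustrative):

	# if (++irq_count == 0) rsp = irq_stack_ptr;   # first entry only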
movq %rdi, %rsp # we don't return, adjust the stack frame
CFI_ENDPROC
DEFAULT_FRAME
-11: incl %gs:pda_irqcount
+11: incl PER_CPU_VAR(irq_count)
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
-    cmovzq %gs:pda_irqstackptr,%rsp
+    cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
pushq %rbp # backlink for old unwinder
call xen_evtchn_do_upcall
popq %rsp
CFI_DEF_CFA_REGISTER rsp
-    decl %gs:pda_irqcount
+    decl PER_CPU_VAR(irq_count)
jmp error_exit
CFI_ENDPROC
END(do_hypervisor_callback)
ljmp $(__KERNEL_CS),$1f
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
movl %eax,%ss # after changing gdt.
-    movl %eax,%fs   # gets reset once there's real percpu
movl $(__USER_DS),%eax # DS/ES contains default USER segment
movl %eax,%ds
movl %eax,%es
+    movl $(__KERNEL_PERCPU), %eax
+    movl %eax,%fs   # set this cpu's percpu
+
xorl %eax,%eax # Clear GS and LDT
movl %eax,%gs
lldt %ax
movb $1, ready
cmpb $0,%cl # the first CPU calls start_kernel
je 1f
-    movl $(__KERNEL_PERCPU), %eax
-    movl %eax,%fs   # set this cpu's percpu
movl (stack_start), %esp
1:
#endif /* CONFIG_SMP */
pushl %eax
pushl %edx /* trapno */
pushl $fault_msg
-#ifdef CONFIG_EARLY_PRINTK
-    call early_printk
-#else
call printk
-#endif
#endif
call dump_stack
hlt_loop:
pushl 32(%esp)
pushl 40(%esp)
pushl $int_msg
-#ifdef CONFIG_EARLY_PRINTK
-    call early_printk
-#else
call printk
-#endif
+
+    call dump_stack
+
addl $(5*4),%esp
popl %ds
popl %es
.long 0
int_msg:
-    .asciz "Unknown interrupt or fault at EIP %p %p %p\n"
+    .asciz "Unknown interrupt or fault at: %p %p %p\n"
fault_msg:
/* fault info: */
#include <asm/idle.h>
#include <asm/io.h>
#include <asm/smp.h>
+#include <asm/cpu.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/acpi.h>
int nr_ioapic_registers[MAX_IO_APICS];
/* I/O APIC entries */
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
+struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
int nr_ioapics;
/* MP IRQ source entries */
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* # of MP IRQ source entries */
int mp_irq_entries;
if (!cfg->move_in_progress) {
/* it means that domain is not changed */
-        if (!cpumask_intersects(&desc->affinity, mask))
+        if (!cpumask_intersects(desc->affinity, mask))
cfg->move_desc_pending = 1;
}
}
static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
{
return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
-        + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
+        + (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
}
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
if (assign_irq_vector(irq, cfg, mask))
return BAD_APICID;
-    cpumask_and(&desc->affinity, cfg->domain, mask);
+    cpumask_and(desc->affinity, cfg->domain, mask);
set_extra_move_desc(desc, mask);
-    return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
+    return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask);
}
static void
int i;
for (i = 0; i < mp_irq_entries; i++)
-        if (mp_irqs[i].mp_irqtype == type &&
-            (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
-             mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
-            mp_irqs[i].mp_dstirq == pin)
+        if (mp_irqs[i].irqtype == type &&
+            (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
+             mp_irqs[i].dstapic == MP_APIC_ALL) &&
+            mp_irqs[i].dstirq == pin)
return i;
return -1;
int i;
for (i = 0; i < mp_irq_entries; i++) {
-        int lbus = mp_irqs[i].mp_srcbus;
+        int lbus = mp_irqs[i].srcbus;
if (test_bit(lbus, mp_bus_not_pci) &&
-            (mp_irqs[i].mp_irqtype == type) &&
-            (mp_irqs[i].mp_srcbusirq == irq))
+            (mp_irqs[i].irqtype == type) &&
+            (mp_irqs[i].srcbusirq == irq))
-            return mp_irqs[i].mp_dstirq;
+            return mp_irqs[i].dstirq;
}
return -1;
}
int i;
for (i = 0; i < mp_irq_entries; i++) {
-        int lbus = mp_irqs[i].mp_srcbus;
+        int lbus = mp_irqs[i].srcbus;
if (test_bit(lbus, mp_bus_not_pci) &&
-            (mp_irqs[i].mp_irqtype == type) &&
-            (mp_irqs[i].mp_srcbusirq == irq))
+            (mp_irqs[i].irqtype == type) &&
+            (mp_irqs[i].srcbusirq == irq))
break;
}
if (i < mp_irq_entries) {
int apic;
for(apic = 0; apic < nr_ioapics; apic++) {
-            if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
+            if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
return apic;
}
}
return -1;
}
for (i = 0; i < mp_irq_entries; i++) {
-        int lbus = mp_irqs[i].mp_srcbus;
+        int lbus = mp_irqs[i].srcbus;
for (apic = 0; apic < nr_ioapics; apic++)
-            if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
-                mp_irqs[i].mp_dstapic == MP_APIC_ALL)
+            if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+                mp_irqs[i].dstapic == MP_APIC_ALL)
break;
if (!test_bit(lbus, mp_bus_not_pci) &&
-            !mp_irqs[i].mp_irqtype &&
+            !mp_irqs[i].irqtype &&
(bus == lbus) &&
-            (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
-            int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
+            (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+            int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
if (!(apic || IO_APIC_IRQ(irq)))
continue;
-            if (pin == (mp_irqs[i].mp_srcbusirq & 3))
+            if (pin == (mp_irqs[i].srcbusirq & 3))
return irq;
/*
* Use the first all-but-pin matching entry as a
* EISA conforming in the MP table, that means its trigger type must
* be read in from the ELCR */
-#define default_EISA_trigger(idx)   (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
+#define default_EISA_trigger(idx)   (EISA_ELCR(mp_irqs[idx].srcbusirq))
#define default_EISA_polarity(idx) default_ISA_polarity(idx)
/* PCI interrupts are always polarity one level triggered,
static int MPBIOS_polarity(int idx)
{
-    int bus = mp_irqs[idx].mp_srcbus;
+    int bus = mp_irqs[idx].srcbus;
int polarity;
/*
* Determine IRQ line polarity (high active or low active):
*/
-    switch (mp_irqs[idx].mp_irqflag & 3)
+    switch (mp_irqs[idx].irqflag & 3)
{
case 0: /* conforms, ie. bus-type dependent polarity */
if (test_bit(bus, mp_bus_not_pci))
static int MPBIOS_trigger(int idx)
{
-    int bus = mp_irqs[idx].mp_srcbus;
+    int bus = mp_irqs[idx].srcbus;
int trigger;
/*
* Determine IRQ trigger mode (edge or level sensitive):
*/
-    switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
+    switch ((mp_irqs[idx].irqflag>>2) & 3)
{
case 0: /* conforms, ie. bus-type dependent */
if (test_bit(bus, mp_bus_not_pci))
static int pin_2_irq(int idx, int apic, int pin)
{
int irq, i;
-    int bus = mp_irqs[idx].mp_srcbus;
+    int bus = mp_irqs[idx].srcbus;
/*
* Debugging check, we are in big trouble if this message pops up!
*/
-    if (mp_irqs[idx].mp_dstirq != pin)
+    if (mp_irqs[idx].dstirq != pin)
printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
if (test_bit(bus, mp_bus_not_pci)) {
-        irq = mp_irqs[idx].mp_srcbusirq;
+        irq = mp_irqs[idx].srcbusirq;
} else {
/*
* PCI IRQs are mapped in order
apic_printk(APIC_VERBOSE,KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
"IRQ %d Mode:%i Active:%i)\n",
-            apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
+            apic, mp_ioapics[apic].apicid, pin, cfg->vector,
irq, trigger, polarity);
-    if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+    if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry,
dest, trigger, polarity, cfg->vector)) {
printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
-               mp_ioapics[apic].mp_apicid, pin);
+               mp_ioapics[apic].apicid, pin);
__clear_irq_vector(irq, cfg);
return;
}
notcon = 1;
apic_printk(APIC_VERBOSE,
KERN_DEBUG " %d-%d",
-                    mp_ioapics[apic].mp_apicid,
-                    pin);
+                    mp_ioapics[apic].apicid, pin);
} else
apic_printk(APIC_VERBOSE, " %d-%d",
-                    mp_ioapics[apic].mp_apicid,
-                    pin);
+                    mp_ioapics[apic].apicid, pin);
continue;
}
if (notcon) {
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
for (i = 0; i < nr_ioapics; i++)
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-               mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
+               mp_ioapics[i].apicid, nr_ioapic_registers[i]);
/*
* We are a bit conservative about what we expect. We have to
spin_unlock_irqrestore(&ioapic_lock, flags);
printk("\n");
-    printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
+    printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
reg_00.raw = io_apic_read(apic, 0);
spin_unlock_irqrestore(&ioapic_lock, flags);
-        old_id = mp_ioapics[apic].mp_apicid;
+        old_id = mp_ioapics[apic].apicid;
-        if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
+        if (mp_ioapics[apic].apicid >= get_physical_broadcast()) {
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-                apic, mp_ioapics[apic].mp_apicid);
+                apic, mp_ioapics[apic].apicid);
printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
reg_00.bits.ID);
-            mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
+            mp_ioapics[apic].apicid = reg_00.bits.ID;
}
/*
* 'stuck on smp_invalidate_needed IPI wait' messages.
*/
if (check_apicid_used(phys_id_present_map,
-                      mp_ioapics[apic].mp_apicid)) {
+                      mp_ioapics[apic].apicid)) {
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-                apic, mp_ioapics[apic].mp_apicid);
+                apic, mp_ioapics[apic].apicid);
for (i = 0; i < get_physical_broadcast(); i++)
if (!physid_isset(i, phys_id_present_map))
break;
printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
i);
physid_set(i, phys_id_present_map);
-            mp_ioapics[apic].mp_apicid = i;
+            mp_ioapics[apic].apicid = i;
} else {
physid_mask_t tmp;
-            tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
+            tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid);
apic_printk(APIC_VERBOSE, "Setting %d in the "
"phys_id_present_map\n",
-                    mp_ioapics[apic].mp_apicid);
+                    mp_ioapics[apic].apicid);
physids_or(phys_id_present_map, phys_id_present_map, tmp);
}
* We need to adjust the IRQ routing table
* if the ID changed.
*/
-        if (old_id != mp_ioapics[apic].mp_apicid)
+        if (old_id != mp_ioapics[apic].apicid)
for (i = 0; i < mp_irq_entries; i++)
-                if (mp_irqs[i].mp_dstapic == old_id)
-                    mp_irqs[i].mp_dstapic
-                        = mp_ioapics[apic].mp_apicid;
+                if (mp_irqs[i].dstapic == old_id)
+                    mp_irqs[i].dstapic
+                        = mp_ioapics[apic].apicid;
/*
* Read the right value from the MPC table and
*/
apic_printk(APIC_VERBOSE, KERN_INFO
"...changing IO-APIC physical APIC ID to %d ...",
-            mp_ioapics[apic].mp_apicid);
+            mp_ioapics[apic].apicid);
-        reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
+        reg_00.bits.ID = mp_ioapics[apic].apicid;
spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(apic, 0, reg_00.raw);
spin_unlock_irqrestore(&ioapic_lock, flags);
spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(apic, 0);
spin_unlock_irqrestore(&ioapic_lock, flags);
-        if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
+        if (reg_00.bits.ID != mp_ioapics[apic].apicid)
printk("could not set ID!\n");
else
apic_printk(APIC_VERBOSE, " ok.\n");
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
-    cpumask_copy(&desc->affinity, mask);
+    cpumask_copy(desc->affinity, mask);
}
static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
}
/* everthing is clear. we have right of way */
-    migrate_ioapic_irq_desc(desc, &desc->pending_mask);
+    migrate_ioapic_irq_desc(desc, desc->pending_mask);
ret = 0;
desc->status &= ~IRQ_MOVE_PENDING;
-    cpumask_clear(&desc->pending_mask);
+    cpumask_clear(desc->pending_mask);
unmask:
unmask_IO_APIC_irq_desc(desc);
continue;
}
-        desc->chip->set_affinity(irq, &desc->pending_mask);
+        desc->chip->set_affinity(irq, desc->pending_mask);
spin_unlock_irqrestore(&desc->lock, flags);
}
}
{
if (desc->status & IRQ_LEVEL) {
desc->status |= IRQ_MOVE_PENDING;
-        cpumask_copy(&desc->pending_mask, mask);
+        cpumask_copy(desc->pending_mask, mask);
migrate_irq_remapped_level_desc(desc);
return;
}
/* domain has not changed, but affinity did */
me = smp_processor_id();
-    if (cpu_isset(me, desc->affinity)) {
+    if (cpumask_test_cpu(me, desc->affinity)) {
*descp = desc = move_irq_desc(desc, me);
/* get the new one */
cfg = desc->chip_data;
spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(dev->id, 0);
-    if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
-        reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
+    if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
+        reg_00.bits.ID = mp_ioapics[dev->id].apicid;
io_apic_write(dev->id, 0, reg_00.raw);
}
spin_unlock_irqrestore(&ioapic_lock, flags);
irq = 0;
spin_lock_irqsave(&vector_lock, flags);
-    for (new = irq_want; new < NR_IRQS; new++) {
+    for (new = irq_want; new < nr_irqs; new++) {
if (platform_legacy_irq(new))
continue;
int err;
unsigned dest;
+    if (disable_apic)
+        return -ENXIO;
+
cfg = irq_cfg(irq);
err = assign_irq_vector(irq, cfg, TARGET_CPUS);
if (err)
return 0;
}
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
-{
-    unsigned int irq;
-    int ret;
-    unsigned int irq_want;
-
-    irq_want = nr_irqs_gsi;
-    irq = create_irq_nr(irq_want);
-    if (irq == 0)
-        return -1;
-
-#ifdef CONFIG_INTR_REMAP
-    if (!intr_remapping_enabled)
-        goto no_ir;
-
-    ret = msi_alloc_irte(dev, irq, 1);
-    if (ret < 0)
-        goto error;
-no_ir:
-#endif
-    ret = setup_msi_irq(dev, msidesc, irq);
-    if (ret < 0) {
-        destroy_irq(irq);
-        return ret;
-    }
-    return 0;
-
-#ifdef CONFIG_INTR_REMAP
-error:
-    destroy_irq(irq);
-    return ret;
-#endif
-}
-
int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
unsigned int irq;
struct irq_cfg *cfg;
int err;
+    if (disable_apic)
+        return -ENXIO;
+
cfg = irq_cfg(irq);
err = assign_irq_vector(irq, cfg, TARGET_CPUS);
if (!err) {
}
#endif /* CONFIG_HT_IRQ */
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_UV
/*
* Re-target the irq to the specified CPU and enable the specified MMR located
* on the specified blade to allow the sending of MSIs to the specified CPU.
nr_irqs_gsi = nr;
}
+#ifdef CONFIG_SPARSE_IRQ
+int __init arch_probe_nr_irqs(void)
+{
+    int nr;
+
+    nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ?
+          (NR_VECTORS + (8 * nr_cpu_ids)) :
+          (NR_VECTORS + (32 * nr_ioapics)));
+
+    if (nr < nr_irqs && nr > nr_irqs_gsi)
+        nr_irqs = nr;
+
+    return 0;
+}
+#endif
+
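The probe sizes the irq space as NR_VECTORS plus the larger of 8 per possible CPU or 32 per I/O APIC, and only shrinks nr_irqs if that estimate is smaller while still covering all GSIs. A worked example with assumed numbers (NR_VECTORS = 256, 16 CPUs, 2 I/O APICs):

    /* 8*16 = 128 > 32*2 = 64  ->  nr = 256 + 128 = 384                 */
    /* nr_irqs becomes 384 only if 384 < nr_irqs and 384 > nr_irqs_gsi  */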
/* --------------------------------------------------------------------------
ACPI-based IOAPIC Configuration
-------------------------------------------------------------------------- */
return -1;
for (i = 0; i < mp_irq_entries; i++)
-        if (mp_irqs[i].mp_irqtype == mp_INT &&
-            mp_irqs[i].mp_srcbusirq == bus_irq)
+        if (mp_irqs[i].irqtype == mp_INT &&
+            mp_irqs[i].srcbusirq == bus_irq)
break;
if (i >= mp_irq_entries)
return -1;
*/
if (desc->status &
(IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-            mask = &desc->affinity;
+            mask = desc->affinity;
else
mask = TARGET_CPUS;
ioapic_res = ioapic_setup_resources();
for (i = 0; i < nr_ioapics; i++) {
if (smp_found_config) {
-            ioapic_phys = mp_ioapics[i].mp_apicaddr;
+            ioapic_phys = mp_ioapics[i].apicaddr;
#ifdef CONFIG_X86_32
if (!ioapic_phys) {
printk(KERN_ERR
#endif
#define COPY(x) { \
-    err |= __get_user(regs->x, &sc->x); \
+    get_user_ex(regs->x, &sc->x);       \
}
#define COPY_SEG(seg) { \
unsigned short tmp; \
-    err |= __get_user(tmp, &sc->seg);   \
+    get_user_ex(tmp, &sc->seg);         \
regs->seg = tmp; \
}
#define COPY_SEG_CPL3(seg) { \
unsigned short tmp; \
-    err |= __get_user(tmp, &sc->seg);   \
+    get_user_ex(tmp, &sc->seg);         \
regs->seg = tmp | 3; \
}
#define GET_SEG(seg) { \
unsigned short tmp; \
-    err |= __get_user(tmp, &sc->seg);   \
+    get_user_ex(tmp, &sc->seg);         \
loadsegment(seg, tmp); \
}
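These macros move the per-access error checks into a single try/catch-style region: inside get_user_try, each get_user_ex expands to an access whose fault is resolved by a shared exception-table fixup, and get_user_catch() collects any fault into err afterwards (via the thread_info uaccess_err field added earlier in this series). Usage pattern in miniature, illustrative only:

    int err = 0;

    get_user_try {
        get_user_ex(val, &uptr->field); /* no if (err) after each access */
        get_user_ex(len, &uptr->len);
    } get_user_catch(err);              /* any fault shows up here */

    if (err)
        return -EFAULT;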
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
+    get_user_try {
+
#ifdef CONFIG_X86_32
-    GET_SEG(gs);
-    COPY_SEG(fs);
-    COPY_SEG(es);
-    COPY_SEG(ds);
+        GET_SEG(gs);
+        COPY_SEG(fs);
+        COPY_SEG(es);
+        COPY_SEG(ds);
#endif /* CONFIG_X86_32 */
-    COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
-    COPY(dx); COPY(cx); COPY(ip);
+        COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
+        COPY(dx); COPY(cx); COPY(ip);
#ifdef CONFIG_X86_64
-    COPY(r8);
-    COPY(r9);
-    COPY(r10);
-    COPY(r11);
-    COPY(r12);
-    COPY(r13);
-    COPY(r14);
-    COPY(r15);
+        COPY(r8);
+        COPY(r9);
+        COPY(r10);
+        COPY(r11);
+        COPY(r12);
+        COPY(r13);
+        COPY(r14);
+        COPY(r15);
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_32
-    COPY_SEG_CPL3(cs);
-    COPY_SEG_CPL3(ss);
+        COPY_SEG_CPL3(cs);
+        COPY_SEG_CPL3(ss);
#else /* !CONFIG_X86_32 */
-    /* Kernel saves and restores only the CS segment register on signals,
-     * which is the bare minimum needed to allow mixed 32/64-bit code.
-     * App's signal handler can save/restore other segments if needed. */
-    COPY_SEG_CPL3(cs);
+        /* Kernel saves and restores only the CS segment register on signals,
+         * which is the bare minimum needed to allow mixed 32/64-bit code.
+         * App's signal handler can save/restore other segments if needed. */
+        COPY_SEG_CPL3(cs);
#endif /* CONFIG_X86_32 */
-    err |= __get_user(tmpflags, &sc->flags);
-    regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
-    regs->orig_ax = -1;     /* disable syscall checks */
+        get_user_ex(tmpflags, &sc->flags);
+        regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+        regs->orig_ax = -1;     /* disable syscall checks */
+
+        get_user_ex(buf, &sc->fpstate);
+        err |= restore_i387_xstate(buf);
-    err |= __get_user(buf, &sc->fpstate);
-    err |= restore_i387_xstate(buf);
+        get_user_ex(*pax, &sc->ax);
+    } get_user_catch(err);
-    err |= __get_user(*pax, &sc->ax);
return err;
}
{
int err = 0;
-#ifdef CONFIG_X86_32
-    {
-        unsigned int tmp;
+    put_user_try {
-        savesegment(gs, tmp);
-        err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
-    }
-    err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
-    err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
-    err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
+#ifdef CONFIG_X86_32
+        {
+            unsigned int tmp;
+
+            savesegment(gs, tmp);
+            put_user_ex(tmp, (unsigned int __user *)&sc->gs);
+        }
+        put_user_ex(regs->fs, (unsigned int __user *)&sc->fs);
+        put_user_ex(regs->es, (unsigned int __user *)&sc->es);
+        put_user_ex(regs->ds, (unsigned int __user *)&sc->ds);
#endif /* CONFIG_X86_32 */
-    err |= __put_user(regs->di, &sc->di);
-    err |= __put_user(regs->si, &sc->si);
-    err |= __put_user(regs->bp, &sc->bp);
-    err |= __put_user(regs->sp, &sc->sp);
-    err |= __put_user(regs->bx, &sc->bx);
-    err |= __put_user(regs->dx, &sc->dx);
-    err |= __put_user(regs->cx, &sc->cx);
-    err |= __put_user(regs->ax, &sc->ax);
+        put_user_ex(regs->di, &sc->di);
+        put_user_ex(regs->si, &sc->si);
+        put_user_ex(regs->bp, &sc->bp);
+        put_user_ex(regs->sp, &sc->sp);
+        put_user_ex(regs->bx, &sc->bx);
+        put_user_ex(regs->dx, &sc->dx);
+        put_user_ex(regs->cx, &sc->cx);
+        put_user_ex(regs->ax, &sc->ax);
#ifdef CONFIG_X86_64
-    err |= __put_user(regs->r8, &sc->r8);
-    err |= __put_user(regs->r9, &sc->r9);
-    err |= __put_user(regs->r10, &sc->r10);
-    err |= __put_user(regs->r11, &sc->r11);
-    err |= __put_user(regs->r12, &sc->r12);
-    err |= __put_user(regs->r13, &sc->r13);
-    err |= __put_user(regs->r14, &sc->r14);
-    err |= __put_user(regs->r15, &sc->r15);
+        put_user_ex(regs->r8, &sc->r8);
+        put_user_ex(regs->r9, &sc->r9);
+        put_user_ex(regs->r10, &sc->r10);
+        put_user_ex(regs->r11, &sc->r11);
+        put_user_ex(regs->r12, &sc->r12);
+        put_user_ex(regs->r13, &sc->r13);
+        put_user_ex(regs->r14, &sc->r14);
+        put_user_ex(regs->r15, &sc->r15);
#endif /* CONFIG_X86_64 */
-    err |= __put_user(current->thread.trap_no, &sc->trapno);
-    err |= __put_user(current->thread.error_code, &sc->err);
-    err |= __put_user(regs->ip, &sc->ip);
+        put_user_ex(current->thread.trap_no, &sc->trapno);
+        put_user_ex(current->thread.error_code, &sc->err);
+        put_user_ex(regs->ip, &sc->ip);
#ifdef CONFIG_X86_32
-    err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
-    err |= __put_user(regs->flags, &sc->flags);
-    err |= __put_user(regs->sp, &sc->sp_at_signal);
-    err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
+        put_user_ex(regs->cs, (unsigned int __user *)&sc->cs);
+        put_user_ex(regs->flags, &sc->flags);
+        put_user_ex(regs->sp, &sc->sp_at_signal);
+        put_user_ex(regs->ss, (unsigned int __user *)&sc->ss);
#else /* !CONFIG_X86_32 */
-    err |= __put_user(regs->flags, &sc->flags);
-    err |= __put_user(regs->cs, &sc->cs);
-    err |= __put_user(0, &sc->gs);
-    err |= __put_user(0, &sc->fs);
+        put_user_ex(regs->flags, &sc->flags);
+        put_user_ex(regs->cs, &sc->cs);
+        put_user_ex(0, &sc->gs);
+        put_user_ex(0, &sc->fs);
#endif /* CONFIG_X86_32 */
-    err |= __put_user(fpstate, &sc->fpstate);
+        put_user_ex(fpstate, &sc->fpstate);
-    /* non-iBCS2 extensions.. */
-    err |= __put_user(mask, &sc->oldmask);
-    err |= __put_user(current->thread.cr2, &sc->cr2);
+        /* non-iBCS2 extensions.. */
+        put_user_ex(mask, &sc->oldmask);
+        put_user_ex(current->thread.cr2, &sc->cr2);
+    } put_user_catch(err);
return err;
}
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
-    err |= __put_user(sig, &frame->sig);
-    err |= __put_user(&frame->info, &frame->pinfo);
-    err |= __put_user(&frame->uc, &frame->puc);
-    err |= copy_siginfo_to_user(&frame->info, info);
-    if (err)
-        return -EFAULT;
-
-    /* Create the ucontext.  */
-    if (cpu_has_xsave)
-        err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
-    else
-        err |= __put_user(0, &frame->uc.uc_flags);
-    err |= __put_user(0, &frame->uc.uc_link);
-    err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-    err |= __put_user(sas_ss_flags(regs->sp),
-                      &frame->uc.uc_stack.ss_flags);
-    err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-    err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
-                            regs, set->sig[0]);
-    err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-    if (err)
-        return -EFAULT;
-
-    /* Set up to return from userspace.  */
-    restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
-    if (ka->sa.sa_flags & SA_RESTORER)
-        restorer = ka->sa.sa_restorer;
-    err |= __put_user(restorer, &frame->pretcode);
-
-    /*
-     * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
-     *
-     * WE DO NOT USE IT ANY MORE! It's only left here for historical
-     * reasons and because gdb uses it as a signature to notice
-     * signal handler stack frames.
-     */
-    err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
+    put_user_try {
+        put_user_ex(sig, &frame->sig);
+        put_user_ex(&frame->info, &frame->pinfo);
+        put_user_ex(&frame->uc, &frame->puc);
+        err |= copy_siginfo_to_user(&frame->info, info);
+
+        /* Create the ucontext.  */
+        if (cpu_has_xsave)
+            put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
+        else
+            put_user_ex(0, &frame->uc.uc_flags);
+        put_user_ex(0, &frame->uc.uc_link);
+        put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+        put_user_ex(sas_ss_flags(regs->sp),
+                    &frame->uc.uc_stack.ss_flags);
+        put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+        err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+                                regs, set->sig[0]);
+        err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+        /* Set up to return from userspace.  */
+        restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
+        if (ka->sa.sa_flags & SA_RESTORER)
+            restorer = ka->sa.sa_restorer;
+        put_user_ex(restorer, &frame->pretcode);
+
+        /*
+         * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
+         *
+         * WE DO NOT USE IT ANY MORE! It's only left here for historical
+         * reasons and because gdb uses it as a signature to notice
+         * signal handler stack frames.
+         */
+        put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
+    } put_user_catch(err);
if (err)
return -EFAULT;
}
------------ - /* Create the ucontext. */
------------ - if (cpu_has_xsave)
------------ - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
------------ - else
------------ - err |= __put_user(0, &frame->uc.uc_flags);
------------ - err |= __put_user(0, &frame->uc.uc_link);
------------ - err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
------------ - err |= __put_user(sas_ss_flags(regs->sp),
------------ - &frame->uc.uc_stack.ss_flags);
------------ - err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
------------ - err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
------------ - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
------------ -
------------ - /* Set up to return from userspace. If provided, use a stub
------------ - already in userspace. */
------------ - /* x86-64 should always use SA_RESTORER. */
------------ - if (ka->sa.sa_flags & SA_RESTORER) {
------------ - err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
------------ - } else {
------------ - /* could use a vstub here */
------------ - return -EFAULT;
------------ - }
++++++++++++ + put_user_try {
++++++++++++ + /* Create the ucontext. */
++++++++++++ + if (cpu_has_xsave)
++++++++++++ + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
++++++++++++ + else
++++++++++++ + put_user_ex(0, &frame->uc.uc_flags);
++++++++++++ + put_user_ex(0, &frame->uc.uc_link);
++++++++++++ + put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
++++++++++++ + put_user_ex(sas_ss_flags(regs->sp),
++++++++++++ + &frame->uc.uc_stack.ss_flags);
++++++++++++ + put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
++++++++++++ + err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
++++++++++++ + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
++++++++++++ +
++++++++++++ + /* Set up to return from userspace. If provided, use a stub
++++++++++++ + already in userspace. */
++++++++++++ + /* x86-64 should always use SA_RESTORER. */
++++++++++++ + if (ka->sa.sa_flags & SA_RESTORER) {
++++++++++++ + put_user_ex(ka->sa.sa_restorer, &frame->pretcode);
++++++++++++ + } else {
++++++++++++ + /* could use a vstub here */
++++++++++++ + err |= -EFAULT;
++++++++++++ + }
++++++++++++ + } put_user_catch(err);
if (err)
return -EFAULT;
struct old_sigaction __user *oact)
{
struct k_sigaction new_ka, old_ka;
------------ - int ret;
++++++++++++ + int ret = 0;
if (act) {
old_sigset_t mask;
------------ - if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
------------ - __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
------------ - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
++++++++++++ + if (!access_ok(VERIFY_READ, act, sizeof(*act)))
return -EFAULT;
------------ - __get_user(new_ka.sa.sa_flags, &act->sa_flags);
------------ - __get_user(mask, &act->sa_mask);
++++++++++++ + get_user_try {
++++++++++++ + get_user_ex(new_ka.sa.sa_handler, &act->sa_handler);
++++++++++++ + get_user_ex(new_ka.sa.sa_flags, &act->sa_flags);
++++++++++++ + get_user_ex(mask, &act->sa_mask);
++++++++++++ + get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer);
++++++++++++ + } get_user_catch(ret);
++++++++++++ +
++++++++++++ + if (ret)
++++++++++++ + return -EFAULT;
siginitset(&new_ka.sa.sa_mask, mask);
}
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
------------ - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
------------ - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
------------ - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
++++++++++++ + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)))
return -EFAULT;
------------ - __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
------------ - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
++++++++++++ + put_user_try {
++++++++++++ + put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler);
++++++++++++ + put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags);
++++++++++++ + put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
++++++++++++ + put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer);
++++++++++++ + } put_user_catch(ret);
++++++++++++ +
++++++++++++ + if (ret)
++++++++++++ + return -EFAULT;
}
return ret;
}
#ifdef CONFIG_X86_32
------------ asmlinkage int sys_rt_sigreturn(struct pt_regs regs)
++++++++++++ /*
++++++++++++ * Note: do not pass in pt_regs directly as with tail-call optimization
++++++++++++ * GCC will incorrectly stomp on the caller's frame and corrupt user-space
++++++++++++ * register state:
++++++++++++ */
++++++++++++ asmlinkage int sys_rt_sigreturn(unsigned long __unused)
{
------------ 	return do_rt_sigreturn(&regs);
++++++++++++ struct pt_regs *regs = (struct pt_regs *)&__unused;
++++++++++++
++++++++++++ return do_rt_sigreturn(regs);
}
#else /* !CONFIG_X86_32 */
asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>
+++++++++++++#include <linux/magic.h>
#include <asm/system.h>
#include <asm/desc.h>
*
* Opcode checker based on code by Richard Brunner
*/
------ ------static int is_prefetch(struct pt_regs *regs, unsigned long addr,
------ ------ unsigned long error_code)
++++++ ++++++static int is_prefetch(struct pt_regs *regs, unsigned long error_code,
++++++ ++++++ unsigned long addr)
{
unsigned char *instr;
int scan_more = 1;
}
#ifdef CONFIG_X86_64
------ ------static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
------ ------ unsigned long error_code)
++++++ ++++++static noinline void pgtable_bad(struct pt_regs *regs,
++++++ ++++++ unsigned long error_code, unsigned long address)
{
unsigned long flags = oops_begin();
int sig = SIGKILL;
------ ------ struct task_struct *tsk;
++++++ ++++++ struct task_struct *tsk = current;
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
------ ------ current->comm, address);
++++++ ++++++ tsk->comm, address);
dump_pagetable(address);
------- ------ tsk = current;
tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
tsk->thread.error_code = error_code;
}
#endif
++++++ ++++++static noinline void no_context(struct pt_regs *regs,
++++++ ++++++ unsigned long error_code, unsigned long address)
++++++ ++++++{
++++++ ++++++ struct task_struct *tsk = current;
+++++++++++++ unsigned long *stackend;
+++++++++++++
++++++ ++++++#ifdef CONFIG_X86_64
++++++ ++++++ unsigned long flags;
++++++ ++++++ int sig;
++++++ ++++++#endif
++++++ ++++++
++++++ ++++++ /* Are we prepared to handle this kernel fault? */
++++++ ++++++ if (fixup_exception(regs))
++++++ ++++++ return;
++++++ ++++++
++++++ ++++++ /*
++++++ ++++++ * X86_32
++++++ ++++++ * Valid to do another page fault here, because if this fault
++++++ ++++++ * had been triggered by is_prefetch, fixup_exception would have
++++++ ++++++ * handled it.
++++++ ++++++ *
++++++ ++++++ * X86_64
++++++ ++++++ * Hall of shame of CPU/BIOS bugs.
++++++ ++++++ */
++++++ ++++++ if (is_prefetch(regs, error_code, address))
++++++ ++++++ return;
++++++ ++++++
++++++ ++++++ if (is_errata93(regs, address))
++++++ ++++++ return;
++++++ ++++++
++++++ ++++++ /*
++++++ ++++++ * Oops. The kernel tried to access some bad page. We'll have to
++++++ ++++++ * terminate things with extreme prejudice.
++++++ ++++++ */
++++++ ++++++#ifdef CONFIG_X86_32
++++++ ++++++ bust_spinlocks(1);
++++++ ++++++#else
++++++ ++++++ flags = oops_begin();
++++++ ++++++#endif
++++++ ++++++
++++++ ++++++ show_fault_oops(regs, error_code, address);
++++++ ++++++
+++++++++++++ stackend = end_of_stack(tsk);
+++++++++++++ if (*stackend != STACK_END_MAGIC)
+++++++++++++ printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
+++++++++++++
++++++ ++++++ tsk->thread.cr2 = address;
++++++ ++++++ tsk->thread.trap_no = 14;
++++++ ++++++ tsk->thread.error_code = error_code;
++++++ ++++++
++++++ ++++++#ifdef CONFIG_X86_32
++++++ ++++++ die("Oops", regs, error_code);
++++++ ++++++ bust_spinlocks(0);
++++++ ++++++ do_exit(SIGKILL);
++++++ ++++++#else
++++++ ++++++ sig = SIGKILL;
++++++ ++++++ if (__die("Oops", regs, error_code))
++++++ ++++++ sig = 0;
++++++ ++++++ /* Executive summary in case the body of the oops scrolled away */
++++++ ++++++ printk(KERN_EMERG "CR2: %016lx\n", address);
++++++ ++++++ oops_end(flags, regs, sig);
++++++ ++++++#endif
++++++ ++++++}
++++++ ++++++
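no_context() above now also compares *end_of_stack(tsk) against STACK_END_MAGIC to flag a stack overrun in the oops output. The userspace sketch below models the same canary idea; the buffer and the MAGIC value are stand-ins, not the kernel's task-stack layout.

/* Userspace sketch of the stack-end canary check: plant a magic word
 * at the far end of a buffer and test it after use. MAGIC is an
 * arbitrary stand-in for the kernel's STACK_END_MAGIC. */
#include <stdio.h>
#include <string.h>

#define MAGIC 0x57AC6E9DUL

static unsigned long stack_area[256];

static void overrun(size_t n)
{
	memset(stack_area, 0, n * sizeof(stack_area[0]));
}

int main(void)
{
	stack_area[0] = MAGIC;	/* canary at the low end of the "stack" */

	overrun(1);		/* a 1-word overrun clobbers the canary */
	if (stack_area[0] != MAGIC)
		printf("Thread overran stack, or stack corrupted\n");
	return 0;
}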
++++++ ++++++static void __bad_area_nosemaphore(struct pt_regs *regs,
++++++ ++++++ unsigned long error_code, unsigned long address,
++++++ ++++++ int si_code)
++++++ ++++++{
++++++ ++++++ struct task_struct *tsk = current;
++++++ ++++++
++++++ ++++++ /* User mode accesses just cause a SIGSEGV */
++++++ ++++++ if (error_code & PF_USER) {
++++++ ++++++ /*
++++++ ++++++ * It's possible to have interrupts off here.
++++++ ++++++ */
++++++ ++++++ local_irq_enable();
++++++ ++++++
++++++ ++++++ /*
++++++ ++++++ * Valid to do another page fault here because this one came
++++++ ++++++ * from user space.
++++++ ++++++ */
++++++ ++++++ if (is_prefetch(regs, error_code, address))
++++++ ++++++ return;
++++++ ++++++
++++++ ++++++ if (is_errata100(regs, address))
++++++ ++++++ return;
++++++ ++++++
++++++ ++++++ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
++++++ ++++++ printk_ratelimit()) {
++++++ ++++++ printk(
++++++ ++++++ "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
++++++ ++++++ task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
++++++ ++++++ tsk->comm, task_pid_nr(tsk), address,
++++++ ++++++ (void *) regs->ip, (void *) regs->sp, error_code);
++++++ ++++++ print_vma_addr(" in ", regs->ip);
++++++ ++++++ printk("\n");
++++++ ++++++ }
++++++ ++++++
++++++ ++++++ tsk->thread.cr2 = address;
++++++ ++++++ /* Kernel addresses are always protection faults */
++++++ ++++++ tsk->thread.error_code = error_code | (address >= TASK_SIZE);
++++++ ++++++ tsk->thread.trap_no = 14;
++++++ ++++++ force_sig_info_fault(SIGSEGV, si_code, address, tsk);
++++++ ++++++ return;
++++++ ++++++ }
++++++ ++++++
++++++ ++++++ if (is_f00f_bug(regs, address))
++++++ ++++++ return;
++++++ ++++++
++++++ ++++++ no_context(regs, error_code, address);
++++++ ++++++}
++++++ ++++++
++++++ ++++++static noinline void bad_area_nosemaphore(struct pt_regs *regs,
++++++ ++++++ unsigned long error_code, unsigned long address)
++++++ ++++++{
++++++ ++++++ __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
++++++ ++++++}
++++++ ++++++
++++++ ++++++static void __bad_area(struct pt_regs *regs,
++++++ ++++++ unsigned long error_code, unsigned long address,
++++++ ++++++ int si_code)
++++++ ++++++{
++++++ ++++++ struct mm_struct *mm = current->mm;
++++++ ++++++
++++++ ++++++ /*
++++++ ++++++ * Something tried to access memory that isn't in our memory map..
++++++ ++++++ * Fix it, but check if it's kernel or user first..
++++++ ++++++ */
++++++ ++++++ up_read(&mm->mmap_sem);
++++++ ++++++
++++++ ++++++ __bad_area_nosemaphore(regs, error_code, address, si_code);
++++++ ++++++}
++++++ ++++++
++++++ ++++++static noinline void bad_area(struct pt_regs *regs,
++++++ ++++++ unsigned long error_code, unsigned long address)
++++++ ++++++{
++++++ ++++++ __bad_area(regs, error_code, address, SEGV_MAPERR);
++++++ ++++++}
++++++ ++++++
++++++ ++++++static noinline void bad_area_access_error(struct pt_regs *regs,
++++++ ++++++ unsigned long error_code, unsigned long address)
++++++ ++++++{
++++++ ++++++ __bad_area(regs, error_code, address, SEGV_ACCERR);
++++++ ++++++}
++++++ ++++++
++++++ ++++++/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
++++++ ++++++static void out_of_memory(struct pt_regs *regs,
++++++ ++++++ unsigned long error_code, unsigned long address)
++++++ ++++++{
++++++ ++++++ /*
++++++ ++++++ * We ran out of memory, call the OOM killer, and return to userspace
++++++ ++++++ * (which will retry the fault, or kill us if we got oom-killed).
++++++ ++++++ */
++++++ ++++++ up_read(&current->mm->mmap_sem);
++++++ ++++++ pagefault_out_of_memory();
++++++ ++++++}
++++++ ++++++
++++++ ++++++static void do_sigbus(struct pt_regs *regs,
++++++ ++++++ unsigned long error_code, unsigned long address)
++++++ ++++++{
++++++ ++++++ struct task_struct *tsk = current;
++++++ ++++++ struct mm_struct *mm = tsk->mm;
++++++ ++++++
++++++ ++++++ up_read(&mm->mmap_sem);
++++++ ++++++
++++++ ++++++ /* Kernel mode? Handle exceptions or die */
++++++ ++++++ if (!(error_code & PF_USER))
++++++ ++++++ no_context(regs, error_code, address);
++++++ ++++++#ifdef CONFIG_X86_32
++++++ ++++++ /* User space => ok to do another page fault */
++++++ ++++++ if (is_prefetch(regs, error_code, address))
++++++ ++++++ return;
++++++ ++++++#endif
++++++ ++++++ tsk->thread.cr2 = address;
++++++ ++++++ tsk->thread.error_code = error_code;
++++++ ++++++ tsk->thread.trap_no = 14;
++++++ ++++++ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
++++++ ++++++}
++++++ ++++++
++++++ ++++++static noinline void mm_fault_error(struct pt_regs *regs,
++++++ ++++++ unsigned long error_code, unsigned long address, unsigned int fault)
++++++ ++++++{
++++++ ++++++ if (fault & VM_FAULT_OOM)
++++++ ++++++ out_of_memory(regs, error_code, address);
++++++ ++++++ else if (fault & VM_FAULT_SIGBUS)
++++++ ++++++ do_sigbus(regs, error_code, address);
++++++ ++++++ else
++++++ ++++++ BUG();
++++++ ++++++}
++++++ ++++++
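Taken together, the new no_context(), __bad_area_nosemaphore(), bad_area*(), out_of_memory(), do_sigbus() and mm_fault_error() helpers lift the error paths out of do_page_fault(), replacing the goto labels shown removed further down. In miniature, with invented names, the refactor looks like this:

/* Shape of the goto-label -> helper-function refactor, with invented
 * names. Each cold error path becomes a function the fast path calls
 * and returns from, instead of jumping to a shared label. */
#include <stdio.h>

/* before: */
static int handle_old(int ok)
{
	if (!ok)
		goto bad;
	return 0;
bad:
	printf("error path\n");
	return -1;
}

/* after: */
static void bad_path(void)
{
	printf("error path\n");
}

static int handle_new(int ok)
{
	if (!ok) {
		bad_path();
		return -1;
	}
	return 0;
}

int main(void)
{
	return handle_old(1) | handle_new(1);
}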
static int spurious_fault_check(unsigned long error_code, pte_t *pte)
{
if ((error_code & PF_WRITE) && !pte_write(*pte))
* There are no security implications to leaving a stale TLB when
* increasing the permissions on a page.
*/
------ ------static int spurious_fault(unsigned long address,
------ ------ unsigned long error_code)
++++++ ++++++static noinline int spurious_fault(unsigned long error_code,
++++++ ++++++ unsigned long address)
{
pgd_t *pgd;
pud_t *pud;
*
* This assumes no large pages in there.
*/
------ ------static int vmalloc_fault(unsigned long address)
++++++ ++++++static noinline int vmalloc_fault(unsigned long address)
{
#ifdef CONFIG_X86_32
unsigned long pgd_paddr;
happen within a race in page table update. In the latter
case just flush. */
-- - - pgd = pgd_offset(current->mm ?: &init_mm, address);
++ + + pgd = pgd_offset(current->active_mm, address);
pgd_ref = pgd_offset_k(address);
if (pgd_none(*pgd_ref))
return -1;
int show_unhandled_signals = 1;
++++++ ++++++static inline int access_error(unsigned long error_code, int write,
++++++ ++++++ struct vm_area_struct *vma)
++++++ ++++++{
++++++ ++++++ if (write) {
++++++ ++++++ /* write, present and write, not present */
++++++ ++++++ if (unlikely(!(vma->vm_flags & VM_WRITE)))
++++++ ++++++ return 1;
++++++ ++++++ } else if (unlikely(error_code & PF_PROT)) {
++++++ ++++++ /* read, present */
++++++ ++++++ return 1;
++++++ ++++++ } else {
++++++ ++++++ /* read, not present */
++++++ ++++++ if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
++++++ ++++++ return 1;
++++++ ++++++ }
++++++ ++++++
++++++ ++++++ return 0;
++++++ ++++++}
++++++ ++++++
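access_error() condenses the old switch on (error_code & (PF_PROT|PF_WRITE)) in do_page_fault() into three tests. The standalone sketch below walks the same truth table; the bit positions follow the x86 hardware page-fault error code (PF_PROT is bit 0, PF_WRITE is bit 1).

/* Standalone decode of the page-fault error-code cases that
 * access_error() distinguishes. Bit values match the x86 hardware
 * error code: PF_PROT is bit 0, PF_WRITE is bit 1. */
#include <stdio.h>

#define PF_PROT  (1 << 0)	/* fault on a present page */
#define PF_WRITE (1 << 1)	/* fault was a write */

static const char *classify(unsigned long error_code)
{
	if (error_code & PF_WRITE)
		return (error_code & PF_PROT) ? "write, present"
					      : "write, not present";
	return (error_code & PF_PROT) ? "read, present"
				      : "read, not present";
}

int main(void)
{
	unsigned long code;

	for (code = 0; code <= (PF_PROT | PF_WRITE); code++)
		printf("error_code=%lu: %s\n", code, classify(code));
	return 0;
}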
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
#endif
void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
++++++ ++++++ unsigned long address;
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct *vma;
------ ------ unsigned long address;
------ ------ int write, si_code;
++++++ ++++++ int write;
int fault;
------ ------#ifdef CONFIG_X86_64
------ ------ unsigned long flags;
------ ------ int sig;
------ ------#endif
tsk = current;
mm = tsk->mm;
/* get the address */
address = read_cr2();
------ ------ si_code = SEGV_MAPERR;
------ ------
------ ------ if (notify_page_fault(regs))
++++++ ++++++ if (unlikely(notify_page_fault(regs)))
return;
if (unlikely(kmmio_fault(regs, address)))
return;
return;
/* Can handle a stale RO->RW TLB */
------ ------ if (spurious_fault(address, error_code))
++++++ ++++++ if (spurious_fault(error_code, address))
return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock.
*/
------ ------ goto bad_area_nosemaphore;
++++++ ++++++ bad_area_nosemaphore(regs, error_code, address);
++++++ ++++++ return;
}
------ ------
/*
* It's safe to allow irq's after cr2 has been saved and the
* vmalloc fault has been handled.
#ifdef CONFIG_X86_64
if (unlikely(error_code & PF_RSVD))
------ ------ pgtable_bad(address, regs, error_code);
++++++ ++++++ pgtable_bad(regs, error_code, address);
#endif
/*
* If we're in an interrupt, have no user context or are running in an
* atomic region then we must not take the fault.
*/
------ ------ if (unlikely(in_atomic() || !mm))
------ ------ goto bad_area_nosemaphore;
++++++ ++++++ if (unlikely(in_atomic() || !mm)) {
++++++ ++++++ bad_area_nosemaphore(regs, error_code, address);
++++++ ++++++ return;
++++++ ++++++ }
/*
* When running in the kernel we expect faults to occur only to
* source. If this is invalid we can skip the address space check,
* thus avoiding the deadlock.
*/
------ ------ if (!down_read_trylock(&mm->mmap_sem)) {
++++++ ++++++ if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
if ((error_code & PF_USER) == 0 &&
------ ------ !search_exception_tables(regs->ip))
------ ------ goto bad_area_nosemaphore;
++++++ ++++++ !search_exception_tables(regs->ip)) {
++++++ ++++++ bad_area_nosemaphore(regs, error_code, address);
++++++ ++++++ return;
++++++ ++++++ }
down_read(&mm->mmap_sem);
}
vma = find_vma(mm, address);
------ ------ if (!vma)
------ ------ goto bad_area;
------ ------ if (vma->vm_start <= address)
++++++ ++++++ if (unlikely(!vma)) {
++++++ ++++++ bad_area(regs, error_code, address);
++++++ ++++++ return;
++++++ ++++++ }
++++++ ++++++ if (likely(vma->vm_start <= address))
goto good_area;
------ ------ if (!(vma->vm_flags & VM_GROWSDOWN))
------ ------ goto bad_area;
++++++ ++++++ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
++++++ ++++++ bad_area(regs, error_code, address);
++++++ ++++++ return;
++++++ ++++++ }
if (error_code & PF_USER) {
/*
* Accessing the stack below %sp is always a bug.
* The large cushion allows instructions like enter
* and pusha to work. ("enter $65535,$31" pushes
* 32 pointers and then decrements %sp by 65535.)
*/
------ ------ if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
------ ------ goto bad_area;
++++++ ++++++ if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
++++++ ++++++ bad_area(regs, error_code, address);
++++++ ++++++ return;
++++++ ++++++ }
}
------ ------ if (expand_stack(vma, address))
------ ------ goto bad_area;
------ ------/*
------ ------ * Ok, we have a good vm_area for this memory access, so
------ ------ * we can handle it..
------ ------ */
++++++ ++++++ if (unlikely(expand_stack(vma, address))) {
++++++ ++++++ bad_area(regs, error_code, address);
++++++ ++++++ return;
++++++ ++++++ }
++++++ ++++++
++++++ ++++++ /*
++++++ ++++++ * Ok, we have a good vm_area for this memory access, so
++++++ ++++++ * we can handle it..
++++++ ++++++ */
good_area:
------ ------ si_code = SEGV_ACCERR;
------ ------ write = 0;
------ ------ switch (error_code & (PF_PROT|PF_WRITE)) {
------ ------ default: /* 3: write, present */
------ ------ /* fall through */
------ ------ case PF_WRITE: /* write, not present */
------ ------ if (!(vma->vm_flags & VM_WRITE))
------ ------ goto bad_area;
------ ------ write++;
------ ------ break;
------ ------ case PF_PROT: /* read, present */
------ ------ goto bad_area;
------ ------ case 0: /* read, not present */
------ ------ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
------ ------ goto bad_area;
++++++ ++++++ write = error_code & PF_WRITE;
++++++ ++++++ if (unlikely(access_error(error_code, write, vma))) {
++++++ ++++++ bad_area_access_error(regs, error_code, address);
++++++ ++++++ return;
}
/*
 * If for any reason at all we couldn't handle the fault,
 * make sure we exit gracefully rather than endlessly redo
 * the fault.
 */
fault = handle_mm_fault(mm, vma, address, write);
if (unlikely(fault & VM_FAULT_ERROR)) {
------ ------ if (fault & VM_FAULT_OOM)
------ ------ goto out_of_memory;
------ ------ else if (fault & VM_FAULT_SIGBUS)
------ ------ goto do_sigbus;
------ ------ BUG();
++++++ ++++++ mm_fault_error(regs, error_code, address, fault);
++++++ ++++++ return;
}
if (fault & VM_FAULT_MAJOR)
tsk->maj_flt++;
}
#endif
up_read(&mm->mmap_sem);
------ ------ return;
------ ------
------ ------/*
------ ------ * Something tried to access memory that isn't in our memory map..
------ ------ * Fix it, but check if it's kernel or user first..
------ ------ */
------ ------bad_area:
------ ------ up_read(&mm->mmap_sem);
------ ------
------ ------bad_area_nosemaphore:
------ ------ /* User mode accesses just cause a SIGSEGV */
------ ------ if (error_code & PF_USER) {
------ ------ /*
------ ------ * It's possible to have interrupts off here.
------ ------ */
------ ------ local_irq_enable();
------ ------
------ ------ /*
------ ------ * Valid to do another page fault here because this one came
------ ------ * from user space.
------ ------ */
------ ------ if (is_prefetch(regs, address, error_code))
------ ------ return;
------ ------
------ ------ if (is_errata100(regs, address))
------ ------ return;
------ ------
------ ------ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
------ ------ printk_ratelimit()) {
------ ------ printk(
------ ------ "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
------ ------ task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
------ ------ tsk->comm, task_pid_nr(tsk), address,
------ ------ (void *) regs->ip, (void *) regs->sp, error_code);
------ ------ print_vma_addr(" in ", regs->ip);
------ ------ printk("\n");
------ ------ }
------ ------
------ ------ tsk->thread.cr2 = address;
------ ------ /* Kernel addresses are always protection faults */
------ ------ tsk->thread.error_code = error_code | (address >= TASK_SIZE);
------ ------ tsk->thread.trap_no = 14;
------ ------ force_sig_info_fault(SIGSEGV, si_code, address, tsk);
------ ------ return;
------ ------ }
------ ------
------ ------ if (is_f00f_bug(regs, address))
------ ------ return;
------ ------
------ ------no_context:
------ ------ /* Are we prepared to handle this kernel fault? */
------ ------ if (fixup_exception(regs))
------ ------ return;
------ ------
------ ------ /*
------ ------ * X86_32
------ ------ * Valid to do another page fault here, because if this fault
------ ------ * had been triggered by is_prefetch fixup_exception would have
------ ------ * handled it.
------ ------ *
------ ------ * X86_64
------ ------ * Hall of shame of CPU/BIOS bugs.
------ ------ */
------ ------ if (is_prefetch(regs, address, error_code))
------ ------ return;
------ ------
------ ------ if (is_errata93(regs, address))
------ ------ return;
------ ------
------ ------/*
------ ------ * Oops. The kernel tried to access some bad page. We'll have to
------ ------ * terminate things with extreme prejudice.
------ ------ */
------ ------#ifdef CONFIG_X86_32
------ ------ bust_spinlocks(1);
------ ------#else
------ ------ flags = oops_begin();
------ ------#endif
------ ------
------ ------ show_fault_oops(regs, error_code, address);
------ ------
------ ------ tsk->thread.cr2 = address;
------ ------ tsk->thread.trap_no = 14;
------ ------ tsk->thread.error_code = error_code;
------ ------
------ ------#ifdef CONFIG_X86_32
------ ------ die("Oops", regs, error_code);
------ ------ bust_spinlocks(0);
------ ------ do_exit(SIGKILL);
------ ------#else
------ ------ sig = SIGKILL;
------ ------ if (__die("Oops", regs, error_code))
------ ------ sig = 0;
------ ------ /* Executive summary in case the body of the oops scrolled away */
------ ------ printk(KERN_EMERG "CR2: %016lx\n", address);
------ ------ oops_end(flags, regs, sig);
------ ------#endif
------ ------
------ ------out_of_memory:
------ ------ /*
------ ------ * We ran out of memory, call the OOM killer, and return the userspace
------ ------ * (which will retry the fault, or kill us if we got oom-killed).
------ ------ */
------ ------ up_read(&mm->mmap_sem);
------ ------ pagefault_out_of_memory();
------ ------ return;
------ ------
------ ------do_sigbus:
------ ------ up_read(&mm->mmap_sem);
------ ------
------ ------ /* Kernel mode? Handle exceptions or die */
------ ------ if (!(error_code & PF_USER))
------ ------ goto no_context;
------ ------#ifdef CONFIG_X86_32
------ ------ /* User space => ok to do another page fault */
------ ------ if (is_prefetch(regs, address, error_code))
------ ------ return;
------ ------#endif
------ ------ tsk->thread.cr2 = address;
------ ------ tsk->thread.error_code = error_code;
------ ------ tsk->thread.trap_no = 14;
------ ------ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}
DEFINE_SPINLOCK(pgd_lock);
*
* Must be freed with iounmap.
*/
--------- ----void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
+++++++++ ++++void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
if (pat_enabled)
return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
}
}
------------ void __init early_ioremap_clear(void)
------------ {
------------ pmd_t *pmd;
------------
------------ if (early_ioremap_debug)
------------ printk(KERN_INFO "early_ioremap_clear()\n");
------------
------------ pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
------------ pmd_clear(pmd);
------------ paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
------------ __flush_tlb_all();
------------ }
------------
void __init early_ioremap_reset(void)
{
------------ enum fixed_addresses idx;
------------ unsigned long addr, phys;
------------ pte_t *pte;
------------
after_paging_init = 1;
------------ for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
------------ addr = fix_to_virt(idx);
------------ pte = early_ioremap_pte(addr);
------------ if (pte_present(*pte)) {
------------ phys = pte_val(*pte) & PAGE_MASK;
------------ set_fixmap(idx, phys);
------------ }
------------ }
}
static void __init __early_set_fixmap(enum fixed_addresses idx,
#ifdef CONFIG_X86_PAT
int __read_mostly pat_enabled = 1;
--- ----------void __cpuinit pat_disable(char *reason)
+++ ++++++++++void __cpuinit pat_disable(const char *reason)
{
pat_enabled = 0;
printk(KERN_INFO "%s\n", reason);
return 0;
}
early_param("nopat", nopat);
+++ ++++++++++#else
+++ ++++++++++static inline void pat_disable(const char *reason)
+++ ++++++++++{
+++ ++++++++++ (void)reason;
+++ ++++++++++}
#endif
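The new #else branch means callers can invoke pat_disable() unconditionally: when CONFIG_X86_PAT is off it compiles to an empty inline. Below is a sketch of the same compile-out-stub idiom, using a hypothetical FEATURE_FOO switch rather than the real config symbol.

/* The compile-out stub idiom: provide an inline no-op when the
 * feature is configured out, so call sites stay #ifdef-free.
 * FEATURE_FOO is a hypothetical stand-in for CONFIG_X86_PAT. */
#include <stdio.h>

#ifdef FEATURE_FOO
static void foo_disable(const char *reason)
{
	printf("foo disabled: %s\n", reason);
}
#else
static inline void foo_disable(const char *reason)
{
	(void)reason;	/* feature compiled out: nothing to do */
}
#endif

int main(void)
{
	foo_disable("not supported by CPU");	/* no #ifdef needed here */
	return 0;
}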
if (!pat_enabled)
return;
--- ---------- /* Paranoia check. */
--- ---------- if (!cpu_has_pat && boot_pat_state) {
--- ---------- /*
--- ---------- * If this happens we are on a secondary CPU, but
--- ---------- * switched to PAT on the boot CPU. We have no way to
--- ---------- * undo PAT.
--- ---------- */
--- ---------- printk(KERN_ERR "PAT enabled, "
--- ---------- "but not supported by secondary CPU\n");
--- ---------- BUG();
+++ ++++++++++ if (!cpu_has_pat) {
+++ ++++++++++ if (!boot_pat_state) {
+++ ++++++++++ pat_disable("PAT not supported by CPU.");
+++ ++++++++++ return;
+++ ++++++++++ } else {
+++ ++++++++++ /*
+++ ++++++++++ * If this happens we are on a secondary CPU, but
+++ ++++++++++ * switched to PAT on the boot CPU. We have no way to
+++ ++++++++++ * undo PAT.
+++ ++++++++++ */
+++ ++++++++++ printk(KERN_ERR "PAT enabled, "
+++ ++++++++++ "but not supported by secondary CPU\n");
+++ ++++++++++ BUG();
+++ ++++++++++ }
}
/* Set PWT to Write-Combining. All other bits stay the same */
req_type & _PAGE_CACHE_MASK);
}
------------ is_range_ram = pagerange_is_ram(start, end);
------------ if (is_range_ram == 1)
------------ return reserve_ram_pages_type(start, end, req_type, new_type);
------------ else if (is_range_ram < 0)
------------ return -EINVAL;
++++++++++++ if (new_type)
++++++++++++ *new_type = actual_type;
++++++++++++
++++++++++++ /*
++++++++++++ * For legacy reasons, some parts of the physical address range in the
++++++++++++ * legacy 1MB region are treated as non-RAM (even when listed as RAM in
++++++++++++ * the e820 tables). So we will track the memory attributes of this
++++++++++++ * legacy 1MB region always using the linear memtype_list.
++++++++++++ */
++++++++++++ if (end >= ISA_END_ADDRESS) {
++++++++++++ is_range_ram = pagerange_is_ram(start, end);
++++++++++++ if (is_range_ram == 1)
++++++++++++ return reserve_ram_pages_type(start, end, req_type,
++++++++++++ new_type);
++++++++++++ else if (is_range_ram < 0)
++++++++++++ return -EINVAL;
++++++++++++ }
new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
if (!new)
new->end = end;
new->type = actual_type;
------------ if (new_type)
------------ *new_type = actual_type;
------------
spin_lock(&memtype_lock);
if (cached_entry && start >= cached_start)
if (is_ISA_range(start, end - 1))
return 0;
------------ is_range_ram = pagerange_is_ram(start, end);
------------ if (is_range_ram == 1)
------------ return free_ram_pages_type(start, end);
------------ else if (is_range_ram < 0)
------------ return -EINVAL;
++++++++++++ /*
++++++++++++ * For legacy reasons, some parts of the physical address range in the
++++++++++++ * legacy 1MB region are treated as non-RAM (even when listed as RAM in
++++++++++++ * the e820 tables). So we will track the memory attributes of this
++++++++++++ * legacy 1MB region always using the linear memtype_list.
++++++++++++ */
++++++++++++ if (end >= ISA_END_ADDRESS) {
++++++++++++ is_range_ram = pagerange_is_ram(start, end);
++++++++++++ if (is_range_ram == 1)
++++++++++++ return free_ram_pages_type(start, end);
++++++++++++ else if (is_range_ram < 0)
++++++++++++ return -EINVAL;
++++++++++++ }
spin_lock(&memtype_lock);
list_for_each_entry(entry, &memtype_list, nd) {
}
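Both reserve_memtype() and free_memtype() now take the per-page RAM tracking path only when the range reaches past ISA_END_ADDRESS; anything confined to the legacy first megabyte stays on the linear memtype_list. A small sketch of that gate, assuming ISA_END_ADDRESS is the kernel's 0x100000 (1MB) boundary:

/* Sketch of the legacy-1MB gate in reserve/free_memtype(). Ranges
 * ending below ISA_END_ADDRESS always use the linear memtype list;
 * only higher ranges consult pagerange_is_ram(). 0x100000 is assumed
 * to match the kernel's ISA_END_ADDRESS. */
#include <stdio.h>

#define ISA_END_ADDRESS 0x100000ULL

static const char *tracking_path(unsigned long long end)
{
	if (end >= ISA_END_ADDRESS)
		return "pagerange_is_ram() / per-page tracking";
	return "linear memtype_list";
}

int main(void)
{
	printf("0xa0000-0xc0000:   %s\n", tracking_path(0xc0000));
	printf("0x100000-0x200000: %s\n", tracking_path(0x200000));
	return 0;
}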
#endif /* CONFIG_STRICT_DEVMEM */
- /*
- * Change the memory type for the physial address range in kernel identity
- * mapping space if that range is a part of identity map.
- */
- static int kernel_map_sync_memtype(u64 base, unsigned long size,
- unsigned long flags)
- {
- unsigned long id_sz;
- int ret;
-
- if (!pat_enabled || base >= __pa(high_memory))
- return 0;
-
- id_sz = (__pa(high_memory) < base + size) ?
- __pa(high_memory) - base :
- size;
-
- ret = ioremap_change_attr((unsigned long)__va(base), id_sz, flags);
- /*
- * -EFAULT return means that the addr was not valid and did not have
- * any identity mapping. That case is a success for
- * kernel_map_sync_memtype.
- */
- if (ret == -EFAULT)
- ret = 0;
-
- return ret;
- }
-
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t *vma_prot)
{
if (retval < 0)
return 0;
- if (kernel_map_sync_memtype(offset, size, flags)) {
+ if (((pfn < max_low_pfn_mapped) ||
+ (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) &&
+ ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
free_memtype(offset, offset + size);
printk(KERN_INFO
"%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
* Reserved non RAM regions only and after successful reserve_memtype,
* this func also keeps identity mapping (if any) in sync with this new prot.
*/
-- - --static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot)
++ + ++static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
++ + ++ int strict_prot)
{
int is_ram = 0;
- int ret;
+ int id_sz, ret;
unsigned long flags;
-- - -- unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
++ + ++ unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
is_ram = pagerange_is_ram(paddr, paddr + size);
return ret;
if (flags != want_flags) {
-- - -- free_memtype(paddr, paddr + size);
-- - -- printk(KERN_ERR
-- - -- "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n",
-- - -- current->comm, current->pid,
-- - -- cattr_name(want_flags),
-- - -- (unsigned long long)paddr,
-- - -- (unsigned long long)(paddr + size),
-- - -- cattr_name(flags));
-- - -- return -EINVAL;
++ + ++ if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) {
++ + ++ free_memtype(paddr, paddr + size);
++ + ++ printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
++ + ++ " for %Lx-%Lx, got %s\n",
++ + ++ current->comm, current->pid,
++ + ++ cattr_name(want_flags),
++ + ++ (unsigned long long)paddr,
++ + ++ (unsigned long long)(paddr + size),
++ + ++ cattr_name(flags));
++ + ++ return -EINVAL;
++ + ++ }
++ + ++ /*
++ + ++ * We allow returning a different type than the one requested
++ + ++ * in the non-strict case.
++ + ++ */
++ + ++ *vma_prot = __pgprot((pgprot_val(*vma_prot) &
++ + ++ (~_PAGE_CACHE_MASK)) |
++ + ++ flags);
}
- if (kernel_map_sync_memtype(paddr, size, flags)) {
+ /* Need to keep identity mapping in sync */
+ if (paddr >= __pa(high_memory))
+ return 0;
+
+ id_sz = (__pa(high_memory) < paddr + size) ?
+ __pa(high_memory) - paddr :
+ size;
+
+ if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
free_memtype(paddr, paddr + size);
printk(KERN_ERR
"%s:%d reserve_pfn_range ioremap_change_attr failed %s "
unsigned long vma_start = vma->vm_start;
unsigned long vma_end = vma->vm_end;
unsigned long vma_size = vma_end - vma_start;
++ + ++ pgprot_t pgprot;
if (!pat_enabled)
return 0;
WARN_ON_ONCE(1);
return -EINVAL;
}
-- - -- return reserve_pfn_range(paddr, vma_size, __pgprot(prot));
++ + ++ pgprot = __pgprot(prot);
++ + ++ return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
}
/* reserve entire vma page by page, using pfn and prot from pte */
if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
continue;
-- - -- retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot));
++ + ++ pgprot = __pgprot(prot);
++ + ++ retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1);
if (retval)
goto cleanup_ret;
}
* Note that this function can be called with the caller trying to map only a
* subrange/page inside the vma.
*/
-- - --int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
++ + ++int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long size)
{
int retval = 0;
if (is_linear_pfn_mapping(vma)) {
/* reserve the whole chunk starting from vm_pgoff */
paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
-- - -- return reserve_pfn_range(paddr, vma_size, prot);
++ + ++ return reserve_pfn_range(paddr, vma_size, prot, 0);
}
/* reserve page by page using pfn and size */
base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
for (i = 0; i < size; i += PAGE_SIZE) {
paddr = base_paddr + i;
-- - -- retval = reserve_pfn_range(paddr, PAGE_SIZE, prot);
++ + ++ retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0);
if (retval)
goto cleanup_ret;
}
preempt_enable();
}
-------------static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
------------- unsigned long va)
+++++++++++++static void xen_flush_tlb_others(const struct cpumask *cpus,
+++++++++++++ struct mm_struct *mm, unsigned long va)
{
struct {
struct mmuext_op op;
------------- cpumask_t mask;
+++++++++++++ DECLARE_BITMAP(mask, NR_CPUS);
} *args;
------------- cpumask_t cpumask = *cpus;
struct multicall_space mcs;
------------- /*
------------- * A couple of (to be removed) sanity checks:
------------- *
------------- * - current CPU must not be in mask
------------- * - mask must exist :)
------------- */
------------- BUG_ON(cpus_empty(cpumask));
------------- BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+++++++++++++ BUG_ON(cpumask_empty(cpus));
BUG_ON(!mm);
------------- /* If a CPU which we ran on has gone down, OK. */
------------- cpus_and(cpumask, cpumask, cpu_online_map);
------------- if (cpus_empty(cpumask))
------------- return;
-------------
mcs = xen_mc_entry(sizeof(*args));
args = mcs.args;
------------- args->mask = cpumask;
------------- args->op.arg2.vcpumask = &args->mask;
+++++++++++++ args->op.arg2.vcpumask = to_cpumask(args->mask);
+++++++++++++
+++++++++++++ /* Remove us, and any offline CPUs. */
+++++++++++++ cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
+++++++++++++ cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
+++++++++++++ if (unlikely(cpumask_empty(to_cpumask(args->mask))))
+++++++++++++ goto issue;
if (va == TLB_FLUSH_ALL) {
args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
+++++++++++++issue:
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
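The xen_flush_tlb_others() change stops copying a whole cpumask_t by value and instead embeds a DECLARE_BITMAP(mask, NR_CPUS) inside the multicall arguments, masking it in place through to_cpumask(). A userspace sketch of the embedded-bitmap pattern follows; the macros are tiny stand-ins, not the kernel's cpumask API.

/* Userspace sketch of embedding a bitmap in an argument struct and
 * masking it in place, in the spirit of DECLARE_BITMAP/to_cpumask.
 * The macros below are stand-ins, not the kernel implementations. */
#include <stdio.h>
#include <limits.h>

#define NR_CPUS 64
#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)
#define DECLARE_BITMAP(name, bits) \
	unsigned long name[((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG]

struct args {
	int op;
	DECLARE_BITMAP(mask, NR_CPUS);
};

int main(void)
{
	struct args a = { .op = 0 };
	unsigned long online = 0x0fUL;		/* CPUs 0-3 online */
	unsigned long requested = 0x0aUL;	/* CPUs 1 and 3 */
	int self = 1;

	a.mask[0] = requested & online;		/* like cpumask_and() */
	a.mask[0] &= ~(1UL << self);		/* like cpumask_clear_cpu() */

	printf("mask word 0 = %#lx\n", a.mask[0]);	/* 0x8: CPU 3 */
	return 0;
}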
static void xen_write_cr2(unsigned long cr2)
{
------------- x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
+++++++++++++ percpu_read(xen_vcpu)->arch.cr2 = cr2;
}
static unsigned long xen_read_cr2(void)
{
------------- return x86_read_percpu(xen_vcpu)->arch.cr2;
+++++++++++++ return percpu_read(xen_vcpu)->arch.cr2;
}
static unsigned long xen_read_cr2_direct(void)
{
------------- return x86_read_percpu(xen_vcpu_info.arch.cr2);
+++++++++++++ return percpu_read(xen_vcpu_info.arch.cr2);
}
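Here x86_read_percpu()/x86_write_percpu() become the generic percpu_read()/percpu_write() accessors, which on x86 compile down to segment-relative loads and stores. The sketch below only mimics the accessor shape with a __thread variable; it is a loose userspace analogue and does not model the segment-register addressing.

/* Loose userspace analogue of percpu_read()/percpu_write(): each
 * thread sees its own copy of the variable behind a pair of accessor
 * macros. The real kernel accessors are segment-relative moves. */
#include <stdio.h>

static __thread unsigned long xen_cr3_shadow;

#define percpu_read(var)	(var)
#define percpu_write(var, val)	((var) = (val))

int main(void)
{
	percpu_write(xen_cr3_shadow, 0x1000);
	printf("cr3 shadow = %#lx\n", percpu_read(xen_cr3_shadow));
	return 0;
}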
static void xen_write_cr4(unsigned long cr4)
static unsigned long xen_read_cr3(void)
{
------------- return x86_read_percpu(xen_cr3);
+++++++++++++ return percpu_read(xen_cr3);
}
static void set_current_cr3(void *v)
{
------------- x86_write_percpu(xen_current_cr3, (unsigned long)v);
+++++++++++++ percpu_write(xen_current_cr3, (unsigned long)v);
}
static void __xen_write_cr3(bool kernel, unsigned long cr3)
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
if (kernel) {
------------- x86_write_percpu(xen_cr3, cr3);
+++++++++++++ percpu_write(xen_cr3, cr3);
/* Update xen_current_cr3 once the batch has actually
been submitted. */
/* Update while interrupts are disabled, so its atomic with
respect to ipis */
------------- x86_write_percpu(xen_cr3, cr3);
+++++++++++++ percpu_write(xen_cr3, cr3);
__xen_write_cr3(true, cr3);
.ptep_modify_prot_commit = __ptep_modify_prot_commit,
.pte_val = xen_pte_val,
-------- ----- .pte_flags = native_pte_flags,
.pgd_val = xen_pgd_val,
.make_pte = xen_make_pte,
#ifdef CONFIG_X86_64
/* Disable until direct per-cpu data access. */
have_vcpu_info_placement = 0;
------------- x86_64_init_pda();
#endif
xen_smp_init();