Merge branch 'x86/apic' into x86-v28-for-linus-phase4-B
author Ingo Molnar <mingo@elte.hu>
Fri, 10 Oct 2008 17:50:00 +0000 (19:50 +0200)
committer Ingo Molnar <mingo@elte.hu>
Sat, 11 Oct 2008 18:17:36 +0000 (20:17 +0200)
Conflicts:
arch/x86/kernel/apic_32.c
arch/x86/kernel/apic_64.c
arch/x86/kernel/setup.c
drivers/pci/intel-iommu.c
include/asm-x86/cpufeature.h
include/asm-x86/dma-mapping.h

19 files changed:
1  2 
Documentation/kernel-parameters.txt
arch/x86/Kconfig
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/apic_32.c
arch/x86/kernel/apic_64.c
arch/x86/kernel/cpu/common_64.c
arch/x86/kernel/cpu/feature_names.c
arch/x86/kernel/genx2apic_uv_x.c
arch/x86/kernel/mpparse.c
arch/x86/kernel/numaq_32.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/setup.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/vmi_32.c
arch/x86/xen/enlighten.c
include/asm-x86/apic.h
include/asm-x86/cpufeature.h
include/asm-x86/genapic_64.h
include/asm-x86/paravirt.h

@@@ -284,11 -284,6 +284,11 @@@ and is between 256 and 4096 characters
                        isolate - enable device isolation (each device, as far
                                  as possible, will get its own protection
                                  domain)
 +                      fullflush - enable flushing of IO/TLB entries when
 +                                  they are unmapped. Otherwise they are
 +                                  flushed before they are reused, which
 +                                  is a lot faster
 +
        amd_iommu_size= [HW,X86-64]
                        Define the size of the aperture for the AMD IOMMU
                        driver. Possible values are:
                        no delay (0).
                        Format: integer
  
 +      bootmem_debug   [KNL] Enable bootmem allocator debug messages.
 +
        bttv.card=      [HW,V4L] bttv (bt848 + bt878 based grabber cards)
        bttv.radio=     Most important insmod options are available as
                        kernel args too.
                        Range: 0 - 8192
                        Default: 64
  
 -      disable_8254_timer
 -      enable_8254_timer
 -                      [IA32/X86_64] Disable/Enable interrupt 0 timer routing
 -                      over the 8254 in addition to over the IO-APIC. The
 -                      kernel tries to set a sensible default.
 -
        hpet=           [X86-32,HPET] option to control HPET usage
                        Format: { enable (default) | disable | force }
                        disable: disable HPET and use PIT instead
  
                        * [no]ncq: Turn on or off NCQ.
  
 +                      * nohrst, nosrst, norst: suppress hard, soft
 +                          and both resets.
 +
                        If there are multiple matching configurations changing
                        the same attribute, the last one is used.
  
  
        nolapic_timer   [X86-32,APIC] Do not use the local APIC timer.
  
+       nox2apic        [X86-64,APIC] Do not enable x2APIC mode.
+       x2apic_phys     [X86-64,APIC] Use x2apic physical mode instead of
+                       default x2apic cluster mode on platforms
+                       supporting x2apic.
        noltlbs         [PPC] Do not use large page/tlb entries for kernel
                        lowmem mapping on PPC40x.
  
        shapers=        [NET]
                        Maximal number of shapers.
  
 +      show_msr=       [x86] show boot-time MSR settings
 +                      Format: { <integer> }
 +                      Show boot-time (BIOS-initialized) MSR settings.
 +                      The parameter means the number of CPUs to show,
 +                      for example 1 means boot CPU only.
 +
        sim710=         [SCSI,HW]
                        See header of drivers/scsi/sim710.c.
  
diff --combined arch/x86/Kconfig
@@@ -29,7 -29,6 +29,7 @@@ config X8
        select HAVE_FTRACE
        select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
        select HAVE_ARCH_KGDB if !X86_VOYAGER
 +      select HAVE_ARCH_TRACEHOOK
        select HAVE_GENERIC_DMA_COHERENT if X86_32
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
  
@@@ -554,7 -553,6 +554,7 @@@ config CALGARY_IOMMU_ENABLED_BY_DEFAUL
  config AMD_IOMMU
        bool "AMD IOMMU support"
        select SWIOTLB
 +      select PCI_MSI
        depends on X86_64 && PCI && ACPI
        help
          With this option you can enable support for AMD IOMMU hardware in
@@@ -579,29 -577,35 +579,29 @@@ config SWIOTL
  
  config IOMMU_HELPER
        def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
 +
  config MAXSMP
        bool "Configure Maximum number of SMP Processors and NUMA Nodes"
 -      depends on X86_64 && SMP
 +      depends on X86_64 && SMP && BROKEN
        default n
        help
          Configure maximum number of CPUS and NUMA Nodes for this architecture.
          If unsure, say N.
  
 -if MAXSMP
  config NR_CPUS
 -      int
 -      default "4096"
 -endif
 -
 -if !MAXSMP
 -config NR_CPUS
 -      int "Maximum number of CPUs (2-4096)"
 -      range 2 4096
 +      int "Maximum number of CPUs (2-512)" if !MAXSMP
 +      range 2 512
        depends on SMP
 +      default "4096" if MAXSMP
        default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
        default "8"
        help
          This allows you to specify the maximum number of CPUs which this
 -        kernel will support.  The maximum supported value is 4096 and the
 +        kernel will support.  The maximum supported value is 512 and the
          minimum value which makes sense is 2.
  
          This is purely to save memory - each supported CPU adds
          approximately eight kilobytes to the kernel image.
 -endif
  
  config SCHED_SMT
        bool "SMT (Hyperthreading) scheduler support"
@@@ -947,9 -951,9 +947,9 @@@ config NUM
          local memory controller of the CPU and add some more
          NUMA awareness to the kernel.
  
 -        For i386 this is currently highly experimental and should be only
 +        For 32-bit this is currently highly experimental and should be only
          used for kernel development. It might also cause boot failures.
 -        For x86_64 this is recommended on all multiprocessor Opteron systems.
 +        For 64-bit this is recommended on all multiprocessor Opteron systems.
          If the system is EM64T, you should say N unless your system is
          EM64T NUMA.
  
@@@ -992,10 -996,17 +992,10 @@@ config NUMA_EM
          into virtual nodes when booted with "numa=fake=N", where N is the
          number of nodes. This is only useful for debugging.
  
 -if MAXSMP
 -
  config NODES_SHIFT
 -      int
 -      default "9"
 -endif
 -
 -if !MAXSMP
 -config NODES_SHIFT
 -      int "Maximum NUMA Nodes (as a power of 2)"
 +      int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
        range 1 9   if X86_64
 +      default "9" if MAXSMP
        default "6" if X86_64
        default "4" if X86_NUMAQ
        default "3"
        help
          Specify the maximum number of NUMA Nodes available on the target
          system.  Increases memory reserved to accommodate various tables.
 -endif
  
  config HAVE_ARCH_BOOTMEM_NODE
        def_bool y
@@@ -1022,7 -1034,7 +1022,7 @@@ config HAVE_ARCH_ALLOC_REMA
  
  config ARCH_FLATMEM_ENABLE
        def_bool y
 -      depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC && !NUMA
 +      depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
  
  config ARCH_DISCONTIGMEM_ENABLE
        def_bool y
@@@ -1038,7 -1050,7 +1038,7 @@@ config ARCH_SPARSEMEM_DEFAUL
  
  config ARCH_SPARSEMEM_ENABLE
        def_bool y
 -      depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC)
 +      depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_GENERICARCH
        select SPARSEMEM_STATIC if X86_32
        select SPARSEMEM_VMEMMAP_ENABLE if X86_64
  
@@@ -1119,10 -1131,10 +1119,10 @@@ config MTR
          You can safely say Y even if your machine doesn't have MTRRs, you'll
          just add about 9 KB to your kernel.
  
 -        See <file:Documentation/mtrr.txt> for more information.
 +        See <file:Documentation/x86/mtrr.txt> for more information.
  
  config MTRR_SANITIZER
 -      bool
 +      def_bool y
        prompt "MTRR cleanup support"
        depends on MTRR
        help
          The largest mtrr entry size for a continuous block can be set with
          mtrr_chunk_size.
  
 -        If unsure, say N.
 +        If unsure, say Y.
  
  config MTRR_SANITIZER_ENABLE_DEFAULT
        int "MTRR cleanup enable value (0-1)"
@@@ -1193,6 -1205,7 +1193,6 @@@ config IRQBALANC
  config SECCOMP
        def_bool y
        prompt "Enable seccomp to safely compute untrusted bytecode"
 -      depends on PROC_FS
        help
          This kernel feature is useful for number crunching applications
          that may need to compute untrusted bytecode during their
          the process as file descriptors supporting the read/write
          syscalls, it's possible to isolate those applications in
          their own address space using seccomp. Once seccomp is
 -        enabled via /proc/<pid>/seccomp, it cannot be disabled
 +        enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
          and the task is only allowed to execute a few safe syscalls
          defined by each seccomp mode.
  
@@@ -1250,7 -1263,7 +1250,7 @@@ config KEXE
          strongly in flux, so no good recommendation can be made.
  
  config CRASH_DUMP
 -      bool "kernel crash dumps (EXPERIMENTAL)"
 +      bool "kernel crash dumps"
        depends on X86_64 || (X86_32 && HIGHMEM)
        help
          Generate crash dump after being started by kexec.
@@@ -1357,14 -1370,14 +1357,14 @@@ config PHYSICAL_ALIG
          Don't change this unless you know what you are doing.
  
  config HOTPLUG_CPU
 -      bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)"
 -      depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER
 +      bool "Support for hot-pluggable CPUs"
 +      depends on SMP && HOTPLUG && !X86_VOYAGER
        ---help---
 -        Say Y here to experiment with turning CPUs off and on, and to
 -        enable suspend on SMP systems. CPUs can be controlled through
 -        /sys/devices/system/cpu.
 -        Say N if you want to disable CPU hotplug and don't need to
 -        suspend.
 +        Say Y here to allow turning CPUs off and on. CPUs can be
 +        controlled through /sys/devices/system/cpu.
 +        ( Note: power management support will enable this option
 +          automatically on SMP systems. )
 +        Say N if you want to disable CPU hotplug.
  
  config COMPAT_VDSO
        def_bool y
  
          If unsure, say Y.
  
 +config CMDLINE_BOOL
 +      bool "Built-in kernel command line"
 +      default n
 +      help
 +        Allow for specifying boot arguments to the kernel at
 +        build time.  On some systems (e.g. embedded ones), it is
 +        necessary or convenient to provide some or all of the
 +        kernel boot arguments with the kernel itself (that is,
 +        to not rely on the boot loader to provide them.)
 +
 +        To compile command line arguments into the kernel,
 +        set this option to 'Y', then fill in the
 +        the boot arguments in CONFIG_CMDLINE.
 +
 +        Systems with fully functional boot loaders (i.e. non-embedded)
 +        should leave this option set to 'N'.
 +
 +config CMDLINE
 +      string "Built-in kernel command string"
 +      depends on CMDLINE_BOOL
 +      default ""
 +      help
 +        Enter arguments here that should be compiled into the kernel
 +        image and used at boot time.  If the boot loader provides a
 +        command line at boot time, it is appended to this string to
 +        form the full kernel command line, when the system boots.
 +
 +        However, you can use the CONFIG_CMDLINE_OVERRIDE option to
 +        change this behavior.
 +
 +        In most cases, the command line (whether built-in or provided
 +        by the boot loader) should specify the device for the root
 +        file system.
 +
 +config CMDLINE_OVERRIDE
 +      bool "Built-in command line overrides boot loader arguments"
 +      default n
 +      depends on CMDLINE_BOOL
 +      help
 +        Set this option to 'Y' to have the kernel ignore the boot loader
 +        command line, and use ONLY the built-in command line.
 +
 +        This is used to work around broken boot loaders.  This should
 +        be set to 'N' under normal conditions.
 +
  endmenu
  
  config ARCH_ENABLE_MEMORY_HOTPLUG
@@@ -1689,6 -1657,14 +1689,14 @@@ config DMAR_FLOPPY_W
         workaround will setup a 1:1 mapping for the first
         16M to make floppy (an ISA device) work.
  
+ config INTR_REMAP
+       bool "Support for Interrupt Remapping (EXPERIMENTAL)"
+       depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
+       help
+        Supports Interrupt remapping for IO-APIC and MSI devices.
+        To use x2apic mode in the CPUs which support x2APIC enhancements or
+        to support platforms with CPUs having > 8 bit APIC ID, say Y.
  source "drivers/pci/pcie/Kconfig"
  
  source "drivers/pci/Kconfig"
@@@ -1819,7 -1795,7 +1827,7 @@@ config COMPAT_FOR_U64_ALIGNMEN
  
  config SYSVIPC_COMPAT
        def_bool y
 -      depends on X86_64 && COMPAT && SYSVIPC
 +      depends on COMPAT && SYSVIPC
  
  endmenu
  
@@@ -58,6 -58,7 +58,6 @@@ EXPORT_SYMBOL(acpi_disabled)
  #ifdef        CONFIG_X86_64
  
  #include <asm/proto.h>
 -#include <asm/genapic.h>
  
  #else                         /* X86 */
  
@@@ -157,16 -158,6 +157,16 @@@ char *__init __acpi_map_table(unsigned 
  struct acpi_mcfg_allocation *pci_mmcfg_config;
  int pci_mmcfg_config_num;
  
 +static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
 +
 +static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
 +{
 +      if (!strcmp(mcfg->header.oem_id, "SGI"))
 +              acpi_mcfg_64bit_base_addr = TRUE;
 +
 +      return 0;
 +}
 +
  int __init acpi_parse_mcfg(struct acpi_table_header *header)
  {
        struct acpi_table_mcfg *mcfg;
        }
  
        memcpy(pci_mmcfg_config, &mcfg[1], config_size);
 +
 +      acpi_mcfg_oem_check(mcfg);
 +
        for (i = 0; i < pci_mmcfg_config_num; ++i) {
 -              if (pci_mmcfg_config[i].address > 0xFFFFFFFF) {
 +              if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
 +                  !acpi_mcfg_64bit_base_addr) {
                        printk(KERN_ERR PREFIX
                               "MMCONFIG not in low 4GB of memory\n");
                        kfree(pci_mmcfg_config);
@@@ -252,10 -239,8 +252,8 @@@ static void __cpuinit acpi_register_lap
                return;
        }
  
- #ifdef CONFIG_X86_32
        if (boot_cpu_physical_apicid != -1U)
                ver = apic_version[boot_cpu_physical_apicid];
- #endif
  
        generic_processor_info(id, ver);
  }
@@@ -774,11 -759,9 +772,9 @@@ static void __init acpi_register_lapic_
  
        set_fixmap_nocache(FIX_APIC_BASE, address);
        if (boot_cpu_physical_apicid == -1U) {
-               boot_cpu_physical_apicid  = GET_APIC_ID(read_apic_id());
- #ifdef CONFIG_X86_32
+               boot_cpu_physical_apicid  = read_apic_id();
                apic_version[boot_cpu_physical_apicid] =
                         GET_APIC_VERSION(apic_read(APIC_LVR));
- #endif
        }
  }
  
@@@ -1350,7 -1333,9 +1346,9 @@@ static void __init acpi_process_madt(vo
                                acpi_ioapic = 1;
  
                                smp_found_config = 1;
+ #ifdef CONFIG_X86_32
                                setup_apic_routing();
+ #endif
                        }
                }
                if (error == -EINVAL) {
@@@ -1602,14 -1587,6 +1600,14 @@@ static struct dmi_system_id __initdata 
         * is not connected at all.  Force ignoring BIOS IRQ0 pin2
         * override in that cases.
         */
 +      {
 +       .callback = dmi_ignore_irq0_timer_override,
 +       .ident = "HP nx6115 laptop",
 +       .matches = {
 +                   DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
 +                   DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"),
 +                   },
 +       },
        {
         .callback = dmi_ignore_irq0_timer_override,
         .ident = "HP NX6125 laptop",
                     DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"),
                     },
         },
 +      {
 +       .callback = dmi_ignore_irq0_timer_override,
 +       .ident = "HP 6715b laptop",
 +       .matches = {
 +                   DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
 +                   DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"),
 +                   },
 +       },
        {}
  };
  
@@@ -60,10 -60,8 +60,8 @@@ unsigned long mp_lapic_addr
  static int force_enable_local_apic;
  int disable_apic;
  
- /* Local APIC timer verification ok */
- static int local_apic_timer_verify_ok;
  /* Disable local APIC timer from the kernel commandline or via dmi quirk */
- static int local_apic_timer_disabled;
+ static int disable_apic_timer __cpuinitdata;
  /* Local APIC timer works in C2 */
  int local_apic_timer_c2_ok;
  EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@@ -116,6 -114,8 +114,6 @@@ static DEFINE_PER_CPU(struct clock_even
  static int enabled_via_apicbase;
  
  static unsigned long apic_phys;
 -unsigned int __cpuinitdata maxcpus = NR_CPUS;
 -
  
  /*
   * Get the LAPIC version
@@@ -130,7 -130,11 +128,11 @@@ static inline int lapic_get_version(voi
   */
  static inline int lapic_is_integrated(void)
  {
+ #ifdef CONFIG_X86_64
+       return 1;
+ #else
        return APIC_INTEGRATED(lapic_get_version());
+ #endif
  }
  
  /*
@@@ -145,13 -149,18 +147,18 @@@ static int modern_apic(void
        return lapic_get_version() >= 0x14;
  }
  
- void apic_wait_icr_idle(void)
+ /*
+  * Paravirt kernels also might be using these below ops. So we still
+  * use generic apic_read()/apic_write(), which might be pointing to different
+  * ops in PARAVIRT case.
+  */
+ void xapic_wait_icr_idle(void)
  {
        while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
                cpu_relax();
  }
  
- u32 safe_apic_wait_icr_idle(void)
+ u32 safe_xapic_wait_icr_idle(void)
  {
        u32 send_status;
        int timeout;
        return send_status;
  }
  
+ void xapic_icr_write(u32 low, u32 id)
+ {
+       apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+       apic_write(APIC_ICR, low);
+ }
+ u64 xapic_icr_read(void)
+ {
+       u32 icr1, icr2;
+       icr2 = apic_read(APIC_ICR2);
+       icr1 = apic_read(APIC_ICR);
+       return icr1 | ((u64)icr2 << 32);
+ }
+ static struct apic_ops xapic_ops = {
+       .read = native_apic_mem_read,
+       .write = native_apic_mem_write,
+       .icr_read = xapic_icr_read,
+       .icr_write = xapic_icr_write,
+       .wait_icr_idle = xapic_wait_icr_idle,
+       .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+ };
+ struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+ EXPORT_SYMBOL_GPL(apic_ops);
  /**
   * enable_NMI_through_LVT0 - enable NMI through local vector table 0
   */
  void __cpuinit enable_NMI_through_LVT0(void)
  {
-       unsigned int v = APIC_DM_NMI;
+       unsigned int v;
  
-       /* Level triggered for 82489DX */
+       /* unmask and set to NMI */
+       v = APIC_DM_NMI;
+       /* Level triggered for 82489DX (32bit mode) */
        if (!lapic_is_integrated())
                v |= APIC_LVT_LEVEL_TRIGGER;
        apic_write(APIC_LVT0, v);
  }
  
@@@ -193,9 -234,13 +232,13 @@@ int get_physical_broadcast(void
   */
  int lapic_get_maxlvt(void)
  {
-       unsigned int v = apic_read(APIC_LVR);
+       unsigned int v;
  
-       /* 82489DXs do not report # of LVT entries. */
+       v = apic_read(APIC_LVR);
+       /*
+        * - we always have APIC integrated on 64bit mode
+        * - 82489DXs do not report # of LVT entries
+        */
        return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
  }
  
   * Local APIC timer
   */
  
- /* Clock divisor is set to 16 */
+ /* Clock divisor */
+ #ifdef CONFIG_X86_64
+ #define APIC_DIVISOR 1
+ #else
  #define APIC_DIVISOR 16
+ #endif
  
  /*
   * This function sets up the local APIC timer, with a timeout of
   * this function twice on the boot CPU, once with a bogus timeout
   * value, second time for real. The other (noncalibrating) CPUs
   * call this function only once, with the real, calibrated value.
+  *
+  * We do reads before writes even if unnecessary, to get around the
+  * P5 APIC double write bug.
   */
  static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
  {
         */
        tmp_value = apic_read(APIC_TDCR);
        apic_write(APIC_TDCR,
-                  (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
-                  APIC_TDR_DIV_16);
+               (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
+               APIC_TDR_DIV_16);
  
        if (!oneshot)
                apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
  }
  
+ /*
+  * Setup extended LVT, AMD specific (K8, family 10h)
+  *
+  * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
+  * MCE interrupts are supported. Thus MCE offset must be set to 0.
+  */
+ #define APIC_EILVT_LVTOFF_MCE 0
+ #define APIC_EILVT_LVTOFF_IBS 1
+ static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
+ {
+       unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
+       unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
+       apic_write(reg, v);
+ }
+ u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
+ {
+       setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
+       return APIC_EILVT_LVTOFF_MCE;
+ }
+ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
+ {
+       setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
+       return APIC_EILVT_LVTOFF_IBS;
+ }
  /*
   * Program the next event, relative to now
   */
@@@ -259,8 -341,8 +339,8 @@@ static void lapic_timer_setup(enum cloc
        unsigned long flags;
        unsigned int v;
  
-       /* Lapic used for broadcast ? */
-       if (!local_apic_timer_verify_ok)
+       /* Lapic used as dummy for broadcast ? */
+       if (evt->features & CLOCK_EVT_FEAT_DUMMY)
                return;
  
        local_irq_save(flags);
@@@ -473,7 -555,7 +553,7 @@@ static int __init calibrate_APIC_clock(
                return -1;
        }
  
-       local_apic_timer_verify_ok = 1;
+       levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
  
        /* We trust the pm timer based calibration */
        if (!pm_referenced) {
                if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
                        apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
                else
-                       local_apic_timer_verify_ok = 0;
+                       levt->features |= CLOCK_EVT_FEAT_DUMMY;
        } else
                local_irq_enable();
  
-       if (!local_apic_timer_verify_ok) {
+       if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
                printk(KERN_WARNING
                       "APIC timer disabled due to verification failure.\n");
                        return -1;
@@@ -533,7 -615,8 +613,8 @@@ void __init setup_boot_APIC_clock(void
         * timer as a dummy clock event source on SMP systems, so the
         * broadcast mechanism is used. On UP systems simply ignore it.
         */
-       if (local_apic_timer_disabled) {
+       if (disable_apic_timer) {
+               printk(KERN_INFO "Disabling APIC timer\n");
                /* No broadcast on UP ! */
                if (num_possible_cpus() > 1) {
                        lapic_clockevent.mult = 1;
@@@ -602,7 -685,11 +683,11 @@@ static void local_apic_timer_interrupt(
        /*
         * the NMI deadlock-detector uses this.
         */
+ #ifdef CONFIG_X86_64
+       add_pda(apic_timer_irqs, 1);
+ #else
        per_cpu(irq_stat, cpu).apic_timer_irqs++;
+ #endif
  
        evt->event_handler(evt);
  }
@@@ -641,35 -728,6 +726,6 @@@ int setup_profiling_timer(unsigned int 
        return -EINVAL;
  }
  
- /*
-  * Setup extended LVT, AMD specific (K8, family 10h)
-  *
-  * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
-  * MCE interrupts are supported. Thus MCE offset must be set to 0.
-  */
- #define APIC_EILVT_LVTOFF_MCE 0
- #define APIC_EILVT_LVTOFF_IBS 1
- static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
- {
-       unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
-       unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
-       apic_write(reg, v);
- }
- u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
- {
-       setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
-       return APIC_EILVT_LVTOFF_MCE;
- }
- u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
- {
-       setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
-       return APIC_EILVT_LVTOFF_IBS;
- }
  /*
   * Local APIC start and shutdown
   */
@@@ -715,7 -773,7 +771,7 @@@ void clear_local_APIC(void
        }
  
        /* lets not touch this if we didn't frob it */
- #ifdef CONFIG_X86_MCE_P4THERMAL
+ #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
        if (maxlvt >= 5) {
                v = apic_read(APIC_LVTTHMR);
                apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
        if (maxlvt >= 4)
                apic_write(APIC_LVTPC, APIC_LVT_MASKED);
  
- #ifdef CONFIG_X86_MCE_P4THERMAL
-       if (maxlvt >= 5)
-               apic_write(APIC_LVTTHMR, APIC_LVT_MASKED);
- #endif
        /* Integrated APIC (!82489DX) ? */
        if (lapic_is_integrated()) {
                if (maxlvt > 3)
   */
  void disable_local_APIC(void)
  {
-       unsigned long value;
+       unsigned int value;
  
        clear_local_APIC();
  
        value &= ~APIC_SPIV_APIC_ENABLED;
        apic_write(APIC_SPIV, value);
  
+ #ifdef CONFIG_X86_32
        /*
         * When LAPIC was disabled by the BIOS and enabled by the kernel,
         * restore the disabled state.
                l &= ~MSR_IA32_APICBASE_ENABLE;
                wrmsr(MSR_IA32_APICBASE, l, h);
        }
+ #endif
  }
  
  /*
@@@ -789,11 -845,15 +843,15 @@@ void lapic_shutdown(void
                return;
  
        local_irq_save(flags);
-       clear_local_APIC();
  
-       if (enabled_via_apicbase)
+ #ifdef CONFIG_X86_32
+       if (!enabled_via_apicbase)
+               clear_local_APIC();
+       else
+ #endif
                disable_local_APIC();
  
        local_irq_restore(flags);
  }
  
@@@ -838,6 -898,12 +896,12 @@@ int __init verify_local_APIC(void
         */
        reg0 = apic_read(APIC_ID);
        apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
+       apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
+       reg1 = apic_read(APIC_ID);
+       apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
+       apic_write(APIC_ID, reg0);
+       if (reg1 != (reg0 ^ APIC_ID_MASK))
+               return 0;
  
        /*
         * The next two are just to see if we have sane values.
@@@ -863,14 -929,15 +927,15 @@@ void __init sync_Arb_IDs(void
         */
        if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                return;
        /*
         * Wait for idle.
         */
        apic_wait_icr_idle();
  
        apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
-       apic_write(APIC_ICR,
-                  APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);
+       apic_write(APIC_ICR, APIC_DEST_ALLINC |
+                       APIC_INT_LEVELTRIG | APIC_DM_INIT);
  }
  
  /*
   */
  void __init init_bsp_APIC(void)
  {
-       unsigned long value;
+       unsigned int value;
  
        /*
         * Don't do the setup now if we have a SMP BIOS as the
        value &= ~APIC_VECTOR_MASK;
        value |= APIC_SPIV_APIC_ENABLED;
  
+ #ifdef CONFIG_X86_32
        /* This bit is reserved on P4/Xeon and should be cleared */
        if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
            (boot_cpu_data.x86 == 15))
                value &= ~APIC_SPIV_FOCUS_DISABLED;
        else
+ #endif
                value |= APIC_SPIV_FOCUS_DISABLED;
        value |= SPURIOUS_APIC_VECTOR;
        apic_write(APIC_SPIV, value);
@@@ -922,6 -991,16 +989,16 @@@ static void __cpuinit lapic_setup_esr(v
  {
        unsigned long oldvalue, value, maxlvt;
        if (lapic_is_integrated() && !esr_disable) {
+               if (esr_disable) {
+                       /*
+                        * Something untraceable is creating bad interrupts on
+                        * secondary quads ... for the moment, just leave the
+                        * ESR disabled - we can't do anything useful with the
+                        * errors anyway - mbligh
+                        */
+                       printk(KERN_INFO "Leaving ESR disabled.\n");
+                       return;
+               }
                /* !82489DX */
                maxlvt = lapic_get_maxlvt();
                if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
                                "vector: 0x%08lx  after: 0x%08lx\n",
                                oldvalue, value);
        } else {
-               if (esr_disable)
-                       /*
-                        * Something untraceable is creating bad interrupts on
-                        * secondary quads ... for the moment, just leave the
-                        * ESR disabled - we can't do anything useful with the
-                        * errors anyway - mbligh
-                        */
-                       printk(KERN_INFO "Leaving ESR disabled.\n");
-               else
-                       printk(KERN_INFO "No ESR for 82489DX.\n");
+               printk(KERN_INFO "No ESR for 82489DX.\n");
        }
  }
  
@@@ -1089,13 -1159,17 +1157,17 @@@ void __cpuinit setup_local_APIC(void
  
  void __cpuinit end_local_APIC_setup(void)
  {
-       unsigned long value;
        lapic_setup_esr();
-       /* Disable the local apic timer */
-       value = apic_read(APIC_LVTT);
-       value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-       apic_write(APIC_LVTT, value);
+ #ifdef CONFIG_X86_32
+       {
+               unsigned int value;
+               /* Disable the local apic timer */
+               value = apic_read(APIC_LVTT);
+               value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+               apic_write(APIC_LVTT, value);
+       }
+ #endif
  
        setup_apic_nmi_watchdog(NULL);
        apic_pm_activate();
@@@ -1205,7 -1279,7 +1277,7 @@@ void __init init_apic_mappings(void
         * default configuration (or the MP table is broken).
         */
        if (boot_cpu_physical_apicid == -1U)
-               boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+               boot_cpu_physical_apicid = read_apic_id();
  
  }
  
@@@ -1242,7 -1316,7 +1314,7 @@@ int __init APIC_init_uniprocessor(void
         * might be zero if read from MP tables. Get it from LAPIC.
         */
  #ifdef CONFIG_CRASH_DUMP
-       boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+       boot_cpu_physical_apicid = read_apic_id();
  #endif
        physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
  
@@@ -1321,59 -1395,12 +1393,12 @@@ void smp_error_interrupt(struct pt_reg
        irq_exit();
  }
  
- #ifdef CONFIG_SMP
- void __init smp_intr_init(void)
- {
-       /*
-        * IRQ0 must be given a fixed assignment and initialized,
-        * because it's used before the IO-APIC is set up.
-        */
-       set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
-       /*
-        * The reschedule interrupt is a CPU-to-CPU reschedule-helper
-        * IPI, driven by wakeup.
-        */
-       alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-       /* IPI for invalidation */
-       alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
-       /* IPI for generic function call */
-       alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-       /* IPI for single call function */
-       set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
-                               call_function_single_interrupt);
- }
- #endif
- /*
-  * Initialize APIC interrupts
-  */
- void __init apic_intr_init(void)
- {
- #ifdef CONFIG_SMP
-       smp_intr_init();
- #endif
-       /* self generated IPI for local APIC timer */
-       alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-       /* IPI vectors for APIC spurious and error interrupts */
-       alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
-       alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-       /* thermal monitor LVT interrupt */
- #ifdef CONFIG_X86_MCE_P4THERMAL
-       alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
- #endif
- }
  /**
   * connect_bsp_APIC - attach the APIC to the interrupt system
   */
  void __init connect_bsp_APIC(void)
  {
+ #ifdef CONFIG_X86_32
        if (pic_mode) {
                /*
                 * Do not trust the local APIC being empty at bootup.
                outb(0x70, 0x22);
                outb(0x01, 0x23);
        }
+ #endif
        enable_apic_mode();
  }
  
   */
  void disconnect_bsp_APIC(int virt_wire_setup)
  {
+       unsigned int value;
+ #ifdef CONFIG_X86_32
        if (pic_mode) {
                /*
                 * Put the board back into PIC mode (has an effect only on
                                "entering PIC mode.\n");
                outb(0x70, 0x22);
                outb(0x00, 0x23);
-       } else {
-               /* Go back to Virtual Wire compatibility mode */
-               unsigned long value;
+               return;
+       }
+ #endif
  
-               /* For the spurious interrupt use vector F, and enable it */
-               value = apic_read(APIC_SPIV);
-               value &= ~APIC_VECTOR_MASK;
-               value |= APIC_SPIV_APIC_ENABLED;
-               value |= 0xf;
-               apic_write(APIC_SPIV, value);
+       /* Go back to Virtual Wire compatibility mode */
  
-               if (!virt_wire_setup) {
-                       /*
-                        * For LVT0 make it edge triggered, active high,
-                        * external and enabled
-                        */
-                       value = apic_read(APIC_LVT0);
-                       value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-                               APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-                               APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-                       value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-                       value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
-                       apic_write(APIC_LVT0, value);
-               } else {
-                       /* Disable LVT0 */
-                       apic_write(APIC_LVT0, APIC_LVT_MASKED);
-               }
+       /* For the spurious interrupt use vector F, and enable it */
+       value = apic_read(APIC_SPIV);
+       value &= ~APIC_VECTOR_MASK;
+       value |= APIC_SPIV_APIC_ENABLED;
+       value |= 0xf;
+       apic_write(APIC_SPIV, value);
  
+       if (!virt_wire_setup) {
                /*
-                * For LVT1 make it edge triggered, active high, nmi and
-                * enabled
+                * For LVT0 make it edge triggered, active high,
+                * external and enabled
                 */
-               value = apic_read(APIC_LVT1);
-               value &= ~(
-                       APIC_MODE_MASK | APIC_SEND_PENDING |
+               value = apic_read(APIC_LVT0);
+               value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
                        APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
                        APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
                value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-               value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
-               apic_write(APIC_LVT1, value);
+               value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
+               apic_write(APIC_LVT0, value);
+       } else {
+               /* Disable LVT0 */
+               apic_write(APIC_LVT0, APIC_LVT_MASKED);
        }
+       /*
+        * For LVT1 make it edge triggered, active high,
+        * nmi and enabled
+        */
+       value = apic_read(APIC_LVT1);
+       value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+       value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+       value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+       apic_write(APIC_LVT1, value);
  }
  
  void __cpuinit generic_processor_info(int apicid, int version)
  {
        int cpu;
        cpumask_t tmp_map;
-       physid_mask_t phys_cpu;
  
        /*
         * Validate version
        }
        apic_version[apicid] = version;
  
-       phys_cpu = apicid_to_cpu_present(apicid);
-       physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
        if (num_processors >= NR_CPUS) {
                printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
                        "  Processor ignored.\n", NR_CPUS);
                return;
        }
  
 -      if (num_processors >= maxcpus) {
 -              printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
 -                      " Processor ignored.\n", maxcpus);
 -              return;
 -      }
 -
        num_processors++;
        cpus_complement(tmp_map, cpu_present_map);
        cpu = first_cpu(tmp_map);
  
-       if (apicid == boot_cpu_physical_apicid)
+       physid_set(apicid, phys_cpu_present_map);
+       if (apicid == boot_cpu_physical_apicid) {
                /*
                 * x86_bios_cpu_apicid is required to have processors listed
                 * in same order as logical cpu numbers. Hence the first
                 * entry is BSP, and so on.
                 */
                cpu = 0;
+       }
        if (apicid > max_physical_apicid)
                max_physical_apicid = apicid;
  
+ #ifdef CONFIG_X86_32
        /*
         * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
         * but we need to work other dependencies like SMP_SUSPEND etc
                        def_to_bigsmp = 1;
                }
        }
- #ifdef CONFIG_SMP
+ #endif
+ #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
        /* are we being called early in kernel startup? */
        if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
                u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
                per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
        }
  #endif
        cpu_set(cpu, cpu_possible_map);
        cpu_set(cpu, cpu_present_map);
  }
  #ifdef CONFIG_PM
  
  static struct {
+       /*
+        * 'active' is true if the local APIC was enabled by us and
+        * not the BIOS; this signifies that we are also responsible
+        * for disabling it before entering apm/acpi suspend
+        */
        int active;
        /* r/w apic fields */
        unsigned int apic_id;
@@@ -1577,7 -1620,7 +1612,7 @@@ static int lapic_suspend(struct sys_dev
        apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
        apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
        apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
- #ifdef CONFIG_X86_MCE_P4THERMAL
+ #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
        if (maxlvt >= 5)
                apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
  #endif
@@@ -1601,16 -1644,23 +1636,23 @@@ static int lapic_resume(struct sys_devi
  
        local_irq_save(flags);
  
-       /*
-        * Make sure the APICBASE points to the right address
-        *
-        * FIXME! This will be wrong if we ever support suspend on
-        * SMP! We'll need to do this as part of the CPU restore!
-        */
-       rdmsr(MSR_IA32_APICBASE, l, h);
-       l &= ~MSR_IA32_APICBASE_BASE;
-       l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-       wrmsr(MSR_IA32_APICBASE, l, h);
+ #ifdef CONFIG_X86_64
+       if (x2apic)
+               enable_x2apic();
+       else
+ #endif
+       {
+               /*
+                * Make sure the APICBASE points to the right address
+                *
+                * FIXME! This will be wrong if we ever support suspend on
+                * SMP! We'll need to do this as part of the CPU restore!
+                */
+               rdmsr(MSR_IA32_APICBASE, l, h);
+               l &= ~MSR_IA32_APICBASE_BASE;
+               l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+               wrmsr(MSR_IA32_APICBASE, l, h);
+       }
  
        apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
        apic_write(APIC_ID, apic_pm_state.apic_id);
        apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
        apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
        apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
- #ifdef CONFIG_X86_MCE_P4THERMAL
+ #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
        if (maxlvt >= 5)
                apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
  #endif
        apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
        apic_write(APIC_ESR, 0);
        apic_read(APIC_ESR);
        local_irq_restore(flags);
        return 0;
  }
  
@@@ -1690,20 -1742,20 +1734,20 @@@ static int __init parse_lapic(char *arg
  }
  early_param("lapic", parse_lapic);
  
- static int __init parse_nolapic(char *arg)
+ static int __init setup_disableapic(char *arg)
  {
        disable_apic = 1;
        setup_clear_cpu_cap(X86_FEATURE_APIC);
        return 0;
  }
- early_param("nolapic", parse_nolapic);
+ early_param("disableapic", setup_disableapic);
  
- static int __init parse_disable_lapic_timer(char *arg)
+ /* same as disableapic, for compatibility */
+ static int __init setup_nolapic(char *arg)
  {
-       local_apic_timer_disabled = 1;
-       return 0;
+       return setup_disableapic(arg);
  }
- early_param("nolapic_timer", parse_disable_lapic_timer);
+ early_param("nolapic", setup_nolapic);
  
  static int __init parse_lapic_timer_c2_ok(char *arg)
  {
  }
  early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
  
+ static int __init parse_disable_apic_timer(char *arg)
+ {
+       disable_apic_timer = 1;
+       return 0;
+ }
+ early_param("noapictimer", parse_disable_apic_timer);
+ static int __init parse_nolapic_timer(char *arg)
+ {
+       disable_apic_timer = 1;
+       return 0;
+ }
+ early_param("nolapic_timer", parse_nolapic_timer);
  static int __init apic_set_verbosity(char *arg)
  {
-       if (!arg)
+       if (!arg)  {
+ #ifdef CONFIG_X86_64
+               skip_ioapic_setup = 0;
+               ioapic_force = 1;
+               return 0;
+ #endif
                return -EINVAL;
+       }
  
-       if (strcmp(arg, "debug") == 0)
+       if (strcmp("debug", arg) == 0)
                apic_verbosity = APIC_DEBUG;
-       else if (strcmp(arg, "verbose") == 0)
+       else if (strcmp("verbose", arg) == 0)
                apic_verbosity = APIC_VERBOSE;
+       else {
+               printk(KERN_WARNING "APIC Verbosity level %s not recognised"
+                       " use apic=verbose or apic=debug\n", arg);
+               return -EINVAL;
+       }
  
        return 0;
  }
@@@ -27,6 -27,7 +27,7 @@@
  #include <linux/clockchips.h>
  #include <linux/acpi_pmtmr.h>
  #include <linux/module.h>
+ #include <linux/dmar.h>
  
  #include <asm/atomic.h>
  #include <asm/smp.h>
  #include <asm/proto.h>
  #include <asm/timex.h>
  #include <asm/apic.h>
+ #include <asm/i8259.h>
  
  #include <mach_ipi.h>
  #include <mach_apic.h>
  
+ /* Disable local APIC timer from the kernel commandline or via dmi quirk */
  static int disable_apic_timer __cpuinitdata;
  static int apic_calibrate_pmtmr __initdata;
  int disable_apic;
+ int disable_x2apic;
+ int x2apic;
+ /* x2apic enabled before OS handover */
+ int x2apic_preenabled;
  
  /* Local APIC timer works in C2 */
  int local_apic_timer_c2_ok;
@@@ -73,6 -81,9 +81,9 @@@ static void lapic_timer_setup(enum cloc
  static void lapic_timer_broadcast(cpumask_t mask);
  static void apic_pm_activate(void);
  
+ /*
+  * The local apic timer can be used for any function which is CPU local.
+  */
  static struct clock_event_device lapic_clockevent = {
        .name           = "lapic",
        .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
@@@ -87,6 -98,7 +98,6 @@@
  static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
  
  static unsigned long apic_phys;
 -unsigned int __cpuinitdata maxcpus = NR_CPUS;
  
  unsigned long mp_lapic_addr;
  
@@@ -99,11 -111,15 +110,15 @@@ static inline int lapic_get_version(voi
  }
  
  /*
-  * Check, if the APIC is integrated or a seperate chip
+  * Check, if the APIC is integrated or a separate chip
   */
  static inline int lapic_is_integrated(void)
  {
+ #ifdef CONFIG_X86_64
        return 1;
+ #else
+       return APIC_INTEGRATED(lapic_get_version());
+ #endif
  }
  
  /*
@@@ -118,13 -134,18 +133,18 @@@ static int modern_apic(void
        return lapic_get_version() >= 0x14;
  }
  
- void apic_wait_icr_idle(void)
+ /*
+  * Paravirt kernels also might be using these below ops. So we still
+  * use generic apic_read()/apic_write(), which might be pointing to different
+  * ops in PARAVIRT case.
+  */
+ void xapic_wait_icr_idle(void)
  {
        while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
                cpu_relax();
  }
  
- u32 safe_apic_wait_icr_idle(void)
+ u32 safe_xapic_wait_icr_idle(void)
  {
        u32 send_status;
        int timeout;
        return send_status;
  }
  
+ void xapic_icr_write(u32 low, u32 id)
+ {
+       apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+       apic_write(APIC_ICR, low);
+ }
+ u64 xapic_icr_read(void)
+ {
+       u32 icr1, icr2;
+       icr2 = apic_read(APIC_ICR2);
+       icr1 = apic_read(APIC_ICR);
+       return icr1 | ((u64)icr2 << 32);
+ }
+ static struct apic_ops xapic_ops = {
+       .read = native_apic_mem_read,
+       .write = native_apic_mem_write,
+       .icr_read = xapic_icr_read,
+       .icr_write = xapic_icr_write,
+       .wait_icr_idle = xapic_wait_icr_idle,
+       .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+ };
+ struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+ EXPORT_SYMBOL_GPL(apic_ops);
+ static void x2apic_wait_icr_idle(void)
+ {
+       /* no need to wait for icr idle in x2apic */
+       return;
+ }
+ static u32 safe_x2apic_wait_icr_idle(void)
+ {
+       /* no need to wait for icr idle in x2apic */
+       return 0;
+ }
+ void x2apic_icr_write(u32 low, u32 id)
+ {
+       wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+ }
+ u64 x2apic_icr_read(void)
+ {
+       unsigned long val;
+       rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+       return val;
+ }
+ static struct apic_ops x2apic_ops = {
+       .read = native_apic_msr_read,
+       .write = native_apic_msr_write,
+       .icr_read = x2apic_icr_read,
+       .icr_write = x2apic_icr_write,
+       .wait_icr_idle = x2apic_wait_icr_idle,
+       .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+ };
  /**
   * enable_NMI_through_LVT0 - enable NMI through local vector table 0
   */
@@@ -149,6 -232,11 +231,11 @@@ void __cpuinit enable_NMI_through_LVT0(
  
        /* unmask and set to NMI */
        v = APIC_DM_NMI;
+       /* Level triggered for 82489DX (32bit mode) */
+       if (!lapic_is_integrated())
+               v |= APIC_LVT_LEVEL_TRIGGER;
        apic_write(APIC_LVT0, v);
  }
  
   */
  int lapic_get_maxlvt(void)
  {
-       unsigned int v, maxlvt;
+       unsigned int v;
  
        v = apic_read(APIC_LVR);
-       maxlvt = GET_APIC_MAXLVT(v);
-       return maxlvt;
+       /*
+        * - we always have APIC integrated on 64bit mode
+        * - 82489DXs do not report # of LVT entries
+        */
+       return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
  }
  
+ /*
+  * Local APIC timer
+  */
+ /* Clock divisor */
+ #ifdef CONFG_X86_64
+ #define APIC_DIVISOR 1
+ #else
+ #define APIC_DIVISOR 16
+ #endif
  /*
   * This function sets up the local APIC timer, with a timeout of
   * 'clocks' APIC bus clock. During calibration we actually call
   * We do reads before writes even if unnecessary, to get around the
   * P5 APIC double write bug.
   */
  static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
  {
        unsigned int lvtt_value, tmp_value;
        lvtt_value = LOCAL_TIMER_VECTOR;
        if (!oneshot)
                lvtt_value |= APIC_LVT_TIMER_PERIODIC;
+       if (!lapic_is_integrated())
+               lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
        if (!irqen)
                lvtt_value |= APIC_LVT_MASKED;
  
         * Divide PICLK by 16
         */
        tmp_value = apic_read(APIC_TDCR);
-       apic_write(APIC_TDCR, (tmp_value
-                               & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
-                               | APIC_TDR_DIV_16);
+       apic_write(APIC_TDCR,
+               (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
+               APIC_TDR_DIV_16);
  
        if (!oneshot)
-               apic_write(APIC_TMICT, clocks);
+               apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
  }
  
  /*
@@@ -366,7 -470,7 +469,7 @@@ static int __init calibrate_APIC_clock(
        lapic_clockevent.min_delta_ns =
                clockevent_delta2ns(0xF, &lapic_clockevent);
  
-       calibration_result = result / HZ;
+       calibration_result = (result * APIC_DIVISOR) / HZ;
  
        /*
         * Do a sanity check on the APIC calibration result
  void __init setup_boot_APIC_clock(void)
  {
        /*
-        * The local apic timer can be disabled via the kernel commandline.
-        * Register the lapic timer as a dummy clock event source on SMP
-        * systems, so the broadcast mechanism is used. On UP systems simply
-        * ignore it.
+        * The local apic timer can be disabled via the kernel
+        * commandline or from the CPU detection code. Register the lapic
+        * timer as a dummy clock event source on SMP systems, so the
+        * broadcast mechanism is used. On UP systems simply ignore it.
         */
        if (disable_apic_timer) {
                printk(KERN_INFO "Disabling APIC timer\n");
                return;
        }
  
-       printk(KERN_INFO "Using local APIC timer interrupts.\n");
+       apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
+                   "calibrating APIC timer ...\n");
        if (calibrate_APIC_clock()) {
                /* No broadcast on UP ! */
                if (num_possible_cpus() > 1)
                printk(KERN_WARNING "APIC timer registered as dummy,"
                        " due to nmi_watchdog=%d!\n", nmi_watchdog);
  
+       /* Setup the lapic or request the broadcast */
        setup_APIC_timer();
  }
  
@@@ -460,7 -567,11 +566,11 @@@ static void local_apic_timer_interrupt(
        /*
         * the NMI deadlock-detector uses this.
         */
+ #ifdef CONFIG_X86_64
        add_pda(apic_timer_irqs, 1);
+ #else
+       per_cpu(irq_stat, cpu).apic_timer_irqs++;
+ #endif
  
        evt->event_handler(evt);
  }
@@@ -491,6 -602,7 +601,7 @@@ void smp_apic_timer_interrupt(struct pt
        irq_enter();
        local_apic_timer_interrupt();
        irq_exit();
        set_irq_regs(old_regs);
  }
  
@@@ -544,6 -656,13 +655,13 @@@ void clear_local_APIC(void
                apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
        }
  
+       /* lets not touch this if we didn't frob it */
+ #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
+       if (maxlvt >= 5) {
+               v = apic_read(APIC_LVTTHMR);
+               apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
+       }
+ #endif
        /*
         * Clean APIC state for other OSs:
         */
                apic_write(APIC_LVTERR, APIC_LVT_MASKED);
        if (maxlvt >= 4)
                apic_write(APIC_LVTPC, APIC_LVT_MASKED);
-       apic_write(APIC_ESR, 0);
-       apic_read(APIC_ESR);
+       /* Integrated APIC (!82489DX) ? */
+       if (lapic_is_integrated()) {
+               if (maxlvt > 3)
+                       /* Clear ESR due to Pentium errata 3AP and 11AP */
+                       apic_write(APIC_ESR, 0);
+               apic_read(APIC_ESR);
+       }
  }
  
  /**
@@@ -574,8 -699,28 +698,28 @@@ void disable_local_APIC(void
        value = apic_read(APIC_SPIV);
        value &= ~APIC_SPIV_APIC_ENABLED;
        apic_write(APIC_SPIV, value);
+ #ifdef CONFIG_X86_32
+       /*
+        * When LAPIC was disabled by the BIOS and enabled by the kernel,
+        * restore the disabled state.
+        */
+       if (enabled_via_apicbase) {
+               unsigned int l, h;
+               rdmsr(MSR_IA32_APICBASE, l, h);
+               l &= ~MSR_IA32_APICBASE_ENABLE;
+               wrmsr(MSR_IA32_APICBASE, l, h);
+       }
+ #endif
  }
  
+ /*
+  * If Linux enabled the LAPIC against the BIOS default disable it down before
+  * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
+  * not power-off.  Additionally clear all LVT entries before disable_local_APIC
+  * for the case where Linux didn't enable the LAPIC.
+  */
  void lapic_shutdown(void)
  {
        unsigned long flags;
  
        local_irq_save(flags);
  
-       disable_local_APIC();
+ #ifdef CONFIG_X86_32
+       if (!enabled_via_apicbase)
+               clear_local_APIC();
+       else
+ #endif
+               disable_local_APIC();
  
        local_irq_restore(flags);
  }
@@@ -629,10 -780,10 +779,10 @@@ int __init verify_local_APIC(void
        /*
         * The ID register is read/write in a real APIC.
         */
-       reg0 = read_apic_id();
+       reg0 = apic_read(APIC_ID);
        apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
        apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
-       reg1 = read_apic_id();
+       reg1 = apic_read(APIC_ID);
        apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
        apic_write(APIC_ID, reg0);
        if (reg1 != (reg0 ^ APIC_ID_MASK))
   */
  void __init sync_Arb_IDs(void)
  {
-       /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */
-       if (modern_apic())
+       /*
+        * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
+        * needed on AMD.
+        */
+       if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                return;
  
        /*
        apic_wait_icr_idle();
  
        apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
-       apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
-                               | APIC_DM_INIT);
+       apic_write(APIC_ICR, APIC_DEST_ALLINC |
+                       APIC_INT_LEVELTRIG | APIC_DM_INIT);
  }
  
  /*
@@@ -684,8 -838,6 +837,6 @@@ void __init init_bsp_APIC(void
        if (smp_found_config || !cpu_has_apic)
                return;
  
-       value = apic_read(APIC_LVR);
        /*
         * Do not trust the local APIC being empty at bootup.
         */
        value = apic_read(APIC_SPIV);
        value &= ~APIC_VECTOR_MASK;
        value |= APIC_SPIV_APIC_ENABLED;
-       value |= APIC_SPIV_FOCUS_DISABLED;
+ #ifdef CONFIG_X86_32
+       /* This bit is reserved on P4/Xeon and should be cleared */
+       if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+           (boot_cpu_data.x86 == 15))
+               value &= ~APIC_SPIV_FOCUS_DISABLED;
+       else
+ #endif
+               value |= APIC_SPIV_FOCUS_DISABLED;
        value |= SPURIOUS_APIC_VECTOR;
        apic_write(APIC_SPIV, value);
  
         */
        apic_write(APIC_LVT0, APIC_DM_EXTINT);
        value = APIC_DM_NMI;
+       if (!lapic_is_integrated())             /* 82489DX */
+               value |= APIC_LVT_LEVEL_TRIGGER;
        apic_write(APIC_LVT1, value);
  }
  
+ static void __cpuinit lapic_setup_esr(void)
+ {
+       unsigned long oldvalue, value, maxlvt;
+       if (lapic_is_integrated() && !esr_disable) {
+               if (esr_disable) {
+                       /*
+                        * Something untraceable is creating bad interrupts on
+                        * secondary quads ... for the moment, just leave the
+                        * ESR disabled - we can't do anything useful with the
+                        * errors anyway - mbligh
+                        */
+                       printk(KERN_INFO "Leaving ESR disabled.\n");
+                       return;
+               }
+               /* !82489DX */
+               maxlvt = lapic_get_maxlvt();
+               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+                       apic_write(APIC_ESR, 0);
+               oldvalue = apic_read(APIC_ESR);
+               /* enables sending errors */
+               value = ERROR_APIC_VECTOR;
+               apic_write(APIC_LVTERR, value);
+               /*
+                * spec says clear errors after enabling vector.
+                */
+               if (maxlvt > 3)
+                       apic_write(APIC_ESR, 0);
+               value = apic_read(APIC_ESR);
+               if (value != oldvalue)
+                       apic_printk(APIC_VERBOSE, "ESR value before enabling "
+                               "vector: 0x%08lx  after: 0x%08lx\n",
+                               oldvalue, value);
+       } else {
+               printk(KERN_INFO "No ESR for 82489DX.\n");
+       }
+ }
  /**
   * setup_local_APIC - setup the local APIC
   */
@@@ -814,25 -1015,143 +1014,143 @@@ void __cpuinit setup_local_APIC(void
        preempt_enable();
  }
  
- static void __cpuinit lapic_setup_esr(void)
- {
-       unsigned maxlvt = lapic_get_maxlvt();
-       apic_write(APIC_LVTERR, ERROR_APIC_VECTOR);
-       /*
-        * spec says clear errors after enabling vector.
-        */
-       if (maxlvt > 3)
-               apic_write(APIC_ESR, 0);
- }
  void __cpuinit end_local_APIC_setup(void)
  {
        lapic_setup_esr();
+ #ifdef CONFIG_X86_32
+       {
+               unsigned int value;
+               /* Disable the local apic timer */
+               value = apic_read(APIC_LVTT);
+               value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+               apic_write(APIC_LVTT, value);
+       }
+ #endif
        setup_apic_nmi_watchdog(NULL);
        apic_pm_activate();
  }
  
+ void check_x2apic(void)
+ {
+       int msr, msr2;
+       rdmsr(MSR_IA32_APICBASE, msr, msr2);
+       if (msr & X2APIC_ENABLE) {
+               printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+               x2apic_preenabled = x2apic = 1;
+               apic_ops = &x2apic_ops;
+       }
+ }
+ void enable_x2apic(void)
+ {
+       int msr, msr2;
+       rdmsr(MSR_IA32_APICBASE, msr, msr2);
+       if (!(msr & X2APIC_ENABLE)) {
+               printk("Enabling x2apic\n");
+               wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+       }
+ }
+ void enable_IR_x2apic(void)
+ {
+ #ifdef CONFIG_INTR_REMAP
+       int ret;
+       unsigned long flags;
+       if (!cpu_has_x2apic)
+               return;
+       if (!x2apic_preenabled && disable_x2apic) {
+               printk(KERN_INFO
+                      "Skipped enabling x2apic and Interrupt-remapping "
+                      "because of nox2apic\n");
+               return;
+       }
+       if (x2apic_preenabled && disable_x2apic)
+               panic("Bios already enabled x2apic, can't enforce nox2apic");
+       if (!x2apic_preenabled && skip_ioapic_setup) {
+               printk(KERN_INFO
+                      "Skipped enabling x2apic and Interrupt-remapping "
+                      "because of skipping io-apic setup\n");
+               return;
+       }
+       ret = dmar_table_init();
+       if (ret) {
+               printk(KERN_INFO
+                      "dmar_table_init() failed with %d:\n", ret);
+               if (x2apic_preenabled)
+                       panic("x2apic enabled by bios. But IR enabling failed");
+               else
+                       printk(KERN_INFO
+                              "Not enabling x2apic,Intr-remapping\n");
+               return;
+       }
+       local_irq_save(flags);
+       mask_8259A();
+       save_mask_IO_APIC_setup();
+       ret = enable_intr_remapping(1);
+       if (ret && x2apic_preenabled) {
+               local_irq_restore(flags);
+               panic("x2apic enabled by bios. But IR enabling failed");
+       }
+       if (ret)
+               goto end;
+       if (!x2apic) {
+               x2apic = 1;
+               apic_ops = &x2apic_ops;
+               enable_x2apic();
+       }
+ end:
+       if (ret)
+               /*
+                * IR enabling failed
+                */
+               restore_IO_APIC_setup();
+       else
+               reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+       unmask_8259A();
+       local_irq_restore(flags);
+       if (!ret) {
+               if (!x2apic_preenabled)
+                       printk(KERN_INFO
+                              "Enabled x2apic and interrupt-remapping\n");
+               else
+                       printk(KERN_INFO
+                              "Enabled Interrupt-remapping\n");
+       } else
+               printk(KERN_ERR
+                      "Failed to enable Interrupt-remapping and x2apic\n");
+ #else
+       if (!cpu_has_x2apic)
+               return;
+       if (x2apic_preenabled)
+               panic("x2apic enabled prior OS handover,"
+                     " enable CONFIG_INTR_REMAP");
+       printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+              " and x2apic\n");
+ #endif
+       return;
+ }
  /*
   * Detect and enable local APICs on non-SMP boards.
   * Original code written by Keir Fraser.
@@@ -872,7 -1191,7 +1190,7 @@@ void __init early_init_lapic_mapping(vo
         * Fetch the APIC ID of the BSP in case we have a
         * default configuration (or the MP table is broken).
         */
-       boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+       boot_cpu_physical_apicid = read_apic_id();
  }
  
  /**
   */
  void __init init_apic_mappings(void)
  {
+       if (x2apic) {
+               boot_cpu_physical_apicid = read_apic_id();
+               return;
+       }
        /*
         * If no local APIC can be found then set up a fake all
         * zeroes page to simulate the local APIC and another
         * Fetch the APIC ID of the BSP in case we have a
         * default configuration (or the MP table is broken).
         */
-       boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+       boot_cpu_physical_apicid = read_apic_id();
  }
  
  /*
   * This initializes the IO-APIC and APIC hardware if this is
   * a UP kernel.
   */
+ int apic_version[MAX_APICS];
  int __init APIC_init_uniprocessor(void)
  {
        if (disable_apic) {
                return -1;
        }
  
+       enable_IR_x2apic();
+       setup_apic_routing();
        verify_local_APIC();
  
        connect_bsp_APIC();
@@@ -1004,17 -1333,57 +1332,57 @@@ asmlinkage void smp_error_interrupt(voi
  }
  
  /**
-  *  * connect_bsp_APIC - attach the APIC to the interrupt system
-  *   */
+  * connect_bsp_APIC - attach the APIC to the interrupt system
+  */
  void __init connect_bsp_APIC(void)
  {
+ #ifdef CONFIG_X86_32
+       if (pic_mode) {
+               /*
+                * Do not trust the local APIC being empty at bootup.
+                */
+               clear_local_APIC();
+               /*
+                * PIC mode: enable APIC mode in the IMCR, i.e. connect BSP's
+                * local APIC to INT and NMI lines.
+                */
+               apic_printk(APIC_VERBOSE, "leaving PIC mode, "
+                               "enabling APIC mode.\n");
+               outb(0x70, 0x22);
+               outb(0x01, 0x23);
+       }
+ #endif
        enable_apic_mode();
  }
  
+ /**
+  * disconnect_bsp_APIC - detach the APIC from the interrupt system
+  * @virt_wire_setup:  indicates, whether virtual wire mode is selected
+  *
+  * Virtual wire mode is necessary to deliver legacy interrupts even when the
+  * APIC is disabled.
+  */
  void disconnect_bsp_APIC(int virt_wire_setup)
  {
+       unsigned int value;
+ #ifdef CONFIG_X86_32
+       if (pic_mode) {
+               /*
+                * Put the board back into PIC mode (has an effect only on
+                * certain older boards).  Note that APIC interrupts, including
+                * IPIs, won't work beyond this point!  The only exception are
+                * INIT IPIs.
+                */
+               apic_printk(APIC_VERBOSE, "disabling APIC mode, "
+                               "entering PIC mode.\n");
+               outb(0x70, 0x22);
+               outb(0x00, 0x23);
+               return;
+       }
+ #endif
        /* Go back to Virtual Wire compatibility mode */
-       unsigned long value;
  
        /* For the spurious interrupt use vector F, and enable it */
        value = apic_read(APIC_SPIV);
                apic_write(APIC_LVT0, APIC_LVT_MASKED);
        }
  
-       /* For LVT1 make it edge triggered, active high, nmi and enabled */
+       /*
+        * For LVT1 make it edge triggered, active high,
+        * nmi and enabled
+        */
        value = apic_read(APIC_LVT1);
        value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
                        APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
@@@ -1055,12 -1427,29 +1426,23 @@@ void __cpuinit generic_processor_info(i
        int cpu;
        cpumask_t tmp_map;
  
+       /*
+        * Validate version
+        */
+       if (version == 0x0) {
+               printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
+                               "fixing up to 0x10. (tell your hw vendor)\n",
+                               version);
+               version = 0x10;
+       }
+       apic_version[apicid] = version;
        if (num_processors >= NR_CPUS) {
                printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
-                      " Processor ignored.\n", NR_CPUS);
+                       "  Processor ignored.\n", NR_CPUS);
                return;
        }
  
 -      if (num_processors >= maxcpus) {
 -              printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
 -                      " Processor ignored.\n", maxcpus);
 -              return;
 -      }
 -
        num_processors++;
        cpus_complement(tmp_map, cpu_present_map);
        cpu = first_cpu(tmp_map);
        if (apicid > max_physical_apicid)
                max_physical_apicid = apicid;
  
+ #ifdef CONFIG_X86_32
+       /*
+        * It would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y,
+        * but we need to work out other dependencies like SMP_SUSPEND etc.
+        * before this can be done without some confusion.
+        * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
+        *       - Ashok Raj <ashok.raj@intel.com>
+        */
+       if (max_physical_apicid >= 8) {
+               switch (boot_cpu_data.x86_vendor) {
+               case X86_VENDOR_INTEL:
+                       if (!APIC_XAPIC(version)) {
+                               def_to_bigsmp = 0;
+                               break;
+                       }
+                       /* If P4 and above fall through */
+               case X86_VENDOR_AMD:
+                       def_to_bigsmp = 1;
+               }
+       }
+ #endif
+ #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
        /* are we being called early in kernel startup? */
        if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
                u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
                per_cpu(x86_cpu_to_apicid, cpu) = apicid;
                per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
        }
+ #endif
  
        cpu_set(cpu, cpu_possible_map);
        cpu_set(cpu, cpu_present_map);
  }
  
+ int hard_smp_processor_id(void)
+ {
+       return read_apic_id();
+ }
  /*
   * Power management
   */
  #ifdef CONFIG_PM
  
  static struct {
-       /* 'active' is true if the local APIC was enabled by us and
-          not the BIOS; this signifies that we are also responsible
-          for disabling it before entering apm/acpi suspend */
+       /*
+        * 'active' is true if the local APIC was enabled by us and
+        * not the BIOS; this signifies that we are also responsible
+        * for disabling it before entering apm/acpi suspend
+        */
        int active;
        /* r/w apic fields */
        unsigned int apic_id;
@@@ -1129,7 -1549,7 +1542,7 @@@ static int lapic_suspend(struct sys_dev
  
        maxlvt = lapic_get_maxlvt();
  
-       apic_pm_state.apic_id = read_apic_id();
+       apic_pm_state.apic_id = apic_read(APIC_ID);
        apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
        apic_pm_state.apic_ldr = apic_read(APIC_LDR);
        apic_pm_state.apic_dfr = apic_read(APIC_DFR);
        apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
        apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
        apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
- #ifdef CONFIG_X86_MCE_INTEL
+ #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
        if (maxlvt >= 5)
                apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
  #endif
        local_irq_save(flags);
        disable_local_APIC();
        local_irq_restore(flags);
@@@ -1164,10 -1585,25 +1578,25 @@@ static int lapic_resume(struct sys_devi
        maxlvt = lapic_get_maxlvt();
  
        local_irq_save(flags);
-       rdmsr(MSR_IA32_APICBASE, l, h);
-       l &= ~MSR_IA32_APICBASE_BASE;
-       l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-       wrmsr(MSR_IA32_APICBASE, l, h);
+ #ifdef CONFIG_X86_64
+       if (x2apic)
+               enable_x2apic();
+       else
+ #endif
+       {
+               /*
+                * Make sure the APICBASE points to the right address
+                *
+                * FIXME! This will be wrong if we ever support suspend on
+                * SMP! We'll need to do this as part of the CPU restore!
+                */
+               rdmsr(MSR_IA32_APICBASE, l, h);
+               l &= ~MSR_IA32_APICBASE_BASE;
+               l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+               wrmsr(MSR_IA32_APICBASE, l, h);
+       }
        apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
        apic_write(APIC_ID, apic_pm_state.apic_id);
        apic_write(APIC_DFR, apic_pm_state.apic_dfr);
        apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
        apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
        apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
- #ifdef CONFIG_X86_MCE_INTEL
+ #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
        if (maxlvt >= 5)
                apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
  #endif
        apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
        apic_write(APIC_ESR, 0);
        apic_read(APIC_ESR);
        local_irq_restore(flags);
        return 0;
  }
  
+ /*
+  * This device has no shutdown method - fully functioning local APICs
+  * are needed on every CPU up until machine_halt/restart/poweroff.
+  */
  static struct sysdev_class lapic_sysclass = {
        .name           = "lapic",
        .resume         = lapic_resume,
@@@ -1307,31 -1750,19 +1743,19 @@@ __cpuinit int apic_is_clustered_box(voi
        return (clusters > 2);
  }
  
- /*
-  * APIC command line parameters
-  */
- static int __init apic_set_verbosity(char *str)
+ static __init int setup_nox2apic(char *str)
  {
-       if (str == NULL)  {
-               skip_ioapic_setup = 0;
-               ioapic_force = 1;
-               return 0;
-       }
-       if (strcmp("debug", str) == 0)
-               apic_verbosity = APIC_DEBUG;
-       else if (strcmp("verbose", str) == 0)
-               apic_verbosity = APIC_VERBOSE;
-       else {
-               printk(KERN_WARNING "APIC Verbosity level %s not recognised"
-                               " use apic=verbose or apic=debug\n", str);
-               return -EINVAL;
-       }
+       disable_x2apic = 1;
+       clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
        return 0;
  }
- early_param("apic", apic_set_verbosity);
+ early_param("nox2apic", setup_nox2apic);
  
- static __init int setup_disableapic(char *str)
+ /*
+  * APIC command line parameters
+  */
+ static int __init setup_disableapic(char *arg)
  {
        disable_apic = 1;
        setup_clear_cpu_cap(X86_FEATURE_APIC);
  early_param("disableapic", setup_disableapic);
  
  /* same as disableapic, for compatibility */
- static __init int setup_nolapic(char *str)
+ static int __init setup_nolapic(char *arg)
  {
-       return setup_disableapic(str);
+       return setup_disableapic(arg);
  }
  early_param("nolapic", setup_nolapic);
  
@@@ -1353,14 -1784,19 +1777,19 @@@ static int __init parse_lapic_timer_c2_
  }
  early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
  
- static __init int setup_noapictimer(char *str)
+ static int __init parse_disable_apic_timer(char *arg)
  {
-       if (str[0] != ' ' && str[0] != 0)
-               return 0;
        disable_apic_timer = 1;
-       return 1;
+       return 0;
  }
- __setup("noapictimer", setup_noapictimer);
+ early_param("noapictimer", parse_disable_apic_timer);
+ static int __init parse_nolapic_timer(char *arg)
+ {
+       disable_apic_timer = 1;
+       return 0;
+ }
+ early_param("nolapic_timer", parse_nolapic_timer);
  
  static __init int setup_apicpmtimer(char *s)
  {
  }
  __setup("apicpmtimer", setup_apicpmtimer);
  
+ static int __init apic_set_verbosity(char *arg)
+ {
+       if (!arg)  {
+ #ifdef CONFIG_X86_64
+               skip_ioapic_setup = 0;
+               ioapic_force = 1;
+               return 0;
+ #endif
+               return -EINVAL;
+       }
+       if (strcmp("debug", arg) == 0)
+               apic_verbosity = APIC_DEBUG;
+       else if (strcmp("verbose", arg) == 0)
+               apic_verbosity = APIC_VERBOSE;
+       else {
+               printk(KERN_WARNING "APIC Verbosity level %s not recognised"
+                       " use apic=verbose or apic=debug\n", arg);
+               return -EINVAL;
+       }
+       return 0;
+ }
+ early_param("apic", apic_set_verbosity);
  static int __init lapic_insert_resource(void)
  {
        if (!apic_phys)
@@@ -18,7 -18,6 +18,7 @@@
  #include <asm/mtrr.h>
  #include <asm/mce.h>
  #include <asm/pat.h>
 +#include <asm/asm.h>
  #include <asm/numa.h>
  #ifdef CONFIG_X86_LOCAL_APIC
  #include <asm/mpspec.h>
@@@ -216,39 -215,6 +216,39 @@@ static void __init early_cpu_support_pr
        }
  }
  
 +/*
 + * The NOPL instruction is supposed to exist on all CPUs with
 + * family >= 6, unfortunately, that's not true in practice because
 + * of early VIA chips and (more importantly) broken virtualizers that
 + * are not easy to detect.  Hence, probe for it based on first
 + * principles.
 + *
 + * Note: no 64-bit chip is known to lack these, but put the code here
 + * for consistency with 32 bits, and to make it utterly trivial to
 + * diagnose the problem should it ever surface.
 + */
 +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
 +{
 +      const u32 nopl_signature = 0x888c53b1; /* Random number */
 +      u32 has_nopl = nopl_signature;
 +
 +      clear_cpu_cap(c, X86_FEATURE_NOPL);
 +      if (c->x86 >= 6) {
 +              asm volatile("\n"
 +                           "1:      .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
 +                           "2:\n"
 +                           "        .section .fixup,\"ax\"\n"
 +                           "3:      xor %0,%0\n"
 +                           "        jmp 2b\n"
 +                           "        .previous\n"
 +                           _ASM_EXTABLE(1b,3b)
 +                           : "+a" (has_nopl));
 +
 +              if (has_nopl == nopl_signature)
 +                      set_cpu_cap(c, X86_FEATURE_NOPL);
 +      }
 +}
 +
  static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
  
  void __init early_cpu_init(void)
@@@ -347,8 -313,6 +347,8 @@@ static void __cpuinit early_identify_cp
                c->x86_phys_bits = eax & 0xff;
        }
  
 +      detect_nopl(c);
 +
        if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
            cpu_devs[c->x86_vendor]->c_early_init)
                cpu_devs[c->x86_vendor]->c_early_init(c);
@@@ -430,49 -394,6 +430,49 @@@ static __init int setup_noclflush(char 
  }
  __setup("noclflush", setup_noclflush);
  
 +struct msr_range {
 +      unsigned min;
 +      unsigned max;
 +};
 +
 +static struct msr_range msr_range_array[] __cpuinitdata = {
 +      { 0x00000000, 0x00000418},
 +      { 0xc0000000, 0xc000040b},
 +      { 0xc0010000, 0xc0010142},
 +      { 0xc0011000, 0xc001103b},
 +};
 +
 +static void __cpuinit print_cpu_msr(void)
 +{
 +      unsigned index;
 +      u64 val;
 +      int i;
 +      unsigned index_min, index_max;
 +
 +      for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
 +              index_min = msr_range_array[i].min;
 +              index_max = msr_range_array[i].max;
 +              for (index = index_min; index < index_max; index++) {
 +                      if (rdmsrl_amd_safe(index, &val))
 +                              continue;
 +                      printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
 +              }
 +      }
 +}
 +
 +static int show_msr __cpuinitdata;
 +static __init int setup_show_msr(char *arg)
 +{
 +      int num;
 +
 +      get_option(&arg, &num);
 +
 +      if (num > 0)
 +              show_msr = num;
 +      return 1;
 +}
 +__setup("show_msr=", setup_show_msr);
 +
  void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
  {
        if (c->x86_model_id[0])
                printk(KERN_CONT " stepping %02x\n", c->x86_mask);
        else
                printk(KERN_CONT "\n");
 +
 +#ifdef CONFIG_SMP
 +      if (c->cpu_index < show_msr)
 +              print_cpu_msr();
 +#else
 +      if (show_msr)
 +              print_cpu_msr();
 +#endif
  }
  
  static __init int setup_disablecpuid(char *arg)
@@@ -687,6 -600,8 +687,8 @@@ void __cpuinit cpu_init(void
        barrier();
  
        check_efer();
+       if (cpu != 0 && x2apic)
+               enable_x2apic();
  
        /*
         * set up and load the per-CPU TSS
@@@ -39,14 -39,13 +39,14 @@@ const char * const x86_cap_flags[NCAPIN
        NULL, NULL, NULL, NULL,
        "constant_tsc", "up", NULL, "arch_perfmon",
        "pebs", "bts", NULL, NULL,
 -      "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 +      "rep_good", NULL, NULL, NULL,
 +      "nopl", NULL, NULL, NULL,
        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  
        /* Intel-defined (#2) */
        "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
        "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
-       NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
+       NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt",
        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  
        /* VIA/Cyrix/Centaur-defined */
  #include <linux/threads.h>
  #include <linux/cpumask.h>
  #include <linux/string.h>
- #include <linux/kernel.h>
  #include <linux/ctype.h>
  #include <linux/init.h>
  #include <linux/sched.h>
  #include <linux/bootmem.h>
  #include <linux/module.h>
+ #include <linux/hardirq.h>
  #include <asm/smp.h>
  #include <asm/ipi.h>
  #include <asm/genapic.h>
  #include <asm/uv/uv_hub.h>
  #include <asm/uv/bios.h>
  
+ DEFINE_PER_CPU(int, x2apic_extra_bits);
+ static enum uv_system_type uv_system_type;
+ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+ {
+       if (!strcmp(oem_id, "SGI")) {
+               if (!strcmp(oem_table_id, "UVL"))
+                       uv_system_type = UV_LEGACY_APIC;
+               else if (!strcmp(oem_table_id, "UVX"))
+                       uv_system_type = UV_X2APIC;
+               else if (!strcmp(oem_table_id, "UVH")) {
+                       uv_system_type = UV_NON_UNIQUE_APIC;
+                       return 1;
+               }
+       }
+       return 0;
+ }
+ enum uv_system_type get_uv_system_type(void)
+ {
+       return uv_system_type;
+ }
+ int is_uv_system(void)
+ {
+       return uv_system_type != UV_NONE;
+ }
+ EXPORT_SYMBOL_GPL(is_uv_system);
  DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
  EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
  
@@@ -123,6 -153,10 +153,10 @@@ static int uv_apic_id_registered(void
        return 1;
  }
  
+ static void uv_init_apic_ldr(void)
+ {
+ }
  static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
  {
        int cpu;
                return BAD_APICID;
  }
  
+ static unsigned int get_apic_id(unsigned long x)
+ {
+       unsigned int id;
+       WARN_ON(preemptible() && num_online_cpus() > 1);
+       id = x | __get_cpu_var(x2apic_extra_bits);
+       return id;
+ }
+ static unsigned long set_apic_id(unsigned int id)
+ {
+       unsigned long x;
+       /* mask out x2apic_extra_bits? */
+       x = id;
+       return x;
+ }
+ static unsigned int uv_read_apic_id(void)
+ {
+       return get_apic_id(apic_read(APIC_ID));
+ }
  static unsigned int phys_pkg_id(int index_msb)
  {
-       return GET_APIC_ID(read_apic_id()) >> index_msb;
+       return uv_read_apic_id() >> index_msb;
  }
  
  #ifdef ZZZ            /* Needs x2apic patch */
@@@ -152,17 -211,22 +211,22 @@@ static void uv_send_IPI_self(int vector
  
  struct genapic apic_x2apic_uv_x = {
        .name = "UV large system",
+       .acpi_madt_oem_check = uv_acpi_madt_oem_check,
        .int_delivery_mode = dest_Fixed,
        .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
        .target_cpus = uv_target_cpus,
        .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
        .apic_id_registered = uv_apic_id_registered,
+       .init_apic_ldr = uv_init_apic_ldr,
        .send_IPI_all = uv_send_IPI_all,
        .send_IPI_allbutself = uv_send_IPI_allbutself,
        .send_IPI_mask = uv_send_IPI_mask,
        /* ZZZ.send_IPI_self = uv_send_IPI_self, */
        .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
        .phys_pkg_id = phys_pkg_id,     /* Fixme ZZZ */
+       .get_apic_id = get_apic_id,
+       .set_apic_id = set_apic_id,
+       .apic_id_mask = (0xFFFFFFFFu),
  };
  
  static __cpuinit void set_x2apic_extra_bits(int pnode)
@@@ -222,7 -286,7 +286,7 @@@ static __init void map_low_mmrs(void
  
  enum map_type {map_wb, map_uc};
  
 -static void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
 +static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
  {
        unsigned long bytes, paddr;
  
@@@ -293,9 -357,7 +357,9 @@@ static __init void uv_rtc_init(void
                sn_rtc_cycles_per_second = ticks_per_sec;
  }
  
 -static __init void uv_system_init(void)
 +static bool uv_system_inited;
 +
 +void __init uv_system_init(void)
  {
        union uvh_si_addr_map_config_u m_n_config;
        union uvh_node_id_u node_id;
        map_mmr_high(max_pnode);
        map_config_high(max_pnode);
        map_mmioh_high(max_pnode);
 +      uv_system_inited = true;
  }
  
  /*
   */
  void __cpuinit uv_cpu_init(void)
  {
 -      if (!uv_node_to_blade)
 -              uv_system_init();
 +      BUG_ON(!uv_system_inited);
  
        uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
  
        if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
                set_x2apic_extra_bits(uv_hub_info->pnode);
  }
@@@ -49,7 -49,7 +49,7 @@@ static int __init mpf_checksum(unsigne
        return sum & 0xFF;
  }
  
 -static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
 +static void __init MP_processor_info(struct mpc_config_processor *m)
  {
        int apicid;
        char *bootup_cpu = "";
@@@ -397,7 -397,9 +397,9 @@@ static int __init smp_read_mpc(struct m
         generic_bigsmp_probe();
  #endif
  
+ #ifdef CONFIG_X86_32
        setup_apic_routing();
+ #endif
        if (!num_processors)
                printk(KERN_ERR "MPTABLE: no processors registered!\n");
        return num_processors;
@@@ -484,7 -486,7 +486,7 @@@ static void __init construct_default_io
  }
  
  
 -static void construct_ioapic_table(int mpc_default_type)
 +static void __init construct_ioapic_table(int mpc_default_type)
  {
        struct mpc_config_ioapic ioapic;
        struct mpc_config_bus bus;
        construct_default_ioirq_mptable(mpc_default_type);
  }
  #else
 -static inline void construct_ioapic_table(int mpc_default_type) { }
 +static inline void __init construct_ioapic_table(int mpc_default_type) { }
  #endif
  
  static inline void __init construct_default_ISA_mptable(int mpc_default_type)
@@@ -73,7 -73,7 +73,7 @@@ static void __init smp_dump_qct(void
  }
  
  
 -void __init numaq_tsc_disable(void)
 +void __cpuinit numaq_tsc_disable(void)
  {
        if (!found_numaq)
                return;
@@@ -229,6 -229,12 +229,12 @@@ static void __init smp_read_mpc_oem(str
        }
  }
  
+ static int __init numaq_setup_ioapic_ids(void)
+ {
+       /* so can skip it */
+       return 1;
+ }
  static struct x86_quirks numaq_x86_quirks __initdata = {
        .arch_pre_time_init     = numaq_pre_time_init,
        .arch_time_init         = NULL,
        .mpc_oem_bus_info       = mpc_oem_bus_info,
        .mpc_oem_pci_bus        = mpc_oem_pci_bus,
        .smp_read_mpc_oem       = smp_read_mpc_oem,
+       .setup_ioapic_ids       = numaq_setup_ioapic_ids,
  };
  
  void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
@@@ -330,7 -330,6 +330,7 @@@ struct pv_cpu_ops pv_cpu_ops = 
  #endif
        .wbinvd = native_wbinvd,
        .read_msr = native_read_msr_safe,
 +      .read_msr_amd = native_read_msr_amd_safe,
        .write_msr = native_write_msr_safe,
        .read_tsc = native_read_tsc,
        .read_pmc = native_read_pmc,
  
  struct pv_apic_ops pv_apic_ops = {
  #ifdef CONFIG_X86_LOCAL_APIC
-       .apic_write = native_apic_write,
-       .apic_read = native_apic_read,
        .setup_boot_clock = setup_boot_APIC_clock,
        .setup_secondary_clock = setup_secondary_APIC_clock,
        .startup_ipi_hook = paravirt_nop,
@@@ -472,7 -469,7 +470,7 @@@ struct pv_lock_ops pv_lock_ops = 
        .spin_unlock = __ticket_spin_unlock,
  #endif
  };
 -EXPORT_SYMBOL_GPL(pv_lock_ops);
 +EXPORT_SYMBOL(pv_lock_ops);
  
  EXPORT_SYMBOL_GPL(pv_time_ops);
  EXPORT_SYMBOL    (pv_cpu_ops);
diff --combined arch/x86/kernel/setup.c
@@@ -223,9 -223,6 +223,9 @@@ unsigned long saved_video_mode
  #define RAMDISK_LOAD_FLAG             0x4000
  
  static char __initdata command_line[COMMAND_LINE_SIZE];
 +#ifdef CONFIG_CMDLINE_BOOL
 +static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
 +#endif
  
  #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
  struct edd edd;
@@@ -448,7 -445,7 +448,7 @@@ static void __init reserve_early_setup_
   * @size: Size of the crashkernel memory to reserve.
   * Returns the base address on success, and -1ULL on failure.
   */
 -unsigned long long find_and_reserve_crashkernel(unsigned long long size)
 +unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
  {
        const unsigned long long alignment = 16<<20;    /* 16M */
        unsigned long long start = 0LL;
@@@ -607,6 -604,14 +607,6 @@@ void __init setup_arch(char **cmdline_p
        early_cpu_init();
        early_ioremap_init();
  
 -#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
 -      /*
 -       * Must be before kernel pagetables are setup
 -       * or fixmap area is touched.
 -       */
 -      vmi_init();
 -#endif
 -
        ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
        screen_info = boot_params.screen_info;
        edid_info = boot_params.edid_info;
        bss_resource.start = virt_to_phys(&__bss_start);
        bss_resource.end = virt_to_phys(&__bss_stop)-1;
  
 +#ifdef CONFIG_CMDLINE_BOOL
 +#ifdef CONFIG_CMDLINE_OVERRIDE
 +      strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
 +#else
 +      if (builtin_cmdline[0]) {
 +              /* append boot loader cmdline to builtin */
 +              strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
 +              strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
 +              strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
 +      }
 +#endif
 +#endif
 +
        strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
        *cmdline_p = command_line;
  
        parse_early_param();
  
 +#ifdef CONFIG_X86_64
 +      check_efer();
 +#endif
 +
 +#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
 +      /*
 +       * Must be before kernel pagetables are setup
 +       * or fixmap area is touched.
 +       */
 +      vmi_init();
 +#endif
 +
        /* after early param, so could get panic from serial */
        reserve_early_setup_data();
  
  #else
        num_physpages = max_pfn;
  
 -      check_efer();
+       if (cpu_has_x2apic)
+               check_x2apic();
  
        /* How many end-of-memory variables you have, grandma! */
        /* need this before calling reserve_initrd */
@@@ -123,7 -123,6 +123,6 @@@ EXPORT_PER_CPU_SYMBOL(cpu_info)
  
  static atomic_t init_deasserted;
  
- static int boot_cpu_logical_apicid;
  
  /* representing cpus for which sibling maps can be computed */
  static cpumask_t cpu_sibling_setup_map;
@@@ -165,6 -164,8 +164,8 @@@ static void unmap_cpu_to_node(int cpu
  #endif
  
  #ifdef CONFIG_X86_32
+ static int boot_cpu_logical_apicid;
  u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
                                        { [0 ... NR_CPUS-1] = BAD_APICID };
  
@@@ -210,7 -211,7 +211,7 @@@ static void __cpuinit smp_callin(void
        /*
         * (This works even if the APIC is not enabled.)
         */
-       phys_id = GET_APIC_ID(read_apic_id());
+       phys_id = read_apic_id();
        cpuid = smp_processor_id();
        if (cpu_isset(cpuid, cpu_callin_map)) {
                panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@@ -550,8 -551,7 +551,7 @@@ static inline void __inquire_remote_api
                        printk(KERN_CONT
                               "a previous APIC delivery may have failed\n");
  
-               apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
-               apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
+               apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
  
                timeout = 0;
                do {
@@@ -583,11 -583,9 +583,9 @@@ wakeup_secondary_cpu(int logical_apicid
        int maxlvt;
  
        /* Target chip */
-       apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
        /* Boot on the stack */
        /* Kick the second */
-       apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+       apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
  
        pr_debug("Waiting for send to finish...\n");
        send_status = safe_apic_wait_icr_idle();
@@@ -640,13 -638,11 +638,11 @@@ wakeup_secondary_cpu(int phys_apicid, u
        /*
         * Turn INIT on target chip
         */
-       apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
        /*
         * Send IPI
         */
-       apic_write(APIC_ICR,
-                  APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
+       apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
+                      phys_apicid);
  
        pr_debug("Waiting for send to finish...\n");
        send_status = safe_apic_wait_icr_idle();
        pr_debug("Deasserting INIT.\n");
  
        /* Target chip */
-       apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
        /* Send IPI */
-       apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+       apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
  
        pr_debug("Waiting for send to finish...\n");
        send_status = safe_apic_wait_icr_idle();
                 */
  
                /* Target chip */
-               apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
                /* Boot on the stack */
                /* Kick the second */
-               apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12));
+               apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
+                              phys_apicid);
  
                /*
                 * Give the other CPU some time to accept the IPI.
@@@ -756,14 -749,6 +749,14 @@@ static void __cpuinit do_fork_idle(stru
  }
  
  #ifdef CONFIG_X86_64
 +
 +/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
 +static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
 +{
 +      if (!after_bootmem)
 +              free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
 +}
 +
  /*
   * Allocate node local memory for the AP pda.
   *
@@@ -792,7 -777,8 +785,7 @@@ int __cpuinit get_local_pda(int cpu
  
        if (oldpda) {
                memcpy(newpda, oldpda, size);
 -              if (!after_bootmem)
 -                      free_bootmem((unsigned long)oldpda, size);
 +              free_bootmem_pda(oldpda);
        }
  
        newpda->in_bootmem = 0;
@@@ -1001,7 -987,17 +994,7 @@@ int __cpuinit native_cpu_up(unsigned in
        flush_tlb_all();
        low_mappings = 1;
  
 -#ifdef CONFIG_X86_PC
 -      if (def_to_bigsmp && apicid > 8) {
 -              printk(KERN_WARNING
 -                      "More than 8 CPUs detected - skipping them.\n"
 -                      "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
 -              err = -1;
 -      } else
 -              err = do_boot_cpu(apicid, cpu);
 -#else
        err = do_boot_cpu(apicid, cpu);
 -#endif
  
        zap_low_mappings();
        low_mappings = 0;
@@@ -1055,34 -1051,6 +1048,34 @@@ static __init void disable_smp(void
  static int __init smp_sanity_check(unsigned max_cpus)
  {
        preempt_disable();
 +
 +#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
 +      if (def_to_bigsmp && nr_cpu_ids > 8) {
 +              unsigned int cpu;
 +              unsigned nr;
 +
 +              printk(KERN_WARNING
 +                     "More than 8 CPUs detected - skipping them.\n"
 +                     "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
 +
 +              nr = 0;
 +              for_each_present_cpu(cpu) {
 +                      if (nr >= 8)
 +                              cpu_clear(cpu, cpu_present_map);
 +                      nr++;
 +              }
 +
 +              nr = 0;
 +              for_each_possible_cpu(cpu) {
 +                      if (nr >= 8)
 +                              cpu_clear(cpu, cpu_possible_map);
 +                      nr++;
 +              }
 +
 +              nr_cpu_ids = 8;
 +      }
 +#endif
 +
        if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
                printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
                                    "by the BIOS.\n", hard_smp_processor_id());
@@@ -1175,10 -1143,17 +1168,17 @@@ void __init native_smp_prepare_cpus(uns
         * Setup boot CPU information
         */
        smp_store_cpu_info(0); /* Final full version of the data */
+ #ifdef CONFIG_X86_32
        boot_cpu_logical_apicid = logical_smp_processor_id();
+ #endif
        current_thread_info()->cpu = 0;  /* needed? */
        set_cpu_sibling_map(0);
  
+ #ifdef CONFIG_X86_64
+       enable_IR_x2apic();
+       setup_apic_routing();
+ #endif
        if (smp_sanity_check(max_cpus) < 0) {
                printk(KERN_INFO "SMP disabled\n");
                disable_smp();
        }
  
        preempt_disable();
-       if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) {
+       if (read_apic_id() != boot_cpu_physical_apicid) {
                panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
-                    GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid);
+                    read_apic_id(), boot_cpu_physical_apicid);
                /* Or can we switch back to PIC here? */
        }
        preempt_enable();
        printk(KERN_INFO "CPU%d: ", 0);
        print_cpu_info(&cpu_data(0));
        setup_boot_clock();
 +
 +      if (is_uv_system())
 +              uv_system_init();
  out:
        preempt_enable();
  }
@@@ -1313,13 -1285,16 +1313,13 @@@ __init void prefill_possible_map(void
        if (!num_processors)
                num_processors = 1;
  
 -#ifdef CONFIG_HOTPLUG_CPU
        if (additional_cpus == -1) {
                if (disabled_cpus > 0)
                        additional_cpus = disabled_cpus;
                else
                        additional_cpus = 0;
        }
 -#else
 -      additional_cpus = 0;
 -#endif
 +
        possible = num_processors + additional_cpus;
        if (possible > NR_CPUS)
                possible = NR_CPUS;
@@@ -1411,3 -1386,17 +1411,3 @@@ void __cpu_die(unsigned int cpu
        BUG();
  }
  #endif
 -
 -/*
 - * If the BIOS enumerates physical processors before logical,
 - * maxcpus=N at enumeration-time can be used to disable HT.
 - */
 -static int __init parse_maxcpus(char *arg)
 -{
 -      extern unsigned int maxcpus;
 -
 -      if (arg)
 -              maxcpus = simple_strtoul(arg, NULL, 0);
 -      return 0;
 -}
 -early_param("maxcpus", parse_maxcpus);
diff --combined arch/x86/kernel/vmi_32.c
@@@ -235,7 -235,7 +235,7 @@@ static void vmi_write_ldt_entry(struct 
                                const void *desc)
  {
        u32 *ldt_entry = (u32 *)desc;
 -      vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
 +      vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
  }
  
  static void vmi_load_sp0(struct tss_struct *tss,
@@@ -393,13 -393,13 +393,13 @@@ static void *vmi_kmap_atomic_pte(struc
  }
  #endif
  
 -static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn)
 +static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
  {
        vmi_set_page_type(pfn, VMI_PAGE_L1);
        vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
  }
  
 -static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn)
 +static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn)
  {
        /*
         * This call comes in very early, before mem_map is setup.
        vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
  }
  
 -static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
 +static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count)
  {
        vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
        vmi_check_page_type(clonepfn, VMI_PAGE_L2);
        vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
  }
  
 -static void vmi_release_pte(u32 pfn)
 +static void vmi_release_pte(unsigned long pfn)
  {
        vmi_ops.release_page(pfn, VMI_PAGE_L1);
        vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
  }
  
 -static void vmi_release_pmd(u32 pfn)
 +static void vmi_release_pmd(unsigned long pfn)
  {
        vmi_ops.release_page(pfn, VMI_PAGE_L2);
        vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
@@@ -905,8 -905,8 +905,8 @@@ static inline int __init activate_vmi(v
  #endif
  
  #ifdef CONFIG_X86_LOCAL_APIC
-       para_fill(pv_apic_ops.apic_read, APICRead);
-       para_fill(pv_apic_ops.apic_write, APICWrite);
+        para_fill(apic_ops->read, APICRead);
+        para_fill(apic_ops->write, APICWrite);
  #endif
  
        /*
diff --combined arch/x86/xen/enlighten.c
@@@ -36,6 -36,7 +36,7 @@@
  #include <xen/hvc-console.h>
  
  #include <asm/paravirt.h>
+ #include <asm/apic.h>
  #include <asm/page.h>
  #include <asm/xen/hypercall.h>
  #include <asm/xen/hypervisor.h>
@@@ -580,16 -581,47 +581,47 @@@ static void xen_io_delay(void
  }
  
  #ifdef CONFIG_X86_LOCAL_APIC
- static u32 xen_apic_read(unsigned long reg)
+ static u32 xen_apic_read(u32 reg)
  {
        return 0;
  }
  
- static void xen_apic_write(unsigned long reg, u32 val)
+ static void xen_apic_write(u32 reg, u32 val)
  {
        /* Warn to see if there's any stray references */
        WARN_ON(1);
  }
+ static u64 xen_apic_icr_read(void)
+ {
+       return 0;
+ }
+ static void xen_apic_icr_write(u32 low, u32 id)
+ {
+       /* Warn to see if there's any stray references */
+       WARN_ON(1);
+ }
+ static void xen_apic_wait_icr_idle(void)
+ {
+         return;
+ }
+ static u32 xen_safe_apic_wait_icr_idle(void)
+ {
+         return 0;
+ }
+ static struct apic_ops xen_basic_apic_ops = {
+       .read = xen_apic_read,
+       .write = xen_apic_write,
+       .icr_read = xen_apic_icr_read,
+       .icr_write = xen_apic_icr_write,
+       .wait_icr_idle = xen_apic_wait_icr_idle,
+       .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
+ };
  #endif
  
  static void xen_flush_tlb(void)
@@@ -812,7 -844,7 +844,7 @@@ static int xen_write_msr_safe(unsigned 
  
  /* Early in boot, while setting up the initial pagetable, assume
     everything is pinned. */
 -static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
 +static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
  {
  #ifdef CONFIG_FLATMEM
        BUG_ON(mem_map);        /* should only be used early */
  
  /* Early release_pte assumes that all pts are pinned, since there's
     only init_mm and anything attached to that is pinned. */
 -static void xen_release_pte_init(u32 pfn)
 +static void xen_release_pte_init(unsigned long pfn)
  {
        make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
  }
@@@ -838,7 -870,7 +870,7 @@@ static void pin_pagetable_pfn(unsigned 
  
  /* This needs to make sure the new pte page is pinned iff its being
     attached to a pinned pagetable. */
 -static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
 +static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
  {
        struct page *page = pfn_to_page(pfn);
  
        }
  }
  
 -static void xen_alloc_pte(struct mm_struct *mm, u32 pfn)
 +static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
  {
        xen_alloc_ptpage(mm, pfn, PT_PTE);
  }
  
 -static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
 +static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
  {
        xen_alloc_ptpage(mm, pfn, PT_PMD);
  }
@@@ -909,7 -941,7 +941,7 @@@ static void xen_pgd_free(struct mm_stru
  }
  
  /* This should never happen until we're OK to use struct page */
 -static void xen_release_ptpage(u32 pfn, unsigned level)
 +static void xen_release_ptpage(unsigned long pfn, unsigned level)
  {
        struct page *page = pfn_to_page(pfn);
  
        }
  }
  
 -static void xen_release_pte(u32 pfn)
 +static void xen_release_pte(unsigned long pfn)
  {
        xen_release_ptpage(pfn, PT_PTE);
  }
  
 -static void xen_release_pmd(u32 pfn)
 +static void xen_release_pmd(unsigned long pfn)
  {
        xen_release_ptpage(pfn, PT_PMD);
  }
  
  #if PAGETABLE_LEVELS == 4
 -static void xen_alloc_pud(struct mm_struct *mm, u32 pfn)
 +static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
  {
        xen_alloc_ptpage(mm, pfn, PT_PUD);
  }
  
 -static void xen_release_pud(u32 pfn)
 +static void xen_release_pud(unsigned long pfn)
  {
        xen_release_ptpage(pfn, PT_PUD);
  }
@@@ -1273,8 -1305,6 +1305,6 @@@ static const struct pv_irq_ops xen_irq_
  
  static const struct pv_apic_ops xen_apic_ops __initdata = {
  #ifdef CONFIG_X86_LOCAL_APIC
-       .apic_write = xen_apic_write,
-       .apic_read = xen_apic_read,
        .setup_boot_clock = paravirt_nop,
        .setup_secondary_clock = paravirt_nop,
        .startup_ipi_hook = paravirt_nop,
@@@ -1324,7 -1354,7 +1354,7 @@@ static const struct pv_mmu_ops xen_mmu_
        .ptep_modify_prot_commit = __ptep_modify_prot_commit,
  
        .pte_val = xen_pte_val,
 -      .pte_flags = native_pte_val,
 +      .pte_flags = native_pte_flags,
        .pgd_val = xen_pgd_val,
  
        .make_pte = xen_make_pte,
@@@ -1677,6 -1707,13 +1707,13 @@@ asmlinkage void __init xen_start_kernel
        pv_apic_ops = xen_apic_ops;
        pv_mmu_ops = xen_mmu_ops;
  
+ #ifdef CONFIG_X86_LOCAL_APIC
+       /*
+        * set up the basic apic ops.
+        */
+       apic_ops = &xen_basic_apic_ops;
+ #endif
        if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
                pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
                pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
diff --combined include/asm-x86/apic.h
@@@ -9,6 -9,8 +9,8 @@@
  #include <asm/apicdef.h>
  #include <asm/processor.h>
  #include <asm/system.h>
+ #include <asm/cpufeature.h>
+ #include <asm/msr.h>
  
  #define ARCH_APICTIMER_STOPS_ON_C3    1
  
@@@ -47,8 -49,6 +49,6 @@@ extern int disable_apic
  #ifdef CONFIG_PARAVIRT
  #include <asm/paravirt.h>
  #else
- #define apic_write native_apic_write
- #define apic_read native_apic_read
  #define setup_boot_clock setup_boot_APIC_clock
  #define setup_secondary_clock setup_secondary_APIC_clock
  #endif
@@@ -60,7 -60,7 +60,7 @@@ extern u64 xapic_icr_read(void)
  extern void xapic_icr_write(u32, u32);
  extern int setup_profiling_timer(unsigned int);
  
- static inline void native_apic_write(unsigned long reg, u32 v)
+ static inline void native_apic_mem_write(u32 reg, u32 v)
  {
        volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
  
                       ASM_OUTPUT2("0" (v), "m" (*addr)));
  }
  
- static inline u32 native_apic_read(unsigned long reg)
+ static inline u32 native_apic_mem_read(u32 reg)
  {
        return *((volatile u32 *)(APIC_BASE + reg));
  }
  
- extern void apic_wait_icr_idle(void);
- extern u32 safe_apic_wait_icr_idle(void);
+ static inline void native_apic_msr_write(u32 reg, u32 v)
+ {
+       if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
+           reg == APIC_LVR)
+               return;
+       wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
+ }
+ static inline u32 native_apic_msr_read(u32 reg)
+ {
+       u32 low, high;
+       if (reg == APIC_DFR)
+               return -1;
+       rdmsr(APIC_BASE_MSR + (reg >> 4), low, high);
+       return low;
+ }
+ #ifndef CONFIG_X86_32
+ extern int x2apic, x2apic_preenabled;
+ extern void check_x2apic(void);
+ extern void enable_x2apic(void);
+ extern void enable_IR_x2apic(void);
+ extern void x2apic_icr_write(u32 low, u32 id);
+ #endif
+ struct apic_ops {
+       u32 (*read)(u32 reg);
+       void (*write)(u32 reg, u32 v);
+       u64 (*icr_read)(void);
+       void (*icr_write)(u32 low, u32 high);
+       void (*wait_icr_idle)(void);
+       u32 (*safe_wait_icr_idle)(void);
+ };
+ extern struct apic_ops *apic_ops;
+ #define apic_read (apic_ops->read)
+ #define apic_write (apic_ops->write)
+ #define apic_icr_read (apic_ops->icr_read)
+ #define apic_icr_write (apic_ops->icr_write)
+ #define apic_wait_icr_idle (apic_ops->wait_icr_idle)
+ #define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle)
  extern int get_physical_broadcast(void);
  
+ #ifdef CONFIG_X86_64
+ static inline void ack_x2APIC_irq(void)
+ {
+       /* Docs say use 0 for future compatibility */
+       native_apic_msr_write(APIC_EOI, 0);
+ }
+ #endif
  static inline void ack_APIC_irq(void)
  {
        /*
 -       * ack_APIC_irq() actually gets compiled as a single instruction:
 -       * - a single rmw on Pentium/82489DX
 -       * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
 +       * ack_APIC_irq() actually gets compiled as a single instruction
         * ... yummie.
         */
  
  #define X86_FEATURE_UP                (3*32+ 9) /* smp kernel running on up */
  #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
  #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
 -#define X86_FEATURE_PEBS      (3*32+12)  /* Precise-Event Based Sampling */
 -#define X86_FEATURE_BTS               (3*32+13)  /* Branch Trace Store */
 -#define X86_FEATURE_SYSCALL32 (3*32+14)  /* syscall in ia32 userspace */
 -#define X86_FEATURE_SYSENTER32        (3*32+15)  /* sysenter in ia32 userspace */
 +#define X86_FEATURE_PEBS      (3*32+12) /* Precise-Event Based Sampling */
 +#define X86_FEATURE_BTS               (3*32+13) /* Branch Trace Store */
 +#define X86_FEATURE_SYSCALL32 (3*32+14) /* syscall in ia32 userspace */
 +#define X86_FEATURE_SYSENTER32        (3*32+15) /* sysenter in ia32 userspace */
  #define X86_FEATURE_REP_GOOD  (3*32+16) /* rep microcode works well on this CPU */
  #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
  #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
 -#define X86_FEATURE_11AP      (3*32+19)  /* Bad local APIC aka 11AP */
 +#define X86_FEATURE_11AP      (3*32+19) /* Bad local APIC aka 11AP */
 +#define X86_FEATURE_NOPL      (3*32+20) /* The NOPL (0F 1F) instructions */
 +#define X86_FEATURE_AMDC1E    (3*32+21) /* AMD C1E detected */
  
  /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
  #define X86_FEATURE_XMM3      (4*32+ 0) /* Streaming SIMD Extensions-3 */
@@@ -93,7 -91,7 +93,8 @@@
  #define X86_FEATURE_CX16      (4*32+13) /* CMPXCHG16B */
  #define X86_FEATURE_XTPR      (4*32+14) /* Send Task Priority Messages */
  #define X86_FEATURE_DCA               (4*32+18) /* Direct Cache Access */
+ #define X86_FEATURE_X2APIC    (4*32+21) /* x2APIC */
 +#define X86_FEATURE_XMM4_2    (4*32+20) /* Streaming SIMD Extensions-4.2 */
  
  /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
  #define X86_FEATURE_XSTORE    (5*32+ 2) /* on-CPU RNG present (xstore insn) */
@@@ -192,7 -190,7 +193,8 @@@ extern const char * const x86_power_fla
  #define cpu_has_gbpages               boot_cpu_has(X86_FEATURE_GBPAGES)
  #define cpu_has_arch_perfmon  boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
  #define cpu_has_pat           boot_cpu_has(X86_FEATURE_PAT)
+ #define cpu_has_x2apic                boot_cpu_has(X86_FEATURE_X2APIC)
 +#define cpu_has_xmm4_2                boot_cpu_has(X86_FEATURE_XMM4_2)
  
  #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
  # define cpu_has_invlpg               1
@@@ -14,6 -14,7 +14,7 @@@
  
  struct genapic {
        char *name;
+       int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
        u32 int_delivery_mode;
        u32 int_dest_mode;
        int (*apic_id_registered)(void);
        void (*send_IPI_mask)(cpumask_t mask, int vector);
        void (*send_IPI_allbutself)(int vector);
        void (*send_IPI_all)(int vector);
+       void (*send_IPI_self)(int vector);
        /* */
        unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
        unsigned int (*phys_pkg_id)(int index_msb);
+       unsigned int (*get_apic_id)(unsigned long x);
+       unsigned long (*set_apic_id)(unsigned int id);
+       unsigned long apic_id_mask;
  };
  
  extern struct genapic *genapic;
  
  extern struct genapic apic_flat;
  extern struct genapic apic_physflat;
+ extern struct genapic apic_x2apic_cluster;
+ extern struct genapic apic_x2apic_phys;
  extern int acpi_madt_oem_check(char *, char *);
  
+ extern void apic_send_IPI_self(int vector);
  enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
  extern enum uv_system_type get_uv_system_type(void);
  extern int is_uv_system(void);
@@@ -42,7 -50,6 +50,7 @@@
  extern struct genapic apic_x2apic_uv_x;
  DECLARE_PER_CPU(int, x2apic_extra_bits);
  extern void uv_cpu_init(void);
 +extern void uv_system_init(void);
  extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
  
  extern void setup_apic_routing(void);
@@@ -137,7 -137,6 +137,7 @@@ struct pv_cpu_ops 
  
        /* MSR, PMC and TSR operations.
           err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
 +      u64 (*read_msr_amd)(unsigned int msr, int *err);
        u64 (*read_msr)(unsigned int msr, int *err);
        int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
  
@@@ -201,12 -200,6 +201,6 @@@ struct pv_irq_ops 
  
  struct pv_apic_ops {
  #ifdef CONFIG_X86_LOCAL_APIC
-       /*
-        * Direct APIC operations, principally for VMI.  Ideally
-        * these shouldn't be in this interface.
-        */
-       void (*apic_write)(unsigned long reg, u32 v);
-       u32 (*apic_read)(unsigned long reg);
        void (*setup_boot_clock)(void);
        void (*setup_secondary_clock)(void);
  
@@@ -258,13 -251,13 +252,13 @@@ struct pv_mmu_ops 
         * Hooks for allocating/releasing pagetable pages when they're
         * attached to a pagetable
         */
 -      void (*alloc_pte)(struct mm_struct *mm, u32 pfn);
 -      void (*alloc_pmd)(struct mm_struct *mm, u32 pfn);
 -      void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
 -      void (*alloc_pud)(struct mm_struct *mm, u32 pfn);
 -      void (*release_pte)(u32 pfn);
 -      void (*release_pmd)(u32 pfn);
 -      void (*release_pud)(u32 pfn);
 +      void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
 +      void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
 +      void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count);
 +      void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
 +      void (*release_pte)(unsigned long pfn);
 +      void (*release_pmd)(unsigned long pfn);
 +      void (*release_pud)(unsigned long pfn);
  
        /* Pagetable manipulation functions */
        void (*set_pte)(pte_t *ptep, pte_t pteval);
@@@ -727,10 -720,6 +721,10 @@@ static inline u64 paravirt_read_msr(uns
  {
        return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
  }
 +static inline u64 paravirt_read_msr_amd(unsigned msr, int *err)
 +{
 +      return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err);
 +}
  static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
  {
        return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
@@@ -776,13 -765,6 +770,13 @@@ static inline int rdmsrl_safe(unsigned 
        *p = paravirt_read_msr(msr, &err);
        return err;
  }
 +static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
 +{
 +      int err;
 +
 +      *p = paravirt_read_msr_amd(msr, &err);
 +      return err;
 +}
  
  static inline u64 paravirt_read_tsc(void)
  {
@@@ -910,19 -892,6 +904,6 @@@ static inline void slow_down_io(void
  }
  
  #ifdef CONFIG_X86_LOCAL_APIC
- /*
-  * Basic functions accessing APICs.
-  */
- static inline void apic_write(unsigned long reg, u32 v)
- {
-       PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
- }
- static inline u32 apic_read(unsigned long reg)
- {
-       return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
- }
  static inline void setup_boot_clock(void)
  {
        PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
@@@ -1005,35 -974,35 +986,35 @@@ static inline void paravirt_pgd_free(st
        PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
  }
  
 -static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn)
 +static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn)
  {
        PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
  }
 -static inline void paravirt_release_pte(unsigned pfn)
 +static inline void paravirt_release_pte(unsigned long pfn)
  {
        PVOP_VCALL1(pv_mmu_ops.release_pte, pfn);
  }
  
 -static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned pfn)
 +static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
  {
        PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
  }
  
 -static inline void paravirt_alloc_pmd_clone(unsigned pfn, unsigned clonepfn,
 -                                          unsigned start, unsigned count)
 +static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
 +                                          unsigned long start, unsigned long count)
  {
        PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count);
  }
 -static inline void paravirt_release_pmd(unsigned pfn)
 +static inline void paravirt_release_pmd(unsigned long pfn)
  {
        PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
  }
  
 -static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned pfn)
 +static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn)
  {
        PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn);
  }
 -static inline void paravirt_release_pud(unsigned pfn)
 +static inline void paravirt_release_pud(unsigned long pfn)
  {
        PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
  }