Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sun, 30 Oct 2011 22:46:19 +0000 (15:46 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sun, 30 Oct 2011 22:46:19 +0000 (15:46 -0700)
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (33 commits)
  iommu/core: Remove global iommu_ops and register_iommu
  iommu/msm: Use bus_set_iommu instead of register_iommu
  iommu/omap: Use bus_set_iommu instead of register_iommu
  iommu/vt-d: Use bus_set_iommu instead of register_iommu
  iommu/amd: Use bus_set_iommu instead of register_iommu
  iommu/core: Use bus->iommu_ops in the iommu-api
  iommu/core: Convert iommu_found to iommu_present
  iommu/core: Add bus_type parameter to iommu_domain_alloc
  Driver core: Add iommu_ops to bus_type
  iommu/core: Define iommu_ops and register_iommu only with CONFIG_IOMMU_API
  iommu/amd: Fix wrong shift direction
  iommu/omap: always provide iommu debug code
  iommu/core: let drivers know if an iommu fault handler isn't installed
  iommu/core: export iommu_set_fault_handler()
  iommu/omap: Fix build error with !IOMMU_SUPPORT
  iommu/omap: Migrate to the generic fault report mechanism
  iommu/core: Add fault reporting mechanism
  iommu/core: Use PAGE_SIZE instead of hard-coded value
  iommu/core: use the existing IS_ALIGNED macro
  iommu/msm: ->unmap() should return order of unmapped page
  ...

Fixup trivial conflicts in drivers/iommu/Makefile: "move omap iommu to
dedicated iommu folder" vs "Rename the DMAR and INTR_REMAP config
options" just happened to touch lines next to each other.

arch/arm/plat-omap/Kconfig
arch/x86/kvm/x86.c
drivers/iommu/Kconfig
drivers/iommu/Makefile
drivers/iommu/intel-iommu.c
include/linux/device.h
virt/kvm/iommu.c

diff --combined arch/arm/plat-omap/Kconfig
@@@ -14,8 -14,6 +14,8 @@@ config ARCH_OMAP
        select CLKDEV_LOOKUP
        select CLKSRC_MMIO
        select GENERIC_IRQ_CHIP
 +      select HAVE_IDE
 +      select NEED_MACH_MEMORY_H
        help
          "Systems based on omap7xx, omap15xx or omap16xx"
  
@@@ -134,18 -132,6 +134,6 @@@ config OMAP_MBOX_KFIFO_SIZ
          This can also be changed at runtime (via the mbox_kfifo_size
          module parameter).
  
- config OMAP_IOMMU
-       tristate
- config OMAP_IOMMU_DEBUG
-        tristate "Export OMAP IOMMU internals in DebugFS"
-        depends on OMAP_IOMMU && DEBUG_FS
-        help
-          Select this to see extensive information about
-          the internal state of OMAP IOMMU in debugfs.
-          Say N unless you know you need this.
  config OMAP_IOMMU_IVA2
        bool
  
diff --combined arch/x86/kvm/x86.c
@@@ -44,6 -44,7 +44,7 @@@
  #include <linux/perf_event.h>
  #include <linux/uaccess.h>
  #include <linux/hash.h>
+ #include <linux/pci.h>
  #include <trace/events/kvm.h>
  
  #define CREATE_TRACE_POINTS
@@@ -83,7 -84,6 +84,7 @@@ static u64 __read_mostly efer_reserved_
  static void update_cr8_intercept(struct kvm_vcpu *vcpu);
  static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
                                    struct kvm_cpuid_entry2 __user *entries);
 +static void process_nmi(struct kvm_vcpu *vcpu);
  
  struct kvm_x86_ops *kvm_x86_ops;
  EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@@ -360,8 -360,8 +361,8 @@@ void kvm_propagate_fault(struct kvm_vcp
  
  void kvm_inject_nmi(struct kvm_vcpu *vcpu)
  {
 -      kvm_make_request(KVM_REQ_EVENT, vcpu);
 -      vcpu->arch.nmi_pending = 1;
 +      atomic_inc(&vcpu->arch.nmi_queued);
 +      kvm_make_request(KVM_REQ_NMI, vcpu);
  }
  EXPORT_SYMBOL_GPL(kvm_inject_nmi);
  
@@@ -600,8 -600,6 +601,8 @@@ static bool guest_cpuid_has_fsgsbase(st
  static void update_cpuid(struct kvm_vcpu *vcpu)
  {
        struct kvm_cpuid_entry2 *best;
 +      struct kvm_lapic *apic = vcpu->arch.apic;
 +      u32 timer_mode_mask;
  
        best = kvm_find_cpuid_entry(vcpu, 1, 0);
        if (!best)
                if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
                        best->ecx |= bit(X86_FEATURE_OSXSAVE);
        }
 +
 +      if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
 +              best->function == 0x1) {
 +              best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER);
 +              timer_mode_mask = 3 << 17;
 +      } else
 +              timer_mode_mask = 1 << 17;
 +
 +      if (apic)
 +              apic->lapic_timer.timer_mode_mask = timer_mode_mask;
  }
  
  int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@@ -838,7 -826,6 +839,7 @@@ static u32 msrs_to_save[] = 
  static unsigned num_msrs_to_save;
  
  static u32 emulated_msrs[] = {
 +      MSR_IA32_TSCDEADLINE,
        MSR_IA32_MISC_ENABLE,
        MSR_IA32_MCG_STATUS,
        MSR_IA32_MCG_CTL,
@@@ -1014,7 -1001,7 +1015,7 @@@ static inline int kvm_tsc_changes_freq(
        return ret;
  }
  
 -static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
 +u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
  {
        if (vcpu->arch.virtual_tsc_khz)
                return vcpu->arch.virtual_tsc_khz;
@@@ -1112,7 -1099,7 +1113,7 @@@ static int kvm_guest_time_update(struc
  
        /* Keep irq disabled to prevent changes to the clock */
        local_irq_save(flags);
 -      kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
 +      tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
        kernel_ns = get_kernel_ns();
        this_tsc_khz = vcpu_tsc_khz(v);
        if (unlikely(this_tsc_khz == 0)) {
@@@ -1578,9 -1565,6 +1579,9 @@@ int kvm_set_msr_common(struct kvm_vcpu 
                break;
        case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
                return kvm_x2apic_msr_write(vcpu, msr, data);
 +      case MSR_IA32_TSCDEADLINE:
 +              kvm_set_lapic_tscdeadline_msr(vcpu, data);
 +              break;
        case MSR_IA32_MISC_ENABLE:
                vcpu->arch.ia32_misc_enable_msr = data;
                break;
@@@ -1842,9 -1826,6 +1843,9 @@@ static int get_msr_hyperv(struct kvm_vc
                return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
        case HV_X64_MSR_TPR:
                return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
 +      case HV_X64_MSR_APIC_ASSIST_PAGE:
 +              data = vcpu->arch.hv_vapic;
 +              break;
        default:
                pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
                return 1;
@@@ -1859,6 -1840,7 +1860,6 @@@ int kvm_get_msr_common(struct kvm_vcpu 
  
        switch (msr) {
        case MSR_IA32_PLATFORM_ID:
 -      case MSR_IA32_UCODE_REV:
        case MSR_IA32_EBL_CR_POWERON:
        case MSR_IA32_DEBUGCTLMSR:
        case MSR_IA32_LASTBRANCHFROMIP:
        case MSR_FAM10H_MMIO_CONF_BASE:
                data = 0;
                break;
 +      case MSR_IA32_UCODE_REV:
 +              data = 0x100000000ULL;
 +              break;
        case MSR_MTRRcap:
                data = 0x500 | KVM_NR_VAR_MTRR;
                break;
        case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
                return kvm_x2apic_msr_read(vcpu, msr, pdata);
                break;
 +      case MSR_IA32_TSCDEADLINE:
 +              data = kvm_get_lapic_tscdeadline_msr(vcpu);
 +              break;
        case MSR_IA32_MISC_ENABLE:
                data = vcpu->arch.ia32_misc_enable_msr;
                break;
@@@ -2111,9 -2087,6 +2112,9 @@@ int kvm_dev_ioctl_check_extension(long 
                r = !kvm_x86_ops->cpu_has_accelerated_tpr();
                break;
        case KVM_CAP_NR_VCPUS:
 +              r = KVM_SOFT_MAX_VCPUS;
 +              break;
 +      case KVM_CAP_MAX_VCPUS:
                r = KVM_MAX_VCPUS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
                r = 0;
                break;
        case KVM_CAP_IOMMU:
-               r = iommu_found();
+               r = iommu_present(&pci_bus_type);
                break;
        case KVM_CAP_MCE:
                r = KVM_MAX_MCE_BANKS;
@@@ -2238,7 -2211,7 +2239,7 @@@ void kvm_arch_vcpu_load(struct kvm_vcp
                s64 tsc_delta;
                u64 tsc;
  
 -              kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc);
 +              tsc = kvm_x86_ops->read_l1_tsc(vcpu);
                tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
                             tsc - vcpu->arch.last_guest_tsc;
  
@@@ -2262,7 -2235,7 +2263,7 @@@ void kvm_arch_vcpu_put(struct kvm_vcpu 
  {
        kvm_x86_ops->vcpu_put(vcpu);
        kvm_put_guest_fpu(vcpu);
 -      kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
 +      vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
  }
  
  static int is_efer_nx(void)
@@@ -2847,7 -2820,6 +2848,7 @@@ static int kvm_vcpu_ioctl_x86_set_mce(s
  static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
                                               struct kvm_vcpu_events *events)
  {
 +      process_nmi(vcpu);
        events->exception.injected =
                vcpu->arch.exception.pending &&
                !kvm_exception_is_soft(vcpu->arch.exception.nr);
                        KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
  
        events->nmi.injected = vcpu->arch.nmi_injected;
 -      events->nmi.pending = vcpu->arch.nmi_pending;
 +      events->nmi.pending = vcpu->arch.nmi_pending != 0;
        events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
        events->nmi.pad = 0;
  
@@@ -2885,7 -2857,6 +2886,7 @@@ static int kvm_vcpu_ioctl_x86_set_vcpu_
                              | KVM_VCPUEVENT_VALID_SHADOW))
                return -EINVAL;
  
 +      process_nmi(vcpu);
        vcpu->arch.exception.pending = events->exception.injected;
        vcpu->arch.exception.nr = events->exception.nr;
        vcpu->arch.exception.has_error_code = events->exception.has_error_code;
@@@ -3586,11 -3557,7 +3587,11 @@@ long kvm_arch_vm_ioctl(struct file *fil
                        if (r) {
                                mutex_lock(&kvm->slots_lock);
                                kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
 -                                                        &vpic->dev);
 +                                                        &vpic->dev_master);
 +                              kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
 +                                                        &vpic->dev_slave);
 +                              kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
 +                                                        &vpic->dev_eclr);
                                mutex_unlock(&kvm->slots_lock);
                                kfree(vpic);
                                goto create_irqchip_unlock;
@@@ -4079,105 -4046,84 +4080,105 @@@ static int vcpu_mmio_gva_to_gpa(struct 
        return 0;
  }
  
 -static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
 -                                unsigned long addr,
 -                                void *val,
 -                                unsigned int bytes,
 -                                struct x86_exception *exception)
 +int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 +                      const void *val, int bytes)
  {
 -      struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 -      gpa_t gpa;
 -      int handled, ret;
 +      int ret;
  
 +      ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
 +      if (ret < 0)
 +              return 0;
 +      kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
 +      return 1;
 +}
 +
 +struct read_write_emulator_ops {
 +      int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
 +                                int bytes);
 +      int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
 +                                void *val, int bytes);
 +      int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
 +                             int bytes, void *val);
 +      int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
 +                                  void *val, int bytes);
 +      bool write;
 +};
 +
 +static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
 +{
        if (vcpu->mmio_read_completed) {
                memcpy(val, vcpu->mmio_data, bytes);
                trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
                               vcpu->mmio_phys_addr, *(u64 *)val);
                vcpu->mmio_read_completed = 0;
 -              return X86EMUL_CONTINUE;
 +              return 1;
        }
  
 -      ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, false);
 -
 -      if (ret < 0)
 -              return X86EMUL_PROPAGATE_FAULT;
 -
 -      if (ret)
 -              goto mmio;
 -
 -      if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception)
 -          == X86EMUL_CONTINUE)
 -              return X86EMUL_CONTINUE;
 +      return 0;
 +}
  
 -mmio:
 -      /*
 -       * Is this MMIO handled locally?
 -       */
 -      handled = vcpu_mmio_read(vcpu, gpa, bytes, val);
 +static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
 +                      void *val, int bytes)
 +{
 +      return !kvm_read_guest(vcpu->kvm, gpa, val, bytes);
 +}
  
 -      if (handled == bytes)
 -              return X86EMUL_CONTINUE;
 +static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
 +                       void *val, int bytes)
 +{
 +      return emulator_write_phys(vcpu, gpa, val, bytes);
 +}
  
 -      gpa += handled;
 -      bytes -= handled;
 -      val += handled;
 +static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
 +{
 +      trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
 +      return vcpu_mmio_write(vcpu, gpa, bytes, val);
 +}
  
 +static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
 +                        void *val, int bytes)
 +{
        trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
 -
 -      vcpu->mmio_needed = 1;
 -      vcpu->run->exit_reason = KVM_EXIT_MMIO;
 -      vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
 -      vcpu->mmio_size = bytes;
 -      vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
 -      vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
 -      vcpu->mmio_index = 0;
 -
        return X86EMUL_IO_NEEDED;
  }
  
 -int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 -                      const void *val, int bytes)
 +static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
 +                         void *val, int bytes)
  {
 -      int ret;
 -
 -      ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
 -      if (ret < 0)
 -              return 0;
 -      kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
 -      return 1;
 +      memcpy(vcpu->mmio_data, val, bytes);
 +      memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
 +      return X86EMUL_CONTINUE;
  }
  
 -static int emulator_write_emulated_onepage(unsigned long addr,
 -                                         const void *val,
 -                                         unsigned int bytes,
 -                                         struct x86_exception *exception,
 -                                         struct kvm_vcpu *vcpu)
 +static struct read_write_emulator_ops read_emultor = {
 +      .read_write_prepare = read_prepare,
 +      .read_write_emulate = read_emulate,
 +      .read_write_mmio = vcpu_mmio_read,
 +      .read_write_exit_mmio = read_exit_mmio,
 +};
 +
 +static struct read_write_emulator_ops write_emultor = {
 +      .read_write_emulate = write_emulate,
 +      .read_write_mmio = write_mmio,
 +      .read_write_exit_mmio = write_exit_mmio,
 +      .write = true,
 +};
 +
 +static int emulator_read_write_onepage(unsigned long addr, void *val,
 +                                     unsigned int bytes,
 +                                     struct x86_exception *exception,
 +                                     struct kvm_vcpu *vcpu,
 +                                     struct read_write_emulator_ops *ops)
  {
        gpa_t gpa;
        int handled, ret;
 +      bool write = ops->write;
  
 -      ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, true);
 +      if (ops->read_write_prepare &&
 +                ops->read_write_prepare(vcpu, val, bytes))
 +              return X86EMUL_CONTINUE;
 +
 +      ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
  
        if (ret < 0)
                return X86EMUL_PROPAGATE_FAULT;
        if (ret)
                goto mmio;
  
 -      if (emulator_write_phys(vcpu, gpa, val, bytes))
 +      if (ops->read_write_emulate(vcpu, gpa, val, bytes))
                return X86EMUL_CONTINUE;
  
  mmio:
 -      trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
        /*
         * Is this MMIO handled locally?
         */
 -      handled = vcpu_mmio_write(vcpu, gpa, bytes, val);
 +      handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
        if (handled == bytes)
                return X86EMUL_CONTINUE;
  
        val += handled;
  
        vcpu->mmio_needed = 1;
 -      memcpy(vcpu->mmio_data, val, bytes);
        vcpu->run->exit_reason = KVM_EXIT_MMIO;
        vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
        vcpu->mmio_size = bytes;
        vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
 -      vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
 -      memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
 +      vcpu->run->mmio.is_write = vcpu->mmio_is_write = write;
        vcpu->mmio_index = 0;
  
 -      return X86EMUL_CONTINUE;
 +      return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
  }
  
 -int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
 -                          unsigned long addr,
 -                          const void *val,
 -                          unsigned int bytes,
 -                          struct x86_exception *exception)
 +int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
 +                      void *val, unsigned int bytes,
 +                      struct x86_exception *exception,
 +                      struct read_write_emulator_ops *ops)
  {
        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
  
                int rc, now;
  
                now = -addr & ~PAGE_MASK;
 -              rc = emulator_write_emulated_onepage(addr, val, now, exception,
 -                                                   vcpu);
 +              rc = emulator_read_write_onepage(addr, val, now, exception,
 +                                               vcpu, ops);
 +
                if (rc != X86EMUL_CONTINUE)
                        return rc;
                addr += now;
                val += now;
                bytes -= now;
        }
 -      return emulator_write_emulated_onepage(addr, val, bytes, exception,
 -                                             vcpu);
 +
 +      return emulator_read_write_onepage(addr, val, bytes, exception,
 +                                         vcpu, ops);
 +}
 +
 +static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
 +                                unsigned long addr,
 +                                void *val,
 +                                unsigned int bytes,
 +                                struct x86_exception *exception)
 +{
 +      return emulator_read_write(ctxt, addr, val, bytes,
 +                                 exception, &read_emultor);
 +}
 +
 +int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
 +                          unsigned long addr,
 +                          const void *val,
 +                          unsigned int bytes,
 +                          struct x86_exception *exception)
 +{
 +      return emulator_read_write(ctxt, addr, (void *)val, bytes,
 +                                 exception, &write_emultor);
  }
  
  #define CMPXCHG_TYPE(t, ptr, old, new) \
@@@ -4785,7 -4713,7 +4786,7 @@@ int kvm_inject_realmode_interrupt(struc
        kvm_set_rflags(vcpu, ctxt->eflags);
  
        if (irq == NMI_VECTOR)
 -              vcpu->arch.nmi_pending = false;
 +              vcpu->arch.nmi_pending = 0;
        else
                vcpu->arch.interrupt.pending = false;
  
@@@ -4861,7 -4789,7 +4862,7 @@@ int x86_emulate_instruction(struct kvm_
  
                trace_kvm_emulate_insn_start(vcpu);
                ++vcpu->stat.insn_emulation;
 -              if (r)  {
 +              if (r != EMULATION_OK)  {
                        if (emulation_type & EMULTYPE_TRAP_UD)
                                return EMULATE_FAIL;
                        if (reexecute_instruction(vcpu, cr2))
@@@ -5594,7 -5522,7 +5595,7 @@@ static void inject_pending_event(struc
        /* try to inject new event if pending */
        if (vcpu->arch.nmi_pending) {
                if (kvm_x86_ops->nmi_allowed(vcpu)) {
 -                      vcpu->arch.nmi_pending = false;
 +                      --vcpu->arch.nmi_pending;
                        vcpu->arch.nmi_injected = true;
                        kvm_x86_ops->set_nmi(vcpu);
                }
@@@ -5626,26 -5554,10 +5627,26 @@@ static void kvm_put_guest_xcr0(struct k
        }
  }
  
 +static void process_nmi(struct kvm_vcpu *vcpu)
 +{
 +      unsigned limit = 2;
 +
 +      /*
 +       * x86 is limited to one NMI running, and one NMI pending after it.
 +       * If an NMI is already in progress, limit further NMIs to just one.
 +       * Otherwise, allow two (and we'll inject the first one immediately).
 +       */
 +      if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
 +              limit = 1;
 +
 +      vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
 +      vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
 +      kvm_make_request(KVM_REQ_EVENT, vcpu);
 +}
 +
  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
  {
        int r;
 -      bool nmi_pending;
        bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
                vcpu->run->request_interrupt_window;
  
                }
                if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
                        record_steal_time(vcpu);
 +              if (kvm_check_request(KVM_REQ_NMI, vcpu))
 +                      process_nmi(vcpu);
  
        }
  
        if (unlikely(r))
                goto out;
  
 -      /*
 -       * An NMI can be injected between local nmi_pending read and
 -       * vcpu->arch.nmi_pending read inside inject_pending_event().
 -       * But in that case, KVM_REQ_EVENT will be set, which makes
 -       * the race described above benign.
 -       */
 -      nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending);
 -
        if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
                inject_pending_event(vcpu);
  
                /* enable NMI/IRQ window open exits if needed */
 -              if (nmi_pending)
 +              if (vcpu->arch.nmi_pending)
                        kvm_x86_ops->enable_nmi_window(vcpu);
                else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
                        kvm_x86_ops->enable_irq_window(vcpu);
        if (hw_breakpoint_active())
                hw_breakpoint_restore();
  
 -      kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
 +      vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
  
        vcpu->mode = OUTSIDE_GUEST_MODE;
        smp_wmb();
@@@ -6406,8 -6324,7 +6407,8 @@@ void kvm_arch_vcpu_destroy(struct kvm_v
  
  int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
  {
 -      vcpu->arch.nmi_pending = false;
 +      atomic_set(&vcpu->arch.nmi_queued, 0);
 +      vcpu->arch.nmi_pending = 0;
        vcpu->arch.nmi_injected = false;
  
        vcpu->arch.switch_db_regs = 0;
@@@ -6682,7 -6599,7 +6683,7 @@@ int kvm_arch_vcpu_runnable(struct kvm_v
                !vcpu->arch.apf.halted)
                || !list_empty_careful(&vcpu->async_pf.done)
                || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
 -              || vcpu->arch.nmi_pending ||
 +              || atomic_read(&vcpu->arch.nmi_queued) ||
                (kvm_arch_interrupt_allowed(vcpu) &&
                 kvm_cpu_has_interrupt(vcpu));
  }
diff --combined drivers/iommu/Kconfig
@@@ -59,14 -59,10 +59,14 @@@ config AMD_IOMMU_STAT
          If unsure, say N.
  
  # Intel IOMMU support
 -config DMAR
 -      bool "Support for DMA Remapping Devices"
 +config DMAR_TABLE
 +      bool
 +
 +config INTEL_IOMMU
 +      bool "Support for Intel IOMMU using DMA Remapping Devices"
        depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC)
        select IOMMU_API
 +      select DMAR_TABLE
        help
          DMA remapping (DMAR) devices support enables independent address
          translations for Direct Memory Access (DMA) from devices.
          and include PCI device scope covered by these DMA
          remapping devices.
  
 -config DMAR_DEFAULT_ON
 +config INTEL_IOMMU_DEFAULT_ON
        def_bool y
 -      prompt "Enable DMA Remapping Devices by default"
 -      depends on DMAR
 +      prompt "Enable Intel DMA Remapping Devices by default"
 +      depends on INTEL_IOMMU
        help
          Selecting this option will enable a DMAR device at boot time if
          one is found. If this option is not selected, DMAR support can
          be enabled by passing intel_iommu=on to the kernel.
  
 -config DMAR_BROKEN_GFX_WA
 +config INTEL_IOMMU_BROKEN_GFX_WA
        bool "Workaround broken graphics drivers (going away soon)"
 -      depends on DMAR && BROKEN && X86
 +      depends on INTEL_IOMMU && BROKEN && X86
        ---help---
          Current Graphics drivers tend to use physical address
          for DMA and avoid using DMA APIs. Setting this config
          to use physical addresses for DMA, at least until this
          option is removed in the 2.6.32 kernel.
  
 -config DMAR_FLOPPY_WA
 +config INTEL_IOMMU_FLOPPY_WA
        def_bool y
 -      depends on DMAR && X86
 +      depends on INTEL_IOMMU && X86
        ---help---
          Floppy disk drivers are known to bypass DMA API calls
          thereby failing to work when IOMMU is enabled. This
          workaround will setup a 1:1 mapping for the first
          16MiB to make floppy (an ISA device) work.
  
 -config INTR_REMAP
 +config IRQ_REMAP
        bool "Support for Interrupt Remapping (EXPERIMENTAL)"
        depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
 +      select DMAR_TABLE
        ---help---
          Supports Interrupt remapping for IO-APIC and MSI devices.
          To use x2apic mode in the CPU's which support x2APIC enhancements or
          to support platforms with CPU's having > 8 bit APIC ID, say Y.
  
+ # OMAP IOMMU support
+ config OMAP_IOMMU
+       bool "OMAP IOMMU Support"
+       depends on ARCH_OMAP
+       select IOMMU_API
+ config OMAP_IOVMM
+       tristate "OMAP IO Virtual Memory Manager Support"
+       depends on OMAP_IOMMU
+ config OMAP_IOMMU_DEBUG
+        tristate "Export OMAP IOMMU/IOVMM internals in DebugFS"
+        depends on OMAP_IOVMM && DEBUG_FS
+        help
+          Select this to see extensive information about
+          the internal state of OMAP IOMMU/IOVMM in debugfs.
+          Say N unless you know you need this.
  endif # IOMMU_SUPPORT
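
For reference, the renames in the Intel/interrupt-remapping part of the hunk above, summarized informally (useful when carrying an old .config forward):

    CONFIG_DMAR               -> CONFIG_INTEL_IOMMU (now also selects DMAR_TABLE)
    CONFIG_DMAR_DEFAULT_ON    -> CONFIG_INTEL_IOMMU_DEFAULT_ON
    CONFIG_DMAR_BROKEN_GFX_WA -> CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
    CONFIG_DMAR_FLOPPY_WA     -> CONFIG_INTEL_IOMMU_FLOPPY_WA
    CONFIG_INTR_REMAP         -> CONFIG_IRQ_REMAP (now also selects DMAR_TABLE)

The OMAP IOMMU/IOVMM options now live here rather than under arch/arm/plat-omap, matching the Makefile changes below.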
diff --combined drivers/iommu/Makefile
@@@ -1,6 -1,8 +1,9 @@@
  obj-$(CONFIG_IOMMU_API) += iommu.o
  obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
  obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
 -obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
 -obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
 +obj-$(CONFIG_DMAR_TABLE) += dmar.o
 +obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o
 +obj-$(CONFIG_IRQ_REMAP) += intr_remapping.o
+ obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
+ obj-$(CONFIG_OMAP_IOVMM) += omap-iovmm.o
+ obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
diff --combined drivers/iommu/intel-iommu.c
@@@ -306,11 -306,6 +306,11 @@@ static inline bool dma_pte_present(stru
        return (pte->val & 3) != 0;
  }
  
 +static inline bool dma_pte_superpage(struct dma_pte *pte)
 +{
 +      return (pte->val & (1 << 7));
 +}
 +
  static inline int first_pte_in_page(struct dma_pte *pte)
  {
        return !((unsigned long)pte & ~VTD_PAGE_MASK);
@@@ -398,20 -393,17 +398,20 @@@ static long list_size
  
  static void domain_remove_dev_info(struct dmar_domain *domain);
  
 -#ifdef CONFIG_DMAR_DEFAULT_ON
 +#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
  int dmar_disabled = 0;
  #else
  int dmar_disabled = 1;
 -#endif /*CONFIG_DMAR_DEFAULT_ON*/
 +#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
  
  static int dmar_map_gfx = 1;
  static int dmar_forcedac;
  static int intel_iommu_strict;
  static int intel_iommu_superpage = 1;
  
 +int intel_iommu_gfx_mapped;
 +EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
 +
  #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
  static DEFINE_SPINLOCK(device_domain_lock);
  static LIST_HEAD(device_domain_list);
@@@ -585,18 -577,17 +585,18 @@@ static void domain_update_iommu_snoopin
  
  static void domain_update_iommu_superpage(struct dmar_domain *domain)
  {
 -      int i, mask = 0xf;
 +      struct dmar_drhd_unit *drhd;
 +      struct intel_iommu *iommu = NULL;
 +      int mask = 0xf;
  
        if (!intel_iommu_superpage) {
                domain->iommu_superpage = 0;
                return;
        }
  
 -      domain->iommu_superpage = 4; /* 1TiB */
 -
 -      for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
 -              mask |= cap_super_page_val(g_iommus[i]->cap);
 +      /* set iommu_superpage to the smallest common denominator */
 +      for_each_active_iommu(iommu, drhd) {
 +              mask &= cap_super_page_val(iommu->cap);
                if (!mask) {
                        break;
                }
@@@ -739,23 -730,29 +739,23 @@@ out
  }
  
  static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 -                                    unsigned long pfn, int large_level)
 +                                    unsigned long pfn, int target_level)
  {
        int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
        struct dma_pte *parent, *pte = NULL;
        int level = agaw_to_level(domain->agaw);
 -      int offset, target_level;
 +      int offset;
  
        BUG_ON(!domain->pgd);
        BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
        parent = domain->pgd;
  
 -      /* Search pte */
 -      if (!large_level)
 -              target_level = 1;
 -      else
 -              target_level = large_level;
 -
        while (level > 0) {
                void *tmp_page;
  
                offset = pfn_level_offset(pfn, level);
                pte = &parent[offset];
 -              if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
 +              if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
                        break;
                if (level == target_level)
                        break;
@@@ -819,14 -816,13 +819,14 @@@ static struct dma_pte *dma_pfn_level_pt
  }
  
  /* clear last level pte, a tlb flush should be followed */
 -static void dma_pte_clear_range(struct dmar_domain *domain,
 +static int dma_pte_clear_range(struct dmar_domain *domain,
                                unsigned long start_pfn,
                                unsigned long last_pfn)
  {
        int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
        unsigned int large_page = 1;
        struct dma_pte *first_pte, *pte;
 +      int order;
  
        BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
        BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
                                   (void *)pte - (void *)first_pte);
  
        } while (start_pfn && start_pfn <= last_pfn);
 +
 +      order = (large_page - 1) * 9;
 +      return order;
  }
  
  /* free page table pages. last level pte should already be cleared */
@@@ -939,7 -932,7 +939,7 @@@ static void iommu_set_root_entry(struc
  
        addr = iommu->root_entry;
  
 -      spin_lock_irqsave(&iommu->register_lock, flag);
 +      raw_spin_lock_irqsave(&iommu->register_lock, flag);
        dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
  
        writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
                      readl, (sts & DMA_GSTS_RTPS), sts);
  
 -      spin_unlock_irqrestore(&iommu->register_lock, flag);
 +      raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
  }
  
  static void iommu_flush_write_buffer(struct intel_iommu *iommu)
        if (!rwbf_quirk && !cap_rwbf(iommu->cap))
                return;
  
 -      spin_lock_irqsave(&iommu->register_lock, flag);
 +      raw_spin_lock_irqsave(&iommu->register_lock, flag);
        writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
  
        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
                      readl, (!(val & DMA_GSTS_WBFS)), val);
  
 -      spin_unlock_irqrestore(&iommu->register_lock, flag);
 +      raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
  }
  
  /* return value determine if we need a write buffer flush */
@@@ -993,14 -986,14 +993,14 @@@ static void __iommu_flush_context(struc
        }
        val |= DMA_CCMD_ICC;
  
 -      spin_lock_irqsave(&iommu->register_lock, flag);
 +      raw_spin_lock_irqsave(&iommu->register_lock, flag);
        dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
  
        /* Make sure hardware complete it */
        IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
                dmar_readq, (!(val & DMA_CCMD_ICC)), val);
  
 -      spin_unlock_irqrestore(&iommu->register_lock, flag);
 +      raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
  }
  
  /* return value determine if we need a write buffer flush */
@@@ -1039,7 -1032,7 +1039,7 @@@ static void __iommu_flush_iotlb(struct 
        if (cap_write_drain(iommu->cap))
                val |= DMA_TLB_WRITE_DRAIN;
  
 -      spin_lock_irqsave(&iommu->register_lock, flag);
 +      raw_spin_lock_irqsave(&iommu->register_lock, flag);
        /* Note: Only uses first TLB reg currently */
        if (val_iva)
                dmar_writeq(iommu->reg + tlb_offset, val_iva);
        IOMMU_WAIT_OP(iommu, tlb_offset + 8,
                dmar_readq, (!(val & DMA_TLB_IVT)), val);
  
 -      spin_unlock_irqrestore(&iommu->register_lock, flag);
 +      raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
  
        /* check IOTLB invalidation granularity */
        if (DMA_TLB_IAIG(val) == 0)
@@@ -1165,7 -1158,7 +1165,7 @@@ static void iommu_disable_protect_mem_r
        u32 pmen;
        unsigned long flags;
  
 -      spin_lock_irqsave(&iommu->register_lock, flags);
 +      raw_spin_lock_irqsave(&iommu->register_lock, flags);
        pmen = readl(iommu->reg + DMAR_PMEN_REG);
        pmen &= ~DMA_PMEN_EPM;
        writel(pmen, iommu->reg + DMAR_PMEN_REG);
        IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
                readl, !(pmen & DMA_PMEN_PRS), pmen);
  
 -      spin_unlock_irqrestore(&iommu->register_lock, flags);
 +      raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
  }
  
  static int iommu_enable_translation(struct intel_iommu *iommu)
        u32 sts;
        unsigned long flags;
  
 -      spin_lock_irqsave(&iommu->register_lock, flags);
 +      raw_spin_lock_irqsave(&iommu->register_lock, flags);
        iommu->gcmd |= DMA_GCMD_TE;
        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
  
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
                      readl, (sts & DMA_GSTS_TES), sts);
  
 -      spin_unlock_irqrestore(&iommu->register_lock, flags);
 +      raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
        return 0;
  }
  
@@@ -1199,7 -1192,7 +1199,7 @@@ static int iommu_disable_translation(st
        u32 sts;
        unsigned long flag;
  
 -      spin_lock_irqsave(&iommu->register_lock, flag);
 +      raw_spin_lock_irqsave(&iommu->register_lock, flag);
        iommu->gcmd &= ~DMA_GCMD_TE;
        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
  
        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
                      readl, (!(sts & DMA_GSTS_TES)), sts);
  
 -      spin_unlock_irqrestore(&iommu->register_lock, flag);
 +      raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
        return 0;
  }
  
@@@ -2157,7 -2150,7 +2157,7 @@@ static inline int iommu_prepare_rmrr_de
                rmrr->end_address);
  }
  
 -#ifdef CONFIG_DMAR_FLOPPY_WA
 +#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
  static inline void iommu_prepare_isa(void)
  {
        struct pci_dev *pdev;
@@@ -2180,7 -2173,7 +2180,7 @@@ static inline void iommu_prepare_isa(vo
  {
        return;
  }
 -#endif /* !CONFIG_DMAR_FLPY_WA */
 +#endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */
  
  static int md_domain_init(struct dmar_domain *domain, int guest_width);
  
@@@ -2491,7 -2484,7 +2491,7 @@@ static int __init init_dmars(void
        if (iommu_pass_through)
                iommu_identity_mapping |= IDENTMAP_ALL;
  
 -#ifdef CONFIG_DMAR_BROKEN_GFX_WA
 +#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
        iommu_identity_mapping |= IDENTMAP_GFX;
  #endif
  
@@@ -3233,6 -3226,9 +3233,6 @@@ static void __init init_no_remapping_de
                }
        }
  
 -      if (dmar_map_gfx)
 -              return;
 -
        for_each_drhd_unit(drhd) {
                int i;
                if (drhd->ignored || drhd->include_all)
  
                for (i = 0; i < drhd->devices_cnt; i++)
                        if (drhd->devices[i] &&
 -                              !IS_GFX_DEVICE(drhd->devices[i]))
 +                          !IS_GFX_DEVICE(drhd->devices[i]))
                                break;
  
                if (i < drhd->devices_cnt)
                        continue;
  
 -              /* bypass IOMMU if it is just for gfx devices */
 -              drhd->ignored = 1;
 -              for (i = 0; i < drhd->devices_cnt; i++) {
 -                      if (!drhd->devices[i])
 -                              continue;
 -                      drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
 +              /* This IOMMU has *only* gfx devices. Either bypass it or
 +                 set the gfx_mapped flag, as appropriate */
 +              if (dmar_map_gfx) {
 +                      intel_iommu_gfx_mapped = 1;
 +              } else {
 +                      drhd->ignored = 1;
 +                      for (i = 0; i < drhd->devices_cnt; i++) {
 +                              if (!drhd->devices[i])
 +                                      continue;
 +                              drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
 +                      }
                }
        }
  }
@@@ -3329,7 -3320,7 +3329,7 @@@ static int iommu_suspend(void
        for_each_active_iommu(iommu, drhd) {
                iommu_disable_translation(iommu);
  
 -              spin_lock_irqsave(&iommu->register_lock, flag);
 +              raw_spin_lock_irqsave(&iommu->register_lock, flag);
  
                iommu->iommu_state[SR_DMAR_FECTL_REG] =
                        readl(iommu->reg + DMAR_FECTL_REG);
                iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
                        readl(iommu->reg + DMAR_FEUADDR_REG);
  
 -              spin_unlock_irqrestore(&iommu->register_lock, flag);
 +              raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
        }
        return 0;
  
@@@ -3367,7 -3358,7 +3367,7 @@@ static void iommu_resume(void
  
        for_each_active_iommu(iommu, drhd) {
  
 -              spin_lock_irqsave(&iommu->register_lock, flag);
 +              raw_spin_lock_irqsave(&iommu->register_lock, flag);
  
                writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
                        iommu->reg + DMAR_FECTL_REG);
                writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
                        iommu->reg + DMAR_FEUADDR_REG);
  
 -              spin_unlock_irqrestore(&iommu->register_lock, flag);
 +              raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
        }
  
        for_each_active_iommu(iommu, drhd)
@@@ -3399,151 -3390,6 +3399,151 @@@ static void __init init_iommu_pm_ops(vo
  static inline void init_iommu_pm_ops(void) {}
  #endif        /* CONFIG_PM */
  
 +LIST_HEAD(dmar_rmrr_units);
 +
 +static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
 +{
 +      list_add(&rmrr->list, &dmar_rmrr_units);
 +}
 +
 +
 +int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
 +{
 +      struct acpi_dmar_reserved_memory *rmrr;
 +      struct dmar_rmrr_unit *rmrru;
 +
 +      rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
 +      if (!rmrru)
 +              return -ENOMEM;
 +
 +      rmrru->hdr = header;
 +      rmrr = (struct acpi_dmar_reserved_memory *)header;
 +      rmrru->base_address = rmrr->base_address;
 +      rmrru->end_address = rmrr->end_address;
 +
 +      dmar_register_rmrr_unit(rmrru);
 +      return 0;
 +}
 +
 +static int __init
 +rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
 +{
 +      struct acpi_dmar_reserved_memory *rmrr;
 +      int ret;
 +
 +      rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
 +      ret = dmar_parse_dev_scope((void *)(rmrr + 1),
 +              ((void *)rmrr) + rmrr->header.length,
 +              &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
 +
 +      if (ret || (rmrru->devices_cnt == 0)) {
 +              list_del(&rmrru->list);
 +              kfree(rmrru);
 +      }
 +      return ret;
 +}
 +
 +static LIST_HEAD(dmar_atsr_units);
 +
 +int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
 +{
 +      struct acpi_dmar_atsr *atsr;
 +      struct dmar_atsr_unit *atsru;
 +
 +      atsr = container_of(hdr, struct acpi_dmar_atsr, header);
 +      atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
 +      if (!atsru)
 +              return -ENOMEM;
 +
 +      atsru->hdr = hdr;
 +      atsru->include_all = atsr->flags & 0x1;
 +
 +      list_add(&atsru->list, &dmar_atsr_units);
 +
 +      return 0;
 +}
 +
 +static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
 +{
 +      int rc;
 +      struct acpi_dmar_atsr *atsr;
 +
 +      if (atsru->include_all)
 +              return 0;
 +
 +      atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
 +      rc = dmar_parse_dev_scope((void *)(atsr + 1),
 +                              (void *)atsr + atsr->header.length,
 +                              &atsru->devices_cnt, &atsru->devices,
 +                              atsr->segment);
 +      if (rc || !atsru->devices_cnt) {
 +              list_del(&atsru->list);
 +              kfree(atsru);
 +      }
 +
 +      return rc;
 +}
 +
 +int dmar_find_matched_atsr_unit(struct pci_dev *dev)
 +{
 +      int i;
 +      struct pci_bus *bus;
 +      struct acpi_dmar_atsr *atsr;
 +      struct dmar_atsr_unit *atsru;
 +
 +      dev = pci_physfn(dev);
 +
 +      list_for_each_entry(atsru, &dmar_atsr_units, list) {
 +              atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
 +              if (atsr->segment == pci_domain_nr(dev->bus))
 +                      goto found;
 +      }
 +
 +      return 0;
 +
 +found:
 +      for (bus = dev->bus; bus; bus = bus->parent) {
 +              struct pci_dev *bridge = bus->self;
 +
 +              if (!bridge || !pci_is_pcie(bridge) ||
 +                  bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
 +                      return 0;
 +
 +              if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
 +                      for (i = 0; i < atsru->devices_cnt; i++)
 +                              if (atsru->devices[i] == bridge)
 +                                      return 1;
 +                      break;
 +              }
 +      }
 +
 +      if (atsru->include_all)
 +              return 1;
 +
 +      return 0;
 +}
 +
 +int dmar_parse_rmrr_atsr_dev(void)
 +{
 +      struct dmar_rmrr_unit *rmrr, *rmrr_n;
 +      struct dmar_atsr_unit *atsr, *atsr_n;
 +      int ret = 0;
 +
 +      list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
 +              ret = rmrr_parse_dev(rmrr);
 +              if (ret)
 +                      return ret;
 +      }
 +
 +      list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
 +              ret = atsr_parse_dev(atsr);
 +              if (ret)
 +                      return ret;
 +      }
 +
 +      return ret;
 +}
 +
  /*
   * Here we only respond to action of unbound device from driver.
   *
@@@ -3593,12 -3439,16 +3593,12 @@@ int __init intel_iommu_init(void
                return  -ENODEV;
        }
  
 -      if (dmar_dev_scope_init()) {
 +      if (dmar_dev_scope_init() < 0) {
                if (force_on)
                        panic("tboot: Failed to initialize DMAR device scope\n");
                return  -ENODEV;
        }
  
 -      /*
 -       * Check the need for DMA-remapping initialization now.
 -       * Above initialization will also be used by Interrupt-remapping.
 -       */
        if (no_iommu || dmar_disabled)
                return -ENODEV;
  
                return  -ENODEV;
        }
  
 +      if (list_empty(&dmar_rmrr_units))
 +              printk(KERN_INFO "DMAR: No RMRR found\n");
 +
 +      if (list_empty(&dmar_atsr_units))
 +              printk(KERN_INFO "DMAR: No ATSR found\n");
 +
        if (dmar_init_reserved_ranges()) {
                if (force_on)
                        panic("tboot: Failed to reserve iommu ranges\n");
  
        init_iommu_pm_ops();
  
-       register_iommu(&intel_iommu_ops);
+       bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
  
        bus_register_notifier(&pci_bus_type, &device_nb);
  
@@@ -3724,8 -3568,6 +3724,8 @@@ static void domain_remove_one_dev_info(
                        found = 1;
        }
  
 +      spin_unlock_irqrestore(&device_domain_lock, flags);
 +
        if (found == 0) {
                unsigned long tmp_flags;
                spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
                        spin_unlock_irqrestore(&iommu->lock, tmp_flags);
                }
        }
 -
 -      spin_unlock_irqrestore(&device_domain_lock, flags);
  }
  
  static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
@@@ -3895,7 -3739,6 +3895,7 @@@ static int intel_iommu_domain_init(stru
                vm_domain_exit(dmar_domain);
                return -ENOMEM;
        }
 +      domain_update_iommu_cap(dmar_domain);
        domain->priv = dmar_domain;
  
        return 0;
@@@ -4021,15 -3864,14 +4021,15 @@@ static int intel_iommu_unmap(struct iom
  {
        struct dmar_domain *dmar_domain = domain->priv;
        size_t size = PAGE_SIZE << gfp_order;
 +      int order;
  
 -      dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
 +      order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
                            (iova + size - 1) >> VTD_PAGE_SHIFT);
  
        if (dmar_domain->max_addr == iova + size)
                dmar_domain->max_addr = iova;
  
 -      return gfp_order;
 +      return order;
  }
  
  static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@@ -4108,11 -3950,7 +4108,11 @@@ static void __devinit quirk_calpella_no
        if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
                printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
                dmar_map_gfx = 0;
 -      }
 +      } else if (dmar_map_gfx) {
 +              /* we have to ensure the gfx device is idle before we flush */
 +              printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
 +              intel_iommu_strict = 1;
 +       }
  }
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
diff --combined include/linux/device.h
@@@ -33,6 -33,7 +33,7 @@@ struct class
  struct subsys_private;
  struct bus_type;
  struct device_node;
+ struct iommu_ops;
  
  struct bus_attribute {
        struct attribute        attr;
@@@ -67,6 -68,9 +68,9 @@@ extern void bus_remove_file(struct bus_
   * @resume:   Called to bring a device on this bus out of sleep mode.
   * @pm:               Power management operations of this bus, callback the specific
   *            device driver's pm-ops.
+  * @iommu_ops   IOMMU specific operations for this bus, used to attach IOMMU
+  *              driver implementations to a bus and allow the driver to do
+  *              bus-specific setup
   * @p:                The private data of the driver core, only the driver core can
   *            touch this.
   *
@@@ -96,6 -100,8 +100,8 @@@ struct bus_type 
  
        const struct dev_pm_ops *pm;
  
+       struct iommu_ops *iommu_ops;
        struct subsys_private *p;
  };
  
@@@ -350,8 -356,6 +356,8 @@@ struct class_attribute 
                        char *buf);
        ssize_t (*store)(struct class *class, struct class_attribute *attr,
                        const char *buf, size_t count);
 +      const void *(*namespace)(struct class *class,
 +                               const struct class_attribute *attr);
  };
  
  #define CLASS_ATTR(_name, _mode, _show, _store)                       \
@@@ -638,11 -642,6 +644,11 @@@ static inline void set_dev_node(struct 
  }
  #endif
  
 +static inline struct pm_subsys_data *dev_to_psd(struct device *dev)
 +{
 +      return dev ? dev->power.subsys_data : NULL;
 +}
 +
  static inline unsigned int dev_get_uevent_suppress(const struct device *dev)
  {
        return dev->kobj.uevent_suppress;
@@@ -792,8 -791,6 +798,8 @@@ extern const char *dev_driver_string(co
  
  #ifdef CONFIG_PRINTK
  
 +extern int __dev_printk(const char *level, const struct device *dev,
 +                      struct va_format *vaf);
  extern int dev_printk(const char *level, const struct device *dev,
                      const char *fmt, ...)
        __attribute__ ((format (printf, 3, 4)));
@@@ -814,9 -811,6 +820,9 @@@ extern int _dev_info(const struct devic
  
  #else
  
 +static inline int __dev_printk(const char *level, const struct device *dev,
 +                             struct va_format *vaf)
 +       { return 0; }
  static inline int dev_printk(const char *level, const struct device *dev,
                      const char *fmt, ...)
        __attribute__ ((format (printf, 3, 4)));
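
With iommu_ops now hanging off struct bus_type (see the device.h hunk above), the iommu core answers per-bus queries by dispatching through bus->iommu_ops. A simplified sketch of the idea, not a verbatim copy of drivers/iommu/iommu.c:

    #include <linux/device.h>
    #include <linux/iommu.h>
    #include <linux/slab.h>

    bool iommu_present(struct bus_type *bus)
    {
            return bus->iommu_ops != NULL;
    }

    struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
    {
            struct iommu_domain *domain;

            if (bus == NULL || bus->iommu_ops == NULL)
                    return NULL;

            domain = kzalloc(sizeof(*domain), GFP_KERNEL);
            if (!domain)
                    return NULL;

            /* remember which implementation owns this domain ... */
            domain->ops = bus->iommu_ops;
            /* ... and let it initialize its private state */
            if (domain->ops->domain_init(domain)) {
                    kfree(domain);
                    return NULL;
            }
            return domain;
    }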
diff --combined virt/kvm/iommu.c
@@@ -187,8 -187,6 +187,8 @@@ int kvm_assign_device(struct kvm *kvm
                        goto out_unmap;
        }
  
 +      pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
 +
        printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
                assigned_dev->host_segnr,
                assigned_dev->host_busnr,
@@@ -217,8 -215,6 +217,8 @@@ int kvm_deassign_device(struct kvm *kvm
  
        iommu_detach_device(domain, &pdev->dev);
  
 +      pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
 +
        printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
                assigned_dev->host_segnr,
                assigned_dev->host_busnr,
@@@ -232,12 -228,12 +232,12 @@@ int kvm_iommu_map_guest(struct kvm *kvm
  {
        int r;
  
-       if (!iommu_found()) {
+       if (!iommu_present(&pci_bus_type)) {
                printk(KERN_ERR "%s: iommu not found\n", __func__);
                return -ENODEV;
        }
  
-       kvm->arch.iommu_domain = iommu_domain_alloc();
+       kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type);
        if (!kvm->arch.iommu_domain)
                return -ENOMEM;