Merge branch 'core-iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 26 Oct 2011 14:11:53 +0000 (16:11 +0200)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 26 Oct 2011 14:11:53 +0000 (16:11 +0200)
* 'core-iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, ioapic: Consolidate the explicit EOI code
  x86, ioapic: Restore the mask bit correctly in eoi_ioapic_irq()
  x86, kdump, ioapic: Reset remote-IRR in clear_IO_APIC
  iommu: Rename the DMAR and INTR_REMAP config options
  x86, ioapic: Define irq_remap_modify_chip_defaults()
  x86, msi, intr-remap: Use the ioapic set affinity routine
  iommu: Cleanup ifdefs in detect_intel_iommu()
  iommu: No need to set dmar_disabled in check_zero_address()
  iommu: Move IOMMU specific code to intel-iommu.c
  intr_remap: Call dmar_dev_scope_init() explicitly
  x86, x2apic: Enable the bios request for x2apic optout

1  2 
Documentation/kernel-parameters.txt
arch/x86/Kconfig
drivers/iommu/intel-iommu.c

@@@ -49,7 -49,6 +49,7 @@@ parameter is applicable
        EDD     BIOS Enhanced Disk Drive Services (EDD) is enabled
        EFI     EFI Partitioning (GPT) is enabled
        EIDE    EIDE/ATAPI support is enabled.
 +      EVM     Extended Verification Module
        FB      The frame buffer device is enabled.
        FTRACE  Function tracing enabled.
        GCOV    GCOV profiling is enabled.
@@@ -164,7 -163,7 +164,7 @@@ bytes respectively. Such letter suffixe
                        rsdt -- prefer RSDT over (default) XSDT
                        copy_dsdt -- copy DSDT to memory
  
 -                      See also Documentation/power/pm.txt, pci=noacpi
 +                      See also Documentation/power/runtime_pm.txt, pci=noacpi
  
        acpi_rsdp=      [ACPI,EFI,KEXEC]
                        Pass the RSDP address to the kernel, mostly used
        amijoy.map=     [HW,JOY] Amiga joystick support
                        Map of devices attached to JOY0DAT and JOY1DAT
                        Format: <a>,<b>
 -                      See also Documentation/kernel/input/joystick.txt
 +                      See also Documentation/input/joystick.txt
  
        analog.map=     [HW,JOY] Analog joystick and gamepad support
                        Specifies type or capabilities of an analog joystick
        bttv.radio=     Most important insmod options are available as
                        kernel args too.
        bttv.pll=       See Documentation/video4linux/bttv/Insmod-options
 -      bttv.tuner=     and Documentation/video4linux/bttv/CARDLIST
 +      bttv.tuner=
  
        bulk_remove=off [PPC]  This parameter disables the use of the pSeries
                        firmware feature for flushing multiple hpte entries
  
        elevator=       [IOSCHED]
                        Format: {"cfq" | "deadline" | "noop"}
 -                      See Documentation/block/as-iosched.txt and
 +                      See Documentation/block/cfq-iosched.txt and
                        Documentation/block/deadline-iosched.txt for details.
  
        elfcorehdr=     [IA-64,PPC,SH,X86]
                        This option is obsoleted by the "netdev=" option, which
                        has equivalent usage. See its documentation for details.
  
 +      evm=            [EVM]
 +                      Format: { "fix" }
 +                      Permit 'security.evm' to be updated regardless of
 +                      current integrity status.
 +
        failslab=
        fail_page_alloc=
        fail_make_request=[KNL]
                        General fault injection mechanism.
                        Format: <interval>,<probability>,<space>,<times>
 -                      See also /Documentation/fault-injection/.
 +                      See also Documentation/fault-injection/.
  
        floppy=         [HW]
                        See Documentation/blockdev/floppy.txt.
                        has the capability. With this option, super page will
                        not be supported.
        intremap=       [X86-64, Intel-IOMMU]
-                       Format: { on (default) | off | nosid }
                        on      enable Interrupt Remapping (default)
                        off     disable Interrupt Remapping
                        nosid   disable Source ID checking
+                       no_x2apic_optout
+                               BIOS x2APIC opt-out request will be ignored
  
        inttest=        [IA-64]
  
                        in <PAGE_SIZE> units (needed only for swap files).
                        See  Documentation/power/swsusp-and-swap-files.txt
  
 +      resumedelay=    [HIBERNATION] Delay (in seconds) to pause before attempting to
 +                      read the resume files
 +
 +      resumewait      [HIBERNATION] Wait (indefinitely) for resume device to show up.
 +                      Useful for devices that are detected asynchronously
 +                      (e.g. USB and MMC devices).
 +
        hibernate=      [HIBERNATION]
                noresume        Don't check if there's a hibernation image
                                present during boot.
                        Format: <integer>
  
        sonypi.*=       [HW] Sony Programmable I/O Control Device driver
 -                      See Documentation/sonypi.txt
 +                      See Documentation/laptops/sonypi.txt
  
        specialix=      [HW,SERIAL] Specialix multi-serial port adapter
                        See Documentation/serial/specialix.txt.
                        functions are at fixed addresses, they make nice
                        targets for exploits that can control RIP.
  
 -                      emulate     [default] Vsyscalls turn into traps and are
 -                                  emulated reasonably safely.
 +                      emulate     Vsyscalls turn into traps and are emulated
 +                                  reasonably safely.
  
 -                      native      Vsyscalls are native syscall instructions.
 +                      native      [default] Vsyscalls are native syscall
 +                                  instructions.
                                    This is a little bit faster than trapping
                                    and makes a few dynamic recompilers work
                                    better than they would in emulation mode.
diff --combined arch/x86/Kconfig
@@@ -130,7 -130,7 +130,7 @@@ config SBU
        bool
  
  config NEED_DMA_MAP_STATE
-        def_bool (X86_64 || DMAR || DMA_API_DEBUG)
+        def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG)
  
  config NEED_SG_DMA_LENGTH
        def_bool y
@@@ -220,7 -220,7 +220,7 @@@ config ARCH_SUPPORTS_DEBUG_PAGEALLO
  
  config HAVE_INTEL_TXT
        def_bool y
-       depends on EXPERIMENTAL && DMAR && ACPI
+       depends on EXPERIMENTAL && INTEL_IOMMU && ACPI
  
  config X86_32_SMP
        def_bool y
@@@ -279,7 -279,7 +279,7 @@@ config SM
          Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
          Management" code will be disabled if you say Y here.
  
 -        See also <file:Documentation/i386/IO-APIC.txt>,
 +        See also <file:Documentation/x86/i386/IO-APIC.txt>,
          <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
          <http://www.tldp.org/docs.html#howto>.
  
  
  config X86_X2APIC
        bool "Support x2apic"
-       depends on X86_LOCAL_APIC && X86_64 && INTR_REMAP
+       depends on X86_LOCAL_APIC && X86_64 && IRQ_REMAP
        ---help---
          This enables x2apic support on CPUs that have this feature.
  
@@@ -306,11 -306,6 +306,11 @@@ static inline bool dma_pte_present(stru
        return (pte->val & 3) != 0;
  }
  
 +static inline bool dma_pte_superpage(struct dma_pte *pte)
 +{
 +      return (pte->val & (1 << 7));
 +}
 +
  static inline int first_pte_in_page(struct dma_pte *pte)
  {
        return !((unsigned long)pte & ~VTD_PAGE_MASK);
@@@ -398,20 -393,17 +398,20 @@@ static long list_size
  
  static void domain_remove_dev_info(struct dmar_domain *domain);
  
- #ifdef CONFIG_DMAR_DEFAULT_ON
+ #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
  int dmar_disabled = 0;
  #else
  int dmar_disabled = 1;
- #endif /*CONFIG_DMAR_DEFAULT_ON*/
+ #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
  
  static int dmar_map_gfx = 1;
  static int dmar_forcedac;
  static int intel_iommu_strict;
  static int intel_iommu_superpage = 1;
  
 +int intel_iommu_gfx_mapped;
 +EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
 +
  #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
  static DEFINE_SPINLOCK(device_domain_lock);
  static LIST_HEAD(device_domain_list);
@@@ -585,18 -577,17 +585,18 @@@ static void domain_update_iommu_snoopin
  
  static void domain_update_iommu_superpage(struct dmar_domain *domain)
  {
 -      int i, mask = 0xf;
 +      struct dmar_drhd_unit *drhd;
 +      struct intel_iommu *iommu = NULL;
 +      int mask = 0xf;
  
        if (!intel_iommu_superpage) {
                domain->iommu_superpage = 0;
                return;
        }
  
 -      domain->iommu_superpage = 4; /* 1TiB */
 -
 -      for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
 -              mask |= cap_super_page_val(g_iommus[i]->cap);
 +      /* set iommu_superpage to the smallest common denominator */
 +      for_each_active_iommu(iommu, drhd) {
 +              mask &= cap_super_page_val(iommu->cap);
                if (!mask) {
                        break;
                }
@@@ -739,23 -730,29 +739,23 @@@ out
  }
  
  static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 -                                    unsigned long pfn, int large_level)
 +                                    unsigned long pfn, int target_level)
  {
        int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
        struct dma_pte *parent, *pte = NULL;
        int level = agaw_to_level(domain->agaw);
 -      int offset, target_level;
 +      int offset;
  
        BUG_ON(!domain->pgd);
        BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
        parent = domain->pgd;
  
 -      /* Search pte */
 -      if (!large_level)
 -              target_level = 1;
 -      else
 -              target_level = large_level;
 -
        while (level > 0) {
                void *tmp_page;
  
                offset = pfn_level_offset(pfn, level);
                pte = &parent[offset];
 -              if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
 +              if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
                        break;
                if (level == target_level)
                        break;
@@@ -819,14 -816,13 +819,14 @@@ static struct dma_pte *dma_pfn_level_pt
  }
  
  /* clear last level pte, a tlb flush should be followed */
 -static void dma_pte_clear_range(struct dmar_domain *domain,
 +static int dma_pte_clear_range(struct dmar_domain *domain,
                                unsigned long start_pfn,
                                unsigned long last_pfn)
  {
        int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
        unsigned int large_page = 1;
        struct dma_pte *first_pte, *pte;
 +      int order;
  
        BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
        BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
                                   (void *)pte - (void *)first_pte);
  
        } while (start_pfn && start_pfn <= last_pfn);
 +
 +      order = (large_page - 1) * 9;
 +      return order;
  }
  
  /* free page table pages. last level pte should already be cleared */
@@@ -2157,7 -2150,7 +2157,7 @@@ static inline int iommu_prepare_rmrr_de
                rmrr->end_address);
  }
  
- #ifdef CONFIG_DMAR_FLOPPY_WA
+ #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
  static inline void iommu_prepare_isa(void)
  {
        struct pci_dev *pdev;
@@@ -2180,7 -2173,7 +2180,7 @@@ static inline void iommu_prepare_isa(vo
  {
        return;
  }
- #endif /* !CONFIG_DMAR_FLPY_WA */
+ #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
  
  static int md_domain_init(struct dmar_domain *domain, int guest_width);
  
@@@ -2491,7 -2484,7 +2491,7 @@@ static int __init init_dmars(void
        if (iommu_pass_through)
                iommu_identity_mapping |= IDENTMAP_ALL;
  
- #ifdef CONFIG_DMAR_BROKEN_GFX_WA
+ #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
        iommu_identity_mapping |= IDENTMAP_GFX;
  #endif
  
@@@ -3233,6 -3226,9 +3233,6 @@@ static void __init init_no_remapping_de
                }
        }
  
 -      if (dmar_map_gfx)
 -              return;
 -
        for_each_drhd_unit(drhd) {
                int i;
                if (drhd->ignored || drhd->include_all)
  
                for (i = 0; i < drhd->devices_cnt; i++)
                        if (drhd->devices[i] &&
 -                              !IS_GFX_DEVICE(drhd->devices[i]))
 +                          !IS_GFX_DEVICE(drhd->devices[i]))
                                break;
  
                if (i < drhd->devices_cnt)
                        continue;
  
 -              /* bypass IOMMU if it is just for gfx devices */
 -              drhd->ignored = 1;
 -              for (i = 0; i < drhd->devices_cnt; i++) {
 -                      if (!drhd->devices[i])
 -                              continue;
 -                      drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
 +              /* This IOMMU has *only* gfx devices. Either bypass it or
 +                 set the gfx_mapped flag, as appropriate */
 +              if (dmar_map_gfx) {
 +                      intel_iommu_gfx_mapped = 1;
 +              } else {
 +                      drhd->ignored = 1;
 +                      for (i = 0; i < drhd->devices_cnt; i++) {
 +                              if (!drhd->devices[i])
 +                                      continue;
 +                              drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
 +                      }
                }
        }
  }
@@@ -3399,6 -3390,151 +3399,151 @@@ static void __init init_iommu_pm_ops(vo
  static inline void init_iommu_pm_ops(void) {}
  #endif        /* CONFIG_PM */
  
+ LIST_HEAD(dmar_rmrr_units);
+ static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
+ {
+       list_add(&rmrr->list, &dmar_rmrr_units);
+ }
+ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
+ {
+       struct acpi_dmar_reserved_memory *rmrr;
+       struct dmar_rmrr_unit *rmrru;
+       rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
+       if (!rmrru)
+               return -ENOMEM;
+       rmrru->hdr = header;
+       rmrr = (struct acpi_dmar_reserved_memory *)header;
+       rmrru->base_address = rmrr->base_address;
+       rmrru->end_address = rmrr->end_address;
+       dmar_register_rmrr_unit(rmrru);
+       return 0;
+ }
+ static int __init
+ rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
+ {
+       struct acpi_dmar_reserved_memory *rmrr;
+       int ret;
+       rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
+       ret = dmar_parse_dev_scope((void *)(rmrr + 1),
+               ((void *)rmrr) + rmrr->header.length,
+               &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
+       if (ret || (rmrru->devices_cnt == 0)) {
+               list_del(&rmrru->list);
+               kfree(rmrru);
+       }
+       return ret;
+ }
+ static LIST_HEAD(dmar_atsr_units);
+ int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
+ {
+       struct acpi_dmar_atsr *atsr;
+       struct dmar_atsr_unit *atsru;
+       atsr = container_of(hdr, struct acpi_dmar_atsr, header);
+       atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
+       if (!atsru)
+               return -ENOMEM;
+       atsru->hdr = hdr;
+       atsru->include_all = atsr->flags & 0x1;
+       list_add(&atsru->list, &dmar_atsr_units);
+       return 0;
+ }
+ static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
+ {
+       int rc;
+       struct acpi_dmar_atsr *atsr;
+       if (atsru->include_all)
+               return 0;
+       atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
+       rc = dmar_parse_dev_scope((void *)(atsr + 1),
+                               (void *)atsr + atsr->header.length,
+                               &atsru->devices_cnt, &atsru->devices,
+                               atsr->segment);
+       if (rc || !atsru->devices_cnt) {
+               list_del(&atsru->list);
+               kfree(atsru);
+       }
+       return rc;
+ }
+ int dmar_find_matched_atsr_unit(struct pci_dev *dev)
+ {
+       int i;
+       struct pci_bus *bus;
+       struct acpi_dmar_atsr *atsr;
+       struct dmar_atsr_unit *atsru;
+       dev = pci_physfn(dev);
+       list_for_each_entry(atsru, &dmar_atsr_units, list) {
+               atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
+               if (atsr->segment == pci_domain_nr(dev->bus))
+                       goto found;
+       }
+       return 0;
+ found:
+       for (bus = dev->bus; bus; bus = bus->parent) {
+               struct pci_dev *bridge = bus->self;
+               if (!bridge || !pci_is_pcie(bridge) ||
+                   bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
+                       return 0;
+               if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
+                       for (i = 0; i < atsru->devices_cnt; i++)
+                               if (atsru->devices[i] == bridge)
+                                       return 1;
+                       break;
+               }
+       }
+       if (atsru->include_all)
+               return 1;
+       return 0;
+ }
+ int dmar_parse_rmrr_atsr_dev(void)
+ {
+       struct dmar_rmrr_unit *rmrr, *rmrr_n;
+       struct dmar_atsr_unit *atsr, *atsr_n;
+       int ret = 0;
+       list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
+               ret = rmrr_parse_dev(rmrr);
+               if (ret)
+                       return ret;
+       }
+       list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
+               ret = atsr_parse_dev(atsr);
+               if (ret)
+                       return ret;
+       }
+       return ret;
+ }
  /*
   * Here we only respond to action of unbound device from driver.
   *
@@@ -3448,16 -3584,12 +3593,12 @@@ int __init intel_iommu_init(void
                return  -ENODEV;
        }
  
-       if (dmar_dev_scope_init()) {
+       if (dmar_dev_scope_init() < 0) {
                if (force_on)
                        panic("tboot: Failed to initialize DMAR device scope\n");
                return  -ENODEV;
        }
  
-       /*
-        * Check the need for DMA-remapping initialization now.
-        * Above initialization will also be used by Interrupt-remapping.
-        */
        if (no_iommu || dmar_disabled)
                return -ENODEV;
  
                return  -ENODEV;
        }
  
+       if (list_empty(&dmar_rmrr_units))
+               printk(KERN_INFO "DMAR: No RMRR found\n");
+       if (list_empty(&dmar_atsr_units))
+               printk(KERN_INFO "DMAR: No ATSR found\n");
        if (dmar_init_reserved_ranges()) {
                if (force_on)
                        panic("tboot: Failed to reserve iommu ranges\n");
@@@ -3577,8 -3715,6 +3724,8 @@@ static void domain_remove_one_dev_info(
                        found = 1;
        }
  
 +      spin_unlock_irqrestore(&device_domain_lock, flags);
 +
        if (found == 0) {
                unsigned long tmp_flags;
                spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
                        spin_unlock_irqrestore(&iommu->lock, tmp_flags);
                }
        }
 -
 -      spin_unlock_irqrestore(&device_domain_lock, flags);
  }
  
  static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
@@@ -3748,7 -3886,6 +3895,7 @@@ static int intel_iommu_domain_init(stru
                vm_domain_exit(dmar_domain);
                return -ENOMEM;
        }
 +      domain_update_iommu_cap(dmar_domain);
        domain->priv = dmar_domain;
  
        return 0;
@@@ -3874,15 -4011,14 +4021,15 @@@ static int intel_iommu_unmap(struct iom
  {
        struct dmar_domain *dmar_domain = domain->priv;
        size_t size = PAGE_SIZE << gfp_order;
 +      int order;
  
 -      dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
 +      order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
                            (iova + size - 1) >> VTD_PAGE_SHIFT);
  
        if (dmar_domain->max_addr == iova + size)
                dmar_domain->max_addr = iova;
  
 -      return gfp_order;
 +      return order;
  }
  
  static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@@ -3961,11 -4097,7 +4108,11 @@@ static void __devinit quirk_calpella_no
        if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
                printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
                dmar_map_gfx = 0;
 -      }
 +      } else if (dmar_map_gfx) {
 +              /* we have to ensure the gfx device is idle before we flush */
 +              printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
 +              intel_iommu_strict = 1;
 +       }
  }
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);