intel-iommu: Defer the iotlb flush and iova free for intel_unmap_sg() too.
[pandora-kernel.git] / drivers / pci / intel-iommu.c
index 2c1b2ba..ee48fd0 100644 (file)
@@ -240,6 +240,11 @@ static inline bool dma_pte_present(struct dma_pte *pte)
        return (pte->val & 3) != 0;
 }
 
+static inline int first_pte_in_page(struct dma_pte *pte)
+{ /* true iff pte's address has no bits set in ~VTD_PAGE_MASK */
+       return !((unsigned long)pte & ~VTD_PAGE_MASK);
+}
+
 /*
  * This domain is a statically identity mapping domain.
  *     1. This domain creats a static 1:1 mapping to all usable memory.
@@ -780,13 +785,12 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
                        start_pfn = align_to_level(start_pfn + 1, 2);
                        continue;
                }
-               while (start_pfn <= last_pfn &&
-                      (unsigned long)pte >> VTD_PAGE_SHIFT ==
-                      (unsigned long)first_pte >> VTD_PAGE_SHIFT) {
+               do { 
                        dma_clear_pte(pte);
                        start_pfn++;
                        pte++;
-               }
+               } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
+
                domain_flush_cache(domain, first_pte,
                                   (void *)pte - (void *)first_pte);
        }
@@ -821,14 +825,16 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
                                tmp = align_to_level(tmp + 1, level + 1);
                                continue;
                        }
-                       while (tmp + level_size(level) - 1 <= last_pfn &&
-                              (unsigned long)pte >> VTD_PAGE_SHIFT ==
-                              (unsigned long)first_pte >> VTD_PAGE_SHIFT) {
-                               free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
-                               dma_clear_pte(pte);
+                       do {
+                               if (dma_pte_present(pte)) {
+                                       free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
+                                       dma_clear_pte(pte);
+                               }
                                pte++;
                                tmp += level_size(level);
-                       }
+                       } while (!first_pte_in_page(pte) &&
+                                tmp + level_size(level) - 1 <= last_pfn);
+
                        domain_flush_cache(domain, first_pte,
                                           (void *)pte - (void *)first_pte);
                        
@@ -1303,7 +1309,6 @@ static void iommu_detach_domain(struct dmar_domain *domain,
 }
 
 static struct iova_domain reserved_iova_list;
-static struct lock_class_key reserved_alloc_key;
 static struct lock_class_key reserved_rbtree_key;
 
 static void dmar_init_reserved_ranges(void)
@@ -1314,8 +1319,6 @@ static void dmar_init_reserved_ranges(void)
 
        init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
 
-       lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
-               &reserved_alloc_key);
        lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
                &reserved_rbtree_key);
 
@@ -1694,9 +1697,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                        WARN_ON(1);
                }
                pte++;
-               if (!nr_pages ||
-                   (unsigned long)pte >> VTD_PAGE_SHIFT !=
-                   (unsigned long)first_pte >> VTD_PAGE_SHIFT) {
+               if (!nr_pages || first_pte_in_page(pte)) {
                        domain_flush_cache(domain, first_pte,
                                           (void *)pte - (void *)first_pte);
                        pte = NULL;
@@ -2113,6 +2114,47 @@ static int domain_add_dev_info(struct dmar_domain *domain,
        return 0;
 }
 
+static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
+{ /* Nonzero if pdev should be placed in the 1:1 (identity) domain. */
+       if (iommu_identity_mapping == 2) /* mode 2: decided by IS_GFX_DEVICE */
+               return IS_GFX_DEVICE(pdev);
+
+       /*
+        * We want to start off with all devices in the 1:1 domain, and
+        * take them out later if we find they can't access all of memory.
+        *
+        * However, we can't do this for PCI devices behind bridges,
+        * because all PCI devices behind the same bridge will end up
+        * with the same source-id on their transactions.
+        *
+        * Practically speaking, we can't change things around for these
+        * devices at run-time, because we can't be sure there'll be no
+        * DMA transactions in flight for any of their siblings.
+        *
+        * So PCI devices (unless they're on the root bus) as well as
+        * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
+        * the 1:1 domain, just in _case_ one of their siblings turns out
+        * not to be able to map all of memory.
+        */
+       if (!pdev->is_pcie) {
+               if (!pci_is_root_bus(pdev->bus))
+                       return 0;
+               if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
+                       return 0;
+       } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
+               return 0;
+
+       /*
+        * At boot time (startup != 0) we don't yet know whether a device is
+        * 64-bit capable, so assume it is; it can be taken out of the 1:1
+        * domain later. Otherwise require a DMA mask wider than 32 bits.
+        */
+       if (!startup)
+               return pdev->dma_mask > DMA_BIT_MASK(32);
+
+       return 1;
+}
+
 static int iommu_prepare_static_identity_mapping(void)
 {
        struct pci_dev *pdev = NULL;
@@ -2123,16 +2165,18 @@ static int iommu_prepare_static_identity_mapping(void)
                return -EFAULT;
 
        for_each_pci_dev(pdev) {
-               printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
-                      pci_name(pdev));
+               if (iommu_should_identity_map(pdev, 1)) {
+                       printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
+                              pci_name(pdev));
 
-               ret = domain_context_mapping(si_domain, pdev,
-                                            CONTEXT_TT_MULTI_LEVEL);
-               if (ret)
-                       return ret;
-               ret = domain_add_dev_info(si_domain, pdev);
-               if (ret)
-                       return ret;
+                       ret = domain_context_mapping(si_domain, pdev,
+                                                    CONTEXT_TT_MULTI_LEVEL);
+                       if (ret)
+                               return ret;
+                       ret = domain_add_dev_info(si_domain, pdev);
+                       if (ret)
+                               return ret;
+               }
        }
 
        return 0;
@@ -2287,6 +2331,10 @@ int __init init_dmars(void)
         * identity mapping if iommu_identity_mapping is set.
         */
        if (!iommu_pass_through) {
+#ifdef CONFIG_DMAR_BROKEN_GFX_WA
+               if (!iommu_identity_mapping)
+                       iommu_identity_mapping = 2;
+#endif
                if (iommu_identity_mapping)
                        iommu_prepare_static_identity_mapping();
                /*
@@ -2364,15 +2412,15 @@ error:
        return ret;
 }
 
+/* Returns the size in VTD pages, rounded up to a whole number of MM pages */
 static inline unsigned long aligned_nrpages(unsigned long host_addr,
                                             size_t size)
 {
        host_addr &= ~PAGE_MASK;
-       host_addr += size + PAGE_SIZE - 1;
-
-       return host_addr >> VTD_PAGE_SHIFT;
+       return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
 }
 
+/* This takes a number of _MM_ pages, not VTD pages */
 static struct iova *intel_alloc_iova(struct device *dev,
                                     struct dmar_domain *domain,
                                     unsigned long nrpages, uint64_t dma_mask)
@@ -2404,8 +2452,7 @@ static struct iova *intel_alloc_iova(struct device *dev,
        return iova;
 }
 
-static struct dmar_domain *
-get_valid_domain_for_dev(struct pci_dev *pdev)
+static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
 {
        struct dmar_domain *domain;
        int ret;
@@ -2433,22 +2480,42 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
        return domain;
 }
 
+static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
+{
+       struct device_domain_info *info;
+
+       /* Fast path without a lock; assumes no domain exit in normal case */
+       info = dev->dev.archdata.iommu;
+       if (likely(info)) /* NOTE(review): dummy info is not filtered here */
+               return info->domain;
+
+       return __get_valid_domain_for_dev(dev); /* archdata.iommu is NULL */
+}
+
 static int iommu_dummy(struct pci_dev *pdev)
 {
        return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
 }
 
 /* Check if the pdev needs to go through non-identity map and unmap process.*/
-static int iommu_no_mapping(struct pci_dev *pdev)
+static int iommu_no_mapping(struct device *dev)
 {
+       struct pci_dev *pdev;
        int found;
 
+       if (unlikely(dev->bus != &pci_bus_type))
+               return 1;
+
+       pdev = to_pci_dev(dev);
+       if (iommu_dummy(pdev))
+               return 1;
+
        if (!iommu_identity_mapping)
-               return iommu_dummy(pdev);
+               return 0;
 
        found = identity_mapping(pdev);
        if (found) {
-               if (pdev->dma_mask > DMA_BIT_MASK(32))
+               if (iommu_should_identity_map(pdev, 0))
                        return 1;
                else {
                        /*
@@ -2465,9 +2532,12 @@ static int iommu_no_mapping(struct pci_dev *pdev)
                 * In case of a detached 64 bit DMA device from vm, the device
                 * is put into si_domain for identity mapping.
                 */
-               if (pdev->dma_mask > DMA_BIT_MASK(32)) {
+               if (iommu_should_identity_map(pdev, 0)) {
                        int ret;
                        ret = domain_add_dev_info(si_domain, pdev);
+                       if (ret)
+                               return 0;
+                       ret = domain_context_mapping(si_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
                        if (!ret) {
                                printk(KERN_INFO "64bit %s uses identity mapping\n",
                                       pci_name(pdev));
@@ -2476,7 +2546,7 @@ static int iommu_no_mapping(struct pci_dev *pdev)
                }
        }
 
-       return iommu_dummy(pdev);
+       return 0;
 }
 
 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
@@ -2492,7 +2562,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 
        BUG_ON(dir == DMA_NONE);
 
-       if (iommu_no_mapping(pdev))
+       if (iommu_no_mapping(hwdev))
                return paddr;
 
        domain = get_valid_domain_for_dev(pdev);
@@ -2502,7 +2572,8 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
        iommu = domain_get_iommu(domain);
        size = aligned_nrpages(paddr, size);
 
-       iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
+       iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
+                               pdev->dma_mask);
        if (!iova)
                goto error;
 
@@ -2631,7 +2702,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
        struct iova *iova;
        struct intel_iommu *iommu;
 
-       if (iommu_no_mapping(pdev))
+       if (iommu_no_mapping(dev))
                return;
 
        domain = find_domain(pdev);
@@ -2722,7 +2793,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
        struct iova *iova;
        struct intel_iommu *iommu;
 
-       if (iommu_no_mapping(pdev))
+       if (iommu_no_mapping(hwdev))
                return;
 
        domain = find_domain(pdev);
@@ -2744,11 +2815,18 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
        /* free page tables */
        dma_pte_free_pagetable(domain, start_pfn, last_pfn);
 
-       iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
-                             (last_pfn - start_pfn + 1));
-
-       /* free iova */
-       __free_iova(&domain->iovad, iova);
+       if (intel_iommu_strict) {
+               iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
+                                     last_pfn - start_pfn + 1);
+               /* free iova */
+               __free_iova(&domain->iovad, iova);
+       } else {
+               add_unmap(domain, iova);
+               /*
+                * queue up the release of the unmap to save the 1/6th of the
+                * cpu used up by the iotlb flush operation...
+                */
+       }
 }
 
 static int intel_nontranslate_map_sg(struct device *hddev,
@@ -2781,7 +2859,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
        struct intel_iommu *iommu;
 
        BUG_ON(dir == DMA_NONE);
-       if (iommu_no_mapping(pdev))
+       if (iommu_no_mapping(hwdev))
                return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
 
        domain = get_valid_domain_for_dev(pdev);
@@ -2793,7 +2871,8 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
        for_each_sg(sglist, sg, nelems, i)
                size += aligned_nrpages(sg->offset, sg->length);
 
-       iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
+       iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
+                               pdev->dma_mask);
        if (!iova) {
                sglist->dma_length = 0;
                return 0;