ARM: 7979/1: mm: Remove hugetlb warning from Coherent DMA allocator
[pandora-kernel.git] / arch / arm / mm / dma-mapping.c
index ed0efe5..775903e 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/memblock.h>
 #include <linux/slab.h>
 #include <linux/iommu.h>
+#include <linux/io.h>
 #include <linux/vmalloc.h>
 
 #include <asm/memory.h>
@@ -218,114 +219,66 @@ static void __dma_free_buffer(struct page *page, size_t size)
 
 #ifdef CONFIG_MMU
 
-#define CONSISTENT_OFFSET(x)   (((unsigned long)(x) - consistent_base) >> PAGE_SHIFT)
-#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT)
-
-/*
- * These are the page tables (2MB each) covering uncached, DMA consistent allocations
- */
-static pte_t **consistent_pte;
-
-#define DEFAULT_CONSISTENT_DMA_SIZE SZ_2M
+static void *__alloc_from_contiguous(struct device *dev, size_t size,
+                                    pgprot_t prot, struct page **ret_page);
 
-unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE;
+static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
+                                pgprot_t prot, struct page **ret_page,
+                                const void *caller);
 
-void __init init_consistent_dma_size(unsigned long size)
+static void *
+__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
+       const void *caller)
 {
-       unsigned long base = CONSISTENT_END - ALIGN(size, SZ_2M);
+       struct vm_struct *area;
+       unsigned long addr;
 
-       BUG_ON(consistent_pte); /* Check we're called before DMA region init */
-       BUG_ON(base < VMALLOC_END);
+       /*
+        * DMA allocations can be mapped to user space, so let's
+        * set the VM_USERMAP flag too.
+        */
+       area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP,
+                                 caller);
+       if (!area)
+               return NULL;
+       addr = (unsigned long)area->addr;
+       area->phys_addr = __pfn_to_phys(page_to_pfn(page));
 
-       /* Grow region to accommodate specified size  */
-       if (base < consistent_base)
-               consistent_base = base;
+       if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) {
+               vunmap((void *)addr);
+               return NULL;
+       }
+       return (void *)addr;
 }
 
-#include "vmregion.h"
-
-static struct arm_vmregion_head consistent_head = {
-       .vm_lock        = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock),
-       .vm_list        = LIST_HEAD_INIT(consistent_head.vm_list),
-       .vm_end         = CONSISTENT_END,
-};
-
-#ifdef CONFIG_HUGETLB_PAGE
-#warning ARM Coherent DMA allocator does not (yet) support huge TLB
-#endif
-
-/*
- * Initialise the consistent memory allocation.
- */
-static int __init consistent_init(void)
+static void __dma_free_remap(void *cpu_addr, size_t size)
 {
-       int ret = 0;
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
-       int i = 0;
-       unsigned long base = consistent_base;
-       unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT;
-
-       if (IS_ENABLED(CONFIG_CMA) && !IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))
-               return 0;
-
-       consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
-       if (!consistent_pte) {
-               pr_err("%s: no memory\n", __func__);
-               return -ENOMEM;
+       unsigned int flags = VM_ARM_DMA_CONSISTENT | VM_USERMAP;
+       struct vm_struct *area = find_vm_area(cpu_addr);
+       if (!area || (area->flags & flags) != flags) {
+               WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+               return;
        }
-
-       pr_debug("DMA memory: 0x%08lx - 0x%08lx:\n", base, CONSISTENT_END);
-       consistent_head.vm_start = base;
-
-       do {
-               pgd = pgd_offset(&init_mm, base);
-
-               pud = pud_alloc(&init_mm, pgd, base);
-               if (!pud) {
-                       pr_err("%s: no pud tables\n", __func__);
-                       ret = -ENOMEM;
-                       break;
-               }
-
-               pmd = pmd_alloc(&init_mm, pud, base);
-               if (!pmd) {
-                       pr_err("%s: no pmd tables\n", __func__);
-                       ret = -ENOMEM;
-                       break;
-               }
-               WARN_ON(!pmd_none(*pmd));
-
-               pte = pte_alloc_kernel(pmd, base);
-               if (!pte) {
-                       pr_err("%s: no pte tables\n", __func__);
-                       ret = -ENOMEM;
-                       break;
-               }
-
-               consistent_pte[i++] = pte;
-               base += PMD_SIZE;
-       } while (base < CONSISTENT_END);
-
-       return ret;
+       unmap_kernel_range((unsigned long)cpu_addr, size);
+       vunmap(cpu_addr);
 }
-core_initcall(consistent_init);
 
-static void *__alloc_from_contiguous(struct device *dev, size_t size,
-                                    pgprot_t prot, struct page **ret_page);
-
-static struct arm_vmregion_head coherent_head = {
-       .vm_lock        = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock),
-       .vm_list        = LIST_HEAD_INIT(coherent_head.vm_list),
+struct dma_pool {
+       size_t size;
+       spinlock_t lock;
+       unsigned long *bitmap;
+       unsigned long nr_pages;
+       void *vaddr;
+       struct page *page;
 };
 
-size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8;
+static struct dma_pool atomic_pool = {
+       .size = SZ_256K,
+};
 
 static int __init early_coherent_pool(char *p)
 {
-       coherent_pool_size = memparse(p, &p);
+       atomic_pool.size = memparse(p, &p);
        return 0;
 }
 early_param("coherent_pool", early_coherent_pool);
@@ -333,32 +286,45 @@ early_param("coherent_pool", early_coherent_pool);
 /*
  * Initialise the coherent pool for atomic allocations.
  */
-static int __init coherent_init(void)
+static int __init atomic_pool_init(void)
 {
+       struct dma_pool *pool = &atomic_pool;
        pgprot_t prot = pgprot_dmacoherent(pgprot_kernel);
-       size_t size = coherent_pool_size;
+       unsigned long nr_pages = pool->size >> PAGE_SHIFT;
+       unsigned long *bitmap;
        struct page *page;
        void *ptr;
+       int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long);
 
-       if (!IS_ENABLED(CONFIG_CMA))
-               return 0;
+       bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+       if (!bitmap)
+               goto no_bitmap;
 
-       ptr = __alloc_from_contiguous(NULL, size, prot, &page);
+       if (IS_ENABLED(CONFIG_CMA))
+               ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page);
+       else
+               ptr = __alloc_remap_buffer(NULL, pool->size, GFP_KERNEL, prot,
+                                          &page, NULL);
        if (ptr) {
-               coherent_head.vm_start = (unsigned long) ptr;
-               coherent_head.vm_end = (unsigned long) ptr + size;
-               printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n",
-                      (unsigned)size / 1024);
+               spin_lock_init(&pool->lock);
+               pool->vaddr = ptr;
+               pool->page = page;
+               pool->bitmap = bitmap;
+               pool->nr_pages = nr_pages;
+               pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n",
+                      (unsigned)pool->size / 1024);
                return 0;
        }
-       printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
-              (unsigned)size / 1024);
+       kfree(bitmap);
+no_bitmap:
+       pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
+              (unsigned)pool->size / 1024);
        return -ENOMEM;
 }
 /*
  * CMA is activated by core_initcall, so we must be called after it.
  */
-postcore_initcall(coherent_init);
+postcore_initcall(atomic_pool_init);
 
 struct dma_contig_early_reserve {
        phys_addr_t base;
@@ -406,112 +372,6 @@ void __init dma_contiguous_remap(void)
        }
 }
 
-static void *
-__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
-       const void *caller)
-{
-       struct arm_vmregion *c;
-       size_t align;
-       int bit;
-
-       if (!consistent_pte) {
-               pr_err("%s: not initialised\n", __func__);
-               dump_stack();
-               return NULL;
-       }
-
-       /*
-        * Align the virtual region allocation - maximum alignment is
-        * a section size, minimum is a page size.  This helps reduce
-        * fragmentation of the DMA space, and also prevents allocations
-        * smaller than a section from crossing a section boundary.
-        */
-       bit = fls(size - 1);
-       if (bit > SECTION_SHIFT)
-               bit = SECTION_SHIFT;
-       align = 1 << bit;
-
-       /*
-        * Allocate a virtual address in the consistent mapping region.
-        */
-       c = arm_vmregion_alloc(&consistent_head, align, size,
-                           gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller);
-       if (c) {
-               pte_t *pte;
-               int idx = CONSISTENT_PTE_INDEX(c->vm_start);
-               u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
-
-               pte = consistent_pte[idx] + off;
-               c->priv = page;
-
-               do {
-                       BUG_ON(!pte_none(*pte));
-
-                       set_pte_ext(pte, mk_pte(page, prot), 0);
-                       page++;
-                       pte++;
-                       off++;
-                       if (off >= PTRS_PER_PTE) {
-                               off = 0;
-                               pte = consistent_pte[++idx];
-                       }
-               } while (size -= PAGE_SIZE);
-
-               dsb();
-
-               return (void *)c->vm_start;
-       }
-       return NULL;
-}
-
-static void __dma_free_remap(void *cpu_addr, size_t size)
-{
-       struct arm_vmregion *c;
-       unsigned long addr;
-       pte_t *ptep;
-       int idx;
-       u32 off;
-
-       c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr);
-       if (!c) {
-               pr_err("%s: trying to free invalid coherent area: %p\n",
-                      __func__, cpu_addr);
-               dump_stack();
-               return;
-       }
-
-       if ((c->vm_end - c->vm_start) != size) {
-               pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
-                      __func__, c->vm_end - c->vm_start, size);
-               dump_stack();
-               size = c->vm_end - c->vm_start;
-       }
-
-       idx = CONSISTENT_PTE_INDEX(c->vm_start);
-       off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
-       ptep = consistent_pte[idx] + off;
-       addr = c->vm_start;
-       do {
-               pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
-
-               ptep++;
-               addr += PAGE_SIZE;
-               off++;
-               if (off >= PTRS_PER_PTE) {
-                       off = 0;
-                       ptep = consistent_pte[++idx];
-               }
-
-               if (pte_none(pte) || !pte_present(pte))
-                       pr_crit("%s: bad page in kernel page table\n",
-                               __func__);
-       } while (size -= PAGE_SIZE);
-
-       flush_tlb_kernel_range(c->vm_start, c->vm_end);
-
-       arm_vmregion_free(&consistent_head, c);
-}
-
 static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
                            void *data)
 {
@@ -552,16 +412,17 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
        return ptr;
 }
 
-static void *__alloc_from_pool(struct device *dev, size_t size,
-                              struct page **ret_page, const void *caller)
+static void *__alloc_from_pool(size_t size, struct page **ret_page)
 {
-       struct arm_vmregion *c;
-       size_t align;
+       struct dma_pool *pool = &atomic_pool;
+       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       unsigned int pageno;
+       unsigned long flags;
+       void *ptr = NULL;
+       unsigned long align_mask;
 
-       if (!coherent_head.vm_start) {
-               printk(KERN_ERR "%s: coherent pool not initialised!\n",
-                      __func__);
-               dump_stack();
+       if (!pool->vaddr) {
+               WARN(1, "coherent pool not initialised!\n");
                return NULL;
        }
 
@@ -570,36 +431,46 @@ static void *__alloc_from_pool(struct device *dev, size_t size,
         * small, so align them to their order in pages, minimum is a page
         * size. This helps reduce fragmentation of the DMA space.
         */
-       align = PAGE_SIZE << get_order(size);
-       c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller);
-       if (c) {
-               void *ptr = (void *)c->vm_start;
-               struct page *page = virt_to_page(ptr);
-               *ret_page = page;
-               return ptr;
+       align_mask = (1 << get_order(size)) - 1;
+
+       spin_lock_irqsave(&pool->lock, flags);
+       pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages,
+                                           0, count, align_mask);
+       if (pageno < pool->nr_pages) {
+               bitmap_set(pool->bitmap, pageno, count);
+               ptr = pool->vaddr + PAGE_SIZE * pageno;
+               *ret_page = pool->page + pageno;
+       } else {
+               pr_err_once("ERROR: %u KiB atomic DMA coherent pool is too small!\n"
+                           "Please increase it with coherent_pool= kernel parameter!\n",
+                           (unsigned)pool->size / 1024);
        }
-       return NULL;
+       spin_unlock_irqrestore(&pool->lock, flags);
+
+       return ptr;
 }
 
-static int __free_from_pool(void *cpu_addr, size_t size)
+static int __free_from_pool(void *start, size_t size)
 {
-       unsigned long start = (unsigned long)cpu_addr;
-       unsigned long end = start + size;
-       struct arm_vmregion *c;
+       struct dma_pool *pool = &atomic_pool;
+       unsigned long pageno, count;
+       unsigned long flags;
 
-       if (start < coherent_head.vm_start || end > coherent_head.vm_end)
+       if (start < pool->vaddr || start > pool->vaddr + pool->size)
                return 0;
 
-       c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start);
-
-       if ((c->vm_end - c->vm_start) != size) {
-               printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
-                      __func__, c->vm_end - c->vm_start, size);
-               dump_stack();
-               size = c->vm_end - c->vm_start;
+       if (start + size > pool->vaddr + pool->size) {
+               WARN(1, "freeing wrong coherent size from pool\n");
+               return 0;
        }
 
-       arm_vmregion_free(&coherent_head, c);
+       pageno = (start - pool->vaddr) >> PAGE_SHIFT;
+       count = size >> PAGE_SHIFT;
+
+       spin_lock_irqsave(&pool->lock, flags);
+       bitmap_clear(pool->bitmap, pageno, count);
+       spin_unlock_irqrestore(&pool->lock, flags);
+
        return 1;
 }
 
@@ -644,7 +515,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
 
 #define __get_dma_pgprot(attrs, prot)  __pgprot(0)
 #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c)     NULL
-#define __alloc_from_pool(dev, size, ret_page, c)              NULL
+#define __alloc_from_pool(size, ret_page)                      NULL
 #define __alloc_from_contiguous(dev, size, prot, ret)          NULL
 #define __free_from_pool(cpu_addr, size)                       0
 #define __free_from_contiguous(dev, page, size)                        do { } while (0)
@@ -702,10 +573,10 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 
        if (arch_is_coherent() || nommu())
                addr = __alloc_simple_buffer(dev, size, gfp, &page);
+       else if (gfp & GFP_ATOMIC)
+               addr = __alloc_from_pool(size, &page);
        else if (!IS_ENABLED(CONFIG_CMA))
                addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
-       else if (gfp & GFP_ATOMIC)
-               addr = __alloc_from_pool(dev, size, &page, caller);
        else
                addr = __alloc_from_contiguous(dev, size, prot, &page);
 
@@ -741,16 +612,22 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 {
        int ret = -ENXIO;
 #ifdef CONFIG_MMU
+       unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+       unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
        unsigned long pfn = dma_to_pfn(dev, dma_addr);
+       unsigned long off = vma->vm_pgoff;
+
        vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
 
        if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
                return ret;
 
-       ret = remap_pfn_range(vma, vma->vm_start,
-                             pfn + vma->vm_pgoff,
-                             vma->vm_end - vma->vm_start,
-                             vma->vm_page_prot);
+       if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
+               ret = remap_pfn_range(vma, vma->vm_start,
+                                     pfn + off,
+                                     vma->vm_end - vma->vm_start,
+                                     vma->vm_page_prot);
+       }
 #endif /* CONFIG_MMU */
 
        return ret;
@@ -771,12 +648,12 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 
        if (arch_is_coherent() || nommu()) {
                __dma_free_buffer(page, size);
+       } else if (__free_from_pool(cpu_addr, size)) {
+               return;
        } else if (!IS_ENABLED(CONFIG_CMA)) {
                __dma_free_remap(cpu_addr, size);
                __dma_free_buffer(page, size);
        } else {
-               if (__free_from_pool(cpu_addr, size))
-                       return;
                /*
                 * Non-atomic allocations cannot be freed with IRQs disabled
                 */
@@ -866,10 +743,24 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
        dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
 
        /*
-        * Mark the D-cache clean for this page to avoid extra flushing.
+        * Mark the D-cache clean for these pages to avoid extra flushing.
         */
-       if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
-               set_bit(PG_dcache_clean, &page->flags);
+       if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
+               unsigned long pfn;
+               size_t left = size;
+
+               pfn = page_to_pfn(page) + off / PAGE_SIZE;
+               off %= PAGE_SIZE;
+               if (off) {
+                       pfn++;
+                       left -= PAGE_SIZE - off;
+               }
+               while (left >= PAGE_SIZE) {
+                       page = pfn_to_page(pfn++);
+                       set_bit(PG_dcache_clean, &page->flags);
+                       left -= PAGE_SIZE;
+               }
+       }
 }
 
 /**
@@ -1000,9 +891,6 @@ static int arm_dma_set_mask(struct device *dev, u64 dma_mask)
 
 static int __init dma_debug_do_init(void)
 {
-#ifdef CONFIG_MMU
-       arm_vmregion_create_proc("dma-mappings", &consistent_head);
-#endif
        dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
        return 0;
 }
@@ -1069,7 +957,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t
                return NULL;
 
        while (count) {
-               int j, order = __ffs(count);
+               int j, order = __fls(count);
 
                pages[i] = alloc_pages(gfp | __GFP_NOWARN, order);
                while (!pages[i] && order)
@@ -1090,10 +978,10 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t
 
        return pages;
 error:
-       while (--i)
+       while (i--)
                if (pages[i])
                        __free_pages(pages[i], 0);
-       if (array_size < PAGE_SIZE)
+       if (array_size <= PAGE_SIZE)
                kfree(pages);
        else
                vfree(pages);
@@ -1108,7 +996,7 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s
        for (i = 0; i < count; i++)
                if (pages[i])
                        __free_pages(pages[i], 0);
-       if (array_size < PAGE_SIZE)
+       if (array_size <= PAGE_SIZE)
                kfree(pages);
        else
                vfree(pages);
@@ -1119,61 +1007,32 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t s
  * Create a CPU mapping for a specified pages
  */
 static void *
-__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot)
+__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
+                   const void *caller)
 {
-       struct arm_vmregion *c;
-       size_t align;
-       size_t count = size >> PAGE_SHIFT;
-       int bit;
+       unsigned int i, nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       struct vm_struct *area;
+       unsigned long p;
 
-       if (!consistent_pte[0]) {
-               pr_err("%s: not initialised\n", __func__);
-               dump_stack();
+       area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP,
+                                 caller);
+       if (!area)
                return NULL;
-       }
-
-       /*
-        * Align the virtual region allocation - maximum alignment is
-        * a section size, minimum is a page size.  This helps reduce
-        * fragmentation of the DMA space, and also prevents allocations
-        * smaller than a section from crossing a section boundary.
-        */
-       bit = fls(size - 1);
-       if (bit > SECTION_SHIFT)
-               bit = SECTION_SHIFT;
-       align = 1 << bit;
-
-       /*
-        * Allocate a virtual address in the consistent mapping region.
-        */
-       c = arm_vmregion_alloc(&consistent_head, align, size,
-                           gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL);
-       if (c) {
-               pte_t *pte;
-               int idx = CONSISTENT_PTE_INDEX(c->vm_start);
-               int i = 0;
-               u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
-
-               pte = consistent_pte[idx] + off;
-               c->priv = pages;
-
-               do {
-                       BUG_ON(!pte_none(*pte));
-
-                       set_pte_ext(pte, mk_pte(pages[i], prot), 0);
-                       pte++;
-                       off++;
-                       i++;
-                       if (off >= PTRS_PER_PTE) {
-                               off = 0;
-                               pte = consistent_pte[++idx];
-                       }
-               } while (i < count);
 
-               dsb();
+       area->pages = pages;
+       area->nr_pages = nr_pages;
+       p = (unsigned long)area->addr;
 
-               return (void *)c->vm_start;
+       for (i = 0; i < nr_pages; i++) {
+               phys_addr_t phys = __pfn_to_phys(page_to_pfn(pages[i]));
+               if (ioremap_page_range(p, p + PAGE_SIZE, phys, prot))
+                       goto err;
+               p += PAGE_SIZE;
        }
+       return area->addr;
+err:
+       unmap_kernel_range((unsigned long)area->addr, size);
+       vunmap(area->addr);
        return NULL;
 }
 
@@ -1232,6 +1091,19 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si
        return 0;
 }
 
+static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs)
+{
+       struct vm_struct *area;
+
+       if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
+               return cpu_addr;
+
+       area = find_vm_area(cpu_addr);
+       if (area && (area->flags & VM_ARM_DMA_CONSISTENT))
+               return area->pages;
+       return NULL;
+}
+
 static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
            dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
 {
@@ -1250,7 +1122,11 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
        if (*handle == DMA_ERROR_CODE)
                goto err_buffer;
 
-       addr = __iommu_alloc_remap(pages, size, gfp, prot);
+       if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
+               return pages;
+
+       addr = __iommu_alloc_remap(pages, size, gfp, prot,
+                                  __builtin_return_address(0));
        if (!addr)
                goto err_mapping;
 
@@ -1267,31 +1143,25 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
                    void *cpu_addr, dma_addr_t dma_addr, size_t size,
                    struct dma_attrs *attrs)
 {
-       struct arm_vmregion *c;
+       unsigned long uaddr = vma->vm_start;
+       unsigned long usize = vma->vm_end - vma->vm_start;
+       struct page **pages = __iommu_get_pages(cpu_addr, attrs);
 
        vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
-       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
-
-       if (c) {
-               struct page **pages = c->priv;
 
-               unsigned long uaddr = vma->vm_start;
-               unsigned long usize = vma->vm_end - vma->vm_start;
-               int i = 0;
-
-               do {
-                       int ret;
+       if (!pages)
+               return -ENXIO;
 
-                       ret = vm_insert_page(vma, uaddr, pages[i++]);
-                       if (ret) {
-                               pr_err("Remapping memory, error: %d\n", ret);
-                               return ret;
-                       }
+       do {
+               int ret = vm_insert_page(vma, uaddr, *pages++);
+               if (ret) {
+                       pr_err("Remapping memory failed: %d\n", ret);
+                       return ret;
+               }
+               uaddr += PAGE_SIZE;
+               usize -= PAGE_SIZE;
+       } while (usize > 0);
 
-                       uaddr += PAGE_SIZE;
-                       usize -= PAGE_SIZE;
-               } while (usize > 0);
-       }
        return 0;
 }
 
@@ -1302,16 +1172,21 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
                          dma_addr_t handle, struct dma_attrs *attrs)
 {
-       struct arm_vmregion *c;
+       struct page **pages = __iommu_get_pages(cpu_addr, attrs);
        size = PAGE_ALIGN(size);
 
-       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
-       if (c) {
-               struct page **pages = c->priv;
-               __dma_free_remap(cpu_addr, size);
-               __iommu_remove_mapping(dev, handle, size);
-               __iommu_free_buffer(dev, pages, size);
+       if (!pages) {
+               WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+               return;
        }
+
+       if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) {
+               unmap_kernel_range((unsigned long)cpu_addr, size);
+               vunmap(cpu_addr);
+       }
+
+       __iommu_remove_mapping(dev, handle, size);
+       __iommu_free_buffer(dev, pages, size);
 }
 
 /*