access_process_vm device memory infrastructure
authorRik van Riel <riel@redhat.com>
Thu, 24 Jul 2008 04:27:05 +0000 (21:27 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 24 Jul 2008 17:47:15 +0000 (10:47 -0700)
In order to be able to debug things like the X server and programs using
the PPC Cell SPUs, the debugger needs to be able to access device memory
through ptrace and /proc/pid/mem.

This patch:

Add the generic_access_phys access function and put the hooks in place
to allow access_process_vm to access device or PPC Cell SPU memory.

[riel@redhat.com: Add documentation for the vm_ops->access function]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Benjamin Herrensmidt <benh@kernel.crashing.org>
Cc: Dave Airlie <airlied@linux.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Documentation/filesystems/Locking
arch/Kconfig
arch/x86/Kconfig
arch/x86/mm/ioremap.c
include/asm-x86/io_32.h
include/asm-x86/io_64.h
include/linux/mm.h
mm/memory.c

index 8b22d7d..680fb56 100644 (file)
@@ -510,6 +510,7 @@ prototypes:
        void (*close)(struct vm_area_struct*);
        int (*fault)(struct vm_area_struct*, struct vm_fault *);
        int (*page_mkwrite)(struct vm_area_struct *, struct page *);
+       int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
 
 locking rules:
                BKL     mmap_sem        PageLocked(page)
@@ -517,6 +518,7 @@ open:               no      yes
 close:         no      yes
 fault:         no      yes
 page_mkwrite:  no      yes             no
+access:                no      yes
 
        ->page_mkwrite() is called when a previously read-only page is
 about to become writeable. The file system is responsible for
@@ -525,6 +527,11 @@ taking to lock out truncate, the page range should be verified to be
 within i_size. The page mapping should also be checked that it is not
 NULL.
 
+       ->access() is called when get_user_pages() fails in
+acces_process_vm(), typically used to debug a process through
+/proc/pid/mem or ptrace.  This function is needed only for
+VM_IO | VM_PFNMAP VMAs.
+
 ================================================================================
                        Dubious stuff
 
index 4d5ebbc..6093c0b 100644 (file)
@@ -31,6 +31,9 @@ config KRETPROBES
        def_bool y
        depends on KPROBES && HAVE_KRETPROBES
 
+config HAVE_IOREMAP_PROT
+       def_bool n
+
 config HAVE_KPROBES
        def_bool n
 
index 03980cb..b2ddfcf 100644 (file)
@@ -21,6 +21,7 @@ config X86
        select HAVE_UNSTABLE_SCHED_CLOCK
        select HAVE_IDE
        select HAVE_OPROFILE
+       select HAVE_IOREMAP_PROT
        select HAVE_KPROBES
        select HAVE_KRETPROBES
        select HAVE_DYNAMIC_FTRACE
index 24c1d3c..016f335 100644 (file)
@@ -330,6 +330,14 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
        return (void __iomem *)ret;
 }
 
+void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
+                               unsigned long prot_val)
+{
+       return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK),
+                               __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_prot);
+
 /**
  * iounmap - Free a IO remapping
  * @addr: virtual address from ioremap_*
index 4df44ed..e876d89 100644 (file)
@@ -110,6 +110,8 @@ static inline void *phys_to_virt(unsigned long address)
  */
 extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
 extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
+                               unsigned long prot_val);
 
 /*
  * The default ioremap() behavior is non-cached:
index ddd8058..22995c5 100644 (file)
@@ -175,6 +175,8 @@ extern void early_iounmap(void *addr, unsigned long size);
  */
 extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
 extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
+                               unsigned long prot_val);
 
 /*
  * The default ioremap() behavior is non-cached:
index eb815cf..5c7f8f6 100644 (file)
@@ -170,6 +170,12 @@ struct vm_operations_struct {
        /* notification that a previously read-only page is about to become
         * writable, if an error is returned it will cause a SIGBUS */
        int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
+
+       /* called by access_process_vm when get_user_pages() fails, typically
+        * for use by special VMAs that can switch between memory and hardware
+        */
+       int (*access)(struct vm_area_struct *vma, unsigned long addr,
+                     void *buf, int len, int write);
 #ifdef CONFIG_NUMA
        /*
         * set_policy() op must add a reference to any non-NULL @new mempolicy
@@ -771,6 +777,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
                        struct vm_area_struct *vma);
 void unmap_mapping_range(struct address_space *mapping,
                loff_t const holebegin, loff_t const holelen, int even_cows);
+int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
+                       void *buf, int len, int write);
 
 static inline void unmap_shared_mapping_range(struct address_space *mapping,
                loff_t const holebegin, loff_t const holelen)
index 46dbed4..8735032 100644 (file)
@@ -2751,6 +2751,86 @@ int in_gate_area_no_task(unsigned long addr)
 
 #endif /* __HAVE_ARCH_GATE_AREA */
 
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+static resource_size_t follow_phys(struct vm_area_struct *vma,
+                       unsigned long address, unsigned int flags,
+                       unsigned long *prot)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *ptep, pte;
+       spinlock_t *ptl;
+       resource_size_t phys_addr = 0;
+       struct mm_struct *mm = vma->vm_mm;
+
+       VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP)));
+
+       pgd = pgd_offset(mm, address);
+       if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+               goto no_page_table;
+
+       pud = pud_offset(pgd, address);
+       if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+               goto no_page_table;
+
+       pmd = pmd_offset(pud, address);
+       if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+               goto no_page_table;
+
+       /* We cannot handle huge page PFN maps. Luckily they don't exist. */
+       if (pmd_huge(*pmd))
+               goto no_page_table;
+
+       ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+       if (!ptep)
+               goto out;
+
+       pte = *ptep;
+       if (!pte_present(pte))
+               goto unlock;
+       if ((flags & FOLL_WRITE) && !pte_write(pte))
+               goto unlock;
+       phys_addr = pte_pfn(pte);
+       phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */
+
+       *prot = pgprot_val(pte_pgprot(pte));
+
+unlock:
+       pte_unmap_unlock(ptep, ptl);
+out:
+       return phys_addr;
+no_page_table:
+       return 0;
+}
+
+int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
+                       void *buf, int len, int write)
+{
+       resource_size_t phys_addr;
+       unsigned long prot = 0;
+       void *maddr;
+       int offset = addr & (PAGE_SIZE-1);
+
+       if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+               return -EINVAL;
+
+       phys_addr = follow_phys(vma, addr, write, &prot);
+
+       if (!phys_addr)
+               return -EINVAL;
+
+       maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
+       if (write)
+               memcpy_toio(maddr + offset, buf, len);
+       else
+               memcpy_fromio(buf, maddr + offset, len);
+       iounmap(maddr);
+
+       return len;
+}
+#endif
+
 /*
  * Access another process' address space.
  * Source/target buffer must be kernel space,
@@ -2760,7 +2840,6 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 {
        struct mm_struct *mm;
        struct vm_area_struct *vma;
-       struct page *page;
        void *old_buf = buf;
 
        mm = get_task_mm(tsk);
@@ -2772,28 +2851,44 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
        while (len) {
                int bytes, ret, offset;
                void *maddr;
+               struct page *page = NULL;
 
                ret = get_user_pages(tsk, mm, addr, 1,
                                write, 1, &page, &vma);
-               if (ret <= 0)
-                       break;
-
-               bytes = len;
-               offset = addr & (PAGE_SIZE-1);
-               if (bytes > PAGE_SIZE-offset)
-                       bytes = PAGE_SIZE-offset;
-
-               maddr = kmap(page);
-               if (write) {
-                       copy_to_user_page(vma, page, addr,
-                                         maddr + offset, buf, bytes);
-                       set_page_dirty_lock(page);
+               if (ret <= 0) {
+                       /*
+                        * Check if this is a VM_IO | VM_PFNMAP VMA, which
+                        * we can access using slightly different code.
+                        */
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+                       vma = find_vma(mm, addr);
+                       if (!vma)
+                               break;
+                       if (vma->vm_ops && vma->vm_ops->access)
+                               ret = vma->vm_ops->access(vma, addr, buf,
+                                                         len, write);
+                       if (ret <= 0)
+#endif
+                               break;
+                       bytes = ret;
                } else {
-                       copy_from_user_page(vma, page, addr,
-                                           buf, maddr + offset, bytes);
+                       bytes = len;
+                       offset = addr & (PAGE_SIZE-1);
+                       if (bytes > PAGE_SIZE-offset)
+                               bytes = PAGE_SIZE-offset;
+
+                       maddr = kmap(page);
+                       if (write) {
+                               copy_to_user_page(vma, page, addr,
+                                                 maddr + offset, buf, bytes);
+                               set_page_dirty_lock(page);
+                       } else {
+                               copy_from_user_page(vma, page, addr,
+                                                   buf, maddr + offset, bytes);
+                       }
+                       kunmap(page);
+                       page_cache_release(page);
                }
-               kunmap(page);
-               page_cache_release(page);
                len -= bytes;
                buf += bytes;
                addr += bytes;