Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6
diff --git a/mm/memory.c b/mm/memory.c
index 20d5f74..61e66f0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1359,7 +1359,7 @@ split_fallthrough:
                 */
                mark_page_accessed(page);
        }
-       if (flags & FOLL_MLOCK) {
+       if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
                /*
                 * The preliminary mapping check is mainly to avoid the
                 * pointless overhead of lock_page on the ZERO_PAGE
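
Editor's note: the added VM_LOCKED test means follow_page() only takes the mlock path while the vma is still locked at walk time, so pages are not marked mlocked after the flag has been cleared (e.g. by a concurrent munlock). A simplified sketch of the branch this condition guards, reconstructed from the surrounding function of this era, so details are indicative rather than verbatim:

    if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
            /* skip the ZERO_PAGE and pages vmscan already locked */
            if (page->mapping && trylock_page(page)) {
                    lru_add_drain();        /* push cached pages to LRU */
                    if (page->mapping)      /* re-check: may be truncated */
                            mlock_vma_page(page);
                    unlock_page(page);
            }
    }
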
@@ -1410,6 +1410,12 @@ no_page_table:
        return page;
 }
 
+static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
+{
+       return stack_guard_page_start(vma, addr) ||
+              stack_guard_page_end(vma, addr+PAGE_SIZE);
+}
+
 /**
  * __get_user_pages() - pin user pages in memory
  * @tsk:       task_struct of target task
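
Editor's note: the new stack_guard_page() helper combines two guard-page tests. A minimal sketch of what those tests amount to, assuming the include/linux/mm.h helpers of this series and omitting their extra check that a neighbouring vma does not continue the stack:

    /* Simplified: the first page of a downward-growing stack, or the
     * page just past the top of an upward-growing one, is the guard. */
    static inline int stack_guard_page_start(struct vm_area_struct *vma,
                                             unsigned long addr)
    {
            return (vma->vm_flags & VM_GROWSDOWN) && addr == vma->vm_start;
    }

    static inline int stack_guard_page_end(struct vm_area_struct *vma,
                                           unsigned long addr)
    {
            return (vma->vm_flags & VM_GROWSUP) && addr == vma->vm_end;
    }
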
@@ -1486,9 +1492,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                struct vm_area_struct *vma;
 
                vma = find_extend_vma(mm, start);
-               if (!vma && in_gate_area(tsk, start)) {
+               if (!vma && in_gate_area(mm, start)) {
                        unsigned long pg = start & PAGE_MASK;
-                       struct vm_area_struct *gate_vma = get_gate_vma(tsk);
                        pgd_t *pgd;
                        pud_t *pud;
                        pmd_t *pmd;
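
Editor's note: in_gate_area() now keys on the mm instead of the task; together with the get_gate_vma() change further down, gate-area lookups no longer need a task_struct. An illustrative (hypothetical) caller that benefits:

    /* Illustrative only: a caller holding just an mm reference can
     * now resolve the gate vma; no task_struct is required. */
    struct mm_struct *mm = get_task_mm(tsk);
    if (mm) {
            if (in_gate_area(mm, addr))
                    vma = get_gate_vma(mm);
            mmput(mm);
    }
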
@@ -1513,10 +1518,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                pte_unmap(pte);
                                return i ? : -EFAULT;
                        }
+                       vma = get_gate_vma(mm);
                        if (pages) {
                                struct page *page;
 
-                               page = vm_normal_page(gate_vma, start, *pte);
+                               page = vm_normal_page(vma, start, *pte);
                                if (!page) {
                                        if (!(gup_flags & FOLL_DUMP) &&
                                             is_zero_pfn(pte_pfn(*pte)))
@@ -1530,12 +1536,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                get_page(page);
                        }
                        pte_unmap(pte);
-                       if (vmas)
-                               vmas[i] = gate_vma;
-                       i++;
-                       start += PAGE_SIZE;
-                       nr_pages--;
-                       continue;
+                       goto next_page;
                }
 
                if (!vma ||
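
Editor's note: the gate-page branch previously duplicated the per-page bookkeeping inline; it now sets vma to the gate vma and jumps to the shared next_page label introduced in the hunk further down. Condensed (the advance of start and nr_pages follows just past the shown context), the tail both paths now share is:

    next_page:
            if (vmas)
                    vmas[i] = vma;          /* gate vma or the normal one */
            i++;                            /* one more page handled */
            start += PAGE_SIZE;
            nr_pages--;
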
@@ -1565,6 +1566,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                int ret;
                                unsigned int fault_flags = 0;
 
+                               /* For mlock, just skip the stack guard page. */
+                               if (foll_flags & FOLL_MLOCK) {
+                                       if (stack_guard_page(vma, start))
+                                               goto next_page;
+                               }
                                if (foll_flags & FOLL_WRITE)
                                        fault_flags |= FAULT_FLAG_WRITE;
                                if (nonblocking)
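
Editor's note on the FOLL_MLOCK skip: faulting in a stack guard page would expand the stack, which mlock() must not do as a side effect, so the walk counts the page as handled and moves on. Illustrative layout for the VM_GROWSDOWN case:

    /*
     * VM_GROWSDOWN stack (illustrative):
     *
     *   vm_start -> +--------------+  guard page: a fault here would
     *               |  guard page  |  expand the stack downwards, so
     *               +--------------+  FOLL_MLOCK walks past it instead
     *               | stack pages  |
     *   vm_end   -> +--------------+
     */
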
@@ -1591,10 +1597,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                                return i ? i : -EFAULT;
                                        BUG();
                                }
-                               if (ret & VM_FAULT_MAJOR)
-                                       tsk->maj_flt++;
-                               else
-                                       tsk->min_flt++;
+
+                               if (tsk) {
+                                       if (ret & VM_FAULT_MAJOR)
+                                               tsk->maj_flt++;
+                                       else
+                                               tsk->min_flt++;
+                               }
 
                                if (ret & VM_FAULT_RETRY) {
                                        if (nonblocking)
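
Editor's note: guarding the counters makes the task optional. __get_user_pages() still services the fault against the mm; it merely skips maj_flt/min_flt accounting when no task is supplied, which is what permits the tsk == NULL callers added at the bottom of this patch. In condensed form:

    ret = handle_mm_fault(mm, vma, start, fault_flags);
    if (tsk) {                      /* NULL when no task is known */
            if (ret & VM_FAULT_MAJOR)
                    tsk->maj_flt++;
            else
                    tsk->min_flt++;
    }
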
@@ -1628,6 +1637,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                flush_anon_page(vma, page, start);
                                flush_dcache_page(page);
                        }
+next_page:
                        if (vmas)
                                vmas[i] = vma;
                        i++;
@@ -1641,7 +1651,8 @@ EXPORT_SYMBOL(__get_user_pages);
 
 /**
  * get_user_pages() - pin user pages in memory
- * @tsk:       task_struct of target task
+ * @tsk:       the task_struct to use for page fault accounting, or
+ *             NULL if faults are not to be recorded.
  * @mm:                mm_struct of target mm
  * @start:     starting user address
  * @nr_pages:  number of pages from start to pin
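
Editor's note: a hedged usage sketch of the relaxed contract, pinning a page of a foreign mm with no task at hand. Variable names are hypothetical; the signature matches the one documented above, and the caller must hold mmap_sem as usual:

    struct page *page;
    int ret;

    down_read(&mm->mmap_sem);
    ret = get_user_pages(NULL, mm,          /* NULL: skip fault accounting */
                         addr & PAGE_MASK, 1,
                         1 /* write */, 0 /* force */,
                         &page, NULL);
    up_read(&mm->mmap_sem);
    if (ret == 1)
            put_page(page);                 /* drop the pin when done */
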
@@ -3382,7 +3393,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         * run pte_offset_map on the pmd, if a huge pmd could
         * materialize from under us from a different thread.
         */
-       if (unlikely(__pte_alloc(mm, vma, pmd, address)))
+       if (unlikely(pmd_none(*pmd)) && __pte_alloc(mm, vma, pmd, address))
                return VM_FAULT_OOM;
        /* if a huge pmd materialized from under us just retry later */
        if (unlikely(pmd_trans_huge(*pmd)))
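
Editor's note: the unlocked pmd_none() test is an optimization. Without it, every fault on an already-populated pmd (including a huge pmd raced in by another thread) would allocate a pte page only to free it again. Correctness still rests on __pte_alloc() re-checking under the lock, roughly as follows (simplified sketch; the real function also handles splitting huge pmds):

    int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
                    pmd_t *pmd, unsigned long address)
    {
            pgtable_t new = pte_alloc_one(mm, address);
            if (!new)
                    return -ENOMEM;

            spin_lock(&mm->page_table_lock);
            if (likely(pmd_none(*pmd))) {   /* re-check under the lock */
                    pmd_populate(mm, pmd, new);
                    new = NULL;             /* installed, keep it */
            }
            spin_unlock(&mm->page_table_lock);
            if (new)
                    pte_free(mm, new);      /* lost the race, drop ours */
            return 0;
    }
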
@@ -3499,7 +3510,7 @@ static int __init gate_vma_init(void)
 __initcall(gate_vma_init);
 #endif
 
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
 #ifdef AT_SYSINFO_EHDR
        return &gate_vma;
@@ -3508,7 +3519,7 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
 #endif
 }
 
-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
 {
 #ifdef AT_SYSINFO_EHDR
        if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
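
Editor's note: both gate helpers drop their task argument; get_gate_vma() now takes the mm and in_gate_area_no_task() becomes in_gate_area_no_mm(). As the bodies show, the answers never depended on the task on fixed-gate architectures, so the rename makes the mm-only contract explicit. The companion declarations presumably look like this (sketch, assuming the usual include/linux/mm.h arrangement):

    #ifdef __HAVE_ARCH_GATE_AREA
    extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
    extern int in_gate_area_no_mm(unsigned long addr);
    extern int in_gate_area(struct mm_struct *mm, unsigned long addr);
    #else
    static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
    {
            return NULL;
    }
    static inline int in_gate_area_no_mm(unsigned long addr) { return 0; }
    static inline int in_gate_area(struct mm_struct *mm, unsigned long addr)
    {
            return 0;
    }
    #endif
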
@@ -3649,20 +3660,15 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 #endif
 
 /*
- * Access another process' address space.
- * Source/target buffer must be kernel space,
- * Do not walk the page table directly, use get_user_pages
+ * Access another process' address space as given in mm.  If tsk is
+ * non-NULL, use it for page fault accounting.
  */
-int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
+               unsigned long addr, void *buf, int len, int write)
 {
-       struct mm_struct *mm;
        struct vm_area_struct *vma;
        void *old_buf = buf;
 
-       mm = get_task_mm(tsk);
-       if (!mm)
-               return 0;
-
        down_read(&mm->mmap_sem);
        /* ignore errors, just check how much was successfully transferred */
        while (len) {
@@ -3679,7 +3685,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
                         */
 #ifdef CONFIG_HAVE_IOREMAP_PROT
                        vma = find_vma(mm, addr);
-                       if (!vma)
+                       if (!vma || vma->vm_start > addr)
                                break;
                        if (vma->vm_ops && vma->vm_ops->access)
                                ret = vma->vm_ops->access(vma, addr, buf,
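
Editor's note: the added vm_start test fixes a subtle hole. find_vma() returns the lowest vma with vm_end > addr, which may lie entirely above addr, so the old !vma check treated an unmapped gap below some vma as mapped. The semantics, illustrated:

    /*
     * find_vma(mm, addr) outcomes (illustrative):
     *   addr inside a vma:  returns it, vma->vm_start <= addr
     *   addr in a hole:     returns the vma above, vma->vm_start > addr
     *   addr above all:     returns NULL
     */
    vma = find_vma(mm, addr);
    if (!vma || vma->vm_start > addr)
            break;                  /* addr is genuinely unmapped */
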
@@ -3711,11 +3717,47 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
                addr += bytes;
        }
        up_read(&mm->mmap_sem);
-       mmput(mm);
 
        return buf - old_buf;
 }
 
+/**
+ * access_remote_vm - access another process' address space
+ * @mm:                the mm_struct of the target address space
+ * @addr:      start address to access
+ * @buf:       source or destination buffer
+ * @len:       number of bytes to transfer
+ * @write:     whether the access is a write
+ *
+ * The caller must hold a reference on @mm.
+ */
+int access_remote_vm(struct mm_struct *mm, unsigned long addr,
+               void *buf, int len, int write)
+{
+       return __access_remote_vm(NULL, mm, addr, buf, len, write);
+}
+
+/*
+ * Access another process' address space.
+ * Source/target buffer must be kernel space;
+ * do not walk the page table directly, use get_user_pages.
+ */
+int access_process_vm(struct task_struct *tsk, unsigned long addr,
+               void *buf, int len, int write)
+{
+       struct mm_struct *mm;
+       int ret;
+
+       mm = get_task_mm(tsk);
+       if (!mm)
+               return 0;
+
+       ret = __access_remote_vm(tsk, mm, addr, buf, len, write);
+       mmput(mm);
+
+       return ret;
+}
+
 /*
  * Print the name of a VMA.
  */
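
Editor's note: usage sketch for the split introduced above. Callers that only hold an mm use access_remote_vm(); access_process_vm() keeps the task-based interface and does the get_task_mm()/mmput() dance itself. Illustrative, with hypothetical variable names:

    char buf[16];
    int copied = 0;
    struct mm_struct *mm = get_task_mm(task);

    if (mm) {
            /* read 16 bytes from the remote address space */
            copied = access_remote_vm(mm, remote_addr, buf, sizeof(buf), 0);
            mmput(mm);
    }
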