X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fmemory.c;h=788a628103405e955a7d28f9935be690eb6d021a;hb=cdb3826b9958c204bc8ffda2cf9bbe2d899ef90c;hp=6fe77acbc1cd7c3ae02fe8c732fca66c49e0f1d1;hpb=e0d7ff168a71299919f01500b3d507aae0c67513;p=pandora-kernel.git diff --git a/mm/memory.c b/mm/memory.c index 6fe77acbc1cd..788a62810340 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -498,6 +498,17 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, unsigned long addr = vma->vm_start; unsigned long end = vma->vm_end; + /* + * Don't copy ptes where a page fault will fill them correctly. + * Fork becomes much lighter when there are big shared or private + * readonly mappings. The tradeoff is that copy_page_range is more + * efficient than faulting. + */ + if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) { + if (!vma->anon_vma) + return 0; + } + if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); @@ -551,7 +562,8 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd, page->index > details->last_index)) continue; } - ptent = ptep_get_and_clear(tlb->mm, addr, pte); + ptent = ptep_get_and_clear_full(tlb->mm, addr, pte, + tlb->fullmm); tlb_remove_tlb_entry(tlb, pte, addr); if (unlikely(!page)) continue; @@ -579,7 +591,7 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd, continue; if (!pte_file(ptent)) free_swap_and_cache(pte_to_swp_entry(ptent)); - pte_clear(tlb->mm, addr, pte); + pte_clear_full(tlb->mm, addr, pte, tlb->fullmm); } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap(pte - 1); } @@ -913,9 +925,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, pud = pud_offset(pgd, pg); BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, pg); - BUG_ON(pmd_none(*pmd)); + if (pmd_none(*pmd)) + return i ? : -EFAULT; pte = pte_offset_map(pmd, pg); - BUG_ON(pte_none(*pte)); + if (pte_none(*pte)) { + pte_unmap(pte); + return i ? : -EFAULT; + } if (pages) { pages[i] = pte_page(*pte); get_page(pages[i]); @@ -940,11 +956,13 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, } spin_lock(&mm->page_table_lock); do { + int write_access = write; struct page *page; - int lookup_write = write; cond_resched_lock(&mm->page_table_lock); - while (!(page = follow_page(mm, start, lookup_write))) { + while (!(page = follow_page(mm, start, write_access))) { + int ret; + /* * Shortcut for anonymous pages. We don't want * to force the creation of pages tables for @@ -952,13 +970,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, * nobody touched so far. This is important * for doing a core dump for these mappings. */ - if (!lookup_write && - untouched_anonymous_page(mm,vma,start)) { + if (!write && untouched_anonymous_page(mm,vma,start)) { page = ZERO_PAGE(start); break; } spin_unlock(&mm->page_table_lock); - switch (handle_mm_fault(mm,vma,start,write)) { + ret = __handle_mm_fault(mm, vma, start, write_access); + + /* + * The VM_FAULT_WRITE bit tells us that do_wp_page has + * broken COW when necessary, even if maybe_mkwrite + * decided not to set pte_write. We can thus safely do + * subsequent page lookups as if they were reads. + */ + if (ret & VM_FAULT_WRITE) + write_access = 0; + + switch (ret & ~VM_FAULT_WRITE) { case VM_FAULT_MINOR: tsk->min_flt++; break; @@ -972,14 +1000,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, default: BUG(); } - /* - * Now that we have performed a write fault - * and surely no longer have a shared page we - * shouldn't write, we shouldn't ignore an - * unwritable page in the page table if - * we are forcing write access. - */ - lookup_write = write && !force; spin_lock(&mm->page_table_lock); } if (pages) { @@ -1229,6 +1249,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, struct page *old_page, *new_page; unsigned long pfn = pte_pfn(pte); pte_t entry; + int ret; if (unlikely(!pfn_valid(pfn))) { /* @@ -1256,7 +1277,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, lazy_mmu_prot_update(entry); pte_unmap(page_table); spin_unlock(&mm->page_table_lock); - return VM_FAULT_MINOR; + return VM_FAULT_MINOR|VM_FAULT_WRITE; } } pte_unmap(page_table); @@ -1283,6 +1304,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, /* * Re-check the pte - we dropped the lock */ + ret = VM_FAULT_MINOR; spin_lock(&mm->page_table_lock); page_table = pte_offset_map(pmd, address); if (likely(pte_same(*page_table, pte))) { @@ -1299,12 +1321,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, /* Free the old page.. */ new_page = old_page; + ret |= VM_FAULT_WRITE; } pte_unmap(page_table); page_cache_release(new_page); page_cache_release(old_page); spin_unlock(&mm->page_table_lock); - return VM_FAULT_MINOR; + return ret; no_new_page: page_cache_release(old_page); @@ -1933,7 +1956,7 @@ static int do_file_page(struct mm_struct * mm, struct vm_area_struct * vma, * Fall back to the linear mapping if the fs does not support * ->populate: */ - if (!vma->vm_ops || !vma->vm_ops->populate || + if (!vma->vm_ops->populate || (write_access && !(vma->vm_flags & VM_SHARED))) { pte_clear(mm, address, pte); return do_no_page(mm, vma, address, write_access, pte, pmd); @@ -1996,7 +2019,6 @@ static inline int handle_pte_fault(struct mm_struct *mm, if (write_access) { if (!pte_write(entry)) return do_wp_page(mm, vma, address, pte, pmd, entry); - entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); @@ -2011,7 +2033,7 @@ static inline int handle_pte_fault(struct mm_struct *mm, /* * By the time we get here, we already hold the mm semaphore */ -int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, +int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, unsigned long address, int write_access) { pgd_t *pgd;