unsigned long addr = vma->vm_start;
unsigned long end = vma->vm_end;
+ /*
+ * Don't copy ptes where a page fault will fill them correctly.
+ * Fork becomes much lighter when there are big shared or private
+ * readonly mappings. The tradeoff is that copy_page_range is more
+ * efficient than faulting.
+ */
+ if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) {
+ if (!vma->anon_vma)
+ return 0;
+ }
+
if (is_vm_hugetlb_page(vma))
return copy_hugetlb_page_range(dst_mm, src_mm, vma);
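
The test added above is what lets fork() skip the pte copy: a vma with no anon_vma has no private anonymous pages yet, so every pte in it can be rebuilt later by the fault path from the backing object (or the zero page), while VM_HUGETLB, VM_NONLINEAR and VM_RESERVED mappings are excluded because the normal fault path cannot, or should not, recreate their ptes. A minimal user-space model of that decision, with simplified types and flag values that are illustrative only:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel's vm_flags bits. */
#define VM_HUGETLB   0x1
#define VM_NONLINEAR 0x2
#define VM_RESERVED  0x4

struct vma_model {
	unsigned long vm_flags;
	void *anon_vma;		/* NULL until a private anonymous page exists */
};

/* Mirrors the check added to copy_page_range(): the pte copy can be
 * skipped when the vma has no special mappings and no anonymous pages,
 * because later faults can rebuild the ptes from the backing object. */
static bool fork_can_skip_ptes(const struct vma_model *vma)
{
	if (vma->vm_flags & (VM_HUGETLB | VM_NONLINEAR | VM_RESERVED))
		return false;
	return vma->anon_vma == NULL;
}

int main(void)
{
	struct vma_model file_ro = { 0, NULL };		/* big readonly mapping */
	struct vma_model touched = { 0, (void *)1 };	/* has anonymous pages  */

	printf("file_ro: skip=%d\n", fork_can_skip_ptes(&file_ro));	/* 1 */
	printf("touched: skip=%d\n", fork_can_skip_ptes(&touched));	/* 0 */
	return 0;
}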
page->index > details->last_index))
continue;
}
- ptent = ptep_get_and_clear(tlb->mm, addr, pte);
+ ptent = ptep_get_and_clear_full(tlb->mm, addr, pte,
+ tlb->fullmm);
tlb_remove_tlb_entry(tlb, pte, addr);
if (unlikely(!page))
continue;
continue;
if (!pte_file(ptent))
free_swap_and_cache(pte_to_swp_entry(ptent));
- pte_clear(tlb->mm, addr, pte);
+ pte_clear_full(tlb->mm, addr, pte, tlb->fullmm);
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap(pte - 1);
}
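
Both new calls thread tlb->fullmm through to the pte helpers so an architecture can take shortcuts when the whole address space is being torn down (exit/execve) rather than partially unmapped. Architectures that do not care fall back to the plain operations; from memory, the asm-generic fallbacks of that era look roughly like this and simply drop the extra argument:

/* include/asm-generic/pgtable.h (reconstructed sketch, not verbatim) */
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
#define ptep_get_and_clear_full(__mm, __address, __ptep, __full)	\
	ptep_get_and_clear((__mm), (__address), (__ptep))
#endif

#ifndef __HAVE_ARCH_PTE_CLEAR_FULL
#define pte_clear_full(__mm, __address, __ptep, __full)			\
	pte_clear((__mm), (__address), (__ptep))
#endif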
pud = pud_offset(pgd, pg);
BUG_ON(pud_none(*pud));
pmd = pmd_offset(pud, pg);
- BUG_ON(pmd_none(*pmd));
+ if (pmd_none(*pmd))
+ return i ? : -EFAULT;
pte = pte_offset_map(pmd, pg);
- BUG_ON(pte_none(*pte));
+ if (pte_none(*pte)) {
+ pte_unmap(pte);
+ return i ? : -EFAULT;
+ }
if (pages) {
pages[i] = pte_page(*pte);
get_page(pages[i]);
}
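
Replacing the BUG_ON()s turns a missing mapping in this path into an ordinary error: get_user_pages() reports how many pages it had already pinned, and only when that count is zero does it return -EFAULT. The `i ? : -EFAULT` form is the GNU C shorthand for `i ? i : -EFAULT`, as the small standalone check below illustrates (hypothetical helper name, builds with gcc):

#include <errno.h>
#include <stdio.h>

/* Model of the new error path: report partial success if some pages
 * were already pinned, -EFAULT only if none were (GNU "?:" shorthand). */
static int gup_result(int pinned)
{
	return pinned ? : -EFAULT;
}

int main(void)
{
	printf("%d\n", gup_result(0));	/* -EFAULT: nothing pinned */
	printf("%d\n", gup_result(3));	/* 3: three pages pinned   */
	return 0;
}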
spin_lock(&mm->page_table_lock);
do {
+ int write_access = write;
struct page *page;
- int lookup_write = write;
cond_resched_lock(&mm->page_table_lock);
- while (!(page = follow_page(mm, start, lookup_write))) {
+ while (!(page = follow_page(mm, start, write_access))) {
+ int ret;
+
/*
* Shortcut for anonymous pages. We don't want
* to force the creation of page tables for
* insanely big anonymously mapped areas that
* nobody touched so far. This is important
* for doing a core dump for these mappings.
*/
- if (!lookup_write &&
- untouched_anonymous_page(mm,vma,start)) {
+ if (!write && untouched_anonymous_page(mm,vma,start)) {
page = ZERO_PAGE(start);
break;
}
spin_unlock(&mm->page_table_lock);
- switch (handle_mm_fault(mm,vma,start,write)) {
+ ret = __handle_mm_fault(mm, vma, start, write_access);
+
+ /*
+ * The VM_FAULT_WRITE bit tells us that do_wp_page has
+ * broken COW when necessary, even if maybe_mkwrite
+ * decided not to set pte_write. We can thus safely do
+ * subsequent page lookups as if they were reads.
+ */
+ if (ret & VM_FAULT_WRITE)
+ write_access = 0;
+
+ switch (ret & ~VM_FAULT_WRITE) {
case VM_FAULT_MINOR:
tsk->min_flt++;
break;
default:
BUG();
}
- /*
- * Now that we have performed a write fault
- * and surely no longer have a shared page we
- * shouldn't write, we shouldn't ignore an
- * unwritable page in the page table if
- * we are forcing write access.
- */
- lookup_write = write && !force;
spin_lock(&mm->page_table_lock);
}
if (pages) {
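
The loop above replaces the old `lookup_write = write && !force` heuristic, which could hand back a stale page, with a check of what the fault handler actually did: only after __handle_mm_fault() reports VM_FAULT_WRITE (COW definitely broken) does get_user_pages() retry the lookup as a read. A toy, self-contained model of that state machine follows; the types are invented and the two VM_FAULT_* values are quoted for illustration only:

#include <stdbool.h>
#include <stdio.h>

#define VM_FAULT_MINOR	0x02	/* values as in this patch era, illustrative */
#define VM_FAULT_WRITE	0x10	/* set when the fault handler broke COW */

/* Toy pte: follow_page() fails a write lookup until the pte is writable. */
struct toy_pte { bool present; bool writable; };

static bool follow(const struct toy_pte *p, bool want_write)
{
	return p->present && (!want_write || p->writable);
}

/* Fault handler: breaks COW but, like maybe_mkwrite(), may still leave
 * the pte read-only; VM_FAULT_WRITE tells the caller COW was handled. */
static int fault(struct toy_pte *p, bool want_write)
{
	p->present = true;
	return want_write ? (VM_FAULT_MINOR | VM_FAULT_WRITE) : VM_FAULT_MINOR;
}

int main(void)
{
	struct toy_pte pte = { true, false };	/* shared read-only page */
	bool write_access = true;

	while (!follow(&pte, write_access)) {
		int ret = fault(&pte, write_access);

		if (ret & VM_FAULT_WRITE)
			write_access = false;	/* safe to retry as a read */
	}
	printf("pinned with write_access=%d\n", (int)write_access);
	return 0;
}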
struct page *old_page, *new_page;
unsigned long pfn = pte_pfn(pte);
pte_t entry;
+ int ret;
if (unlikely(!pfn_valid(pfn))) {
/*
lazy_mmu_prot_update(entry);
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
- return VM_FAULT_MINOR;
+ return VM_FAULT_MINOR|VM_FAULT_WRITE;
}
}
pte_unmap(page_table);
/*
* Re-check the pte - we dropped the lock
*/
+ ret = VM_FAULT_MINOR;
spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, address);
if (likely(pte_same(*page_table, pte))) {
/* Free the old page.. */
new_page = old_page;
+ ret |= VM_FAULT_WRITE;
}
pte_unmap(page_table);
page_cache_release(new_page);
page_cache_release(old_page);
spin_unlock(&mm->page_table_lock);
- return VM_FAULT_MINOR;
+ return ret;
no_new_page:
page_cache_release(old_page);
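
The two modified returns compose an ordinary fault status with the new flag. That only works because VM_FAULT_WRITE occupies a bit outside the range used by the status codes, so a caller can strip it with `& ~VM_FAULT_WRITE` and still see the plain result. A short sanity check, using the numeric values of that era (quoted here for illustration):

#include <assert.h>

#define VM_FAULT_OOM	0x00
#define VM_FAULT_SIGBUS	0x01
#define VM_FAULT_MINOR	0x02
#define VM_FAULT_MAJOR	0x03
#define VM_FAULT_WRITE	0x10	/* special case for get_user_pages */

int main(void)
{
	int ret = VM_FAULT_MINOR | VM_FAULT_WRITE;

	/* The flag must not overlap the bits used by the status values... */
	assert((VM_FAULT_WRITE & VM_FAULT_MAJOR) == 0);
	/* ...so masking it off recovers the status do_wp_page computed. */
	assert((ret & ~VM_FAULT_WRITE) == VM_FAULT_MINOR);
	return 0;
}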
* Fall back to the linear mapping if the fs does not support
* ->populate:
*/
- if (!vma->vm_ops || !vma->vm_ops->populate ||
+ if (!vma->vm_ops->populate ||
(write_access && !(vma->vm_flags & VM_SHARED))) {
pte_clear(mm, address, pte);
return do_no_page(mm, vma, address, write_access, pte, pmd);
if (write_access) {
if (!pte_write(entry))
return do_wp_page(mm, vma, address, pte, pmd, entry);
-
entry = pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
/*
* By the time we get here, we already hold the mm semaphore
*/
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
unsigned long address, int write_access)
{
pgd_t *pgd;
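
Renaming the function to __handle_mm_fault() is what keeps the new bit private to get_user_pages(): every other caller (the architecture fault handlers) keeps calling handle_mm_fault(), which becomes a wrapper that masks VM_FAULT_WRITE out. Reconstructed from memory, the wrapper added to include/linux/mm.h by the same change looks roughly like:

static inline int handle_mm_fault(struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long address,
		int write_access)
{
	return __handle_mm_fault(mm, vma, address, write_access) &
				(~VM_FAULT_WRITE);
}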
#if !defined(__HAVE_ARCH_GATE_AREA)
#if defined(AT_SYSINFO_EHDR)
-struct vm_area_struct gate_vma;
+static struct vm_area_struct gate_vma;
static int __init gate_vma_init(void)
{