X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fhugetlb.c;h=728e9bda12ea9971c1cf0212184b1ca2c2d94020;hb=c12489ad40a9db6b17225e0f38151d41c6175fe8;hp=a1b30d45459e827249da8c6226ccf080f15d65a6;hpb=1c59827d1da9bcd6970800d4f8a031b5859e8b4c;p=pandora-kernel.git diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a1b30d45459e..728e9bda12ea 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -103,6 +103,9 @@ static int __init hugetlb_init(void) unsigned long i; struct page *page; + if (HPAGE_SHIFT == 0) + return 0; + for (i = 0; i < MAX_NUMNODES; ++i) INIT_LIST_HEAD(&hugepage_freelists[i]); @@ -234,7 +237,6 @@ unsigned long hugetlb_total_pages(void) { return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE); } -EXPORT_SYMBOL(hugetlb_total_pages); /* * We cannot handle pagefaults against hugetlb pages at all. They cause @@ -277,19 +279,23 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, unsigned long addr; for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { + src_pte = huge_pte_offset(src, addr); + if (!src_pte) + continue; dst_pte = huge_pte_alloc(dst, addr); if (!dst_pte) goto nomem; + spin_lock(&dst->page_table_lock); spin_lock(&src->page_table_lock); - src_pte = huge_pte_offset(src, addr); - if (src_pte && !pte_none(*src_pte)) { + if (!pte_none(*src_pte)) { entry = *src_pte; ptepage = pte_page(entry); get_page(ptepage); - add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); + add_mm_counter(dst, file_rss, HPAGE_SIZE / PAGE_SIZE); set_huge_pte_at(dst, addr, dst_pte, entry); } spin_unlock(&src->page_table_lock); + spin_unlock(&dst->page_table_lock); } return 0; @@ -310,12 +316,14 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, BUG_ON(start & ~HPAGE_MASK); BUG_ON(end & ~HPAGE_MASK); + spin_lock(&mm->page_table_lock); + + /* Update high watermark before we lower rss */ + update_hiwater_rss(mm); + for (address = start; address < end; address += HPAGE_SIZE) { ptep = huge_pte_offset(mm, address); - if (! ptep) - /* This can happen on truncate, or if an - * mmap() is aborted due to an error before - * the prefault */ + if (!ptep) continue; pte = huge_ptep_get_and_clear(mm, address, ptep); @@ -324,74 +332,99 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, page = pte_page(pte); put_page(page); - add_mm_counter(mm, rss, - (HPAGE_SIZE / PAGE_SIZE)); + add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE)); } + + spin_unlock(&mm->page_table_lock); flush_tlb_range(vma, start, end); } -void zap_hugepage_range(struct vm_area_struct *vma, - unsigned long start, unsigned long length) +static struct page *find_lock_huge_page(struct address_space *mapping, + unsigned long idx) { - struct mm_struct *mm = vma->vm_mm; + struct page *page; + int err; + struct inode *inode = mapping->host; + unsigned long size; + +retry: + page = find_lock_page(mapping, idx); + if (page) + goto out; + + /* Check to make sure the mapping hasn't been truncated */ + size = i_size_read(inode) >> HPAGE_SHIFT; + if (idx >= size) + goto out; + + if (hugetlb_get_quota(mapping)) + goto out; + page = alloc_huge_page(); + if (!page) { + hugetlb_put_quota(mapping); + goto out; + } - spin_lock(&mm->page_table_lock); - unmap_hugepage_range(vma, start, start + length); - spin_unlock(&mm->page_table_lock); + err = add_to_page_cache(page, mapping, idx, GFP_KERNEL); + if (err) { + put_page(page); + hugetlb_put_quota(mapping); + if (err == -EEXIST) + goto retry; + page = NULL; + } +out: + return page; } -int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) +int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, int write_access) { - struct mm_struct *mm = current->mm; - unsigned long addr; - int ret = 0; + int ret = VM_FAULT_SIGBUS; + unsigned long idx; + unsigned long size; + pte_t *pte; + struct page *page; + struct address_space *mapping; - WARN_ON(!is_vm_hugetlb_page(vma)); - BUG_ON(vma->vm_start & ~HPAGE_MASK); - BUG_ON(vma->vm_end & ~HPAGE_MASK); + pte = huge_pte_alloc(mm, address); + if (!pte) + goto out; - hugetlb_prefault_arch_hook(mm); + mapping = vma->vm_file->f_mapping; + idx = ((address - vma->vm_start) >> HPAGE_SHIFT) + + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); + + /* + * Use page lock to guard against racing truncation + * before we get page_table_lock. + */ + page = find_lock_huge_page(mapping, idx); + if (!page) + goto out; spin_lock(&mm->page_table_lock); - for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { - unsigned long idx; - pte_t *pte = huge_pte_alloc(mm, addr); - struct page *page; + size = i_size_read(mapping->host) >> HPAGE_SHIFT; + if (idx >= size) + goto backout; - if (!pte) { - ret = -ENOMEM; - goto out; - } + ret = VM_FAULT_MINOR; + if (!pte_none(*pte)) + goto backout; - idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) - + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); - page = find_get_page(mapping, idx); - if (!page) { - /* charge the fs quota first */ - if (hugetlb_get_quota(mapping)) { - ret = -ENOMEM; - goto out; - } - page = alloc_huge_page(); - if (!page) { - hugetlb_put_quota(mapping); - ret = -ENOMEM; - goto out; - } - ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); - if (! ret) { - unlock_page(page); - } else { - hugetlb_put_quota(mapping); - free_huge_page(page); - goto out; - } - } - add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); - set_huge_pte_at(mm, addr, pte, make_huge_pte(vma, page)); - } -out: + add_mm_counter(mm, file_rss, HPAGE_SIZE / PAGE_SIZE); + set_huge_pte_at(mm, address, pte, make_huge_pte(vma, page)); spin_unlock(&mm->page_table_lock); + unlock_page(page); +out: return ret; + +backout: + spin_unlock(&mm->page_table_lock); + hugetlb_put_quota(mapping); + unlock_page(page); + put_page(page); + goto out; } int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, @@ -401,34 +434,36 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long vpfn, vaddr = *position; int remainder = *length; - BUG_ON(!is_vm_hugetlb_page(vma)); - vpfn = vaddr/PAGE_SIZE; spin_lock(&mm->page_table_lock); while (vaddr < vma->vm_end && remainder) { + pte_t *pte; + struct page *page; - if (pages) { - pte_t *pte; - struct page *page; - - /* Some archs (sparc64, sh*) have multiple - * pte_ts to each hugepage. We have to make - * sure we get the first, for the page - * indexing below to work. */ - pte = huge_pte_offset(mm, vaddr & HPAGE_MASK); - - /* the hugetlb file might have been truncated */ - if (!pte || pte_none(*pte)) { - remainder = 0; - if (!i) - i = -EFAULT; - break; - } + /* + * Some archs (sparc64, sh*) have multiple pte_ts to + * each hugepage. We have to make * sure we get the + * first, for the page indexing below to work. + */ + pte = huge_pte_offset(mm, vaddr & HPAGE_MASK); - page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; + if (!pte || pte_none(*pte)) { + int ret; + + spin_unlock(&mm->page_table_lock); + ret = hugetlb_fault(mm, vma, vaddr, 0); + spin_lock(&mm->page_table_lock); + if (ret == VM_FAULT_MINOR) + continue; - WARN_ON(!PageCompound(page)); + remainder = 0; + if (!i) + i = -EFAULT; + break; + } + if (pages) { + page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; get_page(page); pages[i] = page; }