mm/hugetlb.c must #include <asm/io.h>
[pandora-kernel.git] / mm / hugetlb.c
index 1a6fe87..b3c7864 100644 (file)
@@ -16,7 +16,7 @@
 #include <linux/mutex.h>
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
-
+#include <asm/io.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
@@ -31,9 +31,12 @@ static int max_hstate;
 unsigned int default_hstate_idx;
 struct hstate hstates[HUGE_MAX_HSTATE];
 
+__initdata LIST_HEAD(huge_boot_pages);
+
 /* for command line parsing */
 static struct hstate * __initdata parsed_hstate;
 static unsigned long __initdata default_hstate_max_huge_pages;
+static unsigned long __initdata default_hstate_size;
 
 #define for_each_hstate(h) \
        for ((h) = hstates; (h) < &hstates[max_hstate]; (h)++)
@@ -339,13 +342,13 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 }
 
 /* Returns true if the VMA has associated reserve pages */
-static int vma_has_private_reserves(struct vm_area_struct *vma)
+static int vma_has_reserves(struct vm_area_struct *vma)
 {
        if (vma->vm_flags & VM_SHARED)
-               return 0;
-       if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER))
-               return 0;
-       return 1;
+               return 1;
+       if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
+               return 1;
+       return 0;
 }
 
 static void clear_huge_page(struct page *page,
@@ -417,7 +420,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
         * have no page reserves. This check ensures that reservations are
         * not "stolen". The child may still get SIGKILLed
         */
-       if (!vma_has_private_reserves(vma) &&
+       if (!vma_has_reserves(vma) &&
                        h->free_huge_pages - h->resv_huge_pages == 0)
                return NULL;
 
@@ -924,14 +927,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
        return page;
 }
 
-static __initdata LIST_HEAD(huge_boot_pages);
-
-struct huge_bootmem_page {
-       struct list_head list;
-       struct hstate *hstate;
-};
-
-static int __init alloc_bootmem_huge_page(struct hstate *h)
+__attribute__((weak)) int alloc_bootmem_huge_page(struct hstate *h)
 {
        struct huge_bootmem_page *m;
        int nr_nodes = nodes_weight(node_online_map);
@@ -981,15 +977,10 @@ static void __init gather_bootmem_prealloc(void)
        }
 }
 
-static void __init hugetlb_init_one_hstate(struct hstate *h)
+static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 {
        unsigned long i;
 
-       for (i = 0; i < MAX_NUMNODES; ++i)
-               INIT_LIST_HEAD(&h->hugepage_freelists[i]);
-
-       h->hugetlb_next_nid = first_node(node_online_map);
-
        for (i = 0; i < h->max_huge_pages; ++i) {
                if (h->order >= MAX_ORDER) {
                        if (!alloc_bootmem_huge_page(h))
@@ -997,7 +988,7 @@ static void __init hugetlb_init_one_hstate(struct hstate *h)
                } else if (!alloc_fresh_huge_page(h))
                        break;
        }
-       h->max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i;
+       h->max_huge_pages = i;
 }
 
 static void __init hugetlb_init_hstates(void)
@@ -1005,23 +996,36 @@ static void __init hugetlb_init_hstates(void)
        struct hstate *h;
 
        for_each_hstate(h) {
-               hugetlb_init_one_hstate(h);
+               /* oversize hugepages were init'ed in early boot */
+               if (h->order < MAX_ORDER)
+                       hugetlb_hstate_alloc_pages(h);
        }
 }
 
+static char * __init memfmt(char *buf, unsigned long n)
+{
+       if (n >= (1UL << 30))
+               sprintf(buf, "%lu GB", n >> 30);
+       else if (n >= (1UL << 20))
+               sprintf(buf, "%lu MB", n >> 20);
+       else
+               sprintf(buf, "%lu KB", n >> 10);
+       return buf;
+}
+
 static void __init report_hugepages(void)
 {
        struct hstate *h;
 
        for_each_hstate(h) {
-               printk(KERN_INFO "Total HugeTLB memory allocated, "
-                               "%ld %dMB pages\n",
-                               h->free_huge_pages,
-                               1 << (h->order + PAGE_SHIFT - 20));
+               char buf[32];
+               printk(KERN_INFO "HugeTLB registered %s page size, "
+                                "pre-allocated %ld pages\n",
+                       memfmt(buf, huge_page_size(h)),
+                       h->free_huge_pages);
        }
 }
 
-#ifdef CONFIG_SYSCTL
 #ifdef CONFIG_HIGHMEM
 static void try_to_free_low(struct hstate *h, unsigned long count)
 {
@@ -1279,11 +1283,14 @@ static int __init hugetlb_init(void)
 {
        BUILD_BUG_ON(HPAGE_SHIFT == 0);
 
-       if (!size_to_hstate(HPAGE_SIZE)) {
-               hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
-               parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
+       if (!size_to_hstate(default_hstate_size)) {
+               default_hstate_size = HPAGE_SIZE;
+               if (!size_to_hstate(default_hstate_size))
+                       hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
        }
-       default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;
+       default_hstate_idx = size_to_hstate(default_hstate_size) - hstates;
+       if (default_hstate_max_huge_pages)
+               default_hstate.max_huge_pages = default_hstate_max_huge_pages;
 
        hugetlb_init_hstates();
 
@@ -1301,6 +1308,8 @@ module_init(hugetlb_init);
 void __init hugetlb_add_hstate(unsigned order)
 {
        struct hstate *h;
+       unsigned long i;
+
        if (size_to_hstate(PAGE_SIZE << order)) {
                printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
                return;
@@ -1310,15 +1319,21 @@ void __init hugetlb_add_hstate(unsigned order)
        h = &hstates[max_hstate++];
        h->order = order;
        h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
+       h->nr_huge_pages = 0;
+       h->free_huge_pages = 0;
+       for (i = 0; i < MAX_NUMNODES; ++i)
+               INIT_LIST_HEAD(&h->hugepage_freelists[i]);
+       h->hugetlb_next_nid = first_node(node_online_map);
        snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
                                        huge_page_size(h)/1024);
-       hugetlb_init_one_hstate(h);
+
        parsed_hstate = h;
 }
 
-static int __init hugetlb_setup(char *s)
+static int __init hugetlb_nrpages_setup(char *s)
 {
        unsigned long *mhp;
+       static unsigned long *last_mhp;
 
        /*
         * !max_hstate means we haven't parsed a hugepagesz= parameter yet,
@@ -1329,12 +1344,35 @@ static int __init hugetlb_setup(char *s)
        else
                mhp = &parsed_hstate->max_huge_pages;
 
+       if (mhp == last_mhp) {
+               printk(KERN_WARNING "hugepages= specified twice without "
+                       "interleaving hugepagesz=, ignoring\n");
+               return 1;
+       }
+
        if (sscanf(s, "%lu", mhp) <= 0)
                *mhp = 0;
 
+       /*
+        * Global state is always initialized later in hugetlb_init.
+        * But hstates with order >= MAX_ORDER must be allocated here, early,
+        * while the bootmem allocator is still available.
+        */
+       if (max_hstate && parsed_hstate->order >= MAX_ORDER)
+               hugetlb_hstate_alloc_pages(parsed_hstate);
+
+       last_mhp = mhp;
+
        return 1;
 }
-__setup("hugepages=", hugetlb_setup);
+__setup("hugepages=", hugetlb_nrpages_setup);
+
+static int __init hugetlb_default_setup(char *s)
+{
+       default_hstate_size = memparse(s, &s);
+       return 1;
+}
+__setup("default_hugepagesz=", hugetlb_default_setup);
 
 static unsigned int cpuset_mems_nr(unsigned int *array)
 {
@@ -1347,6 +1385,7 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
        return nr;
 }
 
+#ifdef CONFIG_SYSCTL
 int hugetlb_sysctl_handler(struct ctl_table *table, int write,
                           struct file *file, void __user *buffer,
                           size_t *length, loff_t *ppos)
@@ -1513,8 +1552,10 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 
                kref_put(&reservations->refs, resv_map_release);
 
-               if (reserve)
+               if (reserve) {
                        hugetlb_acct_memory(h, -reserve);
+                       hugetlb_put_quota(vma->vm_file->f_mapping, reserve);
+               }
        }
 }
 
@@ -1963,6 +2004,15 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        return ret;
 }
 
+/* Can be overridden by architectures */
+__attribute__((weak)) struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+              pud_t *pud, int write)
+{
+       BUG();
+       return NULL;
+}
+
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        struct page **pages, struct vm_area_struct **vmas,
                        unsigned long *position, int *length, int i,