x86, e820: remove end_user_pfn
[pandora-kernel.git] / mm / bootmem.c
index 2ccea70..8d9f60e 100644 (file)
@@ -111,44 +111,74 @@ static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
  * might be used for boot-time allocations - or it might get added
  * to the free page pool later on.
  */
-static int __init reserve_bootmem_core(bootmem_data_t *bdata,
+static int __init can_reserve_bootmem_core(bootmem_data_t *bdata,
                        unsigned long addr, unsigned long size, int flags)
 {
        unsigned long sidx, eidx;
        unsigned long i;
-       int ret;
+
+       BUG_ON(!size);
+
+       /* range misses this node; return 0 so other nodes are not blocked */
+       if (addr + size < bdata->node_boot_start ||
+               PFN_DOWN(addr) > bdata->node_low_pfn)
+               return 0;
 
        /*
-        * round up, partially reserved pages are considered
-        * fully reserved.
+        * Convert the range to bitmap indices, clamped to this node.
         */
+       if (addr > bdata->node_boot_start)
+               sidx= PFN_DOWN(addr - bdata->node_boot_start);
+       else
+               sidx = 0;
+
+       eidx = PFN_UP(addr + size - bdata->node_boot_start);
+       if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+               eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+
+       for (i = sidx; i < eidx; i++) {
+               if (test_bit(i, bdata->node_bootmem_map)) {
+                       if (flags & BOOTMEM_EXCLUSIVE)
+                               return -EBUSY;
+               }
+       }
+
+       return 0;
+
+}
+
+static void __init reserve_bootmem_core(bootmem_data_t *bdata,
+                       unsigned long addr, unsigned long size, int flags)
+{
+       unsigned long sidx, eidx;
+       unsigned long i;
+
        BUG_ON(!size);
-       BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
-       BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
-       BUG_ON(addr < bdata->node_boot_start);
 
-       sidx = PFN_DOWN(addr - bdata->node_boot_start);
+       /* out of range */
+       if (addr + size < bdata->node_boot_start ||
+               PFN_DOWN(addr) > bdata->node_low_pfn)
+               return;
+
+       /*
+        * Convert the range to bitmap indices, clamped to this node.
+        */
+       if (addr > bdata->node_boot_start)
+               sidx= PFN_DOWN(addr - bdata->node_boot_start);
+       else
+               sidx = 0;
+
        eidx = PFN_UP(addr + size - bdata->node_boot_start);
+       if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+               eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
 
-       for (i = sidx; i < eidx; i++)
+       for (i = sidx; i < eidx; i++) {
                if (test_and_set_bit(i, bdata->node_bootmem_map)) {
 #ifdef CONFIG_DEBUG_BOOTMEM
                        printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
 #endif
-                       if (flags & BOOTMEM_EXCLUSIVE) {
-                               ret = -EBUSY;
-                               goto err;
-                       }
                }
-
-       return 0;
-
-err:
-       /* unreserve memory we accidentally reserved */
-       for (i--; i >= sidx; i--)
-               clear_bit(i, bdata->node_bootmem_map);
-
-       return ret;
+       }
 }
 
 static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
@@ -206,9 +236,11 @@ void * __init
 __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
              unsigned long align, unsigned long goal, unsigned long limit)
 {
-       unsigned long offset, remaining_size, areasize, preferred;
+       unsigned long areasize, preferred;
        unsigned long i, start = 0, incr, eidx, end_pfn;
        void *ret;
+       unsigned long node_boot_start;
+       void *node_bootmem_map;
 
        if (!size) {
                printk("__alloc_bootmem_core(): zero-sized request\n");
@@ -216,70 +248,83 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
        }
        BUG_ON(align & (align-1));
 
-       if (limit && bdata->node_boot_start >= limit)
-               return NULL;
-
        /* on nodes without memory - bootmem_map is NULL */
        if (!bdata->node_bootmem_map)
                return NULL;
 
+       /* Is bdata->node_boot_start guaranteed to be (12+6)-bit aligned on x86_64? */
+       node_boot_start = bdata->node_boot_start;
+       node_bootmem_map = bdata->node_bootmem_map;
+       if (align) {
+               node_boot_start = ALIGN(bdata->node_boot_start, align);
+               if (node_boot_start > bdata->node_boot_start)
+                       node_bootmem_map = (unsigned long *)bdata->node_bootmem_map +
+                           PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG;
+       }
+
+       if (limit && node_boot_start >= limit)
+               return NULL;
+
        end_pfn = bdata->node_low_pfn;
        limit = PFN_DOWN(limit);
        if (limit && end_pfn > limit)
                end_pfn = limit;
 
-       eidx = end_pfn - PFN_DOWN(bdata->node_boot_start);
-       offset = 0;
-       if (align && (bdata->node_boot_start & (align - 1UL)) != 0)
-               offset = align - (bdata->node_boot_start & (align - 1UL));
-       offset = PFN_DOWN(offset);
+       eidx = end_pfn - PFN_DOWN(node_boot_start);
 
        /*
         * We try to allocate bootmem pages above 'goal'
         * first, then we try to allocate lower pages.
         */
-       if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) {
-               preferred = goal - bdata->node_boot_start;
+       preferred = 0;
+       if (goal && PFN_DOWN(goal) < end_pfn) {
+               if (goal > node_boot_start)
+                       preferred = goal - node_boot_start;
 
-               if (bdata->last_success >= preferred)
+               if (bdata->last_success > node_boot_start &&
+                       bdata->last_success - node_boot_start >= preferred)
                        if (!limit || (limit && limit > bdata->last_success))
-                               preferred = bdata->last_success;
-       } else
-               preferred = 0;
+                               preferred = bdata->last_success - node_boot_start;
+       }
 
-       preferred = PFN_DOWN(ALIGN(preferred, align)) + offset;
+       preferred = PFN_DOWN(ALIGN(preferred, align));
        areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
        incr = align >> PAGE_SHIFT ? : 1;
 
 restart_scan:
-       for (i = preferred; i < eidx; i += incr) {
+       for (i = preferred; i < eidx;) {
                unsigned long j;
-               i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
+
+               i = find_next_zero_bit(node_bootmem_map, eidx, i);
                i = ALIGN(i, incr);
                if (i >= eidx)
                        break;
-               if (test_bit(i, bdata->node_bootmem_map))
+               if (test_bit(i, node_bootmem_map)) {
+                       i += incr;
                        continue;
+               }
                for (j = i + 1; j < i + areasize; ++j) {
                        if (j >= eidx)
                                goto fail_block;
-                       if (test_bit(j, bdata->node_bootmem_map))
+                       if (test_bit(j, node_bootmem_map))
                                goto fail_block;
                }
                start = i;
                goto found;
        fail_block:
                i = ALIGN(j, incr);
+               if (i == j)
+                       i += incr;
        }
 
-       if (preferred > offset) {
-               preferred = offset;
+       if (preferred > 0) {
+               preferred = 0;
                goto restart_scan;
        }
        return NULL;
 
 found:
-       bdata->last_success = PFN_PHYS(start);
+       bdata->last_success = PFN_PHYS(start) + node_boot_start;
        BUG_ON(start >= eidx);
 
        /*
@@ -289,6 +334,7 @@ found:
         */
        if (align < PAGE_SIZE &&
            bdata->last_offset && bdata->last_pos+1 == start) {
+               unsigned long offset, remaining_size;
                offset = ALIGN(bdata->last_offset, align);
                BUG_ON(offset > PAGE_SIZE);
                remaining_size = PAGE_SIZE - offset;
@@ -297,14 +343,12 @@ found:
                        /* last_pos unchanged */
                        bdata->last_offset = offset + size;
                        ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
-                                          offset +
-                                          bdata->node_boot_start);
+                                          offset + node_boot_start);
                } else {
                        remaining_size = size - remaining_size;
                        areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
                        ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
-                                          offset +
-                                          bdata->node_boot_start);
+                                          offset + node_boot_start);
                        bdata->last_pos = start + areasize - 1;
                        bdata->last_offset = remaining_size;
                }
@@ -312,14 +356,14 @@ found:
        } else {
                bdata->last_pos = start + areasize - 1;
                bdata->last_offset = size & ~PAGE_MASK;
-               ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
+               ret = phys_to_virt(start * PAGE_SIZE + node_boot_start);
        }
 
        /*
         * Reserve the area now:
         */
        for (i = start; i < start + areasize; i++)
-               if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
+               if (unlikely(test_and_set_bit(i, node_bootmem_map)))
                        BUG();
        memset(ret, 0, size);
        return ret;
@@ -398,10 +442,17 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
        return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
 }
 
-void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
                                 unsigned long size, int flags)
 {
+       int ret;
+
+       ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+       if (ret < 0)
+               return -ENOMEM;
        reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+
+       return 0;
 }
 
 void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
@@ -412,6 +463,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 
 unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
 {
+       register_page_bootmem_info_node(pgdat);
        return free_all_bootmem_core(pgdat);
 }
 
@@ -426,7 +478,18 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 int __init reserve_bootmem(unsigned long addr, unsigned long size,
                            int flags)
 {
-       return reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size, flags);
+       bootmem_data_t *bdata;
+       int ret;
+
+       list_for_each_entry(bdata, &bdata_list, list) {
+               ret = can_reserve_bootmem_core(bdata, addr, size, flags);
+               if (ret < 0)
+                       return ret;
+       }
+       list_for_each_entry(bdata, &bdata_list, list)
+               reserve_bootmem_core(bdata, addr, size, flags);
+
+       return 0;
 }
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
@@ -484,6 +547,37 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
        return __alloc_bootmem(size, align, goal);
 }
 
+#ifdef CONFIG_SPARSEMEM
+void * __init alloc_bootmem_section(unsigned long size,
+                                   unsigned long section_nr)
+{
+       void *ptr;
+       unsigned long limit, goal, start_nr, end_nr, pfn;
+       struct pglist_data *pgdat;
+
+       pfn = section_nr_to_pfn(section_nr);
+       goal = PFN_PHYS(pfn);
+       limit = PFN_PHYS(section_nr_to_pfn(section_nr + 1)) - 1;
+       pgdat = NODE_DATA(early_pfn_to_nid(pfn));
+       ptr = __alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal,
+                                  limit);
+
+       if (!ptr)
+               return NULL;
+
+       start_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr)));
+       end_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr) + size));
+       if (start_nr != section_nr || end_nr != section_nr) {
+               printk(KERN_WARNING "alloc_bootmem failed on section %ld.\n",
+                      section_nr);
+               free_bootmem_core(pgdat->bdata, __pa(ptr), size);
+               ptr = NULL;
+       }
+
+       return ptr;
+}
+#endif
+
 #ifndef ARCH_LOW_ADDRESS_LIMIT
 #define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
 #endif