Merge branch 'stable/cleanups-3.2' of git://git.kernel.org/pub/scm/linux/kernel/git...
[pandora-kernel.git] / drivers / xen / balloon.c
index 61c0ee7..a767884 100644 (file)
@@ -4,6 +4,12 @@
  * Copyright (c) 2003, B Dragovic
  * Copyright (c) 2003-2004, M Williamson, K Fraser
  * Copyright (c) 2005 Dan M. Smith, IBM Corporation
+ * Copyright (c) 2010 Daniel Kiper
+ *
+ * Memory hotplug support was written by Daniel Kiper. Work on
+ * it was sponsored by Google under Google Summer of Code 2010
+ * program. Jeremy Fitzhardinge from Citrix was the mentor for
+ * this project.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version 2
@@ -33,6 +39,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
+#include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/bootmem.h>
 #include <linux/pagemap.h>
@@ -40,6 +47,9 @@
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/gfp.h>
+#include <linux/notifier.h>
+#include <linux/memory.h>
+#include <linux/memory_hotplug.h>
 
 #include <asm/page.h>
 #include <asm/pgalloc.h>
@@ -193,6 +203,87 @@ static enum bp_state update_schedule(enum bp_state state)
        return BP_EAGAIN;
 }
 
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+/* Balloon credit: target_pages minus current_pages and hotplug_pages. */
+static long current_credit(void)
+{
+       return balloon_stats.target_pages - balloon_stats.current_pages -
+               balloon_stats.hotplug_pages;
+}
+
+/* True when any of the low, high or hotplug balloon counters is non-zero. */
+static bool balloon_is_inflated(void)
+{
+       return balloon_stats.balloon_low ||
+               balloon_stats.balloon_high ||
+               balloon_stats.balloon_hotplug;
+}
+
+/*
+ * reserve_additional_memory() adds a memory region of size >= credit above
+ * max_pfn. The new region is section aligned and its size is rounded up to a
+ * multiple of the section size. This allows optimal use of the address space
+ * and establishes proper alignment when this function is first called after
+ * boot (the last section, not fully populated at boot time, contains unused
+ * pages without PG_reserved set; online_pages_range() does not allow onlining
+ * of the whole range if the first onlined page does not have PG_reserved set).
+ * The real size of the added memory is established at the page onlining stage.
+ *
+ * Returns BP_DONE on success, or BP_EAGAIN if add_memory() fails.
+ */
+static enum bp_state reserve_additional_memory(long credit)
+{
+       int nid, rc;
+       u64 hotplug_start_paddr;
+       unsigned long balloon_hotplug = credit;
+
+       hotplug_start_paddr = PFN_PHYS(SECTION_ALIGN_UP(max_pfn));
+       balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION);
+       nid = memory_add_physaddr_to_nid(hotplug_start_paddr);
+
+       /* Widen before shifting so the byte count cannot wrap a 32-bit long. */
+       rc = add_memory(nid, hotplug_start_paddr,
+                       (u64)balloon_hotplug << PAGE_SHIFT);
+       if (rc) {
+               pr_info("xen_balloon: %s: add_memory() failed: %i\n", __func__, rc);
+               return BP_EAGAIN;
+       }
+
+       /* The surplus added by the section rounding goes to balloon_hotplug. */
+       balloon_stats.hotplug_pages += credit;
+       balloon_stats.balloon_hotplug = balloon_hotplug - credit;
+       return BP_DONE;
+}
+
+/* Online-page callback: put @page in the balloon and adjust hotplug counters. */
+static void xen_online_page(struct page *page)
+{
+       __online_page_set_limits(page);
+
+       mutex_lock(&balloon_mutex);
+       __balloon_append(page);
+
+       /* Consume hotplug_pages first; once it is zero, use balloon_hotplug. */
+       if (!balloon_stats.hotplug_pages)
+               balloon_stats.balloon_hotplug--;
+       else
+               balloon_stats.hotplug_pages--;
+       mutex_unlock(&balloon_mutex);
+}
+
+/* Memory notifier: MEM_ONLINE schedules balloon_worker; always NOTIFY_OK. */
+static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
+{
+       if (val == MEM_ONLINE)
+               schedule_delayed_work(&balloon_worker, 0);
+       return NOTIFY_OK;
+}
+
+static struct notifier_block xen_memory_nb = {
+       .notifier_call = xen_memory_notifier,
+       .priority = 0
+};
+#else
 static long current_credit(void)
 {
        unsigned long target = balloon_stats.target_pages;
@@ -205,6 +296,21 @@ static long current_credit(void)
        return target - balloon_stats.current_pages;
 }
 
+/* True when either the low or the high balloon counter is non-zero. */
+static bool balloon_is_inflated(void)
+{
+       return balloon_stats.balloon_low || balloon_stats.balloon_high;
+}
+
+/* Variant used when CONFIG_XEN_BALLOON_MEMORY_HOTPLUG is not set. */
+static enum bp_state reserve_additional_memory(long credit)
+{
+       /* @credit is unused: the target is reset to the current page count. */
+       balloon_stats.target_pages = balloon_stats.current_pages;
+       return BP_DONE;
+}
+#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
+
 static enum bp_state increase_reservation(unsigned long nr_pages)
 {
        int rc;
@@ -216,6 +322,15 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
                .domid        = DOMID_SELF
        };
 
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+       if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) {
+               nr_pages = min(nr_pages, balloon_stats.balloon_hotplug);
+               balloon_stats.hotplug_pages += nr_pages;
+               balloon_stats.balloon_hotplug -= nr_pages;
+               return BP_DONE;
+       }
+#endif
+
        if (nr_pages > ARRAY_SIZE(frame_list))
                nr_pages = ARRAY_SIZE(frame_list);
 
@@ -278,6 +393,15 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
                .domid        = DOMID_SELF
        };
 
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+       if (balloon_stats.hotplug_pages) {
+               nr_pages = min(nr_pages, balloon_stats.hotplug_pages);
+               balloon_stats.hotplug_pages -= nr_pages;
+               balloon_stats.balloon_hotplug += nr_pages;
+               return BP_DONE;
+       }
+#endif
+
        if (nr_pages > ARRAY_SIZE(frame_list))
                nr_pages = ARRAY_SIZE(frame_list);
 
@@ -339,8 +463,12 @@ static void balloon_process(struct work_struct *work)
        do {
                credit = current_credit();
 
-               if (credit > 0)
-                       state = increase_reservation(credit);
+               if (credit > 0) {
+                       if (balloon_is_inflated())
+                               state = increase_reservation(credit);
+                       else
+                               state = reserve_additional_memory(credit);
+               }
 
                if (credit < 0)
                        state = decrease_reservation(-credit, GFP_BALLOON);
@@ -373,20 +501,24 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target);
  * alloc_xenballooned_pages - get pages that have been ballooned out
  * @nr_pages: Number of pages to get
  * @pages: pages returned
+ * @highmem: true to get highmem pages, false to get lowmem pages
  * @return 0 on success, error otherwise
  */
-int alloc_xenballooned_pages(int nr_pages, struct page **pages)
+int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
 {
        int pgno = 0;
        struct page *page;
        mutex_lock(&balloon_mutex);
        while (pgno < nr_pages) {
-               page = balloon_retrieve(true);
-               if (page) {
+               page = balloon_retrieve(highmem);
+               if (page && PageHighMem(page) == highmem) {
                        pages[pgno++] = page;
                } else {
                        enum bp_state st;
-                       st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER);
+                       if (page)
+                               balloon_append(page);
+                       st = decrease_reservation(nr_pages - pgno,
+                                       highmem ? GFP_HIGHUSER : GFP_USER);
                        if (st != BP_DONE)
                                goto out_undo;
                }
@@ -427,17 +559,40 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
 }
 EXPORT_SYMBOL(free_xenballooned_pages);
 
-static int __init balloon_init(void)
+/* Add pfns [start_pfn, start_pfn + pages), capped at max_pfn, to the balloon. */
+static void __init balloon_add_region(unsigned long start_pfn,
+                                     unsigned long pages)
 {
        unsigned long pfn, extra_pfn_end;
        struct page *page;
 
+       /*
+        * If usable memory has been limited (e.g., with the 'mem' command
+        * line parameter), don't add pages beyond this limit.
+        */
+       extra_pfn_end = min(max_pfn, start_pfn + pages);
+
+       for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
+               page = pfn_to_page(pfn);
+               /* totalram_pages and totalhigh_pages do not
+                * include the boot-time balloon extension, so
+                * don't subtract from it. */
+               __balloon_append(page);
+       }
+}
+
+static int __init balloon_init(void)
+{
+       int i;
+
        if (!xen_domain())
                return -ENODEV;
 
        pr_info("xen/balloon: Initialising balloon driver.\n");
 
-       balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn;
+       balloon_stats.current_pages = xen_pv_domain()
+               ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
+               : max_pfn;
        balloon_stats.target_pages  = balloon_stats.current_pages;
        balloon_stats.balloon_low   = 0;
        balloon_stats.balloon_high  = 0;
@@ -447,25 +602,22 @@ static int __init balloon_init(void)
        balloon_stats.retry_count = 1;
        balloon_stats.max_retry_count = RETRY_UNLIMITED;
 
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+       balloon_stats.hotplug_pages = 0;
+       balloon_stats.balloon_hotplug = 0;
+
+       set_online_page_callback(&xen_online_page);
+       register_memory_notifier(&xen_memory_nb);
+#endif
+
        /*
-        * Initialise the balloon with excess memory space.  We need
-        * to make sure we don't add memory which doesn't exist or
-        * logically exist.  The E820 map can be trimmed to be smaller
-        * than the amount of physical memory due to the mem= command
-        * line parameter.  And if this is a 32-bit non-HIGHMEM kernel
-        * on a system with memory which requires highmem to access,
-        * don't try to use it.
+        * Initialize the balloon with pages from the extra memory
+        * regions (see arch/x86/xen/setup.c).
         */
-       extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()),
-                           (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
-       for (pfn = PFN_UP(xen_extra_mem_start);
-            pfn < extra_pfn_end;
-            pfn++) {
-               page = pfn_to_page(pfn);
-               /* totalram_pages and totalhigh_pages do not include the boot-time
-                  balloon extension, so don't subtract from it. */
-               __balloon_append(page);
-       }
+       for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
+               if (xen_extra_mem[i].size)
+                       balloon_add_region(PFN_UP(xen_extra_mem[i].start),
+                                          PFN_DOWN(xen_extra_mem[i].size));
 
        return 0;
 }