Merge branches 'stable/bug.fixes-3.2' and 'stable/mmu.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 25 Oct 2011 07:17:47 +0000 (09:17 +0200)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 25 Oct 2011 07:17:47 +0000 (09:17 +0200)
* 'stable/bug.fixes-3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen/p2m/debugfs: Make type_name more obvious.
  xen/p2m/debugfs: Fix potential pointer exception.
  xen/enlighten: Fix compile warnings and set cx to known value.
  xen/xenbus: Remove the unnecessary check.
  xen/irq: If we fail during msi_capability_init return proper error code.
  xen/events: Don't check the info for NULL as it is already done.
  xen/events: BUG() when we can't allocate our event->irq array.

* 'stable/mmu.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen: Fix selfballooning and ensure it doesn't go too far
  xen/gntdev: Fix sleep-inside-spinlock
  xen: modify kernel mappings corresponding to granted pages
  xen: add an "highmem" parameter to alloc_xenballooned_pages
  xen/p2m: Use SetPagePrivate and its friends for M2P overrides.
  xen/p2m: Make debug/xen/mmu/p2m visible again.
  Revert "xen/debug: WARN_ON when identity PFN has no _PAGE_IOMAP flag set."

15 files changed:
arch/x86/include/asm/xen/page.h
arch/x86/pci/xen.c
arch/x86/xen/Kconfig
arch/x86/xen/enlighten.c
arch/x86/xen/mmu.c
arch/x86/xen/p2m.c
drivers/block/xen-blkback/blkback.c
drivers/xen/balloon.c
drivers/xen/events.c
drivers/xen/gntdev.c
drivers/xen/grant-table.c
drivers/xen/xen-selfballoon.c
drivers/xen/xenbus/xenbus_probe_backend.c
include/xen/balloon.h
include/xen/grant_table.h

index 7ff4669..c34f96c 100644 (file)
@@ -12,6 +12,7 @@
 #include <asm/pgtable.h>
 
 #include <xen/interface/xen.h>
+#include <xen/grant_table.h>
 #include <xen/features.h>
 
 /* Xen machine address */
@@ -48,14 +49,11 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
                                             unsigned long pfn_e);
 
 extern int m2p_add_override(unsigned long mfn, struct page *page,
-                           bool clear_pte);
+                           struct gnttab_map_grant_ref *kmap_op);
 extern int m2p_remove_override(struct page *page, bool clear_pte);
 extern struct page *m2p_find_override(unsigned long mfn);
 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
-#ifdef CONFIG_XEN_DEBUG_FS
-extern int p2m_dump_show(struct seq_file *m, void *v);
-#endif
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
        unsigned long mfn;
index 1017c7b..11a9301 100644 (file)
@@ -175,8 +175,10 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
                                               "pcifront-msi-x" :
                                               "pcifront-msi",
                                                DOMID_SELF);
-               if (irq < 0)
+               if (irq < 0) {
+                       ret = irq;
                        goto free;
+               }
                i++;
        }
        kfree(v);
@@ -221,8 +223,10 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
                if (msg.data != XEN_PIRQ_MSI_DATA ||
                    xen_irq_from_pirq(pirq) < 0) {
                        pirq = xen_allocate_pirq_msi(dev, msidesc);
-                       if (pirq < 0)
+                       if (pirq < 0) {
+                               irq = -ENODEV;
                                goto error;
+                       }
                        xen_msi_compose_msg(dev, pirq, &msg);
                        __write_msi_msg(msidesc, &msg);
                        dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
@@ -244,7 +248,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 error:
        dev_err(&dev->dev,
                "Xen PCI frontend has not registered MSI/MSI-X support!\n");
-       return -ENODEV;
+       return irq;
 }
 
 #ifdef CONFIG_XEN_DOM0
index 5cc821c..ae559fe 100644 (file)
@@ -49,11 +49,3 @@ config XEN_DEBUG_FS
        help
          Enable statistics output and various tuning options in debugfs.
          Enabling this option may incur a significant performance overhead.
-
-config XEN_DEBUG
-       bool "Enable Xen debug checks"
-       depends on XEN
-       default n
-       help
-         Enable various WARN_ON checks in the Xen MMU code.
-         Enabling this option WILL incur a significant performance overhead.
index 2d69617..da8afd5 100644 (file)
@@ -251,6 +251,7 @@ static void __init xen_init_cpuid_mask(void)
                        ~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
                          (1 << X86_FEATURE_ACPI));  /* disable ACPI */
        ax = 1;
+       cx = 0;
        xen_cpuid(&ax, &bx, &cx, &dx);
 
        xsave_mask =
index 3dd53f9..87f6673 100644 (file)
@@ -495,41 +495,6 @@ static pte_t xen_make_pte(pteval_t pte)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
 
-#ifdef CONFIG_XEN_DEBUG
-pte_t xen_make_pte_debug(pteval_t pte)
-{
-       phys_addr_t addr = (pte & PTE_PFN_MASK);
-       phys_addr_t other_addr;
-       bool io_page = false;
-       pte_t _pte;
-
-       if (pte & _PAGE_IOMAP)
-               io_page = true;
-
-       _pte = xen_make_pte(pte);
-
-       if (!addr)
-               return _pte;
-
-       if (io_page &&
-           (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
-               other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
-               WARN_ONCE(addr != other_addr,
-                       "0x%lx is using VM_IO, but it is 0x%lx!\n",
-                       (unsigned long)addr, (unsigned long)other_addr);
-       } else {
-               pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
-               other_addr = (_pte.pte & PTE_PFN_MASK);
-               WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set),
-                       "0x%lx is missing VM_IO (and wasn't fixed)!\n",
-                       (unsigned long)addr);
-       }
-
-       return _pte;
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
-#endif
-
 static pgd_t xen_make_pgd(pgdval_t pgd)
 {
        pgd = pte_pfn_to_mfn(pgd);
@@ -1992,9 +1957,6 @@ void __init xen_ident_map_ISA(void)
 
 static void __init xen_post_allocator_init(void)
 {
-#ifdef CONFIG_XEN_DEBUG
-       pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
-#endif
        pv_mmu_ops.set_pte = xen_set_pte;
        pv_mmu_ops.set_pmd = xen_set_pmd;
        pv_mmu_ops.set_pud = xen_set_pud;
@@ -2404,17 +2366,3 @@ out:
        return err;
 }
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
-
-#ifdef CONFIG_XEN_DEBUG_FS
-static int p2m_dump_open(struct inode *inode, struct file *filp)
-{
-       return single_open(filp, p2m_dump_show, NULL);
-}
-
-static const struct file_operations p2m_dump_fops = {
-       .open           = p2m_dump_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-#endif /* CONFIG_XEN_DEBUG_FS */
index 58efeb9..1b267e7 100644 (file)
 #include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
+#include <xen/grant_table.h>
 
+#include "multicalls.h"
 #include "xen-ops.h"
 
 static void __init m2p_override_init(void);
@@ -676,7 +678,8 @@ static unsigned long mfn_hash(unsigned long mfn)
 }
 
 /* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
+int m2p_add_override(unsigned long mfn, struct page *page,
+               struct gnttab_map_grant_ref *kmap_op)
 {
        unsigned long flags;
        unsigned long pfn;
@@ -692,16 +695,28 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
                                        "m2p_add_override: pfn %lx not mapped", pfn))
                        return -EINVAL;
        }
-
-       page->private = mfn;
+       WARN_ON(PagePrivate(page));
+       SetPagePrivate(page);
+       set_page_private(page, mfn);
        page->index = pfn_to_mfn(pfn);
 
        if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
                return -ENOMEM;
 
-       if (clear_pte && !PageHighMem(page))
-               /* Just zap old mapping for now */
-               pte_clear(&init_mm, address, ptep);
+       if (kmap_op != NULL) {
+               if (!PageHighMem(page)) {
+                       struct multicall_space mcs =
+                               xen_mc_entry(sizeof(*kmap_op));
+
+                       MULTI_grant_table_op(mcs.mc,
+                                       GNTTABOP_map_grant_ref, kmap_op, 1);
+
+                       xen_mc_issue(PARAVIRT_LAZY_MMU);
+               }
+               /* let's use dev_bus_addr to record the old mfn instead */
+               kmap_op->dev_bus_addr = page->index;
+               page->index = (unsigned long) kmap_op;
+       }
        spin_lock_irqsave(&m2p_override_lock, flags);
        list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
        spin_unlock_irqrestore(&m2p_override_lock, flags);
@@ -735,13 +750,56 @@ int m2p_remove_override(struct page *page, bool clear_pte)
        spin_lock_irqsave(&m2p_override_lock, flags);
        list_del(&page->lru);
        spin_unlock_irqrestore(&m2p_override_lock, flags);
-       set_phys_to_machine(pfn, page->index);
+       WARN_ON(!PagePrivate(page));
+       ClearPagePrivate(page);
 
-       if (clear_pte && !PageHighMem(page))
-               set_pte_at(&init_mm, address, ptep,
-                               pfn_pte(pfn, PAGE_KERNEL));
-               /* No tlb flush necessary because the caller already
-                * left the pte unmapped. */
+       if (clear_pte) {
+               struct gnttab_map_grant_ref *map_op =
+                       (struct gnttab_map_grant_ref *) page->index;
+               set_phys_to_machine(pfn, map_op->dev_bus_addr);
+               if (!PageHighMem(page)) {
+                       struct multicall_space mcs;
+                       struct gnttab_unmap_grant_ref *unmap_op;
+
+                       /*
+                        * It might be that we queued all the m2p grant table
+                        * hypercalls in a multicall, then m2p_remove_override
+                        * get called before the multicall has actually been
+                        * issued. In this case handle is going to -1 because
+                        * it hasn't been modified yet.
+                        */
+                       if (map_op->handle == -1)
+                               xen_mc_flush();
+                       /*
+                        * Now if map_op->handle is negative it means that the
+                        * hypercall actually returned an error.
+                        */
+                       if (map_op->handle == GNTST_general_error) {
+                               printk(KERN_WARNING "m2p_remove_override: "
+                                               "pfn %lx mfn %lx, failed to modify kernel mappings",
+                                               pfn, mfn);
+                               return -1;
+                       }
+
+                       mcs = xen_mc_entry(
+                                       sizeof(struct gnttab_unmap_grant_ref));
+                       unmap_op = mcs.args;
+                       unmap_op->host_addr = map_op->host_addr;
+                       unmap_op->handle = map_op->handle;
+                       unmap_op->dev_bus_addr = 0;
+
+                       MULTI_grant_table_op(mcs.mc,
+                                       GNTTABOP_unmap_grant_ref, unmap_op, 1);
+
+                       xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+                       set_pte_at(&init_mm, address, ptep,
+                                       pfn_pte(pfn, PAGE_KERNEL));
+                       __flush_tlb_single(address);
+                       map_op->host_addr = 0;
+               }
+       } else
+               set_phys_to_machine(pfn, page->index);
 
        return 0;
 }
@@ -758,7 +816,7 @@ struct page *m2p_find_override(unsigned long mfn)
        spin_lock_irqsave(&m2p_override_lock, flags);
 
        list_for_each_entry(p, bucket, lru) {
-               if (p->private == mfn) {
+               if (page_private(p) == mfn) {
                        ret = p;
                        break;
                }
@@ -782,17 +840,21 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
 EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
 
 #ifdef CONFIG_XEN_DEBUG_FS
-
-int p2m_dump_show(struct seq_file *m, void *v)
+#include <linux/debugfs.h>
+#include "debugfs.h"
+static int p2m_dump_show(struct seq_file *m, void *v)
 {
        static const char * const level_name[] = { "top", "middle",
-                                               "entry", "abnormal" };
-       static const char * const type_name[] = { "identity", "missing",
-                                               "pfn", "abnormal"};
+                                               "entry", "abnormal", "error"};
 #define TYPE_IDENTITY 0
 #define TYPE_MISSING 1
 #define TYPE_PFN 2
 #define TYPE_UNKNOWN 3
+       static const char * const type_name[] = {
+                               [TYPE_IDENTITY] = "identity",
+                               [TYPE_MISSING] = "missing",
+                               [TYPE_PFN] = "pfn",
+                               [TYPE_UNKNOWN] = "abnormal"};
        unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
        unsigned int uninitialized_var(prev_level);
        unsigned int uninitialized_var(prev_type);
@@ -856,4 +918,32 @@ int p2m_dump_show(struct seq_file *m, void *v)
 #undef TYPE_PFN
 #undef TYPE_UNKNOWN
 }
-#endif
+
+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+       return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+       .open           = p2m_dump_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static struct dentry *d_mmu_debug;
+
+static int __init xen_p2m_debugfs(void)
+{
+       struct dentry *d_xen = xen_init_debugfs();
+
+       if (d_xen == NULL)
+               return -ENOMEM;
+
+       d_mmu_debug = debugfs_create_dir("mmu", d_xen);
+
+       debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
+       return 0;
+}
+fs_initcall(xen_p2m_debugfs);
+#endif /* CONFIG_XEN_DEBUG_FS */
index 2330a9a..1540792 100644 (file)
@@ -396,7 +396,7 @@ static int xen_blkbk_map(struct blkif_request *req,
                        continue;
 
                ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
-                       blkbk->pending_page(pending_req, i), false);
+                       blkbk->pending_page(pending_req, i), NULL);
                if (ret) {
                        pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n",
                                 (unsigned long)map[i].dev_bus_addr, ret);
index fc43b53..5876e1a 100644 (file)
@@ -501,20 +501,24 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target);
  * alloc_xenballooned_pages - get pages that have been ballooned out
  * @nr_pages: Number of pages to get
  * @pages: pages returned
+ * @highmem: highmem or lowmem pages
  * @return 0 on success, error otherwise
  */
-int alloc_xenballooned_pages(int nr_pages, struct page** pages)
+int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
 {
        int pgno = 0;
        struct page* page;
        mutex_lock(&balloon_mutex);
        while (pgno < nr_pages) {
-               page = balloon_retrieve(true);
-               if (page) {
+               page = balloon_retrieve(highmem);
+               if (page && PageHighMem(page) == highmem) {
                        pages[pgno++] = page;
                } else {
                        enum bp_state st;
-                       st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER);
+                       if (page)
+                               balloon_append(page);
+                       st = decrease_reservation(nr_pages - pgno,
+                                       highmem ? GFP_HIGHUSER : GFP_USER);
                        if (st != BP_DONE)
                                goto out_undo;
                }
index 7523719..212a5c8 100644 (file)
@@ -432,7 +432,8 @@ static int __must_check xen_allocate_irq_dynamic(void)
 
        irq = irq_alloc_desc_from(first, -1);
 
-       xen_irq_init(irq);
+       if (irq >= 0)
+               xen_irq_init(irq);
 
        return irq;
 }
@@ -713,7 +714,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
        mutex_lock(&irq_mapping_update_lock);
 
        irq = xen_allocate_irq_dynamic();
-       if (irq == -1)
+       if (irq < 0)
                goto out;
 
        irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
@@ -729,7 +730,7 @@ out:
 error_irq:
        mutex_unlock(&irq_mapping_update_lock);
        xen_free_irq(irq);
-       return -1;
+       return ret;
 }
 #endif
 
@@ -779,7 +780,7 @@ int xen_irq_from_pirq(unsigned pirq)
        mutex_lock(&irq_mapping_update_lock);
 
        list_for_each_entry(info, &xen_irq_list_head, list) {
-               if (info == NULL || info->type != IRQT_PIRQ)
+               if (info->type != IRQT_PIRQ)
                        continue;
                irq = info->irq;
                if (info->u.pirq.pirq == pirq)
@@ -1670,6 +1671,7 @@ void __init xen_init_IRQ(void)
 
        evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
                                    GFP_KERNEL);
+       BUG_ON(!evtchn_to_irq);
        for (i = 0; i < NR_EVENT_CHANNELS; i++)
                evtchn_to_irq[i] = -1;
 
index f914b26..880798a 100644 (file)
@@ -83,6 +83,7 @@ struct grant_map {
        struct ioctl_gntdev_grant_ref *grants;
        struct gnttab_map_grant_ref   *map_ops;
        struct gnttab_unmap_grant_ref *unmap_ops;
+       struct gnttab_map_grant_ref   *kmap_ops;
        struct page **pages;
 };
 
@@ -116,19 +117,22 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
        add->grants    = kzalloc(sizeof(add->grants[0])    * count, GFP_KERNEL);
        add->map_ops   = kzalloc(sizeof(add->map_ops[0])   * count, GFP_KERNEL);
        add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL);
+       add->kmap_ops  = kzalloc(sizeof(add->kmap_ops[0])  * count, GFP_KERNEL);
        add->pages     = kzalloc(sizeof(add->pages[0])     * count, GFP_KERNEL);
        if (NULL == add->grants    ||
            NULL == add->map_ops   ||
            NULL == add->unmap_ops ||
+           NULL == add->kmap_ops  ||
            NULL == add->pages)
                goto err;
 
-       if (alloc_xenballooned_pages(count, add->pages))
+       if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */))
                goto err;
 
        for (i = 0; i < count; i++) {
                add->map_ops[i].handle = -1;
                add->unmap_ops[i].handle = -1;
+               add->kmap_ops[i].handle = -1;
        }
 
        add->index = 0;
@@ -142,6 +146,7 @@ err:
        kfree(add->grants);
        kfree(add->map_ops);
        kfree(add->unmap_ops);
+       kfree(add->kmap_ops);
        kfree(add);
        return NULL;
 }
@@ -243,10 +248,35 @@ static int map_grant_pages(struct grant_map *map)
                        gnttab_set_unmap_op(&map->unmap_ops[i], addr,
                                map->flags, -1 /* handle */);
                }
+       } else {
+               /*
+                * Setup the map_ops corresponding to the pte entries pointing
+                * to the kernel linear addresses of the struct pages.
+                * These ptes are completely different from the user ptes dealt
+                * with find_grant_ptes.
+                */
+               for (i = 0; i < map->count; i++) {
+                       unsigned level;
+                       unsigned long address = (unsigned long)
+                               pfn_to_kaddr(page_to_pfn(map->pages[i]));
+                       pte_t *ptep;
+                       u64 pte_maddr = 0;
+                       BUG_ON(PageHighMem(map->pages[i]));
+
+                       ptep = lookup_address(address, &level);
+                       pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
+                       gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
+                               map->flags |
+                               GNTMAP_host_map |
+                               GNTMAP_contains_pte,
+                               map->grants[i].ref,
+                               map->grants[i].domid);
+               }
        }
 
        pr_debug("map %d+%d\n", map->index, map->count);
-       err = gnttab_map_refs(map->map_ops, map->pages, map->count);
+       err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
+                       map->pages, map->count);
        if (err)
                return err;
 
@@ -462,13 +492,11 @@ static int gntdev_release(struct inode *inode, struct file *flip)
 
        pr_debug("priv %p\n", priv);
 
-       spin_lock(&priv->lock);
        while (!list_empty(&priv->maps)) {
                map = list_entry(priv->maps.next, struct grant_map, next);
                list_del(&map->next);
                gntdev_put_map(map);
        }
-       spin_unlock(&priv->lock);
 
        if (use_ptemod)
                mmu_notifier_unregister(&priv->mn, priv->mm);
@@ -532,10 +560,11 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
        map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
        if (map) {
                list_del(&map->next);
-               gntdev_put_map(map);
                err = 0;
        }
        spin_unlock(&priv->lock);
+       if (map)
+               gntdev_put_map(map);
        return err;
 }
 
index 4f44b34..8c71ab8 100644 (file)
@@ -448,7 +448,8 @@ unsigned int gnttab_max_grant_frames(void)
 EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
 
 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
-                   struct page **pages, unsigned int count)
+                       struct gnttab_map_grant_ref *kmap_ops,
+                       struct page **pages, unsigned int count)
 {
        int i, ret;
        pte_t *pte;
@@ -488,8 +489,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
                         */
                        return -EOPNOTSUPP;
                }
-               ret = m2p_add_override(mfn, pages[i],
-                                      map_ops[i].flags & GNTMAP_contains_pte);
+               ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
                if (ret)
                        return ret;
        }
index 6ea852e..d93c708 100644 (file)
@@ -68,6 +68,8 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/bootmem.h>
+#include <linux/swap.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/module.h>
@@ -93,6 +95,15 @@ static unsigned int selfballoon_uphysteresis __read_mostly = 1;
 /* In HZ, controls frequency of worker invocation. */
 static unsigned int selfballoon_interval __read_mostly = 5;
 
+/*
+ * Minimum usable RAM in MB for selfballooning target for balloon.
+ * If non-zero, it is added to totalreserve_pages and self-ballooning
+ * will not balloon below the sum.  If zero, a piecewise linear function
+ * is calculated as a minimum and added to totalreserve_pages.  Note that
+ * setting this value indiscriminately may cause OOMs and crashes.
+ */
+static unsigned int selfballoon_min_usable_mb;
+
 static void selfballoon_process(struct work_struct *work);
 static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
 
@@ -189,20 +200,23 @@ static int __init xen_selfballooning_setup(char *s)
 __setup("selfballooning", xen_selfballooning_setup);
 #endif /* CONFIG_FRONTSWAP */
 
+#define MB2PAGES(mb)   ((mb) << (20 - PAGE_SHIFT))
+
 /*
  * Use current balloon size, the goal (vm_committed_as), and hysteresis
  * parameters to set a new target balloon size
  */
 static void selfballoon_process(struct work_struct *work)
 {
-       unsigned long cur_pages, goal_pages, tgt_pages;
+       unsigned long cur_pages, goal_pages, tgt_pages, floor_pages;
+       unsigned long useful_pages;
        bool reset_timer = false;
 
        if (xen_selfballooning_enabled) {
-               cur_pages = balloon_stats.current_pages;
+               cur_pages = totalram_pages;
                tgt_pages = cur_pages; /* default is no change */
                goal_pages = percpu_counter_read_positive(&vm_committed_as) +
-                       balloon_stats.current_pages - totalram_pages;
+                               totalreserve_pages;
 #ifdef CONFIG_FRONTSWAP
                /* allow space for frontswap pages to be repatriated */
                if (frontswap_selfshrinking && frontswap_enabled)
@@ -217,7 +231,26 @@ static void selfballoon_process(struct work_struct *work)
                                ((goal_pages - cur_pages) /
                                  selfballoon_uphysteresis);
                /* else if cur_pages == goal_pages, no change */
-               balloon_set_new_target(tgt_pages);
+               useful_pages = max_pfn - totalreserve_pages;
+               if (selfballoon_min_usable_mb != 0)
+                       floor_pages = totalreserve_pages +
+                                       MB2PAGES(selfballoon_min_usable_mb);
+               /* piecewise linear function ending in ~3% slope */
+               else if (useful_pages < MB2PAGES(16))
+                       floor_pages = max_pfn; /* not worth ballooning */
+               else if (useful_pages < MB2PAGES(64))
+                       floor_pages = totalreserve_pages + MB2PAGES(16) +
+                                       ((useful_pages - MB2PAGES(16)) >> 1);
+               else if (useful_pages < MB2PAGES(512))
+                       floor_pages = totalreserve_pages + MB2PAGES(40) +
+                                       ((useful_pages - MB2PAGES(40)) >> 3);
+               else /* useful_pages >= MB2PAGES(512) */
+                       floor_pages = totalreserve_pages + MB2PAGES(99) +
+                                       ((useful_pages - MB2PAGES(99)) >> 5);
+               if (tgt_pages < floor_pages)
+                       tgt_pages = floor_pages;
+               balloon_set_new_target(tgt_pages +
+                       balloon_stats.current_pages - totalram_pages);
                reset_timer = true;
        }
 #ifdef CONFIG_FRONTSWAP
@@ -340,6 +373,31 @@ static ssize_t store_selfballoon_uphys(struct sys_device *dev,
 static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR,
                   show_selfballoon_uphys, store_selfballoon_uphys);
 
+SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n",
+                               selfballoon_min_usable_mb);
+
+static ssize_t store_selfballoon_min_usable_mb(struct sys_device *dev,
+                                              struct sysdev_attribute *attr,
+                                              const char *buf,
+                                              size_t count)
+{
+       unsigned long val;
+       int err;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       err = strict_strtoul(buf, 10, &val);
+       if (err || val == 0)
+               return -EINVAL;
+       selfballoon_min_usable_mb = val;
+       return count;
+}
+
+static SYSDEV_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR,
+                  show_selfballoon_min_usable_mb,
+                  store_selfballoon_min_usable_mb);
+
+
 #ifdef CONFIG_FRONTSWAP
 SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking);
 
@@ -421,6 +479,7 @@ static struct attribute *selfballoon_attrs[] = {
        &attr_selfballoon_interval.attr,
        &attr_selfballoon_downhysteresis.attr,
        &attr_selfballoon_uphysteresis.attr,
+       &attr_selfballoon_min_usable_mb.attr,
 #ifdef CONFIG_FRONTSWAP
        &attr_frontswap_selfshrinking.attr,
        &attr_frontswap_hysteresis.attr,
index 60adf91..32417b5 100644 (file)
@@ -104,8 +104,6 @@ static int xenbus_uevent_backend(struct device *dev,
 
        xdev = to_xenbus_device(dev);
        bus = container_of(xdev->dev.bus, struct xen_bus_type, bus);
-       if (xdev == NULL)
-               return -ENODEV;
 
        if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
                return -ENOMEM;
index 76f7538..d29c153 100644 (file)
@@ -25,8 +25,9 @@ extern struct balloon_stats balloon_stats;
 
 void balloon_set_new_target(unsigned long target);
 
-int alloc_xenballooned_pages(int nr_pages, struct page** pages);
-void free_xenballooned_pages(int nr_pages, struct page** pages);
+int alloc_xenballooned_pages(int nr_pages, struct page **pages,
+               bool highmem);
+void free_xenballooned_pages(int nr_pages, struct page **pages);
 
 struct sys_device;
 #ifdef CONFIG_XEN_SELFBALLOONING
index b1fab6b..6b99bfb 100644 (file)
@@ -156,6 +156,7 @@ unsigned int gnttab_max_grant_frames(void);
 #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
 
 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
+                       struct gnttab_map_grant_ref *kmap_ops,
                    struct page **pages, unsigned int count);
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
                      struct page **pages, unsigned int count);