Merge branches 'stable/backend.base.v3' and 'stable/gntalloc.v7' of git://git.kernel...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 19 May 2011 23:14:25 +0000 (16:14 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 19 May 2011 23:14:25 +0000 (16:14 -0700)
* 'stable/backend.base.v3' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen/pci: Fix compiler error when CONFIG_XEN_PRIVILEGED_GUEST is not set.
  xen/p2m: Add EXPORT_SYMBOL_GPL to the M2P override functions.
  xen/p2m/m2p/gnttab: Support GNTMAP_host_map in the M2P override.
  xen/irq: The Xen hypervisor cleans up the PIRQs if the other domain forgot.
  xen/irq: Export 'xen_pirq_from_irq' function.
  xen/irq: Add support to check if IRQ line is shared with other domains.
  xen/irq: Check if the PCI device is owned by a domain different than DOMID_SELF.
  xen/pci: Add xen_[find|register|unregister]_device_domain_owner functions.

* 'stable/gntalloc.v7' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen/gntdev,gntalloc: Remove unneeded VM flags

drivers/xen/events.c
drivers/xen/gntdev.c

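Among other things, the backend.base branch introduces bind_interdomain_evtchn_to_irqhandler(), which lets a backend driver bind a remote domain's event channel and install a handler in a single call (see the events.c hunks below). A minimal sketch of how a backend might use it; the handler and connect function here are hypothetical:

    #include <linux/interrupt.h>
    #include <xen/events.h>

    /* Hypothetical per-ring interrupt handler for a backend driver. */
    static irqreturn_t example_be_interrupt(int irq, void *dev_id)
    {
            /* Kick the backend's work for this ring. */
            return IRQ_HANDLED;
    }

    static int example_connect_ring(unsigned int frontend_domid,
                                    unsigned int remote_evtchn)
    {
            int irq;

            /* Binds the frontend's port and requests the IRQ in one
             * step; returns the local IRQ or a negative value. */
            irq = bind_interdomain_evtchn_to_irqhandler(frontend_domid,
                                                        remote_evtchn,
                                                        example_be_interrupt,
                                                        0, "example-backend",
                                                        NULL);
            if (irq < 0)
                    return irq;

            /* Tear down later with unbind_from_irqhandler(irq, NULL). */
            return 0;
    }
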
diff --combined drivers/xen/events.c
@@@@ -5,7 -5,7 -5,7 +5,7 @@@@
    * domain gets 1024 event channels, but NR_IRQ is not that large, we
    * must dynamically map irqs<->event channels.  The event channels
    * interface with the rest of the kernel by defining a xen interrupt
  - * chip.  When an event is recieved, it is mapped to an irq and sent
  + * chip.  When an event is received, it is mapped to an irq and sent
    * through the normal interrupt processing path.
    *
    * There are four kinds of events which can be mapped to an event
    */
   static DEFINE_SPINLOCK(irq_mapping_update_lock);
   
  +static LIST_HEAD(xen_irq_list_head);
  +
   /* IRQ <-> VIRQ mapping. */
   static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
   
@@@@ -87,9 -87,9 -85,7 +87,9 @@@@ enum xen_irq_type 
    */
   struct irq_info
   {
  +     struct list_head list;
        enum xen_irq_type type; /* type */
  +     unsigned irq;
        unsigned short evtchn;  /* event channel */
        unsigned short cpu;     /* cpu bound */
   
                        unsigned short gsi;
                        unsigned char vector;
                        unsigned char flags;
+ +                     uint16_t domid;
                } pirq;
        } u;
   };
   #define PIRQ_NEEDS_EOI       (1 << 0)
   #define PIRQ_SHAREABLE       (1 << 1)
   
  -static struct irq_info *irq_info;
  -static int *pirq_to_irq;
  -
   static int *evtchn_to_irq;
  -struct cpu_evtchn_s {
  -     unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG];
  -};
  -
  -static __initdata struct cpu_evtchn_s init_evtchn_mask = {
  -     .bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul,
  -};
  -static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask;
   
  -static inline unsigned long *cpu_evtchn_mask(int cpu)
  -{
  -     return cpu_evtchn_mask_p[cpu].bits;
  -}
  +static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG],
  +                   cpu_evtchn_mask);
   
   /* Xen will never allocate port zero for any purpose. */
   #define VALID_EVTCHN(chn)    ((chn) != 0)
@@@@ -119,86 -120,88 -128,46 +120,88 @@@@ static struct irq_chip xen_dynamic_chip
   static struct irq_chip xen_percpu_chip;
   static struct irq_chip xen_pirq_chip;
   
  -/* Constructor for packed IRQ information. */
  -static struct irq_info mk_unbound_info(void)
  +/* Get info for IRQ */
  +static struct irq_info *info_for_irq(unsigned irq)
   {
  -     return (struct irq_info) { .type = IRQT_UNBOUND };
  +     return irq_get_handler_data(irq);
   }
   
  -static struct irq_info mk_evtchn_info(unsigned short evtchn)
  +/* Constructors for packed IRQ information. */
  +static void xen_irq_info_common_init(struct irq_info *info,
  +                                  unsigned irq,
  +                                  enum xen_irq_type type,
  +                                  unsigned short evtchn,
  +                                  unsigned short cpu)
   {
  -     return (struct irq_info) { .type = IRQT_EVTCHN, .evtchn = evtchn,
  -                     .cpu = 0 };
  +
  +     BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
  +
  +     info->type = type;
  +     info->irq = irq;
  +     info->evtchn = evtchn;
  +     info->cpu = cpu;
  +
  +     evtchn_to_irq[evtchn] = irq;
   }
   
  -static struct irq_info mk_ipi_info(unsigned short evtchn, enum ipi_vector ipi)
  +static void xen_irq_info_evtchn_init(unsigned irq,
  +                                  unsigned short evtchn)
   {
  -     return (struct irq_info) { .type = IRQT_IPI, .evtchn = evtchn,
  -                     .cpu = 0, .u.ipi = ipi };
  +     struct irq_info *info = info_for_irq(irq);
  +
  +     xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0);
   }
   
  -static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq)
  +static void xen_irq_info_ipi_init(unsigned cpu,
  +                               unsigned irq,
  +                               unsigned short evtchn,
  +                               enum ipi_vector ipi)
   {
  -     return (struct irq_info) { .type = IRQT_VIRQ, .evtchn = evtchn,
  -                     .cpu = 0, .u.virq = virq };
  +     struct irq_info *info = info_for_irq(irq);
  +
  +     xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0);
  +
  +     info->u.ipi = ipi;
  +
  +     per_cpu(ipi_to_irq, cpu)[ipi] = irq;
   }
   
  -static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short pirq,
  -                                 unsigned short gsi, unsigned short vector)
  +static void xen_irq_info_virq_init(unsigned cpu,
  +                                unsigned irq,
  +                                unsigned short evtchn,
  +                                unsigned short virq)
   {
  -     return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
  -                     .cpu = 0,
  -                     .u.pirq = { .pirq = pirq, .gsi = gsi, .vector = vector } };
  +     struct irq_info *info = info_for_irq(irq);
  +
  +     xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0);
  +
  +     info->u.virq = virq;
  +
  +     per_cpu(virq_to_irq, cpu)[virq] = irq;
   }
   
  -/*
  - * Accessors for packed IRQ information.
  - */
  -static struct irq_info *info_for_irq(unsigned irq)
  +static void xen_irq_info_pirq_init(unsigned irq,
  +                                unsigned short evtchn,
  +                                unsigned short pirq,
  +                                unsigned short gsi,
  +                                unsigned short vector,
+ +                                uint16_t domid,
  +                                unsigned char flags)
   {
  -     return &irq_info[irq];
  +     struct irq_info *info = info_for_irq(irq);
  +
  +     xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0);
  +
  +     info->u.pirq.pirq = pirq;
  +     info->u.pirq.gsi = gsi;
  +     info->u.pirq.vector = vector;
+ +     info->u.pirq.domid = domid;
  +     info->u.pirq.flags = flags;
   }
   
  +/*
  + * Accessors for packed IRQ information.
  + */
   static unsigned int evtchn_from_irq(unsigned irq)
   {
        if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq)))
@@@@ -243,6 -246,6 -212,26 +246,6 @@@@ static unsigned pirq_from_irq(unsigned 
        return info->u.pirq.pirq;
   }
   
  -static unsigned gsi_from_irq(unsigned irq)
  -{
  -     struct irq_info *info = info_for_irq(irq);
  -
  -     BUG_ON(info == NULL);
  -     BUG_ON(info->type != IRQT_PIRQ);
  -
  -     return info->u.pirq.gsi;
  -}
  -
  -static unsigned vector_from_irq(unsigned irq)
  -{
  -     struct irq_info *info = info_for_irq(irq);
  -
  -     BUG_ON(info == NULL);
  -     BUG_ON(info->type != IRQT_PIRQ);
  -
  -     return info->u.pirq.vector;
  -}
  -
   static enum xen_irq_type type_from_irq(unsigned irq)
   {
        return info_for_irq(irq)->type;
@@@@ -278,7 -281,7 -267,7 +281,7 @@@@ static inline unsigned long active_evtc
                                           unsigned int idx)
   {
        return (sh->evtchn_pending[idx] &
  -             cpu_evtchn_mask(cpu)[idx] &
  +             per_cpu(cpu_evtchn_mask, cpu)[idx] &
                ~sh->evtchn_mask[idx]);
   }
   
@@@@ -288,31 -291,31 -277,31 +291,31 @@@@ static void bind_evtchn_to_cpu(unsigne
   
        BUG_ON(irq == -1);
   #ifdef CONFIG_SMP
  -     cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
  +     cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
   #endif
   
  -     clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
  -     set_bit(chn, cpu_evtchn_mask(cpu));
  +     clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_from_irq(irq)));
  +     set_bit(chn, per_cpu(cpu_evtchn_mask, cpu));
   
  -     irq_info[irq].cpu = cpu;
  +     info_for_irq(irq)->cpu = cpu;
   }
   
   static void init_evtchn_cpu_bindings(void)
   {
        int i;
   #ifdef CONFIG_SMP
  -     struct irq_desc *desc;
  +     struct irq_info *info;
   
        /* By default all event channels notify CPU#0. */
  -     for_each_irq_desc(i, desc) {
  -             cpumask_copy(desc->affinity, cpumask_of(0));
  +     list_for_each_entry(info, &xen_irq_list_head, list) {
  +             struct irq_desc *desc = irq_to_desc(info->irq);
  +             cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
        }
   #endif
   
        for_each_possible_cpu(i)
  -             memset(cpu_evtchn_mask(i),
  -                    (i == 0) ? ~0 : 0, sizeof(struct cpu_evtchn_s));
  -
  +             memset(per_cpu(cpu_evtchn_mask, i),
  +                    (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i)));
   }
   
   static inline void clear_evtchn(int port)
@@@@ -387,90 -390,90 -376,81 +390,90 @@@@ static void unmask_evtchn(int port
        put_cpu();
   }
   
  -static int get_nr_hw_irqs(void)
  +static void xen_irq_init(unsigned irq)
   {
  -     int ret = 1;
  +     struct irq_info *info;
  +     struct irq_desc *desc = irq_to_desc(irq);
   
  -#ifdef CONFIG_X86_IO_APIC
  -     ret = get_nr_irqs_gsi();
  +#ifdef CONFIG_SMP
  +     /* By default all event channels notify CPU#0. */
  +     cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
   #endif
   
  -     return ret;
  +     info = kzalloc(sizeof(*info), GFP_KERNEL);
  +     if (info == NULL)
  +             panic("Unable to allocate metadata for IRQ%d\n", irq);
  +
  +     info->type = IRQT_UNBOUND;
  +
  +     irq_set_handler_data(irq, info);
  +
  +     list_add_tail(&info->list, &xen_irq_list_head);
   }
   
  -static int find_unbound_pirq(int type)
  +static int __must_check xen_allocate_irq_dynamic(void)
   {
  -     int rc, i;
  -     struct physdev_get_free_pirq op_get_free_pirq;
  -     op_get_free_pirq.type = type;
  +     int first = 0;
  +     int irq;
   
  -     rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
  -     if (!rc)
  -             return op_get_free_pirq.pirq;
  +#ifdef CONFIG_X86_IO_APIC
  +     /*
  +      * For an HVM guest or domain 0 which see "real" (emulated or
  +      * actual respectively) GSIs we allocate dynamic IRQs
  +      * e.g. those corresponding to event channels or MSIs
  +      * etc. from the range above those "real" GSIs to avoid
  +      * collisions.
  +      */
  +     if (xen_initial_domain() || xen_hvm_domain())
  +             first = get_nr_irqs_gsi();
  +#endif
   
  -     for (i = 0; i < nr_irqs; i++) {
  -             if (pirq_to_irq[i] < 0)
  -                     return i;
  -     }
  -     return -1;
  +     irq = irq_alloc_desc_from(first, -1);
  +
  +     xen_irq_init(irq);
  +
  +     return irq;
   }
   
  -static int find_unbound_irq(void)
  +static int __must_check xen_allocate_irq_gsi(unsigned gsi)
   {
  -     struct irq_data *data;
  -     int irq, res;
  -     int bottom = get_nr_hw_irqs();
  -     int top = nr_irqs-1;
  -
  -     if (bottom == nr_irqs)
  -             goto no_irqs;
  +     int irq;
   
  -     /* This loop starts from the top of IRQ space and goes down.
  -      * We need this b/c if we have a PCI device in a Xen PV guest
  -      * we do not have an IO-APIC (though the backend might have them)
  -      * mapped in. To not have a collision of physical IRQs with the Xen
  -      * event channels start at the top of the IRQ space for virtual IRQs.
  +     /*
  +      * A PV guest has no concept of a GSI (since it has no ACPI
  +      * nor access to/knowledge of the physical APICs). Therefore
  +      * all IRQs are dynamically allocated from the entire IRQ
  +      * space.
         */
  -     for (irq = top; irq > bottom; irq--) {
  -             data = irq_get_irq_data(irq);
  -             /* only 15->0 have init'd desc; handle irq > 16 */
  -             if (!data)
  -                     break;
  -             if (data->chip == &no_irq_chip)
  -                     break;
  -             if (data->chip != &xen_dynamic_chip)
  -                     continue;
  -             if (irq_info[irq].type == IRQT_UNBOUND)
  -                     return irq;
  -     }
  +     if (xen_pv_domain() && !xen_initial_domain())
  +             return xen_allocate_irq_dynamic();
   
  -     if (irq == bottom)
  -             goto no_irqs;
  -
  -     res = irq_alloc_desc_at(irq, -1);
  +     /* Legacy IRQ descriptors are already allocated by the arch. */
  +     if (gsi < NR_IRQS_LEGACY)
  +             irq = gsi;
  +     else
  +             irq = irq_alloc_desc_at(gsi, -1);
   
  -     if (WARN_ON(res != irq))
  -             return -1;
  +     xen_irq_init(irq);
   
        return irq;
  -
  -no_irqs:
  -     panic("No available IRQ to bind to: increase nr_irqs!\n");
   }
   
  -static bool identity_mapped_irq(unsigned irq)
  +static void xen_free_irq(unsigned irq)
   {
  -     /* identity map all the hardware irqs */
  -     return irq < get_nr_hw_irqs();
  +     struct irq_info *info = irq_get_handler_data(irq);
  +
  +     list_del(&info->list);
  +
  +     irq_set_handler_data(irq, NULL);
  +
  +     kfree(info);
  +
  +     /* Legacy IRQ descriptors are managed by the arch. */
  +     if (irq < NR_IRQS_LEGACY)
  +             return;
  +
  +     irq_free_desc(irq);
   }
   
   static void pirq_unmask_notify(int irq)
@@@@ -506,7 -509,7 -486,7 +509,7 @@@@ static bool probing_irq(int irq
        return desc && desc->action == NULL;
   }
   
  -static unsigned int startup_pirq(unsigned int irq)
  +static unsigned int __startup_pirq(unsigned int irq)
   {
        struct evtchn_bind_pirq bind_pirq;
        struct irq_info *info = info_for_irq(irq);
@@@@ -544,15 -547,15 -524,9 +547,15 @@@@ out
        return 0;
   }
   
  -static void shutdown_pirq(unsigned int irq)
  +static unsigned int startup_pirq(struct irq_data *data)
  +{
  +     return __startup_pirq(data->irq);
  +}
  +
  +static void shutdown_pirq(struct irq_data *data)
   {
        struct evtchn_close close;
  +     unsigned int irq = data->irq;
        struct irq_info *info = info_for_irq(irq);
        int evtchn = evtchn_from_irq(irq);
   
        info->evtchn = 0;
   }
   
  -static void enable_pirq(unsigned int irq)
  +static void enable_pirq(struct irq_data *data)
   {
  -     startup_pirq(irq);
  +     startup_pirq(data);
   }
   
  -static void disable_pirq(unsigned int irq)
  +static void disable_pirq(struct irq_data *data)
   {
   }
   
  -static void ack_pirq(unsigned int irq)
  +static void ack_pirq(struct irq_data *data)
   {
  -     int evtchn = evtchn_from_irq(irq);
  +     int evtchn = evtchn_from_irq(data->irq);
   
  -     move_native_irq(irq);
  +     irq_move_irq(data);
   
        if (VALID_EVTCHN(evtchn)) {
                mask_evtchn(evtchn);
        }
   }
   
  -static void end_pirq(unsigned int irq)
  -{
  -     int evtchn = evtchn_from_irq(irq);
  -     struct irq_desc *desc = irq_to_desc(irq);
  -
  -     if (WARN_ON(!desc))
  -             return;
  -
  -     if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
  -         (IRQ_DISABLED|IRQ_PENDING)) {
  -             shutdown_pirq(irq);
  -     } else if (VALID_EVTCHN(evtchn)) {
  -             unmask_evtchn(evtchn);
  -             pirq_unmask_notify(irq);
  -     }
  -}
  -
   static int find_irq_by_gsi(unsigned gsi)
   {
  -     int irq;
  -
  -     for (irq = 0; irq < nr_irqs; irq++) {
  -             struct irq_info *info = info_for_irq(irq);
  +     struct irq_info *info;
   
  -             if (info == NULL || info->type != IRQT_PIRQ)
  +     list_for_each_entry(info, &xen_irq_list_head, list) {
  +             if (info->type != IRQT_PIRQ)
                        continue;
   
  -             if (gsi_from_irq(irq) == gsi)
  -                     return irq;
  +             if (info->u.pirq.gsi == gsi)
  +                     return info->irq;
        }
   
        return -1;
   }
   
  -int xen_allocate_pirq(unsigned gsi, int shareable, char *name)
  +int xen_allocate_pirq_gsi(unsigned gsi)
   {
  -     return xen_map_pirq_gsi(gsi, gsi, shareable, name);
  +     return gsi;
   }
   
  -/* xen_map_pirq_gsi might allocate irqs from the top down, as a
  - * consequence don't assume that the irq number returned has a low value
  - * or can be used as a pirq number unless you know otherwise.
  - *
  - * One notable exception is when xen_map_pirq_gsi is called passing an
  - * hardware gsi as argument, in that case the irq number returned
  - * matches the gsi number passed as second argument.
  +/*
  + * Do not make any assumptions regarding the relationship between the
  + * IRQ number returned here and the Xen pirq argument.
    *
    * Note: We don't assign an event channel until the irq actually started
    * up.  Return an existing irq if we've already got one for the gsi.
    */
  -int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
  +int xen_bind_pirq_gsi_to_irq(unsigned gsi,
  +                          unsigned pirq, int shareable, char *name)
   {
  -     int irq = 0;
  +     int irq = -1;
        struct physdev_irq irq_op;
   
        spin_lock(&irq_mapping_update_lock);
   
  -     if ((pirq > nr_irqs) || (gsi > nr_irqs)) {
  -             printk(KERN_WARNING "xen_map_pirq_gsi: %s %s is incorrect!\n",
  -                     pirq > nr_irqs ? "pirq" :"",
  -                     gsi > nr_irqs ? "gsi" : "");
  -             goto out;
  -     }
  -
        irq = find_irq_by_gsi(gsi);
        if (irq != -1) {
                printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n",
                goto out;       /* XXX need refcount? */
        }
   
  -     /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore
  -      * we are using the !xen_initial_domain() to drop in the function.*/
  -     if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
  -                             xen_pv_domain())) {
  -             irq = gsi;
  -             irq_alloc_desc_at(irq, -1);
  -     } else
  -             irq = find_unbound_irq();
  +     irq = xen_allocate_irq_gsi(gsi);
  +     if (irq < 0)
  +             goto out;
   
  -     set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
  -                                   handle_level_irq, name);
  +     irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq,
  +                                   name);
   
        irq_op.irq = irq;
        irq_op.vector = 0;
         * this in the priv domain. */
        if (xen_initial_domain() &&
            HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
  -             irq_free_desc(irq);
  +             xen_free_irq(irq);
                irq = -ENOSPC;
                goto out;
        }
   
-       xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector,
  -     irq_info[irq] = mk_pirq_info(0, pirq, gsi, irq_op.vector);
  -     irq_info[irq].u.pirq.flags |= shareable ? PIRQ_SHAREABLE : 0;
  -     pirq_to_irq[pirq] = irq;
+ +     xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF,
  +                            shareable ? PIRQ_SHAREABLE : 0);
   
   out:
        spin_unlock(&irq_mapping_update_lock);
   }
   
   #ifdef CONFIG_PCI_MSI
  -#include <linux/msi.h>
  -#include "../pci/msi.h"
  -
  -void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
  +int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
   {
  -     spin_lock(&irq_mapping_update_lock);
  -
  -     if (alloc & XEN_ALLOC_IRQ) {
  -             *irq = find_unbound_irq();
  -             if (*irq == -1)
  -                     goto out;
  -     }
  -
  -     if (alloc & XEN_ALLOC_PIRQ) {
  -             *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI);
  -             if (*pirq == -1)
  -                     goto out;
  -     }
  +     int rc;
  +     struct physdev_get_free_pirq op_get_free_pirq;
   
  -     set_irq_chip_and_handler_name(*irq, &xen_pirq_chip,
  -                                   handle_level_irq, name);
  +     op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
  +     rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
   
  -     irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0);
  -     pirq_to_irq[*pirq] = *irq;
  +     WARN_ONCE(rc == -ENOSYS,
  +               "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
   
  -out:
  -     spin_unlock(&irq_mapping_update_lock);
  +     return rc ? -1 : op_get_free_pirq.pirq;
   }
   
  -int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
  +int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-                            int pirq, int vector, const char *name)
+ +                          int pirq, int vector, const char *name,
+ +                          domid_t domid)
   {
  -     int irq = -1;
  -     struct physdev_map_pirq map_irq;
  -     int rc;
  -     int pos;
  -     u32 table_offset, bir;
  -
  -     memset(&map_irq, 0, sizeof(map_irq));
  -     map_irq.domid = DOMID_SELF;
  -     map_irq.type = MAP_PIRQ_TYPE_MSI;
  -     map_irq.index = -1;
  -     map_irq.pirq = -1;
  -     map_irq.bus = dev->bus->number;
  -     map_irq.devfn = dev->devfn;
  -
  -     if (type == PCI_CAP_ID_MSIX) {
  -             pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
  -
  -             pci_read_config_dword(dev, msix_table_offset_reg(pos),
  -                                     &table_offset);
  -             bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
  -
  -             map_irq.table_base = pci_resource_start(dev, bir);
  -             map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
  -     }
  +     int irq, ret;
   
        spin_lock(&irq_mapping_update_lock);
   
  -     irq = find_unbound_irq();
  -
  +     irq = xen_allocate_irq_dynamic();
        if (irq == -1)
                goto out;
   
  -     rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
  -     if (rc) {
  -             printk(KERN_WARNING "xen map irq failed %d\n", rc);
  -
  -             irq_free_desc(irq);
  -
  -             irq = -1;
  -             goto out;
  -     }
  -     irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
  -
  -     set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
  -                     handle_level_irq,
  -                     (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
  +     irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq,
  +                                   name);
   
-       xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, 0);
+ +     xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0);
  +     ret = irq_set_msi_desc(irq, msidesc);
  +     if (ret < 0)
  +             goto error_irq;
   out:
        spin_unlock(&irq_mapping_update_lock);
        return irq;
  +error_irq:
  +     spin_unlock(&irq_mapping_update_lock);
  +     xen_free_irq(irq);
  +     return -1;
   }
   #endif
   
@@@@ -722,43 -726,56 -773,38 +726,56 @@@@ int xen_destroy_irq(int irq
   
        if (xen_initial_domain()) {
                unmap_irq.pirq = info->u.pirq.pirq;
- -             unmap_irq.domid = DOMID_SELF;
+ +             unmap_irq.domid = info->u.pirq.domid;
                rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
- -             if (rc) {
+ +             /* If another domain quits without making the pci_disable_msix
+ +              * call, the Xen hypervisor takes care of freeing the PIRQs
+ +              * (free_domain_pirqs).
+ +              */
+ +             if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
+ +                     printk(KERN_INFO "domain %d does not have %d anymore\n",
+ +                             info->u.pirq.domid, info->u.pirq.pirq);
+ +             else if (rc) {
                        printk(KERN_WARNING "unmap irq failed %d\n", rc);
                        goto out;
                }
  -             pirq_to_irq[info->u.pirq.pirq] = -1;
        }
  -     irq_info[irq] = mk_unbound_info();
   
  -     irq_free_desc(irq);
  +     xen_free_irq(irq);
   
   out:
        spin_unlock(&irq_mapping_update_lock);
        return rc;
   }
   
  -int xen_vector_from_irq(unsigned irq)
  +int xen_irq_from_pirq(unsigned pirq)
   {
  -     return vector_from_irq(irq);
  -}
  +     int irq;
   
  -int xen_gsi_from_irq(unsigned irq)
  -{
  -     return gsi_from_irq(irq);
  +     struct irq_info *info;
  +
  +     spin_lock(&irq_mapping_update_lock);
  +
  +     list_for_each_entry(info, &xen_irq_list_head, list) {
  +             if (info == NULL || info->type != IRQT_PIRQ)
  +                     continue;
  +             irq = info->irq;
  +             if (info->u.pirq.pirq == pirq)
  +                     goto out;
  +     }
  +     irq = -1;
  +out:
  +     spin_unlock(&irq_mapping_update_lock);
  +
  +     return irq;
   }
   
  -int xen_irq_from_pirq(unsigned pirq)
+ +
+ +int xen_pirq_from_irq(unsigned irq)
+  {
  -     return pirq_to_irq[pirq];
+ +     return pirq_from_irq(irq);
+  }
  -
+ +EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
   int bind_evtchn_to_irq(unsigned int evtchn)
   {
        int irq;
        irq = evtchn_to_irq[evtchn];
   
        if (irq == -1) {
  -             irq = find_unbound_irq();
  +             irq = xen_allocate_irq_dynamic();
  +             if (irq == -1)
  +                     goto out;
   
  -             set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
  +             irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
                                              handle_fasteoi_irq, "event");
   
  -             evtchn_to_irq[evtchn] = irq;
  -             irq_info[irq] = mk_evtchn_info(evtchn);
  +             xen_irq_info_evtchn_init(irq, evtchn);
        }
   
  +out:
        spin_unlock(&irq_mapping_update_lock);
   
        return irq;
@@@@ -795,11 -812,11 -839,11 +812,11 @@@@ static int bind_ipi_to_irq(unsigned in
        irq = per_cpu(ipi_to_irq, cpu)[ipi];
   
        if (irq == -1) {
  -             irq = find_unbound_irq();
  +             irq = xen_allocate_irq_dynamic();
                if (irq < 0)
                        goto out;
   
  -             set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
  +             irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
                                              handle_percpu_irq, "ipi");
   
                bind_ipi.vcpu = cpu;
                        BUG();
                evtchn = bind_ipi.port;
   
  -             evtchn_to_irq[evtchn] = irq;
  -             irq_info[irq] = mk_ipi_info(evtchn, ipi);
  -             per_cpu(ipi_to_irq, cpu)[ipi] = irq;
  +             xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
   
                bind_evtchn_to_cpu(evtchn, cpu);
        }
        return irq;
   }
   
  +static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
  +                                       unsigned int remote_port)
  +{
  +     struct evtchn_bind_interdomain bind_interdomain;
  +     int err;
  +
  +     bind_interdomain.remote_dom  = remote_domain;
  +     bind_interdomain.remote_port = remote_port;
  +
  +     err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
  +                                       &bind_interdomain);
  +
  +     return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
  +}
  +
   
   int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
   {
        irq = per_cpu(virq_to_irq, cpu)[virq];
   
        if (irq == -1) {
  -             irq = find_unbound_irq();
  +             irq = xen_allocate_irq_dynamic();
  +             if (irq == -1)
  +                     goto out;
   
  -             set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
  +             irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
                                              handle_percpu_irq, "virq");
   
                bind_virq.virq = virq;
                        BUG();
                evtchn = bind_virq.port;
   
  -             evtchn_to_irq[evtchn] = irq;
  -             irq_info[irq] = mk_virq_info(evtchn, virq);
  -
  -             per_cpu(virq_to_irq, cpu)[virq] = irq;
  +             xen_irq_info_virq_init(cpu, irq, evtchn, virq);
   
                bind_evtchn_to_cpu(evtchn, cpu);
        }
   
  +out:
        spin_unlock(&irq_mapping_update_lock);
   
        return irq;
@@@@ -900,9 -917,9 -931,11 +917,9 @@@@ static void unbind_from_irq(unsigned in
                evtchn_to_irq[evtchn] = -1;
        }
   
  -     if (irq_info[irq].type != IRQT_UNBOUND) {
  -             irq_info[irq] = mk_unbound_info();
  +     BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
   
  -             irq_free_desc(irq);
  -     }
  +     xen_free_irq(irq);
   
        spin_unlock(&irq_mapping_update_lock);
   }
@@@@ -912,11 -929,12 -945,10 +929,11 @@@@ int bind_evtchn_to_irqhandler(unsigned 
                              unsigned long irqflags,
                              const char *devname, void *dev_id)
   {
 --     unsigned int irq;
 --     int retval;
 ++     int irq, retval;
   
        irq = bind_evtchn_to_irq(evtchn);
  +     if (irq < 0)
  +             return irq;
        retval = request_irq(irq, handler, irqflags, devname, dev_id);
        if (retval != 0) {
                unbind_from_irq(irq);
   }
   EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
   
  +int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
  +                                       unsigned int remote_port,
  +                                       irq_handler_t handler,
  +                                       unsigned long irqflags,
  +                                       const char *devname,
  +                                       void *dev_id)
  +{
  +     int irq, retval;
  +
  +     irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
  +     if (irq < 0)
  +             return irq;
  +
  +     retval = request_irq(irq, handler, irqflags, devname, dev_id);
  +     if (retval != 0) {
  +             unbind_from_irq(irq);
  +             return retval;
  +     }
  +
  +     return irq;
  +}
  +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
  +
   int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
                            irq_handler_t handler,
                            unsigned long irqflags, const char *devname, void *dev_id)
   {
 --     unsigned int irq;
 --     int retval;
 ++     int irq, retval;
   
        irq = bind_virq_to_irq(virq, cpu);
  +     if (irq < 0)
  +             return irq;
        retval = request_irq(irq, handler, irqflags, devname, dev_id);
        if (retval != 0) {
                unbind_from_irq(irq);
@@@@ -982,7 -1001,7 -990,7 +999,7 @@@@ int bind_ipi_to_irqhandler(enum ipi_vec
        if (irq < 0)
                return irq;
   
  -     irqflags |= IRQF_NO_SUSPEND;
  +     irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME;
        retval = request_irq(irq, handler, irqflags, devname, dev_id);
        if (retval != 0) {
                unbind_from_irq(irq);
@@@@ -1010,7 -1029,7 -1018,7 +1027,7 @@@@ irqreturn_t xen_debug_interrupt(int irq
   {
        struct shared_info *sh = HYPERVISOR_shared_info;
        int cpu = smp_processor_id();
  -     unsigned long *cpu_evtchn = cpu_evtchn_mask(cpu);
  +     unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
        int i;
        unsigned long flags;
        static DEFINE_SPINLOCK(debug_lock);
   }
   
   static DEFINE_PER_CPU(unsigned, xed_nesting_count);
  +static DEFINE_PER_CPU(unsigned int, current_word_idx);
  +static DEFINE_PER_CPU(unsigned int, current_bit_idx);
  +
  +/*
  + * Mask out the i least significant bits of w
  + */
  +#define MASK_LSBS(w, i) (w & ((~0UL) << i))
   
   /*
    * Search the CPUs pending events bitmasks.  For each one found, map
    */
   static void __xen_evtchn_do_upcall(void)
   {
  +     int start_word_idx, start_bit_idx;
  +     int word_idx, bit_idx;
  +     int i;
        int cpu = get_cpu();
        struct shared_info *s = HYPERVISOR_shared_info;
        struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
                wmb();
   #endif
                pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
  -             while (pending_words != 0) {
  +
  +             start_word_idx = __this_cpu_read(current_word_idx);
  +             start_bit_idx = __this_cpu_read(current_bit_idx);
  +
  +             word_idx = start_word_idx;
  +
  +             for (i = 0; pending_words != 0; i++) {
                        unsigned long pending_bits;
  -                     int word_idx = __ffs(pending_words);
  -                     pending_words &= ~(1UL << word_idx);
  +                     unsigned long words;
  +
  +                     words = MASK_LSBS(pending_words, word_idx);
  +
  +                     /*
  +                      * If we masked out all events, wrap to beginning.
  +                      */
  +                     if (words == 0) {
  +                             word_idx = 0;
  +                             bit_idx = 0;
  +                             continue;
  +                     }
  +                     word_idx = __ffs(words);
  +
  +                     pending_bits = active_evtchns(cpu, s, word_idx);
  +                     bit_idx = 0; /* usually scan entire word from start */
  +                     if (word_idx == start_word_idx) {
  +                             /* We scan the starting word in two parts */
  +                             if (i == 0)
  +                                     /* 1st time: start in the middle */
  +                                     bit_idx = start_bit_idx;
  +                             else
  +                                     /* 2nd time: mask bits done already */
  +                                     bit_idx &= (1UL << start_bit_idx) - 1;
  +                     }
   
  -                     while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
  -                             int bit_idx = __ffs(pending_bits);
  -                             int port = (word_idx * BITS_PER_LONG) + bit_idx;
  -                             int irq = evtchn_to_irq[port];
  +                     do {
  +                             unsigned long bits;
  +                             int port, irq;
                                struct irq_desc *desc;
   
  +                             bits = MASK_LSBS(pending_bits, bit_idx);
  +
  +                             /* If we masked out all events, move on. */
  +                             if (bits == 0)
  +                                     break;
  +
  +                             bit_idx = __ffs(bits);
  +
  +                             /* Process port. */
  +                             port = (word_idx * BITS_PER_LONG) + bit_idx;
  +                             irq = evtchn_to_irq[port];
  +
                                mask_evtchn(port);
                                clear_evtchn(port);
   
                                        if (desc)
                                                generic_handle_irq_desc(irq, desc);
                                }
  -                     }
  +
  +                             bit_idx = (bit_idx + 1) % BITS_PER_LONG;
  +
  +                             /* Next caller starts at last processed + 1 */
  +                             __this_cpu_write(current_word_idx,
  +                                              bit_idx ? word_idx :
  +                                              (word_idx+1) % BITS_PER_LONG);
  +                             __this_cpu_write(current_bit_idx, bit_idx);
  +                     } while (bit_idx != 0);
  +
  +                     /* Scan start_l1i twice; all others once. */
  +                     if ((word_idx != start_word_idx) || (i != 0))
  +                             pending_words &= ~(1UL << word_idx);
  +
  +                     word_idx = (word_idx + 1) % BITS_PER_LONG;
                }
   
                BUG_ON(!irqs_disabled());
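The rework above replaces the strict low-to-high scan of pending event words with a round-robin resume point (current_word_idx/current_bit_idx), so a busy low-numbered port can no longer starve higher ones. MASK_LSBS() drops the bits already handled in the current pass; a quick worked example:

    #define MASK_LSBS(w, i) (w & ((~0UL) << i))

    /* Resuming a scan at bit_idx == 2 over pending bits 0b10110 drops
     * the already-handled bit 1 and leaves bits 2 and 4 pending:
     *
     *   MASK_LSBS(0x16UL, 2) == 0x14UL
     */
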
@@@@ -1251,7 -1270,7 -1195,8 +1268,7 @@@@ void rebind_evtchn_irq(int evtchn, int 
           so there should be a proper type */
        BUG_ON(info->type == IRQT_UNBOUND);
   
  -     evtchn_to_irq[evtchn] = irq;
  -     irq_info[irq] = mk_evtchn_info(evtchn);
  +     xen_irq_info_evtchn_init(irq, evtchn);
   
        spin_unlock(&irq_mapping_update_lock);
   
@@@@ -1268,14 -1287,14 -1213,10 +1285,14 @@@@ static int rebind_irq_to_cpu(unsigned i
        struct evtchn_bind_vcpu bind_vcpu;
        int evtchn = evtchn_from_irq(irq);
   
  -     /* events delivered via platform PCI interrupts are always
  -      * routed to vcpu 0 */
  -     if (!VALID_EVTCHN(evtchn) ||
  -             (xen_hvm_domain() && !xen_have_vector_callback))
  +     if (!VALID_EVTCHN(evtchn))
  +             return -1;
  +
  +     /*
  +      * Events delivered via platform PCI interrupts are always
  +      * routed to vcpu 0 and hence cannot be rebound.
  +      */
  +     if (xen_hvm_domain() && !xen_have_vector_callback)
                return -1;
   
        /* Send future instances of this interrupt to other vcpu. */
        return 0;
   }
   
  -static int set_affinity_irq(unsigned irq, const struct cpumask *dest)
  +static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
  +                         bool force)
   {
        unsigned tcpu = cpumask_first(dest);
   
  -     return rebind_irq_to_cpu(irq, tcpu);
  +     return rebind_irq_to_cpu(data->irq, tcpu);
   }
   
   int resend_irq_on_evtchn(unsigned int irq)
        return 1;
   }
   
  -static void enable_dynirq(unsigned int irq)
  +static void enable_dynirq(struct irq_data *data)
   {
  -     int evtchn = evtchn_from_irq(irq);
  +     int evtchn = evtchn_from_irq(data->irq);
   
        if (VALID_EVTCHN(evtchn))
                unmask_evtchn(evtchn);
   }
   
  -static void disable_dynirq(unsigned int irq)
  +static void disable_dynirq(struct irq_data *data)
   {
  -     int evtchn = evtchn_from_irq(irq);
  +     int evtchn = evtchn_from_irq(data->irq);
   
        if (VALID_EVTCHN(evtchn))
                mask_evtchn(evtchn);
   }
   
  -static void ack_dynirq(unsigned int irq)
  +static void ack_dynirq(struct irq_data *data)
   {
  -     int evtchn = evtchn_from_irq(irq);
  +     int evtchn = evtchn_from_irq(data->irq);
   
  -     move_masked_irq(irq);
  +     irq_move_masked_irq(data);
   
        if (VALID_EVTCHN(evtchn))
                unmask_evtchn(evtchn);
   }
   
  -static int retrigger_dynirq(unsigned int irq)
  +static int retrigger_dynirq(struct irq_data *data)
   {
  -     int evtchn = evtchn_from_irq(irq);
  +     int evtchn = evtchn_from_irq(data->irq);
        struct shared_info *sh = HYPERVISOR_shared_info;
        int ret = 0;
   
        return ret;
   }
   
  -static void restore_cpu_pirqs(void)
  +static void restore_pirqs(void)
   {
        int pirq, rc, irq, gsi;
        struct physdev_map_pirq map_irq;
  +     struct irq_info *info;
   
  -     for (pirq = 0; pirq < nr_irqs; pirq++) {
  -             irq = pirq_to_irq[pirq];
  -             if (irq == -1)
  +     list_for_each_entry(info, &xen_irq_list_head, list) {
  +             if (info->type != IRQT_PIRQ)
                        continue;
   
  +             pirq = info->u.pirq.pirq;
  +             gsi = info->u.pirq.gsi;
  +             irq = info->irq;
  +
                /* save/restore of PT devices doesn't work, so at this point the
                 * only devices present are GSI based emulated devices */
  -             gsi = gsi_from_irq(irq);
                if (!gsi)
                        continue;
   
                if (rc) {
                        printk(KERN_WARNING "xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
                                        gsi, irq, pirq, rc);
  -                     irq_info[irq] = mk_unbound_info();
  -                     pirq_to_irq[pirq] = -1;
  +                     xen_free_irq(irq);
                        continue;
                }
   
                printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
   
  -             startup_pirq(irq);
  +             __startup_pirq(irq);
        }
   }
   
@@@@ -1420,7 -1439,7 -1358,8 +1437,7 @@@@ static void restore_cpu_virqs(unsigned 
                evtchn = bind_virq.port;
   
                /* Record the new mapping. */
  -             evtchn_to_irq[evtchn] = irq;
  -             irq_info[irq] = mk_virq_info(evtchn, virq);
  +             xen_irq_info_virq_init(cpu, irq, evtchn, virq);
                bind_evtchn_to_cpu(evtchn, cpu);
        }
   }
@@@@ -1444,7 -1463,7 -1383,8 +1461,7 @@@@ static void restore_cpu_ipis(unsigned i
                evtchn = bind_ipi.port;
   
                /* Record the new mapping. */
  -             evtchn_to_irq[evtchn] = irq;
  -             irq_info[irq] = mk_ipi_info(evtchn, ipi);
  +             xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
                bind_evtchn_to_cpu(evtchn, cpu);
        }
   }
@@@@ -1502,10 -1521,22 -1442,10 +1519,22 @@@@ void xen_poll_irq(int irq
        xen_poll_irq_timeout(irq, 0 /* no timeout */);
   }
   
+ +/* Check whether the IRQ line is shared with other guests. */
+ +int xen_test_irq_shared(int irq)
+ +{
+ +     struct irq_info *info = info_for_irq(irq);
+ +     struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq };
+ +
+ +     if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
+ +             return 0;
+ +     return !(irq_status.flags & XENIRQSTAT_shared);
+ +}
+ +EXPORT_SYMBOL_GPL(xen_test_irq_shared);
+ +
   void xen_irq_resume(void)
   {
  -     unsigned int cpu, irq, evtchn;
  -     struct irq_desc *desc;
  +     unsigned int cpu, evtchn;
  +     struct irq_info *info;
   
        init_evtchn_cpu_bindings();
   
                mask_evtchn(evtchn);
   
        /* No IRQ <-> event-channel mappings. */
  -     for (irq = 0; irq < nr_irqs; irq++)
  -             irq_info[irq].evtchn = 0; /* zap event-channel binding */
  +     list_for_each_entry(info, &xen_irq_list_head, list)
  +             info->evtchn = 0; /* zap event-channel binding */
   
        for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
                evtchn_to_irq[evtchn] = -1;
                restore_cpu_ipis(cpu);
        }
   
  -     /*
  -      * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
  -      * are not handled by the IRQ core.
  -      */
  -     for_each_irq_desc(irq, desc) {
  -             if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
  -                     continue;
  -             if (desc->status & IRQ_DISABLED)
  -                     continue;
  -
  -             evtchn = evtchn_from_irq(irq);
  -             if (evtchn == -1)
  -                     continue;
  -
  -             unmask_evtchn(evtchn);
  -     }
  -
  -     restore_cpu_pirqs();
  +     restore_pirqs();
   }
   
   static struct irq_chip xen_dynamic_chip __read_mostly = {
  -     .name           = "xen-dyn",
  +     .name                   = "xen-dyn",
   
  -     .disable        = disable_dynirq,
  -     .mask           = disable_dynirq,
  -     .unmask         = enable_dynirq,
  +     .irq_disable            = disable_dynirq,
  +     .irq_mask               = disable_dynirq,
  +     .irq_unmask             = enable_dynirq,
   
  -     .eoi            = ack_dynirq,
  -     .set_affinity   = set_affinity_irq,
  -     .retrigger      = retrigger_dynirq,
  +     .irq_eoi                = ack_dynirq,
  +     .irq_set_affinity       = set_affinity_irq,
  +     .irq_retrigger          = retrigger_dynirq,
   };
   
   static struct irq_chip xen_pirq_chip __read_mostly = {
  -     .name           = "xen-pirq",
  +     .name                   = "xen-pirq",
   
  -     .startup        = startup_pirq,
  -     .shutdown       = shutdown_pirq,
  +     .irq_startup            = startup_pirq,
  +     .irq_shutdown           = shutdown_pirq,
   
  -     .enable         = enable_pirq,
  -     .unmask         = enable_pirq,
  +     .irq_enable             = enable_pirq,
  +     .irq_unmask             = enable_pirq,
   
  -     .disable        = disable_pirq,
  -     .mask           = disable_pirq,
  +     .irq_disable            = disable_pirq,
  +     .irq_mask               = disable_pirq,
   
  -     .ack            = ack_pirq,
  -     .end            = end_pirq,
  +     .irq_ack                = ack_pirq,
   
  -     .set_affinity   = set_affinity_irq,
  +     .irq_set_affinity       = set_affinity_irq,
   
  -     .retrigger      = retrigger_dynirq,
  +     .irq_retrigger          = retrigger_dynirq,
   };
   
   static struct irq_chip xen_percpu_chip __read_mostly = {
  -     .name           = "xen-percpu",
  +     .name                   = "xen-percpu",
   
  -     .disable        = disable_dynirq,
  -     .mask           = disable_dynirq,
  -     .unmask         = enable_dynirq,
  +     .irq_disable            = disable_dynirq,
  +     .irq_mask               = disable_dynirq,
  +     .irq_unmask             = enable_dynirq,
   
  -     .ack            = ack_dynirq,
  +     .irq_ack                = ack_dynirq,
   };
   
   int xen_set_callback_via(uint64_t via)
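Per the "Add support to check if IRQ line is shared with other domains" commit, xen_test_irq_shared() above lets dom0 code such as the PCI backend ask the hypervisor whether a PIRQ line is shared before deciding how aggressively it may be masked. A minimal sketch of such a check; the helper name is hypothetical:

    /* Sketch: returns true when the line needs shared-IRQ caution.
     * Note the inverted sense: xen_test_irq_shared() returns non-zero
     * when the hypervisor reports the line is NOT shared. */
    static bool example_irq_needs_care(int irq)
    {
            if (xen_test_irq_shared(irq))
                    return false;   /* exclusive to us: safe to mask freely */
            return true;            /* shared (or query failed): be careful */
    }
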
@@@@ -1611,6 -1642,6 -1569,17 +1640,6 @@@@ void __init xen_init_IRQ(void
   {
        int i;
   
  -     cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s),
  -                                 GFP_KERNEL);
  -     irq_info = kcalloc(nr_irqs, sizeof(*irq_info), GFP_KERNEL);
  -
  -     /* We are using nr_irqs as the maximum number of pirq available but
  -      * that number is actually chosen by Xen and we don't know exactly
  -      * what it is. Be careful choosing high pirq numbers. */
  -     pirq_to_irq = kcalloc(nr_irqs, sizeof(*pirq_to_irq), GFP_KERNEL);
  -     for (i = 0; i < nr_irqs; i++)
  -             pirq_to_irq[i] = -1;
  -
        evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
                                    GFP_KERNEL);
        for (i = 0; i < NR_EVENT_CHANNELS; i++)
diff --combined drivers/xen/gntdev.c
   
   #include <xen/xen.h>
   #include <xen/grant_table.h>
  +#include <xen/balloon.h>
   #include <xen/gntdev.h>
   #include <xen/events.h>
   #include <asm/xen/hypervisor.h>
@@@@ -123,10 -123,10 -122,10 +123,10 @@@@ static struct grant_map *gntdev_alloc_m
            NULL == add->pages)
                goto err;
   
  +     if (alloc_xenballooned_pages(count, add->pages))
  +             goto err;
  +
        for (i = 0; i < count; i++) {
  -             add->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
  -             if (add->pages[i] == NULL)
  -                     goto err;
                add->map_ops[i].handle = -1;
                add->unmap_ops[i].handle = -1;
        }
        return add;
   
   err:
  -     if (add->pages)
  -             for (i = 0; i < count; i++) {
  -                     if (add->pages[i])
  -                             __free_page(add->pages[i]);
  -             }
        kfree(add->pages);
        kfree(add->grants);
        kfree(add->map_ops);
@@@@ -180,6 -180,6 -184,8 +180,6 @@@@ static struct grant_map *gntdev_find_ma
   
   static void gntdev_put_map(struct grant_map *map)
   {
  -     int i;
  -
        if (!map)
                return;
   
                if (!use_ptemod)
                        unmap_grant_pages(map, 0, map->count);
   
  -             for (i = 0; i < map->count; i++) {
  -                     uint32_t check, *tmp;
  -                     if (!map->pages[i])
  -                             continue;
  -                     /* XXX When unmapping in an HVM domain, Xen will
  -                      * sometimes end up mapping the GFN to an invalid MFN.
  -                      * In this case, writes will be discarded and reads will
  -                      * return all 0xFF bytes.  Leak these unusable GFNs
  -                      * until Xen supports fixing their p2m mapping.
  -                      *
  -                      * Confirmed present in Xen 4.1-RC3 with HVM source
  -                      */
  -                     tmp = kmap(map->pages[i]);
  -                     *tmp = 0xdeaddead;
  -                     mb();
  -                     check = *tmp;
  -                     kunmap(map->pages[i]);
  -                     if (check == 0xdeaddead)
  -                             __free_page(map->pages[i]);
  -                     else
  -                             pr_debug("Discard page %d=%ld\n", i,
  -                                     page_to_pfn(map->pages[i]));
  -             }
  +             free_xenballooned_pages(map->count, map->pages);
        }
        kfree(map->pages);
        kfree(map->grants);
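The gntdev hunks above switch the backing store for grant mappings from plain alloc_page() to ballooned pages, which is what makes the old leak-on-unmap workaround removable: ballooned PFNs carry no usable RAM, so a stale p2m entry wastes nothing. A minimal sketch of the allocator pairing, using the two-argument form seen in this diff:

    #include <xen/balloon.h>

    #define EX_NPAGES 8

    static struct page *ex_pages[EX_NPAGES];

    static int example_get_backing(void)
    {
            /* Ballooned pages sit on PFNs the guest has relinquished,
             * so mapping a foreign grant over them shadows no real RAM. */
            if (alloc_xenballooned_pages(EX_NPAGES, ex_pages))
                    return -ENOMEM;
            return 0;
    }

    static void example_put_backing(void)
    {
            free_xenballooned_pages(EX_NPAGES, ex_pages);
    }
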
@@@@ -273,7 -273,7 -301,7 +273,7 @@@@ static int __unmap_grant_pages(struct g
                                map->vma->vm_start + map->notify.addr;
                        err = copy_to_user(tmp, &err, 1);
                        if (err)
  -                             return err;
  +                             return -EFAULT;
                        map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
                } else if (pgno >= offset && pgno < offset + pages) {
                        uint8_t *tmp = kmap(map->pages[pgno]);
@@@@ -330,17 -330,17 -358,26 +330,26 @@@@ static int unmap_grant_pages(struct gra
   
   /* ------------------------------------------------------------------ */
   
++ static void gntdev_vma_open(struct vm_area_struct *vma)
++ {
++      struct grant_map *map = vma->vm_private_data;
++ 
++      pr_debug("gntdev_vma_open %p\n", vma);
++      atomic_inc(&map->users);
++ }
++ 
   static void gntdev_vma_close(struct vm_area_struct *vma)
   {
        struct grant_map *map = vma->vm_private_data;
   
--      pr_debug("close %p\n", vma);
++      pr_debug("gntdev_vma_close %p\n", vma);
        map->vma = NULL;
        vma->vm_private_data = NULL;
        gntdev_put_map(map);
   }
   
   static struct vm_operations_struct gntdev_vmops = {
++      .open = gntdev_vma_open,
        .close = gntdev_vma_close,
   };
   
@@@@ -652,7 -652,7 -689,10 +661,10 @@@@ static int gntdev_mmap(struct file *fli
   
        vma->vm_ops = &gntdev_vmops;
   
--      vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
++      vma->vm_flags |= VM_RESERVED|VM_DONTEXPAND;
++ 
++      if (use_ptemod)
++              vma->vm_flags |= VM_DONTCOPY|VM_PFNMAP;
   
        vma->vm_private_data = map;
   
        if (map->flags) {
                if ((vma->vm_flags & VM_WRITE) &&
                                (map->flags & GNTMAP_readonly))
  -                     return -EINVAL;
  +                     goto out_unlock_put;
        } else {
                map->flags = GNTMAP_host_map;
                if (!(vma->vm_flags & VM_WRITE))
@@@@ -700,8 -700,8 -740,6 +712,8 @@@@ unlock_out
        spin_unlock(&priv->lock);
        return err;
   
  +out_unlock_put:
  +     spin_unlock(&priv->lock);
   out_put_map:
        if (use_ptemod)
                map->vma = NULL;
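
The vm_flags change in gntdev_mmap() above is the "Remove unneeded VM flags" commit: VM_DONTCOPY and VM_PFNMAP are now set only when gntdev rewrites PTEs itself (use_ptemod), so auto-translated guests keep mappings that survive fork(), with the new gntdev_vma_open() keeping the map's user count correct when a VMA is duplicated. For context, a rough userspace sketch of mapping one grant through the device; this assumes the ioctl layout from the kernel's <xen/gntdev.h> and should be checked against your tree:

    #include <stdint.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <xen/gntdev.h>

    /* Hypothetical helper: map one page granted by 'domid' under 'ref'.
     * Returns MAP_FAILED on error (fd leak ignored for brevity). */
    static void *example_map_grant(uint32_t domid, uint32_t ref)
    {
            struct ioctl_gntdev_map_grant_ref op;
            int fd = open("/dev/xen/gntdev", O_RDWR);

            if (fd < 0)
                    return MAP_FAILED;

            op.count         = 1;
            op.refs[0].domid = domid;
            op.refs[0].ref   = ref;
            if (ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &op) < 0)
                    return MAP_FAILED;

            /* op.index is the pseudo-offset gntdev_mmap() expects. */
            return mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                        fd, op.index);
    }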