More X86-64 boot options can be found in
Documentation/x86/x86_64/boot-options.txt .
X86 Either 32bit or 64bit x86 (same as X86-32+X86-64)
+ XEN Xen support is enabled
In addition, the following text indicates that the option:
control method, with respect to putting devices into
low power states, to be enforced (the ACPI 2.0 ordering
of _PTS is used by default).
- s4_nonvs prevents the kernel from saving/restoring the
- ACPI NVS memory during hibernation.
+ nonvs prevents the kernel from saving/restoring the
+ ACPI NVS memory during suspend/hibernation and resume.
sci_force_enable causes the kernel to set SCI_EN directly
on resume from S1/S3 (which is against the ACPI spec,
but some broken systems don't work without it).
If there are multiple matching configurations changing
the same attribute, the last one is used.
- lmb=debug [KNL] Enable lmb debug messages.
+ memblock=debug [KNL] Enable memblock debug messages.
load_ramdisk= [RAM] List of ramdisks to load from floppy
See Documentation/blockdev/ramdisk.txt.
xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks.
xd_geo= See header of drivers/block/xd.c.
+ xen_emul_unplug= [HW,X86,XEN]
+ Unplug Xen emulated devices
+ Format: [unplug0,][unplug1]
+ ide-disks -- unplug primary master IDE devices
+ aux-ide-disks -- unplug non-primary-master IDE devices
+ nics -- unplug network devices
+ all -- unplug all emulated devices (NICs and IDE disks)
+ ignore -- continue loading the Xen platform PCI driver even
+ if the version check failed
+
xirc2ps_cs= [NET,PCMCIA]
Format:
<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
* masked off.
*/
sysret_audit:
- movq %rax,%rsi /* second arg, syscall return value */
- cmpq $0,%rax /* is it < 0? */
+ movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */
+ cmpq $0,%rsi /* is it < 0? */
setl %al /* 1 if so, 0 if not */
movzbl %al,%edi /* zero-extend that into %edi */
inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
CFI_ENDPROC
END(xen_failsafe_callback)
+ apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
+ xen_hvm_callback_vector xen_evtchn_do_upcall
+
#endif /* CONFIG_XEN */
/*
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
*/
+ #include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <xen/interface/version.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
+ #include <xen/interface/memory.h>
#include <xen/features.h>
#include <xen/page.h>
+ #include <xen/hvm.h>
#include <xen/hvc-console.h>
#include <asm/paravirt.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/reboot.h>
+ #include <asm/setup.h>
#include <asm/stackprotector.h>
+ #include <asm/hypervisor.h>
#include "xen-ops.h"
#include "mmu.h"
void *xen_initial_gdt;
+ RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
+ __read_mostly int xen_have_vector_callback;
+ EXPORT_SYMBOL_GPL(xen_have_vector_callback);
+
/*
* Point at some empty memory to start with. We map the real shared_info
* page as soon as fixmap is up and running.
*/
static int have_vcpu_info_placement = 1;
+/*
+ * Cap setup_max_cpus at MAX_VIRT_CPUS.  With CONFIG_SMP unset the body
+ * is compiled out and the function does nothing.
+ */
+static void clamp_max_cpus(void)
+{
+#ifdef CONFIG_SMP
+	if (setup_max_cpus <= MAX_VIRT_CPUS)
+		return;
+
+	setup_max_cpus = MAX_VIRT_CPUS;
+#endif
+}
+
static void xen_vcpu_setup(int cpu)
{
struct vcpu_register_vcpu_info info;
struct vcpu_info *vcpup;
BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
- per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
- if (!have_vcpu_info_placement)
- return; /* already tested, not available */
+ if (cpu < MAX_VIRT_CPUS)
+ per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
- vcpup = &per_cpu(xen_vcpu_info, cpu);
+ if (!have_vcpu_info_placement) {
+ if (cpu >= MAX_VIRT_CPUS)
+ clamp_max_cpus();
+ return;
+ }
+ vcpup = &per_cpu(xen_vcpu_info, cpu);
info.mfn = arbitrary_virt_to_mfn(vcpup);
info.offset = offset_in_page(vcpup);
if (err) {
printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
have_vcpu_info_placement = 0;
+ clamp_max_cpus();
} else {
/* This cpu is using the registered vcpu info, even if
later ones fail to. */
#endif
-
static void xen_clts(void)
{
struct multicall_space mcs;
.patch = xen_patch,
};
- static const struct pv_time_ops xen_time_ops __initdata = {
- .sched_clock = xen_clocksource_read,
- };
-
static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.cpuid = xen_cpuid,
xen_reboot(SHUTDOWN_crash);
}
+/*
+ * Notifier callback: calls xen_reboot(SHUTDOWN_crash) and reports
+ * NOTIFY_DONE.  None of the notifier arguments are inspected.
+ */
+static int xen_panic_event(struct notifier_block *this,
+			   unsigned long event, void *ptr)
+{
+	xen_reboot(SHUTDOWN_crash);
+
+	return NOTIFY_DONE;
+}
+
+/* Notifier block that routes its events to xen_panic_event(). */
+static struct notifier_block xen_panic_block = {
+	.notifier_call	= xen_panic_event,
+};
+
+/*
+ * Hook xen_panic_block into panic_notifier_list.
+ *
+ * Returns whatever atomic_notifier_chain_register() reports instead of
+ * discarding it and returning a hard-coded 0, so a failed registration
+ * is visible to the caller.
+ */
+int xen_panic_handler_init(void)
+{
+	return atomic_notifier_chain_register(&panic_notifier_list,
+					      &xen_panic_block);
+}
+
static const struct machine_ops __initdata xen_machine_ops = {
.restart = xen_restart,
.halt = xen_machine_halt,
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_init_ops = xen_init_ops;
- pv_time_ops = xen_time_ops;
pv_cpu_ops = xen_cpu_ops;
pv_apic_ops = xen_apic_ops;
x86_init.oem.arch_setup = xen_arch_setup;
x86_init.oem.banner = xen_banner;
- x86_init.timers.timer_init = xen_time_init;
- x86_init.timers.setup_percpu_clockev = x86_init_noop;
- x86_cpuinit.setup_percpu_clockev = x86_init_noop;
-
- x86_platform.calibrate_tsc = xen_tsc_khz;
- x86_platform.get_wallclock = xen_get_wallclock;
- x86_platform.set_wallclock = xen_set_wallclock;
+ xen_init_time_ops();
/*
* Set up some pagetable state before starting to set any ptes.
x86_64_start_reservations((char *)__pa_symbol(&boot_params));
#endif
}
+
+ /*
+  * Scan CPUID leaves 0x40000000 .. 0x4000ff00 in steps of 0x100 for the
+  * "XenVMMXenVMM" signature returned in ebx/ecx/edx.
+  *
+  * Returns the first base leaf that carries the signature and whose eax
+  * is at least base + 2, or 0 when no such leaf is found.
+  */
+ static uint32_t xen_cpuid_base(void)
+ {
+ 	uint32_t base, eax;
+ 	/*
+ 	 * ebx, ecx, edx in order: 12 signature bytes.  Receiving them in a
+ 	 * uint32_t array avoids storing 32-bit values through casts of a
+ 	 * char buffer.
+ 	 */
+ 	uint32_t sig[3];
+
+ 	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
+ 		cpuid(base, &eax, &sig[0], &sig[1], &sig[2]);
+
+ 		if (!memcmp("XenVMMXenVMM", sig, sizeof(sig)) &&
+ 		    ((eax - base) >= 2))
+ 			return base;
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * Read the Xen CPUID leaves, write the physical address of
+  * hypercall_page to the MSR reported by leaf base + 2, then set up
+  * features, pv_info and xen_domain_type for an HVM domain.
+  *
+  * @major: receives the upper 16 bits of eax from leaf base + 1.
+  * @minor: receives the lower 16 bits of eax from leaf base + 1.
+  *
+  * Always returns 0.
+  * NOTE(review): neither a 0 result from xen_cpuid_base() nor the
+  * return value of wrmsr_safe() is checked here - confirm callers
+  * guarantee both.
+  */
+ static int init_hvm_pv_info(int *major, int *minor)
+ {
+ 	uint32_t base = xen_cpuid_base();
+ 	uint32_t version, nr_pages, hcall_msr;
+ 	uint32_t ebx, ecx, edx;
+ 	u64 hcall_pa;
+
+ 	cpuid(base + 1, &version, &ebx, &ecx, &edx);
+ 	*major = version >> 16;
+ 	*minor = version & 0xffff;
+ 	printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor);
+
+ 	/* leaf base + 2: eax is kept as a page count, ebx as an MSR index */
+ 	cpuid(base + 2, &nr_pages, &hcall_msr, &ecx, &edx);
+
+ 	/* the 64-bit physical address is written as low/high halves */
+ 	hcall_pa = __pa(hypercall_page);
+ 	wrmsr_safe(hcall_msr, (u32)hcall_pa, (u32)(hcall_pa >> 32));
+
+ 	xen_setup_features();
+
+ 	/* start from the PV defaults, then override the kernel RPL */
+ 	pv_info = xen_info;
+ 	pv_info.kernel_rpl = 0;
+
+ 	xen_domain_type = XEN_HVM_DOMAIN;
+
+ 	return 0;
+ }
+
+ /*
+  * Ask Xen to place the shared_info page at a guest frame taken from the
+  * brk area and point HYPERVISOR_shared_info and the per-cpu xen_vcpu
+  * pointers at it.
+  *
+  * The backing page is obtained with extend_brk() on the first call only
+  * and reused on later calls.  BUG()s if the XENMEM_add_to_physmap
+  * hypercall fails.
+  */
+ void xen_hvm_init_shared_info(void)
+ {
+ 	int cpu;
+ 	struct xen_add_to_physmap xatp;
+ 	static struct shared_info *shared_info_page;
+
+ 	if (!shared_info_page)
+ 		shared_info_page = (struct shared_info *)
+ 			extend_brk(PAGE_SIZE, PAGE_SIZE);
+ 	xatp.domid = DOMID_SELF;
+ 	xatp.idx = 0;
+ 	xatp.space = XENMAPSPACE_shared_info;
+ 	xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
+ 	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+ 		BUG();
+
+ 	HYPERVISOR_shared_info = shared_info_page;
+
+ 	/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
+ 	 * page, we use it in the event channel upcall and in some pvclock
+ 	 * related functions. We don't need the vcpu_info placement
+ 	 * optimizations because we don't use any pv_mmu or pv_irq op on
+ 	 * HVM.
+ 	 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
+ 	 * online but xen_hvm_init_shared_info is run at resume time too and
+ 	 * in that case multiple vcpus might be online. */
+ 	for_each_online_cpu(cpu) {
+ 		/* Same bound xen_vcpu_setup() applies before indexing
+ 		 * vcpu_info[]; cpus beyond it keep their current xen_vcpu. */
+ 		if (cpu >= MAX_VIRT_CPUS)
+ 			continue;
+ 		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ 	}
+ }
+
+ #ifdef CONFIG_XEN_PVHVM
+ /*
+  * CPU notifier callback: on CPU_UP_PREPARE, point the cpu's xen_vcpu at
+  * its vcpu_info slot in the shared_info page.  Every other action is
+  * ignored.  Always returns NOTIFY_OK.
+  *
+  * @hcpu carries the cpu number encoded as a pointer-sized integer.
+  */
+ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
+ 					unsigned long action, void *hcpu)
+ {
+ 	int cpu = (long)hcpu;
+
+ 	switch (action) {
+ 	case CPU_UP_PREPARE:
+ 		/* Same bound xen_vcpu_setup() applies before indexing
+ 		 * vcpu_info[]; never point past the end of the array. */
+ 		if (cpu < MAX_VIRT_CPUS)
+ 			per_cpu(xen_vcpu, cpu) =
+ 				&HYPERVISOR_shared_info->vcpu_info[cpu];
+ 		break;
+ 	default:
+ 		break;
+ 	}
+ 	return NOTIFY_OK;
+ }
+
+ /* Notifier block that routes cpu events to xen_hvm_cpu_notify(). */
+ static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = {
+ 	.notifier_call	= xen_hvm_cpu_notify,
+ };
+
+ /*
+  * Platform init hook for a Xen HVM guest (installed as .init_platform
+  * of x86_hyper_xen_hvm).  Bails out early if init_hvm_pv_info()
+  * reports a negative result; otherwise runs the setup steps below in
+  * a fixed order.
+  */
+ static void __init xen_hvm_guest_init(void)
+ {
+ 	int major, minor;
+
+ 	if (init_hvm_pv_info(&major, &minor) < 0)
+ 		return;
+
+ 	xen_hvm_init_shared_info();
+
+ 	/* record whether the hypervisor offers the callback vector */
+ 	if (xen_feature(XENFEAT_hvm_callback_vector))
+ 		xen_have_vector_callback = 1;
+
+ 	register_cpu_notifier(&xen_hvm_cpu_notifier);
+ 	xen_unplug_emulated_devices();
+
+ 	/* vcpu_info placement is switched off for HVM guests */
+ 	have_vcpu_info_placement = 0;
+
+ 	x86_init.irqs.intr_init = xen_init_IRQ;
+ 	xen_hvm_init_time_ops();
+ 	xen_hvm_init_mmu_ops();
+ }
+
+ /*
+  * Detection hook: true when this is not a Xen PV domain and
+  * xen_cpuid_base() finds a Xen CPUID base leaf (non-zero result).
+  */
+ static bool __init xen_hvm_platform(void)
+ {
+ 	return !xen_pv_domain() && xen_cpuid_base() != 0;
+ }
+
+ /*
+  * Hypervisor descriptor for Xen HVM: detected by xen_hvm_platform(),
+  * initialised by xen_hvm_guest_init().
+  */
+ const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = {
+ 	.name		= "Xen HVM",
+ 	.detect		= xen_hvm_platform,
+ 	.init_platform	= xen_hvm_guest_init,
+ };
+ EXPORT_SYMBOL(x86_hyper_xen_hvm);
+ #endif
#include <asm/xen/hypercall.h>
#include <xen/events.h>
+ #include <xen/features.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
account_idle_ticks(ticks);
}
-/*
- * Xen sched_clock implementation. Returns the number of unstolen
- * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
- * states.
- */
-static unsigned long long xen_sched_clock(void)
-{
- struct vcpu_runstate_info state;
- cycle_t now;
- u64 ret;
- s64 offset;
-
- /*
- * Ideally sched_clock should be called on a per-cpu basis
- * anyway, so preempt should already be disabled, but that's
- * not current practice at the moment.
- */
- preempt_disable();
-
- now = xen_clocksource_read();
-
- get_runstate_snapshot(&state);
-
- WARN_ON(state.state != RUNSTATE_running);
-
- offset = now - state.state_entry_time;
- if (offset < 0)
- offset = 0;
-
- ret = state.time[RUNSTATE_blocked] +
- state.time[RUNSTATE_running] +
- offset;
-
- preempt_enable();
-
- return ret;
-}
-
-
/* Get the TSC speed from Xen */
- unsigned long xen_tsc_khz(void)
+ static unsigned long xen_tsc_khz(void)
{
struct pvclock_vcpu_time_info *info =
&HYPERVISOR_shared_info->vcpu_info[0].time;
put_cpu_var(xen_vcpu);
}
- unsigned long xen_get_wallclock(void)
+ static unsigned long xen_get_wallclock(void)
{
struct timespec ts;
return ts.tv_sec;
}
- int xen_set_wallclock(unsigned long now)
+ static int xen_set_wallclock(unsigned long now)
{
/* do nothing for domU */
return -1;
}
}
- __init void xen_time_init(void)
+ static const struct pv_time_ops xen_time_ops __initdata = {
- .sched_clock = xen_sched_clock,
++ .sched_clock = xen_clocksource_read,
+ };
+
+ static __init void xen_time_init(void)
{
int cpu = smp_processor_id();
struct timespec tp;
xen_setup_timer(cpu);
xen_setup_cpu_clockevents();
}
+
+ /*
+  * Install the Xen time hooks: pv_time_ops, the x86_init / x86_cpuinit
+  * timer callbacks, and the x86_platform TSC and wallclock callbacks.
+  */
+ __init void xen_init_time_ops(void)
+ {
+ 	pv_time_ops = xen_time_ops;
+
+ 	/* TSC calibration and wallclock access */
+ 	x86_platform.calibrate_tsc = xen_tsc_khz;
+ 	x86_platform.get_wallclock = xen_get_wallclock;
+ 	x86_platform.set_wallclock = xen_set_wallclock;
+
+ 	/* timer setup; the per-cpu clockevent hooks become no-ops */
+ 	x86_init.timers.timer_init = xen_time_init;
+ 	x86_init.timers.setup_percpu_clockev = x86_init_noop;
+ 	x86_cpuinit.setup_percpu_clockev = x86_init_noop;
+ }
+
+ #ifdef CONFIG_XEN_PVHVM
+ /*
+  * Per-cpu clockevent hook for HVM guests: set up runstate info and the
+  * Xen timer for the cpu this runs on, then its clockevents.
+  */
+ static void xen_hvm_setup_cpu_clockevents(void)
+ {
+ 	int this_cpu = smp_processor_id();
+
+ 	xen_setup_runstate_info(this_cpu);
+ 	xen_setup_timer(this_cpu);
+ 	xen_setup_cpu_clockevents();
+ }
+
+ /*
+  * Install the Xen pv time hooks on an HVM guest.
+  *
+  * Leaves the existing time ops untouched when either
+  *  - there is no vector callback and more than one cpu is present, or
+  *  - the hypervisor does not advertise XENFEAT_hvm_safe_pvclock.
+  */
+ __init void xen_hvm_init_time_ops(void)
+ {
+ 	/* vector callback is needed otherwise we cannot receive interrupts
+ 	 * on cpu > 0 */
+ 	if (!xen_have_vector_callback && num_present_cpus() > 1)
+ 		return;
+ 	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
+ 		/* space after the comma: the two literals are concatenated */
+ 		printk(KERN_INFO "Xen doesn't support pvclock on HVM, "
+ 				"disable pv timer\n");
+ 		return;
+ 	}
+
+ 	pv_time_ops = xen_time_ops;
+ 	x86_init.timers.setup_percpu_clockev = xen_time_init;
+ 	x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
+
+ 	x86_platform.calibrate_tsc = xen_tsc_khz;
+ 	x86_platform.get_wallclock = xen_get_wallclock;
+ 	x86_platform.set_wallclock = xen_set_wallclock;
+ }
+ #endif
void xen_enable_syscall(void);
void xen_vcpu_restore(void);
+ void xen_callback_vector(void);
+ void xen_hvm_init_shared_info(void);
+ void __init xen_unplug_emulated_devices(void);
+
void __init xen_build_dynamic_phys_to_machine(void);
void xen_init_irq_ops(void);
void xen_teardown_timer(int cpu);
cycle_t xen_clocksource_read(void);
void xen_setup_cpu_clockevents(void);
- unsigned long xen_tsc_khz(void);
- void __init xen_time_init(void);
- unsigned long xen_get_wallclock(void);
- int xen_set_wallclock(unsigned long time);
- unsigned long long xen_sched_clock(void);
+ void __init xen_init_time_ops(void);
+ void __init xen_hvm_init_time_ops(void);
irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
void xen_sysret64(void);
void xen_adjust_exception_frame(void);
+extern int xen_panic_handler_init(void);
+
#endif /* XEN_OPS_H */
#include <xen/events.h>
#include <xen/page.h>
+ #include <xen/platform_pci.h>
+ #include <xen/hvm.h>
+
#include "xenbus_comms.h"
#include "xenbus_probe.h"
{
int ret = 0;
- if (xenstored_ready > 0)
- ret = nb->notifier_call(nb, 0, NULL);
- else
- blocking_notifier_chain_register(&xenstore_chain, nb);
+ blocking_notifier_chain_register(&xenstore_chain, nb);
return ret;
}
/* Notify others that xenstore is up */
blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
}
+ EXPORT_SYMBOL_GPL(xenbus_probe);
- static int __init xenbus_probe_init(void)
+ /*
+  * Initcall that runs xenbus_probe() for domains that are neither the
+  * initial domain nor an HVM domain.
+  *
+  * Returns -ENODEV when not running under Xen, 0 otherwise.
+  */
+ static int __init xenbus_probe_initcall(void)
+ {
+ 	if (!xen_domain())
+ 		return -ENODEV;
+
+ 	/* the initial domain and HVM domains are not probed from here */
+ 	if (!xen_initial_domain() && !xen_hvm_domain())
+ 		xenbus_probe(NULL);
+
+ 	return 0;
+ }
+
+ device_initcall(xenbus_probe_initcall);
+
+ static int __init xenbus_init(void)
{
int err = 0;
if (xen_initial_domain()) {
/* dom0 not yet supported */
} else {
+ if (xen_hvm_domain()) {
+ uint64_t v = 0;
+ err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+ if (err)
+ goto out_error;
+ xen_store_evtchn = (int)v;
+ err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+ if (err)
+ goto out_error;
+ xen_store_mfn = (unsigned long)v;
+ xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
+ } else {
+ xen_store_evtchn = xen_start_info->store_evtchn;
+ xen_store_mfn = xen_start_info->store_mfn;
+ xen_store_interface = mfn_to_virt(xen_store_mfn);
+ }
xenstored_ready = 1;
- xen_store_evtchn = xen_start_info->store_evtchn;
- xen_store_mfn = xen_start_info->store_mfn;
}
- xen_store_interface = mfn_to_virt(xen_store_mfn);
/* Initialize the interface to xenstore. */
err = xs_init();
goto out_unreg_back;
}
- if (!xen_initial_domain())
- xenbus_probe(NULL);
-
#ifdef CONFIG_XEN_COMPAT_XENFS
/*
* Create xenfs mountpoint in /proc for compatibility with
return err;
}
- postcore_initcall(xenbus_probe_init);
+ postcore_initcall(xenbus_init);
MODULE_LICENSE("GPL");
#ifndef MODULE
static int __init boot_wait_for_devices(void)
{
+ if (xen_hvm_domain() && !xen_platform_pci_unplug)
+ return -ENODEV;
+
ready_to_wait_for_devices = 1;
wait_for_devices(NULL);
return 0;
/* Align . to a 8 byte boundary equals to maximum function alignment. */
#define ALIGN_FUNCTION() . = ALIGN(8)
+/*
+ * Align to a 32 byte boundary equal to the
+ * alignment gcc 4.5 uses for a struct
+ */
+#define STRUCT_ALIGN() . = ALIGN(32)
+
/* The actual configuration determine if the init/exit sections
* are handled as text/data or they can be discarded (which
* often happens at runtime)
LIKELY_PROFILE() \
BRANCH_PROFILE() \
TRACE_PRINTKS() \
+ \
+ STRUCT_ALIGN(); \
FTRACE_EVENTS() \
+ \
+ STRUCT_ALIGN(); \
TRACE_SYSCALLS()
/*
*/
#define INIT_TASK_DATA_SECTION(align) \
. = ALIGN(align); \
- .data..init_task : { \
+ .data..init_task : AT(ADDR(.data..init_task) - LOAD_OFFSET) { \
INIT_TASK_DATA(align) \
}
EXIT_DATA \
EXIT_CALL \
*(.discard) \
+ *(.discard.*) \
}
/**