[PATCH] ppc64: kexec support for ppc64

author R Sharada <sharada@in.ibm.com>

Sat, 25 Jun 2005 21:58:10 +0000 (14:58 -0700)

committer Linus Torvalds <torvalds@ppc970.osdl.org>

Sat, 25 Jun 2005 23:24:51 +0000 (16:24 -0700)
author R Sharada <sharada@in.ibm.com>
Sat, 25 Jun 2005 21:58:10 +0000 (14:58 -0700)
committer Linus Torvalds <torvalds@ppc970.osdl.org>
Sat, 25 Jun 2005 23:24:51 +0000 (16:24 -0700)
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig

index 5f40b43..f804f25 100644 (file)
--- a/arch/ppc64/Kconfig
+++ b/arch/ppc64/Kconfig
@@ -142,6 +142,23 @@ config PPC_SPLPAR
           processors, that is, which share physical processors between
           two or more partitions.
  
+config KEXEC
+       bool "kexec system call (EXPERIMENTAL)"
+       depends on PPC_MULTIPLATFORM && EXPERIMENTAL
+       help
+         kexec is a system call that implements the ability to shutdown your
+         current kernel, and to start another kernel.  It is like a reboot
+         but it is indepedent of the system firmware.  And like a reboot
+         you can start any kernel with it, not just Linux.
+
+         The name comes from the similiarity to the exec system call.
+
+         It is an ongoing process to be certain the hardware in a machine
+         is properly shutdown, so do not be surprised if this code does not
+         initially work for you.  It may help to enable device hotplugging
+         support.  As of this writing the exact hardware interface is
+         strongly in flux, so no good recommendation can be made.
+
  config IBMVIO
         depends on PPC_PSERIES || PPC_ISERIES
         bool
diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile

index dffbfb7..d9b2660 100644 (file)
--- a/arch/ppc64/kernel/Makefile
+++ b/arch/ppc64/kernel/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_PPC_PSERIES) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o \
  obj-$(CONFIG_PPC_BPA) += bpa_setup.o bpa_iommu.o bpa_nvram.o \
                          bpa_iic.o spider-pic.o
  
+obj-$(CONFIG_KEXEC)            += machine_kexec.o
  obj-$(CONFIG_EEH)              += eeh.o
  obj-$(CONFIG_PROC_FS)          += proc_ppc64.o
  obj-$(CONFIG_RTAS_FLASH)       += rtas_flash.o
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S

index 02c8f4e..675c270 100644 (file)
--- a/arch/ppc64/kernel/head.S
+++ b/arch/ppc64/kernel/head.S
@@ -1194,7 +1194,7 @@ _GLOBAL(pSeries_secondary_smp_init)
         bl      .__restore_cpu_setup
  
         /* Set up a paca value for this processor. Since we have the
-        * physical cpu id in r3, we need to search the pacas to find
+        * physical cpu id in r24, we need to search the pacas to find
          * which logical id maps to our physical one.
          */
         LOADADDR(r13, paca)             /* Get base vaddr of paca array  */
@@ -1207,8 +1207,8 @@ _GLOBAL(pSeries_secondary_smp_init)
         cmpwi   r5,NR_CPUS
         blt     1b
  
-99:    HMT_LOW                         /* Couldn't find our CPU id      */
-       b       99b
+       mr      r3,r24                  /* not found, copy phys to r3    */
+       b       .kexec_wait             /* next kernel might do better   */
  
  2:     mtspr   SPRG3,r13               /* Save vaddr of paca in SPRG3   */
         /* From now on, r24 is expected to be logica cpuid */
diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c

new file mode 100644 (file)

index 0000000..217965d
--- /dev/null
+++ b/arch/ppc64/kernel/machine_kexec.c
@@ -0,0 +1,301 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ *
+ * Copyright (C) 2004-2005, IBM Corp.
+ *
+ * Created by: Milton D Miller II
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+
+#include <linux/cpumask.h>
+#include <linux/kexec.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/errno.h>
+
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+#include <asm/paca.h>
+#include <asm/mmu.h>
+#include <asm/sections.h>      /* _end */
+#include <asm/prom.h>
+
+#define HASH_GROUP_SIZE 0x80   /* size of each hash group, asm/mmu.h */
+
+/* Have this around till we move it into crash specific file */
+note_buf_t crash_notes[NR_CPUS];
+
+/* Dummy for now. Not sure if we need to have a crash shutdown in here
+ * and if what it will achieve. Letting it be now to compile the code
+ * in generic kexec environment
+ */
+void machine_crash_shutdown(void)
+{
+       /* do nothing right now */
+       /* smp_relase_cpus() if we want smp on panic kernel */
+       /* cpu_irq_down to isolate us until we are ready */
+}
+
+int machine_kexec_prepare(struct kimage *image)
+{
+       int i;
+       unsigned long begin, end;       /* limits of segment */
+       unsigned long low, high;        /* limits of blocked memory range */
+       struct device_node *node;
+       unsigned long *basep;
+       unsigned int *sizep;
+
+       if (!ppc_md.hpte_clear_all)
+               return -ENOENT;
+
+       /*
+        * Since we use the kernel fault handlers and paging code to
+        * handle the virtual mode, we must make sure no destination
+        * overlaps kernel static data or bss.
+        */
+       for(i = 0; i < image->nr_segments; i++)
+               if (image->segment[i].mem < __pa(_end))
+                       return -ETXTBSY;
+
+       /*
+        * For non-LPAR, we absolutely can not overwrite the mmu hash
+        * table, since we are still using the bolted entries in it to
+        * do the copy.  Check that here.
+        *
+        * It is safe if the end is below the start of the blocked
+        * region (end <= low), or if the beginning is after the
+        * end of the blocked region (begin >= high).  Use the
+        * boolean identity !(a || b)  === (!a && !b).
+        */
+       if (htab_address) {
+               low = __pa(htab_address);
+               high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE;
+
+               for(i = 0; i < image->nr_segments; i++) {
+                       begin = image->segment[i].mem;
+                       end = begin + image->segment[i].memsz;
+
+                       if ((begin < high) && (end > low))
+                               return -ETXTBSY;
+               }
+       }
+
+       /* We also should not overwrite the tce tables */
+       for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
+                       node = of_find_node_by_type(node, "pci")) {
+               basep = (unsigned long *)get_property(node, "linux,tce-base",
+                                                       NULL);
+               sizep = (unsigned int *)get_property(node, "linux,tce-size",
+                                                       NULL);
+               if (basep == NULL || sizep == NULL)
+                       continue;
+
+               low = *basep;
+               high = low + (*sizep);
+
+               for(i = 0; i < image->nr_segments; i++) {
+                       begin = image->segment[i].mem;
+                       end = begin + image->segment[i].memsz;
+
+                       if ((begin < high) && (end > low))
+                               return -ETXTBSY;
+               }
+       }
+
+       return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+       /* we do nothing in prepare that needs to be undone */
+}
+
+#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
+
+static void copy_segments(unsigned long ind)
+{
+       unsigned long entry;
+       unsigned long *ptr;
+       void *dest;
+       void *addr;
+
+       /*
+        * We rely on kexec_load to create a lists that properly
+        * initializes these pointers before they are used.
+        * We will still crash if the list is wrong, but at least
+        * the compiler will be quiet.
+        */
+       ptr = NULL;
+       dest = NULL;
+
+       for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
+               addr = __va(entry & PAGE_MASK);
+
+               switch (entry & IND_FLAGS) {
+               case IND_DESTINATION:
+                       dest = addr;
+                       break;
+               case IND_INDIRECTION:
+                       ptr = addr;
+                       break;
+               case IND_SOURCE:
+                       copy_page(dest, addr);
+                       dest += PAGE_SIZE;
+               }
+       }
+}
+
+void kexec_copy_flush(struct kimage *image)
+{
+       long i, nr_segments = image->nr_segments;
+       struct  kexec_segment ranges[KEXEC_SEGMENT_MAX];
+
+       /* save the ranges on the stack to efficiently flush the icache */
+       memcpy(ranges, image->segment, sizeof(ranges));
+
+       /*
+        * After this call we may not use anything allocated in dynamic
+        * memory, including *image.
+        *
+        * Only globals and the stack are allowed.
+        */
+       copy_segments(image->head);
+
+       /*
+        * we need to clear the icache for all dest pages sometime,
+        * including ones that were in place on the original copy
+        */
+       for (i = 0; i < nr_segments; i++)
+               flush_icache_range(ranges[i].mem + KERNELBASE,
+                               ranges[i].mem + KERNELBASE +
+                               ranges[i].memsz);
+}
+
+#ifdef CONFIG_SMP
+
+/* FIXME: we should schedule this function to be called on all cpus based
+ * on calling the interrupts, but we would like to call it off irq level
+ * so that the interrupt controller is clean.
+ */
+void kexec_smp_down(void *arg)
+{
+       if (ppc_md.cpu_irq_down)
+               ppc_md.cpu_irq_down();
+
+       local_irq_disable();
+       kexec_smp_wait();
+       /* NOTREACHED */
+}
+
+static void kexec_prepare_cpus(void)
+{
+       int my_cpu, i, notified=-1;
+
+       smp_call_function(kexec_smp_down, NULL, 0, /* wait */0);
+       my_cpu = get_cpu();
+
+       /* check the others cpus are now down (via paca hw cpu id == -1) */
+       for (i=0; i < NR_CPUS; i++) {
+               if (i == my_cpu)
+                       continue;
+
+               while (paca[i].hw_cpu_id != -1) {
+                       if (!cpu_possible(i)) {
+                               printk("kexec: cpu %d hw_cpu_id %d is not"
+                                               " possible, ignoring\n",
+                                               i, paca[i].hw_cpu_id);
+                               break;
+                       }
+                       if (!cpu_online(i)) {
+                               /* Fixme: this can be spinning in
+                                * pSeries_secondary_wait with a paca
+                                * waiting for it to go online.
+                                */
+                               printk("kexec: cpu %d hw_cpu_id %d is not"
+                                               " online, ignoring\n",
+                                               i, paca[i].hw_cpu_id);
+                               break;
+                       }
+                       if (i != notified) {
+                               printk( "kexec: waiting for cpu %d (physical"
+                                               " %d) to go down\n",
+                                               i, paca[i].hw_cpu_id);
+                               notified = i;
+                       }
+               }
+       }
+
+       /* after we tell the others to go down */
+       if (ppc_md.cpu_irq_down)
+               ppc_md.cpu_irq_down();
+
+       put_cpu();
+
+       local_irq_disable();
+}
+
+#else /* ! SMP */
+
+static void kexec_prepare_cpus(void)
+{
+       /*
+        * move the secondarys to us so that we can copy
+        * the new kernel 0-0x100 safely
+        *
+        * do this if kexec in setup.c ?
+        */
+       smp_relase_cpus();
+       if (ppc_md.cpu_irq_down)
+               ppc_md.cpu_irq_down();
+       local_irq_disable();
+}
+
+#endif /* SMP */
+
+/*
+ * kexec thread structure and stack.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled.  It also must be statically allocated
+ * or allocated as part of the kimage, because everything else may be
+ * overwritten when we copy the kexec image.  We piggyback on the
+ * "init_task" linker section here to statically allocate a stack.
+ *
+ * We could use a smaller stack if we don't care about anything using
+ * current, but that audit has not been performed.
+ */
+union thread_union kexec_stack
+       __attribute__((__section__(".data.init_task"))) = { };
+
+/* Our assembly helper, in kexec_stub.S */
+extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
+       void *image, void *control, void (*clear_all)(void)) ATTRIB_NORET;
+
+/* too late to fail here */
+void machine_kexec(struct kimage *image)
+{
+
+       /* prepare control code if any */
+
+       /* shutdown other cpus into our wait loop and quiesce interrupts */
+       kexec_prepare_cpus();
+
+       /* switch to a staticly allocated stack.  Based on irq stack code.
+        * XXX: the task struct will likely be invalid once we do the copy!
+        */
+       kexec_stack.thread_info.task = current_thread_info()->task;
+       kexec_stack.thread_info.flags = 0;
+
+       /* Some things are best done in assembly.  Finding globals with
+        * a toc is easier in C, so pass in what we can.
+        */
+       kexec_sequence(&kexec_stack, image->start, image,
+                       page_address(image->control_code_page),
+                       ppc_md.hpte_clear_all);
+       /* NOTREACHED */
+}
diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S

index e3c73b3..f3dea0c 100644 (file)
--- a/arch/ppc64/kernel/misc.S
+++ b/arch/ppc64/kernel/misc.S
@@ -680,6 +680,177 @@ _GLOBAL(kernel_thread)
         ld      r30,-16(r1)
         blr
  
+/* kexec_wait(phys_cpu)
+ *
+ * wait for the flag to change, indicating this kernel is going away but
+ * the slave code for the next one is at addresses 0 to 100.
+ *
+ * This is used by all slaves.
+ *
+ * Physical (hardware) cpu id should be in r3.
+ */
+_GLOBAL(kexec_wait)
+       bl      1f
+1:     mflr    r5
+       addi    r5,r5,kexec_flag-1b
+
+99:    HMT_LOW
+#ifdef CONFIG_KEXEC            /* use no memory without kexec */
+       lwz     r4,0(r5)
+       cmpwi   0,r4,0
+       bnea    0x60
+#endif
+       b       99b
+
+/* this can be in text because we won't change it until we are
+ * running in real anyways
+ */
+kexec_flag:
+       .long   0
+
+
+#ifdef CONFIG_KEXEC
+
+/* kexec_smp_wait(void)
+ *
+ * call with interrupts off
+ * note: this is a terminal routine, it does not save lr
+ *
+ * get phys id from paca
+ * set paca id to -1 to say we got here
+ * switch to real mode
+ * join other cpus in kexec_wait(phys_id)
+ */
+_GLOBAL(kexec_smp_wait)
+       lhz     r3,PACAHWCPUID(r13)
+       li      r4,-1
+       sth     r4,PACAHWCPUID(r13)     /* let others know we left */
+       bl      real_mode
+       b       .kexec_wait
+
+/*
+ * switch to real mode (turn mmu off)
+ * we use the early kernel trick that the hardware ignores bits
+ * 0 and 1 (big endian) of the effective address in real mode
+ *
+ * don't overwrite r3 here, it is live for kexec_wait above.
+ */
+real_mode:     /* assume normal blr return */
+1:     li      r9,MSR_RI
+       li      r10,MSR_DR|MSR_IR
+       mflr    r11             /* return address to SRR0 */
+       mfmsr   r12
+       andc    r9,r12,r9
+       andc    r10,r12,r10
+
+       mtmsrd  r9,1
+       mtspr   SPRN_SRR1,r10
+       mtspr   SPRN_SRR0,r11
+       rfid
+
+
+/*
+ * kexec_sequence(newstack, start, image, control, clear_all())
+ *
+ * does the grungy work with stack switching and real mode switches
+ * also does simple calls to other code
+ */
+
+_GLOBAL(kexec_sequence)
+       mflr    r0
+       std     r0,16(r1)
+
+       /* switch stacks to newstack -- &kexec_stack.stack */
+       stdu    r1,THREAD_SIZE-112(r3)
+       mr      r1,r3
+
+       li      r0,0
+       std     r0,16(r1)
+
+       /* save regs for local vars on new stack.
+        * yes, we won't go back, but ...
+        */
+       std     r31,-8(r1)
+       std     r30,-16(r1)
+       std     r29,-24(r1)
+       std     r28,-32(r1)
+       std     r27,-40(r1)
+       std     r26,-48(r1)
+       std     r25,-56(r1)
+
+       stdu    r1,-112-64(r1)
+
+       /* save args into preserved regs */
+       mr      r31,r3                  /* newstack (both) */
+       mr      r30,r4                  /* start (real) */
+       mr      r29,r5                  /* image (virt) */
+       mr      r28,r6                  /* control, unused */
+       mr      r27,r7                  /* clear_all() fn desc */
+       mr      r26,r8                  /* spare */
+       lhz     r25,PACAHWCPUID(r13)    /* get our phys cpu from paca */
+
+       /* disable interrupts, we are overwriting kernel data next */
+       mfmsr   r3
+       rlwinm  r3,r3,0,17,15
+       mtmsrd  r3,1
+
+       /* copy dest pages, flush whole dest image */
+       mr      r3,r29
+       bl      .kexec_copy_flush       /* (image) */
+
+       /* turn off mmu */
+       bl      real_mode
+
+       /* clear out hardware hash page table and tlb */
+       ld      r5,0(r27)               /* deref function descriptor */
+       mtctr   r5
+       bctrl                           /* ppc_md.hash_clear_all(void); */
+
+/*
+ *   kexec image calling is:
+ *      the first 0x100 bytes of the entry point are copied to 0
+ *
+ *      all slaves branch to slave = 0x60 (absolute)
+ *              slave(phys_cpu_id);
+ *
+ *      master goes to start = entry point
+ *              start(phys_cpu_id, start, 0);
+ *
+ *
+ *   a wrapper is needed to call existing kernels, here is an approximate
+ *   description of one method:
+ *
+ * v2: (2.6.10)
+ *   start will be near the boot_block (maybe 0x100 bytes before it?)
+ *   it will have a 0x60, which will b to boot_block, where it will wait
+ *   and 0 will store phys into struct boot-block and load r3 from there,
+ *   copy kernel 0-0x100 and tell slaves to back down to 0x60 again
+ *
+ * v1: (2.6.9)
+ *    boot block will have all cpus scanning device tree to see if they
+ *    are the boot cpu ?????
+ *    other device tree differences (prop sizes, va vs pa, etc)...
+ */
+
+       /* copy  0x100 bytes starting at start to 0 */
+       li      r3,0
+       mr      r4,r30
+       li      r5,0x100
+       li      r6,0
+       bl      .copy_and_flush /* (dest, src, copy limit, start offset) */
+1:     /* assume normal blr return */
+
+       /* release other cpus to the new kernel secondary start at 0x60 */
+       mflr    r5
+       li      r6,1
+       stw     r6,kexec_flag-1b(5)
+       mr      r3,r25  # my phys cpu
+       mr      r4,r30  # start, aka phys mem offset
+       mtlr    4
+       li      r5,0
+       blr     /* image->start(physid, image->start, 0); */
+#endif /* CONFIG_KEXEC */
+
  /* Why isn't this a) automatic, b) written in 'C'? */  
         .balign 8
  _GLOBAL(sys_call_table32)
@@ -951,7 +1122,7 @@ _GLOBAL(sys_call_table32)
         .llong .compat_sys_mq_timedreceive /* 265 */
         .llong .compat_sys_mq_notify
         .llong .compat_sys_mq_getsetattr
-       .llong .sys_ni_syscall          /* 268 reserved for sys_kexec_load */
+       .llong .compat_sys_kexec_load
         .llong .sys32_add_key
         .llong .sys32_request_key
         .llong .compat_sys_keyctl
@@ -1227,7 +1398,7 @@ _GLOBAL(sys_call_table)
         .llong .sys_mq_timedreceive     /* 265 */
         .llong .sys_mq_notify
         .llong .sys_mq_getsetattr
-       .llong .sys_ni_syscall          /* 268 reserved for sys_kexec_load */
+       .llong .sys_kexec_load
         .llong .sys_add_key
         .llong .sys_request_key         /* 270 */
         .llong .sys_keyctl
diff --git a/arch/ppc64/kernel/mpic.c b/arch/ppc64/kernel/mpic.c

index 593ea5b..e8fbab1 100644 (file)
--- a/arch/ppc64/kernel/mpic.c
+++ b/arch/ppc64/kernel/mpic.c
@@ -792,6 +792,35 @@ void mpic_setup_this_cpu(void)
  #endif /* CONFIG_SMP */
  }
  
+/*
+ * XXX: someone who knows mpic should check this.
+ * do we need to eoi the ipi here (see xics comments)?
+ * or can we reset the mpic in the new kernel?
+ */
+void mpic_teardown_this_cpu(void)
+{
+       struct mpic *mpic = mpic_primary;
+       unsigned long flags;
+       u32 msk = 1 << hard_smp_processor_id();
+       unsigned int i;
+
+       BUG_ON(mpic == NULL);
+
+       DBG("%s: teardown_this_cpu(%d)\n", mpic->name, hard_smp_processor_id());
+       spin_lock_irqsave(&mpic_lock, flags);
+
+       /* let the mpic know we don't want intrs.  */
+       for (i = 0; i < mpic->num_sources ; i++)
+               mpic_irq_write(i, MPIC_IRQ_DESTINATION,
+                       mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk);
+
+       /* Set current processor priority to max */
+       mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf);
+
+       spin_unlock_irqrestore(&mpic_lock, flags);
+}
+
+
  void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask)
  {
         struct mpic *mpic = mpic_primary;
diff --git a/arch/ppc64/kernel/mpic.h b/arch/ppc64/kernel/mpic.h

index 63e1771..99fbbc9 100644 (file)
--- a/arch/ppc64/kernel/mpic.h
+++ b/arch/ppc64/kernel/mpic.h
@@ -255,6 +255,9 @@ extern unsigned int mpic_irq_get_priority(unsigned int irq);
  /* Setup a non-boot CPU */
  extern void mpic_setup_this_cpu(void);
  
+/* Clean up for kexec (or cpu offline or ...) */
+extern void mpic_teardown_this_cpu(void);
+
  /* Request IPIs on primary mpic */
  extern void mpic_request_ipis(void);
  
diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c

index f2b4124..44d9af7 100644 (file)
--- a/arch/ppc64/kernel/pSeries_setup.c
+++ b/arch/ppc64/kernel/pSeries_setup.c
@@ -187,14 +187,16 @@ static void __init pSeries_setup_arch(void)
  {
         /* Fixup ppc_md depending on the type of interrupt controller */
         if (ppc64_interrupt_controller == IC_OPEN_PIC) {
-               ppc_md.init_IRQ       = pSeries_init_mpic; 
+               ppc_md.init_IRQ       = pSeries_init_mpic;
                 ppc_md.get_irq        = mpic_get_irq;
+               ppc_md.cpu_irq_down   = mpic_teardown_this_cpu;
                 /* Allocate the mpic now, so that find_and_init_phbs() can
                  * fill the ISUs */
                 pSeries_setup_mpic();
         } else {
                 ppc_md.init_IRQ       = xics_init_IRQ;
                 ppc_md.get_irq        = xics_get_irq;
+               ppc_md.cpu_irq_down   = xics_teardown_cpu;
         }
  
  #ifdef CONFIG_SMP
diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c

index 0a47a5e..d5e4866 100644 (file)
--- a/arch/ppc64/kernel/setup.c
+++ b/arch/ppc64/kernel/setup.c
@@ -677,11 +677,16 @@ void __init setup_system(void)
         DBG(" <- setup_system()\n");
  }
  
-
-void machine_restart(char *cmd)
+/* also used by kexec */
+void machine_shutdown(void)
  {
         if (ppc_md.nvram_sync)
                 ppc_md.nvram_sync();
+}
+
+void machine_restart(char *cmd)
+{
+       machine_shutdown();
         ppc_md.restart(cmd);
  #ifdef CONFIG_SMP
         smp_send_stop();
@@ -690,13 +695,11 @@ void machine_restart(char *cmd)
         local_irq_disable();
         while (1) ;
  }
-
  EXPORT_SYMBOL(machine_restart);
-  
+
  void machine_power_off(void)
  {
-       if (ppc_md.nvram_sync)
-               ppc_md.nvram_sync();
+       machine_shutdown();
         ppc_md.power_off();
  #ifdef CONFIG_SMP
         smp_send_stop();
@@ -705,13 +708,11 @@ void machine_power_off(void)
         local_irq_disable();
         while (1) ;
  }
-
  EXPORT_SYMBOL(machine_power_off);
-  
+
  void machine_halt(void)
  {
-       if (ppc_md.nvram_sync)
-               ppc_md.nvram_sync();
+       machine_shutdown();
         ppc_md.halt();
  #ifdef CONFIG_SMP
         smp_send_stop();
@@ -720,7 +721,6 @@ void machine_halt(void)
         local_irq_disable();
         while (1) ;
  }
-
  EXPORT_SYMBOL(machine_halt);
  
  static int ppc64_panic_event(struct notifier_block *this,
diff --git a/arch/ppc64/kernel/xics.c b/arch/ppc64/kernel/xics.c

index 879f39b..677c445 100644 (file)
--- a/arch/ppc64/kernel/xics.c
+++ b/arch/ppc64/kernel/xics.c
@@ -647,6 +647,31 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
         }
  }
  
+void xics_teardown_cpu(void)
+{
+       int cpu = smp_processor_id();
+       int status;
+
+       ops->cppr_info(cpu, 0x00);
+       iosync();
+
+       /*
+        * we need to EOI the IPI if we got here from kexec down IPI
+        *
+        * xics doesn't care if we duplicate an EOI as long as we
+        * don't EOI and raise priority.
+        *
+        * probably need to check all the other interrupts too
+        * should we be flagging idle loop instead?
+        * or creating some task to be scheduled?
+        */
+       ops->xirr_info_set(cpu, XICS_IPI);
+
+       status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
+               (1UL << interrupt_server_size) - 1 - default_distrib_server, 0);
+       WARN_ON(status != 0);
+}
+
  #ifdef CONFIG_HOTPLUG_CPU
  
  /* Interrupts are disabled. */
diff --git a/include/asm-ppc64/kexec.h b/include/asm-ppc64/kexec.h

new file mode 100644 (file)

index 0000000..511908a
--- /dev/null
+++ b/include/asm-ppc64/kexec.h
@@ -0,0 +1,41 @@
+#ifndef _PPC64_KEXEC_H
+#define _PPC64_KEXEC_H
+
+/*
+ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
+ * I.e. Maximum page that is mapped directly into kernel memory,
+ * and kmap is not required.
+ */
+
+/* Maximum physical address we can use pages from */
+/* XXX: since we copy virt we can use any page we allocate */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+/* XXX: I want to allow initrd in highmem.  otherwise set to rmo on lpar */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+/* XXX: unused today, ppc32 uses TASK_SIZE, probably left over from use_mm  */
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+/* XXX: today we don't use this at all, althogh we have a static stack */
+#define KEXEC_CONTROL_CODE_SIZE 4096
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_PPC64
+
+#define MAX_NOTE_BYTES 1024
+
+#ifndef __ASSEMBLY__
+
+typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
+
+extern note_buf_t crash_notes[];
+
+extern void kexec_smp_wait(void);      /* get and clear naca physid, wait for
+                                         master to copy new code to 0 */
+
+#endif /* __ASSEMBLY__ */
+#endif /* _PPC_KEXEC_H */
+
diff --git a/include/asm-ppc64/machdep.h b/include/asm-ppc64/machdep.h

index 553b2ea..9cdad3e 100644 (file)
--- a/include/asm-ppc64/machdep.h
+++ b/include/asm-ppc64/machdep.h
@@ -86,6 +86,7 @@ struct machdep_calls {
  
         void            (*init_IRQ)(void);
         int             (*get_irq)(struct pt_regs *);
+       void            (*cpu_irq_down)(void);
  
         /* PCI stuff */
         void            (*pcibios_fixup)(void);
diff --git a/include/asm-ppc64/xics.h b/include/asm-ppc64/xics.h

index fdec5e7..0c45e14 100644 (file)
--- a/include/asm-ppc64/xics.h
+++ b/include/asm-ppc64/xics.h
@@ -17,6 +17,7 @@
  void xics_init_IRQ(void);
  int xics_get_irq(struct pt_regs *);
  void xics_setup_cpu(void);
+void xics_teardown_cpu(void);
  void xics_cause_IPI(int cpu);
  void xics_request_IPIs(void);
  void xics_migrate_irqs_away(void);
author	R Sharada <sharada@in.ibm.com>
	Sat, 25 Jun 2005 21:58:10 +0000 (14:58 -0700)
committer	Linus Torvalds <torvalds@ppc970.osdl.org>
	Sat, 25 Jun 2005 23:24:51 +0000 (16:24 -0700)
arch/ppc64/Kconfig		patch \| blob \| history
arch/ppc64/kernel/Makefile		patch \| blob \| history
arch/ppc64/kernel/head.S		patch \| blob \| history
arch/ppc64/kernel/machine_kexec.c	[new file with mode: 0644]	patch \| blob
arch/ppc64/kernel/misc.S		patch \| blob \| history
arch/ppc64/kernel/mpic.c		patch \| blob \| history
arch/ppc64/kernel/mpic.h		patch \| blob \| history
arch/ppc64/kernel/pSeries_setup.c		patch \| blob \| history
arch/ppc64/kernel/setup.c		patch \| blob \| history
arch/ppc64/kernel/xics.c		patch \| blob \| history
include/asm-ppc64/kexec.h	[new file with mode: 0644]	patch \| blob
include/asm-ppc64/machdep.h		patch \| blob \| history
include/asm-ppc64/xics.h		patch \| blob \| history