powerpc/powernv: Invoke opal_cec_reboot2() on unrecoverable machine check errors.

author Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Fri, 31 Jul 2015 15:54:38 +0000 (21:24 +0530)

committer Michael Ellerman <mpe@ellerman.id.au>

Thu, 6 Aug 2015 05:10:18 +0000 (15:10 +1000)
author Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Fri, 31 Jul 2015 15:54:38 +0000 (21:24 +0530)
committer Michael Ellerman <mpe@ellerman.id.au>
Thu, 6 Aug 2015 05:10:18 +0000 (15:10 +1000)
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h

index 4de3c69..47d549a 100644 (file)
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -154,7 +154,8 @@
  #define OPAL_FLASH_WRITE                       111
  #define OPAL_FLASH_ERASE                       112
  #define OPAL_PRD_MSG                           113
-#define OPAL_LAST                              113
+#define OPAL_CEC_REBOOT2                       116
+#define OPAL_LAST                              116
  
  /* Device tree flags */
  
@@ -857,6 +858,12 @@ enum OpalSysCooling {
         OPAL_SYSCOOL_INSF       = 0x0001, /* System insufficient cooling */
  };
  
+/* Argument to OPAL_CEC_REBOOT2() */
+enum {
+       OPAL_REBOOT_NORMAL              = 0,
+       OPAL_REBOOT_PLATFORM_ERROR      = 1,
+};
+
  #endif /* __ASSEMBLY__ */
  
  #endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h

index a091c27..48762b5 100644 (file)
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -44,6 +44,7 @@ int64_t opal_tpo_write(uint64_t token, uint32_t year_mon_day,
                        uint32_t hour_min);
  int64_t opal_cec_power_down(uint64_t request);
  int64_t opal_cec_reboot(void);
+int64_t opal_cec_reboot2(uint32_t reboot_type, char *diag);
  int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
  int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
  int64_t opal_handle_interrupt(uint64_t isn, __be64 *outstanding_event_mask);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S

index 88e4333..6224176 100644 (file)
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -202,6 +202,7 @@ OPAL_CALL(opal_rtc_read,                    OPAL_RTC_READ);
  OPAL_CALL(opal_rtc_write,                      OPAL_RTC_WRITE);
  OPAL_CALL(opal_cec_power_down,                 OPAL_CEC_POWER_DOWN);
  OPAL_CALL(opal_cec_reboot,                     OPAL_CEC_REBOOT);
+OPAL_CALL(opal_cec_reboot2,                    OPAL_CEC_REBOOT2);
  OPAL_CALL(opal_read_nvram,                     OPAL_READ_NVRAM);
  OPAL_CALL(opal_write_nvram,                    OPAL_WRITE_NVRAM);
  OPAL_CALL(opal_handle_interrupt,               OPAL_HANDLE_INTERRUPT);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c

index f084afa..a2b53f2 100644 (file)
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -441,6 +441,7 @@ static int opal_recover_mce(struct pt_regs *regs,
  int opal_machine_check(struct pt_regs *regs)
  {
         struct machine_check_event evt;
+       int ret;
  
         if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
                 return 0;
@@ -455,6 +456,40 @@ int opal_machine_check(struct pt_regs *regs)
  
         if (opal_recover_mce(regs, &evt))
                 return 1;
+
+       /*
+        * Unrecovered machine check, we are heading to panic path.
+        *
+        * We may have hit this MCE at a very early stage of kernel
+        * initialization, even before opal-prd has started running. If
+        * so, this MCE may go unnoticed or unanalyzed if we go down the
+        * panic path. We need to inform the BMC/OCC about this error so
+        * that they can collect relevant data for error analysis before
+        * rebooting.
+        * Use opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR) to do so.
+        * This function may not return on BMC based systems.
+        */
+       ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
+                       "Unrecoverable Machine Check exception");
+       if (ret == OPAL_UNSUPPORTED) {
+               pr_emerg("Reboot type %d not supported\n",
+                                       OPAL_REBOOT_PLATFORM_ERROR);
+       }
+
+       /*
+        * We reached here. There can be three possibilities:
+        * 1. We are running on a firmware level that does not support
+        *    opal_cec_reboot2()
+        * 2. We are running on a firmware level that does not support
+        *    OPAL_REBOOT_PLATFORM_ERROR reboot type.
+        * 3. We are running on an FSP based system that does not need opal
+        *    to trigger checkstop explicitly for error analysis. The FSP
+        *    PRD component would have already got notified about this
+        *    error through other channels.
+        *
+        * In any case, let us just fall through. We are heading
+        * down the panic path anyway.
+        */
         return 0;
  }
author	Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
	Fri, 31 Jul 2015 15:54:38 +0000 (21:24 +0530)
committer	Michael Ellerman <mpe@ellerman.id.au>
	Thu, 6 Aug 2015 05:10:18 +0000 (15:10 +1000)
arch/powerpc/include/asm/opal-api.h		patch \| blob \| history
arch/powerpc/include/asm/opal.h		patch \| blob \| history
arch/powerpc/platforms/powernv/opal-wrappers.S		patch \| blob \| history
arch/powerpc/platforms/powernv/opal.c		patch \| blob \| history