ACPI, APEI, GHES, Distinguish interleaved error report in kernel log
author Huang Ying <ying.huang@intel.com>
Thu, 8 Dec 2011 03:25:44 +0000 (11:25 +0800)
committer Len Brown <len.brown@intel.com>
Tue, 17 Jan 2012 08:54:31 +0000 (03:54 -0500)
In most cases, printk only guarantees that messages from different
printk calls will not be interleaved with each other.  But one APEI
GHES hardware error report involves multiple printk calls, normally
one per line.  So it is possible that hardware error reports coming
from different generic hardware error sources will be interleaved
in the kernel log.

In this patch, a sequence number is prefixed to each line of an error
report, so that even if reports are interleaved, they can still be
distinguished by the prefixed sequence number.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
drivers/acpi/apei/ghes.c

index 511b971..9dcb2d8 100644 (file)
@@ -506,16 +506,22 @@ static void __ghes_print_estatus(const char *pfx,
                                 const struct acpi_hest_generic *generic,
                                 const struct acpi_hest_generic_status *estatus)
 {
+       static atomic_t seqno;
+       unsigned int curr_seqno;
+       char pfx_seq[64];
+
        if (pfx == NULL) {
                if (ghes_severity(estatus->error_severity) <=
                    GHES_SEV_CORRECTED)
-                       pfx = KERN_WARNING HW_ERR;
+                       pfx = KERN_WARNING;
                else
-                       pfx = KERN_ERR HW_ERR;
+                       pfx = KERN_ERR;
        }
+       curr_seqno = atomic_inc_return(&seqno);
+       snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
        printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
-              pfx, generic->header.source_id);
-       apei_estatus_print(pfx, estatus);
+              pfx_seq, generic->header.source_id);
+       apei_estatus_print(pfx_seq, estatus);
 }
 
 static int ghes_print_estatus(const char *pfx,
@@ -798,7 +804,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
 
        if (sev_global >= GHES_SEV_PANIC) {
                oops_begin();
-               __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic,
+               __ghes_print_estatus(KERN_EMERG, ghes_global->generic,
                                     ghes_global->estatus);
                /* reboot to log the error! */
                if (panic_timeout == 0)