arch/x86_64/kernel/mce.c

   1 /*
   2  * Machine check handler.
   3  * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
   4  * Rest from unknown author(s).
   5  * 2004 Andi Kleen. Rewrote most of it.
   6  */
   7
   8 #include <linux/init.h>
   9 #include <linux/types.h>
  10 #include <linux/kernel.h>
  11 #include <linux/sched.h>
  12 #include <linux/string.h>
  13 #include <linux/rcupdate.h>
  14 #include <linux/kallsyms.h>
  15 #include <linux/sysdev.h>
  16 #include <linux/miscdevice.h>
  17 #include <linux/fs.h>
  18 #include <linux/capability.h>
  19 #include <linux/cpu.h>
  20 #include <linux/percpu.h>
  21 #include <linux/ctype.h>
  22 #include <asm/processor.h>
  23 #include <asm/msr.h>
  24 #include <asm/mce.h>
  25 #include <asm/kdebug.h>
  26 #include <asm/uaccess.h>
  27 #include <asm/smp.h>
  28
  29 #define MISC_MCELOG_MINOR 227
  30 #define NR_BANKS 6
  31
  32 atomic_t mce_entry;
  33
  34 static int mce_dont_init;
  35
  36 /* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
  37    3: never panic or exit (for testing only) */
  38 static int tolerant = 1;
  39 static int banks;
  40 static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
  41 static unsigned long console_logged;
  42 static int notify_user;
  43 static int rip_msr;
  44 static int mce_bootlog = 1;
  45
  46 /*
  47  * Lockless MCE logging infrastructure.
  48  * This avoids deadlocks on printk locks without having to break locks. Also
  49  * separate MCEs from kernel messages to avoid bogus bug reports.
  50  */
  51
  52 struct mce_log mcelog = {
  53         MCE_LOG_SIGNATURE,
  54         MCE_LOG_LEN,
  55 };
  56
  57 void mce_log(struct mce *mce)
  58 {
  59         unsigned next, entry;
  60         mce->finished = 0;
  61         wmb();
  62         for (;;) {
  63                 entry = rcu_dereference(mcelog.next);
  64                 /* The rmb forces the compiler to reload next in each
  65                     iteration */
  66                 rmb();
  67                 for (;;) {
  68                         /* When the buffer fills up discard new entries. Assume
  69                            that the earlier errors are the more interesting. */
  70                         if (entry >= MCE_LOG_LEN) {
  71                                 set_bit(MCE_OVERFLOW, &mcelog.flags);
  72                                 return;
  73                         }
  74                         /* Old left over entry. Skip. */
  75                         if (mcelog.entry[entry].finished) {
  76                                 entry++;
  77                                 continue;
  78                         }
  79                         break;
  80                 }
  81                 smp_rmb();
  82                 next = entry + 1;
  83                 if (cmpxchg(&mcelog.next, entry, next) == entry)
  84                         break;
  85         }
  86         memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
  87         wmb();
  88         mcelog.entry[entry].finished = 1;
  89         wmb();
  90
  91         if (!test_and_set_bit(0, &console_logged))
  92                 notify_user = 1;
  93 }
  94
  95 static void print_mce(struct mce *m)
  96 {
  97         printk(KERN_EMERG "\n"
  98                KERN_EMERG "HARDWARE ERROR\n"
  99                KERN_EMERG
 100                "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
 101                m->cpu, m->mcgstatus, m->bank, m->status);
 102         if (m->rip) {
 103                 printk(KERN_EMERG
 104                        "RIP%s %02x:<%016Lx> ",
 105                        !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
 106                        m->cs, m->rip);
 107                 if (m->cs == __KERNEL_CS)
 108                         print_symbol("{%s}", m->rip);
 109                 printk("\n");
 110         }
 111         printk(KERN_EMERG "TSC %Lx ", m->tsc);
 112         if (m->addr)
 113                 printk("ADDR %Lx ", m->addr);
 114         if (m->misc)
 115                 printk("MISC %Lx ", m->misc);
 116         printk("\n");
 117         printk(KERN_EMERG "This is not a software problem!\n");
 118         printk(KERN_EMERG
 119     "Run through mcelog --ascii to decode and contact your hardware vendor\n");
 120 }
 121
 122 static void mce_panic(char *msg, struct mce *backup, unsigned long start)
 123 {
 124         int i;
 125         oops_begin();
 126         for (i = 0; i < MCE_LOG_LEN; i++) {
 127                 unsigned long tsc = mcelog.entry[i].tsc;
 128                 if (time_before(tsc, start))
 129                         continue;
 130                 print_mce(&mcelog.entry[i]);
 131                 if (backup && mcelog.entry[i].tsc == backup->tsc)
 132                         backup = NULL;
 133         }
 134         if (backup)
 135                 print_mce(backup);
 136         if (tolerant >= 3)
 137                 printk("Fake panic: %s\n", msg);
 138         else
 139                 panic(msg);
 140 }
 141
 142 static int mce_available(struct cpuinfo_x86 *c)
 143 {
 144         return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
 145 }
 146
 147 static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
 148 {
 149         if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
 150                 m->rip = regs->rip;
 151                 m->cs = regs->cs;
 152         } else {
 153                 m->rip = 0;
 154                 m->cs = 0;
 155         }
 156         if (rip_msr) {
 157                 /* Assume the RIP in the MSR is exact. Is this true? */
 158                 m->mcgstatus |= MCG_STATUS_EIPV;
 159                 rdmsrl(rip_msr, m->rip);
 160                 m->cs = 0;
 161         }
 162 }
 163
 164 /*
 165  * The actual machine check handler
 166  */
 167
 168 void do_machine_check(struct pt_regs * regs, long error_code)
 169 {
 170         struct mce m, panicm;
 171         int nowayout = (tolerant < 1);
 172         int kill_it = 0;
 173         u64 mcestart = 0;
 174         int i;
 175         int panicm_found = 0;
 176
 177         atomic_inc(&mce_entry);
 178
 179         if (regs)
 180                 notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
 181         if (!banks)
 182                 goto out2;
 183
 184         memset(&m, 0, sizeof(struct mce));
 185         m.cpu = safe_smp_processor_id();
 186         rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
 187         if (!(m.mcgstatus & MCG_STATUS_RIPV))
 188                 kill_it = 1;
 189
 190         rdtscll(mcestart);
 191         barrier();
 192
 193         for (i = 0; i < banks; i++) {
 194                 if (!bank[i])
 195                         continue;
 196
 197                 m.misc = 0;
 198                 m.addr = 0;
 199                 m.bank = i;
 200                 m.tsc = 0;
 201
 202                 rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
 203                 if ((m.status & MCI_STATUS_VAL) == 0)
 204                         continue;
 205
 206                 if (m.status & MCI_STATUS_EN) {
 207                         /* In theory _OVER could be a nowayout too, but
 208                            assume any overflowed errors were no fatal. */
 209                         nowayout |= !!(m.status & MCI_STATUS_PCC);
 210                         kill_it |= !!(m.status & MCI_STATUS_UC);
 211                 }
 212
 213                 if (m.status & MCI_STATUS_MISCV)
 214                         rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
 215                 if (m.status & MCI_STATUS_ADDRV)
 216                         rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
 217
 218                 mce_get_rip(&m, regs);
 219                 if (error_code >= 0)
 220                         rdtscll(m.tsc);
 221                 wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0);
 222                 if (error_code != -2)
 223                         mce_log(&m);
 224
 225                 /* Did this bank cause the exception? */
 226                 /* Assume that the bank with uncorrectable errors did it,
 227                    and that there is only a single one. */
 228                 if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
 229                         panicm = m;
 230                         panicm_found = 1;
 231                 }
 232
 233                 add_taint(TAINT_MACHINE_CHECK);
 234         }
 235
 236         /* Never do anything final in the polling timer */
 237         if (!regs)
 238                 goto out;
 239
 240         /* If we didn't find an uncorrectable error, pick
 241            the last one (shouldn't happen, just being safe). */
 242         if (!panicm_found)
 243                 panicm = m;
 244         if (nowayout)
 245                 mce_panic("Machine check", &panicm, mcestart);
 246         if (kill_it) {
 247                 int user_space = 0;
 248
 249                 if (m.mcgstatus & MCG_STATUS_RIPV)
 250                         user_space = panicm.rip && (panicm.cs & 3);
 251
 252                 /* When the machine was in user space and the CPU didn't get
 253                    confused it's normally not necessary to panic, unless you
 254                    are paranoid (tolerant == 0)
 255
 256                    RED-PEN could be more tolerant for MCEs in idle,
 257                    but most likely they occur at boot anyways, where
 258                    it is best to just halt the machine. */
 259                 if ((!user_space && (panic_on_oops || tolerant < 2)) ||
 260                     (unsigned)current->pid <= 1)
 261                         mce_panic("Uncorrected machine check", &panicm, mcestart);
 262
 263                 /* do_exit takes an awful lot of locks and has as
 264                    slight risk of deadlocking. If you don't want that
 265                    don't set tolerant >= 2 */
 266                 if (tolerant < 3)
 267                         do_exit(SIGBUS);
 268         }
 269
 270  out:
 271         /* Last thing done in the machine check exception to clear state. */
 272         wrmsrl(MSR_IA32_MCG_STATUS, 0);
 273  out2:
 274         atomic_dec(&mce_entry);
 275 }
 276
 277 /*
 278  * Periodic polling timer for "silent" machine check errors.
 279  */
 280
 281 static int check_interval = 5 * 60; /* 5 minutes */
 282 static void mcheck_timer(void *data);
 283 static DECLARE_WORK(mcheck_work, mcheck_timer, NULL);
 284
 285 static void mcheck_check_cpu(void *info)
 286 {
 287         if (mce_available(&current_cpu_data))
 288                 do_machine_check(NULL, 0);
 289 }
 290
 291 static void mcheck_timer(void *data)
 292 {
 293         on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
 294         schedule_delayed_work(&mcheck_work, check_interval * HZ);
 295
 296         /*
 297          * It's ok to read stale data here for notify_user and
 298          * console_logged as we'll simply get the updated versions
 299          * on the next mcheck_timer execution and atomic operations
 300          * on console_logged act as synchronization for notify_user
 301          * writes.
 302          */
 303         if (notify_user && console_logged) {
 304                 notify_user = 0;
 305                 clear_bit(0, &console_logged);
 306                 printk(KERN_INFO "Machine check events logged\n");
 307         }
 308 }
 309
 310
 311 static __init int periodic_mcheck_init(void)
 312 {
 313         if (check_interval)
 314                 schedule_delayed_work(&mcheck_work, check_interval*HZ);
 315         return 0;
 316 }
 317 __initcall(periodic_mcheck_init);
 318
 319
 320 /*
 321  * Initialize Machine Checks for a CPU.
 322  */
 323 static void mce_init(void *dummy)
 324 {
 325         u64 cap;
 326         int i;
 327
 328         rdmsrl(MSR_IA32_MCG_CAP, cap);
 329         banks = cap & 0xff;
 330         if (banks > NR_BANKS) {
 331                 printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
 332                 banks = NR_BANKS;
 333         }
 334         /* Use accurate RIP reporting if available. */
 335         if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
 336                 rip_msr = MSR_IA32_MCG_EIP;
 337
 338         /* Log the machine checks left over from the previous reset.
 339            This also clears all registers */
 340         do_machine_check(NULL, mce_bootlog ? -1 : -2);
 341
 342         set_in_cr4(X86_CR4_MCE);
 343
 344         if (cap & MCG_CTL_P)
 345                 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 346
 347         for (i = 0; i < banks; i++) {
 348                 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
 349                 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
 350         }
 351 }
 352
 353 /* Add per CPU specific workarounds here */
 354 static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
 355 {
 356         /* This should be disabled by the BIOS, but isn't always */
 357         if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
 358                 /* disable GART TBL walk error reporting, which trips off
 359                    incorrectly with the IOMMU & 3ware & Cerberus. */
 360                 clear_bit(10, &bank[4]);
 361                 /* Lots of broken BIOS around that don't clear them
 362                    by default and leave crap in there. Don't log. */
 363                 mce_bootlog = 0;
 364         }
 365
 366 }
 367
 368 static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
 369 {
 370         switch (c->x86_vendor) {
 371         case X86_VENDOR_INTEL:
 372                 mce_intel_feature_init(c);
 373                 break;
 374         case X86_VENDOR_AMD:
 375                 mce_amd_feature_init(c);
 376                 break;
 377         default:
 378                 break;
 379         }
 380 }
 381
 382 /*
 383  * Called for each booted CPU to set up machine checks.
 384  * Must be called with preempt off.
 385  */
 386 void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
 387 {
 388         static cpumask_t mce_cpus = CPU_MASK_NONE;
 389
 390         mce_cpu_quirks(c);
 391
 392         if (mce_dont_init ||
 393             cpu_test_and_set(smp_processor_id(), mce_cpus) ||
 394             !mce_available(c))
 395                 return;
 396
 397         mce_init(NULL);
 398         mce_cpu_features(c);
 399 }
 400
 401 /*
 402  * Character device to read and clear the MCE log.
 403  */
 404
 405 static void collect_tscs(void *data)
 406 {
 407         unsigned long *cpu_tsc = (unsigned long *)data;
 408         rdtscll(cpu_tsc[smp_processor_id()]);
 409 }
 410
 411 static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off)
 412 {
 413         unsigned long *cpu_tsc;
 414         static DECLARE_MUTEX(mce_read_sem);
 415         unsigned next;
 416         char __user *buf = ubuf;
 417         int i, err;
 418
 419         cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL);
 420         if (!cpu_tsc)
 421                 return -ENOMEM;
 422
 423         down(&mce_read_sem);
 424         next = rcu_dereference(mcelog.next);
 425
 426         /* Only supports full reads right now */
 427         if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
 428                 up(&mce_read_sem);
 429                 kfree(cpu_tsc);
 430                 return -EINVAL;
 431         }
 432
 433         err = 0;
 434         for (i = 0; i < next; i++) {
 435                 unsigned long start = jiffies;
 436                 while (!mcelog.entry[i].finished) {
 437                         if (!time_before(jiffies, start + 2)) {
 438                                 memset(mcelog.entry + i,0, sizeof(struct mce));
 439                                 continue;
 440                         }
 441                         cpu_relax();
 442                 }
 443                 smp_rmb();
 444                 err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
 445                 buf += sizeof(struct mce);
 446         }
 447
 448         memset(mcelog.entry, 0, next * sizeof(struct mce));
 449         mcelog.next = 0;
 450
 451         synchronize_sched();
 452
 453         /* Collect entries that were still getting written before the synchronize. */
 454
 455         on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
 456         for (i = next; i < MCE_LOG_LEN; i++) {
 457                 if (mcelog.entry[i].finished &&
 458                     mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
 459                         err |= copy_to_user(buf, mcelog.entry+i, sizeof(struct mce));
 460                         smp_rmb();
 461                         buf += sizeof(struct mce);
 462                         memset(&mcelog.entry[i], 0, sizeof(struct mce));
 463                 }
 464         }
 465         up(&mce_read_sem);
 466         kfree(cpu_tsc);
 467         return err ? -EFAULT : buf - ubuf;
 468 }
 469
 470 static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg)
 471 {
 472         int __user *p = (int __user *)arg;
 473         if (!capable(CAP_SYS_ADMIN))
 474                 return -EPERM;
 475         switch (cmd) {
 476         case MCE_GET_RECORD_LEN:
 477                 return put_user(sizeof(struct mce), p);
 478         case MCE_GET_LOG_LEN:
 479                 return put_user(MCE_LOG_LEN, p);
 480         case MCE_GETCLEAR_FLAGS: {
 481                 unsigned flags;
 482                 do {
 483                         flags = mcelog.flags;
 484                 } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
 485                 return put_user(flags, p);
 486         }
 487         default:
 488                 return -ENOTTY;
 489         }
 490 }
 491
 492 static struct file_operations mce_chrdev_ops = {
 493         .read = mce_read,
 494         .ioctl = mce_ioctl,
 495 };
 496
 497 static struct miscdevice mce_log_device = {
 498         MISC_MCELOG_MINOR,
 499         "mcelog",
 500         &mce_chrdev_ops,
 501 };
 502
 503 /*
 504  * Old style boot options parsing. Only for compatibility.
 505  */
 506
 507 static int __init mcheck_disable(char *str)
 508 {
 509         mce_dont_init = 1;
 510         return 1;
 511 }
 512
 513 /* mce=off disables machine check. Note you can reenable it later
 514    using sysfs.
 515    mce=TOLERANCELEVEL (number, see above)
 516    mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
 517    mce=nobootlog Don't log MCEs from before booting. */
 518 static int __init mcheck_enable(char *str)
 519 {
 520         if (*str == '=')
 521                 str++;
 522         if (!strcmp(str, "off"))
 523                 mce_dont_init = 1;
 524         else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
 525                 mce_bootlog = str[0] == 'b';
 526         else if (isdigit(str[0]))
 527                 get_option(&str, &tolerant);
 528         else
 529                 printk("mce= argument %s ignored. Please use /sys", str);
 530         return 1;
 531 }
 532
 533 __setup("nomce", mcheck_disable);
 534 __setup("mce", mcheck_enable);
 535
 536 /*
 537  * Sysfs support
 538  */
 539
 540 /* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
 541    Only one CPU is active at this time, the others get readded later using
 542    CPU hotplug. */
 543 static int mce_resume(struct sys_device *dev)
 544 {
 545         mce_init(NULL);
 546         return 0;
 547 }
 548
 549 /* Reinit MCEs after user configuration changes */
 550 static void mce_restart(void)
 551 {
 552         if (check_interval)
 553                 cancel_delayed_work(&mcheck_work);
 554         /* Timer race is harmless here */
 555         on_each_cpu(mce_init, NULL, 1, 1);
 556         if (check_interval)
 557                 schedule_delayed_work(&mcheck_work, check_interval*HZ);
 558 }
 559
 560 static struct sysdev_class mce_sysclass = {
 561         .resume = mce_resume,
 562         set_kset_name("machinecheck"),
 563 };
 564
 565 static DEFINE_PER_CPU(struct sys_device, device_mce);
 566
 567 /* Why are there no generic functions for this? */
 568 #define ACCESSOR(name, var, start) \
 569         static ssize_t show_ ## name(struct sys_device *s, char *buf) {                    \
 570                 return sprintf(buf, "%lx\n", (unsigned long)var);                  \
 571         }                                                                          \
 572         static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \
 573                 char *end;                                                         \
 574                 unsigned long new = simple_strtoul(buf, &end, 0);                  \
 575                 if (end == buf) return -EINVAL;                                    \
 576                 var = new;                                                         \
 577                 start;                                                             \
 578                 return end-buf;                                                    \
 579         }                                                                          \
 580         static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
 581
 582 ACCESSOR(bank0ctl,bank[0],mce_restart())
 583 ACCESSOR(bank1ctl,bank[1],mce_restart())
 584 ACCESSOR(bank2ctl,bank[2],mce_restart())
 585 ACCESSOR(bank3ctl,bank[3],mce_restart())
 586 ACCESSOR(bank4ctl,bank[4],mce_restart())
 587 ACCESSOR(bank5ctl,bank[5],mce_restart())
 588 static struct sysdev_attribute * bank_attributes[NR_BANKS] = {
 589         &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
 590         &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl};
 591 ACCESSOR(tolerant,tolerant,)
 592 ACCESSOR(check_interval,check_interval,mce_restart())
 593
 594 /* Per cpu sysdev init.  All of the cpus still share the same ctl bank */
 595 static __cpuinit int mce_create_device(unsigned int cpu)
 596 {
 597         int err;
 598         int i;
 599         if (!mce_available(&cpu_data[cpu]))
 600                 return -EIO;
 601
 602         per_cpu(device_mce,cpu).id = cpu;
 603         per_cpu(device_mce,cpu).cls = &mce_sysclass;
 604
 605         err = sysdev_register(&per_cpu(device_mce,cpu));
 606
 607         if (!err) {
 608                 for (i = 0; i < banks; i++)
 609                         sysdev_create_file(&per_cpu(device_mce,cpu),
 610                                 bank_attributes[i]);
 611                 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant);
 612                 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval);
 613         }
 614         return err;
 615 }
 616
 617 #ifdef CONFIG_HOTPLUG_CPU
 618 static __cpuinit void mce_remove_device(unsigned int cpu)
 619 {
 620         int i;
 621
 622         for (i = 0; i < banks; i++)
 623                 sysdev_remove_file(&per_cpu(device_mce,cpu),
 624                         bank_attributes[i]);
 625         sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
 626         sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
 627         sysdev_unregister(&per_cpu(device_mce,cpu));
 628 }
 629 #endif
 630
 631 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
 632 static int
 633 mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 634 {
 635         unsigned int cpu = (unsigned long)hcpu;
 636
 637         switch (action) {
 638         case CPU_ONLINE:
 639                 mce_create_device(cpu);
 640                 break;
 641 #ifdef CONFIG_HOTPLUG_CPU
 642         case CPU_DEAD:
 643                 mce_remove_device(cpu);
 644                 break;
 645 #endif
 646         }
 647         return NOTIFY_OK;
 648 }
 649
 650 static struct notifier_block mce_cpu_notifier = {
 651         .notifier_call = mce_cpu_callback,
 652 };
 653
 654 static __init int mce_init_device(void)
 655 {
 656         int err;
 657         int i = 0;
 658
 659         if (!mce_available(&boot_cpu_data))
 660                 return -EIO;
 661         err = sysdev_class_register(&mce_sysclass);
 662
 663         for_each_online_cpu(i) {
 664                 mce_create_device(i);
 665         }
 666
 667         register_cpu_notifier(&mce_cpu_notifier);
 668         misc_register(&mce_log_device);
 669         return err;
 670 }
 671
 672 device_initcall(mce_init_device);