1 /* Intel 7 core Memory Controller kernel module (Nehalem)
3 * This file may be distributed under the terms of the
4 * GNU General Public License version 2 only.
6 * Copyright (c) 2009 by:
7 * Mauro Carvalho Chehab <mchehab@redhat.com>
9 * Red Hat Inc. http://www.redhat.com
11 * Forked and adapted from the i5400_edac driver
13 * Based on the following public Intel datasheets:
14 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
15 * Datasheet, Volume 2:
16 * http://download.intel.com/design/processor/datashts/320835.pdf
17 * Intel Xeon Processor 5500 Series Datasheet Volume 2
18 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/pci.h>
26 #include <linux/pci_ids.h>
27 #include <linux/slab.h>
28 #include <linux/edac.h>
29 #include <linux/mmzone.h>
30 #include <linux/edac_mce.h>
31 #include <linux/spinlock.h>
33 #include "edac_core.h"
35 /* To use the new pci_[read/write]_config_qword instead of two dword */
39 * Alter this version for the module when modifications are made
41 #define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
42 #define EDAC_MOD_STR "i7core_edac"
44 /* HACK: temporary, just to enable all logs, for now */
46 #define debugf0(fmt, arg...) edac_printk(KERN_INFO, "i7core", fmt, ##arg)
51 #define i7core_printk(level, fmt, arg...) \
52 edac_printk(level, "i7core", fmt, ##arg)
54 #define i7core_mc_printk(mci, level, fmt, arg...) \
55 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
58 * i7core Memory Controller Registers
61 /* OFFSETS for Device 0 Function 0 */
63 #define MC_CFG_CONTROL 0x90
65 /* OFFSETS for Device 3 Function 0 */
67 #define MC_CONTROL 0x48
68 #define MC_STATUS 0x4c
69 #define MC_MAX_DOD 0x64
72 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
73 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
76 #define MC_TEST_ERR_RCV1 0x60
77 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
79 #define MC_TEST_ERR_RCV0 0x64
80 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
81 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
83 /* OFFSETS for Devices 4,5 and 6 Function 0 */
85 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
86 #define THREE_DIMMS_PRESENT (1 << 24)
87 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
88 #define QUAD_RANK_PRESENT (1 << 22)
89 #define REGISTERED_DIMM (1 << 15)
91 #define MC_CHANNEL_MAPPER 0x60
92 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
93 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
95 #define MC_CHANNEL_RANK_PRESENT 0x7c
96 #define RANK_PRESENT_MASK 0xffff
98 #define MC_CHANNEL_ADDR_MATCH 0xf0
99 #define MC_CHANNEL_ERROR_MASK 0xf8
100 #define MC_CHANNEL_ERROR_INJECT 0xfc
101 #define INJECT_ADDR_PARITY 0x10
102 #define INJECT_ECC 0x08
103 #define MASK_CACHELINE 0x06
104 #define MASK_FULL_CACHELINE 0x06
105 #define MASK_MSB32_CACHELINE 0x04
106 #define MASK_LSB32_CACHELINE 0x02
107 #define NO_MASK_CACHELINE 0x00
108 #define REPEAT_EN 0x01
110 /* OFFSETS for Devices 4,5 and 6 Function 1 */
111 #define MC_DOD_CH_DIMM0 0x48
112 #define MC_DOD_CH_DIMM1 0x4c
113 #define MC_DOD_CH_DIMM2 0x50
114 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
115 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
116 #define DIMM_PRESENT_MASK (1 << 9)
117 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
118 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
119 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
120 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
121 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
122 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
123 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
124 #define MC_DOD_NUMCOL_MASK 3
125 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
127 #define MC_RANK_PRESENT 0x7c
129 #define MC_SAG_CH_0 0x80
130 #define MC_SAG_CH_1 0x84
131 #define MC_SAG_CH_2 0x88
132 #define MC_SAG_CH_3 0x8c
133 #define MC_SAG_CH_4 0x90
134 #define MC_SAG_CH_5 0x94
135 #define MC_SAG_CH_6 0x98
136 #define MC_SAG_CH_7 0x9c
138 #define MC_RIR_LIMIT_CH_0 0x40
139 #define MC_RIR_LIMIT_CH_1 0x44
140 #define MC_RIR_LIMIT_CH_2 0x48
141 #define MC_RIR_LIMIT_CH_3 0x4C
142 #define MC_RIR_LIMIT_CH_4 0x50
143 #define MC_RIR_LIMIT_CH_5 0x54
144 #define MC_RIR_LIMIT_CH_6 0x58
145 #define MC_RIR_LIMIT_CH_7 0x5C
146 #define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
148 #define MC_RIR_WAY_CH 0x80
149 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
150 #define MC_RIR_WAY_RANK_MASK 0x7
157 #define MAX_DIMMS 3 /* Max DIMMS per channel */
158 #define NUM_SOCKETS 2 /* Max number of MC sockets */
159 #define MAX_MCR_FUNC 4
160 #define MAX_CHAN_FUNC 3
170 struct i7core_inject {
178 /* Error address mask */
179 int channel, dimm, rank, bank, page, col;
182 struct i7core_channel {
187 struct pci_id_descr {
191 struct pci_dev *pdev[NUM_SOCKETS];
195 struct pci_dev *pci_noncore[NUM_SOCKETS];
196 struct pci_dev *pci_mcr[NUM_SOCKETS][MAX_MCR_FUNC + 1];
197 struct pci_dev *pci_ch[NUM_SOCKETS][NUM_CHANS][MAX_CHAN_FUNC + 1];
199 struct i7core_info info;
200 struct i7core_inject inject;
201 struct i7core_channel channel[NUM_SOCKETS][NUM_CHANS];
203 int sockets; /* Number of sockets */
204 int channels; /* Number of active channels */
206 int ce_count_available[NUM_SOCKETS];
207 /* ECC corrected errors counts per dimm */
208 unsigned long ce_count[NUM_SOCKETS][MAX_DIMMS];
209 int last_ce_count[NUM_SOCKETS][MAX_DIMMS];
212 struct edac_mce edac_mce;
213 struct mce mce_entry[MCE_LOG_LEN];
218 /* Device name and register DID (Device ID) */
219 struct i7core_dev_info {
220 const char *ctl_name; /* name for this device */
221 u16 fsb_mapping_errors; /* DID for the branchmap,control */
224 #define PCI_DESCR(device, function, device_id) \
226 .func = (function), \
227 .dev_id = (device_id)
229 struct pci_id_descr pci_devs[] = {
230 /* Memory controller */
231 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
232 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
233 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS) }, /* if RDIMM is supported */
234 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
237 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
238 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
239 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
240 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
243 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
244 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
245 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
246 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
249 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
250 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
251 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
252 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
254 /* Generic Non-core registers */
256 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
257 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
258 * the probing code needs to test for the other address in case of
259 * failure of this one
261 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NOCORE) },
264 #define N_DEVS ARRAY_SIZE(pci_devs)
267 * pci_device_id table for which devices we are looking for
268 * This should match the first device at pci_devs table
270 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
271 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
272 {0,} /* 0 terminated list. */
276 /* Table of devices attributes supported by this driver */
277 static const struct i7core_dev_info i7core_devs[] = {
279 .ctl_name = "i7 Core",
280 .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7_MCR,
284 static struct edac_pci_ctl_info *i7core_pci;
286 /****************************************************************************
287 Ancillary status routines
288 ****************************************************************************/
290 /* MC_CONTROL bits */
291 #define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
292 #define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
295 #define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 3))
296 #define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
298 /* MC_MAX_DOD read functions */
299 static inline int numdimms(u32 dimms)
301 return (dimms & 0x3) + 1;
304 static inline int numrank(u32 rank)
306 static int ranks[4] = { 1, 2, 4, -EINVAL };
308 return ranks[rank & 0x3];
311 static inline int numbank(u32 bank)
313 static int banks[4] = { 4, 8, 16, -EINVAL };
315 return banks[bank & 0x3];
318 static inline int numrow(u32 row)
320 static int rows[8] = {
321 1 << 12, 1 << 13, 1 << 14, 1 << 15,
322 1 << 16, -EINVAL, -EINVAL, -EINVAL,
325 return rows[row & 0x7];
328 static inline int numcol(u32 col)
330 static int cols[8] = {
331 1 << 10, 1 << 11, 1 << 12, -EINVAL,
333 return cols[col & 0x3];
336 /****************************************************************************
337 Memory check routines
338 ****************************************************************************/
339 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
344 for (i = 0; i < N_DEVS; i++) {
345 if (!pci_devs[i].pdev[socket])
348 if (PCI_SLOT(pci_devs[i].pdev[socket]->devfn) == slot &&
349 PCI_FUNC(pci_devs[i].pdev[socket]->devfn) == func) {
350 return pci_devs[i].pdev[socket];
358 * i7core_get_active_channels() - gets the number of channels and csrows
359 * @socket: Quick Path Interconnect socket
360 * @channels: Number of channels that will be returned
361 * @csrows: Number of csrows found
363 * Since EDAC core needs to know in advance the number of available channels
364 * and csrows, in order to allocate memory for csrows/channels, it is needed
365 * to run two similar steps. At the first step, implemented on this function,
366 * it checks the number of csrows/channels present at one socket.
367 * this is used in order to properly allocate the size of mci components.
369 * It should be noticed that none of the current available datasheets explain
370 * or even mention how csrows are seen by the memory controller. So, we need
371 * to add a fake description for csrows.
372 * So, this driver is attributing one DIMM memory for one csrow.
/*
 * Counts the active channels and present DIMMs on one socket by reading
 * MC_STATUS/MC_CONTROL (dev 3 fn 0) and the per-channel DIMM presence
 * ("DOD") registers (devs 4-6 fn 1), before the mci is allocated.
 * NOTE(review): this listing elides several lines of the function
 * (second parameter list line, braces, counter updates); the comments
 * below only describe what is visible.
 */
374 static int i7core_get_active_channels(u8 socket, unsigned *channels,
377 struct pci_dev *pdev = NULL;
/* Dev 3 fn 0 holds the global MC status/control for this socket */
384 pdev = get_pdev_slot_func(socket, 3, 0);
386 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
391 /* Device 3 function 0 reads */
392 pci_read_config_dword(pdev, MC_STATUS, &status);
393 pci_read_config_dword(pdev, MC_CONTROL, &control);
395 for (i = 0; i < NUM_CHANS; i++) {
397 /* Check if the channel is active */
/* MC_CONTROL bit (8 + ch) set means the channel is active (cf. CH_ACTIVE) */
398 if (!(control & (1 << (8 + i))))
401 /* Check if the channel is disabled */
/* MC_STATUS bit ch set means the channel is disabled (cf. CH_DISABLED) */
402 if (status & (1 << i))
/* Channels live in PCI slots 4..6, fn 1 holds the DIMM DOD registers */
405 pdev = get_pdev_slot_func(socket, i + 4, 1);
407 i7core_printk(KERN_ERR, "Couldn't find socket %d "
412 /* Devices 4-6 function 1 */
413 pci_read_config_dword(pdev,
414 MC_DOD_CH_DIMM0, &dimm_dod[0]);
415 pci_read_config_dword(pdev,
416 MC_DOD_CH_DIMM1, &dimm_dod[1]);
417 pci_read_config_dword(pdev,
418 MC_DOD_CH_DIMM2, &dimm_dod[2]);
/* Each present DIMM contributes one fake csrow (see comment above fn) */
422 for (j = 0; j < 3; j++) {
423 if (!DIMM_PRESENT(dimm_dod[j]))
429 debugf0("Number of active channels on socket %d: %d\n",
/*
 * Reads the per-socket MC configuration (dev 3 fn 0) and per-channel
 * DIMM geometry (devs 4-6 fn 0/1), fills one csrow per present DIMM,
 * and dumps the SAG interleave registers for debugging.
 * NOTE(review): many lines of this function are elided in this listing
 * (braces, 'continue' statements, some assignments); comments describe
 * only what is visible.
 */
435 static int get_dimm_config(struct mem_ctl_info *mci, int *csrow, u8 socket)
437 struct i7core_pvt *pvt = mci->pvt_info;
438 struct csrow_info *csr;
439 struct pci_dev *pdev;
441 unsigned long last_page = 0;
445 /* Get data from the MC register, function 0 */
446 pdev = pvt->pci_mcr[socket][0];
450 /* Device 3 function 0 reads */
451 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
452 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
453 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
454 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
456 debugf0("MC control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
457 pvt->info.mc_control, pvt->info.mc_status,
458 pvt->info.max_dod, pvt->info.ch_map);
/* Pick the EDAC mode from the SDCC width: x8 or x4 chipkill */
460 if (ECC_ENABLED(pvt)) {
461 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
463 mode = EDAC_S8ECD8ED;
465 mode = EDAC_S4ECD4ED;
467 debugf0("ECC disabled\n");
471 /* FIXME: need to handle the error codes */
472 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked\n",
473 numdimms(pvt->info.max_dod),
474 numrank(pvt->info.max_dod >> 2),
475 numbank(pvt->info.max_dod >> 4));
476 debugf0("DOD Max rows x colums = 0x%x x 0x%x\n",
477 numrow(pvt->info.max_dod >> 6),
478 numcol(pvt->info.max_dod >> 9));
480 debugf0("Memory channel configuration:\n");
482 for (i = 0; i < NUM_CHANS; i++) {
483 u32 data, dimm_dod[3], value[8];
485 if (!CH_ACTIVE(pvt, i)) {
486 debugf0("Channel %i is not active\n", i);
489 if (CH_DISABLED(pvt, i)) {
490 debugf0("Channel %i is disabled\n", i);
494 /* Devices 4-6 function 0 */
495 pci_read_config_dword(pvt->pci_ch[socket][i][0],
496 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
498 pvt->channel[socket][i].ranks = (data & QUAD_RANK_PRESENT) ?
501 if (data & REGISTERED_DIMM)
/*
 * FIXME(review): the three stores below index pvt->channel[i]
 * without the socket dimension, unlike pvt->channel[socket][i]
 * used just above and below — the [socket] index is almost
 * certainly missing here.
 */
506 if (data & THREE_DIMMS_PRESENT)
507 pvt->channel[i].dimms = 3;
508 else if (data & SINGLE_QUAD_RANK_PRESENT)
509 pvt->channel[i].dimms = 1;
511 pvt->channel[i].dimms = 2;
514 /* Devices 4-6 function 1 */
515 pci_read_config_dword(pvt->pci_ch[socket][i][1],
516 MC_DOD_CH_DIMM0, &dimm_dod[0]);
517 pci_read_config_dword(pvt->pci_ch[socket][i][1],
518 MC_DOD_CH_DIMM1, &dimm_dod[1]);
519 pci_read_config_dword(pvt->pci_ch[socket][i][1],
520 MC_DOD_CH_DIMM2, &dimm_dod[2]);
522 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
523 "%d ranks, %cDIMMs\n",
525 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
527 pvt->channel[socket][i].ranks,
528 (data & REGISTERED_DIMM) ? 'R' : 'U');
530 for (j = 0; j < 3; j++) {
531 u32 banks, ranks, rows, cols;
534 if (!DIMM_PRESENT(dimm_dod[j]))
/* Decode DIMM geometry from its DOD register */
537 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
538 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
539 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
540 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
542 /* DDR3 has 8 I/O banks */
/* size in MiB: cells >> 20, times 8 bytes per beat (hence the -3) */
543 size = (rows * cols * banks * ranks) >> (20 - 3);
545 pvt->channel[socket][i].dimms++;
547 debugf0("\tdimm %d (0x%08x) %d Mb offset: %x, "
549 "numrank: %d, numrow: %#x, numcol: %#x\n",
550 j, dimm_dod[j], size,
551 RANKOFFSET(dimm_dod[j]),
552 banks, ranks, rows, cols);
/* Convert MiB to pages in either direction of PAGE_SHIFT vs 20 */
555 npages = size >> (PAGE_SHIFT - 20);
557 npages = size << (20 - PAGE_SHIFT);
/* Fake one csrow per DIMM (see header comment of this driver) */
560 csr = &mci->csrows[*csrow];
561 csr->first_page = last_page + 1;
563 csr->last_page = last_page;
564 csr->nr_pages = npages;
568 csr->csrow_idx = *csrow;
569 csr->nr_channels = 1;
571 csr->channels[0].chan_idx = i;
572 csr->channels[0].ce_count = 0;
582 csr->dtype = DEV_X16;
585 csr->dtype = DEV_UNKNOWN;
588 csr->edac_mode = mode;
/* Dump the System Address Generation interleave registers */
594 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
595 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
596 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
597 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
598 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
599 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
600 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
601 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
602 debugf0("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
603 for (j = 0; j < 8; j++)
604 debugf0("\t\t%#x\t%#x\t%#x\n",
605 (value[j] >> 27) & 0x1,
606 (value[j] >> 24) & 0x7,
/*
 * FIXME: '&&' below is logical AND, so this always prints 0/1;
 * bitwise '&' (mask of the low 24 OFFSET bits) was clearly intended.
 */
607 (value[j] && ((1 << 24) - 1)));
613 /****************************************************************************
614 Error insertion routines
615 ****************************************************************************/
617 /* The i7core has independent error injection features per channel.
618 However, to have a simpler code, we don't allow enabling error injection
619 on more than one channel.
620 Also, since a change at an inject parameter will be applied only at enable,
621 we're disabling error injection on all write calls to the sysfs nodes that
622 controls the error code injection.
624 static int disable_inject(struct mem_ctl_info *mci)
626 struct i7core_pvt *pvt = mci->pvt_info;
628 pvt->inject.enable = 0;
630 if (!pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0])
633 pci_write_config_dword(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
634 MC_CHANNEL_ERROR_MASK, 0);
640 * i7core inject inject.socket
642 * accept and store error injection inject.socket value
644 static ssize_t i7core_inject_socket_store(struct mem_ctl_info *mci,
645 const char *data, size_t count)
647 struct i7core_pvt *pvt = mci->pvt_info;
651 rc = strict_strtoul(data, 10, &value);
652 if ((rc < 0) || (value > pvt->sockets))
655 pvt->inject.section = (u32) value;
659 static ssize_t i7core_inject_socket_show(struct mem_ctl_info *mci,
662 struct i7core_pvt *pvt = mci->pvt_info;
663 return sprintf(data, "%d\n", pvt->inject.socket);
667 * i7core inject inject.section
669 * accept and store error injection inject.section value
670 * bit 0 - refers to the lower 32-byte half cacheline
671 * bit 1 - refers to the upper 32-byte half cacheline
/*
 * NOTE(review): this listing elides braces and the 'return -EIO' /
 * 'return count' lines of the handlers below; each follows the same
 * strict_strtoul-validate-store pattern.
 */
673 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
674 const char *data, size_t count)
676 struct i7core_pvt *pvt = mci->pvt_info;
/* Parameters may only change while injection is disabled */
680 if (pvt->inject.enable)
683 rc = strict_strtoul(data, 10, &value);
684 if ((rc < 0) || (value > 3))
687 pvt->inject.section = (u32) value;
691 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
694 struct i7core_pvt *pvt = mci->pvt_info;
695 return sprintf(data, "0x%08x\n", pvt->inject.section);
/* i7core inject inject.type */
701 * accept and store error injection inject.type value
702 * bit 0 - repeat enable - Enable error repetition
703 * bit 1 - inject ECC error
704 * bit 2 - inject parity error
706 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
707 const char *data, size_t count)
709 struct i7core_pvt *pvt = mci->pvt_info;
/* Parameters may only change while injection is disabled */
713 if (pvt->inject.enable)
716 rc = strict_strtoul(data, 10, &value);
717 if ((rc < 0) || (value > 7))
720 pvt->inject.type = (u32) value;
724 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
727 struct i7core_pvt *pvt = mci->pvt_info;
728 return sprintf(data, "0x%08x\n", pvt->inject.type);
732 * i7core_inject_inject.eccmask_store
734 * The type of error (UE/CE) will depend on the inject.eccmask value:
735 * Any bits set to a 1 will flip the corresponding ECC bit
736 * Correctable errors can be injected by flipping 1 bit or the bits within
737 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
738 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
739 * uncorrectable error to be injected.
741 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
742 const char *data, size_t count)
744 struct i7core_pvt *pvt = mci->pvt_info;
/* Parameters may only change while injection is disabled */
748 if (pvt->inject.enable)
751 rc = strict_strtoul(data, 10, &value);
755 pvt->inject.eccmask = (u32) value;
759 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
762 struct i7core_pvt *pvt = mci->pvt_info;
763 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
769 * The type of error (UE/CE) will depend on the inject.eccmask value:
770 * Any bits set to a 1 will flip the corresponding ECC bit
771 * Correctable errors can be injected by flipping 1 bit or the bits within
772 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
773 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
774 * uncorrectable error to be injected.
/*
 * Parses "key: value" pairs (channel/dimm/rank/bank/page/col, value a
 * decimal number or "any" meaning -1 / don't-care) into pvt->inject.
 * NOTE(review): strsep() mutates the buffer; the (char **) casts below
 * discard the const on the sysfs input — works, but is UB territory if
 * the buffer were ever genuinely read-only.  This listing elides the
 * braces, error returns, and the 'value = -1' branch for "any".
 */
776 static ssize_t i7core_inject_addrmatch_store(struct mem_ctl_info *mci,
777 const char *data, size_t count)
779 struct i7core_pvt *pvt = mci->pvt_info;
/* Criteria may only change while injection is disabled */
784 if (pvt->inject.enable)
788 cmd = strsep((char **) &data, ":");
791 val = strsep((char **) &data, " \n\t");
795 if (!strcasecmp(val, "any"))
798 rc = strict_strtol(val, 10, &value);
799 if ((rc < 0) || (value < 0))
803 if (!strcasecmp(cmd, "channel")) {
805 pvt->inject.channel = value;
808 } else if (!strcasecmp(cmd, "dimm")) {
810 pvt->inject.dimm = value;
813 } else if (!strcasecmp(cmd, "rank")) {
815 pvt->inject.rank = value;
818 } else if (!strcasecmp(cmd, "bank")) {
820 pvt->inject.bank = value;
823 } else if (!strcasecmp(cmd, "page")) {
825 pvt->inject.page = value;
828 } else if (!strcasecmp(cmd, "col") ||
829 !strcasecmp(cmd, "column")) {
831 pvt->inject.col = value;
/*
 * Formats each criterion as a number or "any" (negative means any).
 * NOTE(review): the 'sprintf(col, "any")' branch for a negative col is
 * elided in this listing — only the 'if' and the hex branch are shown.
 */
840 static ssize_t i7core_inject_addrmatch_show(struct mem_ctl_info *mci,
843 struct i7core_pvt *pvt = mci->pvt_info;
844 char channel[4], dimm[4], bank[4], rank[4], page[7], col[7];
846 if (pvt->inject.channel < 0)
847 sprintf(channel, "any");
849 sprintf(channel, "%d", pvt->inject.channel);
850 if (pvt->inject.dimm < 0)
851 sprintf(dimm, "any");
853 sprintf(dimm, "%d", pvt->inject.dimm);
854 if (pvt->inject.bank < 0)
855 sprintf(bank, "any");
857 sprintf(bank, "%d", pvt->inject.bank);
858 if (pvt->inject.rank < 0)
859 sprintf(rank, "any");
861 sprintf(rank, "%d", pvt->inject.rank);
862 if (pvt->inject.page < 0)
863 sprintf(page, "any");
865 sprintf(page, "0x%04x", pvt->inject.page);
866 if (pvt->inject.col < 0)
869 sprintf(col, "0x%04x", pvt->inject.col);
871 return sprintf(data, "channel: %s\ndimm: %s\nbank: %s\n"
872 "rank: %s\npage: %s\ncolumn: %s\n",
873 channel, dimm, bank, rank, page, col);
877 * This routine prepares the Memory Controller for error injection.
878 * The error will be injected when some process tries to write to the
879 * memory that matches the given criteria.
880 * The criteria can be set in terms of a mask where dimm, rank, bank, page
881 * and col can be specified.
882 * A -1 value for any of the mask items will make the MCU to ignore
883 * that matching criteria for error injection.
885 * It should be noticed that the error will only happen after a write operation
886 * on a memory that matches the condition. if REPEAT_EN is not enabled at
887 * inject mask, then it will produce just one error. Otherwise, it will repeat
888 * until the injectmask would be cleaned.
890 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
891 * is reliable enough to check if the MC is using the
892 * three channels. However, this is not clear at the datasheet.
/*
 * Builds the 38-bit MC_CHANNEL_ADDR_MATCH mask from the stored inject
 * criteria (a negative criterion sets its "match any" bit instead),
 * unlocks the registers via MC_CFG_CONTROL, programs the ECC mask and
 * injection type, then locks the registers again.
 * NOTE(review): several lines (error returns, 'else' branches, the
 * "match any" mask bits) are elided in this listing.
 */
894 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
895 const char *data, size_t count)
897 struct i7core_pvt *pvt = mci->pvt_info;
903 if (!pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0])
906 rc = strict_strtoul(data, 10, &enable);
911 pvt->inject.enable = 1;
917 /* Sets pvt->inject.dimm mask */
918 if (pvt->inject.dimm < 0)
/* 3-DIMM channels use a 2-bit DIMM field; 1/2-DIMM channels 1 bit */
921 if (pvt->channel[pvt->inject.socket][pvt->inject.channel].dimms > 2)
922 mask |= (pvt->inject.dimm & 0x3L) << 35;
924 mask |= (pvt->inject.dimm & 0x1L) << 36;
927 /* Sets pvt->inject.rank mask */
928 if (pvt->inject.rank < 0)
931 if (pvt->channel[pvt->inject.socket][pvt->inject.channel].dimms > 2)
932 mask |= (pvt->inject.rank & 0x1L) << 34;
934 mask |= (pvt->inject.rank & 0x3L) << 34;
937 /* Sets pvt->inject.bank mask */
938 if (pvt->inject.bank < 0)
/*
 * NOTE(review): 0x15L (binary 10101) is an odd mask for a contiguous
 * bank field; 0xfL looks intended — confirm against the
 * MC_CHANNEL_ADDR_MATCH layout in the Xeon 5500 datasheet.
 */
941 mask |= (pvt->inject.bank & 0x15L) << 30;
943 /* Sets pvt->inject.page mask */
944 if (pvt->inject.page < 0)
947 mask |= (pvt->inject.page & 0xffffL) << 14;
949 /* Sets pvt->inject.column mask */
950 if (pvt->inject.col < 0)
953 mask |= (pvt->inject.col & 0x3fffL);
955 /* Unlock writes to registers */
956 pci_write_config_dword(pvt->pci_noncore[pvt->inject.socket],
957 MC_CFG_CONTROL, 0x2);
960 /* Zeroes error count registers */
961 pci_write_config_dword(pvt->pci_mcr[pvt->inject.socket][4],
962 MC_TEST_ERR_RCV1, 0);
963 pci_write_config_dword(pvt->pci_mcr[pvt->inject.socket][4],
964 MC_TEST_ERR_RCV0, 0);
965 pvt->ce_count_available[pvt->inject.socket] = 0;
/* 64-bit write when pci_[rw]_config_qword is available, else 2x32 */
969 pci_write_config_qword(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
970 MC_CHANNEL_ADDR_MATCH, mask);
972 pci_write_config_dword(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
973 MC_CHANNEL_ADDR_MATCH, mask);
974 pci_write_config_dword(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
975 MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
/* Read back the mask for debugging */
981 pci_read_config_qword(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
982 MC_CHANNEL_ADDR_MATCH, &rdmask);
983 debugf0("Inject addr match write 0x%016llx, read: 0x%016llx\n",
986 u32 rdmask1, rdmask2;
988 pci_read_config_dword(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
989 MC_CHANNEL_ADDR_MATCH, &rdmask1);
990 pci_read_config_dword(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
991 MC_CHANNEL_ADDR_MATCH + 4, &rdmask2);
993 debugf0("Inject addr match write 0x%016llx, read: 0x%08x 0x%08x\n",
994 mask, rdmask1, rdmask2);
998 pci_write_config_dword(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
999 MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1003 * bits 1-2: MASK_HALF_CACHELINE
1005 * bit 4: INJECT_ADDR_PARITY
1008 injectmask = (pvt->inject.type & 1) |
1009 (pvt->inject.section & 0x3) << 1 |
1010 (pvt->inject.type & 0x6) << (3 - 1);
1012 pci_write_config_dword(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
1013 MC_CHANNEL_ERROR_MASK, injectmask);
1016 /* lock writes to registers */
/*
 * FIXME: pvt->pci_noncore is an array indexed by socket (see the
 * unlock write above, which uses pci_noncore[pvt->inject.socket]);
 * the [pvt->inject.socket] index is missing here, so this passes a
 * struct pci_dev ** where a struct pci_dev * is expected.
 */
1017 pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, 0);
1019 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1021 mask, pvt->inject.eccmask, injectmask);
/*
 * Reads MC_CHANNEL_ERROR_MASK back from hardware and reports whether
 * injection (ECC or parity bits, 0x0c) is currently armed.
 */
1027 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1030 struct i7core_pvt *pvt = mci->pvt_info;
1033 pci_read_config_dword(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
1034 MC_CHANNEL_ERROR_MASK, &injectmask);
/* NOTE(review): "0x%018x" is an odd width for a u32; "0x%08x" likely meant */
1036 debugf0("Inject error read: 0x%018x\n", injectmask);
/* INJECT_ECC (0x08) or INJECT_ADDR_PARITY-related bit set => enabled */
1038 if (injectmask & 0x0c)
1039 pvt->inject.enable = 1;
1041 return sprintf(data, "%d\n", pvt->inject.enable);
/*
 * Reports the per-DIMM corrected-error counters for every socket.
 * FIXME: the "socket 0" string below is hardcoded — it should report
 * the loop index i.  Also, both sprintf() calls write to the start of
 * 'data', so each socket overwrites the previous one; 'total' is
 * declared but the visible code never offsets by it (likely
 * 'data + total' was intended — the accumulation lines are elided in
 * this listing).
 */
1044 static ssize_t i7core_ce_regs_show(struct mem_ctl_info *mci, char *data)
1046 unsigned i, count, total = 0;
1047 struct i7core_pvt *pvt = mci->pvt_info;
1049 for (i = 0; i < pvt->sockets; i++) {
1050 if (!pvt->ce_count_available[i])
1051 count = sprintf(data, "socket 0 data unavailable\n");
1053 count = sprintf(data, "socket %d, dimm0: %lu\n"
1054 "dimm1: %lu\ndimm2: %lu\n",
1056 pvt->ce_count[i][0],
1057 pvt->ce_count[i][1],
1058 pvt->ce_count[i][2]);
1069 static struct mcidev_sysfs_attribute i7core_inj_attrs[] = {
1072 .name = "inject_socket",
1073 .mode = (S_IRUGO | S_IWUSR)
1075 .show = i7core_inject_socket_show,
1076 .store = i7core_inject_socket_store,
1079 .name = "inject_section",
1080 .mode = (S_IRUGO | S_IWUSR)
1082 .show = i7core_inject_section_show,
1083 .store = i7core_inject_section_store,
1086 .name = "inject_type",
1087 .mode = (S_IRUGO | S_IWUSR)
1089 .show = i7core_inject_type_show,
1090 .store = i7core_inject_type_store,
1093 .name = "inject_eccmask",
1094 .mode = (S_IRUGO | S_IWUSR)
1096 .show = i7core_inject_eccmask_show,
1097 .store = i7core_inject_eccmask_store,
1100 .name = "inject_addrmatch",
1101 .mode = (S_IRUGO | S_IWUSR)
1103 .show = i7core_inject_addrmatch_show,
1104 .store = i7core_inject_addrmatch_store,
1107 .name = "inject_enable",
1108 .mode = (S_IRUGO | S_IWUSR)
1110 .show = i7core_inject_enable_show,
1111 .store = i7core_inject_enable_store,
1114 .name = "corrected_error_counts",
1115 .mode = (S_IRUGO | S_IWUSR)
1117 .show = i7core_ce_regs_show,
1122 /****************************************************************************
1123 Device initialization routines: put/get, init/exit
1124 ****************************************************************************/
1127 * i7core_put_devices 'put' all the devices that we have
1128 * reserved via 'get'
1130 static void i7core_put_devices(void)
1134 for (i = 0; i < NUM_SOCKETS; i++)
1135 for (j = 0; j < N_DEVS; j++)
1136 pci_dev_put(pci_devs[j].pdev[i]);
1140 * i7core_get_devices Find and perform 'get' operation on the MCH's
1141 * device/functions we want to reference for this driver
1143 * Need to 'get' device 16 func 1 and func 2
/*
 * Grabs one PCI device from pci_devs[devno] (continuing after *prev),
 * handling the Xeon 55xx quirks: non-core buses 254/255 not announced
 * by ACPI, and the alternate 8086:2c40 non-core device id.  Stores the
 * result in pci_devs[devno].pdev[socket] after sanity checks.
 * NOTE(review): this function has external linkage; it is only used in
 * this file and should probably be 'static'.
 */
1145 int i7core_get_onedevice(struct pci_dev **prev, int devno)
1147 struct pci_dev *pdev = NULL;
1151 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1152 pci_devs[devno].dev_id, *prev);
1155 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core pci buses
1156 * aren't announced by ACPI. So, we need to use a legacy scan probing
/*
 * NOTE(review): '!prev' tests the pointer-to-pointer, which is never
 * NULL at the visible call site (i7core_get_devices passes &pdev);
 * '!*prev' looks intended — confirm.
 */
1159 if (unlikely(!pdev && !devno && !prev)) {
1160 pcibios_scan_specific_bus(254);
1161 pcibios_scan_specific_bus(255);
1163 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1164 pci_devs[devno].dev_id, *prev);
1168 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
1169 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1170 * to probe for the alternate address in case of failure
1172 if (pci_devs[devno].dev_id == PCI_DEVICE_ID_INTEL_I7_NOCORE && !pdev)
1173 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1174 PCI_DEVICE_ID_INTEL_I7_NOCORE_ALT, *prev);
1183 * Dev 3 function 2 only exists on chips with RDIMMs
1184 * so it is OK not to find it
1186 if ((pci_devs[devno].dev == 3) && (pci_devs[devno].func == 2)) {
1191 i7core_printk(KERN_ERR,
1192 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1193 pci_devs[devno].dev, pci_devs[devno].func,
1194 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
1196 /* End of list, leave */
1199 bus = pdev->bus->number;
/* The socket is derived from the bus number (derivation elided here) */
1206 if (socket >= NUM_SOCKETS) {
1207 i7core_printk(KERN_ERR,
1208 "Unexpected socket for "
1209 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1210 bus, pci_devs[devno].dev, pci_devs[devno].func,
1211 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
/* Reject a second device landing in the same table slot */
1216 if (pci_devs[devno].pdev[socket]) {
1217 i7core_printk(KERN_ERR,
1218 "Duplicated device for "
1219 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1220 bus, pci_devs[devno].dev, pci_devs[devno].func,
1221 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
1226 pci_devs[devno].pdev[socket] = pdev;
/* Sanity check: the found devfn must match the table entry */
1229 if (unlikely(PCI_SLOT(pdev->devfn) != pci_devs[devno].dev ||
1230 PCI_FUNC(pdev->devfn) != pci_devs[devno].func)) {
1231 i7core_printk(KERN_ERR,
1232 "Device PCI ID %04x:%04x "
1233 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1234 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id,
1235 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1236 bus, pci_devs[devno].dev, pci_devs[devno].func);
1240 /* Be sure that the device is enabled */
1241 if (unlikely(pci_enable_device(pdev) < 0)) {
1242 i7core_printk(KERN_ERR,
1244 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1245 bus, pci_devs[devno].dev, pci_devs[devno].func,
1246 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
1250 i7core_printk(KERN_INFO,
1251 "Registered socket %d "
1252 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1253 socket, bus, pci_devs[devno].dev, pci_devs[devno].func,
1254 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
/*
 * Grabs every device in pci_devs[]; on any failure, releases all
 * references taken so far via i7core_put_devices().
 * NOTE(review): the inner loop over '*prev' iterations, the error
 * return and closing braces are elided in this listing.
 */
1261 static int i7core_get_devices(void)
1264 struct pci_dev *pdev = NULL;
1266 for (i = 0; i < N_DEVS; i++) {
1269 if (i7core_get_onedevice(&pdev, i) < 0) {
1270 i7core_put_devices();
/*
 * Sorts the grabbed PCI devices into the pvt lookup tables by slot and
 * function: slot 3 -> pci_mcr[], slots 4..6 -> pci_ch[][][],
 * slot 0 fn 0 -> pci_noncore[].
 * NOTE(review): the 'if (slot == 3)' guard preceding the MAX_MCR_FUNC
 * check, the 'goto error'-style jumps and the returns are elided in
 * this listing.
 */
1278 static int mci_bind_devs(struct mem_ctl_info *mci)
1280 struct i7core_pvt *pvt = mci->pvt_info;
1281 struct pci_dev *pdev;
1282 int i, j, func, slot;
1284 for (i = 0; i < pvt->sockets; i++) {
1285 for (j = 0; j < N_DEVS; j++) {
1286 pdev = pci_devs[j].pdev[i];
1290 func = PCI_FUNC(pdev->devfn);
1291 slot = PCI_SLOT(pdev->devfn);
1293 if (unlikely(func > MAX_MCR_FUNC))
1295 pvt->pci_mcr[i][func] = pdev;
1296 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1297 if (unlikely(func > MAX_CHAN_FUNC))
1299 pvt->pci_ch[i][slot - 4][func] = pdev;
1300 } else if (!slot && !func)
1301 pvt->pci_noncore[i] = pdev;
1305 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1306 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
/* Reached when a device's slot/func falls outside the expected map */
1314 i7core_printk(KERN_ERR, "Device %d, function %d "
1315 "is out of the expected range\n",
1320 /****************************************************************************
1321 Error check routines
1322 ****************************************************************************/
1324 /* This function is based on the device 3 function 4 registers as described on:
1325 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1326 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1327 * also available at:
1328 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
/*
 * check_mc_test_err() - poll the per-socket corrected-error counters.
 *
 * Reads the MC_TEST_ERR_RCV0/RCV1 registers of device 3 function 4 for
 * @socket, extracts the per-DIMM corrected-error counts, and accumulates
 * the deltas since the previous poll into pvt->ce_count[socket][].
 * The first call only snapshots the counters (ce_count_available gate).
 */
1330 static void check_mc_test_err(struct mem_ctl_info *mci, u8 socket)
1332 struct i7core_pvt *pvt = mci->pvt_info;
1334 int new0, new1, new2;
/* Without the dev 3 fn 4 device we have no registers to read. */
1336 if (!pvt->pci_mcr[socket][4]) {
1337 debugf0("%s MCR registers not found\n",__func__);
1341 /* Corrected error reads */
1342 pci_read_config_dword(pvt->pci_mcr[socket][4], MC_TEST_ERR_RCV1, &rcv1);
1343 pci_read_config_dword(pvt->pci_mcr[socket][4], MC_TEST_ERR_RCV0, &rcv0);
1345 /* Store the new values */
1346 new2 = DIMM2_COR_ERR(rcv1);
1347 new1 = DIMM1_COR_ERR(rcv0);
1348 new0 = DIMM0_COR_ERR(rcv0);
/*
 * NOTE(review): ce_count_available is indexed by socket a few lines
 * below, but is tested unindexed here — this looks like a missing
 * "[socket]"; confirm against the full source.
 */
1351 debugf2("%s CE rcv1=0x%08x rcv0=0x%08x, %d %d %d\n",
1352 (pvt->ce_count_available ? "UPDATE" : "READ"),
1353 rcv1, rcv0, new0, new1, new2);
1356 /* Updates CE counters if it is not the first time here */
1357 if (pvt->ce_count_available[socket]) {
1358 /* Updates CE counters */
1359 int add0, add1, add2;
/* Delta since the last poll, per DIMM. */
1361 add2 = new2 - pvt->last_ce_count[socket][2];
1362 add1 = new1 - pvt->last_ce_count[socket][1];
1363 add0 = new0 - pvt->last_ce_count[socket][0];
1367 pvt->ce_count[socket][2] += add2;
1371 pvt->ce_count[socket][1] += add1;
1375 pvt->ce_count[socket][0] += add0;
/* From now on, deltas are meaningful for this socket. */
1377 pvt->ce_count_available[socket] = 1;
1379 /* Store the new values */
1380 pvt->last_ce_count[socket][2] = new2;
1381 pvt->last_ce_count[socket][1] = new1;
1382 pvt->last_ce_count[socket][0] = new0;
1386 * According to tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1387 * Architectures Software Developer’s Manual Volume 3B.
1388 * Nehalem are defined as family 0x06, model 0x1a
1390 * The MCA registers used here are the following ones:
1391 * struct mce field MCA Register
1392 * m->status MSR_IA32_MC8_STATUS
1393 * m->addr MSR_IA32_MC8_ADDR
1394 * m->misc MSR_IA32_MC8_MISC
1395 * In the case of Nehalem, the error information is masked at .status and .misc
/*
 * i7core_mce_output_error() - decode one queued MCE and report it to the
 * EDAC core.  Field layout follows the MC8 bank registers (see the
 * comment block above): error class and op type come from m->status,
 * dimm/channel/syndrome from m->misc.
 */
1398 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1401 char *type, *optype, *err, *msg;
/* Bits 16..24 of MCi_STATUS select the memory error class. */
1402 unsigned long error = m->status & 0x1ff0000l;
1403 u32 optypenum = (m->status >> 4) & 0x07;
/*
 * FIXME(review): '&&' is the logical AND, so core_err_cnt collapses to
 * 0 or 1.  The 0x7fff mask strongly suggests the bitwise '&' was
 * intended here to extract the 15-bit corrected-error count.
 */
1404 u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1405 u32 dimm = (m->misc >> 16) & 0x3;
1406 u32 channel = (m->misc >> 18) & 0x3;
1407 u32 syndrome = m->misc >> 32;
/* Index of the lowest error bit set; drives the err-string switch. */
1408 u32 errnum = find_first_bit(&error, 32);
/* MCG_STATUS bit 0 flags an error the machine cannot recover from. */
1410 if (m->mcgstatus & 1)
1415 switch (optypenum) {
1417 optype = "generic undef request";
1420 optype = "read error";
1423 optype = "write error";
1426 optype = "addr/cmd error";
1429 optype = "scrubbing error";
1432 optype = "reserved";
/* errnum 16..24 map to the strings below (case labels elided here). */
1438 err = "read ECC error";
1441 err = "RAS ECC error";
1444 err = "write parity error";
1447 err = "redundacy loss";
1453 err = "memory range error";
1456 err = "RTID out of range";
1459 err = "address parity error";
1462 err = "byte enable parity error";
1468 /* FIXME: should convert addr into bank and rank information */
1469 msg = kasprintf(GFP_ATOMIC,
1470 "%s (addr = 0x%08llx, socket=%d, Dimm=%d, Channel=%d, "
1471 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1472 type, (long long) m->addr, m->cpu, dimm, channel,
1473 syndrome, core_err_cnt, (long long)m->status,
1474 (long long)m->misc, optype, err);
1478 /* Call the helper to output message */
1479 edac_mc_handle_fbd_ue(mci, 0 /* FIXME: should be rank here */,
1480 0, 0 /* FIXME: should be channel here */, msg);
1486 * i7core_check_error Retrieve and process errors reported by the
1487 * hardware. Called by the Core module.
/*
 * i7core_check_error() - EDAC polling entry point.
 *
 * Drains the MCE queue filled by i7core_mce_check_error() into a
 * temporary buffer under the spinlock, decodes each entry outside the
 * lock, then polls the per-socket corrected-error registers.
 */
1489 static void i7core_check_error(struct mem_ctl_info *mci)
1491 struct i7core_pvt *pvt = mci->pvt_info;
1494 struct mce *m = NULL;
1495 unsigned long flags;
1497 debugf0(__FILE__ ": %s()\n", __func__);
1499 /* Copy all mce errors into a temporary buffer */
1500 spin_lock_irqsave(&pvt->mce_lock, flags);
1501 if (pvt->mce_count) {
/*
 * NOTE(review): the kmalloc NULL check and the matching kfree are on
 * lines elided from this listing — verify both exist before the memcpy
 * and after the decode loop.
 */
1502 m = kmalloc(sizeof(*m) * pvt->mce_count, GFP_ATOMIC);
1504 count = pvt->mce_count;
1505 memcpy(m, &pvt->mce_entry, sizeof(*m) * count);
1509 spin_unlock_irqrestore(&pvt->mce_lock, flags);
1511 /* process mcelog errors */
1512 for (i = 0; i < count; i++)
1513 i7core_mce_output_error(mci, &m[i]);
1517 /* check memory count errors */
1518 for (i = 0; i < pvt->sockets; i++)
1519 check_mc_test_err(mci, i);
1523 * i7core_mce_check_error Replicates mcelog routine to get errors
1524 * This routine simply queues mcelog errors, and
1525 * returns. The error itself should be handled later
1526 * by i7core_check_error.
/*
 * i7core_mce_check_error() - edac_mce callback; runs in MCE context.
 *
 * Filters out events that do not belong to the memory controller,
 * queues the rest into pvt->mce_entry[] for later decoding by
 * i7core_check_error(), and decodes immediately only for fatal events.
 * @priv is the mem_ctl_info registered via pvt->edac_mce.priv.
 */
1528 static int i7core_mce_check_error(void *priv, struct mce *mce)
1530 struct mem_ctl_info *mci = priv;
1531 struct i7core_pvt *pvt = mci->pvt_info;
1532 unsigned long flags;
1534 debugf0(__FILE__ ": %s()\n", __func__);
1537 * Just let mcelog handle it if the error is
1538 * outside the memory controller
1540 if (((mce->status & 0xffff) >> 7) != 1)
1543 /* Bank 8 registers are the only ones that we know how to handle */
/* Queue the event; drop it silently if the ring is already full. */
1547 spin_lock_irqsave(&pvt->mce_lock, flags);
1548 if (pvt->mce_count < MCE_LOG_LEN) {
1549 memcpy(&pvt->mce_entry[pvt->mce_count], mce, sizeof(*mce));
1552 spin_unlock_irqrestore(&pvt->mce_lock, flags);
1554 /* Handle fatal errors immediately */
1555 if (mce->mcgstatus & 1)
1556 i7core_check_error(mci);
1558 /* Advise mcelog that the error was handled */
1563 * i7core_probe Probe for ONE instance of device to see if it is
1566 * 0 for FOUND a device
1567 * < 0 for error code
/*
 * i7core_probe() - probe one i7core memory controller instance.
 *
 * Reserves the PCI devices, counts sockets and active channels,
 * allocates and fills a mem_ctl_info, registers it with the EDAC core,
 * sets up the generic PCI error control, the error-injection defaults,
 * and finally registers the MCE callback.
 * Returns 0 when a device was found and set up, < 0 on error.
 */
1569 static int __devinit i7core_probe(struct pci_dev *pdev,
1570 const struct pci_device_id *id)
1572 struct mem_ctl_info *mci;
1573 struct i7core_pvt *pvt;
1574 int num_channels = 0;
1577 int dev_idx = id->driver_data;
/* driver_data indexes i7core_devs[]; reject anything out of range. */
1581 if (unlikely(dev_idx >= ARRAY_SIZE(i7core_devs)))
1584 /* get the pci devices we want to reserve for our use */
1585 rc = i7core_get_devices();
1586 if (unlikely(rc < 0))
/*
 * Find the highest-numbered socket that has devices; the loop body
 * (setting 'sockets') is elided from this listing.  NOTE(review): the
 * loop never tests i == 0, so 'sockets' presumably defaults to 1 on an
 * elided line — confirm.
 */
1590 for (i = NUM_SOCKETS - 1; i > 0; i--)
1591 if (pci_devs[0].pdev[i]) {
/* Sum active channels and csrows across all detected sockets. */
1596 for (i = 0; i < sockets; i++) {
1600 /* Check the number of active and not disabled channels */
1601 rc = i7core_get_active_channels(i, &channels, &csrows);
1602 if (unlikely(rc < 0))
1605 num_channels += channels;
1606 num_csrows += csrows;
1609 /* allocate a new MC control structure */
1610 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
1611 if (unlikely(!mci)) {
1616 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1618 mci->dev = &pdev->dev; /* record ptr to the generic device */
1619 pvt = mci->pvt_info;
1620 memset(pvt, 0, sizeof(*pvt));
1621 pvt->sockets = sockets;
1625 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1626 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
/* Describe the controller capabilities to the EDAC core. */
1629 mci->mtype_cap = MEM_FLAG_DDR3;
1630 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1631 mci->edac_cap = EDAC_FLAG_NONE;
1632 mci->mod_name = "i7core_edac.c";
1633 mci->mod_ver = I7CORE_REVISION;
1634 mci->ctl_name = i7core_devs[dev_idx].ctl_name;
1635 mci->dev_name = pci_name(pdev);
1636 mci->ctl_page_to_phys = NULL;
1637 mci->mc_driver_sysfs_attributes = i7core_inj_attrs;
1638 /* Set the function pointer to an actual operation function */
1639 mci->edac_check = i7core_check_error;
1641 /* Store pci devices at mci for faster access */
1642 rc = mci_bind_devs(mci);
1643 if (unlikely(rc < 0))
1646 /* Get dimm basic config */
1647 for (i = 0; i < sockets; i++)
1648 get_dimm_config(mci, &csrow, i);
1650 /* add this new MC control structure to EDAC's list of MCs */
1651 if (unlikely(edac_mc_add_mc(mci))) {
1652 debugf0("MC: " __FILE__
1653 ": %s(): failed edac_mc_add_mc()\n", __func__);
1654 /* FIXME: perhaps some code should go here that disables error
1655 * reporting if we just enabled it
1662 /* allocating generic PCI control info */
1663 i7core_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
/* Non-fatal: driver keeps working without PCI error reporting. */
1664 if (unlikely(!i7core_pci)) {
1666 "%s(): Unable to create PCI control\n",
1669 "%s(): PCI error report via EDAC not setup\n",
1673 /* Default error mask is any memory */
1674 pvt->inject.channel = 0;
1675 pvt->inject.dimm = -1;
1676 pvt->inject.rank = -1;
1677 pvt->inject.bank = -1;
1678 pvt->inject.page = -1;
1679 pvt->inject.col = -1;
1681 /* Registers on edac_mce in order to receive memory errors */
1682 pvt->edac_mce.priv = mci;
1683 pvt->edac_mce.check_error = i7core_mce_check_error;
1684 spin_lock_init(&pvt->mce_lock);
1686 rc = edac_mce_register(&pvt->edac_mce);
1687 if (unlikely (rc < 0)) {
1688 debugf0("MC: " __FILE__
1689 ": %s(): failed edac_mce_register()\n", __func__);
1693 i7core_printk(KERN_INFO, "Driver loaded.\n");
/* Error unwind: release the reserved PCI devices. */
1701 i7core_put_devices();
1706 * i7core_remove destructor for one instance of device
/*
 * i7core_remove() - tear down one controller instance, undoing
 * i7core_probe() in reverse order: PCI control, MC registration,
 * MCE callback, then the reserved PCI devices.
 */
1709 static void __devexit i7core_remove(struct pci_dev *pdev)
1711 struct mem_ctl_info *mci;
1712 struct i7core_pvt *pvt;
1714 debugf0(__FILE__ ": %s()\n", __func__);
1717 edac_pci_release_generic_ctl(i7core_pci);
/* Detach from the EDAC core; returns the mci we registered. */
1720 mci = edac_mc_del_mc(&pdev->dev);
1724 /* Unregister from edac_mce so we stop receiving memory errors */
1725 pvt = mci->pvt_info;
1726 edac_mce_unregister(&pvt->edac_mce);
1728 /* retrieve references to resources, and free those resources */
1729 i7core_put_devices();
/* Export the PCI ID table for module autoloading. */
1734 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
1737 * i7core_driver pci_driver structure for this module
/* Binds the probe/remove callbacks above to the supported PCI IDs. */
1740 static struct pci_driver i7core_driver = {
1741 .name = "i7core_edac",
1742 .probe = i7core_probe,
1743 .remove = __devexit_p(i7core_remove),
1744 .id_table = i7core_pci_tbl,
1748 * i7core_init Module entry function
1749 * Try to initialize this module for its devices
/*
 * i7core_init() - module entry point; registers the PCI driver after
 * priming the EDAC operation state (POLL vs NMI).
 */
1751 static int __init i7core_init(void)
1755 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1757 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1760 pci_rc = pci_register_driver(&i7core_driver);
/* Normalize: any positive value from registration counts as success. */
1762 return (pci_rc < 0) ? pci_rc : 0;
1766 * i7core_exit() Module exit function
1767 * Unregister the driver
/* i7core_exit() - module exit point; unregisters the PCI driver. */
1769 static void __exit i7core_exit(void)
1771 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1772 pci_unregister_driver(&i7core_driver);
/* Standard module plumbing: entry/exit hooks and metadata. */
1775 module_init(i7core_init);
1776 module_exit(i7core_exit);
1778 MODULE_LICENSE("GPL");
1779 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
1780 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
1781 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
/* edac_op_state selects polling vs NMI reporting; read-only via sysfs. */
1784 module_param(edac_op_state, int, 0444);
1785 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");