1 /* Intel 7 core Memory Controller kernel module (Nehalem)
3 * This file may be distributed under the terms of the
4 * GNU General Public License version 2 only.
6 * Copyright (c) 2009 by:
7 * Mauro Carvalho Chehab <mchehab@redhat.com>
9 * Red Hat Inc. http://www.redhat.com
11 * Forked and adapted from the i5400_edac driver
13 * Based on the following public Intel datasheets:
14 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
15 * Datasheet, Volume 2:
16 * http://download.intel.com/design/processor/datashts/320835.pdf
17 * Intel Xeon Processor 5500 Series Datasheet Volume 2
18 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/pci.h>
26 #include <linux/pci_ids.h>
27 #include <linux/slab.h>
28 #include <linux/edac.h>
29 #include <linux/mmzone.h>
31 #include "edac_core.h"
/* To use the new pci_[read/write]_config_qword instead of two dword */

/*
 * Alter this version for the module when modifications are made
 *
 * NOTE: __DATE__ was removed from the version string: it makes the build
 * non-reproducible (two builds of identical sources differ) and is banned
 * by kernel policy.
 */
#define I7CORE_REVISION    " Ver: 1.0.0 "
#define EDAC_MOD_STR      "i7core_edac"

/* HACK: temporary, just to enable all logs, for now */
#define debugf0(fmt, arg...)  edac_printk(KERN_INFO, "i7core", fmt, ##arg)

/* Driver-tagged wrapper around edac_printk() */
#define i7core_printk(level, fmt, arg...) \
	edac_printk(level, "i7core", fmt, ##arg)

/* Per-MC variant: also identifies the memory controller instance */
#define i7core_mc_printk(mci, level, fmt, arg...) \
	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
/*
 * i7core Memory Controller Registers
 */

	/* OFFSETS for Device 3 Function 0 */

#define MC_CONTROL	0x48	/* channel-active and ECC-mode control bits */
#define MC_STATUS	0x4c	/* ECC-enabled / channel-disabled status bits */
#define MC_MAX_DOD	0x64	/* maximum DIMM Organization Data limits */

	/*
	 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
	 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
	 */

/* 15-bit corrected-error counter for DIMM 2 */
#define MC_TEST_ERR_RCV1	0x60
  #define DIMM2_COR_ERR(r)			((r) & 0x7fff)

/* 15-bit corrected-error counters for DIMMs 1 (high half) and 0 (low half) */
#define MC_TEST_ERR_RCV0	0x64
  #define DIMM1_COR_ERR(r)			(((r) >> 16) & 0x7fff)
  #define DIMM0_COR_ERR(r)			((r) & 0x7fff)
/* OFFSETS for Devices 4,5 and 6 Function 0 */

#define MC_CHANNEL_DIMM_INIT_PARAMS	0x58
  #define THREE_DIMMS_PRESENT		(1 << 24)
  #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
  #define QUAD_RANK_PRESENT		(1 << 22)
  #define REGISTERED_DIMM		(1 << 15)

/*
 * Physical read/write lockstep channel mapping; each logical channel uses a
 * 6-bit slot, write map in the low 3 bits, read map in the next 3.
 * FIX: the original expanded `ch` unparenthesized inside `ch * 6`, which
 * mis-evaluates for compound arguments such as RDLCH(r, i + 1).
 */
#define MC_CHANNEL_MAPPER	0x60
  #define RDLCH(r, ch)		((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)		((((r) >> ((ch) * 6)) & 0x07) - 1)

#define MC_CHANNEL_RANK_PRESENT	0x7c
  #define RANK_PRESENT_MASK	0xffff

/* Error-injection registers (address match mask, ECC mask, control) */
#define MC_CHANNEL_ADDR_MATCH	0xf0
#define MC_CHANNEL_ERROR_MASK	0xf8
#define MC_CHANNEL_ERROR_INJECT	0xfc
  #define INJECT_ADDR_PARITY	0x10
  #define INJECT_ECC		0x08
  #define MASK_CACHELINE	0x06
  #define MASK_FULL_CACHELINE	0x06
  #define MASK_MSB32_CACHELINE	0x04
  #define MASK_LSB32_CACHELINE	0x02
  #define NO_MASK_CACHELINE	0x00
  #define REPEAT_EN		0x01
/* OFFSETS for Devices 4,5 and 6 Function 1 */

#define MC_DOD_CH_DIMM0		0x48
#define MC_DOD_CH_DIMM1		0x4c
#define MC_DOD_CH_DIMM2		0x50
  #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
  /*
   * FIX: parenthesize the macro argument; the original `(x & MASK)` breaks
   * when x is an expression with an operator of lower precedence than `&`.
   */
  #define RANKOFFSET(x)		(((x) & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK	(1 << 9)
  #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK		((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)		(((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK		((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)		(((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK		((1 << 4) | (1 << 3))
  #define MC_DOD_NUMROW(x)		(((x) & MC_DOD_NUMROW_MASK) >> 3)
  #define MC_DOD_NUMCOL_MASK		3
  #define MC_DOD_NUMCOL(x)		((x) & MC_DOD_NUMCOL_MASK)
#define MC_RANK_PRESENT		0x7c	/* bitmap of ranks present on the channel */

/* MC_SAG_CH_*: per-slot address generation registers -- see datasheet */
#define MC_SAG_CH_0	0x80
#define MC_SAG_CH_1	0x84
#define MC_SAG_CH_2	0x88
#define MC_SAG_CH_3	0x8c
#define MC_SAG_CH_4	0x90
#define MC_SAG_CH_5	0x94
#define MC_SAG_CH_6	0x98
#define MC_SAG_CH_7	0x9c

/* Rank Interleave Range limit registers, one per interleave slot */
#define MC_RIR_LIMIT_CH_0	0x40
#define MC_RIR_LIMIT_CH_1	0x44
#define MC_RIR_LIMIT_CH_2	0x48
#define MC_RIR_LIMIT_CH_3	0x4C
#define MC_RIR_LIMIT_CH_4	0x50
#define MC_RIR_LIMIT_CH_5	0x54
#define MC_RIR_LIMIT_CH_6	0x58
#define MC_RIR_LIMIT_CH_7	0x5C
#define MC_RIR_LIMIT_MASK	((1 << 10) - 1)	/* limit field is 10 bits wide */

/* Rank Interleave Range way register: offset in bits 13:3, rank in 2:0 */
#define MC_RIR_WAY_CH		0x80
  #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
  #define MC_RIR_WAY_RANK_MASK		0x7

#define MAX_DIMMS		3		/* Max DIMMS per channel */
#define MAX_MCR_FUNC		4	/* highest dev-3 PCI function the driver binds */
#define MAX_CHAN_FUNC		3	/* highest per-channel PCI function bound */
/* Error-injection parameters, configured through the sysfs nodes below */
struct i7core_inject {
	/* Error address mask: a value of -1 in a field means "match any" */
	int channel, dimm, rank, bank, page, col;

/* Per-channel topology discovered by get_dimm_config() */
struct i7core_channel {

/* One PCI device/function the driver must reserve; pdev is filled on 'get' */
struct pci_id_descr {
	struct pci_dev		*pdev;

	/* Device 3 (MC registers), indexed by PCI function */
	struct pci_dev		*pci_mcr[MAX_MCR_FUNC + 1];
	/* Devices 4-6, one per channel, indexed by PCI function */
	struct pci_dev		*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
	struct i7core_info	info;
	struct i7core_inject	inject;
	struct i7core_channel	channel[NUM_CHANS];
	int			channels; /* Number of active channels */

	int		ce_count_available;	/* 0 until the counters were read once */
	unsigned long	ce_count[MAX_DIMMS]; /* ECC corrected errors counts per dimm */
	int		last_ce_count[MAX_DIMMS];	/* raw HW counter values at last poll */

/* Device name and register DID (Device ID) */
struct i7core_dev_info {
	const char *ctl_name;	/* name for this device */
	u16 fsb_mapping_errors;	/* DID for the branchmap,control */

/* Fills a pci_id_descr initializer from (device, function, device id) */
#define PCI_DESCR(device, function, device_id)	\
	.func = (function),			\
	.dev_id = (device_id)
/*
 * All PCI device/functions this driver reserves; the pdev field of each
 * entry is filled in by i7core_get_devices().
 */
struct pci_id_descr pci_devs[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS)  }, /* if RDIMM is supported */
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

/* NOTE(review): only used inside this file -- could be made static */
#define N_DEVS ARRAY_SIZE(pci_devs)
/*
 * pci_device_id table for which devices we are looking for
 * This should match the first device at pci_devs table
 *
 * NOTE(review): a const object annotated __devinitdata can trigger a
 * section mismatch; __devinitconst looks like the right marker here --
 * confirm against the targeted kernel version.
 */
static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7_MCR)},
	{0,}			/* 0 terminated list. */

/* Table of devices attributes supported by this driver */
static const struct i7core_dev_info i7core_devs[] = {
		.ctl_name = "i7 Core",
		.fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7_MCR,

/* Generic EDAC PCI control, created at probe time */
static struct edac_pci_ctl_info *i7core_pci;
/****************************************************************************
			Ancillary status routines
 ****************************************************************************/

/*
 * MC_CONTROL bits
 * FIX: parenthesize the `ch` macro argument so compound expressions such as
 * CH_ACTIVE(pvt, i + 1) evaluate correctly.
 */
#define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + (ch))))
#define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))

/* MC_STATUS bits */
#define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 3))
#define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << (ch)))
270 /* MC_MAX_DOD read functions */
271 static inline int numdimms(u32 dimms)
273 return (dimms & 0x3) + 1;
276 static inline int numrank(u32 rank)
278 static int ranks[4] = { 1, 2, 4, -EINVAL };
280 return ranks[rank & 0x3];
283 static inline int numbank(u32 bank)
285 static int banks[4] = { 4, 8, 16, -EINVAL };
287 return banks[bank & 0x3];
290 static inline int numrow(u32 row)
292 static int rows[8] = {
293 1 << 12, 1 << 13, 1 << 14, 1 << 15,
294 1 << 16, -EINVAL, -EINVAL, -EINVAL,
297 return rows[row & 0x7];
300 static inline int numcol(u32 col)
302 static int cols[8] = {
303 1 << 10, 1 << 11, 1 << 12, -EINVAL,
305 return cols[col & 0x3];
/****************************************************************************
			Memory check routines
 ****************************************************************************/

/*
 * i7core_get_active_channels() - store in *channels how many channels are
 * both active (MC_CONTROL bit set) and not disabled (MC_STATUS bit clear).
 * Callers treat a negative return as failure (see i7core_probe()).
 */
static int i7core_get_active_channels(int *channels)
	struct pci_dev *pdev = NULL;

	/* Locate the already-reserved dev 3 fn 0 (MCR) among pci_devs[] */
	for (i = 0; i < N_DEVS; i++) {
		if (!pci_devs[i].pdev)

		if (PCI_SLOT(pci_devs[i].pdev->devfn) == 3 &&
		    PCI_FUNC(pci_devs[i].pdev->devfn) == 0) {
			pdev = pci_devs[i].pdev;

		i7core_printk(KERN_ERR, "Couldn't find fn 3.0!!!\n");

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_STATUS, &status);
	pci_read_config_dword(pdev, MC_CONTROL, &control);

	for (i = 0; i < NUM_CHANS; i++) {
		/* Check if the channel is active */
		if (!(control & (1 << (8 + i))))

		/* Check if the channel is disabled */
		if (status & (1 << i)) {

	debugf0("Number of active channels: %d\n", *channels);
/*
 * get_dimm_config() - read the MC and per-channel registers, record the
 * discovered topology in pvt->channel[] and fill the EDAC csrow table.
 * Emits a verbose debug dump of everything it reads.
 */
static int get_dimm_config(struct mem_ctl_info *mci)
	struct i7core_pvt *pvt = mci->pvt_info;
	struct csrow_info *csr;
	struct pci_dev *pdev;

	/* Get data from the MC register, function 0 */
	pdev = pvt->pci_mcr[0];

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);

	debugf0("MC control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
		pvt->info.mc_control, pvt->info.mc_status,
		pvt->info.max_dod, pvt->info.ch_map);

	/* Pick the EDAC mode from the SDCC width reported by MC_CONTROL */
	if (ECC_ENABLED(pvt)) {
		debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt)?8:4);
			mode = EDAC_S8ECD8ED;
			mode = EDAC_S4ECD4ED;
		debugf0("ECC disabled\n");

	/* FIXME: need to handle the error codes */
	debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked\n",
		numdimms(pvt->info.max_dod),
		numrank(pvt->info.max_dod >> 2),
		numbank(pvt->info.max_dod >> 4));
	debugf0("DOD Max rows x colums = 0x%x x 0x%x\n",
		numrow(pvt->info.max_dod >> 6),
		numcol(pvt->info.max_dod >> 9));

	debugf0("Memory channel configuration:\n");

	for (i = 0; i < NUM_CHANS; i++) {
		u32 data, dimm_dod[3], value[8];

		/* Skip channels that are inactive or administratively disabled */
		if (!CH_ACTIVE(pvt, i)) {
			debugf0("Channel %i is not active\n", i);
		if (CH_DISABLED(pvt, i)) {
			debugf0("Channel %i is disabled\n", i);

		/* Devices 4-6 function 0 */
		pci_read_config_dword(pvt->pci_ch[i][0],
				MC_CHANNEL_DIMM_INIT_PARAMS, &data);

		pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT)? 4 : 2;

		if (data & REGISTERED_DIMM)

		if (data & THREE_DIMMS_PRESENT)
			pvt->channel[i].dimms = 3;
		else if (data & SINGLE_QUAD_RANK_PRESENT)
			pvt->channel[i].dimms = 1;
			pvt->channel[i].dimms = 2;

		/* Devices 4-6 function 1 */
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
			"%d ranks, %cDIMMs\n",
			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
			pvt->channel[i].ranks,
			(data & REGISTERED_DIMM)? 'R' : 'U');

		/* Decode each populated DIMM slot and register a csrow for it */
		for (j = 0; j < 3; j++) {
			u32 banks, ranks, rows, cols;

			if (!DIMM_PRESENT(dimm_dod[j]))

			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));

			pvt->channel[i].dimms++;

			debugf0("\tdimm %d offset: %x, numbank: %#x, "
				"numrank: %#x, numrow: %#x, numcol: %#x\n",
				RANKOFFSET(dimm_dod[j]),
				banks, ranks, rows, cols);

			csr = &mci->csrows[csrow];
			csr->csrow_idx = csrow;
				csr->dtype = DEV_X16;
				csr->dtype = DEV_UNKNOWN;
			csr->edac_mode = mode;

		/* Dump the channel's address-generation registers */
		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
		/* NOTE(review): bare printk() without a KERN_ level; debugf0()
		 * or an explicit KERN_DEBUG would match the rest of the file */
		printk("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
		for (j = 0; j < 8; j++)
			printk("\t\t%#x\t%#x\t%#x\n",
				(value[j] >> 27) & 0x1,
				(value[j] >> 24) & 0x7,
				/* NOTE(review): `&&` is logical AND, so this
				 * prints 0/1 -- almost certainly the bitwise
				 * `&` was intended to extract bits 23:0 */
				(value[j] && ((1 << 24) - 1)));
/****************************************************************************
			Error insertion routines
 ****************************************************************************/

/* The i7core has independent error injection features per channel.
   However, to have a simpler code, we don't allow enabling error injection
   on more than one channel.
   Also, since a change at an inject parameter will be applied only at enable,
   we're disabling error injection on all write calls to the sysfs nodes that
   controls the error code injection.
 */

/* Turn injection off and clear the selected channel's error-mask register */
static int disable_inject(struct mem_ctl_info *mci)
	struct i7core_pvt *pvt = mci->pvt_info;

	pvt->inject.enable = 0;

	/* Nothing to clear when the injection channel was never bound */
	if (!pvt->pci_ch[pvt->inject.channel][0])

	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
				MC_CHANNEL_ERROR_MASK, 0);
/*
 * i7core inject inject.section
 *
 *	accept and store error injection inject.section value
 *	bit 0 - refers to the lower 32-byte half cacheline
 *	bit 1 - refers to the upper 32-byte half cacheline
 */
static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
	struct i7core_pvt *pvt = mci->pvt_info;

	/* A parameter change while injection is armed first disables it */
	if (pvt->inject.enable)

	rc = strict_strtoul(data, 10, &value);
	/* Only the two section bits are meaningful */
	if ((rc < 0) || (value > 3))

	pvt->inject.section = (u32) value;

/* Report the stored section mask in hex */
static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.section);
/*
 *	accept and store error injection inject.type value
 *	bit 0 - repeat enable - Enable error repetition
 *	bit 1 - inject ECC error
 *	bit 2 - inject parity error
 */
static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
					const char *data, size_t count)
	struct i7core_pvt *pvt = mci->pvt_info;

	/* A parameter change while injection is armed first disables it */
	if (pvt->inject.enable)

	rc = strict_strtoul(data, 10, &value);
	/* Only the three type bits are meaningful */
	if ((rc < 0) || (value > 7))

	pvt->inject.type = (u32) value;

/* Report the stored injection type in hex */
static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.type);
/*
 * i7core_inject_inject.eccmask_store
 *
 * The type of error (UE/CE) will depend on the inject.eccmask value:
 *   Any bits set to a 1 will flip the corresponding ECC bit
 *   Correctable errors can be injected by flipping 1 bit or the bits within
 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 *   uncorrectable error to be injected.
 */
static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
	struct i7core_pvt *pvt = mci->pvt_info;

	/* A parameter change while injection is armed first disables it */
	if (pvt->inject.enable)

	/* Any 32-bit value is accepted: the whole word is an ECC flip mask */
	rc = strict_strtoul(data, 10, &value);

	pvt->inject.eccmask = (u32) value;

/* Report the stored ECC flip mask in hex */
static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
/*
 * i7core_inject_addrmatch
 *
 * Parses one "cmd:value" pair per write, where cmd is one of channel, dimm,
 * rank, bank, page, col/column and value is a non-negative decimal number
 * or the word "any" (stored internally as -1, meaning "match any").
 */
static ssize_t i7core_inject_addrmatch_store(struct mem_ctl_info *mci,
					     const char *data, size_t count)
	struct i7core_pvt *pvt = mci->pvt_info;

	/* A parameter change while injection is armed first disables it */
	if (pvt->inject.enable)

	/*
	 * NOTE(review): strsep() writes NULs into the buffer, and the cast
	 * discards const on the sysfs-supplied data pointer -- confirm the
	 * buffer is writable for this kernel's sysfs implementation.
	 */
	cmd = strsep((char **) &data, ":");

	val = strsep((char **) &data, " \n\t");

	if (!strcasecmp(val,"any"))

	rc = strict_strtol(val, 10, &value);
	if ((rc < 0) || (value < 0))

	if (!strcasecmp(cmd,"channel")) {
		pvt->inject.channel = value;

	} else if (!strcasecmp(cmd,"dimm")) {
		pvt->inject.dimm = value;

	} else if (!strcasecmp(cmd,"rank")) {
		pvt->inject.rank = value;

	} else if (!strcasecmp(cmd,"bank")) {
		pvt->inject.bank = value;

	} else if (!strcasecmp(cmd,"page")) {
		pvt->inject.page = value;

	} else if (!strcasecmp(cmd,"col") ||
		   !strcasecmp(cmd,"column")) {
		pvt->inject.col = value;

/* Render every stored match field, printing "any" for -1 entries */
static ssize_t i7core_inject_addrmatch_show(struct mem_ctl_info *mci,
	struct i7core_pvt *pvt = mci->pvt_info;
	/*
	 * NOTE(review): page[7]/col[7] only fit "0x%04x" while the value is
	 * <= 0xffff; the store path does not enforce that bound -- verify.
	 */
	char channel[4], dimm[4], bank[4], rank[4], page[7], col[7];

	if (pvt->inject.channel < 0)
		sprintf(channel, "any");
		sprintf(channel, "%d", pvt->inject.channel);
	if (pvt->inject.dimm < 0)
		sprintf(dimm, "any");
		sprintf(dimm, "%d", pvt->inject.dimm);
	if (pvt->inject.bank < 0)
		sprintf(bank, "any");
		sprintf(bank, "%d", pvt->inject.bank);
	if (pvt->inject.rank < 0)
		sprintf(rank, "any");
		sprintf(rank, "%d", pvt->inject.rank);
	if (pvt->inject.page < 0)
		sprintf(page, "any");
		sprintf(page, "0x%04x", pvt->inject.page);
	if (pvt->inject.col < 0)
		sprintf(col, "0x%04x", pvt->inject.col);

	return sprintf(data, "channel: %s\ndimm: %s\nbank: %s\n"
			     "rank: %s\npage: %s\ncolumn: %s\n",
		       channel, dimm, bank, rank, page, col);
/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items will make the MCU to ignore
 * that matching criteria for error injection.
 *
 * It should be noticed that the error will only happen after a write operation
 * on a memory that matches the condition. if REPEAT_EN is not enabled at
 * inject mask, then it will produce just one error. Otherwise, it will repeat
 * until the injectmask would be cleaned.
 *
 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
 *	  is reliable enough to check if the MC is using the
 *	  three channels. However, this is not clear at the datasheet.
 */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
				       const char *data, size_t count)
	struct i7core_pvt *pvt = mci->pvt_info;

	if (!pvt->pci_ch[pvt->inject.channel][0])

	rc = strict_strtoul(data, 10, &enable);

	pvt->inject.enable = 1;

	/* Sets pvt->inject.dimm mask -- field width depends on whether the
	 * channel runs 3 DIMMs (2 bits at 35) or fewer (1 bit at 36) */
	if (pvt->inject.dimm < 0)
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.dimm & 0x3L) << 35;
			mask |= (pvt->inject.dimm & 0x1L) << 36;

	/* Sets pvt->inject.rank mask -- width mirrors the dimm field above */
	if (pvt->inject.rank < 0)
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.rank & 0x1L) << 34;
			mask |= (pvt->inject.rank & 0x3L) << 34;

	/* Sets pvt->inject.bank mask */
	if (pvt->inject.bank < 0)
		/* NOTE(review): 0x15L is decimal 21 (0b10101) -- a 4-bit bank
		 * field would need 0xfL; confirm against the datasheet */
		mask |= (pvt->inject.bank & 0x15L) << 30;

	/* Sets pvt->inject.page mask */
	if (pvt->inject.page < 0)
		mask |= (pvt->inject.page & 0xffffL) << 14;

	/* Sets pvt->inject.column mask */
	if (pvt->inject.col < 0)
		mask |= (pvt->inject.col & 0x3fffL);

	/* The qword and split-dword writes below are the two halves of an
	 * #ifdef on pci_write_config_qword availability (elided here) */
	pci_write_config_qword(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH, mask);
	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH, mask);
	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

	/* Read back the match mask for the debug log */
	pci_read_config_qword(pvt->pci_ch[pvt->inject.channel][0],
			      MC_CHANNEL_ADDR_MATCH, &rdmask);
	debugf0("Inject addr match write 0x%016llx, read: 0x%016llx\n",
		u32 rdmask1, rdmask2;

	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			      MC_CHANNEL_ADDR_MATCH, &rdmask1);
	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			      MC_CHANNEL_ADDR_MATCH + 4, &rdmask2);

	debugf0("Inject addr match write 0x%016llx, read: 0x%08x%08x\n",
		mask, rdmask1, rdmask2);

	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

	/*
	 * bit    0: REPEAT_EN
	 * bits 1-2: MASK_HALF_CACHELINE
	 * bit    3: INJECT_ECC
	 * bit    4: INJECT_ADDR_PARITY
	 */
	injectmask = (pvt->inject.type & 1) |
		     (pvt->inject.section & 0x3) << 1 |
		     (pvt->inject.type & 0x6) << (3 - 1);

	/* NOTE(review): this overwrites the eccmask just written to the same
	 * register; MC_CHANNEL_ERROR_INJECT (0xfc) looks like the intended
	 * target for injectmask -- confirm against the datasheet */
	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_MASK, injectmask);

	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x, inject 0x%08x\n",
		mask, pvt->inject.eccmask, injectmask);
/* Read back the injection-control register and report the enable state */
static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
	struct i7core_pvt *pvt = mci->pvt_info;

	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			      MC_CHANNEL_ERROR_MASK, &injectmask);

	/* NOTE(review): "%018x" is an odd width for a 32-bit value; "%08x"? */
	debugf0("Inject error read: 0x%018x\n", injectmask);

	/* INJECT_ECC | INJECT_ADDR_PARITY >> 1 == 0x0c: injection armed */
	if (injectmask & 0x0c)
		pvt->inject.enable = 1;

	return sprintf(data, "%d\n", pvt->inject.enable);
/* Dump the per-DIMM corrected-error counts gathered by check_mc_test_err() */
static ssize_t i7core_ce_regs_show(struct mem_ctl_info *mci, char *data)
	struct i7core_pvt *pvt = mci->pvt_info;

	/* Counters are meaningless before the first successful poll */
	if (!pvt->ce_count_available)
		return sprintf(data, "unavailable\n");

	return sprintf(data, "dimm0: %lu\ndimm1: %lu\ndimm2: %lu\n",
/*
 * Sysfs attributes exported under this MC's EDAC directory: the error
 * injection controls above plus the corrected-error counter dump.
 */
static struct mcidev_sysfs_attribute i7core_inj_attrs[] = {
		.name = "inject_section",
		.mode = (S_IRUGO | S_IWUSR)
	.show  = i7core_inject_section_show,
	.store = i7core_inject_section_store,
		.name = "inject_type",
		.mode = (S_IRUGO | S_IWUSR)
	.show  = i7core_inject_type_show,
	.store = i7core_inject_type_store,
		.name = "inject_eccmask",
		.mode = (S_IRUGO | S_IWUSR)
	.show  = i7core_inject_eccmask_show,
	.store = i7core_inject_eccmask_store,
		.name = "inject_addrmatch",
		.mode = (S_IRUGO | S_IWUSR)
	.show  = i7core_inject_addrmatch_show,
	.store = i7core_inject_addrmatch_store,
		.name = "inject_enable",
		.mode = (S_IRUGO | S_IWUSR)
	.show  = i7core_inject_enable_show,
	.store = i7core_inject_enable_store,
		.name = "corrected_error_counts",
		.mode = (S_IRUGO | S_IWUSR)
	/* read-only in practice: no .store handler is provided */
	.show  = i7core_ce_regs_show,
967 /****************************************************************************
968 Device initialization routines: put/get, init/exit
969 ****************************************************************************/
972 * i7core_put_devices 'put' all the devices that we have
975 static void i7core_put_devices(void)
979 for (i = 0; i < N_DEVS; i++)
980 pci_dev_put(pci_devs[i].pdev);
/*
 *	i7core_get_devices	Find and perform 'get' operation on the MCH's
 *			device/functions we want to reference for this driver
 *
 *			Need to 'get' device 16 func 1 and func 2
 *
 * On failure, already-reserved devices are released via i7core_put_devices().
 */
static int i7core_get_devices(void)
	struct pci_dev *pdev = NULL;

	for (i = 0; i < N_DEVS; i++) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      pci_devs[i].dev_id, NULL);
		pci_devs[i].pdev = pdev;
			i7core_printk(KERN_ERR,
				"Device not found: PCI ID %04x:%04x "
				"(dev %d, func %d)\n",
				PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id,
				pci_devs[i].dev,pci_devs[i].func);
		/* Dev 3 function 2 only exists on chips with RDIMMs */
		if ((pci_devs[i].dev == 3) && (pci_devs[i].func == 2))

		/* End of list, leave */

		/* Sanity check: the DID must live at the expected dev/fn */
		if (unlikely(PCI_SLOT(pdev->devfn) != pci_devs[i].dev ||
			     PCI_FUNC(pdev->devfn) != pci_devs[i].func)) {
			i7core_printk(KERN_ERR,
				"Device PCI ID %04x:%04x "
				"has fn %d.%d instead of fn %d.%d\n",
				PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id,
				PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
				pci_devs[i].dev, pci_devs[i].func);

		/* Be sure that the device is enabled */
		rc = pci_enable_device(pdev);
		if (unlikely(rc < 0)) {
			i7core_printk(KERN_ERR,
				"Couldn't enable PCI ID %04x:%04x "
				PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id,
				PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));

		i7core_printk(KERN_INFO,
			"Registered device %0x:%0x fn %d.%d\n",
			PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id,
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));

	/* Error path: drop every reference taken so far */
	i7core_put_devices();
/*
 * mci_bind_devs() - cache the reserved pci_devs[] pointers into the pvt
 * lookup tables: slot 3 goes to pvt->pci_mcr[func], slots 4..4+NUM_CHANS-1
 * go to pvt->pci_ch[slot - 4][func].  Anything else is reported as an error.
 */
static int mci_bind_devs(struct mem_ctl_info *mci)
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;

	for (i = 0; i < N_DEVS; i++) {
		pdev = pci_devs[i].pdev;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);

			if (unlikely(func > MAX_MCR_FUNC))
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
			pvt->pci_ch[slot - 4][func] = pdev;

		debugf0("Associated fn %d.%d, dev = %p\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), pdev);

	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
/****************************************************************************
			Error check routines
 ****************************************************************************/

/* This function is based on the device 3 function 4 registers as described on:
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 *
 * Polls the free-running corrected-error counters and accumulates the
 * per-DIMM deltas since the previous poll into pvt->ce_count[].
 */
static void check_mc_test_err(struct mem_ctl_info *mci)
	struct i7core_pvt *pvt = mci->pvt_info;
	int new0, new1, new2;

	/* Dev 3 fn 4 is optional; bail out quietly when it was not bound */
	if (!pvt->pci_mcr[4]) {
		debugf0("%s MCR registers not found\n",__func__);

	/* Corrected error reads */
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);

	/* Store the new values */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);

	debugf2("%s CE rcv1=0x%08x rcv0=0x%08x, %d %d %d\n",
		(pvt->ce_count_available ? "UPDATE" : "READ"),
		rcv1, rcv0, new0, new1, new2);

	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		int add0, add1, add2;

		/* Delta since the previous poll of each 15-bit HW counter */
		add2 = new2 - pvt->last_ce_count[2];
		add1 = new1 - pvt->last_ce_count[1];
		add0 = new0 - pvt->last_ce_count[0];

		pvt->ce_count[2] += add2;
		pvt->ce_count[1] += add1;
		pvt->ce_count[0] += add0;

	pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->last_ce_count[2] = new2;
	pvt->last_ce_count[1] = new1;
	pvt->last_ce_count[0] = new0;
/*
 *	i7core_check_error	Retrieve and process errors reported by the
 *				hardware. Called by the Core module.
 */
static void i7core_check_error(struct mem_ctl_info *mci)
{
	/* Only the corrected-error test counters are polled for now */
	check_mc_test_err(mci);
}
/*
 *	i7core_probe	Probe for ONE instance of device to see if it is
 *			present.
 *	return:
 *		0 for FOUND a device
 *		< 0 for error code
 */
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;
	int num_channels = 0;
	int dev_idx = id->driver_data;

	/* driver_data indexes i7core_devs[]; reject anything out of range */
	if (unlikely(dev_idx >= ARRAY_SIZE(i7core_devs)))

	/* get the pci devices we want to reserve for our use */
	rc = i7core_get_devices();
	if (unlikely(rc < 0))

	/* Check the number of active and not disabled channels */
	rc = i7core_get_active_channels(&num_channels);
	if (unlikely (rc < 0))

	/* FIXME: we currently don't know the number of csrows */
	num_csrows = num_channels;

	/* allocate a new MC control structure */
	mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
	if (unlikely (!mci)) {

	debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);

	mci->dev = &pdev->dev;	/* record ptr to the generic device */

	pvt = mci->pvt_info;
	memset(pvt, 0, sizeof(*pvt));

	mci->mtype_cap = MEM_FLAG_DDR3;	/* FIXME: how to handle RDDR3? */
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "i7core_edac.c";
	mci->mod_ver = I7CORE_REVISION;
	mci->ctl_name = i7core_devs[dev_idx].ctl_name;
	mci->dev_name = pci_name(pdev);
	mci->ctl_page_to_phys = NULL;
	mci->mc_driver_sysfs_attributes = i7core_inj_attrs;
	/* Set the function pointer to an actual operation function */
	mci->edac_check = i7core_check_error;

	/* Store pci devices at mci for faster access */
	rc = mci_bind_devs(mci);
	if (unlikely (rc < 0))

	/* Get dimm basic config */
	/* NOTE(review): get_dimm_config() returns int but the result is
	 * ignored here -- decide whether its failure should abort probe */
	get_dimm_config(mci);

	/* add this new MC control structure to EDAC's list of MCs */
	if (unlikely(edac_mc_add_mc(mci))) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mc_add_mc()\n", __func__);
		/* FIXME: perhaps some code should go here that disables error
		 * reporting if we just enabled it
		 */

	/* allocating generic PCI control info */
	i7core_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
	/* A missing PCI control is non-fatal: only PCI error reports are lost */
	if (unlikely (!i7core_pci)) {
			"%s(): Unable to create PCI control\n",
			"%s(): PCI error report via EDAC not setup\n",

	/* Default error mask is any memory */
	pvt->inject.channel = 0;
	pvt->inject.dimm = -1;
	pvt->inject.rank = -1;
	pvt->inject.bank = -1;
	pvt->inject.page = -1;
	pvt->inject.col = -1;

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	/* Error path: release every PCI device reserved above */
	i7core_put_devices();
/*
 *	i7core_remove	destructor for one instance of device
 *
 * Tears down in reverse probe order: PCI control, MC registration, then
 * the reserved PCI device references.
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
	struct mem_ctl_info *mci;

	debugf0(__FILE__ ": %s()\n", __func__);

	edac_pci_release_generic_ctl(i7core_pci);

	mci = edac_mc_del_mc(&pdev->dev);

	/* retrieve references to resources, and free those resources */
	i7core_put_devices();
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 *	i7core_driver	pci_driver structure for this module
 */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
/*
 *	i7core_init		Module entry function
 *			Try to initialize this module for its devices
 */
static int __init i7core_init(void)
	debugf2("MC: " __FILE__ ": %s()\n", __func__);

	/* Ensure that the OPSTATE is set correctly for POLL or NMI */

	pci_rc = pci_register_driver(&i7core_driver);

	/* pci_register_driver() may return > 0; normalize success to 0 */
	return (pci_rc < 0) ? pci_rc : 0;
1327 * i7core_exit() Module exit function
1328 * Unregister the driver
1330 static void __exit i7core_exit(void)
1332 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1333 pci_unregister_driver(&i7core_driver);
module_init(i7core_init);
module_exit(i7core_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "

/* edac_op_state selects POLL (0) vs NMI (1); read-only after load (0444) */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");