1 /* Intel 7 core Memory Controller kernel module (Nehalem)
3 * This file may be distributed under the terms of the
4 * GNU General Public License version 2 only.
6 * Copyright (c) 2009 by:
7 * Mauro Carvalho Chehab <mchehab@redhat.com>
9 * Red Hat Inc. http://www.redhat.com
11 * Forked and adapted from the i5400_edac driver
13 * Based on the following public Intel datasheets:
14 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
15 * Datasheet, Volume 2:
16 * http://download.intel.com/design/processor/datashts/320835.pdf
17 * Intel Xeon Processor 5500 Series Datasheet Volume 2
18 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/pci.h>
26 #include <linux/pci_ids.h>
27 #include <linux/slab.h>
28 #include <linux/delay.h>
29 #include <linux/edac.h>
30 #include <linux/mmzone.h>
31 #include <linux/edac_mce.h>
32 #include <linux/smp.h>
33 #include <asm/processor.h>
35 #include "edac_core.h"
38 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
39 * registers start at bus 255, and are not reported by BIOS.
40 * We currently find devices with only 2 sockets. In order to support more QPI
41 * Quick Path Interconnect, just increment this number.
43 #define MAX_SOCKET_BUSES 2
47 * Alter this version for the module when modifications are made
49 #define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
50 #define EDAC_MOD_STR "i7core_edac"
55 #define i7core_printk(level, fmt, arg...) \
56 edac_printk(level, "i7core", fmt, ##arg)
58 #define i7core_mc_printk(mci, level, fmt, arg...) \
59 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
62 * i7core Memory Controller Registers
65 /* OFFSETS for Device 0 Function 0 */
67 #define MC_CFG_CONTROL 0x90
69 /* OFFSETS for Device 3 Function 0 */
71 #define MC_CONTROL 0x48
72 #define MC_STATUS 0x4c
73 #define MC_MAX_DOD 0x64
76 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
77 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
80 #define MC_TEST_ERR_RCV1 0x60
81 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
83 #define MC_TEST_ERR_RCV0 0x64
84 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
85 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
87 /* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
88 #define MC_COR_ECC_CNT_0 0x80
89 #define MC_COR_ECC_CNT_1 0x84
90 #define MC_COR_ECC_CNT_2 0x88
91 #define MC_COR_ECC_CNT_3 0x8c
92 #define MC_COR_ECC_CNT_4 0x90
93 #define MC_COR_ECC_CNT_5 0x94
95 #define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
96 #define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
99 /* OFFSETS for Devices 4,5 and 6 Function 0 */
101 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
102 #define THREE_DIMMS_PRESENT (1 << 24)
103 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
104 #define QUAD_RANK_PRESENT (1 << 22)
105 #define REGISTERED_DIMM (1 << 15)
107 #define MC_CHANNEL_MAPPER 0x60
108 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
109 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
111 #define MC_CHANNEL_RANK_PRESENT 0x7c
112 #define RANK_PRESENT_MASK 0xffff
114 #define MC_CHANNEL_ADDR_MATCH 0xf0
115 #define MC_CHANNEL_ERROR_MASK 0xf8
116 #define MC_CHANNEL_ERROR_INJECT 0xfc
117 #define INJECT_ADDR_PARITY 0x10
118 #define INJECT_ECC 0x08
119 #define MASK_CACHELINE 0x06
120 #define MASK_FULL_CACHELINE 0x06
121 #define MASK_MSB32_CACHELINE 0x04
122 #define MASK_LSB32_CACHELINE 0x02
123 #define NO_MASK_CACHELINE 0x00
124 #define REPEAT_EN 0x01
126 /* OFFSETS for Devices 4,5 and 6 Function 1 */
128 #define MC_DOD_CH_DIMM0 0x48
129 #define MC_DOD_CH_DIMM1 0x4c
130 #define MC_DOD_CH_DIMM2 0x50
131 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
132 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
133 #define DIMM_PRESENT_MASK (1 << 9)
134 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
135 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
136 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
137 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
138 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
139 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
140 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
141 #define MC_DOD_NUMCOL_MASK 3
142 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
144 #define MC_RANK_PRESENT 0x7c
146 #define MC_SAG_CH_0 0x80
147 #define MC_SAG_CH_1 0x84
148 #define MC_SAG_CH_2 0x88
149 #define MC_SAG_CH_3 0x8c
150 #define MC_SAG_CH_4 0x90
151 #define MC_SAG_CH_5 0x94
152 #define MC_SAG_CH_6 0x98
153 #define MC_SAG_CH_7 0x9c
155 #define MC_RIR_LIMIT_CH_0 0x40
156 #define MC_RIR_LIMIT_CH_1 0x44
157 #define MC_RIR_LIMIT_CH_2 0x48
158 #define MC_RIR_LIMIT_CH_3 0x4C
159 #define MC_RIR_LIMIT_CH_4 0x50
160 #define MC_RIR_LIMIT_CH_5 0x54
161 #define MC_RIR_LIMIT_CH_6 0x58
162 #define MC_RIR_LIMIT_CH_7 0x5C
163 #define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
165 #define MC_RIR_WAY_CH 0x80
166 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
167 #define MC_RIR_WAY_RANK_MASK 0x7
174 #define MAX_DIMMS 3 /* Max DIMMS per channel */
175 #define MAX_MCR_FUNC 4
176 #define MAX_CHAN_FUNC 3
186 struct i7core_inject {
193 /* Error address mask */
194 int channel, dimm, rank, bank, page, col;
197 struct i7core_channel {
202 struct pci_id_descr {
210 struct list_head list;
212 struct pci_dev **pdev;
214 struct mem_ctl_info *mci;
218 struct pci_dev *pci_noncore;
219 struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
220 struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
222 struct i7core_dev *i7core_dev;
224 struct i7core_info info;
225 struct i7core_inject inject;
226 struct i7core_channel channel[NUM_CHANS];
228 int channels; /* Number of active channels */
230 int ce_count_available;
231 int csrow_map[NUM_CHANS][MAX_DIMMS];
233 /* ECC corrected errors counts per udimm */
234 unsigned long udimm_ce_count[MAX_DIMMS];
235 int udimm_last_ce_count[MAX_DIMMS];
236 /* ECC corrected errors counts per rdimm */
237 unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
238 int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
240 unsigned int is_registered;
243 struct edac_mce edac_mce;
245 /* Fifo double buffers */
246 struct mce mce_entry[MCE_LOG_LEN];
247 struct mce mce_outentry[MCE_LOG_LEN];
249 /* Fifo in/out counters */
250 unsigned mce_in, mce_out;
252 /* Count indicator to show errors not got */
253 unsigned mce_overrun;
257 static LIST_HEAD(i7core_edac_list);
258 static DEFINE_MUTEX(i7core_edac_lock);
260 #define PCI_DESCR(device, function, device_id) \
262 .func = (function), \
263 .dev_id = (device_id)
265 struct pci_id_descr pci_dev_descr_i7core[] = {
266 /* Memory controller */
267 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
268 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
269 /* Exists only for RDIMM */
270 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
271 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
274 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
275 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
276 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
277 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
280 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
281 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
282 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
283 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
286 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
287 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
288 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
289 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
291 /* Generic Non-core registers */
293 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
294 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
295 * the probing code needs to test for the other address in case of
296 * failure of this one
298 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },
302 struct pci_id_descr pci_dev_descr_lynnfield[] = {
303 { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR) },
304 { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD) },
305 { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST) },
307 { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
308 { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
309 { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
310 { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC) },
312 { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
313 { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
314 { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
315 { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) },
318 * This is the PCI device has an alternate address on some
319 * processors like Core i7 860
321 { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE) },
325 * pci_device_id table for which devices we are looking for
327 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
328 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
329 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
330 {0,} /* 0 terminated list. */
333 static struct edac_pci_ctl_info *i7core_pci;
335 /****************************************************************************
336 Ancillary status routines
337 ****************************************************************************/
339 /* MC_CONTROL bits */
340 #define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
341 #define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
344 #define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
345 #define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
347 /* MC_MAX_DOD read functions */
/* Decode the 2-bit MAXNUMDIMMS field of MC_MAX_DOD: 0 encodes 1 DIMM. */
static inline int numdimms(u32 dimms)
{
	return 1 + (dimms & 0x3);
}
/* Decode the 2-bit MAXNUMRANK field of MC_MAX_DOD into a rank count. */
static inline int numrank(u32 rank)
{
	static const int lut[4] = { 1, 2, 4, -EINVAL };

	return lut[rank & 0x3];
}
/* Decode the 2-bit MAXNUMBANK field of MC_MAX_DOD into a bank count. */
static inline int numbank(u32 bank)
{
	static const int lut[4] = { 4, 8, 16, -EINVAL };

	return lut[bank & 0x3];
}
/* Decode the 3-bit MAXNUMROW field of MC_MAX_DOD into a row count. */
static inline int numrow(u32 row)
{
	static const int lut[8] = {
		1 << 12, 1 << 13, 1 << 14, 1 << 15,
		1 << 16, -EINVAL, -EINVAL, -EINVAL,
	};

	return lut[row & 0x7];
}
/*
 * Decode the 2-bit MAXNUMCOL field of MC_MAX_DOD into a column count.
 * Only two bits are decoded (index is masked with 0x3), so the lookup
 * table needs exactly four entries; the original declared cols[8] but
 * initialized and indexed only four, leaving four dead zero entries.
 */
static inline int numcol(u32 col)
{
	static const int cols[4] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};

	return cols[col & 0x3];
}
385 static struct i7core_dev *get_i7core_dev(u8 socket)
387 struct i7core_dev *i7core_dev;
389 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
390 if (i7core_dev->socket == socket)
397 /****************************************************************************
398 Memory check routines
399 ****************************************************************************/
400 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
403 struct i7core_dev *i7core_dev = get_i7core_dev(socket);
409 for (i = 0; i < i7core_dev->n_devs; i++) {
410 if (!i7core_dev->pdev[i])
413 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
414 PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
415 return i7core_dev->pdev[i];
423 * i7core_get_active_channels() - gets the number of channels and csrows
424 * @socket: Quick Path Interconnect socket
425 * @channels: Number of channels that will be returned
426 * @csrows: Number of csrows found
428 * Since EDAC core needs to know in advance the number of available channels
429 * and csrows, in order to allocate memory for csrows/channels, it is needed
430 * to run two similar steps. At the first step, implemented on this function,
431 * it checks the number of csrows/channels present at one socket.
432 * this is used in order to properly allocate the size of mci components.
434 * It should be noticed that none of the current available datasheets explain
435 * or even mention how csrows are seen by the memory controller. So, we need
436 * to add a fake description for csrows.
437 * So, this driver is attributing one DIMM memory for one csrow.
439 static int i7core_get_active_channels(u8 socket, unsigned *channels,
442 struct pci_dev *pdev = NULL;
449 pdev = get_pdev_slot_func(socket, 3, 0);
451 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
456 /* Device 3 function 0 reads */
457 pci_read_config_dword(pdev, MC_STATUS, &status);
458 pci_read_config_dword(pdev, MC_CONTROL, &control);
460 for (i = 0; i < NUM_CHANS; i++) {
462 /* Check if the channel is active */
463 if (!(control & (1 << (8 + i))))
466 /* Check if the channel is disabled */
467 if (status & (1 << i))
470 pdev = get_pdev_slot_func(socket, i + 4, 1);
472 i7core_printk(KERN_ERR, "Couldn't find socket %d "
477 /* Devices 4-6 function 1 */
478 pci_read_config_dword(pdev,
479 MC_DOD_CH_DIMM0, &dimm_dod[0]);
480 pci_read_config_dword(pdev,
481 MC_DOD_CH_DIMM1, &dimm_dod[1]);
482 pci_read_config_dword(pdev,
483 MC_DOD_CH_DIMM2, &dimm_dod[2]);
487 for (j = 0; j < 3; j++) {
488 if (!DIMM_PRESENT(dimm_dod[j]))
494 debugf0("Number of active channels on socket %d: %d\n",
500 static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
502 struct i7core_pvt *pvt = mci->pvt_info;
503 struct csrow_info *csr;
504 struct pci_dev *pdev;
506 unsigned long last_page = 0;
510 /* Get data from the MC register, function 0 */
511 pdev = pvt->pci_mcr[0];
515 /* Device 3 function 0 reads */
516 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
517 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
518 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
519 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
521 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
522 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
523 pvt->info.max_dod, pvt->info.ch_map);
525 if (ECC_ENABLED(pvt)) {
526 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
528 mode = EDAC_S8ECD8ED;
530 mode = EDAC_S4ECD4ED;
532 debugf0("ECC disabled\n");
536 /* FIXME: need to handle the error codes */
537 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
539 numdimms(pvt->info.max_dod),
540 numrank(pvt->info.max_dod >> 2),
541 numbank(pvt->info.max_dod >> 4),
542 numrow(pvt->info.max_dod >> 6),
543 numcol(pvt->info.max_dod >> 9));
545 for (i = 0; i < NUM_CHANS; i++) {
546 u32 data, dimm_dod[3], value[8];
548 if (!pvt->pci_ch[i][0])
551 if (!CH_ACTIVE(pvt, i)) {
552 debugf0("Channel %i is not active\n", i);
555 if (CH_DISABLED(pvt, i)) {
556 debugf0("Channel %i is disabled\n", i);
560 /* Devices 4-6 function 0 */
561 pci_read_config_dword(pvt->pci_ch[i][0],
562 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
564 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
567 if (data & REGISTERED_DIMM)
572 if (data & THREE_DIMMS_PRESENT)
573 pvt->channel[i].dimms = 3;
574 else if (data & SINGLE_QUAD_RANK_PRESENT)
575 pvt->channel[i].dimms = 1;
577 pvt->channel[i].dimms = 2;
580 /* Devices 4-6 function 1 */
581 pci_read_config_dword(pvt->pci_ch[i][1],
582 MC_DOD_CH_DIMM0, &dimm_dod[0]);
583 pci_read_config_dword(pvt->pci_ch[i][1],
584 MC_DOD_CH_DIMM1, &dimm_dod[1]);
585 pci_read_config_dword(pvt->pci_ch[i][1],
586 MC_DOD_CH_DIMM2, &dimm_dod[2]);
588 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
589 "%d ranks, %cDIMMs\n",
591 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
593 pvt->channel[i].ranks,
594 (data & REGISTERED_DIMM) ? 'R' : 'U');
596 for (j = 0; j < 3; j++) {
597 u32 banks, ranks, rows, cols;
600 if (!DIMM_PRESENT(dimm_dod[j]))
603 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
604 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
605 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
606 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
608 /* DDR3 has 8 I/O banks */
609 size = (rows * cols * banks * ranks) >> (20 - 3);
611 pvt->channel[i].dimms++;
613 debugf0("\tdimm %d %d Mb offset: %x, "
614 "bank: %d, rank: %d, row: %#x, col: %#x\n",
616 RANKOFFSET(dimm_dod[j]),
617 banks, ranks, rows, cols);
620 npages = size >> (PAGE_SHIFT - 20);
622 npages = size << (20 - PAGE_SHIFT);
625 csr = &mci->csrows[*csrow];
626 csr->first_page = last_page + 1;
628 csr->last_page = last_page;
629 csr->nr_pages = npages;
633 csr->csrow_idx = *csrow;
634 csr->nr_channels = 1;
636 csr->channels[0].chan_idx = i;
637 csr->channels[0].ce_count = 0;
639 pvt->csrow_map[i][j] = *csrow;
649 csr->dtype = DEV_X16;
652 csr->dtype = DEV_UNKNOWN;
655 csr->edac_mode = mode;
661 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
662 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
663 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
664 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
665 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
666 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
667 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
668 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
669 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
670 for (j = 0; j < 8; j++)
671 debugf1("\t\t%#x\t%#x\t%#x\n",
672 (value[j] >> 27) & 0x1,
673 (value[j] >> 24) & 0x7,
674 (value[j] && ((1 << 24) - 1)));
680 /****************************************************************************
681 Error insertion routines
682 ****************************************************************************/
684 /* The i7core has independent error injection features per channel.
685 However, to have a simpler code, we don't allow enabling error injection
686 on more than one channel.
687 Also, since a change at an inject parameter will be applied only at enable,
688 we're disabling error injection on all write calls to the sysfs nodes that
689 controls the error code injection.
691 static int disable_inject(struct mem_ctl_info *mci)
693 struct i7core_pvt *pvt = mci->pvt_info;
695 pvt->inject.enable = 0;
697 if (!pvt->pci_ch[pvt->inject.channel][0])
700 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
701 MC_CHANNEL_ERROR_INJECT, 0);
707 * i7core inject inject.section
709 * accept and store error injection inject.section value
710 * bit 0 - refers to the lower 32-byte half cacheline
711 * bit 1 - refers to the upper 32-byte half cacheline
713 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
714 const char *data, size_t count)
716 struct i7core_pvt *pvt = mci->pvt_info;
720 if (pvt->inject.enable)
723 rc = strict_strtoul(data, 10, &value);
724 if ((rc < 0) || (value > 3))
727 pvt->inject.section = (u32) value;
731 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
734 struct i7core_pvt *pvt = mci->pvt_info;
735 return sprintf(data, "0x%08x\n", pvt->inject.section);
741 * accept and store error injection inject.section value
742 * bit 0 - repeat enable - Enable error repetition
743 * bit 1 - inject ECC error
744 * bit 2 - inject parity error
746 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
747 const char *data, size_t count)
749 struct i7core_pvt *pvt = mci->pvt_info;
753 if (pvt->inject.enable)
756 rc = strict_strtoul(data, 10, &value);
757 if ((rc < 0) || (value > 7))
760 pvt->inject.type = (u32) value;
764 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
767 struct i7core_pvt *pvt = mci->pvt_info;
768 return sprintf(data, "0x%08x\n", pvt->inject.type);
772 * i7core_inject_inject.eccmask_store
774 * The type of error (UE/CE) will depend on the inject.eccmask value:
775 * Any bits set to a 1 will flip the corresponding ECC bit
776 * Correctable errors can be injected by flipping 1 bit or the bits within
777 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
778 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
779 * uncorrectable error to be injected.
781 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
782 const char *data, size_t count)
784 struct i7core_pvt *pvt = mci->pvt_info;
788 if (pvt->inject.enable)
791 rc = strict_strtoul(data, 10, &value);
795 pvt->inject.eccmask = (u32) value;
799 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
802 struct i7core_pvt *pvt = mci->pvt_info;
803 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
809 * The type of error (UE/CE) will depend on the inject.eccmask value:
810 * Any bits set to a 1 will flip the corresponding ECC bit
811 * Correctable errors can be injected by flipping 1 bit or the bits within
812 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
813 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
814 * uncorrectable error to be injected.
817 #define DECLARE_ADDR_MATCH(param, limit) \
818 static ssize_t i7core_inject_store_##param( \
819 struct mem_ctl_info *mci, \
820 const char *data, size_t count) \
822 struct i7core_pvt *pvt; \
826 debugf1("%s()\n", __func__); \
827 pvt = mci->pvt_info; \
829 if (pvt->inject.enable) \
830 disable_inject(mci); \
832 if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
835 rc = strict_strtoul(data, 10, &value); \
836 if ((rc < 0) || (value >= limit)) \
840 pvt->inject.param = value; \
845 static ssize_t i7core_inject_show_##param( \
846 struct mem_ctl_info *mci, \
849 struct i7core_pvt *pvt; \
851 pvt = mci->pvt_info; \
852 debugf1("%s() pvt=%p\n", __func__, pvt); \
853 if (pvt->inject.param < 0) \
854 return sprintf(data, "any\n"); \
856 return sprintf(data, "%d\n", pvt->inject.param);\
859 #define ATTR_ADDR_MATCH(param) \
863 .mode = (S_IRUGO | S_IWUSR) \
865 .show = i7core_inject_show_##param, \
866 .store = i7core_inject_store_##param, \
869 DECLARE_ADDR_MATCH(channel, 3);
870 DECLARE_ADDR_MATCH(dimm, 3);
871 DECLARE_ADDR_MATCH(rank, 4);
872 DECLARE_ADDR_MATCH(bank, 32);
873 DECLARE_ADDR_MATCH(page, 0x10000);
874 DECLARE_ADDR_MATCH(col, 0x4000);
876 static int write_and_test(struct pci_dev *dev, int where, u32 val)
881 debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
882 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
885 for (count = 0; count < 10; count++) {
888 pci_write_config_dword(dev, where, val);
889 pci_read_config_dword(dev, where, &read);
895 i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
896 "write=%08x. Read=%08x\n",
897 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
904 * This routine prepares the Memory Controller for error injection.
905 * The error will be injected when some process tries to write to the
906 * memory that matches the given criteria.
907 * The criteria can be set in terms of a mask where dimm, rank, bank, page
908 * and col can be specified.
909 * A -1 value for any of the mask items will make the MCU to ignore
910 * that matching criteria for error injection.
912 * It should be noticed that the error will only happen after a write operation
913 * on a memory that matches the condition. if REPEAT_EN is not enabled at
914 * inject mask, then it will produce just one error. Otherwise, it will repeat
915 * until the injectmask would be cleaned.
917 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
918 * is reliable enough to check if the MC is using the
919 * three channels. However, this is not clear at the datasheet.
921 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
922 const char *data, size_t count)
924 struct i7core_pvt *pvt = mci->pvt_info;
930 if (!pvt->pci_ch[pvt->inject.channel][0])
933 rc = strict_strtoul(data, 10, &enable);
938 pvt->inject.enable = 1;
944 /* Sets pvt->inject.dimm mask */
945 if (pvt->inject.dimm < 0)
948 if (pvt->channel[pvt->inject.channel].dimms > 2)
949 mask |= (pvt->inject.dimm & 0x3LL) << 35;
951 mask |= (pvt->inject.dimm & 0x1LL) << 36;
954 /* Sets pvt->inject.rank mask */
955 if (pvt->inject.rank < 0)
958 if (pvt->channel[pvt->inject.channel].dimms > 2)
959 mask |= (pvt->inject.rank & 0x1LL) << 34;
961 mask |= (pvt->inject.rank & 0x3LL) << 34;
964 /* Sets pvt->inject.bank mask */
965 if (pvt->inject.bank < 0)
968 mask |= (pvt->inject.bank & 0x15LL) << 30;
970 /* Sets pvt->inject.page mask */
971 if (pvt->inject.page < 0)
974 mask |= (pvt->inject.page & 0xffff) << 14;
976 /* Sets pvt->inject.column mask */
977 if (pvt->inject.col < 0)
980 mask |= (pvt->inject.col & 0x3fff);
984 * bits 1-2: MASK_HALF_CACHELINE
986 * bit 4: INJECT_ADDR_PARITY
989 injectmask = (pvt->inject.type & 1) |
990 (pvt->inject.section & 0x3) << 1 |
991 (pvt->inject.type & 0x6) << (3 - 1);
993 /* Unlock writes to registers - this register is write only */
994 pci_write_config_dword(pvt->pci_noncore,
995 MC_CFG_CONTROL, 0x2);
997 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
998 MC_CHANNEL_ADDR_MATCH, mask);
999 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1000 MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1002 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1003 MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1005 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1006 MC_CHANNEL_ERROR_INJECT, injectmask);
1009 * This is something undocumented, based on my tests
1010 * Without writing 8 to this register, errors aren't injected. Not sure
1013 pci_write_config_dword(pvt->pci_noncore,
1016 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1018 mask, pvt->inject.eccmask, injectmask);
1024 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1027 struct i7core_pvt *pvt = mci->pvt_info;
1030 if (!pvt->pci_ch[pvt->inject.channel][0])
1033 pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1034 MC_CHANNEL_ERROR_INJECT, &injectmask);
1036 debugf0("Inject error read: 0x%018x\n", injectmask);
1038 if (injectmask & 0x0c)
1039 pvt->inject.enable = 1;
1041 return sprintf(data, "%d\n", pvt->inject.enable);
1044 #define DECLARE_COUNTER(param) \
1045 static ssize_t i7core_show_counter_##param( \
1046 struct mem_ctl_info *mci, \
1049 struct i7core_pvt *pvt = mci->pvt_info; \
1051 debugf1("%s() \n", __func__); \
1052 if (!pvt->ce_count_available || (pvt->is_registered)) \
1053 return sprintf(data, "data unavailable\n"); \
1054 return sprintf(data, "%lu\n", \
1055 pvt->udimm_ce_count[param]); \
1058 #define ATTR_COUNTER(param) \
1061 .name = __stringify(udimm##param), \
1062 .mode = (S_IRUGO | S_IWUSR) \
1064 .show = i7core_show_counter_##param \
1076 static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
1077 ATTR_ADDR_MATCH(channel),
1078 ATTR_ADDR_MATCH(dimm),
1079 ATTR_ADDR_MATCH(rank),
1080 ATTR_ADDR_MATCH(bank),
1081 ATTR_ADDR_MATCH(page),
1082 ATTR_ADDR_MATCH(col),
1083 { .attr = { .name = NULL } }
1086 static struct mcidev_sysfs_group i7core_inject_addrmatch = {
1087 .name = "inject_addrmatch",
1088 .mcidev_attr = i7core_addrmatch_attrs,
1091 static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1097 static struct mcidev_sysfs_group i7core_udimm_counters = {
1098 .name = "all_channel_counts",
1099 .mcidev_attr = i7core_udimm_counters_attrs,
1102 static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
1105 .name = "inject_section",
1106 .mode = (S_IRUGO | S_IWUSR)
1108 .show = i7core_inject_section_show,
1109 .store = i7core_inject_section_store,
1112 .name = "inject_type",
1113 .mode = (S_IRUGO | S_IWUSR)
1115 .show = i7core_inject_type_show,
1116 .store = i7core_inject_type_store,
1119 .name = "inject_eccmask",
1120 .mode = (S_IRUGO | S_IWUSR)
1122 .show = i7core_inject_eccmask_show,
1123 .store = i7core_inject_eccmask_store,
1125 .grp = &i7core_inject_addrmatch,
1128 .name = "inject_enable",
1129 .mode = (S_IRUGO | S_IWUSR)
1131 .show = i7core_inject_enable_show,
1132 .store = i7core_inject_enable_store,
1134 { .attr = { .name = NULL } }, /* Reserved for udimm counters */
1135 { .attr = { .name = NULL } }
1138 /****************************************************************************
1139 Device initialization routines: put/get, init/exit
1140 ****************************************************************************/
1143 * i7core_put_devices 'put' all the devices that we have
1144 * reserved via 'get'
1146 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1150 debugf0(__FILE__ ": %s()\n", __func__);
1151 for (i = 0; i < i7core_dev->n_devs; i++) {
1152 struct pci_dev *pdev = i7core_dev->pdev[i];
1155 debugf0("Removing dev %02x:%02x.%d\n",
1157 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1160 kfree(i7core_dev->pdev);
1161 list_del(&i7core_dev->list);
1165 static void i7core_put_all_devices(void)
1167 struct i7core_dev *i7core_dev, *tmp;
1169 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
1170 i7core_put_devices(i7core_dev);
1173 static void __init i7core_xeon_pci_fixup(int dev_id)
1175 struct pci_dev *pdev = NULL;
1178 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1179 * aren't announced by acpi. So, we need to use a legacy scan probing
1182 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, NULL);
1183 if (unlikely(!pdev)) {
1184 for (i = 0; i < MAX_SOCKET_BUSES; i++)
1185 pcibios_scan_specific_bus(255-i);
1190 * i7core_get_devices Find and perform 'get' operation on the MCH's
1191 * device/functions we want to reference for this driver
1193 * Need to 'get' device 16 func 1 and func 2
1195 int i7core_get_onedevice(struct pci_dev **prev, int devno,
1196 struct pci_id_descr *dev_descr, unsigned n_devs)
1198 struct i7core_dev *i7core_dev;
1200 struct pci_dev *pdev = NULL;
1204 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1205 dev_descr->dev_id, *prev);
1208 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
1209 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1210 * to probe for the alternate address in case of failure
1212 if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
1213 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1214 PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
1216 if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
1217 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1218 PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1227 if (dev_descr->optional)
1230 i7core_printk(KERN_ERR,
1231 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1232 dev_descr->dev, dev_descr->func,
1233 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1235 /* End of list, leave */
1238 bus = pdev->bus->number;
1245 i7core_dev = get_i7core_dev(socket);
1247 i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
1250 i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
1252 if (!i7core_dev->pdev) {
1256 i7core_dev->socket = socket;
1257 i7core_dev->n_devs = n_devs;
1258 list_add_tail(&i7core_dev->list, &i7core_edac_list);
1261 if (i7core_dev->pdev[devno]) {
1262 i7core_printk(KERN_ERR,
1263 "Duplicated device for "
1264 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1265 bus, dev_descr->dev, dev_descr->func,
1266 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1271 i7core_dev->pdev[devno] = pdev;
1274 if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1275 PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1276 i7core_printk(KERN_ERR,
1277 "Device PCI ID %04x:%04x "
1278 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1279 PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1280 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1281 bus, dev_descr->dev, dev_descr->func);
1285 /* Be sure that the device is enabled */
1286 if (unlikely(pci_enable_device(pdev) < 0)) {
1287 i7core_printk(KERN_ERR,
1289 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1290 bus, dev_descr->dev, dev_descr->func,
1291 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1295 debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1296 socket, bus, dev_descr->dev,
1298 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1305 static int i7core_get_devices(struct pci_id_descr dev_descr[], unsigned n_devs)
1308 struct pci_dev *pdev = NULL;
1310 for (i = 0; i < n_devs; i++) {
1313 rc = i7core_get_onedevice(&pdev, i, &dev_descr[i],
1316 i7core_put_all_devices();
/*
 * mci_bind_devs - cache the per-socket PCI devices inside the mci private data
 * @mci:	EDAC memory controller instance
 * @i7core_dev:	per-socket device bundle previously collected by
 *		i7core_get_devices()
 *
 * Sorts each pdev into pvt->pci_mcr[] (slot 3, MC registers),
 * pvt->pci_ch[chan][] (slots 4..4+NUM_CHANS-1, per-channel functions) or
 * pvt->pci_noncore (slot 0 func 0), keyed by PCI slot/function.  Also
 * detects registered DIMMs (dev 3 func 2 present) and, for unbuffered
 * memory, hooks the extra udimm counter sysfs group.
 *
 * NOTE(review): truncated excerpt — several braces/guards are missing;
 * error path at the bottom reports a device outside the expected range.
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	pvt->is_registered = 0;
	for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];
		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);

		if (unlikely(func > MAX_MCR_FUNC))
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func)
			pvt->pci_noncore = pdev;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		/* Dev 3 func 2 only exists with registered (buffered) DIMMs */
		if (PCI_SLOT(pdev->devfn) == 3 &&
			PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = 1;

	/*
	 * Add extra nodes to count errors on udimm
	 * For registered memory, this is not needed, since the counters
	 * are already displayed at the standard locations
	 */
	if (!pvt->is_registered)
		i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
			&i7core_udimm_counters;

	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
1384 /****************************************************************************
1385 Error check routines
1386 ****************************************************************************/
/*
 * i7core_rdimm_update_csrow - report @add corrected errors for one DIMM
 * @mci:  EDAC memory controller instance
 * @chan: channel number
 * @dimm: dimm number within the channel
 * @add:  number of new corrected errors since the last poll
 *
 * Maps (chan, dimm) to its csrow via pvt->csrow_map and emits one
 * corrected-error event per new error to the EDAC core.
 */
static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
				      int chan, int dimm, int add)
	struct i7core_pvt *pvt = mci->pvt_info;
	int row = pvt->csrow_map[chan][dimm], i;

	for (i = 0; i < add; i++) {
		msg = kasprintf(GFP_KERNEL, "Corrected error "
				"(Socket=%d channel=%d dimm=%d)",
				pvt->i7core_dev->socket, chan, dimm);

		edac_mc_handle_fbd_ce(mci, row, 0, msg);
/*
 * i7core_rdimm_update_ce_count - fold new CE counter readings into totals
 * @mci:		EDAC memory controller instance
 * @chan:		channel the readings belong to
 * @new0/@new1/@new2:	current hardware counter values for dimm 0/1/2
 *
 * On the first call only a snapshot is taken (ce_count_available gates the
 * delta computation); afterwards the per-dimm deltas versus the stored
 * snapshot are accumulated and forwarded to i7core_rdimm_update_csrow().
 *
 * NOTE(review): truncated excerpt — the hardware counters can wrap, and the
 * (missing) lines presumably clamp negative deltas; verify against the
 * original before relying on overflow behavior.
 */
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
					 int chan, int new0, int new1, int new2)
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

		pvt->rdimm_ce_count[chan][2] += add2;
		pvt->rdimm_ce_count[chan][1] += add1;
		pvt->rdimm_ce_count[chan][0] += add0;

		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;

	/*updated the edac core */
	i7core_rdimm_update_csrow(mci, chan, 0, add0);
	i7core_rdimm_update_csrow(mci, chan, 1, add1);
	i7core_rdimm_update_csrow(mci, chan, 2, add2);
/*
 * i7core_rdimm_check_mc_ecc_err - poll corrected-error counters (RDIMM case)
 * @mci: EDAC memory controller instance
 *
 * Reads the six MC_COR_ECC_CNT registers (dev 3 func 2) into rcv[channel][],
 * then, per channel, splits them into per-dimm counts: with 3 dimms each
 * register half maps to one dimm; with 1-2 dimms the top/bottom fields of a
 * register are summed per dimm.  The results are handed to
 * i7core_rdimm_update_ce_count() for delta accounting.
 */
static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
	struct i7core_pvt *pvt = mci->pvt_info;
	int i, new0, new1, new2;

	/*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
	for (i = 0 ; i < 3; i++) {
		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
		/*if the channel has 3 dimms*/
		if (pvt->channel[i].dimms > 2) {
			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
				DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
				DIMM_BOT_COR_ERR(rcv[i][1]);

		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1485 /* This function is based on the device 3 function 4 registers as described on:
1486 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1487 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1488 * also available at:
1489 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
/*
 * i7core_udimm_check_mc_ecc_err - poll corrected-error counters (UDIMM case)
 * @mci: EDAC memory controller instance
 *
 * Based on the device 3 function 4 registers (Xeon 5500 datasheet vol 2).
 * Reads MC_TEST_ERR_RCV0/RCV1, extracts the three per-dimm corrected-error
 * counts, and accumulates deltas versus the previous snapshot; the first
 * call only records the snapshot (ce_count_available gate).  Any nonzero
 * delta is logged.
 */
static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
	struct i7core_pvt *pvt = mci->pvt_info;
	int new0, new1, new2;

	/* Function 4 device may be absent; nothing to poll then. */
	if (!pvt->pci_mcr[4]) {
		debugf0("%s MCR registers not found\n", __func__);

	/* Corrected test errors */
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);

	/* Store the new values */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);

	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		int add0, add1, add2;

		add2 = new2 - pvt->udimm_last_ce_count[2];
		add1 = new1 - pvt->udimm_last_ce_count[1];
		add0 = new0 - pvt->udimm_last_ce_count[0];

		pvt->udimm_ce_count[2] += add2;
		pvt->udimm_ce_count[1] += add1;
		pvt->udimm_ce_count[0] += add0;

		if (add0 | add1 | add2)
			i7core_printk(KERN_ERR, "New Corrected error(s): "
				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->udimm_last_ce_count[2] = new2;
	pvt->udimm_last_ce_count[1] = new1;
	pvt->udimm_last_ce_count[0] = new0;
1546 * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1547 * Architectures Software Developer’s Manual Volume 3B.
1548 * Nehalem are defined as family 0x06, model 0x1a
1550 * The MCA registers used here are the following ones:
1551 * struct mce field MCA Register
1552 * m->status MSR_IA32_MC8_STATUS
1553 * m->addr MSR_IA32_MC8_ADDR
1554 * m->misc MSR_IA32_MC8_MISC
1555 * In the case of Nehalem, the error information is masked at .status and .misc
1558 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1561 struct i7core_pvt *pvt = mci->pvt_info;
1562 char *type, *optype, *err, *msg;
1563 unsigned long error = m->status & 0x1ff0000l;
1564 u32 optypenum = (m->status >> 4) & 0x07;
1565 u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1566 u32 dimm = (m->misc >> 16) & 0x3;
1567 u32 channel = (m->misc >> 18) & 0x3;
1568 u32 syndrome = m->misc >> 32;
1569 u32 errnum = find_first_bit(&error, 32);
1572 if (m->mcgstatus & 1)
1577 switch (optypenum) {
1579 optype = "generic undef request";
1582 optype = "read error";
1585 optype = "write error";
1588 optype = "addr/cmd error";
1591 optype = "scrubbing error";
1594 optype = "reserved";
1600 err = "read ECC error";
1603 err = "RAS ECC error";
1606 err = "write parity error";
1609 err = "redundacy loss";
1615 err = "memory range error";
1618 err = "RTID out of range";
1621 err = "address parity error";
1624 err = "byte enable parity error";
1630 /* FIXME: should convert addr into bank and rank information */
1631 msg = kasprintf(GFP_ATOMIC,
1632 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1633 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1634 type, (long long) m->addr, m->cpu, dimm, channel,
1635 syndrome, core_err_cnt, (long long)m->status,
1636 (long long)m->misc, optype, err);
1640 csrow = pvt->csrow_map[channel][dimm];
1642 /* Call the helper to output message */
1643 if (m->mcgstatus & 1)
1644 edac_mc_handle_fbd_ue(mci, csrow, 0,
1645 0 /* FIXME: should be channel here */, msg);
1646 else if (!pvt->is_registered)
1647 edac_mc_handle_fbd_ce(mci, csrow,
1648 0 /* FIXME: should be channel here */, msg);
1654 * i7core_check_error Retrieve and process errors reported by the
1655 * hardware. Called by the Core module.
/*
 * i7core_check_error - drain and process queued MCE records (poll callback)
 * @mci: EDAC memory controller instance
 *
 * Step 1: copy pending entries from the NMI-filled ring buffer
 * (mce_entry[], indices mce_in/mce_out) into the private mce_outentry[]
 * staging buffer, handling ring wrap-around; this double buffering reduces
 * the window in which the NMI producer and this consumer race.  Any lost
 * entries (mce_overrun) are reported and the counter reset.
 * Step 2: decode each staged record via i7core_mce_output_error().
 * Finally the corrected-error counters are refreshed through the
 * udimm or rdimm poll path depending on pvt->is_registered.
 *
 * NOTE(review): truncated excerpt — memory barriers and some index
 * arithmetic between producer and consumer are not visible here.
 */
static void i7core_check_error(struct mem_ctl_info *mci)
	struct i7core_pvt *pvt = mci->pvt_info;

	/*
	 * MCE first step: Copy all mce errors into a temporary buffer
	 * We use a double buffering here, to reduce the risk of
	 */
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
	m = pvt->mce_outentry;
	if (pvt->mce_in + count > MCE_LOG_LEN) {
		/* Ring wraps: copy the tail first, then the head. */
		unsigned l = MCE_LOG_LEN - pvt->mce_in;

		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
		pvt->mce_in += count;

	if (pvt->mce_overrun) {
		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
		pvt->mce_overrun = 0;

	/*
	 * MCE second step: parse errors and display
	 */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);

	/*
	 * Now, let's increment CE error counts
	 */
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	i7core_rdimm_check_mc_ecc_err(mci);
1713 * i7core_mce_check_error Replicates mcelog routine to get errors
1714 * This routine simply queues mcelog errors, and
1715 * return. The error itself should be handled later
1716 * by i7core_check_error.
1717 * WARNING: As this routine should be called at NMI time, extra care should
1718 * be taken to avoid deadlocks, and to be as fast as possible.
/*
 * i7core_mce_check_error - edac_mce callback, runs in NMI context
 * @priv: the mem_ctl_info registered with edac_mce
 * @mce:  machine-check record to triage
 *
 * Filters out records that do not belong to this memory controller
 * (non-memory error class, wrong socket), then enqueues the record in the
 * lock-free ring buffer consumed by i7core_check_error().  A full ring
 * bumps the overrun counter instead of blocking — this must stay fast and
 * deadlock-free because it is called at NMI time.  Fatal errors
 * (MCG_STATUS bit 0) are processed immediately.
 *
 * Returns nonzero to tell mcelog the error was handled here.
 */
static int i7core_mce_check_error(void *priv, struct mce *mce)
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)

	/* Bank 8 registers are the only ones that we know how to handle */

	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)

	/* Ring full: drop the record and account for it. */
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advice mcelog that the error were handled */
/*
 * i7core_register_mci - allocate and register one EDAC MC instance
 * @i7core_dev:   per-socket PCI device bundle
 * @num_channels: active channel count (from i7core_get_active_channels)
 * @num_csrows:   csrow count for edac_mc_alloc
 *
 * Allocates the mem_ctl_info with a private i7core_pvt area, fills in the
 * static driver metadata, binds the cached PCI devices (mci_bind_devs),
 * reads the DIMM configuration, registers with the EDAC core, creates the
 * generic EDAC PCI control, initializes the error-injection defaults
 * (-1 == "any"), and finally registers the edac_mce callback so MCE
 * records start flowing into this instance.
 *
 * NOTE(review): truncated excerpt — the error-unwind paths (freeing mci on
 * failure) are not visible here.
 */
static int i7core_register_mci(struct i7core_dev *i7core_dev,
			       int num_channels, int num_csrows)
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;

	/* allocate a new MC control structure */
	mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
			    i7core_dev->socket);

	debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);

	/* record ptr to the generic device */
	mci->dev = &i7core_dev->pdev[0]->dev;

	pvt = mci->pvt_info;
	memset(pvt, 0, sizeof(*pvt));

	/*
	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
	 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
	 */
	mci->mtype_cap = MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "i7core_edac.c";
	mci->mod_ver = I7CORE_REVISION;
	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
				  i7core_dev->socket);
	mci->dev_name = pci_name(i7core_dev->pdev[0]);
	mci->ctl_page_to_phys = NULL;
	mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
	/* Set the function pointer to an actual operation function */
	mci->edac_check = i7core_check_error;

	/* Store pci devices at mci for faster access */
	rc = mci_bind_devs(mci, i7core_dev);
	if (unlikely(rc < 0))

	/* Get dimm basic config */
	get_dimm_config(mci, &csrow);

	/* add this new MC control structure to EDAC's list of MCs */
	if (unlikely(edac_mc_add_mc(mci))) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mc_add_mc()\n", __func__);
		/* FIXME: perhaps some code should go here that disables error
		 * reporting if we just enabled it
		 */

	/* allocating generic PCI control info */
	i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
	if (unlikely(!i7core_pci)) {
			"%s(): Unable to create PCI control\n",
			"%s(): PCI error report via EDAC not setup\n",

	/* Default error mask is any memory */
	pvt->inject.channel = 0;
	pvt->inject.dimm = -1;
	pvt->inject.rank = -1;
	pvt->inject.bank = -1;
	pvt->inject.page = -1;
	pvt->inject.col = -1;

	/* Registers on edac_mce in order to receive memory errors */
	pvt->edac_mce.priv = mci;
	pvt->edac_mce.check_error = i7core_mce_check_error;

	rc = edac_mce_register(&pvt->edac_mce);
	if (unlikely(rc < 0)) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mce_register()\n", __func__);
1858 * i7core_probe Probe for ONE instance of device to see if it is
1861 * 0 for FOUND a device
1862 * < 0 for error code
/*
 * i7core_probe - PCI probe entry: detect and register all sockets at once
 * @pdev: the PCI device that triggered the probe
 * @id:   matching table entry; driver_data distinguishes the first pass
 *
 * All memory controllers are claimed on the first successful probe
 * (dev_idx >= 1 bails out), under i7core_edac_lock.  Chooses the
 * Lynnfield or Nehalem/Nehalem-EP descriptor table based on the probed
 * device ID, reserves every listed device, then for each discovered
 * socket queries the active channels/csrows and registers an MC instance.
 * On failure all reserved devices are released before unlocking.
 */
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
	int dev_idx = id->driver_data;
	struct i7core_dev *i7core_dev;

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(dev_idx >= 1))

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);

	if (pdev->device == PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0) {
		printk(KERN_INFO "i7core_edac: detected a "
			"Lynnfield processor\n");
		rc = i7core_get_devices(pci_dev_descr_lynnfield,
					ARRAY_SIZE(pci_dev_descr_lynnfield));
		printk(KERN_INFO "i7core_edac: detected a "
			"Nehalem/Nehalem-EP processor\n");
		rc = i7core_get_devices(pci_dev_descr_i7core,
					ARRAY_SIZE(pci_dev_descr_i7core));

	if (unlikely(rc < 0))

	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		/* Check the number of active and not disabled channels */
		rc = i7core_get_active_channels(i7core_dev->socket,
						&channels, &csrows);
		if (unlikely(rc < 0))

		rc = i7core_register_mci(i7core_dev, channels, csrows);
		if (unlikely(rc < 0))

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&i7core_edac_lock);
	/* Error path: drop every device reserved above. */
	i7core_put_all_devices();
	mutex_unlock(&i7core_edac_lock);
1923 * i7core_remove destructor for one instance of device
/*
 * i7core_remove - PCI remove entry: tear down every registered socket
 * @pdev: device being removed (note: not usable for lookup, see below)
 *
 * Releases the generic EDAC PCI control, then — because @pdev points at
 * the X58 detection device rather than the grouped MC devices — walks the
 * global i7core_edac_list instead: for each socket, deletes the MC from
 * the EDAC core, unregisters the edac_mce callback, frees the ctl_name
 * string, and puts the reserved PCI devices.  All under i7core_edac_lock.
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
	struct mem_ctl_info *mci;
	struct i7core_dev *i7core_dev, *tmp;

	debugf0(__FILE__ ": %s()\n", __func__);

	edac_pci_release_generic_ctl(i7core_pci);

	/*
	 * we have a trouble here: pdev value for removal will be wrong, since
	 * it will point to the X58 register used to detect that the machine
	 * is a Nehalem or upper design. However, due to the way several PCI
	 * devices are grouped together to provide MC functionality, we need
	 * to use a different method for releasing the devices
	 */
	mutex_lock(&i7core_edac_lock);
	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
			struct i7core_pvt *pvt = mci->pvt_info;

			i7core_dev = pvt->i7core_dev;
			edac_mce_unregister(&pvt->edac_mce);
			kfree(mci->ctl_name);
			i7core_put_devices(i7core_dev);
			i7core_printk(KERN_ERR,
				      "Couldn't find mci for socket %d\n",
				      i7core_dev->socket);
	mutex_unlock(&i7core_edac_lock);
/* Export the PCI ID table so udev/modprobe can autoload this module. */
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
1967 * i7core_driver pci_driver structure for this module
/* PCI driver glue: name, probe/remove hooks, and the match table above. */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
1978 * i7core_init Module entry function
1979 * Try to initialize this module for its devices
/*
 * i7core_init - module entry point
 *
 * Configures the EDAC operation state (poll vs NMI), applies the Xeon
 * PCI quirk so the hidden bus-255 devices become visible, then registers
 * the PCI driver.  A registration failure is logged with its error code.
 *
 * NOTE(review): truncated excerpt — opstate_init() call and the final
 * return are not visible here.
 */
static int __init i7core_init(void)
	debugf2("MC: " __FILE__ ": %s()\n", __func__);

	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	i7core_xeon_pci_fixup(pci_dev_descr_i7core[0].dev_id);

	pci_rc = pci_register_driver(&i7core_driver);
		i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2004 * i7core_exit() Module exit function
2005 * Unregister the driver
/*
 * i7core_exit - module exit point: unregister the PCI driver
 * (the remove callback performs the per-socket teardown).
 */
static void __exit i7core_exit(void)
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	pci_unregister_driver(&i7core_driver);
module_init(i7core_init);
module_exit(i7core_exit);

/* Module metadata */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "

/* edac_op_state: 0=Poll, 1=NMI; read-only once loaded (perm 0444). */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");