1 /* Intel 7 core Memory Controller kernel module (Nehalem)
3 * This file may be distributed under the terms of the
4 * GNU General Public License version 2 only.
6 * Copyright (c) 2009 by:
7 * Mauro Carvalho Chehab <mchehab@redhat.com>
9 * Red Hat Inc. http://www.redhat.com
11 * Forked and adapted from the i5400_edac driver
13 * Based on the following public Intel datasheets:
14 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
15 * Datasheet, Volume 2:
16 * http://download.intel.com/design/processor/datashts/320835.pdf
17 * Intel Xeon Processor 5500 Series Datasheet Volume 2
18 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/pci.h>
26 #include <linux/pci_ids.h>
27 #include <linux/slab.h>
28 #include <linux/delay.h>
29 #include <linux/edac.h>
30 #include <linux/mmzone.h>
31 #include <linux/edac_mce.h>
32 #include <linux/smp.h>
33 #include <asm/processor.h>
35 #include "edac_core.h"
38 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
39 * registers start at bus 255, and are not reported by BIOS.
40 * We currently find devices with only 2 sockets. In order to support more QPI
41 * Quick Path Interconnect, just increment this number.
43 #define MAX_SOCKET_BUSES 2
47 * Alter this version for the module when modifications are made
49 #define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
50 #define EDAC_MOD_STR "i7core_edac"
55 #define i7core_printk(level, fmt, arg...) \
56 edac_printk(level, "i7core", fmt, ##arg)
58 #define i7core_mc_printk(mci, level, fmt, arg...) \
59 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
62 * i7core Memory Controller Registers
65 /* OFFSETS for Device 0 Function 0 */
67 #define MC_CFG_CONTROL 0x90
69 /* OFFSETS for Device 3 Function 0 */
71 #define MC_CONTROL 0x48
72 #define MC_STATUS 0x4c
73 #define MC_MAX_DOD 0x64
 * OFFSETS for Device 3 Function 4, as indicated on the Xeon 5500 datasheet:
77 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
80 #define MC_TEST_ERR_RCV1 0x60
81 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
83 #define MC_TEST_ERR_RCV0 0x64
84 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
85 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
/* OFFSETS for Device 3 Function 2, as indicated on the Xeon 5500 datasheet */
88 #define MC_COR_ECC_CNT_0 0x80
89 #define MC_COR_ECC_CNT_1 0x84
90 #define MC_COR_ECC_CNT_2 0x88
91 #define MC_COR_ECC_CNT_3 0x8c
92 #define MC_COR_ECC_CNT_4 0x90
93 #define MC_COR_ECC_CNT_5 0x94
95 #define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
96 #define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
99 /* OFFSETS for Devices 4,5 and 6 Function 0 */
101 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
102 #define THREE_DIMMS_PRESENT (1 << 24)
103 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
104 #define QUAD_RANK_PRESENT (1 << 22)
105 #define REGISTERED_DIMM (1 << 15)
107 #define MC_CHANNEL_MAPPER 0x60
108 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
109 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
111 #define MC_CHANNEL_RANK_PRESENT 0x7c
112 #define RANK_PRESENT_MASK 0xffff
114 #define MC_CHANNEL_ADDR_MATCH 0xf0
115 #define MC_CHANNEL_ERROR_MASK 0xf8
116 #define MC_CHANNEL_ERROR_INJECT 0xfc
117 #define INJECT_ADDR_PARITY 0x10
118 #define INJECT_ECC 0x08
119 #define MASK_CACHELINE 0x06
120 #define MASK_FULL_CACHELINE 0x06
121 #define MASK_MSB32_CACHELINE 0x04
122 #define MASK_LSB32_CACHELINE 0x02
123 #define NO_MASK_CACHELINE 0x00
124 #define REPEAT_EN 0x01
126 /* OFFSETS for Devices 4,5 and 6 Function 1 */
128 #define MC_DOD_CH_DIMM0 0x48
129 #define MC_DOD_CH_DIMM1 0x4c
130 #define MC_DOD_CH_DIMM2 0x50
131 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
132 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
133 #define DIMM_PRESENT_MASK (1 << 9)
134 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
135 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
136 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
137 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
138 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
139 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
140 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
141 #define MC_DOD_NUMCOL_MASK 3
142 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
144 #define MC_RANK_PRESENT 0x7c
146 #define MC_SAG_CH_0 0x80
147 #define MC_SAG_CH_1 0x84
148 #define MC_SAG_CH_2 0x88
149 #define MC_SAG_CH_3 0x8c
150 #define MC_SAG_CH_4 0x90
151 #define MC_SAG_CH_5 0x94
152 #define MC_SAG_CH_6 0x98
153 #define MC_SAG_CH_7 0x9c
155 #define MC_RIR_LIMIT_CH_0 0x40
156 #define MC_RIR_LIMIT_CH_1 0x44
157 #define MC_RIR_LIMIT_CH_2 0x48
158 #define MC_RIR_LIMIT_CH_3 0x4C
159 #define MC_RIR_LIMIT_CH_4 0x50
160 #define MC_RIR_LIMIT_CH_5 0x54
161 #define MC_RIR_LIMIT_CH_6 0x58
162 #define MC_RIR_LIMIT_CH_7 0x5C
163 #define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
165 #define MC_RIR_WAY_CH 0x80
166 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
167 #define MC_RIR_WAY_RANK_MASK 0x7
174 #define MAX_DIMMS 3 /* Max DIMMS per channel */
175 #define MAX_MCR_FUNC 4
176 #define MAX_CHAN_FUNC 3
/*
 * Error-injection parameters set via sysfs; a negative value in a field
 * means "match any" for that field.
 */
struct i7core_inject {

	/* Error address mask */
	int channel, dimm, rank, bank, page, col;

/* Per-channel DIMM population info (ranks, dimm count, ...) */
struct i7core_channel {

/* One expected PCI device/function handled by this driver */
struct pci_id_descr {

	/* Link on the global i7core_edac_list */
	struct list_head list;

	/* Devices grabbed via pci_get_device() for this socket */
	struct pci_dev **pdev;

	struct mem_ctl_info *mci;

	/* Cached handles: non-core device, MCR functions, channel functions */
	struct pci_dev *pci_noncore;
	struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
	struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];

	struct i7core_dev *i7core_dev;

	struct i7core_info info;
	struct i7core_inject inject;
	struct i7core_channel channel[NUM_CHANS];

	int channels; /* Number of active channels */

	int ce_count_available;
	/* Maps (channel, dimm) -> fake csrow index used by EDAC core */
	int csrow_map[NUM_CHANS][MAX_DIMMS];

	/* ECC corrected errors counts per udimm */
	unsigned long udimm_ce_count[MAX_DIMMS];
	int udimm_last_ce_count[MAX_DIMMS];
	/* ECC corrected errors counts per rdimm */
	unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
	int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

	/* Nonzero when the RDIMM RAS device (dev 3 fn 2) is present */
	unsigned int is_registered;

	/* Hook into the edac_mce MCE delivery mechanism */
	struct edac_mce edac_mce;

	/* Fifo double buffers */
	struct mce mce_entry[MCE_LOG_LEN];
	struct mce mce_outentry[MCE_LOG_LEN];

	/* Fifo in/out counters */
	unsigned mce_in, mce_out;

	/* Count indicator to show errors not got */
	unsigned mce_overrun;
257 static LIST_HEAD(i7core_edac_list);
258 static DEFINE_MUTEX(i7core_edac_lock);
/*
 * Builds one pci_id_descr initializer from slot, function and device id.
 * NOTE(review): the '.dev = (device)' member line is not visible in this
 * chunk - confirm against the full file.
 */
#define PCI_DESCR(device, function, device_id)	\
	.func = (function),			\
	.dev_id = (device_id)
/* Devices expected on an i7core (Nehalem-EP) socket, one entry per fn */
struct pci_id_descr pci_dev_descr_i7core[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
			/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

		/* Generic Non-core registers */
	/*
	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
	 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
	 * the probing code needs to test for the other address in case of
	 * failure of this one
	 */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
/* Devices expected on a Lynnfield socket (no fn 3.2 / channel 2) */
struct pci_id_descr pci_dev_descr_lynnfield[] = {
	{ PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
	{ PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
	{ PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },

	{ PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
	{ PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
	{ PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
	{ PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },

	{ PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
	{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
	{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
	{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },

	/*
	 * This PCI device has an alternate address on some
	 * processors, like the Core i7 860
	 */
	{ PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
325 * pci_device_id table for which devices we are looking for
/* pci_device_id table: the probe anchors, not the per-socket devices */
static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
	{0,}			/* 0 terminated list. */
333 static struct edac_pci_ctl_info *i7core_pci;
335 /****************************************************************************
  Ancillary status routines
337 ****************************************************************************/
339 /* MC_CONTROL bits */
/*
 * NOTE(review): 'ch' is expanded unparenthesized in CH_ACTIVE/CH_DISABLED;
 * fine for the current plain-identifier callers, but an expression
 * argument would misbind - consider (1 << (8 + (ch))).
 */
#define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + ch)))
#define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))
/* MC_STATUS bits */
#define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
#define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << ch))
347 /* MC_MAX_DOD read functions */
348 static inline int numdimms(u32 dimms)
350 return (dimms & 0x3) + 1;
353 static inline int numrank(u32 rank)
355 static int ranks[4] = { 1, 2, 4, -EINVAL };
357 return ranks[rank & 0x3];
360 static inline int numbank(u32 bank)
362 static int banks[4] = { 4, 8, 16, -EINVAL };
364 return banks[bank & 0x3];
367 static inline int numrow(u32 row)
369 static int rows[8] = {
370 1 << 12, 1 << 13, 1 << 14, 1 << 15,
371 1 << 16, -EINVAL, -EINVAL, -EINVAL,
374 return rows[row & 0x7];
377 static inline int numcol(u32 col)
379 static int cols[8] = {
380 1 << 10, 1 << 11, 1 << 12, -EINVAL,
382 return cols[col & 0x3];
385 static struct i7core_dev *get_i7core_dev(u8 socket)
387 struct i7core_dev *i7core_dev;
389 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
390 if (i7core_dev->socket == socket)
397 /****************************************************************************
398 Memory check routines
399 ****************************************************************************/
400 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
403 struct i7core_dev *i7core_dev = get_i7core_dev(socket);
409 for (i = 0; i < i7core_dev->n_devs; i++) {
410 if (!i7core_dev->pdev[i])
413 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
414 PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
415 return i7core_dev->pdev[i];
423 * i7core_get_active_channels() - gets the number of channels and csrows
424 * @socket: Quick Path Interconnect socket
425 * @channels: Number of channels that will be returned
426 * @csrows: Number of csrows found
428 * Since EDAC core needs to know in advance the number of available channels
429 * and csrows, in order to allocate memory for csrows/channels, it is needed
430 * to run two similar steps. At the first step, implemented on this function,
431 * it checks the number of csrows/channels present at one socket.
432 * this is used in order to properly allocate the size of mci components.
434 * It should be noticed that none of the current available datasheets explain
435 * or even mention how csrows are seen by the memory controller. So, we need
436 * to add a fake description for csrows.
437 * So, this driver is attributing one DIMM memory for one csrow.
static int i7core_get_active_channels(u8 socket, unsigned *channels,
	struct pci_dev *pdev = NULL;

	/* Locate dev 3 fn 0 (memory controller registers) on this socket */
	pdev = get_pdev_slot_func(socket, 3, 0);
		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_STATUS, &status);
	pci_read_config_dword(pdev, MC_CONTROL, &control);

	for (i = 0; i < NUM_CHANS; i++) {
		/* Check if the channel is active: MC_CONTROL bit 8+i */
		if (!(control & (1 << (8 + i))))

		/* Check if the channel is disabled: MC_STATUS bit i */
		if (status & (1 << i))

		/* Per-channel DOD registers live on dev (4 + channel) fn 1 */
		pdev = get_pdev_slot_func(socket, i + 4, 1);
			i7core_printk(KERN_ERR, "Couldn't find socket %d "

		/* Devices 4-6 function 1 */
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		/* One fake csrow is accounted per present DIMM */
		for (j = 0; j < 3; j++) {
			if (!DIMM_PRESENT(dimm_dod[j]))

	debugf0("Number of active channels on socket %d: %d\n",
500 static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
502 struct i7core_pvt *pvt = mci->pvt_info;
503 struct csrow_info *csr;
504 struct pci_dev *pdev;
506 unsigned long last_page = 0;
510 /* Get data from the MC register, function 0 */
511 pdev = pvt->pci_mcr[0];
515 /* Device 3 function 0 reads */
516 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
517 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
518 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
519 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
521 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
522 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
523 pvt->info.max_dod, pvt->info.ch_map);
525 if (ECC_ENABLED(pvt)) {
526 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
528 mode = EDAC_S8ECD8ED;
530 mode = EDAC_S4ECD4ED;
532 debugf0("ECC disabled\n");
536 /* FIXME: need to handle the error codes */
537 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
539 numdimms(pvt->info.max_dod),
540 numrank(pvt->info.max_dod >> 2),
541 numbank(pvt->info.max_dod >> 4),
542 numrow(pvt->info.max_dod >> 6),
543 numcol(pvt->info.max_dod >> 9));
545 for (i = 0; i < NUM_CHANS; i++) {
546 u32 data, dimm_dod[3], value[8];
548 if (!pvt->pci_ch[i][0])
551 if (!CH_ACTIVE(pvt, i)) {
552 debugf0("Channel %i is not active\n", i);
555 if (CH_DISABLED(pvt, i)) {
556 debugf0("Channel %i is disabled\n", i);
560 /* Devices 4-6 function 0 */
561 pci_read_config_dword(pvt->pci_ch[i][0],
562 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
564 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
567 if (data & REGISTERED_DIMM)
572 if (data & THREE_DIMMS_PRESENT)
573 pvt->channel[i].dimms = 3;
574 else if (data & SINGLE_QUAD_RANK_PRESENT)
575 pvt->channel[i].dimms = 1;
577 pvt->channel[i].dimms = 2;
580 /* Devices 4-6 function 1 */
581 pci_read_config_dword(pvt->pci_ch[i][1],
582 MC_DOD_CH_DIMM0, &dimm_dod[0]);
583 pci_read_config_dword(pvt->pci_ch[i][1],
584 MC_DOD_CH_DIMM1, &dimm_dod[1]);
585 pci_read_config_dword(pvt->pci_ch[i][1],
586 MC_DOD_CH_DIMM2, &dimm_dod[2]);
588 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
589 "%d ranks, %cDIMMs\n",
591 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
593 pvt->channel[i].ranks,
594 (data & REGISTERED_DIMM) ? 'R' : 'U');
596 for (j = 0; j < 3; j++) {
597 u32 banks, ranks, rows, cols;
600 if (!DIMM_PRESENT(dimm_dod[j]))
603 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
604 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
605 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
606 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
608 /* DDR3 has 8 I/O banks */
609 size = (rows * cols * banks * ranks) >> (20 - 3);
611 pvt->channel[i].dimms++;
613 debugf0("\tdimm %d %d Mb offset: %x, "
614 "bank: %d, rank: %d, row: %#x, col: %#x\n",
616 RANKOFFSET(dimm_dod[j]),
617 banks, ranks, rows, cols);
620 npages = size >> (PAGE_SHIFT - 20);
622 npages = size << (20 - PAGE_SHIFT);
625 csr = &mci->csrows[*csrow];
626 csr->first_page = last_page + 1;
628 csr->last_page = last_page;
629 csr->nr_pages = npages;
633 csr->csrow_idx = *csrow;
634 csr->nr_channels = 1;
636 csr->channels[0].chan_idx = i;
637 csr->channels[0].ce_count = 0;
639 pvt->csrow_map[i][j] = *csrow;
649 csr->dtype = DEV_X16;
652 csr->dtype = DEV_UNKNOWN;
655 csr->edac_mode = mode;
661 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
662 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
663 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
664 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
665 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
666 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
667 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
668 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
669 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
670 for (j = 0; j < 8; j++)
671 debugf1("\t\t%#x\t%#x\t%#x\n",
672 (value[j] >> 27) & 0x1,
673 (value[j] >> 24) & 0x7,
674 (value[j] && ((1 << 24) - 1)));
680 /****************************************************************************
681 Error insertion routines
682 ****************************************************************************/
684 /* The i7core has independent error injection features per channel.
685 However, to have a simpler code, we don't allow enabling error injection
686 on more than one channel.
687 Also, since a change at an inject parameter will be applied only at enable,
688 we're disabling error injection on all write calls to the sysfs nodes that
689 controls the error code injection.
691 static int disable_inject(struct mem_ctl_info *mci)
693 struct i7core_pvt *pvt = mci->pvt_info;
695 pvt->inject.enable = 0;
697 if (!pvt->pci_ch[pvt->inject.channel][0])
700 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
701 MC_CHANNEL_ERROR_INJECT, 0);
707 * i7core inject inject.section
709 * accept and store error injection inject.section value
710 * bit 0 - refers to the lower 32-byte half cacheline
711 * bit 1 - refers to the upper 32-byte half cacheline
713 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
714 const char *data, size_t count)
716 struct i7core_pvt *pvt = mci->pvt_info;
720 if (pvt->inject.enable)
723 rc = strict_strtoul(data, 10, &value);
724 if ((rc < 0) || (value > 3))
727 pvt->inject.section = (u32) value;
731 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
734 struct i7core_pvt *pvt = mci->pvt_info;
735 return sprintf(data, "0x%08x\n", pvt->inject.section);
741 * accept and store error injection inject.section value
742 * bit 0 - repeat enable - Enable error repetition
743 * bit 1 - inject ECC error
744 * bit 2 - inject parity error
746 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
747 const char *data, size_t count)
749 struct i7core_pvt *pvt = mci->pvt_info;
753 if (pvt->inject.enable)
756 rc = strict_strtoul(data, 10, &value);
757 if ((rc < 0) || (value > 7))
760 pvt->inject.type = (u32) value;
764 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
767 struct i7core_pvt *pvt = mci->pvt_info;
768 return sprintf(data, "0x%08x\n", pvt->inject.type);
772 * i7core_inject_inject.eccmask_store
774 * The type of error (UE/CE) will depend on the inject.eccmask value:
775 * Any bits set to a 1 will flip the corresponding ECC bit
776 * Correctable errors can be injected by flipping 1 bit or the bits within
777 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
778 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
779 * uncorrectable error to be injected.
781 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
782 const char *data, size_t count)
784 struct i7core_pvt *pvt = mci->pvt_info;
788 if (pvt->inject.enable)
791 rc = strict_strtoul(data, 10, &value);
795 pvt->inject.eccmask = (u32) value;
799 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
802 struct i7core_pvt *pvt = mci->pvt_info;
803 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
809 * The type of error (UE/CE) will depend on the inject.eccmask value:
810 * Any bits set to a 1 will flip the corresponding ECC bit
811 * Correctable errors can be injected by flipping 1 bit or the bits within
812 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
813 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
814 * uncorrectable error to be injected.
/*
 * Generates the sysfs store/show pair for one address-match field of
 * the injection criteria. "any" (or a negative stored value) means the
 * field is a wildcard; otherwise the value must be below 'limit'.
 */
#define DECLARE_ADDR_MATCH(param, limit)			\
static ssize_t i7core_inject_store_##param(			\
	struct mem_ctl_info *mci,				\
	const char *data, size_t count)				\
	struct i7core_pvt *pvt;					\
	debugf1("%s()\n", __func__);				\
	pvt = mci->pvt_info;					\
	if (pvt->inject.enable)					\
		disable_inject(mci);				\
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
	rc = strict_strtoul(data, 10, &value);			\
	if ((rc < 0) || (value >= limit))			\
	pvt->inject.param = value;				\
static ssize_t i7core_inject_show_##param(			\
	struct mem_ctl_info *mci,				\
	struct i7core_pvt *pvt;					\
	pvt = mci->pvt_info;					\
	debugf1("%s() pvt=%p\n", __func__, pvt);		\
	if (pvt->inject.param < 0)				\
		return sprintf(data, "any\n");			\
		return sprintf(data, "%d\n", pvt->inject.param);\

/* Builds the mcidev_sysfs_attribute entry for one address-match field */
#define ATTR_ADDR_MATCH(param)				\
		.mode = (S_IRUGO | S_IWUSR)		\
	.show  = i7core_inject_show_##param,		\
	.store = i7core_inject_store_##param,		\

/* Instantiate the sysfs handlers; the limits mirror the register fields */
DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);
/*
 * Write 'val' to a PCI config register and read it back, retrying up to
 * ten times, since some of the injection registers do not latch the
 * value immediately. Logs an error when the readback never matches.
 */
static int write_and_test(struct pci_dev *dev, int where, u32 val)
	debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),

	for (count = 0; count < 10; count++) {
		pci_write_config_dword(dev, where, val);
		pci_read_config_dword(dev, where, &read);

	i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
		"write=%08x. Read=%08x\n",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
904 * This routine prepares the Memory Controller for error injection.
905 * The error will be injected when some process tries to write to the
906 * memory that matches the given criteria.
907 * The criteria can be set in terms of a mask where dimm, rank, bank, page
908 * and col can be specified.
909 * A -1 value for any of the mask items will make the MCU to ignore
910 * that matching criteria for error injection.
912 * It should be noticed that the error will only happen after a write operation
913 * on a memory that matches the condition. if REPEAT_EN is not enabled at
914 * inject mask, then it will produce just one error. Otherwise, it will repeat
915 * until the injectmask would be cleaned.
917 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
918 * is reliable enough to check if the MC is using the
919 * three channels. However, this is not clear at the datasheet.
/*
 * Arm error injection: build the 64-bit MC_CHANNEL_ADDR_MATCH mask from
 * the configured dimm/rank/bank/page/col criteria (a negative field sets
 * its "ignore" bit instead) and program the channel's injection regs.
 */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
	const char *data, size_t count)
	struct i7core_pvt *pvt = mci->pvt_info;

	if (!pvt->pci_ch[pvt->inject.channel][0])

	rc = strict_strtoul(data, 10, &enable);

	pvt->inject.enable = 1;

	/* Sets pvt->inject.dimm mask */
	if (pvt->inject.dimm < 0)
		/* 3-DIMM channels use a 2-bit dimm field; otherwise 1 bit */
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.dimm & 0x3LL) << 35;
			mask |= (pvt->inject.dimm & 0x1LL) << 36;

	/* Sets pvt->inject.rank mask (field width mirrors the dimm split) */
	if (pvt->inject.rank < 0)
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.rank & 0x1LL) << 34;
			mask |= (pvt->inject.rank & 0x3LL) << 34;

	/* Sets pvt->inject.bank mask */
	if (pvt->inject.bank < 0)
		/*
		 * FIXME(review): 0x15 (0b10101) drops bits 1 and 3 of the
		 * bank number, although bank accepts 0-31; this looks like
		 * a typo for 0x1f - confirm against the datasheet.
		 */
		mask |= (pvt->inject.bank & 0x15LL) << 30;

	/* Sets pvt->inject.page mask */
	if (pvt->inject.page < 0)
		mask |= (pvt->inject.page & 0xffff) << 14;

	/* Sets pvt->inject.column mask */
	if (pvt->inject.col < 0)
		mask |= (pvt->inject.col & 0x3fff);

	 * bits 1-2: MASK_HALF_CACHELINE
	 * bit 4: INJECT_ADDR_PARITY
	injectmask = (pvt->inject.type & 1) |
		     (pvt->inject.section & 0x3) << 1 |
		     (pvt->inject.type & 0x6) << (3 - 1);

	/* Unlock writes to registers - this register is write only */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 0x2);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
		       MC_CHANNEL_ADDR_MATCH, mask);
	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
		       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
		       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
		       MC_CHANNEL_ERROR_INJECT, injectmask);

	 * This is something undocumented, based on my tests
	 * Without writing 8 to this register, errors aren't injected. Not sure
	pci_write_config_dword(pvt->pci_noncore,

	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
		mask, pvt->inject.eccmask, injectmask);
/*
 * Report whether injection is armed, re-reading the hardware register so
 * the flag reflects an injection enabled behind the driver's back.
 * NOTE(review): "0x%018x" is an odd width for a 32-bit value - probably
 * meant "0x%08x"; harmless, debug output only.
 */
static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
	struct i7core_pvt *pvt = mci->pvt_info;

	if (!pvt->pci_ch[pvt->inject.channel][0])

	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			      MC_CHANNEL_ERROR_INJECT, &injectmask);

	debugf0("Inject error read: 0x%018x\n", injectmask);

	/* INJECT_ECC or INJECT_ADDR_PARITY still latched -> armed */
	if (injectmask & 0x0c)
		pvt->inject.enable = 1;

	return sprintf(data, "%d\n", pvt->inject.enable);
/*
 * Generates the sysfs show handler for one per-UDIMM corrected-error
 * counter; reports "data unavailable" for RDIMM setups (counters are in
 * the standard location there) or before the first count is collected.
 */
#define DECLARE_COUNTER(param)					\
static ssize_t i7core_show_counter_##param(			\
	struct mem_ctl_info *mci,				\
	struct i7core_pvt *pvt = mci->pvt_info;			\
	debugf1("%s() \n", __func__);				\
	if (!pvt->ce_count_available || (pvt->is_registered))	\
		return sprintf(data, "data unavailable\n");	\
	return sprintf(data, "%lu\n",				\
			pvt->udimm_ce_count[param]);		\

/* Builds the mcidev_sysfs_attribute entry for one udimm counter */
#define ATTR_COUNTER(param)				\
		.name = __stringify(udimm##param),	\
		.mode = (S_IRUGO | S_IWUSR)		\
	.show  = i7core_show_counter_##param		\
/* Sysfs nodes grouped under inject_addrmatch/ */
static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
	ATTR_ADDR_MATCH(channel),
	ATTR_ADDR_MATCH(dimm),
	ATTR_ADDR_MATCH(rank),
	ATTR_ADDR_MATCH(bank),
	ATTR_ADDR_MATCH(page),
	ATTR_ADDR_MATCH(col),
	{ .attr = { .name = NULL } }

static struct mcidev_sysfs_group i7core_inject_addrmatch = {
	.name  = "inject_addrmatch",
	.mcidev_attr = i7core_addrmatch_attrs,

/* Per-UDIMM CE counter nodes (attached only for unbuffered setups) */
static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {

static struct mcidev_sysfs_group i7core_udimm_counters = {
	.name  = "all_channel_counts",
	.mcidev_attr = i7core_udimm_counters_attrs,

/*
 * Top-level sysfs nodes; the second-to-last slot is filled at bind time
 * with the udimm counters group when the memory is unbuffered.
 */
static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
		.name = "inject_section",
		.mode = (S_IRUGO | S_IWUSR)
	.show  = i7core_inject_section_show,
	.store = i7core_inject_section_store,
		.name = "inject_type",
		.mode = (S_IRUGO | S_IWUSR)
	.show  = i7core_inject_type_show,
	.store = i7core_inject_type_store,
		.name = "inject_eccmask",
		.mode = (S_IRUGO | S_IWUSR)
	.show  = i7core_inject_eccmask_show,
	.store = i7core_inject_eccmask_store,
	.grp = &i7core_inject_addrmatch,
		.name = "inject_enable",
		.mode = (S_IRUGO | S_IWUSR)
	.show  = i7core_inject_enable_show,
	.store = i7core_inject_enable_store,
	{ .attr = { .name = NULL } },	/* Reserved for udimm counters */
	{ .attr = { .name = NULL } }
1138 /****************************************************************************
1139 Device initialization routines: put/get, init/exit
1140 ****************************************************************************/
1143 * i7core_put_devices 'put' all the devices that we have
1144 * reserved via 'get'
/*
 * Drop the references taken on one socket's PCI devices, free the pdev
 * array and unlink the i7core_dev from the global list.
 */
static void i7core_put_devices(struct i7core_dev *i7core_dev)
	debugf0(__FILE__ ": %s()\n", __func__);
	for (i = 0; i < i7core_dev->n_devs; i++) {
		struct pci_dev *pdev = i7core_dev->pdev[i];
		debugf0("Removing dev %02x:%02x.%d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
	kfree(i7core_dev->pdev);
	list_del(&i7core_dev->list);
1165 static void i7core_put_all_devices(void)
1167 struct i7core_dev *i7core_dev, *tmp;
1169 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
1170 i7core_put_devices(i7core_dev);
1173 static void __init i7core_xeon_pci_fixup(int dev_id)
1175 struct pci_dev *pdev = NULL;
1178 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1179 * aren't announced by acpi. So, we need to use a legacy scan probing
1182 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, NULL);
1183 if (unlikely(!pdev)) {
1184 for (i = 0; i < MAX_SOCKET_BUSES; i++)
1185 pcibios_scan_specific_bus(255-i);
1190 * i7core_get_devices Find and perform 'get' operation on the MCH's
1191 * device/functions we want to reference for this driver
1193 * Need to 'get' device 16 func 1 and func 2
/*
 * Grab one descriptor's PCI device (trying the known alternate ids for
 * the non-core device), allocate the per-socket i7core_dev on first use,
 * sanity-check slot/function and enable the device.
 */
int i7core_get_onedevice(struct pci_dev **prev, int devno,
			 struct pci_id_descr *dev_descr, unsigned n_devs)
	struct i7core_dev *i7core_dev;
	struct pci_dev *pdev = NULL;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
			      dev_descr->dev_id, *prev);

	/* i7 non-core at 8086:2c41, Xeon 55xx at 8086:2c40 - try both */
	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

	/* Lynnfield non-core also has ALT and REV2 ids */
	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2,

		/* Missing optional devices (e.g. RDIMM RAS) are tolerated */
		if (dev_descr->optional)

		i7core_printk(KERN_ERR,
			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
			dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

		/* End of list, leave */

	bus = pdev->bus->number;

	/* First device of a socket allocates the bookkeeping structure */
	i7core_dev = get_i7core_dev(socket);
		i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
		i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
		if (!i7core_dev->pdev) {
		i7core_dev->socket = socket;
		i7core_dev->n_devs = n_devs;
		list_add_tail(&i7core_dev->list, &i7core_edac_list);

	if (i7core_dev->pdev[devno]) {
		i7core_printk(KERN_ERR,
			"Duplicated device for "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	i7core_dev->pdev[devno] = pdev;

	/* Sanity check: the found device must sit at the expected slot.fn */
	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
		     PCI_FUNC(pdev->devfn) != dev_descr->func)) {
		i7core_printk(KERN_ERR,
			"Device PCI ID %04x:%04x "
			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			bus, dev_descr->dev, dev_descr->func);

	/* Be sure that the device is enabled */
	if (unlikely(pci_enable_device(pdev) < 0)) {
		i7core_printk(KERN_ERR,
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
		socket, bus, dev_descr->dev,
		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
/*
 * Grab every device listed in @dev_descr (possibly on several sockets);
 * releases everything already taken when one mandatory device fails.
 */
static int i7core_get_devices(struct pci_id_descr dev_descr[], unsigned n_devs)
	struct pci_dev *pdev = NULL;

	for (i = 0; i < n_devs; i++) {
		rc = i7core_get_onedevice(&pdev, i, &dev_descr[i],
			/* Mandatory device missing: undo all 'get's */
			i7core_put_all_devices();
/*
 * Sort the grabbed PCI devices of one socket into the pvt lookup tables
 * (pci_mcr[], pci_ch[][], pci_noncore) keyed by PCI slot/function, and
 * detect whether the memory is registered (dev 3 fn 2 present).
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	pvt->is_registered = 0;
	for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
			if (unlikely(func > MAX_MCR_FUNC))
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func)
			pvt->pci_noncore = pdev;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		/* The RAS device exists only on RDIMM configurations */
		if (PCI_SLOT(pdev->devfn) == 3 &&
		    PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = 1;

	/*
	 * Add extra nodes to count errors on udimm
	 * For registered memory, this is not needed, since the counters
	 * are already displayed at the standard locations
	 *
	 * NOTE(review): patching a static attrs array in place is fragile
	 * if several MCs with mixed UDIMM/RDIMM coexist - confirm.
	 */
	if (!pvt->is_registered)
		i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
			&i7core_udimm_counters;

	i7core_printk(KERN_ERR, "Device %d, function %d "
		"is out of the expected range\n",
1389 /****************************************************************************
1390 Error check routines
1391 ****************************************************************************/
/*
 * i7core_rdimm_update_csrow - report @add corrected errors to the EDAC core
 * @mci:	EDAC memory controller instance
 * @chan:	channel number
 * @dimm:	dimm number within the channel
 * @add:	number of new corrected errors since the last poll
 *
 * Maps (chan, dimm) to its csrow via pvt->csrow_map and emits one
 * edac_mc_handle_fbd_ce() event per new error, each with its own
 * kasprintf()'d message (freed by the EDAC core, presumably — the free
 * is not visible in this truncated chunk; confirm against edac_core).
 */
static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
				      int chan, int dimm, int add)
	struct i7core_pvt *pvt = mci->pvt_info;
	int row = pvt->csrow_map[chan][dimm], i;
	for (i = 0; i < add; i++) {
		msg = kasprintf(GFP_KERNEL, "Corrected error "
				"(Socket=%d channel=%d dimm=%d)",
				pvt->i7core_dev->socket, chan, dimm);
		edac_mc_handle_fbd_ce(mci, row, 0, msg);
/*
 * i7core_rdimm_update_ce_count - fold new HW counter readings into sw totals
 * @mci:	EDAC memory controller instance
 * @chan:	channel the readings belong to
 * @new0/@new1/@new2:	current hardware CE counts for dimms 0..2
 *
 * The hardware counters are free-running, so the delta against the values
 * cached in rdimm_last_ce_count[] gives the number of errors since the last
 * poll.  The very first call (ce_count_available == 0) only snapshots the
 * counters and reports nothing.  NOTE(review): chunk is truncated — wrap
 * handling for the deltas, if any, is not visible here.
 */
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
					 int chan, int new0, int new1, int new2)
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
		pvt->rdimm_ce_count[chan][2] += add2;
		pvt->rdimm_ce_count[chan][1] += add1;
		pvt->rdimm_ce_count[chan][0] += add0;
	pvt->ce_count_available = 1;
	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;
	/*updated the edac core */
	i7core_rdimm_update_csrow(mci, chan, 0, add0);
	i7core_rdimm_update_csrow(mci, chan, 1, add1);
	i7core_rdimm_update_csrow(mci, chan, 2, add2);
/*
 * i7core_rdimm_check_mc_ecc_err - poll registered-DIMM corrected-error regs
 * @mci:	EDAC memory controller instance
 *
 * Reads the six MC_COR_ECC_CNT registers from dev 3 func 2 (pci_mcr[2]) —
 * two 32-bit registers per channel, each packing a TOP and a BOT counter —
 * and feeds the per-dimm counts to i7core_rdimm_update_ce_count().
 * When a channel has 3 DIMMs the TOP/BOT fields map one counter per DIMM;
 * otherwise TOP+BOT of each register are summed into two per-DIMM counts.
 */
static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
	struct i7core_pvt *pvt = mci->pvt_info;
	int i, new0, new1, new2;
	/*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
	for (i = 0 ; i < 3; i++) {
		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
		/*if the channel has 3 dimms*/
		if (pvt->channel[i].dimms > 2) {
			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
				DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
				DIMM_BOT_COR_ERR(rcv[i][1]);
		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1490 /* This function is based on the device 3 function 4 registers as described on:
1491 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1492 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1493 * also available at:
1494 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
/*
 * i7core_udimm_check_mc_ecc_err - poll unbuffered-DIMM corrected-error regs
 * @mci:	EDAC memory controller instance
 *
 * Uses the dev 3 func 4 test registers (MC_TEST_ERR_RCV0/RCV1) described in
 * the Xeon 5500 datasheet vol 2.  Like the rdimm variant, the hardware
 * counters are free-running: deltas against udimm_last_ce_count[] are
 * accumulated into udimm_ce_count[] and logged; the first pass only
 * snapshots the counters.  Bails out early if func 4 was not found.
 */
static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
	struct i7core_pvt *pvt = mci->pvt_info;
	int new0, new1, new2;
	if (!pvt->pci_mcr[4]) {
		debugf0("%s MCR registers not found\n", __func__);
	/* Corrected test errors */
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
	/* Store the new values */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		int add0, add1, add2;
		add2 = new2 - pvt->udimm_last_ce_count[2];
		add1 = new1 - pvt->udimm_last_ce_count[1];
		add0 = new0 - pvt->udimm_last_ce_count[0];
		pvt->udimm_ce_count[2] += add2;
		pvt->udimm_ce_count[1] += add1;
		pvt->udimm_ce_count[0] += add0;
		/* only log when at least one counter moved */
		if (add0 | add1 | add2)
			i7core_printk(KERN_ERR, "New Corrected error(s): "
				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
		pvt->ce_count_available = 1;
	/* Store the new values */
	pvt->udimm_last_ce_count[2] = new2;
	pvt->udimm_last_ce_count[1] = new1;
	pvt->udimm_last_ce_count[0] = new0;
1551 * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1552 * Architectures Software Developer’s Manual Volume 3B.
1553 * Nehalem are defined as family 0x06, model 0x1a
1555 * The MCA registers used here are the following ones:
1556 * struct mce field MCA Register
1557 * m->status MSR_IA32_MC8_STATUS
1558 * m->addr MSR_IA32_MC8_ADDR
1559 * m->misc MSR_IA32_MC8_MISC
1560 * In the case of Nehalem, the error information is masked at .status and .misc
1563 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1566 struct i7core_pvt *pvt = mci->pvt_info;
1567 char *type, *optype, *err, *msg;
1568 unsigned long error = m->status & 0x1ff0000l;
1569 u32 optypenum = (m->status >> 4) & 0x07;
1570 u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1571 u32 dimm = (m->misc >> 16) & 0x3;
1572 u32 channel = (m->misc >> 18) & 0x3;
1573 u32 syndrome = m->misc >> 32;
1574 u32 errnum = find_first_bit(&error, 32);
1577 if (m->mcgstatus & 1)
1582 switch (optypenum) {
1584 optype = "generic undef request";
1587 optype = "read error";
1590 optype = "write error";
1593 optype = "addr/cmd error";
1596 optype = "scrubbing error";
1599 optype = "reserved";
1605 err = "read ECC error";
1608 err = "RAS ECC error";
1611 err = "write parity error";
1614 err = "redundacy loss";
1620 err = "memory range error";
1623 err = "RTID out of range";
1626 err = "address parity error";
1629 err = "byte enable parity error";
1635 /* FIXME: should convert addr into bank and rank information */
1636 msg = kasprintf(GFP_ATOMIC,
1637 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1638 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1639 type, (long long) m->addr, m->cpu, dimm, channel,
1640 syndrome, core_err_cnt, (long long)m->status,
1641 (long long)m->misc, optype, err);
1645 csrow = pvt->csrow_map[channel][dimm];
1647 /* Call the helper to output message */
1648 if (m->mcgstatus & 1)
1649 edac_mc_handle_fbd_ue(mci, csrow, 0,
1650 0 /* FIXME: should be channel here */, msg);
1651 else if (!pvt->is_registered)
1652 edac_mc_handle_fbd_ce(mci, csrow,
1653 0 /* FIXME: should be channel here */, msg);
1659 * i7core_check_error Retrieve and process errors reported by the
1660 * hardware. Called by the Core module.
/*
 * i7core_check_error - poll handler: drain queued MCEs and update CE counts
 * @mci:	EDAC memory controller instance
 *
 * First drains the single-producer ring buffer filled by
 * i7core_mce_check_error() at NMI time into mce_outentry[] (double
 * buffering keeps the copy window short), handling the wrap at
 * MCE_LOG_LEN, then decodes each entry via i7core_mce_output_error().
 * Finally polls the hardware CE counters — udimm or rdimm path depending
 * on pvt->is_registered.  NOTE(review): chunk is truncated — memory
 * barriers / index resets between the visible steps are not shown; do not
 * reorder these statements.
 */
static void i7core_check_error(struct mem_ctl_info *mci)
	struct i7core_pvt *pvt = mci->pvt_info;
	 * MCE first step: Copy all mce errors into a temporary buffer
	 * We use a double buffering here, to reduce the risk of
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
		goto check_ce_error;
	m = pvt->mce_outentry;
	if (pvt->mce_in + count > MCE_LOG_LEN) {
		/* wrapped: copy the tail segment first */
		unsigned l = MCE_LOG_LEN - pvt->mce_in;
		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
	pvt->mce_in += count;
	if (pvt->mce_overrun) {
		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
		pvt->mce_overrun = 0;
	 * MCE second step: parse errors and display
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
	 * Now, let's increment CE error counts
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
		i7core_rdimm_check_mc_ecc_err(mci);
1719 * i7core_mce_check_error Replicates mcelog routine to get errors
1720 * This routine simply queues mcelog errors, and
1721 * return. The error itself should be handled later
1722 * by i7core_check_error.
1723 * WARNING: As this routine should be called at NMI time, extra care should
1724 * be taken to avoid deadlocks, and to be as fast as possible.
/*
 * i7core_mce_check_error - NMI-time MCE filter and enqueue
 * @priv:	the mem_ctl_info registered with edac_mce
 * @mce:	the machine-check record being reported
 *
 * Runs in NMI context: no locking, no allocation — just filter and queue.
 * Accepts only memory-controller errors (status[15:7] pattern / bank 8)
 * for this mci's socket, copies them into the lock-free ring buffer
 * consumed by i7core_check_error(), and triggers immediate processing for
 * fatal errors.  NOTE(review): chunk is truncated — the overrun branch
 * body and return values are not fully visible.
 */
static int i7core_mce_check_error(void *priv, struct mce *mce)
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	if (((mce->status & 0xffff) >> 7) != 1)
	/* Bank 8 registers are the only ones that we know how to handle */
	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
	/* ring full? record the overrun instead of overwriting */
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);
	/* Advice mcelog that the error were handled */
/*
 * i7core_register_mci - allocate, configure and register one EDAC MC
 * @i7core_dev:	per-socket bundle of reserved PCI devices
 * @num_channels: active channel count (from i7core_get_active_channels)
 * @num_csrows:	csrow count for edac_mc_alloc()
 *
 * Sequence: edac_mc_alloc() -> fill mci fields (names, caps, sysfs attrs,
 * edac_check poll hook) -> mci_bind_devs() -> get_dimm_config() ->
 * edac_mc_add_mc() -> generic PCI control -> default injection mask ->
 * edac_mce_register() so NMI-time MCEs reach i7core_mce_check_error().
 * NOTE(review): chunk is truncated — the error-unwind labels between the
 * visible failure checks are not shown; the order of teardown matters.
 */
static int i7core_register_mci(struct i7core_dev *i7core_dev,
			       int num_channels, int num_csrows)
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;
	/* allocate a new MC control structure */
	mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
			    i7core_dev->socket);
	debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
	/* record ptr to the generic device */
	mci->dev = &i7core_dev->pdev[0]->dev;
	pvt = mci->pvt_info;
	memset(pvt, 0, sizeof(*pvt));
	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
	 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
	mci->mtype_cap = MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "i7core_edac.c";
	mci->mod_ver = I7CORE_REVISION;
	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
				  i7core_dev->socket);
	mci->dev_name = pci_name(i7core_dev->pdev[0]);
	mci->ctl_page_to_phys = NULL;
	mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
	/* Set the function pointer to an actual operation function */
	mci->edac_check = i7core_check_error;
	/* Store pci devices at mci for faster access */
	rc = mci_bind_devs(mci, i7core_dev);
	if (unlikely(rc < 0))
	/* Get dimm basic config */
	get_dimm_config(mci, &csrow);
	/* add this new MC control structure to EDAC's list of MCs */
	if (unlikely(edac_mc_add_mc(mci))) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mc_add_mc()\n", __func__);
		/* FIXME: perhaps some code should go here that disables error
		 * reporting if we just enabled it
	/* allocating generic PCI control info */
	i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
	if (unlikely(!i7core_pci)) {
		"%s(): Unable to create PCI control\n",
		"%s(): PCI error report via EDAC not setup\n",
	/* Default error mask is any memory */
	pvt->inject.channel = 0;
	pvt->inject.dimm = -1;
	pvt->inject.rank = -1;
	pvt->inject.bank = -1;
	pvt->inject.page = -1;
	pvt->inject.col = -1;
	/* Registers on edac_mce in order to receive memory errors */
	pvt->edac_mce.priv = mci;
	pvt->edac_mce.check_error = i7core_mce_check_error;
	rc = edac_mce_register(&pvt->edac_mce);
	if (unlikely(rc < 0)) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mce_register()\n", __func__);
1865 * i7core_probe Probe for ONE instance of device to see if it is
1868 * 0 for FOUND a device
1869 * < 0 for error code
/*
 * i7core_probe - PCI probe: set up EDAC for every detected socket at once
 * @pdev:	the PCI device that matched i7core_pci_tbl
 * @id:		matching table entry (driver_data is the device index)
 *
 * Only the first matching device (dev_idx 0) proceeds — all sockets'
 * controllers are collected on that first pass.  Distinguishes Lynnfield
 * from Nehalem/Nehalem-EP by the QPI link device ID, reserves the proper
 * descriptor table, then registers one mci per discovered socket.
 * Everything runs under i7core_edac_lock; the fail path releases all
 * reserved devices.  NOTE(review): chunk is truncated — goto labels and
 * return statements are partly missing.
 */
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
	int dev_idx = id->driver_data;
	struct i7core_dev *i7core_dev;
	 * All memory controllers are allocated at the first pass.
	if (unlikely(dev_idx >= 1))
	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);
	if (pdev->device == PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0) {
		printk(KERN_INFO "i7core_edac: detected a "
			"Lynnfield processor\n");
		rc = i7core_get_devices(pci_dev_descr_lynnfield,
					ARRAY_SIZE(pci_dev_descr_lynnfield));
		printk(KERN_INFO "i7core_edac: detected a "
			"Nehalem/Nehalem-EP processor\n");
		rc = i7core_get_devices(pci_dev_descr_i7core,
					ARRAY_SIZE(pci_dev_descr_i7core));
	if (unlikely(rc < 0))
	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		/* Check the number of active and not disabled channels */
		rc = i7core_get_active_channels(i7core_dev->socket,
						&channels, &csrows);
		if (unlikely(rc < 0))
		rc = i7core_register_mci(i7core_dev, channels, csrows);
		if (unlikely(rc < 0))
	i7core_printk(KERN_INFO, "Driver loaded.\n");
	mutex_unlock(&i7core_edac_lock);
	/* failure path: drop every device reserved above */
	i7core_put_all_devices();
	mutex_unlock(&i7core_edac_lock);
1930 * i7core_remove destructor for one instance of device
/*
 * i7core_remove - PCI remove: tear down every registered socket
 * @pdev:	unused for lookup (see comment below about grouped devices)
 *
 * Walks i7core_edac_list safely (entries are released while iterating),
 * deleting each mci via the first pdev's struct device, unregistering its
 * edac_mce hook, freeing the kasprintf'd ctl_name and dropping the PCI
 * device references.  All under i7core_edac_lock.
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
	struct mem_ctl_info *mci;
	struct i7core_dev *i7core_dev, *tmp;
	debugf0(__FILE__ ": %s()\n", __func__);
	edac_pci_release_generic_ctl(i7core_pci);
	 * we have a trouble here: pdev value for removal will be wrong, since
	 * it will point to the X58 register used to detect that the machine
	 * is a Nehalem or upper design. However, due to the way several PCI
	 * devices are grouped together to provide MC functionality, we need
	 * to use a different method for releasing the devices
	mutex_lock(&i7core_edac_lock);
	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
			struct i7core_pvt *pvt = mci->pvt_info;
			i7core_dev = pvt->i7core_dev;
			edac_mce_unregister(&pvt->edac_mce);
			kfree(mci->ctl_name);
			i7core_put_devices(i7core_dev);
			i7core_printk(KERN_ERR,
				"Couldn't find mci for socket %d\n",
				i7core_dev->socket);
	mutex_unlock(&i7core_edac_lock);
1971 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
1974 * i7core_driver pci_driver structure for this module
/* pci_driver hooks: probe/remove wired to the functions above; the ID
 * table only matches the detection device (see i7core_probe comment). */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
1985 * i7core_init Module entry function
1986 * Try to initialize this module for its devices
/*
 * i7core_init - module entry: fix up hidden buses, then register the driver
 *
 * i7core_xeon_pci_fixup() makes the BIOS-hidden bus-255 devices visible
 * before pci_register_driver() can bind them.  NOTE(review): truncated —
 * the opstate_init() call implied by the comment and the return paths are
 * not visible in this chunk.
 */
static int __init i7core_init(void)
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	i7core_xeon_pci_fixup(pci_dev_descr_i7core[0].dev_id);
	pci_rc = pci_register_driver(&i7core_driver);
	i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2011 * i7core_exit() Module exit function
2012 * Unregister the driver
/* i7core_exit - module exit: unregister the PCI driver (remove() does the
 * actual per-socket teardown). */
static void __exit i7core_exit(void)
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	pci_unregister_driver(&i7core_driver);
2020 module_init(i7core_init);
2021 module_exit(i7core_exit);
2023 MODULE_LICENSE("GPL");
2024 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2025 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2026 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2029 module_param(edac_op_state, int, 0444);
2030 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");