1 /* Intel i7 core/Nehalem Memory Controller kernel module
3 * This driver supports the memory controllers found on the Intel
4 * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5 * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
8 * This file may be distributed under the terms of the
9 * GNU General Public License version 2 only.
11 * Copyright (c) 2009-2010 by:
12 * Mauro Carvalho Chehab <mchehab@redhat.com>
14 * Red Hat Inc. http://www.redhat.com
16 * Forked and adapted from the i5400_edac driver
18 * Based on the following public Intel datasheets:
19 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20 * Datasheet, Volume 2:
21 * http://download.intel.com/design/processor/datashts/320835.pdf
22 * Intel Xeon Processor 5500 Series Datasheet Volume 2
23 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
25 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/edac_mce.h>
37 #include <linux/smp.h>
38 #include <asm/processor.h>
40 #include "edac_core.h"
43 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
44 * registers start at bus 255, and are not reported by BIOS.
45 * We currently find devices with only 2 sockets. In order to support more QPI
46 * (Quick Path Interconnect) sockets, just increment this number.
48 #define MAX_SOCKET_BUSES 2
52 * Alter this version for the module when modifications are made
54 #define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
55 #define EDAC_MOD_STR "i7core_edac"
60 #define i7core_printk(level, fmt, arg...) \
61 edac_printk(level, "i7core", fmt, ##arg)
63 #define i7core_mc_printk(mci, level, fmt, arg...) \
64 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
67 * i7core Memory Controller Registers
70 /* OFFSETS for Device 0 Function 0 */
72 #define MC_CFG_CONTROL 0x90
74 /* OFFSETS for Device 3 Function 0 */
76 #define MC_CONTROL 0x48
77 #define MC_STATUS 0x4c
78 #define MC_MAX_DOD 0x64
81 * OFFSETS for Device 3 Function 4, as indicated in the Xeon 5500 datasheet:
82 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
85 #define MC_TEST_ERR_RCV1 0x60
86 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
88 #define MC_TEST_ERR_RCV0 0x64
89 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
90 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
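/*
 * For example, a (hypothetical) MC_TEST_ERR_RCV0 value of 0x00030002 decodes,
 * via the masks above, to 3 corrected errors on DIMM1 and 2 on DIMM0.
 */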
92 /* OFFSETS for Device 3 Function 2, as indicated in the Xeon 5500 datasheet */
93 #define MC_COR_ECC_CNT_0 0x80
94 #define MC_COR_ECC_CNT_1 0x84
95 #define MC_COR_ECC_CNT_2 0x88
96 #define MC_COR_ECC_CNT_3 0x8c
97 #define MC_COR_ECC_CNT_4 0x90
98 #define MC_COR_ECC_CNT_5 0x94
100 #define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
101 #define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
104 /* OFFSETS for Devices 4,5 and 6 Function 0 */
106 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
107 #define THREE_DIMMS_PRESENT (1 << 24)
108 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
109 #define QUAD_RANK_PRESENT (1 << 22)
110 #define REGISTERED_DIMM (1 << 15)
112 #define MC_CHANNEL_MAPPER 0x60
113 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
114 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
116 #define MC_CHANNEL_RANK_PRESENT 0x7c
117 #define RANK_PRESENT_MASK 0xffff
119 #define MC_CHANNEL_ADDR_MATCH 0xf0
120 #define MC_CHANNEL_ERROR_MASK 0xf8
121 #define MC_CHANNEL_ERROR_INJECT 0xfc
122 #define INJECT_ADDR_PARITY 0x10
123 #define INJECT_ECC 0x08
124 #define MASK_CACHELINE 0x06
125 #define MASK_FULL_CACHELINE 0x06
126 #define MASK_MSB32_CACHELINE 0x04
127 #define MASK_LSB32_CACHELINE 0x02
128 #define NO_MASK_CACHELINE 0x00
129 #define REPEAT_EN 0x01
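/*
 * Putting the bits above together, the MC_CHANNEL_ERROR_INJECT layout used by
 * this driver is: bit 0 = repeat enable, bits 2:1 = cacheline section mask,
 * bit 3 = inject ECC error, bit 4 = inject address parity error.
 */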
131 /* OFFSETS for Devices 4,5 and 6 Function 1 */
133 #define MC_DOD_CH_DIMM0 0x48
134 #define MC_DOD_CH_DIMM1 0x4c
135 #define MC_DOD_CH_DIMM2 0x50
136 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
137 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
138 #define DIMM_PRESENT_MASK (1 << 9)
139 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
140 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
141 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
142 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
143 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
144 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
145 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
146 #define MC_DOD_NUMCOL_MASK 3
147 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
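/*
 * Example decode of a (hypothetical) MC_DOD_CH_DIMMx value of 0x208: bit 9 is
 * set (DIMM present), NUMBANK = 0 (4 banks), NUMRANK = 0 (single rank),
 * NUMROW = 2 (16k rows) and NUMCOL = 0 (1k columns).
 */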
149 #define MC_RANK_PRESENT 0x7c
151 #define MC_SAG_CH_0 0x80
152 #define MC_SAG_CH_1 0x84
153 #define MC_SAG_CH_2 0x88
154 #define MC_SAG_CH_3 0x8c
155 #define MC_SAG_CH_4 0x90
156 #define MC_SAG_CH_5 0x94
157 #define MC_SAG_CH_6 0x98
158 #define MC_SAG_CH_7 0x9c
160 #define MC_RIR_LIMIT_CH_0 0x40
161 #define MC_RIR_LIMIT_CH_1 0x44
162 #define MC_RIR_LIMIT_CH_2 0x48
163 #define MC_RIR_LIMIT_CH_3 0x4C
164 #define MC_RIR_LIMIT_CH_4 0x50
165 #define MC_RIR_LIMIT_CH_5 0x54
166 #define MC_RIR_LIMIT_CH_6 0x58
167 #define MC_RIR_LIMIT_CH_7 0x5C
168 #define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
170 #define MC_RIR_WAY_CH 0x80
171 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
172 #define MC_RIR_WAY_RANK_MASK 0x7
179 #define MAX_DIMMS 3 /* Max DIMMS per channel */
180 #define MAX_MCR_FUNC 4
181 #define MAX_CHAN_FUNC 3
191 struct i7core_inject {
198 /* Error address mask */
199 int channel, dimm, rank, bank, page, col;
202 struct i7core_channel {
207 struct pci_id_descr {
214 struct pci_id_table {
215 struct pci_id_descr *descr;
220 struct list_head list;
222 struct pci_dev **pdev;
224 struct mem_ctl_info *mci;
228 struct pci_dev *pci_noncore;
229 struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
230 struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
232 struct i7core_dev *i7core_dev;
234 struct i7core_info info;
235 struct i7core_inject inject;
236 struct i7core_channel channel[NUM_CHANS];
238 int channels; /* Number of active channels */
240 int ce_count_available;
241 int csrow_map[NUM_CHANS][MAX_DIMMS];
243 /* ECC corrected error counts per udimm */
244 unsigned long udimm_ce_count[MAX_DIMMS];
245 int udimm_last_ce_count[MAX_DIMMS];
246 /* ECC corrected error counts per rdimm */
247 unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
248 int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
250 unsigned int is_registered;
253 struct edac_mce edac_mce;
255 /* Fifo double buffers */
256 struct mce mce_entry[MCE_LOG_LEN];
257 struct mce mce_outentry[MCE_LOG_LEN];
259 /* Fifo in/out counters */
260 unsigned mce_in, mce_out;
262 /* Counter of errors that could not be queued (buffer overrun) */
263 unsigned mce_overrun;
265 /* Struct to control EDAC polling */
266 struct edac_pci_ctl_info *i7core_pci;
270 static LIST_HEAD(i7core_edac_list);
271 static DEFINE_MUTEX(i7core_edac_lock);
273 #define PCI_DESCR(device, function, device_id) \
275 .func = (function), \
276 .dev_id = (device_id)
278 struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
279 /* Memory controller */
280 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
281 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
282 /* Exists only for RDIMM */
283 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
284 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
287 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
288 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
289 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
290 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
293 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
294 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
295 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
296 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
299 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
300 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
301 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
302 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
304 /* Generic Non-core registers */
306 * This is the PCI device present on i7core and on Xeon 35xx (8086:2c41).
307 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
308 * the probing code needs to test for the other address in case
309 * this one fails.
311 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },
315 struct pci_id_descr pci_dev_descr_lynnfield[] = {
316 { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR) },
317 { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD) },
318 { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST) },
320 { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
321 { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
322 { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
323 { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC) },
325 { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
326 { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
327 { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
328 { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) },
331 * This PCI device has an alternate address on some
332 * processors, like the Core i7 860
334 { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE) },
337 struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
338 /* Memory controller */
339 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2) },
340 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2) },
341 /* Exists only for RDIMM */
342 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1 },
343 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
346 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
347 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
348 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
349 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2) },
352 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
353 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
354 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
355 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2) },
358 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
359 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
360 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
361 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) },
363 /* Generic Non-core registers */
364 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2) },
368 #define PCI_ID_TABLE_ENTRY(A) { A, ARRAY_SIZE(A) }
369 struct pci_id_table pci_dev_table[] = {
370 PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
371 PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
372 PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
376 * pci_device_id table for the devices we are looking for
378 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
379 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
380 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
381 {0,} /* 0 terminated list. */
384 /****************************************************************************
385 Ancillary status routines
386 ****************************************************************************/
388 /* MC_CONTROL bits */
389 #define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
390 #define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
393 #define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
394 #define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
396 /* MC_MAX_DOD read functions */
397 static inline int numdimms(u32 dimms)
399 return (dimms & 0x3) + 1;
402 static inline int numrank(u32 rank)
404 static int ranks[4] = { 1, 2, 4, -EINVAL };
406 return ranks[rank & 0x3];
409 static inline int numbank(u32 bank)
411 static int banks[4] = { 4, 8, 16, -EINVAL };
413 return banks[bank & 0x3];
416 static inline int numrow(u32 row)
418 static int rows[8] = {
419 1 << 12, 1 << 13, 1 << 14, 1 << 15,
420 1 << 16, -EINVAL, -EINVAL, -EINVAL,
423 return rows[row & 0x7];
426 static inline int numcol(u32 col)
428 static int cols[8] = {
429 1 << 10, 1 << 11, 1 << 12, -EINVAL,
431 return cols[col & 0x3];
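/*
 * The helpers above decode the MC_MAX_DOD/MC_DOD_CH_DIMMx field encodings
 * into actual counts; encodings without a known meaning return -EINVAL.
 */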
434 static struct i7core_dev *get_i7core_dev(u8 socket)
436 struct i7core_dev *i7core_dev;
438 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
439 if (i7core_dev->socket == socket)
446 /****************************************************************************
447 Memory check routines
448 ****************************************************************************/
449 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
452 struct i7core_dev *i7core_dev = get_i7core_dev(socket);
458 for (i = 0; i < i7core_dev->n_devs; i++) {
459 if (!i7core_dev->pdev[i])
462 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
463 PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
464 return i7core_dev->pdev[i];
472 * i7core_get_active_channels() - gets the number of channels and csrows
473 * @socket: Quick Path Interconnect socket
474 * @channels: Number of channels that will be returned
475 * @csrows: Number of csrows found
477 * Since the EDAC core needs to know in advance the number of available channels
478 * and csrows, in order to allocate memory for csrows/channels, it needs
479 * to run two similar steps. The first step, implemented in this function,
480 * checks the number of csrows/channels present on one socket.
481 * This is used in order to properly allocate the size of the mci components.
483 * It should be noted that none of the currently available datasheets explains
484 * or even mentions how csrows are seen by the memory controller. So, we need
485 * to add a fake description for csrows.
486 * Thus, this driver maps one DIMM to one csrow.
488 static int i7core_get_active_channels(u8 socket, unsigned *channels,
491 struct pci_dev *pdev = NULL;
498 pdev = get_pdev_slot_func(socket, 3, 0);
500 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
505 /* Device 3 function 0 reads */
506 pci_read_config_dword(pdev, MC_STATUS, &status);
507 pci_read_config_dword(pdev, MC_CONTROL, &control);
509 for (i = 0; i < NUM_CHANS; i++) {
511 /* Check if the channel is active */
512 if (!(control & (1 << (8 + i))))
515 /* Check if the channel is disabled */
516 if (status & (1 << i))
519 pdev = get_pdev_slot_func(socket, i + 4, 1);
521 i7core_printk(KERN_ERR, "Couldn't find socket %d "
526 /* Devices 4-6 function 1 */
527 pci_read_config_dword(pdev,
528 MC_DOD_CH_DIMM0, &dimm_dod[0]);
529 pci_read_config_dword(pdev,
530 MC_DOD_CH_DIMM1, &dimm_dod[1]);
531 pci_read_config_dword(pdev,
532 MC_DOD_CH_DIMM2, &dimm_dod[2]);
536 for (j = 0; j < 3; j++) {
537 if (!DIMM_PRESENT(dimm_dod[j]))
543 debugf0("Number of active channels on socket %d: %d\n",
549 static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
551 struct i7core_pvt *pvt = mci->pvt_info;
552 struct csrow_info *csr;
553 struct pci_dev *pdev;
555 unsigned long last_page = 0;
559 /* Get data from the MC register, function 0 */
560 pdev = pvt->pci_mcr[0];
564 /* Device 3 function 0 reads */
565 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
566 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
567 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
568 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
570 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
571 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
572 pvt->info.max_dod, pvt->info.ch_map);
574 if (ECC_ENABLED(pvt)) {
575 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
577 mode = EDAC_S8ECD8ED;
579 mode = EDAC_S4ECD4ED;
581 debugf0("ECC disabled\n");
585 /* FIXME: need to handle the error codes */
586 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
588 numdimms(pvt->info.max_dod),
589 numrank(pvt->info.max_dod >> 2),
590 numbank(pvt->info.max_dod >> 4),
591 numrow(pvt->info.max_dod >> 6),
592 numcol(pvt->info.max_dod >> 9));
594 for (i = 0; i < NUM_CHANS; i++) {
595 u32 data, dimm_dod[3], value[8];
597 if (!pvt->pci_ch[i][0])
600 if (!CH_ACTIVE(pvt, i)) {
601 debugf0("Channel %i is not active\n", i);
604 if (CH_DISABLED(pvt, i)) {
605 debugf0("Channel %i is disabled\n", i);
609 /* Devices 4-6 function 0 */
610 pci_read_config_dword(pvt->pci_ch[i][0],
611 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
613 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
616 if (data & REGISTERED_DIMM)
621 if (data & THREE_DIMMS_PRESENT)
622 pvt->channel[i].dimms = 3;
623 else if (data & SINGLE_QUAD_RANK_PRESENT)
624 pvt->channel[i].dimms = 1;
626 pvt->channel[i].dimms = 2;
629 /* Devices 4-6 function 1 */
630 pci_read_config_dword(pvt->pci_ch[i][1],
631 MC_DOD_CH_DIMM0, &dimm_dod[0]);
632 pci_read_config_dword(pvt->pci_ch[i][1],
633 MC_DOD_CH_DIMM1, &dimm_dod[1]);
634 pci_read_config_dword(pvt->pci_ch[i][1],
635 MC_DOD_CH_DIMM2, &dimm_dod[2]);
637 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
638 "%d ranks, %cDIMMs\n",
640 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
642 pvt->channel[i].ranks,
643 (data & REGISTERED_DIMM) ? 'R' : 'U');
645 for (j = 0; j < 3; j++) {
646 u32 banks, ranks, rows, cols;
649 if (!DIMM_PRESENT(dimm_dod[j]))
652 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
653 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
654 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
655 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
657 /* DDR3 has 8 I/O banks */
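/*
 * The shift by (20 - 3) folds a multiply by 8 and a divide by 2^20 into a
 * single operation, yielding the DIMM size in MB.
 */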
658 size = (rows * cols * banks * ranks) >> (20 - 3);
660 pvt->channel[i].dimms++;
662 debugf0("\tdimm %d %d Mb offset: %x, "
663 "bank: %d, rank: %d, row: %#x, col: %#x\n",
665 RANKOFFSET(dimm_dod[j]),
666 banks, ranks, rows, cols);
669 npages = size >> (PAGE_SHIFT - 20);
671 npages = size << (20 - PAGE_SHIFT);
674 csr = &mci->csrows[*csrow];
675 csr->first_page = last_page + 1;
677 csr->last_page = last_page;
678 csr->nr_pages = npages;
682 csr->csrow_idx = *csrow;
683 csr->nr_channels = 1;
685 csr->channels[0].chan_idx = i;
686 csr->channels[0].ce_count = 0;
688 pvt->csrow_map[i][j] = *csrow;
698 csr->dtype = DEV_X16;
701 csr->dtype = DEV_UNKNOWN;
704 csr->edac_mode = mode;
710 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
711 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
712 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
713 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
714 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
715 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
716 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
717 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
718 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
719 for (j = 0; j < 8; j++)
720 debugf1("\t\t%#x\t%#x\t%#x\n",
721 (value[j] >> 27) & 0x1,
722 (value[j] >> 24) & 0x7,
723 (value[j] & ((1 << 24) - 1)));
729 /****************************************************************************
730 Error insertion routines
731 ****************************************************************************/
733 /* The i7core has independent error injection features per channel.
734 However, to keep the code simpler, we don't allow enabling error injection
735 on more than one channel.
736 Also, since a change to an inject parameter is only applied on enable,
737 we're disabling error injection on all write calls to the sysfs nodes that
738 control the error code injection.
740 static int disable_inject(struct mem_ctl_info *mci)
742 struct i7core_pvt *pvt = mci->pvt_info;
744 pvt->inject.enable = 0;
746 if (!pvt->pci_ch[pvt->inject.channel][0])
749 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
750 MC_CHANNEL_ERROR_INJECT, 0);
756 * i7core inject inject.section
758 * accept and store error injection inject.section value
759 * bit 0 - refers to the lower 32-byte half cacheline
760 * bit 1 - refers to the upper 32-byte half cacheline
762 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
763 const char *data, size_t count)
765 struct i7core_pvt *pvt = mci->pvt_info;
769 if (pvt->inject.enable)
772 rc = strict_strtoul(data, 10, &value);
773 if ((rc < 0) || (value > 3))
776 pvt->inject.section = (u32) value;
780 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
783 struct i7core_pvt *pvt = mci->pvt_info;
784 return sprintf(data, "0x%08x\n", pvt->inject.section);
790 * accept and store error injection inject.type value
791 * bit 0 - repeat enable - Enable error repetition
792 * bit 1 - inject ECC error
793 * bit 2 - inject parity error
795 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
796 const char *data, size_t count)
798 struct i7core_pvt *pvt = mci->pvt_info;
802 if (pvt->inject.enable)
805 rc = strict_strtoul(data, 10, &value);
806 if ((rc < 0) || (value > 7))
809 pvt->inject.type = (u32) value;
813 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
816 struct i7core_pvt *pvt = mci->pvt_info;
817 return sprintf(data, "0x%08x\n", pvt->inject.type);
821 * i7core_inject_eccmask_store
823 * The type of error (UE/CE) will depend on the inject.eccmask value:
824 * Any bits set to a 1 will flip the corresponding ECC bit
825 * Correctable errors can be injected by flipping 1 bit or the bits within
826 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
827 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
828 * uncorrectable error to be injected.
830 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
831 const char *data, size_t count)
833 struct i7core_pvt *pvt = mci->pvt_info;
837 if (pvt->inject.enable)
840 rc = strict_strtoul(data, 10, &value);
844 pvt->inject.eccmask = (u32) value;
848 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
851 struct i7core_pvt *pvt = mci->pvt_info;
852 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
858 * The type of error (UE/CE) will depend on the inject.eccmask value:
859 * Any bits set to a 1 will flip the corresponding ECC bit
860 * Correctable errors can be injected by flipping 1 bit or the bits within
861 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
862 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
863 * uncorrectable error to be injected.
866 #define DECLARE_ADDR_MATCH(param, limit) \
867 static ssize_t i7core_inject_store_##param( \
868 struct mem_ctl_info *mci, \
869 const char *data, size_t count) \
871 struct i7core_pvt *pvt; \
875 debugf1("%s()\n", __func__); \
876 pvt = mci->pvt_info; \
878 if (pvt->inject.enable) \
879 disable_inject(mci); \
881 if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
884 rc = strict_strtoul(data, 10, &value); \
885 if ((rc < 0) || (value >= limit)) \
889 pvt->inject.param = value; \
894 static ssize_t i7core_inject_show_##param( \
895 struct mem_ctl_info *mci, \
898 struct i7core_pvt *pvt; \
900 pvt = mci->pvt_info; \
901 debugf1("%s() pvt=%p\n", __func__, pvt); \
902 if (pvt->inject.param < 0) \
903 return sprintf(data, "any\n"); \
905 return sprintf(data, "%d\n", pvt->inject.param);\
908 #define ATTR_ADDR_MATCH(param) \
912 .mode = (S_IRUGO | S_IWUSR) \
914 .show = i7core_inject_show_##param, \
915 .store = i7core_inject_store_##param, \
918 DECLARE_ADDR_MATCH(channel, 3);
919 DECLARE_ADDR_MATCH(dimm, 3);
920 DECLARE_ADDR_MATCH(rank, 4);
921 DECLARE_ADDR_MATCH(bank, 32);
922 DECLARE_ADDR_MATCH(page, 0x10000);
923 DECLARE_ADDR_MATCH(col, 0x4000);
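/*
 * Each DECLARE_ADDR_MATCH() above generates the show/store handlers for one
 * sysfs node under inject_addrmatch/. The node accepts either a number below
 * the given limit or the string "any" (reported back as "any" when the stored
 * value is negative).
 */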
925 static int write_and_test(struct pci_dev *dev, int where, u32 val)
930 debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
931 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
934 for (count = 0; count < 10; count++) {
937 pci_write_config_dword(dev, where, val);
938 pci_read_config_dword(dev, where, &read);
944 i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
945 "write=%08x. Read=%08x\n",
946 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
953 * This routine prepares the Memory Controller for error injection.
954 * The error will be injected when some process tries to write to the
955 * memory that matches the given criteria.
956 * The criteria can be set in terms of a mask where dimm, rank, bank, page
957 * and col can be specified.
958 * A -1 value for any of the mask items will make the MCU ignore
959 * that matching criterion for error injection.
961 * It should be noted that the error will only happen after a write operation
962 * to memory that matches the condition. If REPEAT_EN is not enabled in the
963 * inject mask, then it will produce just one error. Otherwise, it will repeat
964 * until the inject mask is cleared.
966 * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
967 * is reliable enough to check whether the MC is using all
968 * three channels. However, this is not clear in the datasheet.
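 *
 * A typical injection sequence via sysfs (paths assume this controller is
 * registered as mc0; adjust to the actual instance) could be:
 *	echo 0 > /sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
 *	echo 2 > /sys/devices/system/edac/mc/mc0/inject_eccmask
 *	echo 3 > /sys/devices/system/edac/mc/mc0/inject_type
 *	echo 1 > /sys/devices/system/edac/mc/mc0/inject_enable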
970 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
971 const char *data, size_t count)
973 struct i7core_pvt *pvt = mci->pvt_info;
979 if (!pvt->pci_ch[pvt->inject.channel][0])
982 rc = strict_strtoul(data, 10, &enable);
987 pvt->inject.enable = 1;
993 /* Sets pvt->inject.dimm mask */
994 if (pvt->inject.dimm < 0)
997 if (pvt->channel[pvt->inject.channel].dimms > 2)
998 mask |= (pvt->inject.dimm & 0x3LL) << 35;
1000 mask |= (pvt->inject.dimm & 0x1LL) << 36;
1003 /* Sets pvt->inject.rank mask */
1004 if (pvt->inject.rank < 0)
1007 if (pvt->channel[pvt->inject.channel].dimms > 2)
1008 mask |= (pvt->inject.rank & 0x1LL) << 34;
1010 mask |= (pvt->inject.rank & 0x3LL) << 34;
1013 /* Sets pvt->inject.bank mask */
1014 if (pvt->inject.bank < 0)
1017 mask |= (pvt->inject.bank & 0x15LL) << 30;
1019 /* Sets pvt->inject.page mask */
1020 if (pvt->inject.page < 0)
1023 mask |= (pvt->inject.page & 0xffff) << 14;
1025 /* Sets pvt->inject.column mask */
1026 if (pvt->inject.col < 0)
1029 mask |= (pvt->inject.col & 0x3fff);
1033 * bits 1-2: MASK_HALF_CACHELINE
1035 * bit 4: INJECT_ADDR_PARITY
1038 injectmask = (pvt->inject.type & 1) |
1039 (pvt->inject.section & 0x3) << 1 |
1040 (pvt->inject.type & 0x6) << (3 - 1);
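/*
 * E.g. with inject.type = 0x3 (repeat + ECC) and inject.section = 0, this
 * yields injectmask = 0x09, i.e. REPEAT_EN | INJECT_ECC.
 */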
1042 /* Unlock writes to registers - this register is write only */
1043 pci_write_config_dword(pvt->pci_noncore,
1044 MC_CFG_CONTROL, 0x2);
1046 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1047 MC_CHANNEL_ADDR_MATCH, mask);
1048 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1049 MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1051 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1052 MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1054 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1055 MC_CHANNEL_ERROR_INJECT, injectmask);
1058 * This is something undocumented, based on my tests
1059 * Without writing 8 to this register, errors aren't injected. Not sure
1062 pci_write_config_dword(pvt->pci_noncore,
1065 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1067 mask, pvt->inject.eccmask, injectmask);
1073 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1076 struct i7core_pvt *pvt = mci->pvt_info;
1079 if (!pvt->pci_ch[pvt->inject.channel][0])
1082 pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1083 MC_CHANNEL_ERROR_INJECT, &injectmask);
1085 debugf0("Inject error read: 0x%018x\n", injectmask);
1087 if (injectmask & 0x0c)
1088 pvt->inject.enable = 1;
1090 return sprintf(data, "%d\n", pvt->inject.enable);
1093 #define DECLARE_COUNTER(param) \
1094 static ssize_t i7core_show_counter_##param( \
1095 struct mem_ctl_info *mci, \
1098 struct i7core_pvt *pvt = mci->pvt_info; \
1100 debugf1("%s() \n", __func__); \
1101 if (!pvt->ce_count_available || (pvt->is_registered)) \
1102 return sprintf(data, "data unavailable\n"); \
1103 return sprintf(data, "%lu\n", \
1104 pvt->udimm_ce_count[param]); \
1107 #define ATTR_COUNTER(param) \
1110 .name = __stringify(udimm##param), \
1111 .mode = (S_IRUGO | S_IWUSR) \
1113 .show = i7core_show_counter_##param \
1125 static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
1126 ATTR_ADDR_MATCH(channel),
1127 ATTR_ADDR_MATCH(dimm),
1128 ATTR_ADDR_MATCH(rank),
1129 ATTR_ADDR_MATCH(bank),
1130 ATTR_ADDR_MATCH(page),
1131 ATTR_ADDR_MATCH(col),
1132 { .attr = { .name = NULL } }
1135 static struct mcidev_sysfs_group i7core_inject_addrmatch = {
1136 .name = "inject_addrmatch",
1137 .mcidev_attr = i7core_addrmatch_attrs,
1140 static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1144 { .attr = { .name = NULL } }
1147 static struct mcidev_sysfs_group i7core_udimm_counters = {
1148 .name = "all_channel_counts",
1149 .mcidev_attr = i7core_udimm_counters_attrs,
1152 static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
1155 .name = "inject_section",
1156 .mode = (S_IRUGO | S_IWUSR)
1158 .show = i7core_inject_section_show,
1159 .store = i7core_inject_section_store,
1162 .name = "inject_type",
1163 .mode = (S_IRUGO | S_IWUSR)
1165 .show = i7core_inject_type_show,
1166 .store = i7core_inject_type_store,
1169 .name = "inject_eccmask",
1170 .mode = (S_IRUGO | S_IWUSR)
1172 .show = i7core_inject_eccmask_show,
1173 .store = i7core_inject_eccmask_store,
1175 .grp = &i7core_inject_addrmatch,
1178 .name = "inject_enable",
1179 .mode = (S_IRUGO | S_IWUSR)
1181 .show = i7core_inject_enable_show,
1182 .store = i7core_inject_enable_store,
1184 { .attr = { .name = NULL } }, /* Reserved for udimm counters */
1185 { .attr = { .name = NULL } }
1188 /****************************************************************************
1189 Device initialization routines: put/get, init/exit
1190 ****************************************************************************/
1193 * i7core_put_devices 'put' all the devices that we have
1194 * reserved via 'get'
1196 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1200 debugf0(__FILE__ ": %s()\n", __func__);
1201 for (i = 0; i < i7core_dev->n_devs; i++) {
1202 struct pci_dev *pdev = i7core_dev->pdev[i];
1205 debugf0("Removing dev %02x:%02x.%d\n",
1207 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1210 kfree(i7core_dev->pdev);
1211 list_del(&i7core_dev->list);
1215 static void i7core_put_all_devices(void)
1217 struct i7core_dev *i7core_dev, *tmp;
1219 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
1220 i7core_put_devices(i7core_dev);
1223 static void __init i7core_xeon_pci_fixup(struct pci_id_table *table)
1225 struct pci_dev *pdev = NULL;
1228 * On Xeon 55xx, the Intel Quickpath Arch Generic Non-core PCI buses
1229 * aren't announced by ACPI. So, we need to use a legacy scan probing
1232 while (table && table->descr) {
1233 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1234 if (unlikely(!pdev)) {
1235 for (i = 0; i < MAX_SOCKET_BUSES; i++)
1236 pcibios_scan_specific_bus(255-i);
1243 static unsigned i7core_pci_lastbus(void)
1245 int last_bus = 0, bus;
1246 struct pci_bus *b = NULL;
1248 while ((b = pci_find_next_bus(b)) != NULL) {
1250 debugf0("Found bus %d\n", bus);
1255 debugf0("Last bus %d\n", last_bus);
1261 * i7core_get_devices Find and perform 'get' operation on the MCH's
1262 * device/functions we want to reference for this driver
1264 * Need to 'get' device 16 func 1 and func 2
1266 int i7core_get_onedevice(struct pci_dev **prev, int devno,
1267 struct pci_id_descr *dev_descr, unsigned n_devs,
1270 struct i7core_dev *i7core_dev;
1272 struct pci_dev *pdev = NULL;
1276 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1277 dev_descr->dev_id, *prev);
1280 * On Xeon 55xx, the Intel Quickpath Arch Generic Non-core regs
1281 * are at PCI ID 8086:2c40, instead of 8086:2c41. So, we need
1282 * to probe for the alternate address in case of failure
1284 if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
1285 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1286 PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
1288 if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
1289 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1290 PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1299 if (dev_descr->optional)
1305 i7core_printk(KERN_INFO,
1306 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1307 dev_descr->dev, dev_descr->func,
1308 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1310 /* End of list, leave */
1313 bus = pdev->bus->number;
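/*
 * The non-core devices of each socket sit on the topmost PCI buses, counting
 * down from the last bus (see MAX_SOCKET_BUSES), so the socket number is the
 * distance from the last bus.
 */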
1315 socket = last_bus - bus;
1317 i7core_dev = get_i7core_dev(socket);
1319 i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
1322 i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
1324 if (!i7core_dev->pdev) {
1328 i7core_dev->socket = socket;
1329 i7core_dev->n_devs = n_devs;
1330 list_add_tail(&i7core_dev->list, &i7core_edac_list);
1333 if (i7core_dev->pdev[devno]) {
1334 i7core_printk(KERN_ERR,
1335 "Duplicated device for "
1336 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1337 bus, dev_descr->dev, dev_descr->func,
1338 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1343 i7core_dev->pdev[devno] = pdev;
1346 if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1347 PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1348 i7core_printk(KERN_ERR,
1349 "Device PCI ID %04x:%04x "
1350 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1351 PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1352 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1353 bus, dev_descr->dev, dev_descr->func);
1357 /* Be sure that the device is enabled */
1358 if (unlikely(pci_enable_device(pdev) < 0)) {
1359 i7core_printk(KERN_ERR,
1361 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1362 bus, dev_descr->dev, dev_descr->func,
1363 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1367 debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1368 socket, bus, dev_descr->dev,
1370 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1377 static int i7core_get_devices(struct pci_id_table *table)
1379 int i, rc, last_bus;
1380 struct pci_dev *pdev = NULL;
1381 struct pci_id_descr *dev_descr;
1383 last_bus = i7core_pci_lastbus();
1385 while (table && table->descr) {
1386 dev_descr = table->descr;
1387 for (i = 0; i < table->n_devs; i++) {
1390 rc = i7core_get_onedevice(&pdev, i,
1399 i7core_put_all_devices();
1411 static int mci_bind_devs(struct mem_ctl_info *mci,
1412 struct i7core_dev *i7core_dev)
1414 struct i7core_pvt *pvt = mci->pvt_info;
1415 struct pci_dev *pdev;
1418 /* Associates i7core_dev and mci for future usage */
1419 pvt->i7core_dev = i7core_dev;
1420 i7core_dev->mci = mci;
1422 pvt->is_registered = 0;
1423 for (i = 0; i < i7core_dev->n_devs; i++) {
1424 pdev = i7core_dev->pdev[i];
1428 func = PCI_FUNC(pdev->devfn);
1429 slot = PCI_SLOT(pdev->devfn);
1431 if (unlikely(func > MAX_MCR_FUNC))
1433 pvt->pci_mcr[func] = pdev;
1434 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1435 if (unlikely(func > MAX_CHAN_FUNC))
1437 pvt->pci_ch[slot - 4][func] = pdev;
1438 } else if (!slot && !func)
1439 pvt->pci_noncore = pdev;
1443 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1444 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1445 pdev, i7core_dev->socket);
1447 if (PCI_SLOT(pdev->devfn) == 3 &&
1448 PCI_FUNC(pdev->devfn) == 2)
1449 pvt->is_registered = 1;
1453 * Add extra nodes to count errors on udimm
1454 * For registered memory, this is not needed, since the counters
1455 * are already displayed at the standard locations
1457 if (!pvt->is_registered)
1458 i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
1459 &i7core_udimm_counters;
1464 i7core_printk(KERN_ERR, "Device %d, function %d "
1465 "is out of the expected range\n",
1470 /****************************************************************************
1471 Error check routines
1472 ****************************************************************************/
1473 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1474 int chan, int dimm, int add)
1477 struct i7core_pvt *pvt = mci->pvt_info;
1478 int row = pvt->csrow_map[chan][dimm], i;
1480 for (i = 0; i < add; i++) {
1481 msg = kasprintf(GFP_KERNEL, "Corrected error "
1482 "(Socket=%d channel=%d dimm=%d)",
1483 pvt->i7core_dev->socket, chan, dimm);
1485 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1490 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1491 int chan, int new0, int new1, int new2)
1493 struct i7core_pvt *pvt = mci->pvt_info;
1494 int add0 = 0, add1 = 0, add2 = 0;
1495 /* Updates CE counters if it is not the first time here */
1496 if (pvt->ce_count_available) {
1497 /* Updates CE counters */
1499 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1500 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1501 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1505 pvt->rdimm_ce_count[chan][2] += add2;
1509 pvt->rdimm_ce_count[chan][1] += add1;
1513 pvt->rdimm_ce_count[chan][0] += add0;
1515 pvt->ce_count_available = 1;
1517 /* Store the new values */
1518 pvt->rdimm_last_ce_count[chan][2] = new2;
1519 pvt->rdimm_last_ce_count[chan][1] = new1;
1520 pvt->rdimm_last_ce_count[chan][0] = new0;
1522 /* Update the EDAC core */
1524 i7core_rdimm_update_csrow(mci, chan, 0, add0);
1526 i7core_rdimm_update_csrow(mci, chan, 1, add1);
1528 i7core_rdimm_update_csrow(mci, chan, 2, add2);
1532 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1534 struct i7core_pvt *pvt = mci->pvt_info;
1536 int i, new0, new1, new2;
1538 /*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/
1539 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1541 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1543 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1545 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1547 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1549 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1551 for (i = 0 ; i < 3; i++) {
1552 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1553 (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1554 /*if the channel has 3 dimms*/
1555 if (pvt->channel[i].dimms > 2) {
1556 new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1557 new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1558 new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1560 new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1561 DIMM_BOT_COR_ERR(rcv[i][0]);
1562 new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1563 DIMM_BOT_COR_ERR(rcv[i][1]);
1567 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1571 /* This function is based on the device 3 function 4 registers as described in:
1572 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1573 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1574 * also available at:
1575 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1577 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1579 struct i7core_pvt *pvt = mci->pvt_info;
1581 int new0, new1, new2;
1583 if (!pvt->pci_mcr[4]) {
1584 debugf0("%s MCR registers not found\n", __func__);
1588 /* Corrected test errors */
1589 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1590 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1592 /* Store the new values */
1593 new2 = DIMM2_COR_ERR(rcv1);
1594 new1 = DIMM1_COR_ERR(rcv0);
1595 new0 = DIMM0_COR_ERR(rcv0);
1597 /* Updates CE counters if it is not the first time here */
1598 if (pvt->ce_count_available) {
1599 /* Updates CE counters */
1600 int add0, add1, add2;
1602 add2 = new2 - pvt->udimm_last_ce_count[2];
1603 add1 = new1 - pvt->udimm_last_ce_count[1];
1604 add0 = new0 - pvt->udimm_last_ce_count[0];
1608 pvt->udimm_ce_count[2] += add2;
1612 pvt->udimm_ce_count[1] += add1;
1616 pvt->udimm_ce_count[0] += add0;
1618 if (add0 | add1 | add2)
1619 i7core_printk(KERN_ERR, "New Corrected error(s): "
1620 "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1623 pvt->ce_count_available = 1;
1625 /* Store the new values */
1626 pvt->udimm_last_ce_count[2] = new2;
1627 pvt->udimm_last_ce_count[1] = new1;
1628 pvt->udimm_last_ce_count[0] = new0;
1632 * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and IA-32
1633 * Architectures Software Developer’s Manual Volume 3B.
1634 * Nehalem is defined as family 0x06, model 0x1a
1636 * The MCA registers used here are the following ones:
1637 * struct mce field MCA Register
1638 * m->status MSR_IA32_MC8_STATUS
1639 * m->addr MSR_IA32_MC8_ADDR
1640 * m->misc MSR_IA32_MC8_MISC
1641 * In the case of Nehalem, the error information is masked at .status and .misc
1644 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1647 struct i7core_pvt *pvt = mci->pvt_info;
1648 char *type, *optype, *err, *msg;
1649 unsigned long error = m->status & 0x1ff0000l;
1650 u32 optypenum = (m->status >> 4) & 0x07;
1651 u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1652 u32 dimm = (m->misc >> 16) & 0x3;
1653 u32 channel = (m->misc >> 18) & 0x3;
1654 u32 syndrome = m->misc >> 32;
1655 u32 errnum = find_first_bit(&error, 32);
1658 if (m->mcgstatus & 1)
1663 switch (optypenum) {
1665 optype = "generic undef request";
1668 optype = "read error";
1671 optype = "write error";
1674 optype = "addr/cmd error";
1677 optype = "scrubbing error";
1680 optype = "reserved";
1686 err = "read ECC error";
1689 err = "RAS ECC error";
1692 err = "write parity error";
1695 err = "redundacy loss";
1701 err = "memory range error";
1704 err = "RTID out of range";
1707 err = "address parity error";
1710 err = "byte enable parity error";
1716 /* FIXME: should convert addr into bank and rank information */
1717 msg = kasprintf(GFP_ATOMIC,
1718 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1719 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1720 type, (long long) m->addr, m->cpu, dimm, channel,
1721 syndrome, core_err_cnt, (long long)m->status,
1722 (long long)m->misc, optype, err);
1726 csrow = pvt->csrow_map[channel][dimm];
1728 /* Call the helper to output message */
1729 if (m->mcgstatus & 1)
1730 edac_mc_handle_fbd_ue(mci, csrow, 0,
1731 0 /* FIXME: should be channel here */, msg);
1732 else if (!pvt->is_registered)
1733 edac_mc_handle_fbd_ce(mci, csrow,
1734 0 /* FIXME: should be channel here */, msg);
1740 * i7core_check_error Retrieve and process errors reported by the
1741 * hardware. Called by the Core module.
1743 static void i7core_check_error(struct mem_ctl_info *mci)
1745 struct i7core_pvt *pvt = mci->pvt_info;
1751 * MCE first step: Copy all mce errors into a temporary buffer
1752 * We use double buffering here, to reduce the risk of losing errors.
1756 count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1759 goto check_ce_error;
1761 m = pvt->mce_outentry;
1762 if (pvt->mce_in + count > MCE_LOG_LEN) {
1763 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1765 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1771 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1773 pvt->mce_in += count;
1776 if (pvt->mce_overrun) {
1777 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1780 pvt->mce_overrun = 0;
1784 * MCE second step: parse errors and display
1786 for (i = 0; i < count; i++)
1787 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1790 * Now, let's increment CE error counts
1793 if (!pvt->is_registered)
1794 i7core_udimm_check_mc_ecc_err(mci);
1796 i7core_rdimm_check_mc_ecc_err(mci);
1800 * i7core_mce_check_error Replicates mcelog routine to get errors
1801 * This routine simply queues mcelog errors, and
1802 * returns. The error itself should be handled later
1803 * by i7core_check_error.
1804 * WARNING: As this routine should be called at NMI time, extra care should
1805 * be taken to avoid deadlocks, and to be as fast as possible.
1807 static int i7core_mce_check_error(void *priv, struct mce *mce)
1809 struct mem_ctl_info *mci = priv;
1810 struct i7core_pvt *pvt = mci->pvt_info;
1813 * Just let mcelog handle it if the error is
1814 * outside the memory controller
1816 if (((mce->status & 0xffff) >> 7) != 1)
1819 /* Bank 8 registers are the only ones that we know how to handle */
1824 /* Only handle if it is the right memory controller */
1825 if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
1830 if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1836 /* Copy memory error at the ringbuffer */
1837 memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1839 pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
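/*
 * mce_in/mce_out implement a simple ring buffer: it is filled here, possibly
 * in NMI context, and drained later by i7core_check_error(), so this path
 * avoids taking any locks (see the WARNING above).
 */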
1841 /* Handle fatal errors immediately */
1842 if (mce->mcgstatus & 1)
1843 i7core_check_error(mci);
1845 /* Advise mcelog that the errors were handled */
1849 static int i7core_register_mci(struct i7core_dev *i7core_dev,
1850 int num_channels, int num_csrows)
1852 struct mem_ctl_info *mci;
1853 struct i7core_pvt *pvt;
1857 /* allocate a new MC control structure */
1858 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1859 i7core_dev->socket);
1863 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1865 /* record ptr to the generic device */
1866 mci->dev = &i7core_dev->pdev[0]->dev;
1868 pvt = mci->pvt_info;
1869 memset(pvt, 0, sizeof(*pvt));
1872 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1873 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1876 mci->mtype_cap = MEM_FLAG_DDR3;
1877 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1878 mci->edac_cap = EDAC_FLAG_NONE;
1879 mci->mod_name = "i7core_edac.c";
1880 mci->mod_ver = I7CORE_REVISION;
1881 mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1882 i7core_dev->socket);
1883 mci->dev_name = pci_name(i7core_dev->pdev[0]);
1884 mci->ctl_page_to_phys = NULL;
1885 mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
1886 /* Set the function pointer to an actual operation function */
1887 mci->edac_check = i7core_check_error;
1889 /* Store pci devices at mci for faster access */
1890 rc = mci_bind_devs(mci, i7core_dev);
1891 if (unlikely(rc < 0))
1894 /* Get dimm basic config */
1895 get_dimm_config(mci, &csrow);
1897 /* add this new MC control structure to EDAC's list of MCs */
1898 if (unlikely(edac_mc_add_mc(mci))) {
1899 debugf0("MC: " __FILE__
1900 ": %s(): failed edac_mc_add_mc()\n", __func__);
1901 /* FIXME: perhaps some code should go here that disables error
1902 * reporting if we just enabled it
1909 /* allocating generic PCI control info */
1910 pvt->i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1912 if (unlikely(!pvt->i7core_pci)) {
1914 "%s(): Unable to create PCI control\n",
1917 "%s(): PCI error report via EDAC not setup\n",
1921 /* Default error mask is any memory */
1922 pvt->inject.channel = 0;
1923 pvt->inject.dimm = -1;
1924 pvt->inject.rank = -1;
1925 pvt->inject.bank = -1;
1926 pvt->inject.page = -1;
1927 pvt->inject.col = -1;
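/* A value of -1 means "any": that field is not used for address matching */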
1929 /* Register with edac_mce in order to receive memory errors */
1930 pvt->edac_mce.priv = mci;
1931 pvt->edac_mce.check_error = i7core_mce_check_error;
1933 rc = edac_mce_register(&pvt->edac_mce);
1934 if (unlikely(rc < 0)) {
1935 debugf0("MC: " __FILE__
1936 ": %s(): failed edac_mce_register()\n", __func__);
1946 * i7core_probe Probe for ONE instance of device to see if it is
1949 * 0 for FOUND a device
1950 * < 0 for error code
1953 static int probed = 0;
1955 static int __devinit i7core_probe(struct pci_dev *pdev,
1956 const struct pci_device_id *id)
1959 struct i7core_dev *i7core_dev;
1961 /* get the pci devices we want to reserve for our use */
1962 mutex_lock(&i7core_edac_lock);
1965 * All memory controllers are allocated at the first pass.
1967 if (unlikely(probed >= 1)) {
1968 mutex_unlock(&i7core_edac_lock);
1973 rc = i7core_get_devices(pci_dev_table);
1974 if (unlikely(rc < 0))
1977 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
1981 /* Check the number of active and not disabled channels */
1982 rc = i7core_get_active_channels(i7core_dev->socket,
1983 &channels, &csrows);
1984 if (unlikely(rc < 0))
1987 rc = i7core_register_mci(i7core_dev, channels, csrows);
1988 if (unlikely(rc < 0))
1992 i7core_printk(KERN_INFO, "Driver loaded.\n");
1994 mutex_unlock(&i7core_edac_lock);
1998 i7core_put_all_devices();
2000 mutex_unlock(&i7core_edac_lock);
2005 * i7core_remove destructor for one instance of device
2008 static void __devexit i7core_remove(struct pci_dev *pdev)
2010 struct mem_ctl_info *mci;
2011 struct i7core_dev *i7core_dev, *tmp;
2012 struct i7core_pvt *pvt;
2014 debugf0(__FILE__ ": %s()\n", __func__);
2017 * We have a problem here: the pdev value for removal will be wrong, since
2018 * it will point to the X58 register used to detect that the machine
2019 * is a Nehalem or newer design. However, due to the way several PCI
2020 * devices are grouped together to provide MC functionality, we need
2021 * to use a different method for releasing the devices
2024 mutex_lock(&i7core_edac_lock);
2025 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
2026 mci = find_mci_by_dev(&i7core_dev->pdev[0]->dev);
2027 if (unlikely(!mci || !mci->pvt_info)) {
2028 i7core_printk(KERN_ERR,
2029 "Couldn't find mci hanler\n");
2031 pvt = mci->pvt_info;
2032 i7core_dev = pvt->i7core_dev;
2034 if (likely(pvt->i7core_pci))
2035 edac_pci_release_generic_ctl(pvt->i7core_pci);
2037 i7core_printk(KERN_ERR,
2038 "Couldn't find mem_ctl_info for socket %d\n",
2039 i7core_dev->socket);
2040 pvt->i7core_pci = NULL;
2042 edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
2044 edac_mce_unregister(&pvt->edac_mce);
2045 kfree(mci->ctl_name);
2047 i7core_put_devices(i7core_dev);
2052 mutex_unlock(&i7core_edac_lock);
2055 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2058 * i7core_driver pci_driver structure for this module
2061 static struct pci_driver i7core_driver = {
2062 .name = "i7core_edac",
2063 .probe = i7core_probe,
2064 .remove = __devexit_p(i7core_remove),
2065 .id_table = i7core_pci_tbl,
2069 * i7core_init Module entry function
2070 * Try to initialize this module for its devices
2072 static int __init i7core_init(void)
2076 debugf2("MC: " __FILE__ ": %s()\n", __func__);
2078 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2081 i7core_xeon_pci_fixup(pci_dev_table);
2083 pci_rc = pci_register_driver(&i7core_driver);
2088 i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2095 * i7core_exit() Module exit function
2096 * Unregister the driver
2098 static void __exit i7core_exit(void)
2100 debugf2("MC: " __FILE__ ": %s()\n", __func__);
2101 pci_unregister_driver(&i7core_driver);
2104 module_init(i7core_init);
2105 module_exit(i7core_exit);
2107 MODULE_LICENSE("GPL");
2108 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2109 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2110 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2113 module_param(edac_op_state, int, 0444);
2114 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");