/* Intel i7 core Memory Controller kernel module (Nehalem)
3 * This file may be distributed under the terms of the
4 * GNU General Public License version 2 only.
6 * Copyright (c) 2009 by:
7 * Mauro Carvalho Chehab <mchehab@redhat.com>
9 * Red Hat Inc. http://www.redhat.com
11 * Forked and adapted from the i5400_edac driver
13 * Based on the following public Intel datasheets:
14 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
15 * Datasheet, Volume 2:
16 * http://download.intel.com/design/processor/datashts/320835.pdf
17 * Intel Xeon Processor 5500 Series Datasheet Volume 2
18 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/pci.h>
26 #include <linux/pci_ids.h>
27 #include <linux/slab.h>
28 #include <linux/delay.h>
29 #include <linux/edac.h>
30 #include <linux/mmzone.h>
31 #include <linux/edac_mce.h>
32 #include <linux/smp.h>
33 #include <asm/processor.h>
35 #include "edac_core.h"
/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255, and are not reported by BIOS.
 * We currently find devices with only 2 sockets. To support systems with
 * more QPI (QuickPath Interconnect) sockets, just increment this number.
 */
43 #define MAX_SOCKET_BUSES 2
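/*
 * Example: with MAX_SOCKET_BUSES = 2, the legacy probe in
 * i7core_xeon_pci_fixup() scans buses 255 and 254 (i.e. 255 - i).
 */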
47 * Alter this version for the module when modifications are made
49 #define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
50 #define EDAC_MOD_STR "i7core_edac"
55 #define i7core_printk(level, fmt, arg...) \
56 edac_printk(level, "i7core", fmt, ##arg)
58 #define i7core_mc_printk(mci, level, fmt, arg...) \
59 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
62 * i7core Memory Controller Registers
65 /* OFFSETS for Device 0 Function 0 */
67 #define MC_CFG_CONTROL 0x90
69 /* OFFSETS for Device 3 Function 0 */
71 #define MC_CONTROL 0x48
72 #define MC_STATUS 0x4c
73 #define MC_MAX_DOD 0x64
/*
 * OFFSETS for Device 3 Function 4, as indicated in the Xeon 5500 datasheet:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
80 #define MC_TEST_ERR_RCV1 0x60
81 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
83 #define MC_TEST_ERR_RCV0 0x64
84 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
85 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
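/*
 * Worked example (illustrative values): a MC_TEST_ERR_RCV0 read of
 * 0x0005000a decodes as DIMM1_COR_ERR() = (0x0005000a >> 16) & 0x7fff = 5
 * and DIMM0_COR_ERR() = 0x0005000a & 0x7fff = 10 corrected errors.
 */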
/* OFFSETS for Device 3 Function 2, as indicated in the Xeon 5500 datasheet */
88 #define MC_COR_ECC_CNT_0 0x80
89 #define MC_COR_ECC_CNT_1 0x84
90 #define MC_COR_ECC_CNT_2 0x88
91 #define MC_COR_ECC_CNT_3 0x8c
92 #define MC_COR_ECC_CNT_4 0x90
93 #define MC_COR_ECC_CNT_5 0x94
95 #define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
96 #define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
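/*
 * Worked example (illustrative values): a MC_COR_ECC_CNT_n read of
 * 0x00030001 decodes as DIMM_TOP_COR_ERR() = 3 and DIMM_BOT_COR_ERR() = 1.
 */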
99 /* OFFSETS for Devices 4,5 and 6 Function 0 */
101 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
102 #define THREE_DIMMS_PRESENT (1 << 24)
103 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
104 #define QUAD_RANK_PRESENT (1 << 22)
105 #define REGISTERED_DIMM (1 << 15)
107 #define MC_CHANNEL_MAPPER 0x60
108 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
109 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
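/*
 * Worked example (illustrative): for a channel mapper value of 0x0a,
 * physical channel 0 maps to write logical channel WRLCH(0x0a, 0) =
 * (0x0a & 0x07) - 1 = 1 and read logical channel RDLCH(0x0a, 0) =
 * ((0x0a >> 3) & 0x07) - 1 = 0.
 */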
111 #define MC_CHANNEL_RANK_PRESENT 0x7c
112 #define RANK_PRESENT_MASK 0xffff
114 #define MC_CHANNEL_ADDR_MATCH 0xf0
115 #define MC_CHANNEL_ERROR_MASK 0xf8
116 #define MC_CHANNEL_ERROR_INJECT 0xfc
117 #define INJECT_ADDR_PARITY 0x10
118 #define INJECT_ECC 0x08
119 #define MASK_CACHELINE 0x06
120 #define MASK_FULL_CACHELINE 0x06
121 #define MASK_MSB32_CACHELINE 0x04
122 #define MASK_LSB32_CACHELINE 0x02
123 #define NO_MASK_CACHELINE 0x00
124 #define REPEAT_EN 0x01
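/*
 * Example (see i7core_inject_enable_store() below): writing
 * INJECT_ECC | REPEAT_EN (0x09) to MC_CHANNEL_ERROR_INJECT requests
 * repeated ECC error injection on writes matching MC_CHANNEL_ADDR_MATCH.
 */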
126 /* OFFSETS for Devices 4,5 and 6 Function 1 */
128 #define MC_DOD_CH_DIMM0 0x48
129 #define MC_DOD_CH_DIMM1 0x4c
130 #define MC_DOD_CH_DIMM2 0x50
131 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
132 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
133 #define DIMM_PRESENT_MASK (1 << 9)
134 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
135 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
136 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
137 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
138 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
139 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
140 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
141 #define MC_DOD_NUMCOL_MASK 3
142 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
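/*
 * Worked example (illustrative): a MC_DOD_CH_DIMMn read of 0x2ac has
 * DIMM_PRESENT = 1, MC_DOD_NUMBANK = 1 (8 banks), MC_DOD_NUMRANK = 1
 * (2 ranks), MC_DOD_NUMROW = 3 (32768 rows) and MC_DOD_NUMCOL = 0
 * (1024 cols), using the decode helpers below.
 */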
144 #define MC_RANK_PRESENT 0x7c
146 #define MC_SAG_CH_0 0x80
147 #define MC_SAG_CH_1 0x84
148 #define MC_SAG_CH_2 0x88
149 #define MC_SAG_CH_3 0x8c
150 #define MC_SAG_CH_4 0x90
151 #define MC_SAG_CH_5 0x94
152 #define MC_SAG_CH_6 0x98
153 #define MC_SAG_CH_7 0x9c
155 #define MC_RIR_LIMIT_CH_0 0x40
156 #define MC_RIR_LIMIT_CH_1 0x44
157 #define MC_RIR_LIMIT_CH_2 0x48
158 #define MC_RIR_LIMIT_CH_3 0x4C
159 #define MC_RIR_LIMIT_CH_4 0x50
160 #define MC_RIR_LIMIT_CH_5 0x54
161 #define MC_RIR_LIMIT_CH_6 0x58
162 #define MC_RIR_LIMIT_CH_7 0x5C
163 #define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
165 #define MC_RIR_WAY_CH 0x80
166 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
167 #define MC_RIR_WAY_RANK_MASK 0x7
174 #define MAX_DIMMS 3 /* Max DIMMS per channel */
175 #define MAX_MCR_FUNC 4
176 #define MAX_CHAN_FUNC 3
/* MC registers read from dev 3 fn 0 */
struct i7core_info {
	u32	mc_control;
	u32	mc_status;
	u32	max_dod;
	u32	ch_map;
};
struct i7core_inject {
	int	enable;
	u32	section, type, eccmask;

	/* Error address mask */
	int channel, dimm, rank, bank, page, col;
};
struct i7core_channel {
	u32	ranks;
	u32	dimms;
};
struct pci_id_descr {
	int	dev;
	int	func;
	int	dev_id;
	int	optional;
};
struct pci_id_table {
	struct pci_id_descr	*descr;
	int			n_devs;
};
struct i7core_dev {
	struct list_head	list;
	u8			socket;
	struct pci_dev		**pdev;
	int			n_devs;
	struct mem_ctl_info	*mci;
};

struct i7core_pvt {
223 struct pci_dev *pci_noncore;
224 struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
225 struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
227 struct i7core_dev *i7core_dev;
229 struct i7core_info info;
230 struct i7core_inject inject;
231 struct i7core_channel channel[NUM_CHANS];
233 int channels; /* Number of active channels */
235 int ce_count_available;
236 int csrow_map[NUM_CHANS][MAX_DIMMS];
	/* ECC corrected error counts per udimm */
239 unsigned long udimm_ce_count[MAX_DIMMS];
240 int udimm_last_ce_count[MAX_DIMMS];
	/* ECC corrected error counts per rdimm */
242 unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
243 int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
245 unsigned int is_registered;
248 struct edac_mce edac_mce;
250 /* Fifo double buffers */
251 struct mce mce_entry[MCE_LOG_LEN];
252 struct mce mce_outentry[MCE_LOG_LEN];
254 /* Fifo in/out counters */
255 unsigned mce_in, mce_out;
	/* Count of errors that were lost because the mce fifo overran */
	unsigned		mce_overrun;
};
262 static LIST_HEAD(i7core_edac_list);
263 static DEFINE_MUTEX(i7core_edac_lock);
#define PCI_DESCR(device, function, device_id)	\
	.dev = (device),			\
	.func = (function),			\
	.dev_id = (device_id)
270 struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
271 /* Memory controller */
272 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
273 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
274 /* Exists only for RDIMM */
275 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
276 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
279 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
280 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
281 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
282 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
285 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
286 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
287 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
288 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
291 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
292 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
293 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
294 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
296 /* Generic Non-core registers */
298 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
299 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
300 * the probing code needs to test for the other address in case of
301 * failure of this one
303 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },
};
307 struct pci_id_descr pci_dev_descr_lynnfield[] = {
308 { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR) },
309 { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD) },
310 { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST) },
312 { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
313 { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
314 { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
315 { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC) },
317 { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
318 { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
319 { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
320 { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) },
	/*
	 * This PCI device has an alternate address on some
	 * processors, like the Core i7 860.
	 */
326 { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE) },
};
329 struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
330 /* Memory controller */
331 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2) },
332 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2) },
333 /* Exists only for RDIMM */
334 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1 },
335 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
338 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
339 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
340 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
341 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2) },
344 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
345 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
346 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
347 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2) },
350 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
351 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
352 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
353 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) },
355 /* Generic Non-core registers */
356 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2) },
};
360 #define PCI_ID_TABLE_ENTRY(A) { A, ARRAY_SIZE(A) }
361 struct pci_id_table pci_dev_table[] = {
362 PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
363 PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
364 PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
};
368 * pci_device_id table for which devices we are looking for
370 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
371 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
372 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
373 {0,} /* 0 terminated list. */
};
376 static struct edac_pci_ctl_info *i7core_pci;
378 /****************************************************************************
  Ancillary status routines
380 ****************************************************************************/
382 /* MC_CONTROL bits */
383 #define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
384 #define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
387 #define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
388 #define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
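/*
 * Example: CH_ACTIVE(pvt, 2) tests bit 10 of MC_CONTROL, while
 * CH_DISABLED(pvt, 2) tests bit 2 of MC_STATUS.
 */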
390 /* MC_MAX_DOD read functions */
391 static inline int numdimms(u32 dimms)
393 return (dimms & 0x3) + 1;
396 static inline int numrank(u32 rank)
398 static int ranks[4] = { 1, 2, 4, -EINVAL };
400 return ranks[rank & 0x3];
403 static inline int numbank(u32 bank)
405 static int banks[4] = { 4, 8, 16, -EINVAL };
407 return banks[bank & 0x3];
410 static inline int numrow(u32 row)
412 static int rows[8] = {
413 1 << 12, 1 << 13, 1 << 14, 1 << 15,
414 1 << 16, -EINVAL, -EINVAL, -EINVAL,
417 return rows[row & 0x7];
420 static inline int numcol(u32 col)
	static int cols[4] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};
	return cols[col & 0x3];
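/*
 * Example: numrow(0x3) = 1 << 15 = 32768 rows; reserved encodings
 * (e.g. numrow(0x5)) return -EINVAL, which callers are expected to
 * handle (see the FIXME in get_dimm_config()).
 */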
428 static struct i7core_dev *get_i7core_dev(u8 socket)
{
	struct i7core_dev *i7core_dev;
432 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		if (i7core_dev->socket == socket)
			return i7core_dev;
	}

	return NULL;
}
440 /****************************************************************************
441 Memory check routines
442 ****************************************************************************/
static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
					  unsigned func)
{
	struct i7core_dev *i7core_dev = get_i7core_dev(socket);
	int i;

	if (!i7core_dev)
		return NULL;
452 for (i = 0; i < i7core_dev->n_devs; i++) {
		if (!i7core_dev->pdev[i])
			continue;
456 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
457 PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
458 return i7core_dev->pdev[i];
		}
	}

	return NULL;
}
466 * i7core_get_active_channels() - gets the number of channels and csrows
467 * @socket: Quick Path Interconnect socket
468 * @channels: Number of channels that will be returned
469 * @csrows: Number of csrows found
 * Since the EDAC core needs to know in advance the number of available
 * channels and csrows, in order to allocate memory for them, two similar
 * steps are needed. The first step, implemented in this function, counts
 * the csrows/channels present on one socket, so that the mci components
 * can be properly sized.
 *
 * Note that none of the currently available datasheets explain, or even
 * mention, how csrows are seen by the memory controller, so a fake csrow
 * description is needed: this driver maps one DIMM to one csrow.
 */
static int i7core_get_active_channels(u8 socket, unsigned *channels,
				      unsigned *csrows)
{
	struct pci_dev *pdev = NULL;
	int i, j;
	u32 status, control;

	*channels = 0;
	*csrows = 0;
	pdev = get_pdev_slot_func(socket, 3, 0);
	if (!pdev) {
		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
			      socket);
		return -ENODEV;
	}
499 /* Device 3 function 0 reads */
500 pci_read_config_dword(pdev, MC_STATUS, &status);
501 pci_read_config_dword(pdev, MC_CONTROL, &control);
503 for (i = 0; i < NUM_CHANS; i++) {
		u32 dimm_dod[3];

		/* Check if the channel is active */
		if (!(control & (1 << (8 + i))))
			continue;

		/* Check if the channel is disabled */
		if (status & (1 << i))
			continue;
		pdev = get_pdev_slot_func(socket, i + 4, 1);
		if (!pdev) {
			i7core_printk(KERN_ERR, "Couldn't find socket %d "
				      "fn %d.%d!!!\n",
				      socket, i + 4, 1);
			return -ENODEV;
		}
520 /* Devices 4-6 function 1 */
521 pci_read_config_dword(pdev,
522 MC_DOD_CH_DIMM0, &dimm_dod[0]);
523 pci_read_config_dword(pdev,
524 MC_DOD_CH_DIMM1, &dimm_dod[1]);
525 pci_read_config_dword(pdev,
526 MC_DOD_CH_DIMM2, &dimm_dod[2]);
		(*channels)++;

		for (j = 0; j < 3; j++) {
			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;
			(*csrows)++;
		}
	}
537 debugf0("Number of active channels on socket %d: %d\n",
543 static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
{
	struct i7core_pvt *pvt = mci->pvt_info;
546 struct csrow_info *csr;
547 struct pci_dev *pdev;
	int i, j;
	unsigned long last_page = 0;
	enum edac_type mode;
	enum mem_type mtype;
553 /* Get data from the MC register, function 0 */
	pdev = pvt->pci_mcr[0];
	if (!pdev)
		return -ENODEV;
558 /* Device 3 function 0 reads */
559 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
560 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
561 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
562 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
564 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
565 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
566 pvt->info.max_dod, pvt->info.ch_map);
568 if (ECC_ENABLED(pvt)) {
569 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
		if (ECCx8(pvt))
			mode = EDAC_S8ECD8ED;
		else
			mode = EDAC_S4ECD4ED;
	} else {
		debugf0("ECC disabled\n");
		mode = EDAC_NONE;
	}
579 /* FIXME: need to handle the error codes */
580 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
582 numdimms(pvt->info.max_dod),
583 numrank(pvt->info.max_dod >> 2),
584 numbank(pvt->info.max_dod >> 4),
585 numrow(pvt->info.max_dod >> 6),
586 numcol(pvt->info.max_dod >> 9));
588 for (i = 0; i < NUM_CHANS; i++) {
589 u32 data, dimm_dod[3], value[8];
		if (!pvt->pci_ch[i][0])
			continue;

		if (!CH_ACTIVE(pvt, i)) {
			debugf0("Channel %i is not active\n", i);
			continue;
		}
		if (CH_DISABLED(pvt, i)) {
			debugf0("Channel %i is disabled\n", i);
			continue;
		}
603 /* Devices 4-6 function 0 */
604 pci_read_config_dword(pvt->pci_ch[i][0],
605 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
		pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
						4 : 2;

		if (data & REGISTERED_DIMM)
			mtype = MEM_RDDR3;
		else
			mtype = MEM_DDR3;
615 if (data & THREE_DIMMS_PRESENT)
616 pvt->channel[i].dimms = 3;
617 else if (data & SINGLE_QUAD_RANK_PRESENT)
			pvt->channel[i].dimms = 1;
		else
			pvt->channel[i].dimms = 2;
623 /* Devices 4-6 function 1 */
624 pci_read_config_dword(pvt->pci_ch[i][1],
625 MC_DOD_CH_DIMM0, &dimm_dod[0]);
626 pci_read_config_dword(pvt->pci_ch[i][1],
627 MC_DOD_CH_DIMM1, &dimm_dod[1]);
628 pci_read_config_dword(pvt->pci_ch[i][1],
629 MC_DOD_CH_DIMM2, &dimm_dod[2]);
631 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
632 "%d ranks, %cDIMMs\n",
			i,
			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
			data,
636 pvt->channel[i].ranks,
637 (data & REGISTERED_DIMM) ? 'R' : 'U');
639 for (j = 0; j < 3; j++) {
			u32 banks, ranks, rows, cols;
			u32 size, npages;
			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;
646 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
647 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
648 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
649 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
651 /* DDR3 has 8 I/O banks */
652 size = (rows * cols * banks * ranks) >> (20 - 3);
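			/*
			 * Worked example (illustrative): rows = 32768,
			 * cols = 1024, banks = 8, ranks = 2 gives 2^29
			 * locations; ">> (20 - 3)" converts to MiB while
			 * apparently accounting for 8 bytes per location
			 * on the 64-bit bus, so size = 2^29 >> 17 = 4096 MiB.
			 */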
654 pvt->channel[i].dimms++;
656 debugf0("\tdimm %d %d Mb offset: %x, "
657 "bank: %d, rank: %d, row: %#x, col: %#x\n",
				j, size,
				RANKOFFSET(dimm_dod[j]),
660 banks, ranks, rows, cols);
#if PAGE_SHIFT > 20
			npages = size >> (PAGE_SHIFT - 20);
#else
			npages = size << (20 - PAGE_SHIFT);
#endif
668 csr = &mci->csrows[*csrow];
			csr->first_page = last_page + 1;
			last_page += npages;
			csr->last_page = last_page;
			csr->nr_pages = npages;
676 csr->csrow_idx = *csrow;
677 csr->nr_channels = 1;
679 csr->channels[0].chan_idx = i;
680 csr->channels[0].ce_count = 0;
682 pvt->csrow_map[i][j] = *csrow;
			switch (banks) {
			case 4:  csr->dtype = DEV_X4;  break;
			case 8:  csr->dtype = DEV_X8;  break;
			case 16: csr->dtype = DEV_X16; break;
			default: csr->dtype = DEV_UNKNOWN;
			}
			csr->mtype = mtype;
			csr->edac_mode = mode;

			(*csrow)++;
		}
704 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
705 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
706 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
707 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
708 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
709 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
710 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
711 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
712 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
713 for (j = 0; j < 8; j++)
714 debugf1("\t\t%#x\t%#x\t%#x\n",
715 (value[j] >> 27) & 0x1,
716 (value[j] >> 24) & 0x7,
				value[j] & ((1 << 24) - 1));
	}

	return 0;
}
723 /****************************************************************************
724 Error insertion routines
725 ****************************************************************************/
/* The i7core has independent error injection features per channel.
 * However, to keep the code simple, we don't allow enabling error injection
 * on more than one channel at a time.
 * Also, since a change to an injection parameter is only applied on enable,
 * we disable error injection on every write to the sysfs nodes that control
 * the error injection.
 */
734 static int disable_inject(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;

	pvt->inject.enable = 0;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return -ENODEV;

	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, 0);

	return 0;
}
750 * i7core inject inject.section
752 * accept and store error injection inject.section value
753 * bit 0 - refers to the lower 32-byte half cacheline
754 * bit 1 - refers to the upper 32-byte half cacheline
756 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
{
759 struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if ((rc < 0) || (value > 3))
		return -EIO;

	pvt->inject.section = (u32) value;
	return count;
}
static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
					  char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.section);
}
 * accept and store error injection inject.type value
785 * bit 0 - repeat enable - Enable error repetition
786 * bit 1 - inject ECC error
787 * bit 2 - inject parity error
789 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
					const char *data, size_t count)
{
792 struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if ((rc < 0) || (value > 7))
		return -EIO;

	pvt->inject.type = (u32) value;
	return count;
}
static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
				       char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.type);
}
815 * i7core_inject_inject.eccmask_store
817 * The type of error (UE/CE) will depend on the inject.eccmask value:
818 * Any bits set to a 1 will flip the corresponding ECC bit
819 * Correctable errors can be injected by flipping 1 bit or the bits within
820 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
821 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
822 * uncorrectable error to be injected.
824 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
{
827 struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if (rc < 0)
		return -EIO;

	pvt->inject.eccmask = (u32) value;
	return count;
}
static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
					  char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
}
852 * The type of error (UE/CE) will depend on the inject.eccmask value:
853 * Any bits set to a 1 will flip the corresponding ECC bit
854 * Correctable errors can be injected by flipping 1 bit or the bits within
855 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
856 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
857 * uncorrectable error to be injected.
860 #define DECLARE_ADDR_MATCH(param, limit) \
861 static ssize_t i7core_inject_store_##param( \
862 struct mem_ctl_info *mci, \
863 const char *data, size_t count) \
{							\
	struct i7core_pvt *pvt;				\
	unsigned long value;				\
	int rc;						\
							\
	debugf1("%s()\n", __func__);			\
870 pvt = mci->pvt_info; \
872 if (pvt->inject.enable) \
873 disable_inject(mci); \
875 if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
878 rc = strict_strtoul(data, 10, &value); \
879 if ((rc < 0) || (value >= limit)) \
883 pvt->inject.param = value; \
888 static ssize_t i7core_inject_show_##param( \
889 struct mem_ctl_info *mci, \
	char *data)					\
{							\
	struct i7core_pvt *pvt;				\
894 pvt = mci->pvt_info; \
895 debugf1("%s() pvt=%p\n", __func__, pvt); \
896 if (pvt->inject.param < 0) \
897 return sprintf(data, "any\n"); \
899 return sprintf(data, "%d\n", pvt->inject.param);\
#define ATTR_ADDR_MATCH(param)			\
	{					\
		.attr = {			\
			.name = #param,		\
			.mode = (S_IRUGO | S_IWUSR)	\
		},				\
		.show  = i7core_inject_show_##param,	\
		.store = i7core_inject_store_##param,	\
	}
912 DECLARE_ADDR_MATCH(channel, 3);
913 DECLARE_ADDR_MATCH(dimm, 3);
914 DECLARE_ADDR_MATCH(rank, 4);
915 DECLARE_ADDR_MATCH(bank, 32);
916 DECLARE_ADDR_MATCH(page, 0x10000);
917 DECLARE_ADDR_MATCH(col, 0x4000);
static int write_and_test(struct pci_dev *dev, int where, u32 val)
{
	u32 read;
	int count;

	debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
		where, val);
928 for (count = 0; count < 10; count++) {
		if (count)
			msleep(100);
		pci_write_config_dword(dev, where, val);
		pci_read_config_dword(dev, where, &read);

		if (read == val)
			return 0;
	}
938 i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
939 "write=%08x. Read=%08x\n",
		      dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
		      where, val, read);

	return -EINVAL;
}
/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items will make the MCU ignore that
 * matching criterion for error injection.
 *
 * Note that the error will only happen after a write operation to memory
 * that matches the criteria. If REPEAT_EN is not set in the inject mask,
 * just one error is produced. Otherwise, errors repeat until the inject
 * mask is cleared.
960 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
961 * is reliable enough to check if the MC is using the
962 * three channels. However, this is not clear at the datasheet.
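 *
 * Usage sketch (sysfs paths assumed from the EDAC core layout and from the
 * attribute/group names defined below; the mcN index depends on the socket):
 *
 *	echo 0   > /sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
 *	echo any > /sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
 *	echo 2   > /sys/devices/system/edac/mc/mc0/inject_type
 *	echo 1   > /sys/devices/system/edac/mc/mc0/inject_enable
 */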
964 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
					  const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask = 0;
	u64 mask = 0;
	int rc;
	long enable;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	rc = strict_strtoul(data, 10, &enable);
	if ((rc < 0))
		return 0;

	if (enable) {
		pvt->inject.enable = 1;
	} else {
		disable_inject(mci);
		return count;
	}
987 /* Sets pvt->inject.dimm mask */
988 if (pvt->inject.dimm < 0)
991 if (pvt->channel[pvt->inject.channel].dimms > 2)
992 mask |= (pvt->inject.dimm & 0x3LL) << 35;
994 mask |= (pvt->inject.dimm & 0x1LL) << 36;
997 /* Sets pvt->inject.rank mask */
998 if (pvt->inject.rank < 0)
1001 if (pvt->channel[pvt->inject.channel].dimms > 2)
1002 mask |= (pvt->inject.rank & 0x1LL) << 34;
1004 mask |= (pvt->inject.rank & 0x3LL) << 34;
1007 /* Sets pvt->inject.bank mask */
1008 if (pvt->inject.bank < 0)
		mask |= (pvt->inject.bank & 0x1fLL) << 30;
1013 /* Sets pvt->inject.page mask */
1014 if (pvt->inject.page < 0)
1017 mask |= (pvt->inject.page & 0xffff) << 14;
1019 /* Sets pvt->inject.column mask */
1020 if (pvt->inject.col < 0)
1023 mask |= (pvt->inject.col & 0x3fff);
	/*
	 * Calculate the mask to write to MC_CHANNEL_ERROR_INJECT:
	 * bit    0: REPEAT_EN
	 * bits 1-2: MASK_HALF_CACHELINE
	 * bit    3: INJECT_ECC
	 * bit    4: INJECT_ADDR_PARITY
	 */
1032 injectmask = (pvt->inject.type & 1) |
1033 (pvt->inject.section & 0x3) << 1 |
1034 (pvt->inject.type & 0x6) << (3 - 1);
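	/*
	 * Worked example (illustrative): inject.type = 0x3 (ECC + repeat)
	 * and inject.section = 0 give injectmask = (3 & 1) | (0 & 3) << 1 |
	 * (3 & 6) << 2 = 0x09, i.e. INJECT_ECC | REPEAT_EN.
	 */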
1036 /* Unlock writes to registers - this register is write only */
1037 pci_write_config_dword(pvt->pci_noncore,
1038 MC_CFG_CONTROL, 0x2);
1040 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1041 MC_CHANNEL_ADDR_MATCH, mask);
1042 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1043 MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1045 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1046 MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1048 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1049 MC_CHANNEL_ERROR_INJECT, injectmask);
	/*
	 * This is something undocumented, based on my tests:
	 * without writing 8 to this register, errors aren't injected. Not sure
	 * why.
	 */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 8);
1059 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
		" inject 0x%08x\n",
		mask, pvt->inject.eccmask, injectmask);

	return count;
}
static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
					 char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;
1076 pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1077 MC_CHANNEL_ERROR_INJECT, &injectmask);
1079 debugf0("Inject error read: 0x%018x\n", injectmask);
1081 if (injectmask & 0x0c)
1082 pvt->inject.enable = 1;
1084 return sprintf(data, "%d\n", pvt->inject.enable);
1087 #define DECLARE_COUNTER(param) \
1088 static ssize_t i7core_show_counter_##param( \
1089 struct mem_ctl_info *mci, \
		char *data)				\
{							\
	struct i7core_pvt *pvt = mci->pvt_info;		\
1094 debugf1("%s() \n", __func__); \
1095 if (!pvt->ce_count_available || (pvt->is_registered)) \
1096 return sprintf(data, "data unavailable\n"); \
1097 return sprintf(data, "%lu\n", \
1098 pvt->udimm_ce_count[param]); \
}

DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
#define ATTR_COUNTER(param)				\
	{						\
		.attr = {				\
			.name = __stringify(udimm##param),	\
			.mode = (S_IRUGO | S_IWUSR)	\
		},					\
		.show  = i7core_show_counter_##param	\
	}
1119 static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
1120 ATTR_ADDR_MATCH(channel),
1121 ATTR_ADDR_MATCH(dimm),
1122 ATTR_ADDR_MATCH(rank),
1123 ATTR_ADDR_MATCH(bank),
1124 ATTR_ADDR_MATCH(page),
1125 ATTR_ADDR_MATCH(col),
1126 { .attr = { .name = NULL } }
};
1129 static struct mcidev_sysfs_group i7core_inject_addrmatch = {
1130 .name = "inject_addrmatch",
1131 .mcidev_attr = i7core_addrmatch_attrs,
};
1134 static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
	ATTR_COUNTER(0),
	ATTR_COUNTER(1),
	ATTR_COUNTER(2),
	{ .attr = { .name = NULL } }
};
1140 static struct mcidev_sysfs_group i7core_udimm_counters = {
1141 .name = "all_channel_counts",
1142 .mcidev_attr = i7core_udimm_counters_attrs,
};
1145 static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
1148 .name = "inject_section",
1149 .mode = (S_IRUGO | S_IWUSR)
1151 .show = i7core_inject_section_show,
1152 .store = i7core_inject_section_store,
1155 .name = "inject_type",
1156 .mode = (S_IRUGO | S_IWUSR)
1158 .show = i7core_inject_type_show,
1159 .store = i7core_inject_type_store,
1162 .name = "inject_eccmask",
1163 .mode = (S_IRUGO | S_IWUSR)
1165 .show = i7core_inject_eccmask_show,
1166 .store = i7core_inject_eccmask_store,
1168 .grp = &i7core_inject_addrmatch,
1171 .name = "inject_enable",
1172 .mode = (S_IRUGO | S_IWUSR)
1174 .show = i7core_inject_enable_show,
1175 .store = i7core_inject_enable_store,
1177 { .attr = { .name = NULL } }, /* Reserved for udimm counters */
1178 { .attr = { .name = NULL } }
};
1181 /****************************************************************************
1182 Device initialization routines: put/get, init/exit
1183 ****************************************************************************/
1186 * i7core_put_devices 'put' all the devices that we have
1187 * reserved via 'get'
1189 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1193 debugf0(__FILE__ ": %s()\n", __func__);
1194 for (i = 0; i < i7core_dev->n_devs; i++) {
		struct pci_dev *pdev = i7core_dev->pdev[i];

		if (!pdev)
			continue;

		debugf0("Removing dev %02x:%02x.%d\n",
			pdev->bus->number,
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
		pci_dev_put(pdev);
	}
	kfree(i7core_dev->pdev);
	list_del(&i7core_dev->list);
	kfree(i7core_dev);
}
1208 static void i7core_put_all_devices(void)
1210 struct i7core_dev *i7core_dev, *tmp;
1212 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
1213 i7core_put_devices(i7core_dev);
static void __init i7core_xeon_pci_fixup(struct pci_id_table *table)
{
	struct pci_dev *pdev = NULL;
	int i;

	/*
	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core pci buses
	 * aren't announced by ACPI. So, we need to use a legacy bus scan to
	 * probe for them.
	 */
	while (table && table->descr) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
		if (unlikely(!pdev)) {
			for (i = 0; i < MAX_SOCKET_BUSES; i++)
				pcibios_scan_specific_bus(255-i);
		}
		pci_dev_put(pdev);
		table++;
	}
}
1236 * i7core_get_devices Find and perform 'get' operation on the MCH's
1237 * device/functions we want to reference for this driver
1239 * Need to 'get' device 16 func 1 and func 2
int i7core_get_onedevice(struct pci_dev **prev, int devno,
			 struct pci_id_descr *dev_descr, unsigned n_devs)
{
	struct i7core_dev *i7core_dev;
	struct pci_dev *pdev = NULL;
	u8 bus = 0;
	u8 socket = 0;
1250 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1251 dev_descr->dev_id, *prev);
	/*
	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core device
	 * is at id 8086:2c40, instead of 8086:2c41. So, we need
	 * to probe for the alternate address in case of failure.
	 */
1258 if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
1259 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1260 PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
1262 if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
1263 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
				      *prev);

	if (!pdev) {
		if (*prev) {
			*prev = pdev;
			return 0;
		}
		if (dev_descr->optional)
			return 0;
1279 i7core_printk(KERN_ERR,
1280 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1281 dev_descr->dev, dev_descr->func,
1282 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		/* End of list, leave */
		return -ENODEV;
	}

	bus = pdev->bus->number;

	if (bus == 0x3f)
		socket = 0;
	else
		socket = 255 - bus;
1294 i7core_dev = get_i7core_dev(socket);
	if (!i7core_dev) {
		i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
		if (!i7core_dev)
			return -ENOMEM;

		i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
					   GFP_KERNEL);
		if (!i7core_dev->pdev) {
			kfree(i7core_dev);
			return -ENOMEM;
		}
1305 i7core_dev->socket = socket;
1306 i7core_dev->n_devs = n_devs;
		list_add_tail(&i7core_dev->list, &i7core_edac_list);
	}
1310 if (i7core_dev->pdev[devno]) {
1311 i7core_printk(KERN_ERR,
1312 "Duplicated device for "
1313 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1314 bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		pci_dev_put(pdev);
		return -ENODEV;
	}
1320 i7core_dev->pdev[devno] = pdev;
1323 if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1324 PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1325 i7core_printk(KERN_ERR,
1326 "Device PCI ID %04x:%04x "
1327 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1328 PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1329 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			bus, dev_descr->dev, dev_descr->func);
		pci_dev_put(pdev);
		return -ENODEV;
	}
1334 /* Be sure that the device is enabled */
1335 if (unlikely(pci_enable_device(pdev) < 0)) {
		i7core_printk(KERN_ERR,
			"Couldn't enable "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		return -ENODEV;
	}
1344 debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1345 socket, bus, dev_descr->dev,
		dev_descr->func,
		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	*prev = pdev;

	return 0;
}
static int i7core_get_devices(struct pci_id_table *table)
{
	int i, rc;
	struct pci_dev *pdev = NULL;
	struct pci_id_descr *dev_descr;
1360 while (table && table->descr) {
1361 dev_descr = table->descr;
1362 for (i = 0; i < table->n_devs; i++) {
			rc = i7core_get_onedevice(&pdev, i, &dev_descr[i],
						  table->n_devs);
			if (unlikely(rc < 0)) {
1372 i7core_put_all_devices();
				return -ENODEV;
			}
		}
		table++;
	}

	return 0;
}
static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;
1391 /* Associates i7core_dev and mci for future usage */
1392 pvt->i7core_dev = i7core_dev;
1393 i7core_dev->mci = mci;
1395 pvt->is_registered = 0;
1396 for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;
1401 func = PCI_FUNC(pdev->devfn);
1402 slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
1407 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
1411 } else if (!slot && !func)
			pvt->pci_noncore = pdev;
		else
			goto error;
1416 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1417 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1418 pdev, i7core_dev->socket);
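		/*
		 * Rationale for the check below: dev 3.2 is the RAS device,
		 * marked .optional in the device tables above because it only
		 * exists on systems populated with RDIMMs, so its presence is
		 * used as the registered-memory indicator.
		 */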
1420 if (PCI_SLOT(pdev->devfn) == 3 &&
1421 PCI_FUNC(pdev->devfn) == 2)
1422 pvt->is_registered = 1;
	}
1426 * Add extra nodes to count errors on udimm
1427 * For registered memory, this is not needed, since the counters
1428 * are already displayed at the standard locations
1430 if (!pvt->is_registered)
1431 i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
1432 &i7core_udimm_counters;
1437 i7core_printk(KERN_ERR, "Device %d, function %d "
1438 "is out of the expected range\n",
1443 /****************************************************************************
1444 Error check routines
1445 ****************************************************************************/
static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
				      int chan, int dimm, int add)
{
	char *msg;
	struct i7core_pvt *pvt = mci->pvt_info;
	int row = pvt->csrow_map[chan][dimm], i;
1453 for (i = 0; i < add; i++) {
1454 msg = kasprintf(GFP_KERNEL, "Corrected error "
1455 "(Socket=%d channel=%d dimm=%d)",
1456 pvt->i7core_dev->socket, chan, dimm);
		edac_mc_handle_fbd_ce(mci, row, 0, msg);
		kfree(msg);
	}
}
1463 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1464 int chan, int new0, int new1, int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
1467 int add0 = 0, add1 = 0, add2 = 0;
1468 /* Updates CE counters if it is not the first time here */
1469 if (pvt->ce_count_available) {
1470 /* Updates CE counters */
1472 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1473 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1474 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->rdimm_ce_count[chan][2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->rdimm_ce_count[chan][1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->rdimm_ce_count[chan][0] += add0;
	} else
		pvt->ce_count_available = 1;
1490 /* Store the new values */
1491 pvt->rdimm_last_ce_count[chan][2] = new2;
1492 pvt->rdimm_last_ce_count[chan][1] = new1;
1493 pvt->rdimm_last_ce_count[chan][0] = new0;
	/* Update the edac core */
	if (add0 != 0)
		i7core_rdimm_update_csrow(mci, chan, 0, add0);
	if (add1 != 0)
		i7core_rdimm_update_csrow(mci, chan, 1, add1);
	if (add2 != 0)
		i7core_rdimm_update_csrow(mci, chan, 2, add2);
}
1505 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv[3][2];
	int i, new0, new1, new2;
	/* Read dev 3 fn 2 MC_COR_ECC_CNT registers directly */
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
			      &rcv[0][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
			      &rcv[0][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
			      &rcv[1][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
			      &rcv[1][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
			      &rcv[2][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
			      &rcv[2][1]);
1524 for (i = 0 ; i < 3; i++) {
1525 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1526 (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
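		/*
		 * Interpretation (hedged): each MC_COR_ECC_CNT register
		 * carries two 15-bit counters. With three DIMMs on the
		 * channel, each half-register tracks one DIMM; with one or
		 * two DIMMs, the two halves of a register are summed per
		 * DIMM, as done below.
		 */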
1527 /*if the channel has 3 dimms*/
1528 if (pvt->channel[i].dimms > 2) {
1529 new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1530 new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1531 new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
		} else {
			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1534 DIMM_BOT_COR_ERR(rcv[i][0]);
1535 new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
				DIMM_BOT_COR_ERR(rcv[i][1]);
			new2 = 0;
		}
		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
	}
}
1544 /* This function is based on the device 3 function 4 registers as described on:
1545 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1546 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1547 * also available at:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
1550 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv1, rcv0;
	int new0, new1, new2;
1556 if (!pvt->pci_mcr[4]) {
1557 debugf0("%s MCR registers not found\n", __func__);
1561 /* Corrected test errors */
1562 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1563 pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1565 /* Store the new values */
1566 new2 = DIMM2_COR_ERR(rcv1);
1567 new1 = DIMM1_COR_ERR(rcv0);
1568 new0 = DIMM0_COR_ERR(rcv0);
1570 /* Updates CE counters if it is not the first time here */
1571 if (pvt->ce_count_available) {
1572 /* Updates CE counters */
1573 int add0, add1, add2;
1575 add2 = new2 - pvt->udimm_last_ce_count[2];
1576 add1 = new1 - pvt->udimm_last_ce_count[1];
1577 add0 = new0 - pvt->udimm_last_ce_count[0];
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->udimm_ce_count[2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->udimm_ce_count[1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->udimm_ce_count[0] += add0;
1591 if (add0 | add1 | add2)
1592 i7core_printk(KERN_ERR, "New Corrected error(s): "
1593 "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1596 pvt->ce_count_available = 1;
1598 /* Store the new values */
1599 pvt->udimm_last_ce_count[2] = new2;
1600 pvt->udimm_last_ce_count[1] = new1;
	pvt->udimm_last_ce_count[0] = new0;
}
/*
 * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and
 * IA-32 Architectures Software Developer's Manual Volume 3B,
 * Nehalem is defined as family 0x06, model 0x1a.
 *
1609 * The MCA registers used here are the following ones:
1610 * struct mce field MCA Register
1611 * m->status MSR_IA32_MC8_STATUS
1612 * m->addr MSR_IA32_MC8_ADDR
1613 * m->misc MSR_IA32_MC8_MISC
 * In the case of Nehalem, the error information is encoded in the .status
 * and .misc fields.
 */
static void i7core_mce_output_error(struct mem_ctl_info *mci,
				    struct mce *m)
{
	struct i7core_pvt *pvt = mci->pvt_info;
1621 char *type, *optype, *err, *msg;
1622 unsigned long error = m->status & 0x1ff0000l;
1623 u32 optypenum = (m->status >> 4) & 0x07;
1624 u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1625 u32 dimm = (m->misc >> 16) & 0x3;
1626 u32 channel = (m->misc >> 18) & 0x3;
1627 u32 syndrome = m->misc >> 32;
	u32 errnum = find_first_bit(&error, 32);
	int csrow;
	if (m->mcgstatus & 1)
		type = "FATAL";
	else
		type = "NON_FATAL";

	switch (optypenum) {
	case 0: optype = "generic undef request"; break;
	case 1: optype = "read error"; break;
	case 2: optype = "write error"; break;
	case 3: optype = "addr/cmd error"; break;
	case 4: optype = "scrubbing error"; break;
	default: optype = "reserved"; break;
	}
1659 err = "read ECC error";
1662 err = "RAS ECC error";
1665 err = "write parity error";
1668 err = "redundacy loss";
1674 err = "memory range error";
1677 err = "RTID out of range";
1680 err = "address parity error";
1683 err = "byte enable parity error";
1689 /* FIXME: should convert addr into bank and rank information */
1690 msg = kasprintf(GFP_ATOMIC,
1691 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1692 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1693 type, (long long) m->addr, m->cpu, dimm, channel,
1694 syndrome, core_err_cnt, (long long)m->status,
1695 (long long)m->misc, optype, err);
1699 csrow = pvt->csrow_map[channel][dimm];
1701 /* Call the helper to output message */
1702 if (m->mcgstatus & 1)
1703 edac_mc_handle_fbd_ue(mci, csrow, 0,
1704 0 /* FIXME: should be channel here */, msg);
1705 else if (!pvt->is_registered)
1706 edac_mc_handle_fbd_ce(mci, csrow,
1707 0 /* FIXME: should be channel here */, msg);
	kfree(msg);
}
1713 * i7core_check_error Retrieve and process errors reported by the
1714 * hardware. Called by the Core module.
1716 static void i7core_check_error(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m;

	/*
	 * MCE first step: Copy all mce errors into a temporary buffer.
	 * We use double buffering here, to reduce the risk of
	 * losing an error.
	 */
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
		% MCE_LOG_LEN;
	if (!count)
		goto check_ce_error;
1734 m = pvt->mce_outentry;
1735 if (pvt->mce_in + count > MCE_LOG_LEN) {
1736 unsigned l = MCE_LOG_LEN - pvt->mce_in;
		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		pvt->mce_in = 0;
		count -= l;
		m += l;
	}
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1746 pvt->mce_in += count;
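	/*
	 * Worked example (illustrative): with MCE_LOG_LEN = 32, mce_in = 30
	 * and mce_out = 2, count = (2 + 32 - 30) % 32 = 4; the first memcpy
	 * above drains entries 30..31 (l = 2) and the second drains 0..1.
	 */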
1749 if (pvt->mce_overrun) {
1750 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1753 pvt->mce_overrun = 0;
1757 * MCE second step: parse errors and display
1759 for (i = 0; i < count; i++)
1760 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
	 * Now, let's increment CE error counts
	 */
check_ce_error:
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
		i7core_rdimm_check_mc_ecc_err(mci);
}
1773 * i7core_mce_check_error Replicates mcelog routine to get errors
1774 * This routine simply queues mcelog errors, and
 * returns. The error itself should be handled later
1776 * by i7core_check_error.
1777 * WARNING: As this routine should be called at NMI time, extra care should
1778 * be taken to avoid deadlocks, and to be as fast as possible.
1780 static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
1783 struct i7core_pvt *pvt = mci->pvt_info;
1786 * Just let mcelog handle it if the error is
1787 * outside the memory controller
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;
1797 /* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
		return 0;
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		pvt->mce_overrun++;
		return 0;
	}
1809 /* Copy memory error at the ringbuffer */
1810 memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1812 pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1814 /* Handle fatal errors immediately */
1815 if (mce->mcgstatus & 1)
1816 i7core_check_error(mci);
	/* Advise mcelog that the errors were handled */
	return 1;
}
1822 static int i7core_register_mci(struct i7core_dev *i7core_dev,
1823 int num_channels, int num_csrows)
{
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;
	int csrow = 0;
	int rc;
1830 /* allocate a new MC control structure */
1831 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
			    i7core_dev->socket);
	if (unlikely(!mci))
		return -ENOMEM;
1836 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1838 /* record ptr to the generic device */
1839 mci->dev = &i7core_dev->pdev[0]->dev;
1841 pvt = mci->pvt_info;
1842 memset(pvt, 0, sizeof(*pvt));
	/*
	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
	 * mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
	 * channels.
	 */
1849 mci->mtype_cap = MEM_FLAG_DDR3;
1850 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1851 mci->edac_cap = EDAC_FLAG_NONE;
1852 mci->mod_name = "i7core_edac.c";
1853 mci->mod_ver = I7CORE_REVISION;
1854 mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1855 i7core_dev->socket);
1856 mci->dev_name = pci_name(i7core_dev->pdev[0]);
1857 mci->ctl_page_to_phys = NULL;
1858 mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
1859 /* Set the function pointer to an actual operation function */
1860 mci->edac_check = i7core_check_error;
1862 /* Store pci devices at mci for faster access */
1863 rc = mci_bind_devs(mci, i7core_dev);
1864 if (unlikely(rc < 0))
1867 /* Get dimm basic config */
1868 get_dimm_config(mci, &csrow);
1870 /* add this new MC control structure to EDAC's list of MCs */
1871 if (unlikely(edac_mc_add_mc(mci))) {
1872 debugf0("MC: " __FILE__
1873 ": %s(): failed edac_mc_add_mc()\n", __func__);
1874 /* FIXME: perhaps some code should go here that disables error
		 * reporting if we just enabled it
		 */
		edac_mc_free(mci);
		return -EINVAL;
	}
1882 /* allocating generic PCI control info */
1883 i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1885 if (unlikely(!i7core_pci)) {
1887 "%s(): Unable to create PCI control\n",
1890 "%s(): PCI error report via EDAC not setup\n",
1894 /* Default error mask is any memory */
1895 pvt->inject.channel = 0;
1896 pvt->inject.dimm = -1;
1897 pvt->inject.rank = -1;
1898 pvt->inject.bank = -1;
1899 pvt->inject.page = -1;
1900 pvt->inject.col = -1;
1902 /* Registers on edac_mce in order to receive memory errors */
1903 pvt->edac_mce.priv = mci;
1904 pvt->edac_mce.check_error = i7core_mce_check_error;
1906 rc = edac_mce_register(&pvt->edac_mce);
1907 if (unlikely(rc < 0)) {
1908 debugf0("MC: " __FILE__
1909 ": %s(): failed edac_mce_register()\n", __func__);
/*
 *	i7core_probe	Probe for ONE instance of device to see if it is
 *			present.
 *	return:
1922 * 0 for FOUND a device
 *		< 0 for error code
 */
1925 static int __devinit i7core_probe(struct pci_dev *pdev,
1926 const struct pci_device_id *id)
{
	int rc;
	int dev_idx = id->driver_data;
1930 struct i7core_dev *i7core_dev;
	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(dev_idx >= 1))
		return -EINVAL;
1938 /* get the pci devices we want to reserve for our use */
1939 mutex_lock(&i7core_edac_lock);
1941 rc = i7core_get_devices(pci_dev_table);
	if (unlikely(rc < 0))
		goto fail0;
	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		int channels;
		int csrows;
1949 /* Check the number of active and not disabled channels */
1950 rc = i7core_get_active_channels(i7core_dev->socket,
1951 &channels, &csrows);
		if (unlikely(rc < 0))
			goto fail1;
1955 rc = i7core_register_mci(i7core_dev, channels, csrows);
		if (unlikely(rc < 0))
			goto fail1;
	}
1960 i7core_printk(KERN_INFO, "Driver loaded.\n");
	mutex_unlock(&i7core_edac_lock);
	return 0;

fail1:
	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}
1973 * i7core_remove destructor for one instance of device
1976 static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;
1979 struct i7core_dev *i7core_dev, *tmp;
1981 debugf0(__FILE__ ": %s()\n", __func__);
	if (i7core_pci)
		edac_pci_release_generic_ctl(i7core_pci);
	/*
	 * There is a problem here: the pdev value for removal will be wrong,
	 * since it will point to the X58 register used to detect that the
	 * machine is a Nehalem or newer design. However, due to the way
	 * several PCI devices are grouped together to provide MC
	 * functionality, we need a different method to release the devices.
	 */
1994 mutex_lock(&i7core_edac_lock);
1995 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
		if (mci) {
			struct i7core_pvt *pvt = mci->pvt_info;
2000 i7core_dev = pvt->i7core_dev;
2001 edac_mce_unregister(&pvt->edac_mce);
			kfree(mci->ctl_name);
			edac_mc_free(mci);
			i7core_put_devices(i7core_dev);
		} else {
2006 i7core_printk(KERN_ERR,
2007 "Couldn't find mci for socket %d\n",
				      i7core_dev->socket);
		}
	}
	mutex_unlock(&i7core_edac_lock);
}
2014 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2017 * i7core_driver pci_driver structure for this module
2020 static struct pci_driver i7core_driver = {
2021 .name = "i7core_edac",
2022 .probe = i7core_probe,
2023 .remove = __devexit_p(i7core_remove),
2024 .id_table = i7core_pci_tbl,
};
2028 * i7core_init Module entry function
2029 * Try to initialize this module for its devices
2031 static int __init i7core_init(void)
2035 debugf2("MC: " __FILE__ ": %s()\n", __func__);
	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	opstate_init();

	i7core_xeon_pci_fixup(pci_dev_table);
2042 pci_rc = pci_register_driver(&i7core_driver);
2047 i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2054 * i7core_exit() Module exit function
2055 * Unregister the driver
2057 static void __exit i7core_exit(void)
2059 debugf2("MC: " __FILE__ ": %s()\n", __func__);
2060 pci_unregister_driver(&i7core_driver);
2063 module_init(i7core_init);
2064 module_exit(i7core_exit);
2066 MODULE_LICENSE("GPL");
2067 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2068 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2069 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2072 module_param(edac_op_state, int, 0444);
2073 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");