i7core_edac: return -ENODEV when devices were already probed
[pandora-kernel.git] / drivers / edac / i7core_edac.c
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
 * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *       Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *      http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/edac_mce.h>
37 #include <linux/smp.h>
38 #include <asm/processor.h>
39
40 #include "edac_core.h"
41
42 /* Static vars */
/* List of per-socket i7core_dev entries; see alloc_i7core_dev(). */
static LIST_HEAD(i7core_edac_list);
/* NOTE(review): presumably serializes access to the list above - confirm. */
static DEFINE_MUTEX(i7core_edac_lock);
/* NOTE(review): presumably non-zero once the devices were probed - confirm. */
static int probed;

/* Read-only (0444) module parameter; off (0) unless set at load time. */
static int use_pci_fixup;
module_param(use_pci_fixup, int, 0444);
MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
50 /*
51  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
52  * registers start at bus 255, and are not reported by BIOS.
 * We currently find devices with only 2 sockets. To support systems with
 * more QPI (Quick Path Interconnect) sockets, just increment this number.
55  */
56 #define MAX_SOCKET_BUSES        2
57
58
59 /*
60  * Alter this version for the module when modifications are made
61  */
62 #define I7CORE_REVISION    " Ver: 1.0.0 " __DATE__
63 #define EDAC_MOD_STR      "i7core_edac"
64
65 /*
66  * Debug macros
67  */
/* Log through edac_printk() with the fixed "i7core" prefix. */
#define i7core_printk(level, fmt, arg...)                       \
        edac_printk(level, "i7core", fmt, ##arg)

/* Same as above, but routed through the per-controller (mci) printk helper. */
#define i7core_mc_printk(mci, level, fmt, arg...)               \
        edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
73
74 /*
75  * i7core Memory Controller Registers
76  */
77
78         /* OFFSETS for Device 0 Function 0 */
79
80 #define MC_CFG_CONTROL  0x90
81
82         /* OFFSETS for Device 3 Function 0 */
83
84 #define MC_CONTROL      0x48
85 #define MC_STATUS       0x4c
86 #define MC_MAX_DOD      0x64
87
88 /*
 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
90  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
91  */
92
93 #define MC_TEST_ERR_RCV1        0x60
94   #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)
95
96 #define MC_TEST_ERR_RCV0        0x64
97   #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
98   #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)
99
/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
101 #define MC_COR_ECC_CNT_0        0x80
102 #define MC_COR_ECC_CNT_1        0x84
103 #define MC_COR_ECC_CNT_2        0x88
104 #define MC_COR_ECC_CNT_3        0x8c
105 #define MC_COR_ECC_CNT_4        0x90
106 #define MC_COR_ECC_CNT_5        0x94
107
108 #define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
109 #define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)
110
111
112         /* OFFSETS for Devices 4,5 and 6 Function 0 */
113
114 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
115   #define THREE_DIMMS_PRESENT           (1 << 24)
116   #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
117   #define QUAD_RANK_PRESENT             (1 << 22)
118   #define REGISTERED_DIMM               (1 << 15)
119
#define MC_CHANNEL_MAPPER       0x60
  /*
   * Each channel owns a 6-bit group in the mapper register value @r:
   * WRLCH extracts the 3-bit field at bit (ch * 6), RDLCH the 3-bit field at
   * bit (ch * 6 + 3). Both subtract 1 from the raw field value.
   * @ch is fully parenthesized so that expression arguments expand correctly.
   */
  #define RDLCH(r, ch)          ((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)          ((((r) >> ((ch) * 6)) & 0x07) - 1)
123
124 #define MC_CHANNEL_RANK_PRESENT 0x7c
125   #define RANK_PRESENT_MASK             0xffff
126
127 #define MC_CHANNEL_ADDR_MATCH   0xf0
128 #define MC_CHANNEL_ERROR_MASK   0xf8
129 #define MC_CHANNEL_ERROR_INJECT 0xfc
130   #define INJECT_ADDR_PARITY    0x10
131   #define INJECT_ECC            0x08
132   #define MASK_CACHELINE        0x06
133   #define MASK_FULL_CACHELINE   0x06
134   #define MASK_MSB32_CACHELINE  0x04
135   #define MASK_LSB32_CACHELINE  0x02
136   #define NO_MASK_CACHELINE     0x00
137   #define REPEAT_EN             0x01
138
139         /* OFFSETS for Devices 4,5 and 6 Function 1 */
140
/* Per-DIMM "DOD" registers, one per DIMM slot of a channel. */
#define MC_DOD_CH_DIMM0         0x48
#define MC_DOD_CH_DIMM1         0x4c
#define MC_DOD_CH_DIMM2         0x50
  /*
   * Field extractors for a MC_DOD_CH_DIMMx register value:
   *   bits 12:10 rank offset, bit 9 DIMM present, bits 8:7 bank code,
   *   bits 6:5 rank code, bits 4:2 row code, bits 1:0 column code.
   * The codes are decoded by numbank()/numrank()/numrow()/numcol().
   */
  #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)         (((x) & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK     (1 << 9)
  #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK            3
  #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)
156
157 #define MC_RANK_PRESENT         0x7c
158
159 #define MC_SAG_CH_0     0x80
160 #define MC_SAG_CH_1     0x84
161 #define MC_SAG_CH_2     0x88
162 #define MC_SAG_CH_3     0x8c
163 #define MC_SAG_CH_4     0x90
164 #define MC_SAG_CH_5     0x94
165 #define MC_SAG_CH_6     0x98
166 #define MC_SAG_CH_7     0x9c
167
168 #define MC_RIR_LIMIT_CH_0       0x40
169 #define MC_RIR_LIMIT_CH_1       0x44
170 #define MC_RIR_LIMIT_CH_2       0x48
171 #define MC_RIR_LIMIT_CH_3       0x4C
172 #define MC_RIR_LIMIT_CH_4       0x50
173 #define MC_RIR_LIMIT_CH_5       0x54
174 #define MC_RIR_LIMIT_CH_6       0x58
175 #define MC_RIR_LIMIT_CH_7       0x5C
176 #define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)
177
178 #define MC_RIR_WAY_CH           0x80
179   #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
180   #define MC_RIR_WAY_RANK_MASK          0x7
181
182 /*
183  * i7core structs
184  */
185
186 #define NUM_CHANS 3
187 #define MAX_DIMMS 3             /* Max DIMMS per channel */
188 #define MAX_MCR_FUNC  4
189 #define MAX_CHAN_FUNC 3
190
/*
 * Raw register snapshot taken from device 3 function 0 by get_dimm_config().
 */
struct i7core_info {
        u32     mc_control;     /* MC_CONTROL register value */
        u32     mc_status;      /* MC_STATUS register value */
        u32     max_dod;        /* MC_MAX_DOD register value */
        u32     ch_map;         /* MC_CHANNEL_MAPPER register value */
};
197
198
/*
 * Error injection parameters, written through the sysfs store handlers.
 */
struct i7core_inject {
        int     enable;         /* cleared by disable_inject() */

        u32     section;        /* accepted range 0..3 (inject_section_store) */
        u32     type;           /* accepted range 0..7 (inject_type_store) */
        u32     eccmask;        /* mask of ECC bits to flip */

        /* Error address mask */
        int channel, dimm, rank, bank, page, col;
};
209
/* Per-channel population data, filled in by get_dimm_config(). */
struct i7core_channel {
        u32             ranks;  /* 4 if QUAD_RANK_PRESENT is set, else 2 */
        u32             dimms;  /* incremented once per DIMM found present */
};
214
/* Describes one PCI function the driver looks for (see PCI_DESCR()). */
struct pci_id_descr {
        int                     dev;            /* PCI device (slot) number */
        int                     func;           /* PCI function number */
        int                     dev_id;         /* PCI device ID */
        int                     optional;       /* set to 1 for RDIMM-only functions */
};
221
/* One descriptor array plus its length (see PCI_ID_TABLE_ENTRY()). */
struct pci_id_table {
        const struct pci_id_descr       *descr;         /* descriptor array */
        int                             n_devs;         /* entries in @descr */
};
226
/* Per-socket device bookkeeping, linked into i7core_edac_list. */
struct i7core_dev {
        struct list_head        list;           /* node in i7core_edac_list */
        u8                      socket;         /* socket number, lookup key */
        struct pci_dev          **pdev;         /* array of n_devs pointers, may hold NULLs */
        int                     n_devs;         /* length of @pdev */
        struct mem_ctl_info     *mci;
};
234
/*
 * Driver private data, reached through mci->pvt_info.
 */
struct i7core_pvt {
        struct pci_dev  *pci_noncore;
        /* Device 3 functions; index 0 is read as function 0 in get_dimm_config() */
        struct pci_dev  *pci_mcr[MAX_MCR_FUNC + 1];
        /* Devices 4-6, indexed [channel][function] */
        struct pci_dev  *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];

        struct i7core_dev *i7core_dev;

        struct i7core_info      info;
        struct i7core_inject    inject;
        struct i7core_channel   channel[NUM_CHANS];

        int             ce_count_available;
        /* csrow index assigned to each [channel][dimm] by get_dimm_config() */
        int             csrow_map[NUM_CHANS][MAX_DIMMS];

                        /* ECC corrected errors counts per udimm */
        unsigned long   udimm_ce_count[MAX_DIMMS];
        int             udimm_last_ce_count[MAX_DIMMS];
                        /* ECC corrected errors counts per rdimm */
        unsigned long   rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
        int             rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

        unsigned int    is_registered;

        /* mcelog glue */
        struct edac_mce         edac_mce;

        /* Fifo double buffers */
        struct mce              mce_entry[MCE_LOG_LEN];
        struct mce              mce_outentry[MCE_LOG_LEN];

        /* Fifo in/out counters */
        unsigned                mce_in, mce_out;

        /* Count indicator to show errors not got */
        unsigned                mce_overrun;

        /* Struct to control EDAC polling */
        struct edac_pci_ctl_info *i7core_pci;
};
274
/*
 * Designated-initializer fragment for a struct pci_id_descr entry; sets
 * .dev, .func and .dev_id and leaves .optional to the caller.
 */
#define PCI_DESCR(device, function, device_id)  \
        .dev = (device),                        \
        .func = (function),                     \
        .dev_id = (device_id)
279
/*
 * PCI functions looked up with the I7 device IDs: device 3 (memory
 * controller) plus devices 4, 5 and 6 (one per channel, functions 0-3).
 */
static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
                /* Memory controller */
        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },

                /* Exists only for RDIMM */
        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

                /* Channel 0 */
        { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
        { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
        { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
        { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

                /* Channel 1 */
        { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
        { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
        { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
        { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

                /* Channel 2 */
        { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
};
307
/*
 * PCI functions looked up with the Lynnfield device IDs. Unlike the other
 * two tables, this one lists no device 3 function 2 entry and no device 6
 * (third channel) entries.
 */
static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
                /* Memory controller */
        { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
        { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
        { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },

                /* Channel 0 */
        { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
        { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
        { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
        { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },

                /* Channel 1 */
        { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
        { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
        { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
        { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
};
323
/*
 * PCI functions looked up with the *_REV2 device IDs; same device/function
 * layout as pci_dev_descr_i7core_nehalem.
 */
static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
                /* Memory controller */
        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
                        /* Exists only for RDIMM */
        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },

                /* Channel 0 */
        { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
        { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
        { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
        { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },

                /* Channel 1 */
        { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
        { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
        { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
        { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },

                /* Channel 2 */
        { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
};
350
/* Build a pci_id_table entry from a descriptor array and its element count. */
#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
/* All supported descriptor sets; the list ends with an all-zero entry. */
static const struct pci_id_table pci_dev_table[] = {
        PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
        PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
        PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
        {0,}                    /* 0 terminated list. */
};
358
/*
 *      pci_device_id   table for which devices we are looking for
 *
 *      Only two Intel device IDs are listed here; the memory controller
 *      functions themselves are described by pci_dev_table.
 */
static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
        {0,}                    /* 0 terminated list. */
};
367
368 /****************************************************************************
                        Ancillary status routines
370  ****************************************************************************/
371
372         /* MC_CONTROL bits */
373 #define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + ch)))
374 #define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))
375
376         /* MC_STATUS bits */
377 #define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
378 #define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << ch))
379
380         /* MC_MAX_DOD read functions */
381 static inline int numdimms(u32 dimms)
382 {
383         return (dimms & 0x3) + 1;
384 }
385
/* Decode the 2-bit rank field: 0 -> 1, 1 -> 2, 2 -> 4, 3 -> -EINVAL. */
static inline int numrank(u32 rank)
{
        switch (rank & 0x3) {
        case 0:
                return 1;
        case 1:
                return 2;
        case 2:
                return 4;
        default:
                return -EINVAL;
        }
}
392
/* Decode the 2-bit bank field: 0 -> 4, 1 -> 8, 2 -> 16, 3 -> -EINVAL. */
static inline int numbank(u32 bank)
{
        const u32 code = bank & 0x3;

        if (code == 3)
                return -EINVAL;

        return 4 << code;
}
399
/*
 * Decode the 3-bit row field: codes 0..4 map to 2^12 .. 2^16 rows,
 * codes 5..7 yield -EINVAL.
 */
static inline int numrow(u32 row)
{
        const u32 code = row & 0x7;

        if (code > 4)
                return -EINVAL;

        return 1 << (12 + code);
}
409
/*
 * Decode the 2-bit column field: 0 -> 2^10, 1 -> 2^11, 2 -> 2^12,
 * 3 -> -EINVAL.
 *
 * The lookup table holds exactly the four entries reachable through the
 * 0x3 mask and is const, since it is never written.
 */
static inline int numcol(u32 col)
{
        static const int cols[4] = {
                1 << 10, 1 << 11, 1 << 12, -EINVAL,
        };

        return cols[col & 0x3];
}
417
418 static struct i7core_dev *get_i7core_dev(u8 socket)
419 {
420         struct i7core_dev *i7core_dev;
421
422         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
423                 if (i7core_dev->socket == socket)
424                         return i7core_dev;
425         }
426
427         return NULL;
428 }
429
430 static struct i7core_dev *alloc_i7core_dev(u8 socket,
431                                            const struct pci_id_table *table)
432 {
433         struct i7core_dev *i7core_dev;
434
435         i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
436         if (!i7core_dev)
437                 return NULL;
438
439         i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
440                                    GFP_KERNEL);
441         if (!i7core_dev->pdev) {
442                 kfree(i7core_dev);
443                 return NULL;
444         }
445
446         i7core_dev->socket = socket;
447         i7core_dev->n_devs = table->n_devs;
448         list_add_tail(&i7core_dev->list, &i7core_edac_list);
449
450         return i7core_dev;
451 }
452
453 static void free_i7core_dev(struct i7core_dev *i7core_dev)
454 {
455         list_del(&i7core_dev->list);
456         kfree(i7core_dev->pdev);
457         kfree(i7core_dev);
458 }
459
460 /****************************************************************************
461                         Memory check routines
462  ****************************************************************************/
463 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
464                                           unsigned func)
465 {
466         struct i7core_dev *i7core_dev = get_i7core_dev(socket);
467         int i;
468
469         if (!i7core_dev)
470                 return NULL;
471
472         for (i = 0; i < i7core_dev->n_devs; i++) {
473                 if (!i7core_dev->pdev[i])
474                         continue;
475
476                 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
477                     PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
478                         return i7core_dev->pdev[i];
479                 }
480         }
481
482         return NULL;
483 }
484
485 /**
486  * i7core_get_active_channels() - gets the number of channels and csrows
487  * @socket:     Quick Path Interconnect socket
488  * @channels:   Number of channels that will be returned
489  * @csrows:     Number of csrows found
490  *
491  * Since EDAC core needs to know in advance the number of available channels
492  * and csrows, in order to allocate memory for csrows/channels, it is needed
493  * to run two similar steps. At the first step, implemented on this function,
494  * it checks the number of csrows/channels present at one socket.
495  * this is used in order to properly allocate the size of mci components.
496  *
497  * It should be noticed that none of the current available datasheets explain
498  * or even mention how csrows are seen by the memory controller. So, we need
499  * to add a fake description for csrows.
500  * So, this driver is attributing one DIMM memory for one csrow.
501  */
502 static int i7core_get_active_channels(const u8 socket, unsigned *channels,
503                                       unsigned *csrows)
504 {
505         struct pci_dev *pdev = NULL;
506         int i, j;
507         u32 status, control;
508
509         *channels = 0;
510         *csrows = 0;
511
512         pdev = get_pdev_slot_func(socket, 3, 0);
513         if (!pdev) {
514                 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
515                               socket);
516                 return -ENODEV;
517         }
518
519         /* Device 3 function 0 reads */
520         pci_read_config_dword(pdev, MC_STATUS, &status);
521         pci_read_config_dword(pdev, MC_CONTROL, &control);
522
523         for (i = 0; i < NUM_CHANS; i++) {
524                 u32 dimm_dod[3];
525                 /* Check if the channel is active */
526                 if (!(control & (1 << (8 + i))))
527                         continue;
528
529                 /* Check if the channel is disabled */
530                 if (status & (1 << i))
531                         continue;
532
533                 pdev = get_pdev_slot_func(socket, i + 4, 1);
534                 if (!pdev) {
535                         i7core_printk(KERN_ERR, "Couldn't find socket %d "
536                                                 "fn %d.%d!!!\n",
537                                                 socket, i + 4, 1);
538                         return -ENODEV;
539                 }
540                 /* Devices 4-6 function 1 */
541                 pci_read_config_dword(pdev,
542                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
543                 pci_read_config_dword(pdev,
544                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
545                 pci_read_config_dword(pdev,
546                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
547
548                 (*channels)++;
549
550                 for (j = 0; j < 3; j++) {
551                         if (!DIMM_PRESENT(dimm_dod[j]))
552                                 continue;
553                         (*csrows)++;
554                 }
555         }
556
557         debugf0("Number of active channels on socket %d: %d\n",
558                 socket, *channels);
559
560         return 0;
561 }
562
563 static int get_dimm_config(const struct mem_ctl_info *mci)
564 {
565         struct i7core_pvt *pvt = mci->pvt_info;
566         struct csrow_info *csr;
567         struct pci_dev *pdev;
568         int i, j;
569         int csrow = 0;
570         unsigned long last_page = 0;
571         enum edac_type mode;
572         enum mem_type mtype;
573
574         /* Get data from the MC register, function 0 */
575         pdev = pvt->pci_mcr[0];
576         if (!pdev)
577                 return -ENODEV;
578
579         /* Device 3 function 0 reads */
580         pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
581         pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
582         pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
583         pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
584
585         debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
586                 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
587                 pvt->info.max_dod, pvt->info.ch_map);
588
589         if (ECC_ENABLED(pvt)) {
590                 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
591                 if (ECCx8(pvt))
592                         mode = EDAC_S8ECD8ED;
593                 else
594                         mode = EDAC_S4ECD4ED;
595         } else {
596                 debugf0("ECC disabled\n");
597                 mode = EDAC_NONE;
598         }
599
600         /* FIXME: need to handle the error codes */
601         debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
602                 "x%x x 0x%x\n",
603                 numdimms(pvt->info.max_dod),
604                 numrank(pvt->info.max_dod >> 2),
605                 numbank(pvt->info.max_dod >> 4),
606                 numrow(pvt->info.max_dod >> 6),
607                 numcol(pvt->info.max_dod >> 9));
608
609         for (i = 0; i < NUM_CHANS; i++) {
610                 u32 data, dimm_dod[3], value[8];
611
612                 if (!pvt->pci_ch[i][0])
613                         continue;
614
615                 if (!CH_ACTIVE(pvt, i)) {
616                         debugf0("Channel %i is not active\n", i);
617                         continue;
618                 }
619                 if (CH_DISABLED(pvt, i)) {
620                         debugf0("Channel %i is disabled\n", i);
621                         continue;
622                 }
623
624                 /* Devices 4-6 function 0 */
625                 pci_read_config_dword(pvt->pci_ch[i][0],
626                                 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
627
628                 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
629                                                 4 : 2;
630
631                 if (data & REGISTERED_DIMM)
632                         mtype = MEM_RDDR3;
633                 else
634                         mtype = MEM_DDR3;
635 #if 0
636                 if (data & THREE_DIMMS_PRESENT)
637                         pvt->channel[i].dimms = 3;
638                 else if (data & SINGLE_QUAD_RANK_PRESENT)
639                         pvt->channel[i].dimms = 1;
640                 else
641                         pvt->channel[i].dimms = 2;
642 #endif
643
644                 /* Devices 4-6 function 1 */
645                 pci_read_config_dword(pvt->pci_ch[i][1],
646                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
647                 pci_read_config_dword(pvt->pci_ch[i][1],
648                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
649                 pci_read_config_dword(pvt->pci_ch[i][1],
650                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
651
652                 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
653                         "%d ranks, %cDIMMs\n",
654                         i,
655                         RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
656                         data,
657                         pvt->channel[i].ranks,
658                         (data & REGISTERED_DIMM) ? 'R' : 'U');
659
660                 for (j = 0; j < 3; j++) {
661                         u32 banks, ranks, rows, cols;
662                         u32 size, npages;
663
664                         if (!DIMM_PRESENT(dimm_dod[j]))
665                                 continue;
666
667                         banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
668                         ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
669                         rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
670                         cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
671
672                         /* DDR3 has 8 I/O banks */
673                         size = (rows * cols * banks * ranks) >> (20 - 3);
674
675                         pvt->channel[i].dimms++;
676
677                         debugf0("\tdimm %d %d Mb offset: %x, "
678                                 "bank: %d, rank: %d, row: %#x, col: %#x\n",
679                                 j, size,
680                                 RANKOFFSET(dimm_dod[j]),
681                                 banks, ranks, rows, cols);
682
683                         npages = MiB_TO_PAGES(size);
684
685                         csr = &mci->csrows[csrow];
686                         csr->first_page = last_page + 1;
687                         last_page += npages;
688                         csr->last_page = last_page;
689                         csr->nr_pages = npages;
690
691                         csr->page_mask = 0;
692                         csr->grain = 8;
693                         csr->csrow_idx = csrow;
694                         csr->nr_channels = 1;
695
696                         csr->channels[0].chan_idx = i;
697                         csr->channels[0].ce_count = 0;
698
699                         pvt->csrow_map[i][j] = csrow;
700
701                         switch (banks) {
702                         case 4:
703                                 csr->dtype = DEV_X4;
704                                 break;
705                         case 8:
706                                 csr->dtype = DEV_X8;
707                                 break;
708                         case 16:
709                                 csr->dtype = DEV_X16;
710                                 break;
711                         default:
712                                 csr->dtype = DEV_UNKNOWN;
713                         }
714
715                         csr->edac_mode = mode;
716                         csr->mtype = mtype;
717
718                         csrow++;
719                 }
720
721                 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
722                 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
723                 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
724                 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
725                 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
726                 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
727                 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
728                 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
729                 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
730                 for (j = 0; j < 8; j++)
731                         debugf1("\t\t%#x\t%#x\t%#x\n",
732                                 (value[j] >> 27) & 0x1,
733                                 (value[j] >> 24) & 0x7,
734                                 (value[j] && ((1 << 24) - 1)));
735         }
736
737         return 0;
738 }
739
740 /****************************************************************************
741                         Error insertion routines
742  ****************************************************************************/
743
744 /* The i7core has independent error injection features per channel.
745    However, to have a simpler code, we don't allow enabling error injection
746    on more than one channel.
747    Also, since a change at an inject parameter will be applied only at enable,
748    we're disabling error injection on all write calls to the sysfs nodes that
749    controls the error code injection.
750  */
751 static int disable_inject(const struct mem_ctl_info *mci)
752 {
753         struct i7core_pvt *pvt = mci->pvt_info;
754
755         pvt->inject.enable = 0;
756
757         if (!pvt->pci_ch[pvt->inject.channel][0])
758                 return -ENODEV;
759
760         pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
761                                 MC_CHANNEL_ERROR_INJECT, 0);
762
763         return 0;
764 }
765
766 /*
767  * i7core inject inject.section
768  *
769  *      accept and store error injection inject.section value
770  *      bit 0 - refers to the lower 32-byte half cacheline
771  *      bit 1 - refers to the upper 32-byte half cacheline
772  */
773 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
774                                            const char *data, size_t count)
775 {
776         struct i7core_pvt *pvt = mci->pvt_info;
777         unsigned long value;
778         int rc;
779
780         if (pvt->inject.enable)
781                 disable_inject(mci);
782
783         rc = strict_strtoul(data, 10, &value);
784         if ((rc < 0) || (value > 3))
785                 return -EIO;
786
787         pvt->inject.section = (u32) value;
788         return count;
789 }
790
791 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
792                                               char *data)
793 {
794         struct i7core_pvt *pvt = mci->pvt_info;
795         return sprintf(data, "0x%08x\n", pvt->inject.section);
796 }
797
798 /*
799  * i7core inject.type
800  *
801  *      accept and store error injection inject.section value
802  *      bit 0 - repeat enable - Enable error repetition
803  *      bit 1 - inject ECC error
804  *      bit 2 - inject parity error
805  */
806 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
807                                         const char *data, size_t count)
808 {
809         struct i7core_pvt *pvt = mci->pvt_info;
810         unsigned long value;
811         int rc;
812
813         if (pvt->inject.enable)
814                 disable_inject(mci);
815
816         rc = strict_strtoul(data, 10, &value);
817         if ((rc < 0) || (value > 7))
818                 return -EIO;
819
820         pvt->inject.type = (u32) value;
821         return count;
822 }
823
824 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
825                                               char *data)
826 {
827         struct i7core_pvt *pvt = mci->pvt_info;
828         return sprintf(data, "0x%08x\n", pvt->inject.type);
829 }
830
831 /*
832  * i7core_inject_inject.eccmask_store
833  *
834  * The type of error (UE/CE) will depend on the inject.eccmask value:
835  *   Any bits set to a 1 will flip the corresponding ECC bit
836  *   Correctable errors can be injected by flipping 1 bit or the bits within
837  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
838  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
839  *   uncorrectable error to be injected.
840  */
841 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
842                                         const char *data, size_t count)
843 {
844         struct i7core_pvt *pvt = mci->pvt_info;
845         unsigned long value;
846         int rc;
847
848         if (pvt->inject.enable)
849                 disable_inject(mci);
850
851         rc = strict_strtoul(data, 10, &value);
852         if (rc < 0)
853                 return -EIO;
854
855         pvt->inject.eccmask = (u32) value;
856         return count;
857 }
858
859 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
860                                               char *data)
861 {
862         struct i7core_pvt *pvt = mci->pvt_info;
863         return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
864 }
865
/*
 * i7core_addrmatch
 *
 * DECLARE_ADDR_MATCH(param, limit) generates the sysfs show/store pair for
 * one address-match criterion, kept in pvt->inject.<param>.
 *
 * store: disables any injection currently enabled, then accepts either
 *        "any" (case-insensitive, optionally followed by a newline), which
 *        is stored as -1, or a decimal number strictly below limit.
 *        Anything else yields -EIO. Returns count on success.
 *        The number is parsed into an unsigned long and range-checked
 *        before being converted, so an oversized input can never wrap to
 *        a negative value and be mistaken for "any".
 * show:  prints "any" when the stored value is negative, otherwise the
 *        stored value in decimal.
 */

#define DECLARE_ADDR_MATCH(param, limit)			\
static ssize_t i7core_inject_store_##param(			\
		struct mem_ctl_info *mci,			\
		const char *data, size_t count)			\
{								\
	struct i7core_pvt *pvt;					\
	unsigned long parsed;					\
	long value;						\
	int rc;							\
								\
	debugf1("%s()\n", __func__);				\
	pvt = mci->pvt_info;					\
								\
	if (pvt->inject.enable)					\
		disable_inject(mci);				\
								\
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
		value = -1;					\
	else {							\
		rc = strict_strtoul(data, 10, &parsed);		\
		if ((rc < 0) || (parsed >= (limit)))		\
			return -EIO;				\
		value = parsed;					\
	}							\
								\
	pvt->inject.param = value;				\
								\
	return count;						\
}								\
								\
static ssize_t i7core_inject_show_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt;					\
								\
	pvt = mci->pvt_info;					\
	debugf1("%s() pvt=%p\n", __func__, pvt);		\
	if (pvt->inject.param < 0)				\
		return sprintf(data, "any\n");			\
	else							\
		return sprintf(data, "%d\n", pvt->inject.param);\
}
918
/*
 * ATTR_ADDR_MATCH(param) - attribute initializer named after param, with
 * mode S_IRUGO | S_IWUSR, wired to the i7core_inject_show_<param> and
 * i7core_inject_store_<param> handlers.
 */
#define ATTR_ADDR_MATCH(param)						\
{									\
	.attr  = { .name = #param, .mode = S_IRUGO | S_IWUSR },	\
	.show  = i7core_inject_show_##param,				\
	.store = i7core_inject_store_##param,				\
}
928
/*
 * Instantiate the show/store handlers for every address-match criterion.
 * The second argument is the exclusive upper bound the store handler
 * accepts for a numeric value.
 */
DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);
935
936 static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
937 {
938         u32 read;
939         int count;
940
941         debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
942                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
943                 where, val);
944
945         for (count = 0; count < 10; count++) {
946                 if (count)
947                         msleep(100);
948                 pci_write_config_dword(dev, where, val);
949                 pci_read_config_dword(dev, where, &read);
950
951                 if (read == val)
952                         return 0;
953         }
954
955         i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
956                 "write=%08x. Read=%08x\n",
957                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
958                 where, val, read);
959
960         return -EINVAL;
961 }
962
963 /*
964  * This routine prepares the Memory Controller for error injection.
965  * The error will be injected when some process tries to write to the
966  * memory that matches the given criteria.
967  * The criteria can be set in terms of a mask where dimm, rank, bank, page
968  * and col can be specified.
969  * A -1 value for any of the mask items will make the MCU to ignore
970  * that matching criteria for error injection.
971  *
972  * It should be noticed that the error will only happen after a write operation
973  * on a memory that matches the condition. if REPEAT_EN is not enabled at
974  * inject mask, then it will produce just one error. Otherwise, it will repeat
975  * until the injectmask would be cleaned.
976  *
977  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
978  *    is reliable enough to check if the MC is using the
979  *    three channels. However, this is not clear at the datasheet.
980  */
981 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
982                                        const char *data, size_t count)
983 {
984         struct i7core_pvt *pvt = mci->pvt_info;
985         u32 injectmask;
986         u64 mask = 0;
987         int  rc;
988         long enable;
989
990         if (!pvt->pci_ch[pvt->inject.channel][0])
991                 return 0;
992
993         rc = strict_strtoul(data, 10, &enable);
994         if ((rc < 0))
995                 return 0;
996
997         if (enable) {
998                 pvt->inject.enable = 1;
999         } else {
1000                 disable_inject(mci);
1001                 return count;
1002         }
1003
1004         /* Sets pvt->inject.dimm mask */
1005         if (pvt->inject.dimm < 0)
1006                 mask |= 1LL << 41;
1007         else {
1008                 if (pvt->channel[pvt->inject.channel].dimms > 2)
1009                         mask |= (pvt->inject.dimm & 0x3LL) << 35;
1010                 else
1011                         mask |= (pvt->inject.dimm & 0x1LL) << 36;
1012         }
1013
1014         /* Sets pvt->inject.rank mask */
1015         if (pvt->inject.rank < 0)
1016                 mask |= 1LL << 40;
1017         else {
1018                 if (pvt->channel[pvt->inject.channel].dimms > 2)
1019                         mask |= (pvt->inject.rank & 0x1LL) << 34;
1020                 else
1021                         mask |= (pvt->inject.rank & 0x3LL) << 34;
1022         }
1023
1024         /* Sets pvt->inject.bank mask */
1025         if (pvt->inject.bank < 0)
1026                 mask |= 1LL << 39;
1027         else
1028                 mask |= (pvt->inject.bank & 0x15LL) << 30;
1029
1030         /* Sets pvt->inject.page mask */
1031         if (pvt->inject.page < 0)
1032                 mask |= 1LL << 38;
1033         else
1034                 mask |= (pvt->inject.page & 0xffff) << 14;
1035
1036         /* Sets pvt->inject.column mask */
1037         if (pvt->inject.col < 0)
1038                 mask |= 1LL << 37;
1039         else
1040                 mask |= (pvt->inject.col & 0x3fff);
1041
1042         /*
1043          * bit    0: REPEAT_EN
1044          * bits 1-2: MASK_HALF_CACHELINE
1045          * bit    3: INJECT_ECC
1046          * bit    4: INJECT_ADDR_PARITY
1047          */
1048
1049         injectmask = (pvt->inject.type & 1) |
1050                      (pvt->inject.section & 0x3) << 1 |
1051                      (pvt->inject.type & 0x6) << (3 - 1);
1052
1053         /* Unlock writes to registers - this register is write only */
1054         pci_write_config_dword(pvt->pci_noncore,
1055                                MC_CFG_CONTROL, 0x2);
1056
1057         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1058                                MC_CHANNEL_ADDR_MATCH, mask);
1059         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1060                                MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1061
1062         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1063                                MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1064
1065         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1066                                MC_CHANNEL_ERROR_INJECT, injectmask);
1067
1068         /*
1069          * This is something undocumented, based on my tests
1070          * Without writing 8 to this register, errors aren't injected. Not sure
1071          * why.
1072          */
1073         pci_write_config_dword(pvt->pci_noncore,
1074                                MC_CFG_CONTROL, 8);
1075
1076         debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1077                 " inject 0x%08x\n",
1078                 mask, pvt->inject.eccmask, injectmask);
1079
1080
1081         return count;
1082 }
1083
1084 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1085                                         char *data)
1086 {
1087         struct i7core_pvt *pvt = mci->pvt_info;
1088         u32 injectmask;
1089
1090         if (!pvt->pci_ch[pvt->inject.channel][0])
1091                 return 0;
1092
1093         pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1094                                MC_CHANNEL_ERROR_INJECT, &injectmask);
1095
1096         debugf0("Inject error read: 0x%018x\n", injectmask);
1097
1098         if (injectmask & 0x0c)
1099                 pvt->inject.enable = 1;
1100
1101         return sprintf(data, "%d\n", pvt->inject.enable);
1102 }
1103
/*
 * DECLARE_COUNTER(param) - generate the sysfs show handler for
 * pvt->udimm_ce_count[param].
 *
 * The counter is printed in decimal only once ce_count_available is set
 * and is_registered is clear; otherwise "data unavailable" is printed.
 */
#define DECLARE_COUNTER(param)					\
static ssize_t i7core_show_counter_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt = mci->pvt_info;			\
								\
	debugf1("%s() \n", __func__);				\
	if (pvt->ce_count_available && !pvt->is_registered)	\
		return sprintf(data, "%lu\n",			\
				pvt->udimm_ce_count[param]);	\
								\
	return sprintf(data, "data unavailable\n");		\
}
1117
/*
 * ATTR_COUNTER(param) - attribute initializer named "udimm<param>", wired
 * to i7core_show_counter_<param>.
 *
 * There is no store handler, so the attribute is created read-only rather
 * than advertising a write permission that cannot be honoured.
 */
#define ATTR_COUNTER(param)					\
	{							\
		.attr = {					\
			.name = __stringify(udimm##param),	\
			.mode = S_IRUGO				\
		},						\
		.show  = i7core_show_counter_##param		\
	}
1126
/* Show handlers for udimm_ce_count[0], [1] and [2] */
DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
1130
1131 /*
1132  * Sysfs struct
1133  */
1134
/*
 * One attribute per address-match criterion, each built by
 * ATTR_ADDR_MATCH(); the list ends with an empty entry.
 */
static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
        ATTR_ADDR_MATCH(channel),
        ATTR_ADDR_MATCH(dimm),
        ATTR_ADDR_MATCH(rank),
        ATTR_ADDR_MATCH(bank),
        ATTR_ADDR_MATCH(page),
        ATTR_ADDR_MATCH(col),
        { } /* End of list */
};
1144
/* Group named "inject_addrmatch" holding the address-match attributes */
static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
        .name  = "inject_addrmatch",
        .mcidev_attr = i7core_addrmatch_attrs,
};
1149
1150 static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1151         ATTR_COUNTER(0),
1152         ATTR_COUNTER(1),
1153         ATTR_COUNTER(2),
1154         { .attr = { .name = NULL } }
1155 };
1156
/* Group named "all_channel_counts" holding the udimm counter attributes */
static const struct mcidev_sysfs_group i7core_udimm_counters = {
        .name  = "all_channel_counts",
        .mcidev_attr = i7core_udimm_counters_attrs,
};
1161
1162 static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
1163         {
1164                 .attr = {
1165                         .name = "inject_section",
1166                         .mode = (S_IRUGO | S_IWUSR)
1167                 },
1168                 .show  = i7core_inject_section_show,
1169                 .store = i7core_inject_section_store,
1170         }, {
1171                 .attr = {
1172                         .name = "inject_type",
1173                         .mode = (S_IRUGO | S_IWUSR)
1174                 },
1175                 .show  = i7core_inject_type_show,
1176                 .store = i7core_inject_type_store,
1177         }, {
1178                 .attr = {
1179                         .name = "inject_eccmask",
1180                         .mode = (S_IRUGO | S_IWUSR)
1181                 },
1182                 .show  = i7core_inject_eccmask_show,
1183                 .store = i7core_inject_eccmask_store,
1184         }, {
1185                 .grp = &i7core_inject_addrmatch,
1186         }, {
1187                 .attr = {
1188                         .name = "inject_enable",
1189                         .mode = (S_IRUGO | S_IWUSR)
1190                 },
1191                 .show  = i7core_inject_enable_show,
1192                 .store = i7core_inject_enable_store,
1193         },
1194         { }     /* End of list */
1195 };
1196
1197 static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
1198         {
1199                 .attr = {
1200                         .name = "inject_section",
1201                         .mode = (S_IRUGO | S_IWUSR)
1202                 },
1203                 .show  = i7core_inject_section_show,
1204                 .store = i7core_inject_section_store,
1205         }, {
1206                 .attr = {
1207                         .name = "inject_type",
1208                         .mode = (S_IRUGO | S_IWUSR)
1209                 },
1210                 .show  = i7core_inject_type_show,
1211                 .store = i7core_inject_type_store,
1212         }, {
1213                 .attr = {
1214                         .name = "inject_eccmask",
1215                         .mode = (S_IRUGO | S_IWUSR)
1216                 },
1217                 .show  = i7core_inject_eccmask_show,
1218                 .store = i7core_inject_eccmask_store,
1219         }, {
1220                 .grp = &i7core_inject_addrmatch,
1221         }, {
1222                 .attr = {
1223                         .name = "inject_enable",
1224                         .mode = (S_IRUGO | S_IWUSR)
1225                 },
1226                 .show  = i7core_inject_enable_show,
1227                 .store = i7core_inject_enable_store,
1228         }, {
1229                 .grp = &i7core_udimm_counters,
1230         },
1231         { }     /* End of list */
1232 };
1233
1234 /****************************************************************************
1235         Device initialization routines: put/get, init/exit
1236  ****************************************************************************/
1237
1238 /*
1239  *      i7core_put_all_devices  'put' all the devices that we have
1240  *                              reserved via 'get'
1241  */
1242 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1243 {
1244         int i;
1245
1246         debugf0(__FILE__ ": %s()\n", __func__);
1247         for (i = 0; i < i7core_dev->n_devs; i++) {
1248                 struct pci_dev *pdev = i7core_dev->pdev[i];
1249                 if (!pdev)
1250                         continue;
1251                 debugf0("Removing dev %02x:%02x.%d\n",
1252                         pdev->bus->number,
1253                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1254                 pci_dev_put(pdev);
1255         }
1256 }
1257
1258 static void i7core_put_all_devices(void)
1259 {
1260         struct i7core_dev *i7core_dev, *tmp;
1261
1262         list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1263                 i7core_put_devices(i7core_dev);
1264                 free_i7core_dev(i7core_dev);
1265         }
1266 }
1267
1268 static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1269 {
1270         struct pci_dev *pdev = NULL;
1271         int i;
1272
1273         /*
1274          * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1275          * aren't announced by acpi. So, we need to use a legacy scan probing
1276          * to detect them
1277          */
1278         while (table && table->descr) {
1279                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1280                 if (unlikely(!pdev)) {
1281                         for (i = 0; i < MAX_SOCKET_BUSES; i++)
1282                                 pcibios_scan_specific_bus(255-i);
1283                 }
1284                 pci_dev_put(pdev);
1285                 table++;
1286         }
1287 }
1288
1289 static unsigned i7core_pci_lastbus(void)
1290 {
1291         int last_bus = 0, bus;
1292         struct pci_bus *b = NULL;
1293
1294         while ((b = pci_find_next_bus(b)) != NULL) {
1295                 bus = b->number;
1296                 debugf0("Found bus %d\n", bus);
1297                 if (bus > last_bus)
1298                         last_bus = bus;
1299         }
1300
1301         debugf0("Last bus %d\n", last_bus);
1302
1303         return last_bus;
1304 }
1305
1306 /*
1307  *      i7core_get_all_devices  Find and perform 'get' operation on the MCH's
1308  *                      device/functions we want to reference for this driver
1309  *
1310  *                      Need to 'get' device 16 func 1 and func 2
1311  */
1312 static int i7core_get_onedevice(struct pci_dev **prev,
1313                                 const struct pci_id_table *table,
1314                                 const unsigned devno,
1315                                 const unsigned last_bus)
1316 {
1317         struct i7core_dev *i7core_dev;
1318         const struct pci_id_descr *dev_descr = &table->descr[devno];
1319
1320         struct pci_dev *pdev = NULL;
1321         u8 bus = 0;
1322         u8 socket = 0;
1323
1324         pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1325                               dev_descr->dev_id, *prev);
1326
1327         if (!pdev) {
1328                 if (*prev) {
1329                         *prev = pdev;
1330                         return 0;
1331                 }
1332
1333                 if (dev_descr->optional)
1334                         return 0;
1335
1336                 if (devno == 0)
1337                         return -ENODEV;
1338
1339                 i7core_printk(KERN_INFO,
1340                         "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1341                         dev_descr->dev, dev_descr->func,
1342                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1343
1344                 /* End of list, leave */
1345                 return -ENODEV;
1346         }
1347         bus = pdev->bus->number;
1348
1349         socket = last_bus - bus;
1350
1351         i7core_dev = get_i7core_dev(socket);
1352         if (!i7core_dev) {
1353                 i7core_dev = alloc_i7core_dev(socket, table);
1354                 if (!i7core_dev) {
1355                         pci_dev_put(pdev);
1356                         return -ENOMEM;
1357                 }
1358         }
1359
1360         if (i7core_dev->pdev[devno]) {
1361                 i7core_printk(KERN_ERR,
1362                         "Duplicated device for "
1363                         "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1364                         bus, dev_descr->dev, dev_descr->func,
1365                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1366                 pci_dev_put(pdev);
1367                 return -ENODEV;
1368         }
1369
1370         i7core_dev->pdev[devno] = pdev;
1371
1372         /* Sanity check */
1373         if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1374                         PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1375                 i7core_printk(KERN_ERR,
1376                         "Device PCI ID %04x:%04x "
1377                         "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1378                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1379                         bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1380                         bus, dev_descr->dev, dev_descr->func);
1381                 return -ENODEV;
1382         }
1383
1384         /* Be sure that the device is enabled */
1385         if (unlikely(pci_enable_device(pdev) < 0)) {
1386                 i7core_printk(KERN_ERR,
1387                         "Couldn't enable "
1388                         "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1389                         bus, dev_descr->dev, dev_descr->func,
1390                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1391                 return -ENODEV;
1392         }
1393
1394         debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1395                 socket, bus, dev_descr->dev,
1396                 dev_descr->func,
1397                 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1398
1399         /*
1400          * As stated on drivers/pci/search.c, the reference count for
1401          * @from is always decremented if it is not %NULL. So, as we need
1402          * to get all devices up to null, we need to do a get for the device
1403          */
1404         pci_dev_get(pdev);
1405
1406         *prev = pdev;
1407
1408         return 0;
1409 }
1410
1411 static int i7core_get_all_devices(void)
1412 {
1413         int i, rc, last_bus;
1414         struct pci_dev *pdev = NULL;
1415         const struct pci_id_table *table = pci_dev_table;
1416
1417         last_bus = i7core_pci_lastbus();
1418
1419         while (table && table->descr) {
1420                 for (i = 0; i < table->n_devs; i++) {
1421                         pdev = NULL;
1422                         do {
1423                                 rc = i7core_get_onedevice(&pdev, table, i,
1424                                                           last_bus);
1425                                 if (rc < 0) {
1426                                         if (i == 0) {
1427                                                 i = table->n_devs;
1428                                                 break;
1429                                         }
1430                                         i7core_put_all_devices();
1431                                         return -ENODEV;
1432                                 }
1433                         } while (pdev);
1434                 }
1435                 table++;
1436         }
1437
1438         return 0;
1439 }
1440
1441 static int mci_bind_devs(struct mem_ctl_info *mci,
1442                          struct i7core_dev *i7core_dev)
1443 {
1444         struct i7core_pvt *pvt = mci->pvt_info;
1445         struct pci_dev *pdev;
1446         int i, func, slot;
1447
1448         pvt->is_registered = 0;
1449         for (i = 0; i < i7core_dev->n_devs; i++) {
1450                 pdev = i7core_dev->pdev[i];
1451                 if (!pdev)
1452                         continue;
1453
1454                 func = PCI_FUNC(pdev->devfn);
1455                 slot = PCI_SLOT(pdev->devfn);
1456                 if (slot == 3) {
1457                         if (unlikely(func > MAX_MCR_FUNC))
1458                                 goto error;
1459                         pvt->pci_mcr[func] = pdev;
1460                 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1461                         if (unlikely(func > MAX_CHAN_FUNC))
1462                                 goto error;
1463                         pvt->pci_ch[slot - 4][func] = pdev;
1464                 } else if (!slot && !func)
1465                         pvt->pci_noncore = pdev;
1466                 else
1467                         goto error;
1468
1469                 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1470                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1471                         pdev, i7core_dev->socket);
1472
1473                 if (PCI_SLOT(pdev->devfn) == 3 &&
1474                         PCI_FUNC(pdev->devfn) == 2)
1475                         pvt->is_registered = 1;
1476         }
1477
1478         return 0;
1479
1480 error:
1481         i7core_printk(KERN_ERR, "Device %d, function %d "
1482                       "is out of the expected range\n",
1483                       slot, func);
1484         return -EINVAL;
1485 }
1486
1487 /****************************************************************************
1488                         Error check routines
1489  ****************************************************************************/
1490 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1491                                       const int chan,
1492                                       const int dimm,
1493                                       const int add)
1494 {
1495         char *msg;
1496         struct i7core_pvt *pvt = mci->pvt_info;
1497         int row = pvt->csrow_map[chan][dimm], i;
1498
1499         for (i = 0; i < add; i++) {
1500                 msg = kasprintf(GFP_KERNEL, "Corrected error "
1501                                 "(Socket=%d channel=%d dimm=%d)",
1502                                 pvt->i7core_dev->socket, chan, dimm);
1503
1504                 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1505                 kfree (msg);
1506         }
1507 }
1508
1509 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1510                                          const int chan,
1511                                          const int new0,
1512                                          const int new1,
1513                                          const int new2)
1514 {
1515         struct i7core_pvt *pvt = mci->pvt_info;
1516         int add0 = 0, add1 = 0, add2 = 0;
1517         /* Updates CE counters if it is not the first time here */
1518         if (pvt->ce_count_available) {
1519                 /* Updates CE counters */
1520
1521                 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1522                 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1523                 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1524
1525                 if (add2 < 0)
1526                         add2 += 0x7fff;
1527                 pvt->rdimm_ce_count[chan][2] += add2;
1528
1529                 if (add1 < 0)
1530                         add1 += 0x7fff;
1531                 pvt->rdimm_ce_count[chan][1] += add1;
1532
1533                 if (add0 < 0)
1534                         add0 += 0x7fff;
1535                 pvt->rdimm_ce_count[chan][0] += add0;
1536         } else
1537                 pvt->ce_count_available = 1;
1538
1539         /* Store the new values */
1540         pvt->rdimm_last_ce_count[chan][2] = new2;
1541         pvt->rdimm_last_ce_count[chan][1] = new1;
1542         pvt->rdimm_last_ce_count[chan][0] = new0;
1543
1544         /*updated the edac core */
1545         if (add0 != 0)
1546                 i7core_rdimm_update_csrow(mci, chan, 0, add0);
1547         if (add1 != 0)
1548                 i7core_rdimm_update_csrow(mci, chan, 1, add1);
1549         if (add2 != 0)
1550                 i7core_rdimm_update_csrow(mci, chan, 2, add2);
1551
1552 }
1553
1554 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1555 {
1556         struct i7core_pvt *pvt = mci->pvt_info;
1557         u32 rcv[3][2];
1558         int i, new0, new1, new2;
1559
1560         /*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
1561         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1562                                                                 &rcv[0][0]);
1563         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1564                                                                 &rcv[0][1]);
1565         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1566                                                                 &rcv[1][0]);
1567         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1568                                                                 &rcv[1][1]);
1569         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1570                                                                 &rcv[2][0]);
1571         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1572                                                                 &rcv[2][1]);
1573         for (i = 0 ; i < 3; i++) {
1574                 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1575                         (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1576                 /*if the channel has 3 dimms*/
1577                 if (pvt->channel[i].dimms > 2) {
1578                         new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1579                         new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1580                         new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1581                 } else {
1582                         new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1583                                         DIMM_BOT_COR_ERR(rcv[i][0]);
1584                         new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1585                                         DIMM_BOT_COR_ERR(rcv[i][1]);
1586                         new2 = 0;
1587                 }
1588
1589                 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1590         }
1591 }
1592
1593 /* This function is based on the device 3 function 4 registers as described on:
1594  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1595  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1596  * also available at:
1597  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1598  */
1599 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1600 {
1601         struct i7core_pvt *pvt = mci->pvt_info;
1602         u32 rcv1, rcv0;
1603         int new0, new1, new2;
1604
1605         if (!pvt->pci_mcr[4]) {
1606                 debugf0("%s MCR registers not found\n", __func__);
1607                 return;
1608         }
1609
1610         /* Corrected test errors */
1611         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1612         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1613
1614         /* Store the new values */
1615         new2 = DIMM2_COR_ERR(rcv1);
1616         new1 = DIMM1_COR_ERR(rcv0);
1617         new0 = DIMM0_COR_ERR(rcv0);
1618
1619         /* Updates CE counters if it is not the first time here */
1620         if (pvt->ce_count_available) {
1621                 /* Updates CE counters */
1622                 int add0, add1, add2;
1623
1624                 add2 = new2 - pvt->udimm_last_ce_count[2];
1625                 add1 = new1 - pvt->udimm_last_ce_count[1];
1626                 add0 = new0 - pvt->udimm_last_ce_count[0];
1627
1628                 if (add2 < 0)
1629                         add2 += 0x7fff;
1630                 pvt->udimm_ce_count[2] += add2;
1631
1632                 if (add1 < 0)
1633                         add1 += 0x7fff;
1634                 pvt->udimm_ce_count[1] += add1;
1635
1636                 if (add0 < 0)
1637                         add0 += 0x7fff;
1638                 pvt->udimm_ce_count[0] += add0;
1639
1640                 if (add0 | add1 | add2)
1641                         i7core_printk(KERN_ERR, "New Corrected error(s): "
1642                                       "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1643                                       add0, add1, add2);
1644         } else
1645                 pvt->ce_count_available = 1;
1646
1647         /* Store the new values */
1648         pvt->udimm_last_ce_count[2] = new2;
1649         pvt->udimm_last_ce_count[1] = new1;
1650         pvt->udimm_last_ce_count[0] = new0;
1651 }
1652
1653 /*
1654  * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1655  * Architectures Software Developer’s Manual Volume 3B.
1656  * Nehalem are defined as family 0x06, model 0x1a
1657  *
1658  * The MCA registers used here are the following ones:
1659  *     struct mce field MCA Register
1660  *     m->status        MSR_IA32_MC8_STATUS
1661  *     m->addr          MSR_IA32_MC8_ADDR
1662  *     m->misc          MSR_IA32_MC8_MISC
1663  * In the case of Nehalem, the error information is masked at .status and .misc
1664  * fields
1665  */
1666 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1667                                     const struct mce *m)
1668 {
1669         struct i7core_pvt *pvt = mci->pvt_info;
1670         char *type, *optype, *err, *msg;
1671         unsigned long error = m->status & 0x1ff0000l;
1672         u32 optypenum = (m->status >> 4) & 0x07;
1673         u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1674         u32 dimm = (m->misc >> 16) & 0x3;
1675         u32 channel = (m->misc >> 18) & 0x3;
1676         u32 syndrome = m->misc >> 32;
1677         u32 errnum = find_first_bit(&error, 32);
1678         int csrow;
1679
1680         if (m->mcgstatus & 1)
1681                 type = "FATAL";
1682         else
1683                 type = "NON_FATAL";
1684
1685         switch (optypenum) {
1686         case 0:
1687                 optype = "generic undef request";
1688                 break;
1689         case 1:
1690                 optype = "read error";
1691                 break;
1692         case 2:
1693                 optype = "write error";
1694                 break;
1695         case 3:
1696                 optype = "addr/cmd error";
1697                 break;
1698         case 4:
1699                 optype = "scrubbing error";
1700                 break;
1701         default:
1702                 optype = "reserved";
1703                 break;
1704         }
1705
1706         switch (errnum) {
1707         case 16:
1708                 err = "read ECC error";
1709                 break;
1710         case 17:
1711                 err = "RAS ECC error";
1712                 break;
1713         case 18:
1714                 err = "write parity error";
1715                 break;
1716         case 19:
1717                 err = "redundacy loss";
1718                 break;
1719         case 20:
1720                 err = "reserved";
1721                 break;
1722         case 21:
1723                 err = "memory range error";
1724                 break;
1725         case 22:
1726                 err = "RTID out of range";
1727                 break;
1728         case 23:
1729                 err = "address parity error";
1730                 break;
1731         case 24:
1732                 err = "byte enable parity error";
1733                 break;
1734         default:
1735                 err = "unknown";
1736         }
1737
1738         /* FIXME: should convert addr into bank and rank information */
1739         msg = kasprintf(GFP_ATOMIC,
1740                 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1741                 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1742                 type, (long long) m->addr, m->cpu, dimm, channel,
1743                 syndrome, core_err_cnt, (long long)m->status,
1744                 (long long)m->misc, optype, err);
1745
1746         debugf0("%s", msg);
1747
1748         csrow = pvt->csrow_map[channel][dimm];
1749
1750         /* Call the helper to output message */
1751         if (m->mcgstatus & 1)
1752                 edac_mc_handle_fbd_ue(mci, csrow, 0,
1753                                 0 /* FIXME: should be channel here */, msg);
1754         else if (!pvt->is_registered)
1755                 edac_mc_handle_fbd_ce(mci, csrow,
1756                                 0 /* FIXME: should be channel here */, msg);
1757
1758         kfree(msg);
1759 }
1760
1761 /*
1762  *      i7core_check_error      Retrieve and process errors reported by the
1763  *                              hardware. Called by the Core module.
1764  */
1765 static void i7core_check_error(struct mem_ctl_info *mci)
1766 {
1767         struct i7core_pvt *pvt = mci->pvt_info;
1768         int i;
1769         unsigned count = 0;
1770         struct mce *m;
1771
1772         /*
1773          * MCE first step: Copy all mce errors into a temporary buffer
1774          * We use a double buffering here, to reduce the risk of
1775          * loosing an error.
1776          */
1777         smp_rmb();
1778         count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1779                 % MCE_LOG_LEN;
1780         if (!count)
1781                 goto check_ce_error;
1782
1783         m = pvt->mce_outentry;
1784         if (pvt->mce_in + count > MCE_LOG_LEN) {
1785                 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1786
1787                 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1788                 smp_wmb();
1789                 pvt->mce_in = 0;
1790                 count -= l;
1791                 m += l;
1792         }
1793         memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1794         smp_wmb();
1795         pvt->mce_in += count;
1796
1797         smp_rmb();
1798         if (pvt->mce_overrun) {
1799                 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1800                               pvt->mce_overrun);
1801                 smp_wmb();
1802                 pvt->mce_overrun = 0;
1803         }
1804
1805         /*
1806          * MCE second step: parse errors and display
1807          */
1808         for (i = 0; i < count; i++)
1809                 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1810
1811         /*
1812          * Now, let's increment CE error counts
1813          */
1814 check_ce_error:
1815         if (!pvt->is_registered)
1816                 i7core_udimm_check_mc_ecc_err(mci);
1817         else
1818                 i7core_rdimm_check_mc_ecc_err(mci);
1819 }
1820
1821 /*
1822  * i7core_mce_check_error       Replicates mcelog routine to get errors
1823  *                              This routine simply queues mcelog errors, and
1824  *                              return. The error itself should be handled later
1825  *                              by i7core_check_error.
1826  * WARNING: As this routine should be called at NMI time, extra care should
1827  * be taken to avoid deadlocks, and to be as fast as possible.
1828  */
1829 static int i7core_mce_check_error(void *priv, struct mce *mce)
1830 {
1831         struct mem_ctl_info *mci = priv;
1832         struct i7core_pvt *pvt = mci->pvt_info;
1833
1834         /*
1835          * Just let mcelog handle it if the error is
1836          * outside the memory controller
1837          */
1838         if (((mce->status & 0xffff) >> 7) != 1)
1839                 return 0;
1840
1841         /* Bank 8 registers are the only ones that we know how to handle */
1842         if (mce->bank != 8)
1843                 return 0;
1844
1845 #ifdef CONFIG_SMP
1846         /* Only handle if it is the right mc controller */
1847         if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
1848                 return 0;
1849 #endif
1850
1851         smp_rmb();
1852         if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1853                 smp_wmb();
1854                 pvt->mce_overrun++;
1855                 return 0;
1856         }
1857
1858         /* Copy memory error at the ringbuffer */
1859         memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1860         smp_wmb();
1861         pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1862
1863         /* Handle fatal errors immediately */
1864         if (mce->mcgstatus & 1)
1865                 i7core_check_error(mci);
1866
1867         /* Advice mcelog that the error were handled */
1868         return 1;
1869 }
1870
1871 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
1872 {
1873         pvt->i7core_pci = edac_pci_create_generic_ctl(
1874                                                 &pvt->i7core_dev->pdev[0]->dev,
1875                                                 EDAC_MOD_STR);
1876         if (unlikely(!pvt->i7core_pci))
1877                 pr_warn("Unable to setup PCI error report via EDAC\n");
1878 }
1879
1880 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
1881 {
1882         if (likely(pvt->i7core_pci))
1883                 edac_pci_release_generic_ctl(pvt->i7core_pci);
1884         else
1885                 i7core_printk(KERN_ERR,
1886                                 "Couldn't find mem_ctl_info for socket %d\n",
1887                                 pvt->i7core_dev->socket);
1888         pvt->i7core_pci = NULL;
1889 }
1890
1891 static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
1892 {
1893         struct mem_ctl_info *mci = i7core_dev->mci;
1894         struct i7core_pvt *pvt;
1895
1896         if (unlikely(!mci || !mci->pvt_info)) {
1897                 debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
1898                         __func__, &i7core_dev->pdev[0]->dev);
1899
1900                 i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
1901                 return;
1902         }
1903
1904         pvt = mci->pvt_info;
1905
1906         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
1907                 __func__, mci, &i7core_dev->pdev[0]->dev);
1908
1909         /* Disable MCE NMI handler */
1910         edac_mce_unregister(&pvt->edac_mce);
1911
1912         /* Disable EDAC polling */
1913         i7core_pci_ctl_release(pvt);
1914
1915         /* Remove MC sysfs nodes */
1916         edac_mc_del_mc(mci->dev);
1917
1918         debugf1("%s: free mci struct\n", mci->ctl_name);
1919         kfree(mci->ctl_name);
1920         edac_mc_free(mci);
1921         i7core_dev->mci = NULL;
1922 }
1923
1924 static int i7core_register_mci(struct i7core_dev *i7core_dev)
1925 {
1926         struct mem_ctl_info *mci;
1927         struct i7core_pvt *pvt;
1928         int rc, channels, csrows;
1929
1930         /* Check the number of active and not disabled channels */
1931         rc = i7core_get_active_channels(i7core_dev->socket, &channels, &csrows);
1932         if (unlikely(rc < 0))
1933                 return rc;
1934
1935         /* allocate a new MC control structure */
1936         mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, i7core_dev->socket);
1937         if (unlikely(!mci))
1938                 return -ENOMEM;
1939
1940         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
1941                 __func__, mci, &i7core_dev->pdev[0]->dev);
1942
1943         pvt = mci->pvt_info;
1944         memset(pvt, 0, sizeof(*pvt));
1945
1946         /* Associates i7core_dev and mci for future usage */
1947         pvt->i7core_dev = i7core_dev;
1948         i7core_dev->mci = mci;
1949
1950         /*
1951          * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1952          * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1953          * memory channels
1954          */
1955         mci->mtype_cap = MEM_FLAG_DDR3;
1956         mci->edac_ctl_cap = EDAC_FLAG_NONE;
1957         mci->edac_cap = EDAC_FLAG_NONE;
1958         mci->mod_name = "i7core_edac.c";
1959         mci->mod_ver = I7CORE_REVISION;
1960         mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1961                                   i7core_dev->socket);
1962         mci->dev_name = pci_name(i7core_dev->pdev[0]);
1963         mci->ctl_page_to_phys = NULL;
1964
1965         /* Store pci devices at mci for faster access */
1966         rc = mci_bind_devs(mci, i7core_dev);
1967         if (unlikely(rc < 0))
1968                 goto fail0;
1969
1970         if (pvt->is_registered)
1971                 mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
1972         else
1973                 mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;
1974
1975         /* Get dimm basic config */
1976         get_dimm_config(mci);
1977         /* record ptr to the generic device */
1978         mci->dev = &i7core_dev->pdev[0]->dev;
1979         /* Set the function pointer to an actual operation function */
1980         mci->edac_check = i7core_check_error;
1981
1982         /* add this new MC control structure to EDAC's list of MCs */
1983         if (unlikely(edac_mc_add_mc(mci))) {
1984                 debugf0("MC: " __FILE__
1985                         ": %s(): failed edac_mc_add_mc()\n", __func__);
1986                 /* FIXME: perhaps some code should go here that disables error
1987                  * reporting if we just enabled it
1988                  */
1989
1990                 rc = -EINVAL;
1991                 goto fail0;
1992         }
1993
1994         /* Default error mask is any memory */
1995         pvt->inject.channel = 0;
1996         pvt->inject.dimm = -1;
1997         pvt->inject.rank = -1;
1998         pvt->inject.bank = -1;
1999         pvt->inject.page = -1;
2000         pvt->inject.col = -1;
2001
2002         /* allocating generic PCI control info */
2003         i7core_pci_ctl_create(pvt);
2004
2005         /* Registers on edac_mce in order to receive memory errors */
2006         pvt->edac_mce.priv = mci;
2007         pvt->edac_mce.check_error = i7core_mce_check_error;
2008         rc = edac_mce_register(&pvt->edac_mce);
2009         if (unlikely(rc < 0)) {
2010                 debugf0("MC: " __FILE__
2011                         ": %s(): failed edac_mce_register()\n", __func__);
2012                 goto fail1;
2013         }
2014
2015         return 0;
2016
2017 fail1:
2018         i7core_pci_ctl_release(pvt);
2019         edac_mc_del_mc(mci->dev);
2020 fail0:
2021         kfree(mci->ctl_name);
2022         edac_mc_free(mci);
2023         i7core_dev->mci = NULL;
2024         return rc;
2025 }
2026
2027 /*
2028  *      i7core_probe    Probe for ONE instance of device to see if it is
2029  *                      present.
2030  *      return:
2031  *              0 for FOUND a device
2032  *              < 0 for error code
2033  */
2034
2035 static int __devinit i7core_probe(struct pci_dev *pdev,
2036                                   const struct pci_device_id *id)
2037 {
2038         int rc;
2039         struct i7core_dev *i7core_dev;
2040
2041         /* get the pci devices we want to reserve for our use */
2042         mutex_lock(&i7core_edac_lock);
2043
2044         /*
2045          * All memory controllers are allocated at the first pass.
2046          */
2047         if (unlikely(probed >= 1)) {
2048                 mutex_unlock(&i7core_edac_lock);
2049                 return -ENODEV;
2050         }
2051         probed++;
2052
2053         rc = i7core_get_all_devices();
2054         if (unlikely(rc < 0))
2055                 goto fail0;
2056
2057         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2058                 rc = i7core_register_mci(i7core_dev);
2059                 if (unlikely(rc < 0))
2060                         goto fail1;
2061         }
2062
2063         i7core_printk(KERN_INFO, "Driver loaded.\n");
2064
2065         mutex_unlock(&i7core_edac_lock);
2066         return 0;
2067
2068 fail1:
2069         list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2070                 i7core_unregister_mci(i7core_dev);
2071
2072         i7core_put_all_devices();
2073 fail0:
2074         mutex_unlock(&i7core_edac_lock);
2075         return rc;
2076 }
2077
2078 /*
2079  *      i7core_remove   destructor for one instance of device
2080  *
2081  */
2082 static void __devexit i7core_remove(struct pci_dev *pdev)
2083 {
2084         struct i7core_dev *i7core_dev;
2085
2086         debugf0(__FILE__ ": %s()\n", __func__);
2087
2088         /*
2089          * we have a trouble here: pdev value for removal will be wrong, since
2090          * it will point to the X58 register used to detect that the machine
2091          * is a Nehalem or upper design. However, due to the way several PCI
2092          * devices are grouped together to provide MC functionality, we need
2093          * to use a different method for releasing the devices
2094          */
2095
2096         mutex_lock(&i7core_edac_lock);
2097
2098         if (unlikely(!probed)) {
2099                 mutex_unlock(&i7core_edac_lock);
2100                 return;
2101         }
2102
2103         list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2104                 i7core_unregister_mci(i7core_dev);
2105
2106         /* Release PCI resources */
2107         i7core_put_all_devices();
2108
2109         probed--;
2110
2111         mutex_unlock(&i7core_edac_lock);
2112 }
2113
2114 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2115
2116 /*
2117  *      i7core_driver   pci_driver structure for this module
2118  *
2119  */
2120 static struct pci_driver i7core_driver = {
2121         .name     = "i7core_edac",
2122         .probe    = i7core_probe,
2123         .remove   = __devexit_p(i7core_remove),
2124         .id_table = i7core_pci_tbl,
2125 };
2126
2127 /*
2128  *      i7core_init             Module entry function
2129  *                      Try to initialize this module for its devices
2130  */
2131 static int __init i7core_init(void)
2132 {
2133         int pci_rc;
2134
2135         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2136
2137         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2138         opstate_init();
2139
2140         if (use_pci_fixup)
2141                 i7core_xeon_pci_fixup(pci_dev_table);
2142
2143         pci_rc = pci_register_driver(&i7core_driver);
2144
2145         if (pci_rc >= 0)
2146                 return 0;
2147
2148         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2149                       pci_rc);
2150
2151         return pci_rc;
2152 }
2153
2154 /*
2155  *      i7core_exit()   Module exit function
2156  *                      Unregister the driver
2157  */
2158 static void __exit i7core_exit(void)
2159 {
2160         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2161         pci_unregister_driver(&i7core_driver);
2162 }
2163
2164 module_init(i7core_init);
2165 module_exit(i7core_exit);
2166
2167 MODULE_LICENSE("GPL");
2168 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2169 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2170 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2171                    I7CORE_REVISION);
2172
2173 module_param(edac_op_state, int, 0444);
2174 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");