i7core_edac: Be sure that the edac pci handler will be properly released
[pandora-kernel.git] / drivers / edac / i7core_edac.c
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
 * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *       Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *      http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/edac_mce.h>
37 #include <linux/smp.h>
38 #include <asm/processor.h>
39
40 #include "edac_core.h"
41
42 /*
43  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
44  * registers start at bus 255, and are not reported by BIOS.
45  * We currently find devices with only 2 sockets. In order to support more QPI
46  * Quick Path Interconnect, just increment this number.
47  */
48 #define MAX_SOCKET_BUSES        2
49
50
51 /*
52  * Alter this version for the module when modifications are made
53  */
54 #define I7CORE_REVISION    " Ver: 1.0.0 " __DATE__
55 #define EDAC_MOD_STR      "i7core_edac"
56
57 /*
58  * Debug macros
59  */
60 #define i7core_printk(level, fmt, arg...)                       \
61         edac_printk(level, "i7core", fmt, ##arg)
62
63 #define i7core_mc_printk(mci, level, fmt, arg...)               \
64         edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
65
66 /*
67  * i7core Memory Controller Registers
68  */
69
70         /* OFFSETS for Device 0 Function 0 */
71
72 #define MC_CFG_CONTROL  0x90
73
74         /* OFFSETS for Device 3 Function 0 */
75
76 #define MC_CONTROL      0x48
77 #define MC_STATUS       0x4c
78 #define MC_MAX_DOD      0x64
79
80 /*
 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
82  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
83  */
84
85 #define MC_TEST_ERR_RCV1        0x60
86   #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)
87
88 #define MC_TEST_ERR_RCV0        0x64
89   #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
90   #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)
91
/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
93 #define MC_COR_ECC_CNT_0        0x80
94 #define MC_COR_ECC_CNT_1        0x84
95 #define MC_COR_ECC_CNT_2        0x88
96 #define MC_COR_ECC_CNT_3        0x8c
97 #define MC_COR_ECC_CNT_4        0x90
98 #define MC_COR_ECC_CNT_5        0x94
99
100 #define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
101 #define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)
102
103
104         /* OFFSETS for Devices 4,5 and 6 Function 0 */
105
106 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
107   #define THREE_DIMMS_PRESENT           (1 << 24)
108   #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
109   #define QUAD_RANK_PRESENT             (1 << 22)
110   #define REGISTERED_DIMM               (1 << 15)
111
112 #define MC_CHANNEL_MAPPER       0x60
113   #define RDLCH(r, ch)          ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
114   #define WRLCH(r, ch)          ((((r) >> (ch * 6)) & 0x07) - 1)
115
116 #define MC_CHANNEL_RANK_PRESENT 0x7c
117   #define RANK_PRESENT_MASK             0xffff
118
119 #define MC_CHANNEL_ADDR_MATCH   0xf0
120 #define MC_CHANNEL_ERROR_MASK   0xf8
121 #define MC_CHANNEL_ERROR_INJECT 0xfc
122   #define INJECT_ADDR_PARITY    0x10
123   #define INJECT_ECC            0x08
124   #define MASK_CACHELINE        0x06
125   #define MASK_FULL_CACHELINE   0x06
126   #define MASK_MSB32_CACHELINE  0x04
127   #define MASK_LSB32_CACHELINE  0x02
128   #define NO_MASK_CACHELINE     0x00
129   #define REPEAT_EN             0x01
130
131         /* OFFSETS for Devices 4,5 and 6 Function 1 */
132
133 #define MC_DOD_CH_DIMM0         0x48
134 #define MC_DOD_CH_DIMM1         0x4c
135 #define MC_DOD_CH_DIMM2         0x50
136   #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
137   #define RANKOFFSET(x)         ((x & RANKOFFSET_MASK) >> 10)
138   #define DIMM_PRESENT_MASK     (1 << 9)
139   #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
140   #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
141   #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
142   #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
143   #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
144   #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
145   #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
146   #define MC_DOD_NUMCOL_MASK            3
147   #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)
148
149 #define MC_RANK_PRESENT         0x7c
150
151 #define MC_SAG_CH_0     0x80
152 #define MC_SAG_CH_1     0x84
153 #define MC_SAG_CH_2     0x88
154 #define MC_SAG_CH_3     0x8c
155 #define MC_SAG_CH_4     0x90
156 #define MC_SAG_CH_5     0x94
157 #define MC_SAG_CH_6     0x98
158 #define MC_SAG_CH_7     0x9c
159
160 #define MC_RIR_LIMIT_CH_0       0x40
161 #define MC_RIR_LIMIT_CH_1       0x44
162 #define MC_RIR_LIMIT_CH_2       0x48
163 #define MC_RIR_LIMIT_CH_3       0x4C
164 #define MC_RIR_LIMIT_CH_4       0x50
165 #define MC_RIR_LIMIT_CH_5       0x54
166 #define MC_RIR_LIMIT_CH_6       0x58
167 #define MC_RIR_LIMIT_CH_7       0x5C
168 #define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)
169
170 #define MC_RIR_WAY_CH           0x80
171   #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
172   #define MC_RIR_WAY_RANK_MASK          0x7
173
174 /*
175  * i7core structs
176  */
177
178 #define NUM_CHANS 3
179 #define MAX_DIMMS 3             /* Max DIMMS per channel */
180 #define MAX_MCR_FUNC  4
181 #define MAX_CHAN_FUNC 3
182
183 struct i7core_info {
184         u32     mc_control;
185         u32     mc_status;
186         u32     max_dod;
187         u32     ch_map;
188 };
189
190
191 struct i7core_inject {
192         int     enable;
193
194         u32     section;
195         u32     type;
196         u32     eccmask;
197
198         /* Error address mask */
199         int channel, dimm, rank, bank, page, col;
200 };
201
202 struct i7core_channel {
203         u32             ranks;
204         u32             dimms;
205 };
206
207 struct pci_id_descr {
208         int                     dev;
209         int                     func;
210         int                     dev_id;
211         int                     optional;
212 };
213
214 struct pci_id_table {
215         struct pci_id_descr     *descr;
216         int                     n_devs;
217 };
218
219 struct i7core_dev {
220         struct list_head        list;
221         u8                      socket;
222         struct pci_dev          **pdev;
223         int                     n_devs;
224         struct mem_ctl_info     *mci;
225 };
226
227 struct i7core_pvt {
228         struct pci_dev  *pci_noncore;
229         struct pci_dev  *pci_mcr[MAX_MCR_FUNC + 1];
230         struct pci_dev  *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
231
232         struct i7core_dev *i7core_dev;
233
234         struct i7core_info      info;
235         struct i7core_inject    inject;
236         struct i7core_channel   channel[NUM_CHANS];
237
238         int             channels; /* Number of active channels */
239
240         int             ce_count_available;
241         int             csrow_map[NUM_CHANS][MAX_DIMMS];
242
243                         /* ECC corrected errors counts per udimm */
244         unsigned long   udimm_ce_count[MAX_DIMMS];
245         int             udimm_last_ce_count[MAX_DIMMS];
246                         /* ECC corrected errors counts per rdimm */
247         unsigned long   rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
248         int             rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
249
250         unsigned int    is_registered;
251
252         /* mcelog glue */
253         struct edac_mce         edac_mce;
254
255         /* Fifo double buffers */
256         struct mce              mce_entry[MCE_LOG_LEN];
257         struct mce              mce_outentry[MCE_LOG_LEN];
258
259         /* Fifo in/out counters */
260         unsigned                mce_in, mce_out;
261
262         /* Count indicator to show errors not got */
263         unsigned                mce_overrun;
264
265         /* Struct to control EDAC polling */
266         struct edac_pci_ctl_info *i7core_pci;
267 };
268
269 /* Static vars */
270 static LIST_HEAD(i7core_edac_list);
271 static DEFINE_MUTEX(i7core_edac_lock);
272
273 #define PCI_DESCR(device, function, device_id)  \
274         .dev = (device),                        \
275         .func = (function),                     \
276         .dev_id = (device_id)
277
278 struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
279                 /* Memory controller */
280         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
281         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
282                         /* Exists only for RDIMM */
283         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
284         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
285
286                 /* Channel 0 */
287         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
288         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
289         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
290         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },
291
292                 /* Channel 1 */
293         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
294         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
295         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
296         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },
297
298                 /* Channel 2 */
299         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
300         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
301         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
302         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
303
304                 /* Generic Non-core registers */
305         /*
306          * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
307          * On Xeon 55xx, however, it has a different id (8086:2c40). So,
308          * the probing code needs to test for the other address in case of
309          * failure of this one
310          */
311         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
312
313 };
314
315 struct pci_id_descr pci_dev_descr_lynnfield[] = {
316         { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
317         { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
318         { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },
319
320         { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
321         { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
322         { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
323         { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },
324
325         { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
326         { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
327         { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
328         { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
329
330         /*
331          * This is the PCI device has an alternate address on some
332          * processors like Core i7 860
333          */
334         { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
335 };
336
337 struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
338                 /* Memory controller */
339         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
340         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
341                         /* Exists only for RDIMM */
342         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
343         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
344
345                 /* Channel 0 */
346         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
347         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
348         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
349         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },
350
351                 /* Channel 1 */
352         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
353         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
354         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
355         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },
356
357                 /* Channel 2 */
358         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
359         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
360         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
361         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
362
363                 /* Generic Non-core registers */
364         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },
365
366 };
367
368 #define PCI_ID_TABLE_ENTRY(A) { A, ARRAY_SIZE(A) }
369 struct pci_id_table pci_dev_table[] = {
370         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
371         PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
372         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
373 };
374
375 /*
376  *      pci_device_id   table for which devices we are looking for
377  */
378 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
379         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
380         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
381         {0,}                    /* 0 terminated list. */
382 };
383
384 /****************************************************************************
                        Ancillary status routines
386  ****************************************************************************/
387
388         /* MC_CONTROL bits */
389 #define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + ch)))
390 #define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))
391
392         /* MC_STATUS bits */
393 #define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
394 #define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << ch))
395
396         /* MC_MAX_DOD read functions */
397 static inline int numdimms(u32 dimms)
398 {
399         return (dimms & 0x3) + 1;
400 }
401
402 static inline int numrank(u32 rank)
403 {
404         static int ranks[4] = { 1, 2, 4, -EINVAL };
405
406         return ranks[rank & 0x3];
407 }
408
409 static inline int numbank(u32 bank)
410 {
411         static int banks[4] = { 4, 8, 16, -EINVAL };
412
413         return banks[bank & 0x3];
414 }
415
416 static inline int numrow(u32 row)
417 {
418         static int rows[8] = {
419                 1 << 12, 1 << 13, 1 << 14, 1 << 15,
420                 1 << 16, -EINVAL, -EINVAL, -EINVAL,
421         };
422
423         return rows[row & 0x7];
424 }
425
426 static inline int numcol(u32 col)
427 {
428         static int cols[8] = {
429                 1 << 10, 1 << 11, 1 << 12, -EINVAL,
430         };
431         return cols[col & 0x3];
432 }
433
434 static struct i7core_dev *get_i7core_dev(u8 socket)
435 {
436         struct i7core_dev *i7core_dev;
437
438         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
439                 if (i7core_dev->socket == socket)
440                         return i7core_dev;
441         }
442
443         return NULL;
444 }
445
446 /****************************************************************************
447                         Memory check routines
448  ****************************************************************************/
449 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
450                                           unsigned func)
451 {
452         struct i7core_dev *i7core_dev = get_i7core_dev(socket);
453         int i;
454
455         if (!i7core_dev)
456                 return NULL;
457
458         for (i = 0; i < i7core_dev->n_devs; i++) {
459                 if (!i7core_dev->pdev[i])
460                         continue;
461
462                 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
463                     PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
464                         return i7core_dev->pdev[i];
465                 }
466         }
467
468         return NULL;
469 }
470
471 /**
472  * i7core_get_active_channels() - gets the number of channels and csrows
473  * @socket:     Quick Path Interconnect socket
474  * @channels:   Number of channels that will be returned
475  * @csrows:     Number of csrows found
476  *
477  * Since EDAC core needs to know in advance the number of available channels
478  * and csrows, in order to allocate memory for csrows/channels, it is needed
479  * to run two similar steps. At the first step, implemented on this function,
480  * it checks the number of csrows/channels present at one socket.
481  * this is used in order to properly allocate the size of mci components.
482  *
483  * It should be noticed that none of the current available datasheets explain
484  * or even mention how csrows are seen by the memory controller. So, we need
485  * to add a fake description for csrows.
486  * So, this driver is attributing one DIMM memory for one csrow.
487  */
488 static int i7core_get_active_channels(u8 socket, unsigned *channels,
489                                       unsigned *csrows)
490 {
491         struct pci_dev *pdev = NULL;
492         int i, j;
493         u32 status, control;
494
495         *channels = 0;
496         *csrows = 0;
497
498         pdev = get_pdev_slot_func(socket, 3, 0);
499         if (!pdev) {
500                 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
501                               socket);
502                 return -ENODEV;
503         }
504
505         /* Device 3 function 0 reads */
506         pci_read_config_dword(pdev, MC_STATUS, &status);
507         pci_read_config_dword(pdev, MC_CONTROL, &control);
508
509         for (i = 0; i < NUM_CHANS; i++) {
510                 u32 dimm_dod[3];
511                 /* Check if the channel is active */
512                 if (!(control & (1 << (8 + i))))
513                         continue;
514
515                 /* Check if the channel is disabled */
516                 if (status & (1 << i))
517                         continue;
518
519                 pdev = get_pdev_slot_func(socket, i + 4, 1);
520                 if (!pdev) {
521                         i7core_printk(KERN_ERR, "Couldn't find socket %d "
522                                                 "fn %d.%d!!!\n",
523                                                 socket, i + 4, 1);
524                         return -ENODEV;
525                 }
526                 /* Devices 4-6 function 1 */
527                 pci_read_config_dword(pdev,
528                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
529                 pci_read_config_dword(pdev,
530                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
531                 pci_read_config_dword(pdev,
532                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
533
534                 (*channels)++;
535
536                 for (j = 0; j < 3; j++) {
537                         if (!DIMM_PRESENT(dimm_dod[j]))
538                                 continue;
539                         (*csrows)++;
540                 }
541         }
542
543         debugf0("Number of active channels on socket %d: %d\n",
544                 socket, *channels);
545
546         return 0;
547 }
548
549 static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
550 {
551         struct i7core_pvt *pvt = mci->pvt_info;
552         struct csrow_info *csr;
553         struct pci_dev *pdev;
554         int i, j;
555         unsigned long last_page = 0;
556         enum edac_type mode;
557         enum mem_type mtype;
558
559         /* Get data from the MC register, function 0 */
560         pdev = pvt->pci_mcr[0];
561         if (!pdev)
562                 return -ENODEV;
563
564         /* Device 3 function 0 reads */
565         pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
566         pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
567         pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
568         pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
569
570         debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
571                 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
572                 pvt->info.max_dod, pvt->info.ch_map);
573
574         if (ECC_ENABLED(pvt)) {
575                 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
576                 if (ECCx8(pvt))
577                         mode = EDAC_S8ECD8ED;
578                 else
579                         mode = EDAC_S4ECD4ED;
580         } else {
581                 debugf0("ECC disabled\n");
582                 mode = EDAC_NONE;
583         }
584
585         /* FIXME: need to handle the error codes */
586         debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
587                 "x%x x 0x%x\n",
588                 numdimms(pvt->info.max_dod),
589                 numrank(pvt->info.max_dod >> 2),
590                 numbank(pvt->info.max_dod >> 4),
591                 numrow(pvt->info.max_dod >> 6),
592                 numcol(pvt->info.max_dod >> 9));
593
594         for (i = 0; i < NUM_CHANS; i++) {
595                 u32 data, dimm_dod[3], value[8];
596
597                 if (!pvt->pci_ch[i][0])
598                         continue;
599
600                 if (!CH_ACTIVE(pvt, i)) {
601                         debugf0("Channel %i is not active\n", i);
602                         continue;
603                 }
604                 if (CH_DISABLED(pvt, i)) {
605                         debugf0("Channel %i is disabled\n", i);
606                         continue;
607                 }
608
609                 /* Devices 4-6 function 0 */
610                 pci_read_config_dword(pvt->pci_ch[i][0],
611                                 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
612
613                 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
614                                                 4 : 2;
615
616                 if (data & REGISTERED_DIMM)
617                         mtype = MEM_RDDR3;
618                 else
619                         mtype = MEM_DDR3;
620 #if 0
621                 if (data & THREE_DIMMS_PRESENT)
622                         pvt->channel[i].dimms = 3;
623                 else if (data & SINGLE_QUAD_RANK_PRESENT)
624                         pvt->channel[i].dimms = 1;
625                 else
626                         pvt->channel[i].dimms = 2;
627 #endif
628
629                 /* Devices 4-6 function 1 */
630                 pci_read_config_dword(pvt->pci_ch[i][1],
631                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
632                 pci_read_config_dword(pvt->pci_ch[i][1],
633                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
634                 pci_read_config_dword(pvt->pci_ch[i][1],
635                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
636
637                 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
638                         "%d ranks, %cDIMMs\n",
639                         i,
640                         RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
641                         data,
642                         pvt->channel[i].ranks,
643                         (data & REGISTERED_DIMM) ? 'R' : 'U');
644
645                 for (j = 0; j < 3; j++) {
646                         u32 banks, ranks, rows, cols;
647                         u32 size, npages;
648
649                         if (!DIMM_PRESENT(dimm_dod[j]))
650                                 continue;
651
652                         banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
653                         ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
654                         rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
655                         cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
656
657                         /* DDR3 has 8 I/O banks */
658                         size = (rows * cols * banks * ranks) >> (20 - 3);
659
660                         pvt->channel[i].dimms++;
661
662                         debugf0("\tdimm %d %d Mb offset: %x, "
663                                 "bank: %d, rank: %d, row: %#x, col: %#x\n",
664                                 j, size,
665                                 RANKOFFSET(dimm_dod[j]),
666                                 banks, ranks, rows, cols);
667
668 #if PAGE_SHIFT > 20
669                         npages = size >> (PAGE_SHIFT - 20);
670 #else
671                         npages = size << (20 - PAGE_SHIFT);
672 #endif
673
674                         csr = &mci->csrows[*csrow];
675                         csr->first_page = last_page + 1;
676                         last_page += npages;
677                         csr->last_page = last_page;
678                         csr->nr_pages = npages;
679
680                         csr->page_mask = 0;
681                         csr->grain = 8;
682                         csr->csrow_idx = *csrow;
683                         csr->nr_channels = 1;
684
685                         csr->channels[0].chan_idx = i;
686                         csr->channels[0].ce_count = 0;
687
688                         pvt->csrow_map[i][j] = *csrow;
689
690                         switch (banks) {
691                         case 4:
692                                 csr->dtype = DEV_X4;
693                                 break;
694                         case 8:
695                                 csr->dtype = DEV_X8;
696                                 break;
697                         case 16:
698                                 csr->dtype = DEV_X16;
699                                 break;
700                         default:
701                                 csr->dtype = DEV_UNKNOWN;
702                         }
703
704                         csr->edac_mode = mode;
705                         csr->mtype = mtype;
706
707                         (*csrow)++;
708                 }
709
710                 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
711                 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
712                 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
713                 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
714                 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
715                 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
716                 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
717                 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
718                 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
719                 for (j = 0; j < 8; j++)
720                         debugf1("\t\t%#x\t%#x\t%#x\n",
721                                 (value[j] >> 27) & 0x1,
722                                 (value[j] >> 24) & 0x7,
723                                 (value[j] && ((1 << 24) - 1)));
724         }
725
726         return 0;
727 }
728
729 /****************************************************************************
730                         Error insertion routines
731  ****************************************************************************/
732
733 /* The i7core has independent error injection features per channel.
734    However, to have a simpler code, we don't allow enabling error injection
735    on more than one channel.
736    Also, since a change at an inject parameter will be applied only at enable,
737    we're disabling error injection on all write calls to the sysfs nodes that
738    controls the error code injection.
739  */
740 static int disable_inject(struct mem_ctl_info *mci)
741 {
742         struct i7core_pvt *pvt = mci->pvt_info;
743
744         pvt->inject.enable = 0;
745
746         if (!pvt->pci_ch[pvt->inject.channel][0])
747                 return -ENODEV;
748
749         pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
750                                 MC_CHANNEL_ERROR_INJECT, 0);
751
752         return 0;
753 }
754
755 /*
756  * i7core inject inject.section
757  *
758  *      accept and store error injection inject.section value
759  *      bit 0 - refers to the lower 32-byte half cacheline
760  *      bit 1 - refers to the upper 32-byte half cacheline
761  */
762 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
763                                            const char *data, size_t count)
764 {
765         struct i7core_pvt *pvt = mci->pvt_info;
766         unsigned long value;
767         int rc;
768
769         if (pvt->inject.enable)
770                 disable_inject(mci);
771
772         rc = strict_strtoul(data, 10, &value);
773         if ((rc < 0) || (value > 3))
774                 return -EIO;
775
776         pvt->inject.section = (u32) value;
777         return count;
778 }
779
780 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
781                                               char *data)
782 {
783         struct i7core_pvt *pvt = mci->pvt_info;
784         return sprintf(data, "0x%08x\n", pvt->inject.section);
785 }
786
787 /*
788  * i7core inject.type
789  *
790  *      accept and store error injection inject.section value
791  *      bit 0 - repeat enable - Enable error repetition
792  *      bit 1 - inject ECC error
793  *      bit 2 - inject parity error
794  */
795 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
796                                         const char *data, size_t count)
797 {
798         struct i7core_pvt *pvt = mci->pvt_info;
799         unsigned long value;
800         int rc;
801
802         if (pvt->inject.enable)
803                 disable_inject(mci);
804
805         rc = strict_strtoul(data, 10, &value);
806         if ((rc < 0) || (value > 7))
807                 return -EIO;
808
809         pvt->inject.type = (u32) value;
810         return count;
811 }
812
813 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
814                                               char *data)
815 {
816         struct i7core_pvt *pvt = mci->pvt_info;
817         return sprintf(data, "0x%08x\n", pvt->inject.type);
818 }
819
820 /*
821  * i7core_inject_inject.eccmask_store
822  *
823  * The type of error (UE/CE) will depend on the inject.eccmask value:
824  *   Any bits set to a 1 will flip the corresponding ECC bit
825  *   Correctable errors can be injected by flipping 1 bit or the bits within
826  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
827  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
828  *   uncorrectable error to be injected.
829  */
830 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
831                                         const char *data, size_t count)
832 {
833         struct i7core_pvt *pvt = mci->pvt_info;
834         unsigned long value;
835         int rc;
836
837         if (pvt->inject.enable)
838                 disable_inject(mci);
839
840         rc = strict_strtoul(data, 10, &value);
841         if (rc < 0)
842                 return -EIO;
843
844         pvt->inject.eccmask = (u32) value;
845         return count;
846 }
847
848 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
849                                               char *data)
850 {
851         struct i7core_pvt *pvt = mci->pvt_info;
852         return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
853 }
854
/*
 * i7core_addrmatch
 *
 * Sysfs nodes used to restrict error injection to a given address.
 * Each attribute (channel, dimm, rank, bank, page, col) accepts either
 * a decimal value below its limit or the string "any" to ignore that
 * field when matching the address of a write operation.
 */
865
/*
 * DECLARE_ADDR_MATCH - generate the sysfs show/store handlers for one
 * inject_addrmatch field.
 *
 * The store handler accepts either "any" (stored as -1, meaning "ignore
 * this field when matching") or a decimal value strictly below @limit;
 * any armed injection is disabled first. The show handler prints "any"
 * or the current value.
 *
 * Parsing goes through an unsigned long temporary because
 * strict_strtoul() expects an unsigned long * (the previous code passed
 * a plain long *, an incompatible pointer type).
 */
#define DECLARE_ADDR_MATCH(param, limit)                        \
static ssize_t i7core_inject_store_##param(                     \
                struct mem_ctl_info *mci,                       \
                const char *data, size_t count)                 \
{                                                               \
        struct i7core_pvt *pvt;                                 \
        unsigned long val;                                      \
        long value;                                             \
        int rc;                                                 \
                                                                \
        debugf1("%s()\n", __func__);                            \
        pvt = mci->pvt_info;                                    \
                                                                \
        if (pvt->inject.enable)                                 \
                disable_inject(mci);                            \
                                                                \
        if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
                value = -1;                                     \
        else {                                                  \
                rc = strict_strtoul(data, 10, &val);            \
                if ((rc < 0) || (val >= limit))                 \
                        return -EIO;                            \
                value = val;                                    \
        }                                                       \
                                                                \
        pvt->inject.param = value;                              \
                                                                \
        return count;                                           \
}                                                               \
                                                                \
static ssize_t i7core_inject_show_##param(                      \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt;                                 \
                                                                \
        pvt = mci->pvt_info;                                    \
        debugf1("%s() pvt=%p\n", __func__, pvt);                \
        if (pvt->inject.param < 0)                              \
                return sprintf(data, "any\n");                  \
        else                                                    \
                return sprintf(data, "%d\n", pvt->inject.param);\
}
907
/*
 * ATTR_ADDR_MATCH - build one read/write sysfs attribute entry named
 * after @param, wired to the handlers that DECLARE_ADDR_MATCH generates.
 */
#define ATTR_ADDR_MATCH(param)                                  \
        {                                                       \
                .attr = {                                       \
                        .name = #param,                         \
                        .mode = (S_IRUGO | S_IWUSR)             \
                },                                              \
                .show  = i7core_inject_show_##param,            \
                .store = i7core_inject_store_##param,           \
        }

/* Generate the handlers; the second argument is the exclusive upper
 * bound accepted by each field's store method. */
DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);
924
925 static int write_and_test(struct pci_dev *dev, int where, u32 val)
926 {
927         u32 read;
928         int count;
929
930         debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
931                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
932                 where, val);
933
934         for (count = 0; count < 10; count++) {
935                 if (count)
936                         msleep(100);
937                 pci_write_config_dword(dev, where, val);
938                 pci_read_config_dword(dev, where, &read);
939
940                 if (read == val)
941                         return 0;
942         }
943
944         i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
945                 "write=%08x. Read=%08x\n",
946                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
947                 where, val, read);
948
949         return -EINVAL;
950 }
951
952 /*
953  * This routine prepares the Memory Controller for error injection.
954  * The error will be injected when some process tries to write to the
955  * memory that matches the given criteria.
956  * The criteria can be set in terms of a mask where dimm, rank, bank, page
957  * and col can be specified.
958  * A -1 value for any of the mask items will make the MCU to ignore
959  * that matching criteria for error injection.
960  *
 * Note that the error will only happen after a write operation to a memory
 * address that matches the condition. If REPEAT_EN is not enabled in the
 * inject mask, it will produce just one error; otherwise errors will repeat
 * until the inject mask is cleared.
965  *
966  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
967  *    is reliable enough to check if the MC is using the
968  *    three channels. However, this is not clear at the datasheet.
969  */
970 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
971                                        const char *data, size_t count)
972 {
973         struct i7core_pvt *pvt = mci->pvt_info;
974         u32 injectmask;
975         u64 mask = 0;
976         int  rc;
977         long enable;
978
979         if (!pvt->pci_ch[pvt->inject.channel][0])
980                 return 0;
981
982         rc = strict_strtoul(data, 10, &enable);
983         if ((rc < 0))
984                 return 0;
985
986         if (enable) {
987                 pvt->inject.enable = 1;
988         } else {
989                 disable_inject(mci);
990                 return count;
991         }
992
993         /* Sets pvt->inject.dimm mask */
994         if (pvt->inject.dimm < 0)
995                 mask |= 1LL << 41;
996         else {
997                 if (pvt->channel[pvt->inject.channel].dimms > 2)
998                         mask |= (pvt->inject.dimm & 0x3LL) << 35;
999                 else
1000                         mask |= (pvt->inject.dimm & 0x1LL) << 36;
1001         }
1002
1003         /* Sets pvt->inject.rank mask */
1004         if (pvt->inject.rank < 0)
1005                 mask |= 1LL << 40;
1006         else {
1007                 if (pvt->channel[pvt->inject.channel].dimms > 2)
1008                         mask |= (pvt->inject.rank & 0x1LL) << 34;
1009                 else
1010                         mask |= (pvt->inject.rank & 0x3LL) << 34;
1011         }
1012
1013         /* Sets pvt->inject.bank mask */
1014         if (pvt->inject.bank < 0)
1015                 mask |= 1LL << 39;
1016         else
1017                 mask |= (pvt->inject.bank & 0x15LL) << 30;
1018
1019         /* Sets pvt->inject.page mask */
1020         if (pvt->inject.page < 0)
1021                 mask |= 1LL << 38;
1022         else
1023                 mask |= (pvt->inject.page & 0xffff) << 14;
1024
1025         /* Sets pvt->inject.column mask */
1026         if (pvt->inject.col < 0)
1027                 mask |= 1LL << 37;
1028         else
1029                 mask |= (pvt->inject.col & 0x3fff);
1030
1031         /*
1032          * bit    0: REPEAT_EN
1033          * bits 1-2: MASK_HALF_CACHELINE
1034          * bit    3: INJECT_ECC
1035          * bit    4: INJECT_ADDR_PARITY
1036          */
1037
1038         injectmask = (pvt->inject.type & 1) |
1039                      (pvt->inject.section & 0x3) << 1 |
1040                      (pvt->inject.type & 0x6) << (3 - 1);
1041
1042         /* Unlock writes to registers - this register is write only */
1043         pci_write_config_dword(pvt->pci_noncore,
1044                                MC_CFG_CONTROL, 0x2);
1045
1046         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1047                                MC_CHANNEL_ADDR_MATCH, mask);
1048         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1049                                MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1050
1051         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1052                                MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1053
1054         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1055                                MC_CHANNEL_ERROR_INJECT, injectmask);
1056
1057         /*
1058          * This is something undocumented, based on my tests
1059          * Without writing 8 to this register, errors aren't injected. Not sure
1060          * why.
1061          */
1062         pci_write_config_dword(pvt->pci_noncore,
1063                                MC_CFG_CONTROL, 8);
1064
1065         debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1066                 " inject 0x%08x\n",
1067                 mask, pvt->inject.eccmask, injectmask);
1068
1069
1070         return count;
1071 }
1072
1073 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1074                                         char *data)
1075 {
1076         struct i7core_pvt *pvt = mci->pvt_info;
1077         u32 injectmask;
1078
1079         if (!pvt->pci_ch[pvt->inject.channel][0])
1080                 return 0;
1081
1082         pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1083                                MC_CHANNEL_ERROR_INJECT, &injectmask);
1084
1085         debugf0("Inject error read: 0x%018x\n", injectmask);
1086
1087         if (injectmask & 0x0c)
1088                 pvt->inject.enable = 1;
1089
1090         return sprintf(data, "%d\n", pvt->inject.enable);
1091 }
1092
/*
 * DECLARE_COUNTER - generate a sysfs show handler for the corrected
 * error counter of udimm slot @param. Reports "data unavailable" when
 * no counts were collected yet or when the memory is registered (rdimm
 * counts are reported through the standard edac locations instead).
 */
#define DECLARE_COUNTER(param)                                  \
static ssize_t i7core_show_counter_##param(                     \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt = mci->pvt_info;                 \
                                                                \
        debugf1("%s() \n", __func__);                           \
        if (!pvt->ce_count_available || (pvt->is_registered))   \
                return sprintf(data, "data unavailable\n");     \
        return sprintf(data, "%lu\n",                           \
                        pvt->udimm_ce_count[param]);            \
}

/*
 * ATTR_COUNTER - build the sysfs attribute entry "udimm<param>" bound
 * to the show handler generated above (no store handler).
 */
#define ATTR_COUNTER(param)                                     \
        {                                                       \
                .attr = {                                       \
                        .name = __stringify(udimm##param),      \
                        .mode = (S_IRUGO | S_IWUSR)             \
                },                                              \
                .show  = i7core_show_counter_##param            \
        }

/* One counter per dimm slot of a channel */
DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
1119
1120 /*
1121  * Sysfs struct
1122  */
1123
1124
/* Attributes exposed under the inject_addrmatch sysfs directory;
 * terminated by an entry with a NULL name. */
static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
        ATTR_ADDR_MATCH(channel),
        ATTR_ADDR_MATCH(dimm),
        ATTR_ADDR_MATCH(rank),
        ATTR_ADDR_MATCH(bank),
        ATTR_ADDR_MATCH(page),
        ATTR_ADDR_MATCH(col),
        { .attr = { .name = NULL } }
};
1134
/* Groups the address-match attributes under "inject_addrmatch" */
static struct mcidev_sysfs_group i7core_inject_addrmatch = {
        .name  = "inject_addrmatch",
        .mcidev_attr = i7core_addrmatch_attrs,
};
1139
/* Per-dimm corrected-error counters for unregistered (udimm) memory;
 * terminated by an entry with a NULL name. */
static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
        ATTR_COUNTER(0),
        ATTR_COUNTER(1),
        ATTR_COUNTER(2),
        { .attr = { .name = NULL } }
};

/* Groups the udimm counters under "all_channel_counts" */
static struct mcidev_sysfs_group i7core_udimm_counters = {
        .name  = "all_channel_counts",
        .mcidev_attr = i7core_udimm_counters_attrs,
};
1151
/*
 * Main sysfs attribute table for the memory controller. The entry just
 * before the final terminator is intentionally empty: mci_bind_devs()
 * fills it with the udimm counters group when the memory is not
 * registered.
 */
static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
        {
                .attr = {
                        .name = "inject_section",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_section_show,
                .store = i7core_inject_section_store,
        }, {
                .attr = {
                        .name = "inject_type",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_type_show,
                .store = i7core_inject_type_store,
        }, {
                .attr = {
                        .name = "inject_eccmask",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_eccmask_show,
                .store = i7core_inject_eccmask_store,
        }, {
                .grp = &i7core_inject_addrmatch,
        }, {
                .attr = {
                        .name = "inject_enable",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_enable_show,
                .store = i7core_inject_enable_store,
        },
        { .attr = { .name = NULL } },   /* Reserved for udimm counters */
        { .attr = { .name = NULL } }
};
1187
1188 /****************************************************************************
1189         Device initialization routines: put/get, init/exit
1190  ****************************************************************************/
1191
1192 /*
1193  *      i7core_put_devices      'put' all the devices that we have
1194  *                              reserved via 'get'
1195  */
1196 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1197 {
1198         int i;
1199
1200         debugf0(__FILE__ ": %s()\n", __func__);
1201         for (i = 0; i < i7core_dev->n_devs; i++) {
1202                 struct pci_dev *pdev = i7core_dev->pdev[i];
1203                 if (!pdev)
1204                         continue;
1205                 debugf0("Removing dev %02x:%02x.%d\n",
1206                         pdev->bus->number,
1207                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1208                 pci_dev_put(pdev);
1209         }
1210         kfree(i7core_dev->pdev);
1211         list_del(&i7core_dev->list);
1212         kfree(i7core_dev);
1213 }
1214
1215 static void i7core_put_all_devices(void)
1216 {
1217         struct i7core_dev *i7core_dev, *tmp;
1218
1219         list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
1220                 i7core_put_devices(i7core_dev);
1221 }
1222
1223 static void __init i7core_xeon_pci_fixup(struct pci_id_table *table)
1224 {
1225         struct pci_dev *pdev = NULL;
1226         int i;
1227         /*
1228          * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1229          * aren't announced by acpi. So, we need to use a legacy scan probing
1230          * to detect them
1231          */
1232         while (table && table->descr) {
1233                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1234                 if (unlikely(!pdev)) {
1235                         for (i = 0; i < MAX_SOCKET_BUSES; i++)
1236                                 pcibios_scan_specific_bus(255-i);
1237                 }
1238                 pci_dev_put(pdev);
1239                 table++;
1240         }
1241 }
1242
1243 static unsigned i7core_pci_lastbus(void)
1244 {
1245         int last_bus = 0, bus;
1246         struct pci_bus *b = NULL;
1247
1248         while ((b = pci_find_next_bus(b)) != NULL) {
1249                 bus = b->number;
1250                 debugf0("Found bus %d\n", bus);
1251                 if (bus > last_bus)
1252                         last_bus = bus;
1253         }
1254
1255         debugf0("Last bus %d\n", last_bus);
1256
1257         return last_bus;
1258 }
1259
1260 /*
1261  *      i7core_get_devices      Find and perform 'get' operation on the MCH's
1262  *                      device/functions we want to reference for this driver
1263  *
1264  *                      Need to 'get' device 16 func 1 and func 2
1265  */
/*
 * i7core_get_onedevice - take ('get') a reference on one MCH pci function
 * @prev:      device found on the previous call for this id, or NULL to
 *             start a new search; set to the device found (NULL at the
 *             end of the search)
 * @devno:     index of this descriptor within the table
 * @dev_descr: expected vendor/device id plus dev/func location
 * @n_devs:    number of devices in the table (sizes the per-socket array)
 * @last_bus:  highest PCI bus number, used to derive the socket number
 *
 * Returns 0 on success, or when an optional device is absent or the
 * search is over; -ENODEV on a missing/duplicated/misplaced device,
 * -ENOMEM on allocation failure.
 */
int i7core_get_onedevice(struct pci_dev **prev, int devno,
                         struct pci_id_descr *dev_descr, unsigned n_devs,
                         unsigned last_bus)
{
        struct i7core_dev *i7core_dev;

        struct pci_dev *pdev = NULL;
        u8 bus = 0;
        u8 socket = 0;

        pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                              dev_descr->dev_id, *prev);

        /*
         * On Xeon 55xx, the Intel Quickpath Arch Generic Non-core regs
         * is at addr 8086:2c40, instead of 8086:2c41. So, we need
         * to probe for the alternate address in case of failure
         */
        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
                                      *prev);

        if (!pdev) {
                /* Continuation of a search that found devices before:
                 * simply report end-of-search */
                if (*prev) {
                        *prev = pdev;
                        return 0;
                }

                if (dev_descr->optional)
                        return 0;

                /* First descriptor missing: this table doesn't match
                 * the hardware, let the caller try the next one */
                if (devno == 0)
                        return -ENODEV;

                i7core_printk(KERN_INFO,
                        "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
                        dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

                /* End of list, leave */
                return -ENODEV;
        }
        bus = pdev->bus->number;

        /* Non-core buses count down from last_bus, one per socket */
        socket = last_bus - bus;

        i7core_dev = get_i7core_dev(socket);
        if (!i7core_dev) {
                /* First device seen on this socket: allocate and enqueue
                 * its descriptor */
                i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
                if (!i7core_dev)
                        return -ENOMEM;
                i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
                                           GFP_KERNEL);
                if (!i7core_dev->pdev) {
                        kfree(i7core_dev);
                        return -ENOMEM;
                }
                i7core_dev->socket = socket;
                i7core_dev->n_devs = n_devs;
                list_add_tail(&i7core_dev->list, &i7core_edac_list);
        }

        if (i7core_dev->pdev[devno]) {
                i7core_printk(KERN_ERR,
                        "Duplicated device for "
                        "dev %02x:%02x.%d PCI ID %04x:%04x\n",
                        bus, dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
                pci_dev_put(pdev);
                return -ENODEV;
        }

        i7core_dev->pdev[devno] = pdev;

        /* Sanity check */
        if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
                        PCI_FUNC(pdev->devfn) != dev_descr->func)) {
                i7core_printk(KERN_ERR,
                        "Device PCI ID %04x:%04x "
                        "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
                        bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                        bus, dev_descr->dev, dev_descr->func);
                return -ENODEV;
        }

        /* Be sure that the device is enabled */
        if (unlikely(pci_enable_device(pdev) < 0)) {
                i7core_printk(KERN_ERR,
                        "Couldn't enable "
                        "dev %02x:%02x.%d PCI ID %04x:%04x\n",
                        bus, dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
                return -ENODEV;
        }

        debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
                socket, bus, dev_descr->dev,
                dev_descr->func,
                PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

        *prev = pdev;

        return 0;
}
1376
1377 static int i7core_get_devices(struct pci_id_table *table)
1378 {
1379         int i, rc, last_bus;
1380         struct pci_dev *pdev = NULL;
1381         struct pci_id_descr *dev_descr;
1382
1383         last_bus = i7core_pci_lastbus();
1384
1385         while (table && table->descr) {
1386                 dev_descr = table->descr;
1387                 for (i = 0; i < table->n_devs; i++) {
1388                         pdev = NULL;
1389                         do {
1390                                 rc = i7core_get_onedevice(&pdev, i,
1391                                                           &dev_descr[i],
1392                                                           table->n_devs,
1393                                                           last_bus);
1394                                 if (rc < 0) {
1395                                         if (i == 0) {
1396                                                 i = table->n_devs;
1397                                                 break;
1398                                         }
1399                                         i7core_put_all_devices();
1400                                         return -ENODEV;
1401                                 }
1402                         } while (pdev);
1403                 }
1404                 table++;
1405         }
1406
1407         return 0;
1408         return 0;
1409 }
1410
/*
 * mci_bind_devs - associate the previously 'got' pci devices with @mci
 *
 * Sorts each pci function of @i7core_dev into the driver's private
 * structure by slot/function: slot 3 -> pci_mcr[], slots 4..4+NUM_CHANS-1
 * -> pci_ch[][], slot 0 func 0 -> pci_noncore. The presence of dev 3
 * func 2 is used as the registered-memory indicator; for unregistered
 * memory, the udimm counters group is plugged into the reserved slot of
 * the sysfs attribute table.
 *
 * Returns 0 on success, -EINVAL when a device is out of the expected
 * slot/function range.
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
                         struct i7core_dev *i7core_dev)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        struct pci_dev *pdev;
        int i, func, slot;

        /* Associates i7core_dev and mci for future usage */
        pvt->i7core_dev = i7core_dev;
        i7core_dev->mci = mci;

        pvt->is_registered = 0;
        for (i = 0; i < i7core_dev->n_devs; i++) {
                pdev = i7core_dev->pdev[i];
                if (!pdev)
                        continue;

                func = PCI_FUNC(pdev->devfn);
                slot = PCI_SLOT(pdev->devfn);
                if (slot == 3) {
                        if (unlikely(func > MAX_MCR_FUNC))
                                goto error;
                        pvt->pci_mcr[func] = pdev;
                } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
                        if (unlikely(func > MAX_CHAN_FUNC))
                                goto error;
                        pvt->pci_ch[slot - 4][func] = pdev;
                } else if (!slot && !func)
                        pvt->pci_noncore = pdev;
                else
                        goto error;

                debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
                        PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                        pdev, i7core_dev->socket);

                /* dev 3 func 2 present -> treat memory as registered */
                if (PCI_SLOT(pdev->devfn) == 3 &&
                        PCI_FUNC(pdev->devfn) == 2)
                        pvt->is_registered = 1;
        }

        /*
         * Add extra nodes to count errors on udimm
         * For registered memory, this is not needed, since the counters
         * are already displayed at the standard locations
         */
        if (!pvt->is_registered)
                i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
                        &i7core_udimm_counters;

        return 0;

error:
        i7core_printk(KERN_ERR, "Device %d, function %d "
                      "is out of the expected range\n",
                      slot, func);
        return -EINVAL;
}
1469
1470 /****************************************************************************
1471                         Error check routines
1472  ****************************************************************************/
1473 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1474                                          int chan, int dimm, int add)
1475 {
1476         char *msg;
1477         struct i7core_pvt *pvt = mci->pvt_info;
1478         int row = pvt->csrow_map[chan][dimm], i;
1479
1480         for (i = 0; i < add; i++) {
1481                 msg = kasprintf(GFP_KERNEL, "Corrected error "
1482                                 "(Socket=%d channel=%d dimm=%d)",
1483                                 pvt->i7core_dev->socket, chan, dimm);
1484
1485                 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1486                 kfree (msg);
1487         }
1488 }
1489
/*
 * i7core_rdimm_update_ce_count - fold new per-dimm CE counts for @chan
 *
 * @new0..@new2 hold the current hardware counter values for dimms 0-2.
 * Deltas against the previous snapshot are accumulated and forwarded to
 * the edac core; on the very first call only the snapshot is stored.
 */
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
                        int chan, int new0, int new1, int new2)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        int add0 = 0, add1 = 0, add2 = 0;
        /* Updates CE counters if it is not the first time here */
        if (pvt->ce_count_available) {
                /* Updates CE counters */

                add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
                add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
                add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

                /* A negative delta means the hardware counter wrapped;
                 * the 0x7fff correction suggests 15-bit counters
                 * (TODO(review): confirm against the datasheet) */
                if (add2 < 0)
                        add2 += 0x7fff;
                pvt->rdimm_ce_count[chan][2] += add2;

                if (add1 < 0)
                        add1 += 0x7fff;
                pvt->rdimm_ce_count[chan][1] += add1;

                if (add0 < 0)
                        add0 += 0x7fff;
                pvt->rdimm_ce_count[chan][0] += add0;
        } else
                pvt->ce_count_available = 1;

        /* Store the new values */
        pvt->rdimm_last_ce_count[chan][2] = new2;
        pvt->rdimm_last_ce_count[chan][1] = new1;
        pvt->rdimm_last_ce_count[chan][0] = new0;

        /*updated the edac core */
        if (add0 != 0)
                i7core_rdimm_update_csrow(mci, chan, 0, add0);
        if (add1 != 0)
                i7core_rdimm_update_csrow(mci, chan, 1, add1);
        if (add2 != 0)
                i7core_rdimm_update_csrow(mci, chan, 2, add2);

}
1531
/*
 * i7core_rdimm_check_mc_ecc_err - poll corrected-error counters (rdimm)
 *
 * Reads the six MC_COR_ECC_CNT registers (dev 3 func 2), two 32-bit
 * words per channel, and derives per-dimm counts. How the words map to
 * dimms depends on whether the channel holds more than two dimms.
 */
static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 rcv[3][2];
        int i, new0, new1, new2;

        /*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
                                                                &rcv[0][0]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
                                                                &rcv[0][1]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
                                                                &rcv[1][0]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
                                                                &rcv[1][1]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
                                                                &rcv[2][0]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
                                                                &rcv[2][1]);
        for (i = 0 ; i < 3; i++) {
                debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
                        (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
                /*if the channel has 3 dimms*/
                if (pvt->channel[i].dimms > 2) {
                        new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
                        new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
                        new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
                } else {
                        /* 1-2 dimms: each word covers a single dimm,
                         * sum its top and bottom halves */
                        new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
                                        DIMM_BOT_COR_ERR(rcv[i][0]);
                        new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
                                        DIMM_BOT_COR_ERR(rcv[i][1]);
                        new2 = 0;
                }

                i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
        }
}
1570
1571 /* This function is based on the device 3 function 4 registers as described on:
1572  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1573  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1574  * also available at:
1575  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1576  */
static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv1, rcv0;
	int new0, new1, new2;

	/* The UDIMM corrected-error counters live behind pci_mcr[4] */
	if (!pvt->pci_mcr[4]) {
		debugf0("%s MCR registers not found\n", __func__);
		return;
	}

	/* Corrected test errors */
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);

	/* Store the new values: one per-DIMM counter field per register */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);

	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		int add0, add1, add2;

		/* Delta since the last poll; negative means the HW counter wrapped */
		add2 = new2 - pvt->udimm_last_ce_count[2];
		add1 = new1 - pvt->udimm_last_ce_count[1];
		add0 = new0 - pvt->udimm_last_ce_count[0];

		/*
		 * NOTE(review): wrap compensation adds 0x7fff; a 15-bit
		 * counter that wraps to zero would need 0x8000 — confirm the
		 * counter's maximum against the Xeon 5500 datasheet.
		 */
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->udimm_ce_count[2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->udimm_ce_count[1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->udimm_ce_count[0] += add0;

		if (add0 | add1 | add2)
			i7core_printk(KERN_ERR, "New Corrected error(s): "
				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
				      add0, add1, add2);
	} else
		pvt->ce_count_available = 1;

	/* Store the new values so the next poll computes a fresh delta */
	pvt->udimm_last_ce_count[2] = new2;
	pvt->udimm_last_ce_count[1] = new1;
	pvt->udimm_last_ce_count[0] = new0;
}
1630
1631 /*
1632  * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1633  * Architectures Software Developer’s Manual Volume 3B.
1634  * Nehalem are defined as family 0x06, model 0x1a
1635  *
1636  * The MCA registers used here are the following ones:
1637  *     struct mce field MCA Register
1638  *     m->status        MSR_IA32_MC8_STATUS
1639  *     m->addr          MSR_IA32_MC8_ADDR
1640  *     m->misc          MSR_IA32_MC8_MISC
1641  * In the case of Nehalem, the error information is masked at .status and .misc
1642  * fields
1643  */
1644 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1645                                     struct mce *m)
1646 {
1647         struct i7core_pvt *pvt = mci->pvt_info;
1648         char *type, *optype, *err, *msg;
1649         unsigned long error = m->status & 0x1ff0000l;
1650         u32 optypenum = (m->status >> 4) & 0x07;
1651         u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1652         u32 dimm = (m->misc >> 16) & 0x3;
1653         u32 channel = (m->misc >> 18) & 0x3;
1654         u32 syndrome = m->misc >> 32;
1655         u32 errnum = find_first_bit(&error, 32);
1656         int csrow;
1657
1658         if (m->mcgstatus & 1)
1659                 type = "FATAL";
1660         else
1661                 type = "NON_FATAL";
1662
1663         switch (optypenum) {
1664         case 0:
1665                 optype = "generic undef request";
1666                 break;
1667         case 1:
1668                 optype = "read error";
1669                 break;
1670         case 2:
1671                 optype = "write error";
1672                 break;
1673         case 3:
1674                 optype = "addr/cmd error";
1675                 break;
1676         case 4:
1677                 optype = "scrubbing error";
1678                 break;
1679         default:
1680                 optype = "reserved";
1681                 break;
1682         }
1683
1684         switch (errnum) {
1685         case 16:
1686                 err = "read ECC error";
1687                 break;
1688         case 17:
1689                 err = "RAS ECC error";
1690                 break;
1691         case 18:
1692                 err = "write parity error";
1693                 break;
1694         case 19:
1695                 err = "redundacy loss";
1696                 break;
1697         case 20:
1698                 err = "reserved";
1699                 break;
1700         case 21:
1701                 err = "memory range error";
1702                 break;
1703         case 22:
1704                 err = "RTID out of range";
1705                 break;
1706         case 23:
1707                 err = "address parity error";
1708                 break;
1709         case 24:
1710                 err = "byte enable parity error";
1711                 break;
1712         default:
1713                 err = "unknown";
1714         }
1715
1716         /* FIXME: should convert addr into bank and rank information */
1717         msg = kasprintf(GFP_ATOMIC,
1718                 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1719                 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1720                 type, (long long) m->addr, m->cpu, dimm, channel,
1721                 syndrome, core_err_cnt, (long long)m->status,
1722                 (long long)m->misc, optype, err);
1723
1724         debugf0("%s", msg);
1725
1726         csrow = pvt->csrow_map[channel][dimm];
1727
1728         /* Call the helper to output message */
1729         if (m->mcgstatus & 1)
1730                 edac_mc_handle_fbd_ue(mci, csrow, 0,
1731                                 0 /* FIXME: should be channel here */, msg);
1732         else if (!pvt->is_registered)
1733                 edac_mc_handle_fbd_ce(mci, csrow,
1734                                 0 /* FIXME: should be channel here */, msg);
1735
1736         kfree(msg);
1737 }
1738
1739 /*
1740  *      i7core_check_error      Retrieve and process errors reported by the
1741  *                              hardware. Called by the Core module.
1742  */
static void i7core_check_error(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m;

	/*
	 * MCE first step: Copy all mce errors into a temporary buffer
	 * We use a double buffering here, to reduce the risk of
	 * losing an error.
	 */
	smp_rmb();
	/* Entries queued between consumer (mce_in) and producer (mce_out) */
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
		% MCE_LOG_LEN;
	if (!count)
		goto check_ce_error;

	m = pvt->mce_outentry;
	/* Pending entries may wrap around the ring: copy the tail first */
	if (pvt->mce_in + count > MCE_LOG_LEN) {
		unsigned l = MCE_LOG_LEN - pvt->mce_in;

		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		smp_wmb();
		pvt->mce_in = 0;
		count -= l;
		m += l;
	}
	/* Copy the remaining (or only) contiguous run, then advance mce_in */
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
	smp_wmb();
	pvt->mce_in += count;

	smp_rmb();
	if (pvt->mce_overrun) {
		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
			      pvt->mce_overrun);
		smp_wmb();
		pvt->mce_overrun = 0;
	}

	/*
	 * MCE second step: parse errors and display
	 */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);

	/*
	 * Now, let's increment CE error counts
	 */
check_ce_error:
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
		i7core_rdimm_check_mc_ecc_err(mci);
}
1798
1799 /*
1800  * i7core_mce_check_error       Replicates mcelog routine to get errors
1801  *                              This routine simply queues mcelog errors, and
1802  *                              return. The error itself should be handled later
1803  *                              by i7core_check_error.
1804  * WARNING: As this routine should be called at NMI time, extra care should
1805  * be taken to avoid deadlocks, and to be as fast as possible.
1806  */
static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;

#ifdef CONFIG_SMP
	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
		return 0;
#endif

	smp_rmb();
	/* Ring full: count the loss; the polling routine reports it later */
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return 0;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	/* Publish the entry before advancing the producer index */
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advise mcelog that the error was handled */
	return 1;
}
1848
1849 static int i7core_register_mci(struct i7core_dev *i7core_dev,
1850                                int num_channels, int num_csrows)
1851 {
1852         struct mem_ctl_info *mci;
1853         struct i7core_pvt *pvt;
1854         int csrow = 0;
1855         int rc;
1856
1857         /* allocate a new MC control structure */
1858         mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1859                             i7core_dev->socket);
1860         if (unlikely(!mci))
1861                 return -ENOMEM;
1862
1863         debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1864
1865         /* record ptr to the generic device */
1866         mci->dev = &i7core_dev->pdev[0]->dev;
1867
1868         pvt = mci->pvt_info;
1869         memset(pvt, 0, sizeof(*pvt));
1870
1871         /*
1872          * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1873          * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1874          * memory channels
1875          */
1876         mci->mtype_cap = MEM_FLAG_DDR3;
1877         mci->edac_ctl_cap = EDAC_FLAG_NONE;
1878         mci->edac_cap = EDAC_FLAG_NONE;
1879         mci->mod_name = "i7core_edac.c";
1880         mci->mod_ver = I7CORE_REVISION;
1881         mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1882                                   i7core_dev->socket);
1883         mci->dev_name = pci_name(i7core_dev->pdev[0]);
1884         mci->ctl_page_to_phys = NULL;
1885         mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
1886         /* Set the function pointer to an actual operation function */
1887         mci->edac_check = i7core_check_error;
1888
1889         /* Store pci devices at mci for faster access */
1890         rc = mci_bind_devs(mci, i7core_dev);
1891         if (unlikely(rc < 0))
1892                 goto fail;
1893
1894         /* Get dimm basic config */
1895         get_dimm_config(mci, &csrow);
1896
1897         /* add this new MC control structure to EDAC's list of MCs */
1898         if (unlikely(edac_mc_add_mc(mci))) {
1899                 debugf0("MC: " __FILE__
1900                         ": %s(): failed edac_mc_add_mc()\n", __func__);
1901                 /* FIXME: perhaps some code should go here that disables error
1902                  * reporting if we just enabled it
1903                  */
1904
1905                 rc = -EINVAL;
1906                 goto fail;
1907         }
1908
1909         /* allocating generic PCI control info */
1910         pvt->i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1911                                                  EDAC_MOD_STR);
1912         if (unlikely(!pvt->i7core_pci)) {
1913                 printk(KERN_WARNING
1914                         "%s(): Unable to create PCI control\n",
1915                         __func__);
1916                 printk(KERN_WARNING
1917                         "%s(): PCI error report via EDAC not setup\n",
1918                         __func__);
1919         }
1920
1921         /* Default error mask is any memory */
1922         pvt->inject.channel = 0;
1923         pvt->inject.dimm = -1;
1924         pvt->inject.rank = -1;
1925         pvt->inject.bank = -1;
1926         pvt->inject.page = -1;
1927         pvt->inject.col = -1;
1928
1929         /* Registers on edac_mce in order to receive memory errors */
1930         pvt->edac_mce.priv = mci;
1931         pvt->edac_mce.check_error = i7core_mce_check_error;
1932
1933         rc = edac_mce_register(&pvt->edac_mce);
1934         if (unlikely(rc < 0)) {
1935                 debugf0("MC: " __FILE__
1936                         ": %s(): failed edac_mce_register()\n", __func__);
1937         }
1938
1939 fail:
1940         if (rc < 0)
1941                 edac_mc_free(mci);
1942         return rc;
1943 }
1944
1945 /*
1946  *      i7core_probe    Probe for ONE instance of device to see if it is
1947  *                      present.
1948  *      return:
1949  *              0 for FOUND a device
1950  *              < 0 for error code
1951  */
1952
/*
 * Non-zero after the first probe: every memory controller is discovered on
 * the first i7core_probe() call, so subsequent calls bail out early.
 * Updated under i7core_edac_lock.
 */
static int probed = 0;
1954
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	int rc;
	struct i7core_dev *i7core_dev;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(probed >= 1)) {
		mutex_unlock(&i7core_edac_lock);
		return -EINVAL;
	}
	probed++;

	rc = i7core_get_devices(pci_dev_table);
	if (unlikely(rc < 0))
		goto fail0;

	/* Register one MC per discovered socket */
	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		int channels;
		int csrows;

		/* Check the number of active and not disabled channels */
		rc = i7core_get_active_channels(i7core_dev->socket,
						&channels, &csrows);
		if (unlikely(rc < 0))
			goto fail1;

		rc = i7core_register_mci(i7core_dev, channels, csrows);
		if (unlikely(rc < 0))
			goto fail1;
	}

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&i7core_edac_lock);
	return 0;

fail1:
	/*
	 * NOTE(review): MCs registered by earlier loop iterations are not
	 * unregistered before the PCI devices are dropped here — confirm
	 * whether a partial failure can leave stale mci registrations.
	 */
	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}
2003
2004 /*
2005  *      i7core_remove   destructor for one instance of device
2006  *
2007  */
2008 static void __devexit i7core_remove(struct pci_dev *pdev)
2009 {
2010         struct mem_ctl_info *mci;
2011         struct i7core_dev *i7core_dev, *tmp;
2012         struct i7core_pvt *pvt;
2013
2014         debugf0(__FILE__ ": %s()\n", __func__);
2015
2016         /*
2017          * we have a trouble here: pdev value for removal will be wrong, since
2018          * it will point to the X58 register used to detect that the machine
2019          * is a Nehalem or upper design. However, due to the way several PCI
2020          * devices are grouped together to provide MC functionality, we need
2021          * to use a different method for releasing the devices
2022          */
2023
2024         mutex_lock(&i7core_edac_lock);
2025         list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
2026                 mci = find_mci_by_dev(&i7core_dev->pdev[0]->dev);
2027                 if (unlikely(!mci || !mci->pvt_info)) {
2028                         i7core_printk(KERN_ERR,
2029                                       "Couldn't find mci hanler\n");
2030                 } else {
2031                         pvt = mci->pvt_info;
2032                         i7core_dev = pvt->i7core_dev;
2033
2034                         if (likely(pvt->i7core_pci))
2035                                 edac_pci_release_generic_ctl(pvt->i7core_pci);
2036                         else
2037                                 i7core_printk(KERN_ERR,
2038                                               "Couldn't find mem_ctl_info for socket %d\n",
2039                                               i7core_dev->socket);
2040                         pvt->i7core_pci = NULL;
2041
2042                         edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
2043
2044                         edac_mce_unregister(&pvt->edac_mce);
2045                         kfree(mci->ctl_name);
2046                         edac_mc_free(mci);
2047                         i7core_put_devices(i7core_dev);
2048                 }
2049         }
2050         probed--;
2051
2052         mutex_unlock(&i7core_edac_lock);
2053 }
2054
2055 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2056
2057 /*
2058  *      i7core_driver   pci_driver structure for this module
2059  *
2060  */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,	/* discovers all sockets on first call */
	.remove   = __devexit_p(i7core_remove),	/* tears down every registered MC */
	.id_table = i7core_pci_tbl,
};
2067
2068 /*
2069  *      i7core_init             Module entry function
2070  *                      Try to initialize this module for its devices
2071  */
2072 static int __init i7core_init(void)
2073 {
2074         int pci_rc;
2075
2076         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2077
2078         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2079         opstate_init();
2080
2081         i7core_xeon_pci_fixup(pci_dev_table);
2082
2083         pci_rc = pci_register_driver(&i7core_driver);
2084
2085         if (pci_rc >= 0)
2086                 return 0;
2087
2088         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2089                       pci_rc);
2090
2091         return pci_rc;
2092 }
2093
2094 /*
2095  *      i7core_exit()   Module exit function
2096  *                      Unregister the driver
2097  */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	/* Per-socket teardown happens in i7core_remove() via the PCI core */
	pci_unregister_driver(&i7core_driver);
}
2103
2104 module_init(i7core_init);
2105 module_exit(i7core_exit);
2106
2107 MODULE_LICENSE("GPL");
2108 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2109 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2110 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2111                    I7CORE_REVISION);
2112
2113 module_param(edac_op_state, int, 0444);
2114 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");