i7core_edac: move static vars to the beginning of the file
[pandora-kernel.git] / drivers / edac / i7core_edac.c
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *       Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *      http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/edac_mce.h>
37 #include <linux/smp.h>
38 #include <asm/processor.h>
39
40 #include "edac_core.h"
41
42 /* Static vars */
43 static LIST_HEAD(i7core_edac_list);
44 static DEFINE_MUTEX(i7core_edac_lock);
45 static int probed;
46
47 /*
48  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
49  * registers start at bus 255, and are not reported by BIOS.
50  * We currently find devices with only 2 sockets. In order to support more QPI
51  * Quick Path Interconnect, just increment this number.
52  */
53 #define MAX_SOCKET_BUSES        2
54
55
56 /*
57  * Alter this version for the module when modifications are made
58  */
59 #define I7CORE_REVISION    " Ver: 1.0.0 " __DATE__
60 #define EDAC_MOD_STR      "i7core_edac"
61
62 /*
63  * Debug macros
64  */
65 #define i7core_printk(level, fmt, arg...)                       \
66         edac_printk(level, "i7core", fmt, ##arg)
67
68 #define i7core_mc_printk(mci, level, fmt, arg...)               \
69         edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
70
71 /*
72  * i7core Memory Controller Registers
73  */
74
75         /* OFFSETS for Device 0 Function 0 */
76
77 #define MC_CFG_CONTROL  0x90
78
79         /* OFFSETS for Device 3 Function 0 */
80
81 #define MC_CONTROL      0x48
82 #define MC_STATUS       0x4c
83 #define MC_MAX_DOD      0x64
84
85 /*
86  * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
87  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
88  */
89
90 #define MC_TEST_ERR_RCV1        0x60
91   #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)
92
93 #define MC_TEST_ERR_RCV0        0x64
94   #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
95   #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)
96
97 /* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
98 #define MC_COR_ECC_CNT_0        0x80
99 #define MC_COR_ECC_CNT_1        0x84
100 #define MC_COR_ECC_CNT_2        0x88
101 #define MC_COR_ECC_CNT_3        0x8c
102 #define MC_COR_ECC_CNT_4        0x90
103 #define MC_COR_ECC_CNT_5        0x94
104
105 #define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
106 #define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)
107
108
109         /* OFFSETS for Devices 4,5 and 6 Function 0 */
110
111 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
112   #define THREE_DIMMS_PRESENT           (1 << 24)
113   #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
114   #define QUAD_RANK_PRESENT             (1 << 22)
115   #define REGISTERED_DIMM               (1 << 15)
116
117 #define MC_CHANNEL_MAPPER       0x60
118   #define RDLCH(r, ch)          ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
119   #define WRLCH(r, ch)          ((((r) >> (ch * 6)) & 0x07) - 1)
120
121 #define MC_CHANNEL_RANK_PRESENT 0x7c
122   #define RANK_PRESENT_MASK             0xffff
123
124 #define MC_CHANNEL_ADDR_MATCH   0xf0
125 #define MC_CHANNEL_ERROR_MASK   0xf8
126 #define MC_CHANNEL_ERROR_INJECT 0xfc
127   #define INJECT_ADDR_PARITY    0x10
128   #define INJECT_ECC            0x08
129   #define MASK_CACHELINE        0x06
130   #define MASK_FULL_CACHELINE   0x06
131   #define MASK_MSB32_CACHELINE  0x04
132   #define MASK_LSB32_CACHELINE  0x02
133   #define NO_MASK_CACHELINE     0x00
134   #define REPEAT_EN             0x01
135
136         /* OFFSETS for Devices 4,5 and 6 Function 1 */
137
138 #define MC_DOD_CH_DIMM0         0x48
139 #define MC_DOD_CH_DIMM1         0x4c
140 #define MC_DOD_CH_DIMM2         0x50
141   #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
142   #define RANKOFFSET(x)         ((x & RANKOFFSET_MASK) >> 10)
143   #define DIMM_PRESENT_MASK     (1 << 9)
144   #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
145   #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
146   #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
147   #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
148   #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
149   #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
150   #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
151   #define MC_DOD_NUMCOL_MASK            3
152   #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)
153
154 #define MC_RANK_PRESENT         0x7c
155
156 #define MC_SAG_CH_0     0x80
157 #define MC_SAG_CH_1     0x84
158 #define MC_SAG_CH_2     0x88
159 #define MC_SAG_CH_3     0x8c
160 #define MC_SAG_CH_4     0x90
161 #define MC_SAG_CH_5     0x94
162 #define MC_SAG_CH_6     0x98
163 #define MC_SAG_CH_7     0x9c
164
165 #define MC_RIR_LIMIT_CH_0       0x40
166 #define MC_RIR_LIMIT_CH_1       0x44
167 #define MC_RIR_LIMIT_CH_2       0x48
168 #define MC_RIR_LIMIT_CH_3       0x4C
169 #define MC_RIR_LIMIT_CH_4       0x50
170 #define MC_RIR_LIMIT_CH_5       0x54
171 #define MC_RIR_LIMIT_CH_6       0x58
172 #define MC_RIR_LIMIT_CH_7       0x5C
173 #define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)
174
175 #define MC_RIR_WAY_CH           0x80
176   #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
177   #define MC_RIR_WAY_RANK_MASK          0x7
178
179 /*
180  * i7core structs
181  */
182
183 #define NUM_CHANS 3
184 #define MAX_DIMMS 3             /* Max DIMMS per channel */
185 #define MAX_MCR_FUNC  4
186 #define MAX_CHAN_FUNC 3
187
/* Snapshot of the global MC registers, read from device 3 function 0 */
struct i7core_info {
	u32	mc_control;	/* MC_CONTROL register */
	u32	mc_status;	/* MC_STATUS register */
	u32	max_dod;	/* MC_MAX_DOD register */
	u32	ch_map;		/* MC_CHANNEL_MAPPER register */
};
194
195
/* Error-injection parameters, configured via the sysfs store handlers */
struct i7core_inject {
	int	enable;		/* non-zero while injection is armed */

	u32	section;	/* half-cacheline selector (bits 0-1) */
	u32	type;		/* repeat/ECC/parity selector (bits 0-2) */
	u32	eccmask;	/* ECC bits to flip when injecting */

	/* Error address mask */
	int channel, dimm, rank, bank, page, col;
};
206
/* Per-channel DIMM topology discovered at probe time */
struct i7core_channel {
	u32		ranks;	/* ranks per DIMM (2, or 4 if quad-rank) */
	u32		dimms;	/* number of DIMMs found present */
};
211
/* One PCI slot/function the driver expects to find per socket */
struct pci_id_descr {
	int			dev;		/* PCI slot (device) number */
	int			func;		/* PCI function number */
	int			dev_id;		/* expected PCI device ID */
	int			optional;	/* non-zero: absence is not an error
						 * (e.g. RDIMM-only devices) */
};
218
/* A descriptor array together with its element count */
struct pci_id_table {
	struct pci_id_descr	*descr;
	int			n_devs;		/* entries in descr[] */
};
223
/* Per-socket bookkeeping, linked into the global i7core_edac_list */
struct i7core_dev {
	struct list_head	list;
	u8			socket;		/* QPI socket number */
	struct pci_dev		**pdev;		/* devices found for this socket */
	int			n_devs;		/* length of pdev[] */
	struct mem_ctl_info	*mci;		/* associated EDAC controller */
};
231
/* Driver-private state, stored in mem_ctl_info->pvt_info */
struct i7core_pvt {
	struct pci_dev	*pci_noncore;			/* non-core registers device */
	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];	/* device 3, per function */
	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1]; /* devices 4-6 */

	struct i7core_dev *i7core_dev;		/* back-pointer to socket entry */

	struct i7core_info	info;		/* cached global MC registers */
	struct i7core_inject	inject;		/* sysfs-driven injection setup */
	struct i7core_channel	channel[NUM_CHANS];

	int		channels; /* Number of active channels */

	int		ce_count_available;
	/* maps (channel, dimm) to the fake csrow index created at probe */
	int		csrow_map[NUM_CHANS][MAX_DIMMS];

			/* ECC corrected errors counts per udimm */
	unsigned long	udimm_ce_count[MAX_DIMMS];
	int		udimm_last_ce_count[MAX_DIMMS];
			/* ECC corrected errors counts per rdimm */
	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

	unsigned int	is_registered;		/* non-zero for registered DIMMs */

	/* mcelog glue */
	struct edac_mce		edac_mce;

	/* Fifo double buffers */
	struct mce		mce_entry[MCE_LOG_LEN];
	struct mce		mce_outentry[MCE_LOG_LEN];

	/* Fifo in/out counters */
	unsigned		mce_in, mce_out;

	/* Count indicator to show errors not got */
	unsigned		mce_overrun;

	/* Struct to control EDAC polling */
	struct edac_pci_ctl_info *i7core_pci;
};
273
274 #define PCI_DESCR(device, function, device_id)  \
275         .dev = (device),                        \
276         .func = (function),                     \
277         .dev_id = (device_id)
278
279 struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
280                 /* Memory controller */
281         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
282         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
283                         /* Exists only for RDIMM */
284         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
285         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
286
287                 /* Channel 0 */
288         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
289         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
290         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
291         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },
292
293                 /* Channel 1 */
294         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
295         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
296         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
297         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },
298
299                 /* Channel 2 */
300         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
301         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
302         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
303         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
304
305                 /* Generic Non-core registers */
306         /*
307          * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
308          * On Xeon 55xx, however, it has a different id (8086:2c40). So,
309          * the probing code needs to test for the other address in case of
310          * failure of this one
311          */
312         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
313
314 };
315
316 struct pci_id_descr pci_dev_descr_lynnfield[] = {
317         { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
318         { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
319         { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },
320
321         { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
322         { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
323         { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
324         { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },
325
326         { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
327         { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
328         { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
329         { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
330
331         /*
332          * This is the PCI device has an alternate address on some
333          * processors like Core i7 860
334          */
335         { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
336 };
337
338 struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
339                 /* Memory controller */
340         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
341         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
342                         /* Exists only for RDIMM */
343         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
344         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
345
346                 /* Channel 0 */
347         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
348         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
349         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
350         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },
351
352                 /* Channel 1 */
353         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
354         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
355         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
356         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },
357
358                 /* Channel 2 */
359         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
360         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
361         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
362         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
363
364                 /* Generic Non-core registers */
365         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },
366
367 };
368
369 #define PCI_ID_TABLE_ENTRY(A) { A, ARRAY_SIZE(A) }
370 struct pci_id_table pci_dev_table[] = {
371         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
372         PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
373         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
374 };
375
376 /*
377  *      pci_device_id   table for which devices we are looking for
378  */
379 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
380         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
381         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
382         {0,}                    /* 0 terminated list. */
383 };
384
385 /****************************************************************************
386                         Ancillary status routines
387  ****************************************************************************/
388
389         /* MC_CONTROL bits */
390 #define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + ch)))
391 #define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))
392
393         /* MC_STATUS bits */
394 #define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
395 #define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << ch))
396
397         /* MC_MAX_DOD read functions */
/* Decode the MAX_DOD DIMM-count field: two bits encoding (count - 1) */
static inline int numdimms(u32 dimms)
{
	return 1 + (dimms & 0x3);
}
402
/* Decode the MAX_DOD rank field: 0 -> 1 rank, 1 -> 2, 2 -> 4, 3 -> reserved */
static inline int numrank(u32 rank)
{
	switch (rank & 0x3) {
	case 0:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	default:
		return -EINVAL;
	}
}
409
/* Decode the MAX_DOD bank field: 0 -> 4 banks, 1 -> 8, 2 -> 16, 3 -> reserved */
static inline int numbank(u32 bank)
{
	static const int bank_counts[4] = { 4, 8, 16, -EINVAL };

	return bank_counts[bank & 0x3];
}
416
/*
 * Decode the MAX_DOD row field: encodings 0-4 map to 2^12 .. 2^16 rows;
 * 5-7 are reserved.
 */
static inline int numrow(u32 row)
{
	u32 code = row & 0x7;

	if (code > 4)
		return -EINVAL;

	return 1 << (12 + code);
}
426
/*
 * Decode the MAX_DOD column field: 0 -> 1K, 1 -> 2K, 2 -> 4K columns,
 * 3 -> reserved. The table is sized 4 to match the 2-bit index mask
 * (it was previously declared [8] with four dead zero entries).
 */
static inline int numcol(u32 col)
{
	static const int cols[4] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};

	return cols[col & 0x3];
}
434
435 static struct i7core_dev *get_i7core_dev(u8 socket)
436 {
437         struct i7core_dev *i7core_dev;
438
439         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
440                 if (i7core_dev->socket == socket)
441                         return i7core_dev;
442         }
443
444         return NULL;
445 }
446
447 /****************************************************************************
448                         Memory check routines
449  ****************************************************************************/
450 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
451                                           unsigned func)
452 {
453         struct i7core_dev *i7core_dev = get_i7core_dev(socket);
454         int i;
455
456         if (!i7core_dev)
457                 return NULL;
458
459         for (i = 0; i < i7core_dev->n_devs; i++) {
460                 if (!i7core_dev->pdev[i])
461                         continue;
462
463                 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
464                     PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
465                         return i7core_dev->pdev[i];
466                 }
467         }
468
469         return NULL;
470 }
471
472 /**
473  * i7core_get_active_channels() - gets the number of channels and csrows
474  * @socket:     Quick Path Interconnect socket
475  * @channels:   Number of channels that will be returned
476  * @csrows:     Number of csrows found
477  *
478  * Since EDAC core needs to know in advance the number of available channels
479  * and csrows, in order to allocate memory for csrows/channels, it is needed
480  * to run two similar steps. At the first step, implemented on this function,
481  * it checks the number of csrows/channels present at one socket.
482  * this is used in order to properly allocate the size of mci components.
483  *
484  * It should be noticed that none of the current available datasheets explain
485  * or even mention how csrows are seen by the memory controller. So, we need
486  * to add a fake description for csrows.
487  * So, this driver is attributing one DIMM memory for one csrow.
488  */
489 static int i7core_get_active_channels(u8 socket, unsigned *channels,
490                                       unsigned *csrows)
491 {
492         struct pci_dev *pdev = NULL;
493         int i, j;
494         u32 status, control;
495
496         *channels = 0;
497         *csrows = 0;
498
499         pdev = get_pdev_slot_func(socket, 3, 0);
500         if (!pdev) {
501                 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
502                               socket);
503                 return -ENODEV;
504         }
505
506         /* Device 3 function 0 reads */
507         pci_read_config_dword(pdev, MC_STATUS, &status);
508         pci_read_config_dword(pdev, MC_CONTROL, &control);
509
510         for (i = 0; i < NUM_CHANS; i++) {
511                 u32 dimm_dod[3];
512                 /* Check if the channel is active */
513                 if (!(control & (1 << (8 + i))))
514                         continue;
515
516                 /* Check if the channel is disabled */
517                 if (status & (1 << i))
518                         continue;
519
520                 pdev = get_pdev_slot_func(socket, i + 4, 1);
521                 if (!pdev) {
522                         i7core_printk(KERN_ERR, "Couldn't find socket %d "
523                                                 "fn %d.%d!!!\n",
524                                                 socket, i + 4, 1);
525                         return -ENODEV;
526                 }
527                 /* Devices 4-6 function 1 */
528                 pci_read_config_dword(pdev,
529                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
530                 pci_read_config_dword(pdev,
531                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
532                 pci_read_config_dword(pdev,
533                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
534
535                 (*channels)++;
536
537                 for (j = 0; j < 3; j++) {
538                         if (!DIMM_PRESENT(dimm_dod[j]))
539                                 continue;
540                         (*csrows)++;
541                 }
542         }
543
544         debugf0("Number of active channels on socket %d: %d\n",
545                 socket, *channels);
546
547         return 0;
548 }
549
550 static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
551 {
552         struct i7core_pvt *pvt = mci->pvt_info;
553         struct csrow_info *csr;
554         struct pci_dev *pdev;
555         int i, j;
556         unsigned long last_page = 0;
557         enum edac_type mode;
558         enum mem_type mtype;
559
560         /* Get data from the MC register, function 0 */
561         pdev = pvt->pci_mcr[0];
562         if (!pdev)
563                 return -ENODEV;
564
565         /* Device 3 function 0 reads */
566         pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
567         pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
568         pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
569         pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
570
571         debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
572                 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
573                 pvt->info.max_dod, pvt->info.ch_map);
574
575         if (ECC_ENABLED(pvt)) {
576                 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
577                 if (ECCx8(pvt))
578                         mode = EDAC_S8ECD8ED;
579                 else
580                         mode = EDAC_S4ECD4ED;
581         } else {
582                 debugf0("ECC disabled\n");
583                 mode = EDAC_NONE;
584         }
585
586         /* FIXME: need to handle the error codes */
587         debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
588                 "x%x x 0x%x\n",
589                 numdimms(pvt->info.max_dod),
590                 numrank(pvt->info.max_dod >> 2),
591                 numbank(pvt->info.max_dod >> 4),
592                 numrow(pvt->info.max_dod >> 6),
593                 numcol(pvt->info.max_dod >> 9));
594
595         for (i = 0; i < NUM_CHANS; i++) {
596                 u32 data, dimm_dod[3], value[8];
597
598                 if (!pvt->pci_ch[i][0])
599                         continue;
600
601                 if (!CH_ACTIVE(pvt, i)) {
602                         debugf0("Channel %i is not active\n", i);
603                         continue;
604                 }
605                 if (CH_DISABLED(pvt, i)) {
606                         debugf0("Channel %i is disabled\n", i);
607                         continue;
608                 }
609
610                 /* Devices 4-6 function 0 */
611                 pci_read_config_dword(pvt->pci_ch[i][0],
612                                 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
613
614                 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
615                                                 4 : 2;
616
617                 if (data & REGISTERED_DIMM)
618                         mtype = MEM_RDDR3;
619                 else
620                         mtype = MEM_DDR3;
621 #if 0
622                 if (data & THREE_DIMMS_PRESENT)
623                         pvt->channel[i].dimms = 3;
624                 else if (data & SINGLE_QUAD_RANK_PRESENT)
625                         pvt->channel[i].dimms = 1;
626                 else
627                         pvt->channel[i].dimms = 2;
628 #endif
629
630                 /* Devices 4-6 function 1 */
631                 pci_read_config_dword(pvt->pci_ch[i][1],
632                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
633                 pci_read_config_dword(pvt->pci_ch[i][1],
634                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
635                 pci_read_config_dword(pvt->pci_ch[i][1],
636                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
637
638                 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
639                         "%d ranks, %cDIMMs\n",
640                         i,
641                         RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
642                         data,
643                         pvt->channel[i].ranks,
644                         (data & REGISTERED_DIMM) ? 'R' : 'U');
645
646                 for (j = 0; j < 3; j++) {
647                         u32 banks, ranks, rows, cols;
648                         u32 size, npages;
649
650                         if (!DIMM_PRESENT(dimm_dod[j]))
651                                 continue;
652
653                         banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
654                         ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
655                         rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
656                         cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
657
658                         /* DDR3 has 8 I/O banks */
659                         size = (rows * cols * banks * ranks) >> (20 - 3);
660
661                         pvt->channel[i].dimms++;
662
663                         debugf0("\tdimm %d %d Mb offset: %x, "
664                                 "bank: %d, rank: %d, row: %#x, col: %#x\n",
665                                 j, size,
666                                 RANKOFFSET(dimm_dod[j]),
667                                 banks, ranks, rows, cols);
668
669 #if PAGE_SHIFT > 20
670                         npages = size >> (PAGE_SHIFT - 20);
671 #else
672                         npages = size << (20 - PAGE_SHIFT);
673 #endif
674
675                         csr = &mci->csrows[*csrow];
676                         csr->first_page = last_page + 1;
677                         last_page += npages;
678                         csr->last_page = last_page;
679                         csr->nr_pages = npages;
680
681                         csr->page_mask = 0;
682                         csr->grain = 8;
683                         csr->csrow_idx = *csrow;
684                         csr->nr_channels = 1;
685
686                         csr->channels[0].chan_idx = i;
687                         csr->channels[0].ce_count = 0;
688
689                         pvt->csrow_map[i][j] = *csrow;
690
691                         switch (banks) {
692                         case 4:
693                                 csr->dtype = DEV_X4;
694                                 break;
695                         case 8:
696                                 csr->dtype = DEV_X8;
697                                 break;
698                         case 16:
699                                 csr->dtype = DEV_X16;
700                                 break;
701                         default:
702                                 csr->dtype = DEV_UNKNOWN;
703                         }
704
705                         csr->edac_mode = mode;
706                         csr->mtype = mtype;
707
708                         (*csrow)++;
709                 }
710
711                 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
712                 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
713                 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
714                 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
715                 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
716                 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
717                 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
718                 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
719                 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
720                 for (j = 0; j < 8; j++)
721                         debugf1("\t\t%#x\t%#x\t%#x\n",
722                                 (value[j] >> 27) & 0x1,
723                                 (value[j] >> 24) & 0x7,
724                                 (value[j] && ((1 << 24) - 1)));
725         }
726
727         return 0;
728 }
729
730 /****************************************************************************
731                         Error insertion routines
732  ****************************************************************************/
733
734 /* The i7core has independent error injection features per channel.
735    However, to have a simpler code, we don't allow enabling error injection
736    on more than one channel.
737    Also, since a change at an inject parameter will be applied only at enable,
738    we're disabling error injection on all write calls to the sysfs nodes that
739    controls the error code injection.
740  */
741 static int disable_inject(struct mem_ctl_info *mci)
742 {
743         struct i7core_pvt *pvt = mci->pvt_info;
744
745         pvt->inject.enable = 0;
746
747         if (!pvt->pci_ch[pvt->inject.channel][0])
748                 return -ENODEV;
749
750         pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
751                                 MC_CHANNEL_ERROR_INJECT, 0);
752
753         return 0;
754 }
755
756 /*
757  * i7core inject.section
758  *
759  *      accept and store error injection inject.section value
760  *      bit 0 - refers to the lower 32-byte half cacheline
761  *      bit 1 - refers to the upper 32-byte half cacheline
762  */
763 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
764                                            const char *data, size_t count)
765 {
766         struct i7core_pvt *pvt = mci->pvt_info;
767         unsigned long value;
768         int rc;
769
770         if (pvt->inject.enable)
771                 disable_inject(mci);
772
773         rc = strict_strtoul(data, 10, &value);
774         if ((rc < 0) || (value > 3))
775                 return -EIO;
776
777         pvt->inject.section = (u32) value;
778         return count;
779 }
780
781 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
782                                               char *data)
783 {
784         struct i7core_pvt *pvt = mci->pvt_info;
785         return sprintf(data, "0x%08x\n", pvt->inject.section);
786 }
787
788 /*
789  * i7core inject.type
790  *
791  *      accept and store error injection inject.type value
792  *      bit 0 - repeat enable - Enable error repetition
793  *      bit 1 - inject ECC error
794  *      bit 2 - inject parity error
795  */
796 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
797                                         const char *data, size_t count)
798 {
799         struct i7core_pvt *pvt = mci->pvt_info;
800         unsigned long value;
801         int rc;
802
803         if (pvt->inject.enable)
804                 disable_inject(mci);
805
806         rc = strict_strtoul(data, 10, &value);
807         if ((rc < 0) || (value > 7))
808                 return -EIO;
809
810         pvt->inject.type = (u32) value;
811         return count;
812 }
813
814 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
815                                               char *data)
816 {
817         struct i7core_pvt *pvt = mci->pvt_info;
818         return sprintf(data, "0x%08x\n", pvt->inject.type);
819 }
820
821 /*
822  * i7core_inject_inject.eccmask_store
823  *
824  * The type of error (UE/CE) will depend on the inject.eccmask value:
825  *   Any bits set to a 1 will flip the corresponding ECC bit
826  *   Correctable errors can be injected by flipping 1 bit or the bits within
827  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
828  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
829  *   uncorrectable error to be injected.
830  */
831 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
832                                         const char *data, size_t count)
833 {
834         struct i7core_pvt *pvt = mci->pvt_info;
835         unsigned long value;
836         int rc;
837
838         if (pvt->inject.enable)
839                 disable_inject(mci);
840
841         rc = strict_strtoul(data, 10, &value);
842         if (rc < 0)
843                 return -EIO;
844
845         pvt->inject.eccmask = (u32) value;
846         return count;
847 }
848
849 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
850                                               char *data)
851 {
852         struct i7core_pvt *pvt = mci->pvt_info;
853         return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
854 }
855
/*
 * i7core_addrmatch
 *
 * Sysfs nodes to set the address-match criteria (channel, dimm, rank,
 * bank, page and col) used by the error injection logic. Each node
 * accepts a number below its limit, or the string "any" to make the MCU
 * ignore that criterion when matching addresses.
 */
866
/*
 * DECLARE_ADDR_MATCH - generate the sysfs store/show pair for one
 * address-match criterion. The stored value is either a number in
 * [0, limit) or -1, meaning "any" (criterion ignored).
 */
#define DECLARE_ADDR_MATCH(param, limit)                        \
static ssize_t i7core_inject_store_##param(                     \
                struct mem_ctl_info *mci,                       \
                const char *data, size_t count)                 \
{                                                               \
	struct i7core_pvt *pvt;                                 \
	long value;                                             \
	int rc;                                                 \
                                                                \
	debugf1("%s()\n", __func__);                            \
	pvt = mci->pvt_info;                                    \
                                                                \
	if (pvt->inject.enable)                                 \
		disable_inject(mci);                            \
                                                                \
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
		value = -1;                                     \
	else {                                                  \
		/* value is a signed long, so strict_strtol()   \
		 * must be used here: strict_strtoul() takes an \
		 * unsigned long * and the mismatched pointer   \
		 * type is not valid C. Negative input is still \
		 * rejected, as strict_strtoul() did not accept \
		 * a leading '-' either.                        \
		 */                                             \
		rc = strict_strtol(data, 10, &value);           \
		if ((rc < 0) || (value < 0) || (value >= limit))\
			return -EIO;                            \
	}                                                       \
                                                                \
	pvt->inject.param = value;                              \
                                                                \
	return count;                                           \
}                                                               \
                                                                \
static ssize_t i7core_inject_show_##param(                      \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
	struct i7core_pvt *pvt;                                 \
                                                                \
	pvt = mci->pvt_info;                                    \
	debugf1("%s() pvt=%p\n", __func__, pvt);                \
	if (pvt->inject.param < 0)                              \
		return sprintf(data, "any\n");                  \
	else                                                    \
		return sprintf(data, "%d\n", pvt->inject.param);\
}
908
/*
 * ATTR_ADDR_MATCH - instantiate one entry of the inject_addrmatch sysfs
 * group, wired to the show/store handlers that DECLARE_ADDR_MATCH()
 * generated for the same param.
 */
#define ATTR_ADDR_MATCH(param)                                  \
	{                                                       \
		.attr = {                                       \
			.name = #param,                         \
			.mode = (S_IRUGO | S_IWUSR)             \
		},                                              \
		.show  = i7core_inject_show_##param,            \
		.store = i7core_inject_store_##param,           \
	}
918
/* Address-match criteria and their exclusive upper limits */
DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);
925
926 static int write_and_test(struct pci_dev *dev, int where, u32 val)
927 {
928         u32 read;
929         int count;
930
931         debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
932                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
933                 where, val);
934
935         for (count = 0; count < 10; count++) {
936                 if (count)
937                         msleep(100);
938                 pci_write_config_dword(dev, where, val);
939                 pci_read_config_dword(dev, where, &read);
940
941                 if (read == val)
942                         return 0;
943         }
944
945         i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
946                 "write=%08x. Read=%08x\n",
947                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
948                 where, val, read);
949
950         return -EINVAL;
951 }
952
953 /*
954  * This routine prepares the Memory Controller for error injection.
955  * The error will be injected when some process tries to write to the
956  * memory that matches the given criteria.
957  * The criteria can be set in terms of a mask where dimm, rank, bank, page
958  * and col can be specified.
959  * A -1 value for any of the mask items will make the MCU to ignore
960  * that matching criteria for error injection.
961  *
962  * It should be noticed that the error will only happen after a write operation
963  * on a memory that matches the condition. if REPEAT_EN is not enabled at
964  * inject mask, then it will produce just one error. Otherwise, it will repeat
965  * until the injectmask would be cleaned.
966  *
967  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
968  *    is reliable enough to check if the MC is using the
969  *    three channels. However, this is not clear at the datasheet.
970  */
971 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
972                                        const char *data, size_t count)
973 {
974         struct i7core_pvt *pvt = mci->pvt_info;
975         u32 injectmask;
976         u64 mask = 0;
977         int  rc;
978         long enable;
979
980         if (!pvt->pci_ch[pvt->inject.channel][0])
981                 return 0;
982
983         rc = strict_strtoul(data, 10, &enable);
984         if ((rc < 0))
985                 return 0;
986
987         if (enable) {
988                 pvt->inject.enable = 1;
989         } else {
990                 disable_inject(mci);
991                 return count;
992         }
993
994         /* Sets pvt->inject.dimm mask */
995         if (pvt->inject.dimm < 0)
996                 mask |= 1LL << 41;
997         else {
998                 if (pvt->channel[pvt->inject.channel].dimms > 2)
999                         mask |= (pvt->inject.dimm & 0x3LL) << 35;
1000                 else
1001                         mask |= (pvt->inject.dimm & 0x1LL) << 36;
1002         }
1003
1004         /* Sets pvt->inject.rank mask */
1005         if (pvt->inject.rank < 0)
1006                 mask |= 1LL << 40;
1007         else {
1008                 if (pvt->channel[pvt->inject.channel].dimms > 2)
1009                         mask |= (pvt->inject.rank & 0x1LL) << 34;
1010                 else
1011                         mask |= (pvt->inject.rank & 0x3LL) << 34;
1012         }
1013
1014         /* Sets pvt->inject.bank mask */
1015         if (pvt->inject.bank < 0)
1016                 mask |= 1LL << 39;
1017         else
1018                 mask |= (pvt->inject.bank & 0x15LL) << 30;
1019
1020         /* Sets pvt->inject.page mask */
1021         if (pvt->inject.page < 0)
1022                 mask |= 1LL << 38;
1023         else
1024                 mask |= (pvt->inject.page & 0xffff) << 14;
1025
1026         /* Sets pvt->inject.column mask */
1027         if (pvt->inject.col < 0)
1028                 mask |= 1LL << 37;
1029         else
1030                 mask |= (pvt->inject.col & 0x3fff);
1031
1032         /*
1033          * bit    0: REPEAT_EN
1034          * bits 1-2: MASK_HALF_CACHELINE
1035          * bit    3: INJECT_ECC
1036          * bit    4: INJECT_ADDR_PARITY
1037          */
1038
1039         injectmask = (pvt->inject.type & 1) |
1040                      (pvt->inject.section & 0x3) << 1 |
1041                      (pvt->inject.type & 0x6) << (3 - 1);
1042
1043         /* Unlock writes to registers - this register is write only */
1044         pci_write_config_dword(pvt->pci_noncore,
1045                                MC_CFG_CONTROL, 0x2);
1046
1047         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1048                                MC_CHANNEL_ADDR_MATCH, mask);
1049         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1050                                MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1051
1052         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1053                                MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1054
1055         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1056                                MC_CHANNEL_ERROR_INJECT, injectmask);
1057
1058         /*
1059          * This is something undocumented, based on my tests
1060          * Without writing 8 to this register, errors aren't injected. Not sure
1061          * why.
1062          */
1063         pci_write_config_dword(pvt->pci_noncore,
1064                                MC_CFG_CONTROL, 8);
1065
1066         debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1067                 " inject 0x%08x\n",
1068                 mask, pvt->inject.eccmask, injectmask);
1069
1070
1071         return count;
1072 }
1073
1074 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1075                                         char *data)
1076 {
1077         struct i7core_pvt *pvt = mci->pvt_info;
1078         u32 injectmask;
1079
1080         if (!pvt->pci_ch[pvt->inject.channel][0])
1081                 return 0;
1082
1083         pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1084                                MC_CHANNEL_ERROR_INJECT, &injectmask);
1085
1086         debugf0("Inject error read: 0x%018x\n", injectmask);
1087
1088         if (injectmask & 0x0c)
1089                 pvt->inject.enable = 1;
1090
1091         return sprintf(data, "%d\n", pvt->inject.enable);
1092 }
1093
/*
 * DECLARE_COUNTER - generate a sysfs show handler for the udimm CE
 * counter @param. The counts are only exported for unregistered memory
 * (registered memory reports through the standard csrow nodes) and only
 * once the hardware counters have been read at least once.
 */
#define DECLARE_COUNTER(param)                                  \
static ssize_t i7core_show_counter_##param(                     \
		struct mem_ctl_info *mci,                       \
		char *data)                                     \
{                                                               \
	struct i7core_pvt *pvt = mci->pvt_info;                 \
                                                                \
	debugf1("%s() \n", __func__);                           \
	if (!pvt->ce_count_available || (pvt->is_registered))   \
		return sprintf(data, "data unavailable\n");     \
	return sprintf(data, "%lu\n",                           \
			pvt->udimm_ce_count[param]);            \
}
1107
/*
 * ATTR_COUNTER - sysfs node "udimm<param>" exposing one CE counter.
 * NOTE(review): the mode includes S_IWUSR but no .store handler is
 * provided - the write bit looks unintentional; confirm before changing.
 */
#define ATTR_COUNTER(param)                                     \
	{                                                       \
		.attr = {                                       \
			.name = __stringify(udimm##param),      \
			.mode = (S_IRUGO | S_IWUSR)             \
		},                                              \
		.show  = i7core_show_counter_##param            \
	}

/* One counter node per udimm slot */
DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
1120
1121 /*
1122  * Sysfs struct
1123  */
1124
1125
/* Nodes of the inject_addrmatch sysfs group, one per match criterion */
static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
	ATTR_ADDR_MATCH(channel),
	ATTR_ADDR_MATCH(dimm),
	ATTR_ADDR_MATCH(rank),
	ATTR_ADDR_MATCH(bank),
	ATTR_ADDR_MATCH(page),
	ATTR_ADDR_MATCH(col),
	{ .attr = { .name = NULL } }	/* end of list */
};

/* Presents the criteria above as an "inject_addrmatch" directory */
static struct mcidev_sysfs_group i7core_inject_addrmatch = {
	.name  = "inject_addrmatch",
	.mcidev_attr = i7core_addrmatch_attrs,
};
1140
/* Per-udimm corrected-error counter nodes (unregistered memory only) */
static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
	ATTR_COUNTER(0),
	ATTR_COUNTER(1),
	ATTR_COUNTER(2),
	{ .attr = { .name = NULL } }	/* end of list */
};

/* Group installed into i7core_sysfs_attrs by mci_bind_devs() when the
 * memory is unregistered */
static struct mcidev_sysfs_group i7core_udimm_counters = {
	.name  = "all_channel_counts",
	.mcidev_attr = i7core_udimm_counters_attrs,
};
1152
/*
 * Top-level sysfs attributes for the i7core memory controller.
 * The last-but-one slot is deliberately left empty: mci_bind_devs()
 * fills it with the all_channel_counts group when unregistered (udimm)
 * memory is detected. The final NULL entry terminates the list.
 */
static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	},
	{ .attr = { .name = NULL } },	/* Reserved for udimm counters */
	{ .attr = { .name = NULL } }
};
1188
1189 /****************************************************************************
1190         Device initialization routines: put/get, init/exit
1191  ****************************************************************************/
1192
1193 /*
1194  *      i7core_put_devices      'put' all the devices that we have
1195  *                              reserved via 'get'
1196  */
1197 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1198 {
1199         int i;
1200
1201         debugf0(__FILE__ ": %s()\n", __func__);
1202         for (i = 0; i < i7core_dev->n_devs; i++) {
1203                 struct pci_dev *pdev = i7core_dev->pdev[i];
1204                 if (!pdev)
1205                         continue;
1206                 debugf0("Removing dev %02x:%02x.%d\n",
1207                         pdev->bus->number,
1208                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1209                 pci_dev_put(pdev);
1210         }
1211         kfree(i7core_dev->pdev);
1212         list_del(&i7core_dev->list);
1213         kfree(i7core_dev);
1214 }
1215
1216 static void i7core_put_all_devices(void)
1217 {
1218         struct i7core_dev *i7core_dev, *tmp;
1219
1220         list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
1221                 i7core_put_devices(i7core_dev);
1222 }
1223
1224 static void __init i7core_xeon_pci_fixup(struct pci_id_table *table)
1225 {
1226         struct pci_dev *pdev = NULL;
1227         int i;
1228         /*
1229          * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1230          * aren't announced by acpi. So, we need to use a legacy scan probing
1231          * to detect them
1232          */
1233         while (table && table->descr) {
1234                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1235                 if (unlikely(!pdev)) {
1236                         for (i = 0; i < MAX_SOCKET_BUSES; i++)
1237                                 pcibios_scan_specific_bus(255-i);
1238                 }
1239                 pci_dev_put(pdev);
1240                 table++;
1241         }
1242 }
1243
1244 static unsigned i7core_pci_lastbus(void)
1245 {
1246         int last_bus = 0, bus;
1247         struct pci_bus *b = NULL;
1248
1249         while ((b = pci_find_next_bus(b)) != NULL) {
1250                 bus = b->number;
1251                 debugf0("Found bus %d\n", bus);
1252                 if (bus > last_bus)
1253                         last_bus = bus;
1254         }
1255
1256         debugf0("Last bus %d\n", last_bus);
1257
1258         return last_bus;
1259 }
1260
1261 /*
1262  *      i7core_get_devices      Find and perform 'get' operation on the MCH's
1263  *                      device/functions we want to reference for this driver
1264  *
1265  *                      Need to 'get' device 16 func 1 and func 2
1266  */
/*
 * i7core_get_onedevice - 'get' one PCI device matching @dev_descr
 *
 * @prev:	device found on the previous call for the same id; used to
 *		iterate over multiple sockets. Updated to the device found
 *		(or NULL when the scan for this id is exhausted).
 * @devno:	index of this descriptor within its table.
 * @dev_descr:	expected vendor/device id and slot/function.
 * @n_devs:	number of descriptors in the table; sizes the pdev array
 *		allocated for a newly discovered socket.
 * @last_bus:	highest PCI bus number, used to derive the socket number.
 *
 * Returns 0 on success or on a benign miss (optional device, end of the
 * multi-socket scan, or a failed first-descriptor probe), negative errno
 * on error.
 */
int i7core_get_onedevice(struct pci_dev **prev, int devno,
			 struct pci_id_descr *dev_descr, unsigned n_devs,
			 unsigned last_bus)
{
	struct i7core_dev *i7core_dev;

	struct pci_dev *pdev = NULL;
	u8 bus = 0;
	u8 socket = 0;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
			      dev_descr->dev_id, *prev);

	/*
	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
	 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
	 * to probe for the alternate address in case of failure
	 */
	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
				      *prev);

	if (!pdev) {
		/* A previous hit means the scan for this id just ended */
		if (*prev) {
			*prev = pdev;
			return 0;
		}

		if (dev_descr->optional)
			return 0;

		/* devno 0 acts as a probe: absence only means this table
		 * does not match the hardware */
		if (devno == 0)
			return -ENODEV;

		i7core_printk(KERN_INFO,
			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
			dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

		/* End of list, leave */
		return -ENODEV;
	}
	bus = pdev->bus->number;

	/* Non-core buses are numbered top-down from last_bus (see the
	 * MAX_SOCKET_BUSES note), so the offset identifies the socket */
	socket = last_bus - bus;

	i7core_dev = get_i7core_dev(socket);
	if (!i7core_dev) {
		/* First device seen for this socket: allocate its tracker
		 * and add it to the global list */
		i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
		if (!i7core_dev)
			return -ENOMEM;
		i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
					   GFP_KERNEL);
		if (!i7core_dev->pdev) {
			kfree(i7core_dev);
			return -ENOMEM;
		}
		i7core_dev->socket = socket;
		i7core_dev->n_devs = n_devs;
		list_add_tail(&i7core_dev->list, &i7core_edac_list);
	}

	if (i7core_dev->pdev[devno]) {
		i7core_printk(KERN_ERR,
			"Duplicated device for "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		pci_dev_put(pdev);
		return -ENODEV;
	}

	/* Stored before the checks below so that a failure is still
	 * released by i7core_put_devices() */
	i7core_dev->pdev[devno] = pdev;

	/* Sanity check */
	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
		i7core_printk(KERN_ERR,
			"Device PCI ID %04x:%04x "
			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			bus, dev_descr->dev, dev_descr->func);
		return -ENODEV;
	}

	/* Be sure that the device is enabled */
	if (unlikely(pci_enable_device(pdev) < 0)) {
		i7core_printk(KERN_ERR,
			"Couldn't enable "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		return -ENODEV;
	}

	debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
		socket, bus, dev_descr->dev,
		dev_descr->func,
		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	*prev = pdev;

	return 0;
}
1377
1378 static int i7core_get_devices(struct pci_id_table *table)
1379 {
1380         int i, rc, last_bus;
1381         struct pci_dev *pdev = NULL;
1382         struct pci_id_descr *dev_descr;
1383
1384         last_bus = i7core_pci_lastbus();
1385
1386         while (table && table->descr) {
1387                 dev_descr = table->descr;
1388                 for (i = 0; i < table->n_devs; i++) {
1389                         pdev = NULL;
1390                         do {
1391                                 rc = i7core_get_onedevice(&pdev, i,
1392                                                           &dev_descr[i],
1393                                                           table->n_devs,
1394                                                           last_bus);
1395                                 if (rc < 0) {
1396                                         if (i == 0) {
1397                                                 i = table->n_devs;
1398                                                 break;
1399                                         }
1400                                         i7core_put_all_devices();
1401                                         return -ENODEV;
1402                                 }
1403                         } while (pdev);
1404                 }
1405                 table++;
1406         }
1407
1408         return 0;
1409         return 0;
1410 }
1411
/*
 * mci_bind_devs - sort the pci_dev pointers collected for one socket
 * into the driver's private structure, keyed by PCI slot/function:
 *
 *   dev 0.0			-> non-core registers (pci_noncore)
 *   dev 3.func			-> memory controller regs (pci_mcr[func])
 *   dev (4+chan).func		-> per-channel regs (pci_ch[chan][func])
 *
 * Returns 0 on success, -EINVAL if a device falls outside that map.
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	pvt->is_registered = 0;
	for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			/* Memory controller registers */
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			/* Per-channel registers */
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func)
			/* Non-core ("uncore") registers */
			pvt->pci_noncore = pdev;
		else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		/* The driver treats the presence of dev 3.2 as the
		 * indicator of registered (RDIMM) memory */
		if (PCI_SLOT(pdev->devfn) == 3 &&
			PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = 1;
	}

	/*
	 * Add extra nodes to count errors on udimm
	 * For registered memory, this is not needed, since the counters
	 * are already displayed at the standard locations
	 */
	if (!pvt->is_registered)
		i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
			&i7core_udimm_counters;

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}
1470
1471 /****************************************************************************
1472                         Error check routines
1473  ****************************************************************************/
1474 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1475                                          int chan, int dimm, int add)
1476 {
1477         char *msg;
1478         struct i7core_pvt *pvt = mci->pvt_info;
1479         int row = pvt->csrow_map[chan][dimm], i;
1480
1481         for (i = 0; i < add; i++) {
1482                 msg = kasprintf(GFP_KERNEL, "Corrected error "
1483                                 "(Socket=%d channel=%d dimm=%d)",
1484                                 pvt->i7core_dev->socket, chan, dimm);
1485
1486                 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1487                 kfree (msg);
1488         }
1489 }
1490
1491 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1492                         int chan, int new0, int new1, int new2)
1493 {
1494         struct i7core_pvt *pvt = mci->pvt_info;
1495         int add0 = 0, add1 = 0, add2 = 0;
1496         /* Updates CE counters if it is not the first time here */
1497         if (pvt->ce_count_available) {
1498                 /* Updates CE counters */
1499
1500                 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1501                 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1502                 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1503
1504                 if (add2 < 0)
1505                         add2 += 0x7fff;
1506                 pvt->rdimm_ce_count[chan][2] += add2;
1507
1508                 if (add1 < 0)
1509                         add1 += 0x7fff;
1510                 pvt->rdimm_ce_count[chan][1] += add1;
1511
1512                 if (add0 < 0)
1513                         add0 += 0x7fff;
1514                 pvt->rdimm_ce_count[chan][0] += add0;
1515         } else
1516                 pvt->ce_count_available = 1;
1517
1518         /* Store the new values */
1519         pvt->rdimm_last_ce_count[chan][2] = new2;
1520         pvt->rdimm_last_ce_count[chan][1] = new1;
1521         pvt->rdimm_last_ce_count[chan][0] = new0;
1522
1523         /*updated the edac core */
1524         if (add0 != 0)
1525                 i7core_rdimm_update_csrow(mci, chan, 0, add0);
1526         if (add1 != 0)
1527                 i7core_rdimm_update_csrow(mci, chan, 1, add1);
1528         if (add2 != 0)
1529                 i7core_rdimm_update_csrow(mci, chan, 2, add2);
1530
1531 }
1532
1533 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1534 {
1535         struct i7core_pvt *pvt = mci->pvt_info;
1536         u32 rcv[3][2];
1537         int i, new0, new1, new2;
1538
1539         /*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
1540         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1541                                                                 &rcv[0][0]);
1542         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1543                                                                 &rcv[0][1]);
1544         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1545                                                                 &rcv[1][0]);
1546         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1547                                                                 &rcv[1][1]);
1548         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1549                                                                 &rcv[2][0]);
1550         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1551                                                                 &rcv[2][1]);
1552         for (i = 0 ; i < 3; i++) {
1553                 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1554                         (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1555                 /*if the channel has 3 dimms*/
1556                 if (pvt->channel[i].dimms > 2) {
1557                         new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1558                         new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1559                         new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1560                 } else {
1561                         new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1562                                         DIMM_BOT_COR_ERR(rcv[i][0]);
1563                         new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1564                                         DIMM_BOT_COR_ERR(rcv[i][1]);
1565                         new2 = 0;
1566                 }
1567
1568                 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1569         }
1570 }
1571
1572 /* This function is based on the device 3 function 4 registers as described on:
1573  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1574  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1575  * also available at:
1576  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1577  */
1578 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1579 {
1580         struct i7core_pvt *pvt = mci->pvt_info;
1581         u32 rcv1, rcv0;
1582         int new0, new1, new2;
1583
1584         if (!pvt->pci_mcr[4]) {
1585                 debugf0("%s MCR registers not found\n", __func__);
1586                 return;
1587         }
1588
1589         /* Corrected test errors */
1590         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1591         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1592
1593         /* Store the new values */
1594         new2 = DIMM2_COR_ERR(rcv1);
1595         new1 = DIMM1_COR_ERR(rcv0);
1596         new0 = DIMM0_COR_ERR(rcv0);
1597
1598         /* Updates CE counters if it is not the first time here */
1599         if (pvt->ce_count_available) {
1600                 /* Updates CE counters */
1601                 int add0, add1, add2;
1602
1603                 add2 = new2 - pvt->udimm_last_ce_count[2];
1604                 add1 = new1 - pvt->udimm_last_ce_count[1];
1605                 add0 = new0 - pvt->udimm_last_ce_count[0];
1606
1607                 if (add2 < 0)
1608                         add2 += 0x7fff;
1609                 pvt->udimm_ce_count[2] += add2;
1610
1611                 if (add1 < 0)
1612                         add1 += 0x7fff;
1613                 pvt->udimm_ce_count[1] += add1;
1614
1615                 if (add0 < 0)
1616                         add0 += 0x7fff;
1617                 pvt->udimm_ce_count[0] += add0;
1618
1619                 if (add0 | add1 | add2)
1620                         i7core_printk(KERN_ERR, "New Corrected error(s): "
1621                                       "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1622                                       add0, add1, add2);
1623         } else
1624                 pvt->ce_count_available = 1;
1625
1626         /* Store the new values */
1627         pvt->udimm_last_ce_count[2] = new2;
1628         pvt->udimm_last_ce_count[1] = new1;
1629         pvt->udimm_last_ce_count[0] = new0;
1630 }
1631
/*
 * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and
 * IA-32 Architectures Software Developer's Manual Volume 3B,
 * Nehalem is defined as family 0x06, model 0x1a.
 *
 * The MCA registers used here are the following ones:
 *     struct mce field MCA Register
 *     m->status        MSR_IA32_MC8_STATUS
 *     m->addr          MSR_IA32_MC8_ADDR
 *     m->misc          MSR_IA32_MC8_MISC
 * In the case of Nehalem, the error information is carried in the .status
 * and .misc fields.
 */
1645 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1646                                     struct mce *m)
1647 {
1648         struct i7core_pvt *pvt = mci->pvt_info;
1649         char *type, *optype, *err, *msg;
1650         unsigned long error = m->status & 0x1ff0000l;
1651         u32 optypenum = (m->status >> 4) & 0x07;
1652         u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1653         u32 dimm = (m->misc >> 16) & 0x3;
1654         u32 channel = (m->misc >> 18) & 0x3;
1655         u32 syndrome = m->misc >> 32;
1656         u32 errnum = find_first_bit(&error, 32);
1657         int csrow;
1658
1659         if (m->mcgstatus & 1)
1660                 type = "FATAL";
1661         else
1662                 type = "NON_FATAL";
1663
1664         switch (optypenum) {
1665         case 0:
1666                 optype = "generic undef request";
1667                 break;
1668         case 1:
1669                 optype = "read error";
1670                 break;
1671         case 2:
1672                 optype = "write error";
1673                 break;
1674         case 3:
1675                 optype = "addr/cmd error";
1676                 break;
1677         case 4:
1678                 optype = "scrubbing error";
1679                 break;
1680         default:
1681                 optype = "reserved";
1682                 break;
1683         }
1684
1685         switch (errnum) {
1686         case 16:
1687                 err = "read ECC error";
1688                 break;
1689         case 17:
1690                 err = "RAS ECC error";
1691                 break;
1692         case 18:
1693                 err = "write parity error";
1694                 break;
1695         case 19:
1696                 err = "redundacy loss";
1697                 break;
1698         case 20:
1699                 err = "reserved";
1700                 break;
1701         case 21:
1702                 err = "memory range error";
1703                 break;
1704         case 22:
1705                 err = "RTID out of range";
1706                 break;
1707         case 23:
1708                 err = "address parity error";
1709                 break;
1710         case 24:
1711                 err = "byte enable parity error";
1712                 break;
1713         default:
1714                 err = "unknown";
1715         }
1716
1717         /* FIXME: should convert addr into bank and rank information */
1718         msg = kasprintf(GFP_ATOMIC,
1719                 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1720                 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1721                 type, (long long) m->addr, m->cpu, dimm, channel,
1722                 syndrome, core_err_cnt, (long long)m->status,
1723                 (long long)m->misc, optype, err);
1724
1725         debugf0("%s", msg);
1726
1727         csrow = pvt->csrow_map[channel][dimm];
1728
1729         /* Call the helper to output message */
1730         if (m->mcgstatus & 1)
1731                 edac_mc_handle_fbd_ue(mci, csrow, 0,
1732                                 0 /* FIXME: should be channel here */, msg);
1733         else if (!pvt->is_registered)
1734                 edac_mc_handle_fbd_ce(mci, csrow,
1735                                 0 /* FIXME: should be channel here */, msg);
1736
1737         kfree(msg);
1738 }
1739
1740 /*
1741  *      i7core_check_error      Retrieve and process errors reported by the
1742  *                              hardware. Called by the Core module.
1743  */
static void i7core_check_error(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m;

	/*
	 * MCE first step: copy all mce errors into a temporary buffer.
	 * We use double buffering here to reduce the risk of losing an
	 * error: this side is the consumer of the mce_entry ring whose
	 * producer is i7core_mce_check_error() (possibly at NMI time),
	 * hence the smp_rmb()/smp_wmb() pairing around the indexes.
	 */
	smp_rmb();
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
		% MCE_LOG_LEN;
	if (!count)
		goto check_ce_error;

	m = pvt->mce_outentry;
	/* Ring wraps: copy the tail segment first, then restart at 0 */
	if (pvt->mce_in + count > MCE_LOG_LEN) {
		unsigned l = MCE_LOG_LEN - pvt->mce_in;

		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		smp_wmb();
		pvt->mce_in = 0;
		count -= l;
		m += l;
	}
	/* Copy the (remaining) contiguous segment and advance the index */
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
	smp_wmb();
	pvt->mce_in += count;

	/* Report and reset any overrun the producer recorded */
	smp_rmb();
	if (pvt->mce_overrun) {
		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
			      pvt->mce_overrun);
		smp_wmb();
		pvt->mce_overrun = 0;
	}

	/*
	 * MCE second step: parse errors and display
	 */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);

	/*
	 * Now, let's increment CE error counts.  Registered DIMMs use the
	 * rdimm counters, unregistered ones the udimm counters.
	 */
check_ce_error:
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
		i7core_rdimm_check_mc_ecc_err(mci);
}
1799
1800 /*
1801  * i7core_mce_check_error       Replicates mcelog routine to get errors
1802  *                              This routine simply queues mcelog errors, and
1803  *                              return. The error itself should be handled later
1804  *                              by i7core_check_error.
1805  * WARNING: As this routine should be called at NMI time, extra care should
1806  * be taken to avoid deadlocks, and to be as fast as possible.
1807  */
static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller.  Status bits 7-15 equal to 1
	 * identify a memory-controller error here.
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;

#ifdef CONFIG_SMP
	/* Only handle if it is the right mc controller (same socket) */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
		return 0;
#endif

	/* Producer side of the mce_entry ring; consumer: i7core_check_error */
	smp_rmb();
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		/* Ring full: record the loss instead of blocking (NMI-safe) */
		smp_wmb();
		pvt->mce_overrun++;
		return 0;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advise mcelog that the error was handled */
	return 1;
}
1849
1850 static int i7core_register_mci(struct i7core_dev *i7core_dev,
1851                                int num_channels, int num_csrows)
1852 {
1853         struct mem_ctl_info *mci;
1854         struct i7core_pvt *pvt;
1855         int csrow = 0;
1856         int rc;
1857
1858         /* allocate a new MC control structure */
1859         mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1860                             i7core_dev->socket);
1861         if (unlikely(!mci))
1862                 return -ENOMEM;
1863
1864         debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1865
1866         /* record ptr to the generic device */
1867         mci->dev = &i7core_dev->pdev[0]->dev;
1868
1869         pvt = mci->pvt_info;
1870         memset(pvt, 0, sizeof(*pvt));
1871
1872         /*
1873          * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1874          * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1875          * memory channels
1876          */
1877         mci->mtype_cap = MEM_FLAG_DDR3;
1878         mci->edac_ctl_cap = EDAC_FLAG_NONE;
1879         mci->edac_cap = EDAC_FLAG_NONE;
1880         mci->mod_name = "i7core_edac.c";
1881         mci->mod_ver = I7CORE_REVISION;
1882         mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1883                                   i7core_dev->socket);
1884         mci->dev_name = pci_name(i7core_dev->pdev[0]);
1885         mci->ctl_page_to_phys = NULL;
1886         mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
1887         /* Set the function pointer to an actual operation function */
1888         mci->edac_check = i7core_check_error;
1889
1890         /* Store pci devices at mci for faster access */
1891         rc = mci_bind_devs(mci, i7core_dev);
1892         if (unlikely(rc < 0))
1893                 goto fail;
1894
1895         /* Get dimm basic config */
1896         get_dimm_config(mci, &csrow);
1897
1898         /* add this new MC control structure to EDAC's list of MCs */
1899         if (unlikely(edac_mc_add_mc(mci))) {
1900                 debugf0("MC: " __FILE__
1901                         ": %s(): failed edac_mc_add_mc()\n", __func__);
1902                 /* FIXME: perhaps some code should go here that disables error
1903                  * reporting if we just enabled it
1904                  */
1905
1906                 rc = -EINVAL;
1907                 goto fail;
1908         }
1909
1910         /* allocating generic PCI control info */
1911         pvt->i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1912                                                  EDAC_MOD_STR);
1913         if (unlikely(!pvt->i7core_pci)) {
1914                 printk(KERN_WARNING
1915                         "%s(): Unable to create PCI control\n",
1916                         __func__);
1917                 printk(KERN_WARNING
1918                         "%s(): PCI error report via EDAC not setup\n",
1919                         __func__);
1920         }
1921
1922         /* Default error mask is any memory */
1923         pvt->inject.channel = 0;
1924         pvt->inject.dimm = -1;
1925         pvt->inject.rank = -1;
1926         pvt->inject.bank = -1;
1927         pvt->inject.page = -1;
1928         pvt->inject.col = -1;
1929
1930         /* Registers on edac_mce in order to receive memory errors */
1931         pvt->edac_mce.priv = mci;
1932         pvt->edac_mce.check_error = i7core_mce_check_error;
1933
1934         rc = edac_mce_register(&pvt->edac_mce);
1935         if (unlikely(rc < 0)) {
1936                 debugf0("MC: " __FILE__
1937                         ": %s(): failed edac_mce_register()\n", __func__);
1938         }
1939
1940 fail:
1941         if (rc < 0)
1942                 edac_mc_free(mci);
1943         return rc;
1944 }
1945
1946 /*
1947  *      i7core_probe    Probe for ONE instance of device to see if it is
1948  *                      present.
1949  *      return:
1950  *              0 for FOUND a device
1951  *              < 0 for error code
1952  */
1953
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	int rc;
	struct i7core_dev *i7core_dev;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);

	/*
	 * All memory controllers are allocated at the first pass, so any
	 * later probe call (one per matched PCI id) must bail out here.
	 */
	if (unlikely(probed >= 1)) {
		mutex_unlock(&i7core_edac_lock);
		return -EINVAL;
	}
	probed++;

	/* Collect every device belonging to the memory controllers */
	rc = i7core_get_devices(pci_dev_table);
	if (unlikely(rc < 0))
		goto fail0;

	/* Register one EDAC MC per detected socket */
	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		int channels;
		int csrows;

		/* Check the number of active and not disabled channels */
		rc = i7core_get_active_channels(i7core_dev->socket,
						&channels, &csrows);
		if (unlikely(rc < 0))
			goto fail1;

		rc = i7core_register_mci(i7core_dev, channels, csrows);
		if (unlikely(rc < 0))
			goto fail1;
	}

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&i7core_edac_lock);
	return 0;

fail1:
	/* Release every device reserved by i7core_get_devices() */
	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}
2002
2003 /*
2004  *      i7core_remove   destructor for one instance of device
2005  *
2006  */
2007 static void __devexit i7core_remove(struct pci_dev *pdev)
2008 {
2009         struct mem_ctl_info *mci;
2010         struct i7core_dev *i7core_dev, *tmp;
2011         struct i7core_pvt *pvt;
2012
2013         debugf0(__FILE__ ": %s()\n", __func__);
2014
2015         /*
2016          * we have a trouble here: pdev value for removal will be wrong, since
2017          * it will point to the X58 register used to detect that the machine
2018          * is a Nehalem or upper design. However, due to the way several PCI
2019          * devices are grouped together to provide MC functionality, we need
2020          * to use a different method for releasing the devices
2021          */
2022
2023         mutex_lock(&i7core_edac_lock);
2024         list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
2025                 mci = find_mci_by_dev(&i7core_dev->pdev[0]->dev);
2026                 if (unlikely(!mci || !mci->pvt_info)) {
2027                         i7core_printk(KERN_ERR,
2028                                       "Couldn't find mci hanler\n");
2029                 } else {
2030                         pvt = mci->pvt_info;
2031                         i7core_dev = pvt->i7core_dev;
2032
2033                         if (likely(pvt->i7core_pci))
2034                                 edac_pci_release_generic_ctl(pvt->i7core_pci);
2035                         else
2036                                 i7core_printk(KERN_ERR,
2037                                               "Couldn't find mem_ctl_info for socket %d\n",
2038                                               i7core_dev->socket);
2039                         pvt->i7core_pci = NULL;
2040
2041                         edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
2042
2043                         edac_mce_unregister(&pvt->edac_mce);
2044                         kfree(mci->ctl_name);
2045                         edac_mc_free(mci);
2046                         i7core_put_devices(i7core_dev);
2047                 }
2048         }
2049         probed--;
2050
2051         mutex_unlock(&i7core_edac_lock);
2052 }
2053
/* Export the PCI id table so userspace tools can autoload this module */
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 *	i7core_driver	pci_driver structure for this module
 *	Probe/remove operate on the whole set of MC devices (see the
 *	comments in i7core_probe/i7core_remove), not just on pdev.
 */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};
2066
2067 /*
2068  *      i7core_init             Module entry function
2069  *                      Try to initialize this module for its devices
2070  */
2071 static int __init i7core_init(void)
2072 {
2073         int pci_rc;
2074
2075         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2076
2077         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2078         opstate_init();
2079
2080         i7core_xeon_pci_fixup(pci_dev_table);
2081
2082         pci_rc = pci_register_driver(&i7core_driver);
2083
2084         if (pci_rc >= 0)
2085                 return 0;
2086
2087         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2088                       pci_rc);
2089
2090         return pci_rc;
2091 }
2092
2093 /*
2094  *      i7core_exit()   Module exit function
2095  *                      Unregister the driver
2096  */
/* Module exit point: unregistering the driver triggers i7core_remove() */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	pci_unregister_driver(&i7core_driver);
}
2102
module_init(i7core_init);
module_exit(i7core_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

/* edac_op_state selects polling vs NMI reporting; read-only via sysfs */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");