i7core_edac: Introduce alloc_i7core_dev
[pandora-kernel.git] / drivers / edac / i7core_edac.c
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *       Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *      http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/edac_mce.h>
37 #include <linux/smp.h>
38 #include <asm/processor.h>
39
40 #include "edac_core.h"
41
42 /* Static vars */
43 static LIST_HEAD(i7core_edac_list);
44 static DEFINE_MUTEX(i7core_edac_lock);
45 static int probed;
46
47 static int use_pci_fixup;
48 module_param(use_pci_fixup, int, 0444);
49 MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
50 /*
51  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
52  * registers start at bus 255, and are not reported by BIOS.
53  * We currently support devices with only 2 sockets. In order to support more
54  * QPI (Quick Path Interconnect) sockets, just increment this number.
55  */
56 #define MAX_SOCKET_BUSES        2
57
58
59 /*
60  * Alter this version for the module when modifications are made
61  */
62 #define I7CORE_REVISION    " Ver: 1.0.0 " __DATE__
63 #define EDAC_MOD_STR      "i7core_edac"
64
65 /*
66  * Debug macros
67  */
68 #define i7core_printk(level, fmt, arg...)                       \
69         edac_printk(level, "i7core", fmt, ##arg)
70
71 #define i7core_mc_printk(mci, level, fmt, arg...)               \
72         edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
73
74 /*
75  * i7core Memory Controller Registers
76  */
77
78         /* OFFSETS for Device 0 Function 0 */
79
80 #define MC_CFG_CONTROL  0x90
81
82         /* OFFSETS for Device 3 Function 0 */
83
84 #define MC_CONTROL      0x48
85 #define MC_STATUS       0x4c
86 #define MC_MAX_DOD      0x64
87
88 /*
89  * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
90  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
91  */
92
93 #define MC_TEST_ERR_RCV1        0x60
94   #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)
95
96 #define MC_TEST_ERR_RCV0        0x64
97   #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
98   #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)
99
100 /* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
101 #define MC_COR_ECC_CNT_0        0x80
102 #define MC_COR_ECC_CNT_1        0x84
103 #define MC_COR_ECC_CNT_2        0x88
104 #define MC_COR_ECC_CNT_3        0x8c
105 #define MC_COR_ECC_CNT_4        0x90
106 #define MC_COR_ECC_CNT_5        0x94
107
108 #define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
109 #define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)
110
111
112         /* OFFSETS for Devices 4,5 and 6 Function 0 */
113
114 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
115   #define THREE_DIMMS_PRESENT           (1 << 24)
116   #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
117   #define QUAD_RANK_PRESENT             (1 << 22)
118   #define REGISTERED_DIMM               (1 << 15)
119
120 #define MC_CHANNEL_MAPPER       0x60
121   #define RDLCH(r, ch)          ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
122   #define WRLCH(r, ch)          ((((r) >> (ch * 6)) & 0x07) - 1)
123
124 #define MC_CHANNEL_RANK_PRESENT 0x7c
125   #define RANK_PRESENT_MASK             0xffff
126
127 #define MC_CHANNEL_ADDR_MATCH   0xf0
128 #define MC_CHANNEL_ERROR_MASK   0xf8
129 #define MC_CHANNEL_ERROR_INJECT 0xfc
130   #define INJECT_ADDR_PARITY    0x10
131   #define INJECT_ECC            0x08
132   #define MASK_CACHELINE        0x06
133   #define MASK_FULL_CACHELINE   0x06
134   #define MASK_MSB32_CACHELINE  0x04
135   #define MASK_LSB32_CACHELINE  0x02
136   #define NO_MASK_CACHELINE     0x00
137   #define REPEAT_EN             0x01
138
139         /* OFFSETS for Devices 4,5 and 6 Function 1 */
140
141 #define MC_DOD_CH_DIMM0         0x48
142 #define MC_DOD_CH_DIMM1         0x4c
143 #define MC_DOD_CH_DIMM2         0x50
144   #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
145   #define RANKOFFSET(x)         ((x & RANKOFFSET_MASK) >> 10)
146   #define DIMM_PRESENT_MASK     (1 << 9)
147   #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
148   #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
149   #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
150   #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
151   #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
152   #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
153   #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
154   #define MC_DOD_NUMCOL_MASK            3
155   #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)
156
157 #define MC_RANK_PRESENT         0x7c
158
159 #define MC_SAG_CH_0     0x80
160 #define MC_SAG_CH_1     0x84
161 #define MC_SAG_CH_2     0x88
162 #define MC_SAG_CH_3     0x8c
163 #define MC_SAG_CH_4     0x90
164 #define MC_SAG_CH_5     0x94
165 #define MC_SAG_CH_6     0x98
166 #define MC_SAG_CH_7     0x9c
167
168 #define MC_RIR_LIMIT_CH_0       0x40
169 #define MC_RIR_LIMIT_CH_1       0x44
170 #define MC_RIR_LIMIT_CH_2       0x48
171 #define MC_RIR_LIMIT_CH_3       0x4C
172 #define MC_RIR_LIMIT_CH_4       0x50
173 #define MC_RIR_LIMIT_CH_5       0x54
174 #define MC_RIR_LIMIT_CH_6       0x58
175 #define MC_RIR_LIMIT_CH_7       0x5C
176 #define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)
177
178 #define MC_RIR_WAY_CH           0x80
179   #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
180   #define MC_RIR_WAY_RANK_MASK          0x7
181
182 /*
183  * i7core structs
184  */
185
186 #define NUM_CHANS 3
187 #define MAX_DIMMS 3             /* Max DIMMS per channel */
188 #define MAX_MCR_FUNC  4
189 #define MAX_CHAN_FUNC 3
190
/*
 * Cached copies of the global memory-controller registers, read from
 * PCI device 3 function 0 by get_dimm_config().
 */
struct i7core_info {
	u32	mc_control;	/* MC_CONTROL register */
	u32	mc_status;	/* MC_STATUS register */
	u32	max_dod;	/* MC_MAX_DOD register */
	u32	ch_map;		/* MC_CHANNEL_MAPPER register */
};
197
198
/* Error-injection parameters, written through the sysfs store handlers. */
struct i7core_inject {
	int	enable;		/* non-zero while injection is armed */

	u32	section;	/* half-cacheline selection bits (0-3) */
	u32	type;		/* repeat/ECC/parity selection bits (0-7) */
	u32	eccmask;	/* ECC bits to flip on injection */

	/* Error address mask */
	int channel, dimm, rank, bank, page, col;
};
209
/* Per-channel DIMM accounting, filled in by get_dimm_config(). */
struct i7core_channel {
	u32		ranks;	/* ranks per DIMM: 4 if QUAD_RANK_PRESENT, else 2 */
	u32		dimms;	/* number of DIMMs found present on the channel */
};
214
/*
 * One PCI device the driver expects on each socket: its slot/function
 * and device id.  @optional marks devices whose absence is tolerated
 * (e.g. the RAS function, which exists only with RDIMMs).
 */
struct pci_id_descr {
	int			dev;		/* PCI slot (device) number */
	int			func;		/* PCI function number */
	int			dev_id;		/* expected PCI device id */
	int			optional;	/* non-zero: absence is not a probe failure */
};
221
/* A complete per-CPU-family device list: descriptor array plus its length. */
struct pci_id_table {
	const struct pci_id_descr	*descr;		/* array of expected devices */
	int				n_devs;		/* number of entries in descr */
};
226
/*
 * Per-socket state: the PCI devices discovered for one socket and the
 * EDAC memory controller built on top of them.  Instances are linked
 * on the global i7core_edac_list (see alloc_i7core_dev()).
 */
struct i7core_dev {
	struct list_head	list;		/* node on i7core_edac_list */
	u8			socket;		/* socket this descriptor belongs to */
	struct pci_dev		**pdev;		/* devices, indexed as in the pci_id_table */
	int			n_devs;		/* size of the pdev array */
	struct mem_ctl_info	*mci;		/* associated EDAC controller */
};
234
/*
 * Driver-private data for one memory controller (one socket), hung off
 * mem_ctl_info->pvt_info.
 */
struct i7core_pvt {
	struct pci_dev	*pci_noncore;	/* generic non-core registers device (dev 0 fn 0) */
	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];	/* device 3, indexed by function */
	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];	/* devices 4-6, per channel/function */

	struct i7core_dev *i7core_dev;	/* back-pointer to the per-socket descriptor */

	struct i7core_info	info;	/* cached global MC registers */
	struct i7core_inject	inject;	/* current error-injection parameters */
	struct i7core_channel	channel[NUM_CHANS];

	int		channels; /* Number of active channels */

	int		ce_count_available;
	int		csrow_map[NUM_CHANS][MAX_DIMMS];	/* (channel, dimm) -> csrow index */

			/* ECC corrected errors counts per udimm */
	unsigned long	udimm_ce_count[MAX_DIMMS];
	int		udimm_last_ce_count[MAX_DIMMS];
			/* ECC corrected errors counts per rdimm */
	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

	unsigned int	is_registered;	/* NOTE(review): presumably set when RDIMMs are present — not set in this chunk, confirm */

	/* mcelog glue */
	struct edac_mce		edac_mce;

	/* Fifo double buffers */
	struct mce		mce_entry[MCE_LOG_LEN];
	struct mce		mce_outentry[MCE_LOG_LEN];

	/* Fifo in/out counters */
	unsigned		mce_in, mce_out;

	/* Count indicator to show errors not got */
	unsigned		mce_overrun;

	/* Struct to control EDAC polling */
	struct edac_pci_ctl_info *i7core_pci;
};
276
277 #define PCI_DESCR(device, function, device_id)  \
278         .dev = (device),                        \
279         .func = (function),                     \
280         .dev_id = (device_id)
281
282 static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
283                 /* Memory controller */
284         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
285         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
286                         /* Exists only for RDIMM */
287         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
288         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
289
290                 /* Channel 0 */
291         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
292         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
293         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
294         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },
295
296                 /* Channel 1 */
297         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
298         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
299         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
300         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },
301
302                 /* Channel 2 */
303         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
304         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
305         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
306         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
307
308                 /* Generic Non-core registers */
309         /*
310          * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
311          * On Xeon 55xx, however, it has a different id (8086:2c40). So,
312          * the probing code needs to test for the other address in case of
313          * failure of this one
314          */
315         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
316
317 };
318
319 static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
320         { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
321         { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
322         { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },
323
324         { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
325         { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
326         { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
327         { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },
328
329         { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
330         { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
331         { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
332         { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
333
334         /*
335          * This is the PCI device has an alternate address on some
336          * processors like Core i7 860
337          */
338         { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
339 };
340
341 static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
342                 /* Memory controller */
343         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
344         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
345                         /* Exists only for RDIMM */
346         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
347         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
348
349                 /* Channel 0 */
350         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
351         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
352         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
353         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },
354
355                 /* Channel 1 */
356         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
357         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
358         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
359         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },
360
361                 /* Channel 2 */
362         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
363         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
364         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
365         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
366
367                 /* Generic Non-core registers */
368         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },
369
370 };
371
372 #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
373 static const struct pci_id_table pci_dev_table[] = {
374         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
375         PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
376         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
377 };
378
379 /*
380  *      pci_device_id   table for which devices we are looking for
381  */
382 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
383         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
384         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
385         {0,}                    /* 0 terminated list. */
386 };
387
388 /****************************************************************************
389                         Ancillary status routines
390  ****************************************************************************/
391
392         /* MC_CONTROL bits */
393 #define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + ch)))
394 #define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))
395
396         /* MC_STATUS bits */
397 #define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
398 #define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << ch))
399
400         /* MC_MAX_DOD read functions */
401 static inline int numdimms(u32 dimms)
402 {
403         return (dimms & 0x3) + 1;
404 }
405
406 static inline int numrank(u32 rank)
407 {
408         static int ranks[4] = { 1, 2, 4, -EINVAL };
409
410         return ranks[rank & 0x3];
411 }
412
413 static inline int numbank(u32 bank)
414 {
415         static int banks[4] = { 4, 8, 16, -EINVAL };
416
417         return banks[bank & 0x3];
418 }
419
420 static inline int numrow(u32 row)
421 {
422         static int rows[8] = {
423                 1 << 12, 1 << 13, 1 << 14, 1 << 15,
424                 1 << 16, -EINVAL, -EINVAL, -EINVAL,
425         };
426
427         return rows[row & 0x7];
428 }
429
430 static inline int numcol(u32 col)
431 {
432         static int cols[8] = {
433                 1 << 10, 1 << 11, 1 << 12, -EINVAL,
434         };
435         return cols[col & 0x3];
436 }
437
438 static struct i7core_dev *get_i7core_dev(u8 socket)
439 {
440         struct i7core_dev *i7core_dev;
441
442         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
443                 if (i7core_dev->socket == socket)
444                         return i7core_dev;
445         }
446
447         return NULL;
448 }
449
450 static struct i7core_dev *alloc_i7core_dev(u8 socket,
451                                            const struct pci_id_table *table)
452 {
453         struct i7core_dev *i7core_dev;
454
455         i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
456         if (!i7core_dev)
457                 return NULL;
458
459         i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
460                                    GFP_KERNEL);
461         if (!i7core_dev->pdev) {
462                 kfree(i7core_dev);
463                 return NULL;
464         }
465
466         i7core_dev->socket = socket;
467         i7core_dev->n_devs = table->n_devs;
468         list_add_tail(&i7core_dev->list, &i7core_edac_list);
469
470         return i7core_dev;
471 }
472
473 /****************************************************************************
474                         Memory check routines
475  ****************************************************************************/
476 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
477                                           unsigned func)
478 {
479         struct i7core_dev *i7core_dev = get_i7core_dev(socket);
480         int i;
481
482         if (!i7core_dev)
483                 return NULL;
484
485         for (i = 0; i < i7core_dev->n_devs; i++) {
486                 if (!i7core_dev->pdev[i])
487                         continue;
488
489                 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
490                     PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
491                         return i7core_dev->pdev[i];
492                 }
493         }
494
495         return NULL;
496 }
497
498 /**
499  * i7core_get_active_channels() - gets the number of channels and csrows
500  * @socket:     Quick Path Interconnect socket
501  * @channels:   Number of channels that will be returned
502  * @csrows:     Number of csrows found
503  *
504  * Since EDAC core needs to know in advance the number of available channels
505  * and csrows, in order to allocate memory for csrows/channels, it is needed
506  * to run two similar steps. At the first step, implemented on this function,
507  * it checks the number of csrows/channels present at one socket.
508  * this is used in order to properly allocate the size of mci components.
509  *
510  * It should be noticed that none of the current available datasheets explain
511  * or even mention how csrows are seen by the memory controller. So, we need
512  * to add a fake description for csrows.
513  * So, this driver is attributing one DIMM memory for one csrow.
514  */
static int i7core_get_active_channels(const u8 socket, unsigned *channels,
				      unsigned *csrows)
{
	struct pci_dev *pdev = NULL;
	int i, j;
	u32 status, control;

	*channels = 0;
	*csrows = 0;

	/* The global MC registers live on device 3 function 0 */
	pdev = get_pdev_slot_func(socket, 3, 0);
	if (!pdev) {
		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
			      socket);
		return -ENODEV;
	}

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_STATUS, &status);
	pci_read_config_dword(pdev, MC_CONTROL, &control);

	for (i = 0; i < NUM_CHANS; i++) {
		u32 dimm_dod[3];
		/* Check if the channel is active (MC_CONTROL bit 8+i) */
		if (!(control & (1 << (8 + i))))
			continue;

		/* Check if the channel is disabled (MC_STATUS bit i) */
		if (status & (1 << i))
			continue;

		/* Channel i's DOD registers are on device 4+i function 1 */
		pdev = get_pdev_slot_func(socket, i + 4, 1);
		if (!pdev) {
			i7core_printk(KERN_ERR, "Couldn't find socket %d "
						"fn %d.%d!!!\n",
						socket, i + 4, 1);
			return -ENODEV;
		}
		/* Devices 4-6 function 1 */
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		(*channels)++;

		/* One csrow is accounted per present DIMM (fake mapping) */
		for (j = 0; j < 3; j++) {
			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;
			(*csrows)++;
		}
	}

	debugf0("Number of active channels on socket %d: %d\n",
		socket, *channels);

	return 0;
}
575
576 static int get_dimm_config(const struct mem_ctl_info *mci, int *csrow)
577 {
578         struct i7core_pvt *pvt = mci->pvt_info;
579         struct csrow_info *csr;
580         struct pci_dev *pdev;
581         int i, j;
582         unsigned long last_page = 0;
583         enum edac_type mode;
584         enum mem_type mtype;
585
586         /* Get data from the MC register, function 0 */
587         pdev = pvt->pci_mcr[0];
588         if (!pdev)
589                 return -ENODEV;
590
591         /* Device 3 function 0 reads */
592         pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
593         pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
594         pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
595         pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
596
597         debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
598                 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
599                 pvt->info.max_dod, pvt->info.ch_map);
600
601         if (ECC_ENABLED(pvt)) {
602                 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
603                 if (ECCx8(pvt))
604                         mode = EDAC_S8ECD8ED;
605                 else
606                         mode = EDAC_S4ECD4ED;
607         } else {
608                 debugf0("ECC disabled\n");
609                 mode = EDAC_NONE;
610         }
611
612         /* FIXME: need to handle the error codes */
613         debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
614                 "x%x x 0x%x\n",
615                 numdimms(pvt->info.max_dod),
616                 numrank(pvt->info.max_dod >> 2),
617                 numbank(pvt->info.max_dod >> 4),
618                 numrow(pvt->info.max_dod >> 6),
619                 numcol(pvt->info.max_dod >> 9));
620
621         for (i = 0; i < NUM_CHANS; i++) {
622                 u32 data, dimm_dod[3], value[8];
623
624                 if (!pvt->pci_ch[i][0])
625                         continue;
626
627                 if (!CH_ACTIVE(pvt, i)) {
628                         debugf0("Channel %i is not active\n", i);
629                         continue;
630                 }
631                 if (CH_DISABLED(pvt, i)) {
632                         debugf0("Channel %i is disabled\n", i);
633                         continue;
634                 }
635
636                 /* Devices 4-6 function 0 */
637                 pci_read_config_dword(pvt->pci_ch[i][0],
638                                 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
639
640                 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
641                                                 4 : 2;
642
643                 if (data & REGISTERED_DIMM)
644                         mtype = MEM_RDDR3;
645                 else
646                         mtype = MEM_DDR3;
647 #if 0
648                 if (data & THREE_DIMMS_PRESENT)
649                         pvt->channel[i].dimms = 3;
650                 else if (data & SINGLE_QUAD_RANK_PRESENT)
651                         pvt->channel[i].dimms = 1;
652                 else
653                         pvt->channel[i].dimms = 2;
654 #endif
655
656                 /* Devices 4-6 function 1 */
657                 pci_read_config_dword(pvt->pci_ch[i][1],
658                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
659                 pci_read_config_dword(pvt->pci_ch[i][1],
660                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
661                 pci_read_config_dword(pvt->pci_ch[i][1],
662                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
663
664                 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
665                         "%d ranks, %cDIMMs\n",
666                         i,
667                         RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
668                         data,
669                         pvt->channel[i].ranks,
670                         (data & REGISTERED_DIMM) ? 'R' : 'U');
671
672                 for (j = 0; j < 3; j++) {
673                         u32 banks, ranks, rows, cols;
674                         u32 size, npages;
675
676                         if (!DIMM_PRESENT(dimm_dod[j]))
677                                 continue;
678
679                         banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
680                         ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
681                         rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
682                         cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
683
684                         /* DDR3 has 8 I/O banks */
685                         size = (rows * cols * banks * ranks) >> (20 - 3);
686
687                         pvt->channel[i].dimms++;
688
689                         debugf0("\tdimm %d %d Mb offset: %x, "
690                                 "bank: %d, rank: %d, row: %#x, col: %#x\n",
691                                 j, size,
692                                 RANKOFFSET(dimm_dod[j]),
693                                 banks, ranks, rows, cols);
694
695                         npages = MiB_TO_PAGES(size);
696
697                         csr = &mci->csrows[*csrow];
698                         csr->first_page = last_page + 1;
699                         last_page += npages;
700                         csr->last_page = last_page;
701                         csr->nr_pages = npages;
702
703                         csr->page_mask = 0;
704                         csr->grain = 8;
705                         csr->csrow_idx = *csrow;
706                         csr->nr_channels = 1;
707
708                         csr->channels[0].chan_idx = i;
709                         csr->channels[0].ce_count = 0;
710
711                         pvt->csrow_map[i][j] = *csrow;
712
713                         switch (banks) {
714                         case 4:
715                                 csr->dtype = DEV_X4;
716                                 break;
717                         case 8:
718                                 csr->dtype = DEV_X8;
719                                 break;
720                         case 16:
721                                 csr->dtype = DEV_X16;
722                                 break;
723                         default:
724                                 csr->dtype = DEV_UNKNOWN;
725                         }
726
727                         csr->edac_mode = mode;
728                         csr->mtype = mtype;
729
730                         (*csrow)++;
731                 }
732
733                 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
734                 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
735                 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
736                 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
737                 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
738                 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
739                 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
740                 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
741                 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
742                 for (j = 0; j < 8; j++)
743                         debugf1("\t\t%#x\t%#x\t%#x\n",
744                                 (value[j] >> 27) & 0x1,
745                                 (value[j] >> 24) & 0x7,
746                                 (value[j] && ((1 << 24) - 1)));
747         }
748
749         return 0;
750 }
751
752 /****************************************************************************
753                         Error insertion routines
754  ****************************************************************************/
755
756 /* The i7core has independent error injection features per channel.
757    However, to have a simpler code, we don't allow enabling error injection
758    on more than one channel.
759    Also, since a change at an inject parameter will be applied only at enable,
760    we're disabling error injection on all write calls to the sysfs nodes that
761    controls the error code injection.
762  */
763 static int disable_inject(const struct mem_ctl_info *mci)
764 {
765         struct i7core_pvt *pvt = mci->pvt_info;
766
767         pvt->inject.enable = 0;
768
769         if (!pvt->pci_ch[pvt->inject.channel][0])
770                 return -ENODEV;
771
772         pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
773                                 MC_CHANNEL_ERROR_INJECT, 0);
774
775         return 0;
776 }
777
778 /*
779  * i7core inject inject.section
780  *
781  *      accept and store error injection inject.section value
782  *      bit 0 - refers to the lower 32-byte half cacheline
783  *      bit 1 - refers to the upper 32-byte half cacheline
784  */
785 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
786                                            const char *data, size_t count)
787 {
788         struct i7core_pvt *pvt = mci->pvt_info;
789         unsigned long value;
790         int rc;
791
792         if (pvt->inject.enable)
793                 disable_inject(mci);
794
795         rc = strict_strtoul(data, 10, &value);
796         if ((rc < 0) || (value > 3))
797                 return -EIO;
798
799         pvt->inject.section = (u32) value;
800         return count;
801 }
802
803 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
804                                               char *data)
805 {
806         struct i7core_pvt *pvt = mci->pvt_info;
807         return sprintf(data, "0x%08x\n", pvt->inject.section);
808 }
809
/*
 * i7core inject.type
 *
 *	accept and store error injection inject.type value
 *	bit 0 - repeat enable - Enable error repetition
 *	bit 1 - inject ECC error
 *	bit 2 - inject parity error
 */
818 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
819                                         const char *data, size_t count)
820 {
821         struct i7core_pvt *pvt = mci->pvt_info;
822         unsigned long value;
823         int rc;
824
825         if (pvt->inject.enable)
826                 disable_inject(mci);
827
828         rc = strict_strtoul(data, 10, &value);
829         if ((rc < 0) || (value > 7))
830                 return -EIO;
831
832         pvt->inject.type = (u32) value;
833         return count;
834 }
835
836 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
837                                               char *data)
838 {
839         struct i7core_pvt *pvt = mci->pvt_info;
840         return sprintf(data, "0x%08x\n", pvt->inject.type);
841 }
842
/*
 * i7core_inject_eccmask_store
 *
 * The type of error (UE/CE) will depend on the inject.eccmask value:
 *   Any bits set to a 1 will flip the corresponding ECC bit
 *   Correctable errors can be injected by flipping 1 bit or the bits within
 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 *   uncorrectable error to be injected.
 */
853 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
854                                         const char *data, size_t count)
855 {
856         struct i7core_pvt *pvt = mci->pvt_info;
857         unsigned long value;
858         int rc;
859
860         if (pvt->inject.enable)
861                 disable_inject(mci);
862
863         rc = strict_strtoul(data, 10, &value);
864         if (rc < 0)
865                 return -EIO;
866
867         pvt->inject.eccmask = (u32) value;
868         return count;
869 }
870
871 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
872                                               char *data)
873 {
874         struct i7core_pvt *pvt = mci->pvt_info;
875         return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
876 }
877
/*
 * i7core_addrmatch
 *
 * Sysfs attributes with the address-match criteria (channel, dimm, rank,
 * bank, page and col) used by the error injection logic. Writing "any"
 * (stored internally as -1) makes the memory controller ignore that field
 * when matching the address of a write operation.
 */

/*
 * DECLARE_ADDR_MATCH - generate the sysfs show/store handlers for one
 * inject_addrmatch field. The store handler accepts either "any" (stored
 * as -1, meaning "don't match on this field") or a decimal value below
 * @limit; a pending injection is disabled first, since changed parameters
 * only take effect at enable time.
 *
 * Fix: strict_strtoul() takes an unsigned long *, but the old code passed
 * a long *, an incompatible pointer type. Parse into an unsigned long and
 * assign the result (or -1 for "any") to the signed pvt->inject field.
 */
#define DECLARE_ADDR_MATCH(param, limit)                        \
static ssize_t i7core_inject_store_##param(                     \
		struct mem_ctl_info *mci,                       \
		const char *data, size_t count)                 \
{                                                               \
	struct i7core_pvt *pvt;                                 \
	unsigned long value;                                    \
	int rc;                                                 \
								\
	debugf1("%s()\n", __func__);                            \
	pvt = mci->pvt_info;                                    \
								\
	if (pvt->inject.enable)                                 \
		disable_inject(mci);                            \
								\
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
		pvt->inject.param = -1;                         \
	else {                                                  \
		rc = strict_strtoul(data, 10, &value);          \
		if ((rc < 0) || (value >= limit))               \
			return -EIO;                            \
		pvt->inject.param = value;                      \
	}                                                       \
								\
	return count;                                           \
}                                                               \
								\
static ssize_t i7core_inject_show_##param(                      \
		struct mem_ctl_info *mci,                       \
		char *data)                                     \
{                                                               \
	struct i7core_pvt *pvt;                                 \
								\
	pvt = mci->pvt_info;                                    \
	debugf1("%s() pvt=%p\n", __func__, pvt);                \
	if (pvt->inject.param < 0)                              \
		return sprintf(data, "any\n");                  \
	else                                                    \
		return sprintf(data, "%d\n", pvt->inject.param);\
}
930
/*
 * ATTR_ADDR_MATCH - sysfs attribute entry wiring one address-match field
 * to the show/store handlers generated by DECLARE_ADDR_MATCH above.
 */
#define ATTR_ADDR_MATCH(param)                                  \
	{                                                       \
		.attr = {                                       \
			.name = #param,                         \
			.mode = (S_IRUGO | S_IWUSR)             \
		},                                              \
		.show  = i7core_inject_show_##param,            \
		.store = i7core_inject_store_##param,           \
	}

/*
 * Exclusive upper limits for each match field — presumably dictated by
 * the MC_CHANNEL_ADDR_MATCH register layout; confirm against datasheet.
 */
DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);
947
948 static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
949 {
950         u32 read;
951         int count;
952
953         debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
954                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
955                 where, val);
956
957         for (count = 0; count < 10; count++) {
958                 if (count)
959                         msleep(100);
960                 pci_write_config_dword(dev, where, val);
961                 pci_read_config_dword(dev, where, &read);
962
963                 if (read == val)
964                         return 0;
965         }
966
967         i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
968                 "write=%08x. Read=%08x\n",
969                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
970                 where, val, read);
971
972         return -EINVAL;
973 }
974
975 /*
976  * This routine prepares the Memory Controller for error injection.
977  * The error will be injected when some process tries to write to the
978  * memory that matches the given criteria.
979  * The criteria can be set in terms of a mask where dimm, rank, bank, page
980  * and col can be specified.
981  * A -1 value for any of the mask items will make the MCU to ignore
982  * that matching criteria for error injection.
983  *
984  * It should be noticed that the error will only happen after a write operation
985  * on a memory that matches the condition. if REPEAT_EN is not enabled at
986  * inject mask, then it will produce just one error. Otherwise, it will repeat
 * until the injectmask is cleared.
988  *
989  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
990  *    is reliable enough to check if the MC is using the
991  *    three channels. However, this is not clear at the datasheet.
992  */
993 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
994                                        const char *data, size_t count)
995 {
996         struct i7core_pvt *pvt = mci->pvt_info;
997         u32 injectmask;
998         u64 mask = 0;
999         int  rc;
1000         long enable;
1001
1002         if (!pvt->pci_ch[pvt->inject.channel][0])
1003                 return 0;
1004
1005         rc = strict_strtoul(data, 10, &enable);
1006         if ((rc < 0))
1007                 return 0;
1008
1009         if (enable) {
1010                 pvt->inject.enable = 1;
1011         } else {
1012                 disable_inject(mci);
1013                 return count;
1014         }
1015
1016         /* Sets pvt->inject.dimm mask */
1017         if (pvt->inject.dimm < 0)
1018                 mask |= 1LL << 41;
1019         else {
1020                 if (pvt->channel[pvt->inject.channel].dimms > 2)
1021                         mask |= (pvt->inject.dimm & 0x3LL) << 35;
1022                 else
1023                         mask |= (pvt->inject.dimm & 0x1LL) << 36;
1024         }
1025
1026         /* Sets pvt->inject.rank mask */
1027         if (pvt->inject.rank < 0)
1028                 mask |= 1LL << 40;
1029         else {
1030                 if (pvt->channel[pvt->inject.channel].dimms > 2)
1031                         mask |= (pvt->inject.rank & 0x1LL) << 34;
1032                 else
1033                         mask |= (pvt->inject.rank & 0x3LL) << 34;
1034         }
1035
1036         /* Sets pvt->inject.bank mask */
1037         if (pvt->inject.bank < 0)
1038                 mask |= 1LL << 39;
1039         else
1040                 mask |= (pvt->inject.bank & 0x15LL) << 30;
1041
1042         /* Sets pvt->inject.page mask */
1043         if (pvt->inject.page < 0)
1044                 mask |= 1LL << 38;
1045         else
1046                 mask |= (pvt->inject.page & 0xffff) << 14;
1047
1048         /* Sets pvt->inject.column mask */
1049         if (pvt->inject.col < 0)
1050                 mask |= 1LL << 37;
1051         else
1052                 mask |= (pvt->inject.col & 0x3fff);
1053
1054         /*
1055          * bit    0: REPEAT_EN
1056          * bits 1-2: MASK_HALF_CACHELINE
1057          * bit    3: INJECT_ECC
1058          * bit    4: INJECT_ADDR_PARITY
1059          */
1060
1061         injectmask = (pvt->inject.type & 1) |
1062                      (pvt->inject.section & 0x3) << 1 |
1063                      (pvt->inject.type & 0x6) << (3 - 1);
1064
1065         /* Unlock writes to registers - this register is write only */
1066         pci_write_config_dword(pvt->pci_noncore,
1067                                MC_CFG_CONTROL, 0x2);
1068
1069         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1070                                MC_CHANNEL_ADDR_MATCH, mask);
1071         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1072                                MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1073
1074         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1075                                MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1076
1077         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1078                                MC_CHANNEL_ERROR_INJECT, injectmask);
1079
1080         /*
1081          * This is something undocumented, based on my tests
1082          * Without writing 8 to this register, errors aren't injected. Not sure
1083          * why.
1084          */
1085         pci_write_config_dword(pvt->pci_noncore,
1086                                MC_CFG_CONTROL, 8);
1087
1088         debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1089                 " inject 0x%08x\n",
1090                 mask, pvt->inject.eccmask, injectmask);
1091
1092
1093         return count;
1094 }
1095
1096 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1097                                         char *data)
1098 {
1099         struct i7core_pvt *pvt = mci->pvt_info;
1100         u32 injectmask;
1101
1102         if (!pvt->pci_ch[pvt->inject.channel][0])
1103                 return 0;
1104
1105         pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1106                                MC_CHANNEL_ERROR_INJECT, &injectmask);
1107
1108         debugf0("Inject error read: 0x%018x\n", injectmask);
1109
1110         if (injectmask & 0x0c)
1111                 pvt->inject.enable = 1;
1112
1113         return sprintf(data, "%d\n", pvt->inject.enable);
1114 }
1115
/*
 * DECLARE_COUNTER - generate the sysfs show handler for one of the
 * all-channel udimm corrected-error counters. The count is reported as
 * unavailable until CE counts were collected at least once, or when the
 * controller is flagged as registered (pvt->is_registered).
 */
#define DECLARE_COUNTER(param)                                  \
static ssize_t i7core_show_counter_##param(                     \
		struct mem_ctl_info *mci,                       \
		char *data)                                     \
{                                                               \
	struct i7core_pvt *pvt = mci->pvt_info;                 \
								\
	debugf1("%s() \n", __func__);                           \
	if (!pvt->ce_count_available || (pvt->is_registered))   \
		return sprintf(data, "data unavailable\n");     \
	return sprintf(data, "%lu\n",                           \
			pvt->udimm_ce_count[param]);            \
}
1129
1130 #define ATTR_COUNTER(param)                                     \
1131         {                                                       \
1132                 .attr = {                                       \
1133                         .name = __stringify(udimm##param),      \
1134                         .mode = (S_IRUGO | S_IWUSR)             \
1135                 },                                              \
1136                 .show  = i7core_show_counter_##param            \
1137         }
1138
1139 DECLARE_COUNTER(0);
1140 DECLARE_COUNTER(1);
1141 DECLARE_COUNTER(2);
1142
1143 /*
1144  * Sysfs struct
1145  */
1146
/* Address-match criteria attributes, shown under "inject_addrmatch" */
static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
	ATTR_ADDR_MATCH(channel),
	ATTR_ADDR_MATCH(dimm),
	ATTR_ADDR_MATCH(rank),
	ATTR_ADDR_MATCH(bank),
	ATTR_ADDR_MATCH(page),
	ATTR_ADDR_MATCH(col),
	{ } /* End of list */
};

/* Sysfs sub-directory grouping the address-match attributes above */
static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
	.name  = "inject_addrmatch",
	.mcidev_attr = i7core_addrmatch_attrs,
};
1161
1162 static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1163         ATTR_COUNTER(0),
1164         ATTR_COUNTER(1),
1165         ATTR_COUNTER(2),
1166         { .attr = { .name = NULL } }
1167 };
1168
1169 static const struct mcidev_sysfs_group i7core_udimm_counters = {
1170         .name  = "all_channel_counts",
1171         .mcidev_attr = i7core_udimm_counters_attrs,
1172 };
1173
/*
 * Error-injection sysfs controls used for registered-DIMM controllers.
 * Same entries as the udimm table below, minus the all_channel_counts
 * group (the udimm counters report "data unavailable" when
 * pvt->is_registered is set).
 */
static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	},
	{ }	/* End of list */
};
1208
/*
 * Error-injection sysfs controls used for unbuffered-DIMM controllers.
 * Same entries as the rdimm table above, plus the all_channel_counts
 * group with the per-dimm corrected-error counters.
 */
static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	}, {
		.grp = &i7core_udimm_counters,
	},
	{ }	/* End of list */
};
1245
1246 /****************************************************************************
1247         Device initialization routines: put/get, init/exit
1248  ****************************************************************************/
1249
1250 /*
1251  *      i7core_put_devices      'put' all the devices that we have
1252  *                              reserved via 'get'
1253  */
1254 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1255 {
1256         int i;
1257
1258         debugf0(__FILE__ ": %s()\n", __func__);
1259         for (i = 0; i < i7core_dev->n_devs; i++) {
1260                 struct pci_dev *pdev = i7core_dev->pdev[i];
1261                 if (!pdev)
1262                         continue;
1263                 debugf0("Removing dev %02x:%02x.%d\n",
1264                         pdev->bus->number,
1265                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1266                 pci_dev_put(pdev);
1267         }
1268         kfree(i7core_dev->pdev);
1269 }
1270
1271 static void i7core_put_all_devices(void)
1272 {
1273         struct i7core_dev *i7core_dev, *tmp;
1274
1275         list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1276                 i7core_put_devices(i7core_dev);
1277                 list_del(&i7core_dev->list);
1278                 kfree(i7core_dev);
1279         }
1280 }
1281
1282 static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1283 {
1284         struct pci_dev *pdev = NULL;
1285         int i;
1286
1287         /*
1288          * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1289          * aren't announced by acpi. So, we need to use a legacy scan probing
1290          * to detect them
1291          */
1292         while (table && table->descr) {
1293                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1294                 if (unlikely(!pdev)) {
1295                         for (i = 0; i < MAX_SOCKET_BUSES; i++)
1296                                 pcibios_scan_specific_bus(255-i);
1297                 }
1298                 pci_dev_put(pdev);
1299                 table++;
1300         }
1301 }
1302
1303 static unsigned i7core_pci_lastbus(void)
1304 {
1305         int last_bus = 0, bus;
1306         struct pci_bus *b = NULL;
1307
1308         while ((b = pci_find_next_bus(b)) != NULL) {
1309                 bus = b->number;
1310                 debugf0("Found bus %d\n", bus);
1311                 if (bus > last_bus)
1312                         last_bus = bus;
1313         }
1314
1315         debugf0("Last bus %d\n", last_bus);
1316
1317         return last_bus;
1318 }
1319
1320 /*
1321  *      i7core_get_devices      Find and perform 'get' operation on the MCH's
1322  *                      device/functions we want to reference for this driver
1323  *
1324  *                      Need to 'get' device 16 func 1 and func 2
1325  */
/*
 * i7core_get_onedevice - 'get' the PCI device described by table->descr[devno]
 * @prev:	device found on the previous call for the same id (NULL on the
 *		first call); acts as an iteration cursor across sockets and is
 *		updated to the device found, or NULL at end of iteration
 * @table:	PCI id table for this CPU family
 * @devno:	index of the descriptor inside @table
 * @last_bus:	highest PCI bus number; the socket is last_bus - device bus
 *
 * Stores the reference in the per-socket i7core_dev (allocating it on first
 * use); references are dropped later by i7core_put_devices().
 * Returns 0 on success or tolerated absence, -ENOMEM/-ENODEV on failure.
 */
static int i7core_get_onedevice(struct pci_dev **prev,
				const struct pci_id_table *table,
				const unsigned devno,
				const unsigned last_bus)
{
	struct i7core_dev *i7core_dev;
	const struct pci_id_descr *dev_descr = &table->descr[devno];

	struct pci_dev *pdev = NULL;
	u8 bus = 0;
	u8 socket = 0;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
			      dev_descr->dev_id, *prev);

	/*
	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
	 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
	 * to probe for the alternate address in case of failure
	 */
	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
				      *prev);

	if (!pdev) {
		/* Normal end of iteration: a previous call found a device */
		if (*prev) {
			*prev = pdev;
			return 0;
		}

		if (dev_descr->optional)
			return 0;

		/* devno 0 missing: caller skips the whole table silently */
		if (devno == 0)
			return -ENODEV;

		i7core_printk(KERN_INFO,
			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
			dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

		/* End of list, leave */
		return -ENODEV;
	}
	bus = pdev->bus->number;

	/* Non-core buses count down from last_bus, one per socket */
	socket = last_bus - bus;

	i7core_dev = get_i7core_dev(socket);
	if (!i7core_dev) {
		i7core_dev = alloc_i7core_dev(socket, table);
		if (!i7core_dev)
			return -ENOMEM;
	}

	if (i7core_dev->pdev[devno]) {
		i7core_printk(KERN_ERR,
			"Duplicated device for "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		pci_dev_put(pdev);
		return -ENODEV;
	}

	/* Reference is owned by i7core_dev from here on; i7core_put_devices()
	 * will drop it even on the error paths below. */
	i7core_dev->pdev[devno] = pdev;

	/* Sanity check */
	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
		i7core_printk(KERN_ERR,
			"Device PCI ID %04x:%04x "
			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			bus, dev_descr->dev, dev_descr->func);
		return -ENODEV;
	}

	/* Be sure that the device is enabled */
	if (unlikely(pci_enable_device(pdev) < 0)) {
		i7core_printk(KERN_ERR,
			"Couldn't enable "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		return -ENODEV;
	}

	debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
		socket, bus, dev_descr->dev,
		dev_descr->func,
		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	*prev = pdev;

	return 0;
}
1429
/*
 * i7core_get_devices - 'get' every device of every table entry, across
 * all sockets. The inner do/while keeps calling i7core_get_onedevice()
 * until its cursor (pdev) comes back NULL, i.e. one pass per socket.
 * Returns 0 on success, -ENODEV (with all references released) otherwise.
 */
static int i7core_get_devices(const struct pci_id_table *table)
{
	int i, rc, last_bus;
	struct pci_dev *pdev = NULL;

	last_bus = i7core_pci_lastbus();

	while (table && table->descr) {
		for (i = 0; i < table->n_devs; i++) {
			pdev = NULL;
			do {
				rc = i7core_get_onedevice(&pdev, table, i,
							  last_bus);
				if (rc < 0) {
					/* First device of this table missing:
					 * skip the whole table and try the
					 * next one instead of failing. */
					if (i == 0) {
						i = table->n_devs;
						break;
					}
					i7core_put_all_devices();
					return -ENODEV;
				}
			} while (pdev);
		}
		table++;
	}

	return 0;
}
1458
/*
 * mci_bind_devs - sort one socket's gathered PCI devices into the pvt
 * lookup tables used by the rest of the driver.
 *
 * Slot/function mapping (derived from the devfn checks below):
 *   slot 3              -> pvt->pci_mcr[func]     (func <= MAX_MCR_FUNC)
 *   slots 4..4+NUM_CHANS-1 -> pvt->pci_ch[slot-4][func]
 *   slot 0 func 0       -> pvt->pci_noncore
 * Anything else is out of range and fails with -EINVAL.
 *
 * Presence of dev 3 func 2 sets pvt->is_registered — presumably meaning
 * registered DIMMs; confirm against the datasheet.
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	pvt->is_registered = 0;
	for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func)
			pvt->pci_noncore = pdev;
		else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		if (PCI_SLOT(pdev->devfn) == 3 &&
			PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = 1;
	}

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}
1508
1509 /****************************************************************************
1510                         Error check routines
1511  ****************************************************************************/
1512 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1513                                       const int chan,
1514                                       const int dimm,
1515                                       const int add)
1516 {
1517         char *msg;
1518         struct i7core_pvt *pvt = mci->pvt_info;
1519         int row = pvt->csrow_map[chan][dimm], i;
1520
1521         for (i = 0; i < add; i++) {
1522                 msg = kasprintf(GFP_KERNEL, "Corrected error "
1523                                 "(Socket=%d channel=%d dimm=%d)",
1524                                 pvt->i7core_dev->socket, chan, dimm);
1525
1526                 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1527                 kfree (msg);
1528         }
1529 }
1530
/*
 * i7core_rdimm_update_ce_count - fold freshly read per-dimm CE counts
 * into the running totals and forward the deltas to the EDAC core.
 *
 * The first call only seeds the "last seen" values; from then on the
 * delta against the previous read is accumulated. A negative delta is
 * compensated by adding 0x7fff — apparently the hardware counters wrap
 * at 15 bits (NOTE(review): an exact 15-bit wrap would suggest 0x8000;
 * confirm against the datasheet).
 */
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
					 const int chan,
					 const int new0,
					 const int new1,
					 const int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */

		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

		if (add2 < 0)
			add2 += 0x7fff;
		pvt->rdimm_ce_count[chan][2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->rdimm_ce_count[chan][1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->rdimm_ce_count[chan][0] += add0;
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;

	/*updated the edac core */
	if (add0 != 0)
		i7core_rdimm_update_csrow(mci, chan, 0, add0);
	if (add1 != 0)
		i7core_rdimm_update_csrow(mci, chan, 1, add1);
	if (add2 != 0)
		i7core_rdimm_update_csrow(mci, chan, 2, add2);

}
1575
/*
 * i7core_rdimm_check_mc_ecc_err - poll the six MC_COR_ECC_CNT registers
 * (dev 3, func 2) and feed the per-channel/per-dimm corrected-error
 * counts to i7core_rdimm_update_ce_count().
 *
 * Each register holds a top and a bottom counter; how they map to dimms
 * depends on whether the channel carries more than two dimms.
 */
static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv[3][2];
	int i, new0, new1, new2;

	/*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
								&rcv[0][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
								&rcv[0][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
								&rcv[1][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
								&rcv[1][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
								&rcv[2][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
								&rcv[2][1]);
	for (i = 0 ; i < 3; i++) {
		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
		/*if the channel has 3 dimms*/
		if (pvt->channel[i].dimms > 2) {
			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
		} else {
			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
					DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
					DIMM_BOT_COR_ERR(rcv[i][1]);
			new2 = 0;
		}

		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
	}
}
1614
1615 /* This function is based on the device 3 function 4 registers as described on:
1616  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1617  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1618  * also available at:
1619  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1620  */
1621 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1622 {
1623         struct i7core_pvt *pvt = mci->pvt_info;
1624         u32 rcv1, rcv0;
1625         int new0, new1, new2;
1626
1627         if (!pvt->pci_mcr[4]) {
1628                 debugf0("%s MCR registers not found\n", __func__);
1629                 return;
1630         }
1631
1632         /* Corrected test errors */
1633         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1634         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1635
1636         /* Store the new values */
1637         new2 = DIMM2_COR_ERR(rcv1);
1638         new1 = DIMM1_COR_ERR(rcv0);
1639         new0 = DIMM0_COR_ERR(rcv0);
1640
1641         /* Updates CE counters if it is not the first time here */
1642         if (pvt->ce_count_available) {
1643                 /* Updates CE counters */
1644                 int add0, add1, add2;
1645
1646                 add2 = new2 - pvt->udimm_last_ce_count[2];
1647                 add1 = new1 - pvt->udimm_last_ce_count[1];
1648                 add0 = new0 - pvt->udimm_last_ce_count[0];
1649
1650                 if (add2 < 0)
1651                         add2 += 0x7fff;
1652                 pvt->udimm_ce_count[2] += add2;
1653
1654                 if (add1 < 0)
1655                         add1 += 0x7fff;
1656                 pvt->udimm_ce_count[1] += add1;
1657
1658                 if (add0 < 0)
1659                         add0 += 0x7fff;
1660                 pvt->udimm_ce_count[0] += add0;
1661
1662                 if (add0 | add1 | add2)
1663                         i7core_printk(KERN_ERR, "New Corrected error(s): "
1664                                       "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1665                                       add0, add1, add2);
1666         } else
1667                 pvt->ce_count_available = 1;
1668
1669         /* Store the new values */
1670         pvt->udimm_last_ce_count[2] = new2;
1671         pvt->udimm_last_ce_count[1] = new1;
1672         pvt->udimm_last_ce_count[0] = new0;
1673 }
1674
1675 /*
 * According to tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1677  * Architectures Software Developer’s Manual Volume 3B.
1678  * Nehalem are defined as family 0x06, model 0x1a
1679  *
1680  * The MCA registers used here are the following ones:
1681  *     struct mce field MCA Register
1682  *     m->status        MSR_IA32_MC8_STATUS
1683  *     m->addr          MSR_IA32_MC8_ADDR
1684  *     m->misc          MSR_IA32_MC8_MISC
1685  * In the case of Nehalem, the error information is masked at .status and .misc
1686  * fields
1687  */
1688 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1689                                     const struct mce *m)
1690 {
1691         struct i7core_pvt *pvt = mci->pvt_info;
1692         char *type, *optype, *err, *msg;
1693         unsigned long error = m->status & 0x1ff0000l;
1694         u32 optypenum = (m->status >> 4) & 0x07;
1695         u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1696         u32 dimm = (m->misc >> 16) & 0x3;
1697         u32 channel = (m->misc >> 18) & 0x3;
1698         u32 syndrome = m->misc >> 32;
1699         u32 errnum = find_first_bit(&error, 32);
1700         int csrow;
1701
1702         if (m->mcgstatus & 1)
1703                 type = "FATAL";
1704         else
1705                 type = "NON_FATAL";
1706
1707         switch (optypenum) {
1708         case 0:
1709                 optype = "generic undef request";
1710                 break;
1711         case 1:
1712                 optype = "read error";
1713                 break;
1714         case 2:
1715                 optype = "write error";
1716                 break;
1717         case 3:
1718                 optype = "addr/cmd error";
1719                 break;
1720         case 4:
1721                 optype = "scrubbing error";
1722                 break;
1723         default:
1724                 optype = "reserved";
1725                 break;
1726         }
1727
1728         switch (errnum) {
1729         case 16:
1730                 err = "read ECC error";
1731                 break;
1732         case 17:
1733                 err = "RAS ECC error";
1734                 break;
1735         case 18:
1736                 err = "write parity error";
1737                 break;
1738         case 19:
1739                 err = "redundacy loss";
1740                 break;
1741         case 20:
1742                 err = "reserved";
1743                 break;
1744         case 21:
1745                 err = "memory range error";
1746                 break;
1747         case 22:
1748                 err = "RTID out of range";
1749                 break;
1750         case 23:
1751                 err = "address parity error";
1752                 break;
1753         case 24:
1754                 err = "byte enable parity error";
1755                 break;
1756         default:
1757                 err = "unknown";
1758         }
1759
1760         /* FIXME: should convert addr into bank and rank information */
1761         msg = kasprintf(GFP_ATOMIC,
1762                 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1763                 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1764                 type, (long long) m->addr, m->cpu, dimm, channel,
1765                 syndrome, core_err_cnt, (long long)m->status,
1766                 (long long)m->misc, optype, err);
1767
1768         debugf0("%s", msg);
1769
1770         csrow = pvt->csrow_map[channel][dimm];
1771
1772         /* Call the helper to output message */
1773         if (m->mcgstatus & 1)
1774                 edac_mc_handle_fbd_ue(mci, csrow, 0,
1775                                 0 /* FIXME: should be channel here */, msg);
1776         else if (!pvt->is_registered)
1777                 edac_mc_handle_fbd_ce(mci, csrow,
1778                                 0 /* FIXME: should be channel here */, msg);
1779
1780         kfree(msg);
1781 }
1782
1783 /*
1784  *      i7core_check_error      Retrieve and process errors reported by the
1785  *                              hardware. Called by the Core module.
1786  */
1787 static void i7core_check_error(struct mem_ctl_info *mci)
1788 {
1789         struct i7core_pvt *pvt = mci->pvt_info;
1790         int i;
1791         unsigned count = 0;
1792         struct mce *m;
1793
1794         /*
1795          * MCE first step: Copy all mce errors into a temporary buffer
1796          * We use a double buffering here, to reduce the risk of
1797          * loosing an error.
1798          */
1799         smp_rmb();
1800         count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1801                 % MCE_LOG_LEN;
1802         if (!count)
1803                 goto check_ce_error;
1804
1805         m = pvt->mce_outentry;
1806         if (pvt->mce_in + count > MCE_LOG_LEN) {
1807                 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1808
1809                 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1810                 smp_wmb();
1811                 pvt->mce_in = 0;
1812                 count -= l;
1813                 m += l;
1814         }
1815         memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1816         smp_wmb();
1817         pvt->mce_in += count;
1818
1819         smp_rmb();
1820         if (pvt->mce_overrun) {
1821                 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1822                               pvt->mce_overrun);
1823                 smp_wmb();
1824                 pvt->mce_overrun = 0;
1825         }
1826
1827         /*
1828          * MCE second step: parse errors and display
1829          */
1830         for (i = 0; i < count; i++)
1831                 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1832
1833         /*
1834          * Now, let's increment CE error counts
1835          */
1836 check_ce_error:
1837         if (!pvt->is_registered)
1838                 i7core_udimm_check_mc_ecc_err(mci);
1839         else
1840                 i7core_rdimm_check_mc_ecc_err(mci);
1841 }
1842
1843 /*
1844  * i7core_mce_check_error       Replicates mcelog routine to get errors
1845  *                              This routine simply queues mcelog errors, and
1846  *                              return. The error itself should be handled later
1847  *                              by i7core_check_error.
1848  * WARNING: As this routine should be called at NMI time, extra care should
1849  * be taken to avoid deadlocks, and to be as fast as possible.
1850  */
static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;

#ifdef CONFIG_SMP
	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
		return 0;
#endif

	/*
	 * Ring-buffer producer side (consumer is i7core_check_error).
	 * smp_rmb() pairs with the consumer's smp_wmb() so we see its
	 * latest mce_in before testing for a full ring.
	 */
	smp_rmb();
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		/* Ring full: count the dropped error instead of blocking */
		smp_wmb();
		pvt->mce_overrun++;
		return 0;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	/* Publish the entry before advancing the producer index */
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/*
	 * Handle fatal errors immediately.
	 * NOTE(review): this calls the full decode path from what the header
	 * comment says is NMI context — presumably acceptable for a fatal
	 * error about to take the machine down; verify.
	 */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advise mcelog that the error was handled */
	return 1;
}
1892
1893 static int i7core_register_mci(struct i7core_dev *i7core_dev,
1894                                const int num_channels, const int num_csrows)
1895 {
1896         struct mem_ctl_info *mci;
1897         struct i7core_pvt *pvt;
1898         int csrow = 0;
1899         int rc;
1900
1901         /* allocate a new MC control structure */
1902         mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1903                             i7core_dev->socket);
1904         if (unlikely(!mci))
1905                 return -ENOMEM;
1906
1907         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
1908                 __func__, mci, &i7core_dev->pdev[0]->dev);
1909
1910         /* record ptr to the generic device */
1911         mci->dev = &i7core_dev->pdev[0]->dev;
1912
1913         pvt = mci->pvt_info;
1914         memset(pvt, 0, sizeof(*pvt));
1915
1916         /*
1917          * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1918          * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1919          * memory channels
1920          */
1921         mci->mtype_cap = MEM_FLAG_DDR3;
1922         mci->edac_ctl_cap = EDAC_FLAG_NONE;
1923         mci->edac_cap = EDAC_FLAG_NONE;
1924         mci->mod_name = "i7core_edac.c";
1925         mci->mod_ver = I7CORE_REVISION;
1926         mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1927                                   i7core_dev->socket);
1928         mci->dev_name = pci_name(i7core_dev->pdev[0]);
1929         mci->ctl_page_to_phys = NULL;
1930
1931         if (pvt->is_registered)
1932                 mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
1933         else
1934                 mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;
1935
1936         /* Set the function pointer to an actual operation function */
1937         mci->edac_check = i7core_check_error;
1938
1939         /* Store pci devices at mci for faster access */
1940         rc = mci_bind_devs(mci, i7core_dev);
1941         if (unlikely(rc < 0))
1942                 goto fail;
1943
1944         /* Get dimm basic config */
1945         get_dimm_config(mci, &csrow);
1946
1947         /* add this new MC control structure to EDAC's list of MCs */
1948         if (unlikely(edac_mc_add_mc(mci))) {
1949                 debugf0("MC: " __FILE__
1950                         ": %s(): failed edac_mc_add_mc()\n", __func__);
1951                 /* FIXME: perhaps some code should go here that disables error
1952                  * reporting if we just enabled it
1953                  */
1954
1955                 rc = -EINVAL;
1956                 goto fail;
1957         }
1958
1959         /* Default error mask is any memory */
1960         pvt->inject.channel = 0;
1961         pvt->inject.dimm = -1;
1962         pvt->inject.rank = -1;
1963         pvt->inject.bank = -1;
1964         pvt->inject.page = -1;
1965         pvt->inject.col = -1;
1966
1967         /* Registers on edac_mce in order to receive memory errors */
1968         pvt->edac_mce.priv = mci;
1969         pvt->edac_mce.check_error = i7core_mce_check_error;
1970
1971         /* allocating generic PCI control info */
1972         pvt->i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1973                                                  EDAC_MOD_STR);
1974         if (unlikely(!pvt->i7core_pci)) {
1975                 printk(KERN_WARNING
1976                         "%s(): Unable to create PCI control\n",
1977                         __func__);
1978                 printk(KERN_WARNING
1979                         "%s(): PCI error report via EDAC not setup\n",
1980                         __func__);
1981         }
1982
1983         rc = edac_mce_register(&pvt->edac_mce);
1984         if (unlikely(rc < 0)) {
1985                 debugf0("MC: " __FILE__
1986                         ": %s(): failed edac_mce_register()\n", __func__);
1987         }
1988
1989 fail:
1990         if (rc < 0)
1991                 edac_mc_free(mci);
1992         return rc;
1993 }
1994
1995 /*
1996  *      i7core_probe    Probe for ONE instance of device to see if it is
1997  *                      present.
1998  *      return:
1999  *              0 for FOUND a device
2000  *              < 0 for error code
2001  */
2002
2003 static int __devinit i7core_probe(struct pci_dev *pdev,
2004                                   const struct pci_device_id *id)
2005 {
2006         int rc;
2007         struct i7core_dev *i7core_dev;
2008
2009         /* get the pci devices we want to reserve for our use */
2010         mutex_lock(&i7core_edac_lock);
2011
2012         /*
2013          * All memory controllers are allocated at the first pass.
2014          */
2015         if (unlikely(probed >= 1)) {
2016                 mutex_unlock(&i7core_edac_lock);
2017                 return -EINVAL;
2018         }
2019         probed++;
2020
2021         rc = i7core_get_devices(pci_dev_table);
2022         if (unlikely(rc < 0))
2023                 goto fail0;
2024
2025         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2026                 int channels;
2027                 int csrows;
2028
2029                 /* Check the number of active and not disabled channels */
2030                 rc = i7core_get_active_channels(i7core_dev->socket,
2031                                                 &channels, &csrows);
2032                 if (unlikely(rc < 0))
2033                         goto fail1;
2034
2035                 rc = i7core_register_mci(i7core_dev, channels, csrows);
2036                 if (unlikely(rc < 0))
2037                         goto fail1;
2038         }
2039
2040         i7core_printk(KERN_INFO, "Driver loaded.\n");
2041
2042         mutex_unlock(&i7core_edac_lock);
2043         return 0;
2044
2045 fail1:
2046         i7core_put_all_devices();
2047 fail0:
2048         mutex_unlock(&i7core_edac_lock);
2049         return rc;
2050 }
2051
2052 /*
2053  *      i7core_remove   destructor for one instance of device
2054  *
2055  */
2056 static void __devexit i7core_remove(struct pci_dev *pdev)
2057 {
2058         struct mem_ctl_info *mci;
2059         struct i7core_dev *i7core_dev, *tmp;
2060         struct i7core_pvt *pvt;
2061
2062         debugf0(__FILE__ ": %s()\n", __func__);
2063
2064         /*
2065          * we have a trouble here: pdev value for removal will be wrong, since
2066          * it will point to the X58 register used to detect that the machine
2067          * is a Nehalem or upper design. However, due to the way several PCI
2068          * devices are grouped together to provide MC functionality, we need
2069          * to use a different method for releasing the devices
2070          */
2071
2072         mutex_lock(&i7core_edac_lock);
2073         list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
2074                 mci = find_mci_by_dev(&i7core_dev->pdev[0]->dev);
2075                 if (unlikely(!mci || !mci->pvt_info)) {
2076                         debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
2077                                 __func__, &i7core_dev->pdev[0]->dev);
2078
2079                                 i7core_printk(KERN_ERR,
2080                                       "Couldn't find mci hanler\n");
2081                 } else {
2082                         pvt = mci->pvt_info;
2083                         i7core_dev = pvt->i7core_dev;
2084
2085                         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
2086                                 __func__, mci, &i7core_dev->pdev[0]->dev);
2087
2088                         /* Disable MCE NMI handler */
2089                         edac_mce_unregister(&pvt->edac_mce);
2090
2091                         /* Disable EDAC polling */
2092                         if (likely(pvt->i7core_pci))
2093                                 edac_pci_release_generic_ctl(pvt->i7core_pci);
2094                         else
2095                                 i7core_printk(KERN_ERR,
2096                                               "Couldn't find mem_ctl_info for socket %d\n",
2097                                               i7core_dev->socket);
2098                         pvt->i7core_pci = NULL;
2099
2100                         /* Remove MC sysfs nodes */
2101                         edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
2102
2103                         debugf1("%s: free mci struct\n", mci->ctl_name);
2104                         kfree(mci->ctl_name);
2105                         edac_mc_free(mci);
2106
2107                         /* Release PCI resources */
2108                         i7core_put_devices(i7core_dev);
2109                         list_del(&i7core_dev->list);
2110                         kfree(i7core_dev);
2111                 }
2112         }
2113         probed--;
2114
2115         mutex_unlock(&i7core_edac_lock);
2116 }
2117
/* Export the claimed PCI IDs so userspace module autoloading can match us */
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 *	i7core_driver	pci_driver structure for this module
 *
 */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};
2130
2131 /*
2132  *      i7core_init             Module entry function
2133  *                      Try to initialize this module for its devices
2134  */
2135 static int __init i7core_init(void)
2136 {
2137         int pci_rc;
2138
2139         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2140
2141         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2142         opstate_init();
2143
2144         if (use_pci_fixup)
2145                 i7core_xeon_pci_fixup(pci_dev_table);
2146
2147         pci_rc = pci_register_driver(&i7core_driver);
2148
2149         if (pci_rc >= 0)
2150                 return 0;
2151
2152         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2153                       pci_rc);
2154
2155         return pci_rc;
2156 }
2157
2158 /*
2159  *      i7core_exit()   Module exit function
2160  *                      Unregister the driver
2161  */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	/* Unregistering invokes i7core_remove() for the bound device */
	pci_unregister_driver(&i7core_driver);
}
2167
module_init(i7core_init);
module_exit(i7core_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

/* Load-time-only parameter (sysfs mode 0444: world-readable, not writable) */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");