i7core_edac: Introduce free_i7core_dev
[pandora-kernel.git] / drivers / edac / i7core_edac.c
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *       Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *      http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/edac_mce.h>
37 #include <linux/smp.h>
38 #include <asm/processor.h>
39
40 #include "edac_core.h"
41
42 /* Static vars */
43 static LIST_HEAD(i7core_edac_list);
44 static DEFINE_MUTEX(i7core_edac_lock);
45 static int probed;
46
47 static int use_pci_fixup;
48 module_param(use_pci_fixup, int, 0444);
49 MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
50 /*
51  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
52  * registers start at bus 255, and are not reported by BIOS.
53  * We currently find devices with only 2 sockets. In order to support more QPI
54  * Quick Path Interconnect, just increment this number.
55  */
56 #define MAX_SOCKET_BUSES        2
57
58
59 /*
60  * Alter this version for the module when modifications are made
61  */
62 #define I7CORE_REVISION    " Ver: 1.0.0 " __DATE__
63 #define EDAC_MOD_STR      "i7core_edac"
64
65 /*
66  * Debug macros
67  */
68 #define i7core_printk(level, fmt, arg...)                       \
69         edac_printk(level, "i7core", fmt, ##arg)
70
71 #define i7core_mc_printk(mci, level, fmt, arg...)               \
72         edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
73
74 /*
75  * i7core Memory Controller Registers
76  */
77
78         /* OFFSETS for Device 0 Function 0 */
79
80 #define MC_CFG_CONTROL  0x90
81
82         /* OFFSETS for Device 3 Function 0 */
83
84 #define MC_CONTROL      0x48
85 #define MC_STATUS       0x4c
86 #define MC_MAX_DOD      0x64
87
88 /*
89  * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
90  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
91  */
92
93 #define MC_TEST_ERR_RCV1        0x60
94   #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)
95
96 #define MC_TEST_ERR_RCV0        0x64
97   #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
98   #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)
99
100 /* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
101 #define MC_COR_ECC_CNT_0        0x80
102 #define MC_COR_ECC_CNT_1        0x84
103 #define MC_COR_ECC_CNT_2        0x88
104 #define MC_COR_ECC_CNT_3        0x8c
105 #define MC_COR_ECC_CNT_4        0x90
106 #define MC_COR_ECC_CNT_5        0x94
107
108 #define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
109 #define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)
110
111
112         /* OFFSETS for Devices 4,5 and 6 Function 0 */
113
114 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
115   #define THREE_DIMMS_PRESENT           (1 << 24)
116   #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
117   #define QUAD_RANK_PRESENT             (1 << 22)
118   #define REGISTERED_DIMM               (1 << 15)
119
120 #define MC_CHANNEL_MAPPER       0x60
121   #define RDLCH(r, ch)          ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
122   #define WRLCH(r, ch)          ((((r) >> (ch * 6)) & 0x07) - 1)
123
124 #define MC_CHANNEL_RANK_PRESENT 0x7c
125   #define RANK_PRESENT_MASK             0xffff
126
127 #define MC_CHANNEL_ADDR_MATCH   0xf0
128 #define MC_CHANNEL_ERROR_MASK   0xf8
129 #define MC_CHANNEL_ERROR_INJECT 0xfc
130   #define INJECT_ADDR_PARITY    0x10
131   #define INJECT_ECC            0x08
132   #define MASK_CACHELINE        0x06
133   #define MASK_FULL_CACHELINE   0x06
134   #define MASK_MSB32_CACHELINE  0x04
135   #define MASK_LSB32_CACHELINE  0x02
136   #define NO_MASK_CACHELINE     0x00
137   #define REPEAT_EN             0x01
138
139         /* OFFSETS for Devices 4,5 and 6 Function 1 */
140
141 #define MC_DOD_CH_DIMM0         0x48
142 #define MC_DOD_CH_DIMM1         0x4c
143 #define MC_DOD_CH_DIMM2         0x50
144   #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
145   #define RANKOFFSET(x)         ((x & RANKOFFSET_MASK) >> 10)
146   #define DIMM_PRESENT_MASK     (1 << 9)
147   #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
148   #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
149   #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
150   #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
151   #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
152   #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
153   #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
154   #define MC_DOD_NUMCOL_MASK            3
155   #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)
156
157 #define MC_RANK_PRESENT         0x7c
158
159 #define MC_SAG_CH_0     0x80
160 #define MC_SAG_CH_1     0x84
161 #define MC_SAG_CH_2     0x88
162 #define MC_SAG_CH_3     0x8c
163 #define MC_SAG_CH_4     0x90
164 #define MC_SAG_CH_5     0x94
165 #define MC_SAG_CH_6     0x98
166 #define MC_SAG_CH_7     0x9c
167
168 #define MC_RIR_LIMIT_CH_0       0x40
169 #define MC_RIR_LIMIT_CH_1       0x44
170 #define MC_RIR_LIMIT_CH_2       0x48
171 #define MC_RIR_LIMIT_CH_3       0x4C
172 #define MC_RIR_LIMIT_CH_4       0x50
173 #define MC_RIR_LIMIT_CH_5       0x54
174 #define MC_RIR_LIMIT_CH_6       0x58
175 #define MC_RIR_LIMIT_CH_7       0x5C
176 #define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)
177
178 #define MC_RIR_WAY_CH           0x80
179   #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
180   #define MC_RIR_WAY_RANK_MASK          0x7
181
182 /*
183  * i7core structs
184  */
185
186 #define NUM_CHANS 3
187 #define MAX_DIMMS 3             /* Max DIMMS per channel */
188 #define MAX_MCR_FUNC  4
189 #define MAX_CHAN_FUNC 3
190
191 struct i7core_info {
192         u32     mc_control;
193         u32     mc_status;
194         u32     max_dod;
195         u32     ch_map;
196 };
197
198
199 struct i7core_inject {
200         int     enable;
201
202         u32     section;
203         u32     type;
204         u32     eccmask;
205
206         /* Error address mask */
207         int channel, dimm, rank, bank, page, col;
208 };
209
210 struct i7core_channel {
211         u32             ranks;
212         u32             dimms;
213 };
214
215 struct pci_id_descr {
216         int                     dev;
217         int                     func;
218         int                     dev_id;
219         int                     optional;
220 };
221
222 struct pci_id_table {
223         const struct pci_id_descr       *descr;
224         int                             n_devs;
225 };
226
227 struct i7core_dev {
228         struct list_head        list;
229         u8                      socket;
230         struct pci_dev          **pdev;
231         int                     n_devs;
232         struct mem_ctl_info     *mci;
233 };
234
235 struct i7core_pvt {
236         struct pci_dev  *pci_noncore;
237         struct pci_dev  *pci_mcr[MAX_MCR_FUNC + 1];
238         struct pci_dev  *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
239
240         struct i7core_dev *i7core_dev;
241
242         struct i7core_info      info;
243         struct i7core_inject    inject;
244         struct i7core_channel   channel[NUM_CHANS];
245
246         int             channels; /* Number of active channels */
247
248         int             ce_count_available;
249         int             csrow_map[NUM_CHANS][MAX_DIMMS];
250
251                         /* ECC corrected errors counts per udimm */
252         unsigned long   udimm_ce_count[MAX_DIMMS];
253         int             udimm_last_ce_count[MAX_DIMMS];
254                         /* ECC corrected errors counts per rdimm */
255         unsigned long   rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
256         int             rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
257
258         unsigned int    is_registered;
259
260         /* mcelog glue */
261         struct edac_mce         edac_mce;
262
263         /* Fifo double buffers */
264         struct mce              mce_entry[MCE_LOG_LEN];
265         struct mce              mce_outentry[MCE_LOG_LEN];
266
267         /* Fifo in/out counters */
268         unsigned                mce_in, mce_out;
269
270         /* Count indicator to show errors not got */
271         unsigned                mce_overrun;
272
273         /* Struct to control EDAC polling */
274         struct edac_pci_ctl_info *i7core_pci;
275 };
276
277 #define PCI_DESCR(device, function, device_id)  \
278         .dev = (device),                        \
279         .func = (function),                     \
280         .dev_id = (device_id)
281
282 static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
283                 /* Memory controller */
284         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
285         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
286                         /* Exists only for RDIMM */
287         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
288         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
289
290                 /* Channel 0 */
291         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
292         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
293         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
294         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },
295
296                 /* Channel 1 */
297         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
298         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
299         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
300         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },
301
302                 /* Channel 2 */
303         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
304         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
305         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
306         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
307
308                 /* Generic Non-core registers */
309         /*
310          * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
311          * On Xeon 55xx, however, it has a different id (8086:2c40). So,
312          * the probing code needs to test for the other address in case of
313          * failure of this one
314          */
315         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
316
317 };
318
319 static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
320         { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
321         { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
322         { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },
323
324         { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
325         { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
326         { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
327         { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },
328
329         { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
330         { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
331         { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
332         { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
333
334         /*
335          * This is the PCI device has an alternate address on some
336          * processors like Core i7 860
337          */
338         { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
339 };
340
341 static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
342                 /* Memory controller */
343         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
344         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
345                         /* Exists only for RDIMM */
346         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
347         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
348
349                 /* Channel 0 */
350         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
351         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
352         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
353         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },
354
355                 /* Channel 1 */
356         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
357         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
358         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
359         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },
360
361                 /* Channel 2 */
362         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
363         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
364         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
365         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
366
367                 /* Generic Non-core registers */
368         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },
369
370 };
371
372 #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
373 static const struct pci_id_table pci_dev_table[] = {
374         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
375         PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
376         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
377 };
378
379 /*
380  *      pci_device_id   table for which devices we are looking for
381  */
382 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
383         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
384         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
385         {0,}                    /* 0 terminated list. */
386 };
387
388 /****************************************************************************
389                         Ancillary status routines
390  ****************************************************************************/
391
392         /* MC_CONTROL bits */
393 #define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + ch)))
394 #define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))
395
396         /* MC_STATUS bits */
397 #define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
398 #define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << ch))
399
400         /* MC_MAX_DOD read functions */
401 static inline int numdimms(u32 dimms)
402 {
403         return (dimms & 0x3) + 1;
404 }
405
406 static inline int numrank(u32 rank)
407 {
408         static int ranks[4] = { 1, 2, 4, -EINVAL };
409
410         return ranks[rank & 0x3];
411 }
412
413 static inline int numbank(u32 bank)
414 {
415         static int banks[4] = { 4, 8, 16, -EINVAL };
416
417         return banks[bank & 0x3];
418 }
419
420 static inline int numrow(u32 row)
421 {
422         static int rows[8] = {
423                 1 << 12, 1 << 13, 1 << 14, 1 << 15,
424                 1 << 16, -EINVAL, -EINVAL, -EINVAL,
425         };
426
427         return rows[row & 0x7];
428 }
429
430 static inline int numcol(u32 col)
431 {
432         static int cols[8] = {
433                 1 << 10, 1 << 11, 1 << 12, -EINVAL,
434         };
435         return cols[col & 0x3];
436 }
437
438 static struct i7core_dev *get_i7core_dev(u8 socket)
439 {
440         struct i7core_dev *i7core_dev;
441
442         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
443                 if (i7core_dev->socket == socket)
444                         return i7core_dev;
445         }
446
447         return NULL;
448 }
449
450 static struct i7core_dev *alloc_i7core_dev(u8 socket,
451                                            const struct pci_id_table *table)
452 {
453         struct i7core_dev *i7core_dev;
454
455         i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
456         if (!i7core_dev)
457                 return NULL;
458
459         i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
460                                    GFP_KERNEL);
461         if (!i7core_dev->pdev) {
462                 kfree(i7core_dev);
463                 return NULL;
464         }
465
466         i7core_dev->socket = socket;
467         i7core_dev->n_devs = table->n_devs;
468         list_add_tail(&i7core_dev->list, &i7core_edac_list);
469
470         return i7core_dev;
471 }
472
/*
 * Undo alloc_i7core_dev(): unlink the descriptor from i7core_edac_list,
 * then free the pdev pointer array and the descriptor itself.
 * NOTE(review): callers appear to serialize list access via
 * i7core_edac_lock -- confirm before calling without it.
 */
static void free_i7core_dev(struct i7core_dev *i7core_dev)
{
	list_del(&i7core_dev->list);
	kfree(i7core_dev->pdev);
	kfree(i7core_dev);
}
479
480 /****************************************************************************
481                         Memory check routines
482  ****************************************************************************/
483 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
484                                           unsigned func)
485 {
486         struct i7core_dev *i7core_dev = get_i7core_dev(socket);
487         int i;
488
489         if (!i7core_dev)
490                 return NULL;
491
492         for (i = 0; i < i7core_dev->n_devs; i++) {
493                 if (!i7core_dev->pdev[i])
494                         continue;
495
496                 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
497                     PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
498                         return i7core_dev->pdev[i];
499                 }
500         }
501
502         return NULL;
503 }
504
505 /**
506  * i7core_get_active_channels() - gets the number of channels and csrows
507  * @socket:     Quick Path Interconnect socket
508  * @channels:   Number of channels that will be returned
509  * @csrows:     Number of csrows found
510  *
511  * Since EDAC core needs to know in advance the number of available channels
512  * and csrows, in order to allocate memory for csrows/channels, it is needed
513  * to run two similar steps. At the first step, implemented on this function,
514  * it checks the number of csrows/channels present at one socket.
515  * this is used in order to properly allocate the size of mci components.
516  *
517  * It should be noticed that none of the current available datasheets explain
518  * or even mention how csrows are seen by the memory controller. So, we need
519  * to add a fake description for csrows.
520  * So, this driver is attributing one DIMM memory for one csrow.
521  */
522 static int i7core_get_active_channels(const u8 socket, unsigned *channels,
523                                       unsigned *csrows)
524 {
525         struct pci_dev *pdev = NULL;
526         int i, j;
527         u32 status, control;
528
529         *channels = 0;
530         *csrows = 0;
531
532         pdev = get_pdev_slot_func(socket, 3, 0);
533         if (!pdev) {
534                 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
535                               socket);
536                 return -ENODEV;
537         }
538
539         /* Device 3 function 0 reads */
540         pci_read_config_dword(pdev, MC_STATUS, &status);
541         pci_read_config_dword(pdev, MC_CONTROL, &control);
542
543         for (i = 0; i < NUM_CHANS; i++) {
544                 u32 dimm_dod[3];
545                 /* Check if the channel is active */
546                 if (!(control & (1 << (8 + i))))
547                         continue;
548
549                 /* Check if the channel is disabled */
550                 if (status & (1 << i))
551                         continue;
552
553                 pdev = get_pdev_slot_func(socket, i + 4, 1);
554                 if (!pdev) {
555                         i7core_printk(KERN_ERR, "Couldn't find socket %d "
556                                                 "fn %d.%d!!!\n",
557                                                 socket, i + 4, 1);
558                         return -ENODEV;
559                 }
560                 /* Devices 4-6 function 1 */
561                 pci_read_config_dword(pdev,
562                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
563                 pci_read_config_dword(pdev,
564                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
565                 pci_read_config_dword(pdev,
566                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
567
568                 (*channels)++;
569
570                 for (j = 0; j < 3; j++) {
571                         if (!DIMM_PRESENT(dimm_dod[j]))
572                                 continue;
573                         (*csrows)++;
574                 }
575         }
576
577         debugf0("Number of active channels on socket %d: %d\n",
578                 socket, *channels);
579
580         return 0;
581 }
582
583 static int get_dimm_config(const struct mem_ctl_info *mci, int *csrow)
584 {
585         struct i7core_pvt *pvt = mci->pvt_info;
586         struct csrow_info *csr;
587         struct pci_dev *pdev;
588         int i, j;
589         unsigned long last_page = 0;
590         enum edac_type mode;
591         enum mem_type mtype;
592
593         /* Get data from the MC register, function 0 */
594         pdev = pvt->pci_mcr[0];
595         if (!pdev)
596                 return -ENODEV;
597
598         /* Device 3 function 0 reads */
599         pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
600         pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
601         pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
602         pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
603
604         debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
605                 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
606                 pvt->info.max_dod, pvt->info.ch_map);
607
608         if (ECC_ENABLED(pvt)) {
609                 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
610                 if (ECCx8(pvt))
611                         mode = EDAC_S8ECD8ED;
612                 else
613                         mode = EDAC_S4ECD4ED;
614         } else {
615                 debugf0("ECC disabled\n");
616                 mode = EDAC_NONE;
617         }
618
619         /* FIXME: need to handle the error codes */
620         debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
621                 "x%x x 0x%x\n",
622                 numdimms(pvt->info.max_dod),
623                 numrank(pvt->info.max_dod >> 2),
624                 numbank(pvt->info.max_dod >> 4),
625                 numrow(pvt->info.max_dod >> 6),
626                 numcol(pvt->info.max_dod >> 9));
627
628         for (i = 0; i < NUM_CHANS; i++) {
629                 u32 data, dimm_dod[3], value[8];
630
631                 if (!pvt->pci_ch[i][0])
632                         continue;
633
634                 if (!CH_ACTIVE(pvt, i)) {
635                         debugf0("Channel %i is not active\n", i);
636                         continue;
637                 }
638                 if (CH_DISABLED(pvt, i)) {
639                         debugf0("Channel %i is disabled\n", i);
640                         continue;
641                 }
642
643                 /* Devices 4-6 function 0 */
644                 pci_read_config_dword(pvt->pci_ch[i][0],
645                                 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
646
647                 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
648                                                 4 : 2;
649
650                 if (data & REGISTERED_DIMM)
651                         mtype = MEM_RDDR3;
652                 else
653                         mtype = MEM_DDR3;
654 #if 0
655                 if (data & THREE_DIMMS_PRESENT)
656                         pvt->channel[i].dimms = 3;
657                 else if (data & SINGLE_QUAD_RANK_PRESENT)
658                         pvt->channel[i].dimms = 1;
659                 else
660                         pvt->channel[i].dimms = 2;
661 #endif
662
663                 /* Devices 4-6 function 1 */
664                 pci_read_config_dword(pvt->pci_ch[i][1],
665                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
666                 pci_read_config_dword(pvt->pci_ch[i][1],
667                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
668                 pci_read_config_dword(pvt->pci_ch[i][1],
669                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
670
671                 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
672                         "%d ranks, %cDIMMs\n",
673                         i,
674                         RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
675                         data,
676                         pvt->channel[i].ranks,
677                         (data & REGISTERED_DIMM) ? 'R' : 'U');
678
679                 for (j = 0; j < 3; j++) {
680                         u32 banks, ranks, rows, cols;
681                         u32 size, npages;
682
683                         if (!DIMM_PRESENT(dimm_dod[j]))
684                                 continue;
685
686                         banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
687                         ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
688                         rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
689                         cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
690
691                         /* DDR3 has 8 I/O banks */
692                         size = (rows * cols * banks * ranks) >> (20 - 3);
693
694                         pvt->channel[i].dimms++;
695
696                         debugf0("\tdimm %d %d Mb offset: %x, "
697                                 "bank: %d, rank: %d, row: %#x, col: %#x\n",
698                                 j, size,
699                                 RANKOFFSET(dimm_dod[j]),
700                                 banks, ranks, rows, cols);
701
702                         npages = MiB_TO_PAGES(size);
703
704                         csr = &mci->csrows[*csrow];
705                         csr->first_page = last_page + 1;
706                         last_page += npages;
707                         csr->last_page = last_page;
708                         csr->nr_pages = npages;
709
710                         csr->page_mask = 0;
711                         csr->grain = 8;
712                         csr->csrow_idx = *csrow;
713                         csr->nr_channels = 1;
714
715                         csr->channels[0].chan_idx = i;
716                         csr->channels[0].ce_count = 0;
717
718                         pvt->csrow_map[i][j] = *csrow;
719
720                         switch (banks) {
721                         case 4:
722                                 csr->dtype = DEV_X4;
723                                 break;
724                         case 8:
725                                 csr->dtype = DEV_X8;
726                                 break;
727                         case 16:
728                                 csr->dtype = DEV_X16;
729                                 break;
730                         default:
731                                 csr->dtype = DEV_UNKNOWN;
732                         }
733
734                         csr->edac_mode = mode;
735                         csr->mtype = mtype;
736
737                         (*csrow)++;
738                 }
739
740                 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
741                 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
742                 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
743                 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
744                 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
745                 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
746                 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
747                 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
748                 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
749                 for (j = 0; j < 8; j++)
750                         debugf1("\t\t%#x\t%#x\t%#x\n",
751                                 (value[j] >> 27) & 0x1,
752                                 (value[j] >> 24) & 0x7,
753                                 (value[j] && ((1 << 24) - 1)));
754         }
755
756         return 0;
757 }
758
759 /****************************************************************************
760                         Error insertion routines
761  ****************************************************************************/
762
763 /* The i7core has independent error injection features per channel.
764    However, to have a simpler code, we don't allow enabling error injection
765    on more than one channel.
766    Also, since a change at an inject parameter will be applied only at enable,
767    we're disabling error injection on all write calls to the sysfs nodes that
768    controls the error code injection.
769  */
770 static int disable_inject(const struct mem_ctl_info *mci)
771 {
772         struct i7core_pvt *pvt = mci->pvt_info;
773
774         pvt->inject.enable = 0;
775
776         if (!pvt->pci_ch[pvt->inject.channel][0])
777                 return -ENODEV;
778
779         pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
780                                 MC_CHANNEL_ERROR_INJECT, 0);
781
782         return 0;
783 }
784
785 /*
786  * i7core inject inject.section
787  *
788  *      accept and store error injection inject.section value
789  *      bit 0 - refers to the lower 32-byte half cacheline
790  *      bit 1 - refers to the upper 32-byte half cacheline
791  */
792 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
793                                            const char *data, size_t count)
794 {
795         struct i7core_pvt *pvt = mci->pvt_info;
796         unsigned long value;
797         int rc;
798
799         if (pvt->inject.enable)
800                 disable_inject(mci);
801
802         rc = strict_strtoul(data, 10, &value);
803         if ((rc < 0) || (value > 3))
804                 return -EIO;
805
806         pvt->inject.section = (u32) value;
807         return count;
808 }
809
810 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
811                                               char *data)
812 {
813         struct i7core_pvt *pvt = mci->pvt_info;
814         return sprintf(data, "0x%08x\n", pvt->inject.section);
815 }
816
817 /*
818  * i7core inject.type
819  *
820  *      accept and store error injection inject.section value
821  *      bit 0 - repeat enable - Enable error repetition
822  *      bit 1 - inject ECC error
823  *      bit 2 - inject parity error
824  */
825 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
826                                         const char *data, size_t count)
827 {
828         struct i7core_pvt *pvt = mci->pvt_info;
829         unsigned long value;
830         int rc;
831
832         if (pvt->inject.enable)
833                 disable_inject(mci);
834
835         rc = strict_strtoul(data, 10, &value);
836         if ((rc < 0) || (value > 7))
837                 return -EIO;
838
839         pvt->inject.type = (u32) value;
840         return count;
841 }
842
843 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
844                                               char *data)
845 {
846         struct i7core_pvt *pvt = mci->pvt_info;
847         return sprintf(data, "0x%08x\n", pvt->inject.type);
848 }
849
850 /*
 * i7core_inject.eccmask_store
852  *
853  * The type of error (UE/CE) will depend on the inject.eccmask value:
854  *   Any bits set to a 1 will flip the corresponding ECC bit
855  *   Correctable errors can be injected by flipping 1 bit or the bits within
856  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
857  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
858  *   uncorrectable error to be injected.
859  */
860 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
861                                         const char *data, size_t count)
862 {
863         struct i7core_pvt *pvt = mci->pvt_info;
864         unsigned long value;
865         int rc;
866
867         if (pvt->inject.enable)
868                 disable_inject(mci);
869
870         rc = strict_strtoul(data, 10, &value);
871         if (rc < 0)
872                 return -EIO;
873
874         pvt->inject.eccmask = (u32) value;
875         return count;
876 }
877
878 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
879                                               char *data)
880 {
881         struct i7core_pvt *pvt = mci->pvt_info;
882         return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
883 }
884
885 /*
886  * i7core_addrmatch
887  *
888  * The type of error (UE/CE) will depend on the inject.eccmask value:
889  *   Any bits set to a 1 will flip the corresponding ECC bit
890  *   Correctable errors can be injected by flipping 1 bit or the bits within
891  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
892  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
893  *   uncorrectable error to be injected.
894  */
895
/*
 * DECLARE_ADDR_MATCH - generate the sysfs store/show handler pair for one
 * address-match injection parameter (channel, dimm, rank, bank, page, col).
 *
 * store: accepts either the literal "any" (stored as -1, meaning "don't
 * match on this field") or an unsigned decimal value below @limit. Writing
 * also disables any currently-armed injection, since parameters are only
 * latched when injection is (re-)enabled.
 *
 * show: prints "any" for the -1 wildcard, the decimal value otherwise.
 *
 * Note: @value must be unsigned long, as that is the type
 * strict_strtoul() writes through its third argument; the previous
 * "long" declaration passed an incompatible pointer type. Assigning -1
 * to it (and narrowing back on the signed inject.param store) still
 * yields the -1 wildcard.
 */
#define DECLARE_ADDR_MATCH(param, limit)                        \
static ssize_t i7core_inject_store_##param(                     \
                struct mem_ctl_info *mci,                       \
                const char *data, size_t count)                 \
{                                                               \
        struct i7core_pvt *pvt;                                 \
        unsigned long value;                                    \
        int rc;                                                 \
                                                                \
        debugf1("%s()\n", __func__);                            \
        pvt = mci->pvt_info;                                    \
                                                                \
        if (pvt->inject.enable)                                 \
                disable_inject(mci);                            \
                                                                \
        if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
                value = -1;                                     \
        else {                                                  \
                rc = strict_strtoul(data, 10, &value);          \
                if ((rc < 0) || (value >= limit))               \
                        return -EIO;                            \
        }                                                       \
                                                                \
        pvt->inject.param = value;                              \
                                                                \
        return count;                                           \
}                                                               \
                                                                \
static ssize_t i7core_inject_show_##param(                      \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt;                                 \
                                                                \
        pvt = mci->pvt_info;                                    \
        debugf1("%s() pvt=%p\n", __func__, pvt);                \
        if (pvt->inject.param < 0)                              \
                return sprintf(data, "any\n");                  \
        else                                                    \
                return sprintf(data, "%d\n", pvt->inject.param);\
}
937
/*
 * ATTR_ADDR_MATCH - sysfs attribute entry (mode 0644) wired to the
 * handler pair generated by DECLARE_ADDR_MATCH() for the same @param.
 */
#define ATTR_ADDR_MATCH(param)                                  \
        {                                                       \
                .attr = {                                       \
                        .name = #param,                         \
                        .mode = (S_IRUGO | S_IWUSR)             \
                },                                              \
                .show  = i7core_inject_show_##param,            \
                .store = i7core_inject_store_##param,           \
        }
947
/*
 * Instantiate the match-field handlers. The second argument is one past
 * the largest accepted value: 3 channels/dimms, 4 ranks, 32 banks,
 * 64K pages, 16K columns.
 */
DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);
954
955 static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
956 {
957         u32 read;
958         int count;
959
960         debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
961                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
962                 where, val);
963
964         for (count = 0; count < 10; count++) {
965                 if (count)
966                         msleep(100);
967                 pci_write_config_dword(dev, where, val);
968                 pci_read_config_dword(dev, where, &read);
969
970                 if (read == val)
971                         return 0;
972         }
973
974         i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
975                 "write=%08x. Read=%08x\n",
976                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
977                 where, val, read);
978
979         return -EINVAL;
980 }
981
982 /*
983  * This routine prepares the Memory Controller for error injection.
984  * The error will be injected when some process tries to write to the
985  * memory that matches the given criteria.
986  * The criteria can be set in terms of a mask where dimm, rank, bank, page
987  * and col can be specified.
988  * A -1 value for any of the mask items will make the MCU to ignore
989  * that matching criteria for error injection.
990  *
 * Note that the error will only happen after a write operation
 * to a memory location that matches the condition. If REPEAT_EN is not
 * enabled in the inject mask, it will produce just one error. Otherwise,
 * it will repeat until the injectmask is cleared.
995  *
996  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
997  *    is reliable enough to check if the MC is using the
998  *    three channels. However, this is not clear at the datasheet.
999  */
1000 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
1001                                        const char *data, size_t count)
1002 {
1003         struct i7core_pvt *pvt = mci->pvt_info;
1004         u32 injectmask;
1005         u64 mask = 0;
1006         int  rc;
1007         long enable;
1008
1009         if (!pvt->pci_ch[pvt->inject.channel][0])
1010                 return 0;
1011
1012         rc = strict_strtoul(data, 10, &enable);
1013         if ((rc < 0))
1014                 return 0;
1015
1016         if (enable) {
1017                 pvt->inject.enable = 1;
1018         } else {
1019                 disable_inject(mci);
1020                 return count;
1021         }
1022
1023         /* Sets pvt->inject.dimm mask */
1024         if (pvt->inject.dimm < 0)
1025                 mask |= 1LL << 41;
1026         else {
1027                 if (pvt->channel[pvt->inject.channel].dimms > 2)
1028                         mask |= (pvt->inject.dimm & 0x3LL) << 35;
1029                 else
1030                         mask |= (pvt->inject.dimm & 0x1LL) << 36;
1031         }
1032
1033         /* Sets pvt->inject.rank mask */
1034         if (pvt->inject.rank < 0)
1035                 mask |= 1LL << 40;
1036         else {
1037                 if (pvt->channel[pvt->inject.channel].dimms > 2)
1038                         mask |= (pvt->inject.rank & 0x1LL) << 34;
1039                 else
1040                         mask |= (pvt->inject.rank & 0x3LL) << 34;
1041         }
1042
1043         /* Sets pvt->inject.bank mask */
1044         if (pvt->inject.bank < 0)
1045                 mask |= 1LL << 39;
1046         else
1047                 mask |= (pvt->inject.bank & 0x15LL) << 30;
1048
1049         /* Sets pvt->inject.page mask */
1050         if (pvt->inject.page < 0)
1051                 mask |= 1LL << 38;
1052         else
1053                 mask |= (pvt->inject.page & 0xffff) << 14;
1054
1055         /* Sets pvt->inject.column mask */
1056         if (pvt->inject.col < 0)
1057                 mask |= 1LL << 37;
1058         else
1059                 mask |= (pvt->inject.col & 0x3fff);
1060
1061         /*
1062          * bit    0: REPEAT_EN
1063          * bits 1-2: MASK_HALF_CACHELINE
1064          * bit    3: INJECT_ECC
1065          * bit    4: INJECT_ADDR_PARITY
1066          */
1067
1068         injectmask = (pvt->inject.type & 1) |
1069                      (pvt->inject.section & 0x3) << 1 |
1070                      (pvt->inject.type & 0x6) << (3 - 1);
1071
1072         /* Unlock writes to registers - this register is write only */
1073         pci_write_config_dword(pvt->pci_noncore,
1074                                MC_CFG_CONTROL, 0x2);
1075
1076         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1077                                MC_CHANNEL_ADDR_MATCH, mask);
1078         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1079                                MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1080
1081         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1082                                MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1083
1084         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1085                                MC_CHANNEL_ERROR_INJECT, injectmask);
1086
1087         /*
1088          * This is something undocumented, based on my tests
1089          * Without writing 8 to this register, errors aren't injected. Not sure
1090          * why.
1091          */
1092         pci_write_config_dword(pvt->pci_noncore,
1093                                MC_CFG_CONTROL, 8);
1094
1095         debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1096                 " inject 0x%08x\n",
1097                 mask, pvt->inject.eccmask, injectmask);
1098
1099
1100         return count;
1101 }
1102
1103 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1104                                         char *data)
1105 {
1106         struct i7core_pvt *pvt = mci->pvt_info;
1107         u32 injectmask;
1108
1109         if (!pvt->pci_ch[pvt->inject.channel][0])
1110                 return 0;
1111
1112         pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1113                                MC_CHANNEL_ERROR_INJECT, &injectmask);
1114
1115         debugf0("Inject error read: 0x%018x\n", injectmask);
1116
1117         if (injectmask & 0x0c)
1118                 pvt->inject.enable = 1;
1119
1120         return sprintf(data, "%d\n", pvt->inject.enable);
1121 }
1122
/*
 * DECLARE_COUNTER - generate the read-only sysfs handler that reports
 * the UDIMM corrected-error counter @param. Prints "data unavailable"
 * until at least one counter read happened, or when the controller
 * drives registered memory (is_registered).
 */
#define DECLARE_COUNTER(param)                                  \
static ssize_t i7core_show_counter_##param(                     \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt = mci->pvt_info;                 \
                                                                \
        debugf1("%s() \n", __func__);                           \
        if (!pvt->ce_count_available || (pvt->is_registered))   \
                return sprintf(data, "data unavailable\n");     \
        return sprintf(data, "%lu\n",                           \
                        pvt->udimm_ce_count[param]);            \
}
1136
/*
 * ATTR_COUNTER - sysfs attribute entry named "udimm<param>" wired to
 * the show handler generated by DECLARE_COUNTER(). No store handler:
 * the counters are read-only.
 */
#define ATTR_COUNTER(param)                                     \
        {                                                       \
                .attr = {                                       \
                        .name = __stringify(udimm##param),      \
                        .mode = (S_IRUGO | S_IWUSR)             \
                },                                              \
                .show  = i7core_show_counter_##param            \
        }

/* One counter file per UDIMM slot */
DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
1149
1150 /*
1151  * Sysfs struct
1152  */
1153
/* Files inside the inject_addrmatch directory: one node per match field */
static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
        ATTR_ADDR_MATCH(channel),
        ATTR_ADDR_MATCH(dimm),
        ATTR_ADDR_MATCH(rank),
        ATTR_ADDR_MATCH(bank),
        ATTR_ADDR_MATCH(page),
        ATTR_ADDR_MATCH(col),
        { } /* End of list */
};
1163
/* sysfs sub-directory "inject_addrmatch" grouping the match-field files */
static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
        .name  = "inject_addrmatch",
        .mcidev_attr = i7core_addrmatch_attrs,
};
1168
1169 static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1170         ATTR_COUNTER(0),
1171         ATTR_COUNTER(1),
1172         ATTR_COUNTER(2),
1173         { .attr = { .name = NULL } }
1174 };
1175
/* sysfs sub-directory "all_channel_counts" grouping the counter files */
static const struct mcidev_sysfs_group i7core_udimm_counters = {
        .name  = "all_channel_counts",
        .mcidev_attr = i7core_udimm_counters_attrs,
};
1180
/*
 * Error-injection sysfs attributes exposed for RDIMM-based controllers.
 * Same as the udimm table below, minus the all_channel_counts group
 * (those counters are only reported for unregistered memory).
 */
static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
        {
                .attr = {
                        .name = "inject_section",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_section_show,
                .store = i7core_inject_section_store,
        }, {
                .attr = {
                        .name = "inject_type",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_type_show,
                .store = i7core_inject_type_store,
        }, {
                .attr = {
                        .name = "inject_eccmask",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_eccmask_show,
                .store = i7core_inject_eccmask_store,
        }, {
                .grp = &i7core_inject_addrmatch,
        }, {
                .attr = {
                        .name = "inject_enable",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_enable_show,
                .store = i7core_inject_enable_store,
        },
        { }     /* End of list */
};
1215
/*
 * Error-injection sysfs attributes exposed for UDIMM-based controllers:
 * the rdimm set plus the all_channel_counts counter group.
 */
static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
        {
                .attr = {
                        .name = "inject_section",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_section_show,
                .store = i7core_inject_section_store,
        }, {
                .attr = {
                        .name = "inject_type",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_type_show,
                .store = i7core_inject_type_store,
        }, {
                .attr = {
                        .name = "inject_eccmask",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_eccmask_show,
                .store = i7core_inject_eccmask_store,
        }, {
                .grp = &i7core_inject_addrmatch,
        }, {
                .attr = {
                        .name = "inject_enable",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_enable_show,
                .store = i7core_inject_enable_store,
        }, {
                .grp = &i7core_udimm_counters,
        },
        { }     /* End of list */
};
1252
1253 /****************************************************************************
1254         Device initialization routines: put/get, init/exit
1255  ****************************************************************************/
1256
1257 /*
1258  *      i7core_put_devices      'put' all the devices that we have
1259  *                              reserved via 'get'
1260  */
1261 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1262 {
1263         int i;
1264
1265         debugf0(__FILE__ ": %s()\n", __func__);
1266         for (i = 0; i < i7core_dev->n_devs; i++) {
1267                 struct pci_dev *pdev = i7core_dev->pdev[i];
1268                 if (!pdev)
1269                         continue;
1270                 debugf0("Removing dev %02x:%02x.%d\n",
1271                         pdev->bus->number,
1272                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1273                 pci_dev_put(pdev);
1274         }
1275 }
1276
/*
 * Undo i7core_get_devices(): for every socket on the global list, drop
 * its PCI device references and free the i7core_dev. The _safe list
 * iterator is required because free_i7core_dev() disposes of the entry
 * we are standing on.
 */
static void i7core_put_all_devices(void)
{
        struct i7core_dev *i7core_dev, *tmp;

        list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
                i7core_put_devices(i7core_dev);
                free_i7core_dev(i7core_dev);
        }
}
1286
/*
 * Force-scan the hidden non-core buses when the first device of a table
 * entry cannot be found through normal PCI enumeration.
 * NOTE(review): presumably only called when the use_pci_fixup module
 * parameter is set - confirm at the call site.
 */
static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
{
        struct pci_dev *pdev = NULL;
        int i;

        /*
         * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core pci buses
         * aren't announced by acpi. So, we need to use a legacy scan probing
         * to detect them
         */
        while (table && table->descr) {
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
                if (unlikely(!pdev)) {
                        for (i = 0; i < MAX_SOCKET_BUSES; i++)
                                pcibios_scan_specific_bus(255-i);
                }
                /* pci_dev_put(NULL) is a no-op, so no check is needed */
                pci_dev_put(pdev);
                table++;
        }
}
1307
1308 static unsigned i7core_pci_lastbus(void)
1309 {
1310         int last_bus = 0, bus;
1311         struct pci_bus *b = NULL;
1312
1313         while ((b = pci_find_next_bus(b)) != NULL) {
1314                 bus = b->number;
1315                 debugf0("Found bus %d\n", bus);
1316                 if (bus > last_bus)
1317                         last_bus = bus;
1318         }
1319
1320         debugf0("Last bus %d\n", last_bus);
1321
1322         return last_bus;
1323 }
1324
1325 /*
1326  *      i7core_get_devices      Find and perform 'get' operation on the MCH's
1327  *                      device/functions we want to reference for this driver
1328  *
1329  *                      Need to 'get' device 16 func 1 and func 2
1330  */
/*
 * i7core_get_onedevice - 'get' the PCI device described by
 * table->descr[devno] and attach it to the per-socket i7core_dev
 * structure, allocating that structure on first use.
 *
 * Returns 0 on success, and also when an optional device (or a further
 * instance of an already-found device) is absent. Returns a negative
 * errno on real failures. On success, *prev points to the device just
 * taken, so the caller can loop to pick up multiple instances of the
 * same PCI ID (one per socket).
 */
static int i7core_get_onedevice(struct pci_dev **prev,
                                const struct pci_id_table *table,
                                const unsigned devno,
                                const unsigned last_bus)
{
        struct i7core_dev *i7core_dev;
        const struct pci_id_descr *dev_descr = &table->descr[devno];

        struct pci_dev *pdev = NULL;
        u8 bus = 0;
        u8 socket = 0;

        pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                              dev_descr->dev_id, *prev);

        /*
         * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
         * are at addr 8086:2c40, instead of 8086:2c41. So, we need
         * to probe for the alternate address in case of failure
         */
        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
                                      *prev);

        if (!pdev) {
                /* A previous instance existed: we simply ran out of them */
                if (*prev) {
                        *prev = pdev;
                        return 0;
                }

                if (dev_descr->optional)
                        return 0;

                /* Missing first device: table entry doesn't match this CPU */
                if (devno == 0)
                        return -ENODEV;

                i7core_printk(KERN_INFO,
                        "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
                        dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

                /* End of list, leave */
                return -ENODEV;
        }
        bus = pdev->bus->number;

        /* Socket index is how far the device's bus sits below the last bus */
        socket = last_bus - bus;

        i7core_dev = get_i7core_dev(socket);
        if (!i7core_dev) {
                i7core_dev = alloc_i7core_dev(socket, table);
                if (!i7core_dev)
                        return -ENOMEM;
        }

        if (i7core_dev->pdev[devno]) {
                i7core_printk(KERN_ERR,
                        "Duplicated device for "
                        "dev %02x:%02x.%d PCI ID %04x:%04x\n",
                        bus, dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
                pci_dev_put(pdev);
                return -ENODEV;
        }

        /*
         * Store the reference before the checks below: on their failure
         * paths the device is released via i7core_put_devices()
         */
        i7core_dev->pdev[devno] = pdev;

        /* Sanity check */
        if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
                        PCI_FUNC(pdev->devfn) != dev_descr->func)) {
                i7core_printk(KERN_ERR,
                        "Device PCI ID %04x:%04x "
                        "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
                        bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                        bus, dev_descr->dev, dev_descr->func);
                return -ENODEV;
        }

        /* Be sure that the device is enabled */
        if (unlikely(pci_enable_device(pdev) < 0)) {
                i7core_printk(KERN_ERR,
                        "Couldn't enable "
                        "dev %02x:%02x.%d PCI ID %04x:%04x\n",
                        bus, dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
                return -ENODEV;
        }

        debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
                socket, bus, dev_descr->dev,
                dev_descr->func,
                PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

        *prev = pdev;

        return 0;
}
1434
/*
 * 'get' every device of every entry of @table, for every socket.
 * The inner do/while keeps calling i7core_get_onedevice() while it
 * leaves pdev non-NULL, i.e. once per socket holding that device.
 * Returns 0 on success, -ENODEV (after releasing everything) otherwise.
 */
static int i7core_get_devices(const struct pci_id_table *table)
{
        int i, rc, last_bus;
        struct pci_dev *pdev = NULL;

        last_bus = i7core_pci_lastbus();

        while (table && table->descr) {
                for (i = 0; i < table->n_devs; i++) {
                        pdev = NULL;
                        do {
                                rc = i7core_get_onedevice(&pdev, table, i,
                                                          last_bus);
                                if (rc < 0) {
                                        /*
                                         * First device missing: this table
                                         * entry doesn't match the hardware;
                                         * terminate the for loop and move
                                         * on to the next table entry.
                                         */
                                        if (i == 0) {
                                                i = table->n_devs;
                                                break;
                                        }
                                        i7core_put_all_devices();
                                        return -ENODEV;
                                }
                        } while (pdev);
                }
                table++;
        }

        return 0;
}
1463
/*
 * Bind each PCI device gathered in @i7core_dev to the matching pointer
 * in the driver's private data, keyed by PCI slot/function:
 *   slot 3              -> pvt->pci_mcr[func]   (MC registers)
 *   slots 4..4+NUM_CHANS -> pvt->pci_ch[ch][func] (per-channel regs)
 *   slot 0, func 0      -> pvt->pci_noncore
 * Any other slot/func is rejected with -EINVAL.
 *
 * The presence of dev 3, func 2 sets pvt->is_registered.
 * NOTE(review): presumably that function only exists on registered
 * (RDIMM) memory configurations - confirm against the datasheet.
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
                         struct i7core_dev *i7core_dev)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        struct pci_dev *pdev;
        int i, func, slot;

        /* Associates i7core_dev and mci for future usage */
        pvt->i7core_dev = i7core_dev;
        i7core_dev->mci = mci;

        pvt->is_registered = 0;
        for (i = 0; i < i7core_dev->n_devs; i++) {
                pdev = i7core_dev->pdev[i];
                if (!pdev)
                        continue;

                func = PCI_FUNC(pdev->devfn);
                slot = PCI_SLOT(pdev->devfn);
                if (slot == 3) {
                        if (unlikely(func > MAX_MCR_FUNC))
                                goto error;
                        pvt->pci_mcr[func] = pdev;
                } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
                        if (unlikely(func > MAX_CHAN_FUNC))
                                goto error;
                        pvt->pci_ch[slot - 4][func] = pdev;
                } else if (!slot && !func)
                        pvt->pci_noncore = pdev;
                else
                        goto error;

                debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
                        PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                        pdev, i7core_dev->socket);

                if (PCI_SLOT(pdev->devfn) == 3 &&
                        PCI_FUNC(pdev->devfn) == 2)
                        pvt->is_registered = 1;
        }

        return 0;

error:
        i7core_printk(KERN_ERR, "Device %d, function %d "
                      "is out of the expected range\n",
                      slot, func);
        return -EINVAL;
}
1513
1514 /****************************************************************************
1515                         Error check routines
1516  ****************************************************************************/
1517 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1518                                       const int chan,
1519                                       const int dimm,
1520                                       const int add)
1521 {
1522         char *msg;
1523         struct i7core_pvt *pvt = mci->pvt_info;
1524         int row = pvt->csrow_map[chan][dimm], i;
1525
1526         for (i = 0; i < add; i++) {
1527                 msg = kasprintf(GFP_KERNEL, "Corrected error "
1528                                 "(Socket=%d channel=%d dimm=%d)",
1529                                 pvt->i7core_dev->socket, chan, dimm);
1530
1531                 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1532                 kfree (msg);
1533         }
1534 }
1535
/*
 * Fold the freshly-read per-DIMM corrected-error counts (@new0..@new2)
 * for channel @chan into the running totals, and report the deltas to
 * the EDAC core. The first call only snapshots the counters (there is
 * no previous value to diff against).
 */
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
                                         const int chan,
                                         const int new0,
                                         const int new1,
                                         const int new2)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        int add0 = 0, add1 = 0, add2 = 0;
        /* Updates CE counters if it is not the first time here */
        if (pvt->ce_count_available) {
                /* Updates CE counters */

                add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
                add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
                add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

                /*
                 * A negative delta means the 15-bit hardware counter
                 * wrapped around since the last read; compensate.
                 */
                if (add2 < 0)
                        add2 += 0x7fff;
                pvt->rdimm_ce_count[chan][2] += add2;

                if (add1 < 0)
                        add1 += 0x7fff;
                pvt->rdimm_ce_count[chan][1] += add1;

                if (add0 < 0)
                        add0 += 0x7fff;
                pvt->rdimm_ce_count[chan][0] += add0;
        } else
                pvt->ce_count_available = 1;

        /* Store the new values */
        pvt->rdimm_last_ce_count[chan][2] = new2;
        pvt->rdimm_last_ce_count[chan][1] = new1;
        pvt->rdimm_last_ce_count[chan][0] = new0;

        /*updated the edac core */
        if (add0 != 0)
                i7core_rdimm_update_csrow(mci, chan, 0, add0);
        if (add1 != 0)
                i7core_rdimm_update_csrow(mci, chan, 1, add1);
        if (add2 != 0)
                i7core_rdimm_update_csrow(mci, chan, 2, add2);

}
1580
/*
 * Poll the six MC_COR_ECC_CNT registers (dev 3, func 2) and update the
 * per-channel RDIMM corrected-error counts. Each 32-bit register holds
 * a top and a bottom DIMM counter; how they map to DIMMs 0..2 depends
 * on whether the channel is populated with more than two DIMMs.
 */
static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 rcv[3][2];
        int i, new0, new1, new2;

        /*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
                                                                &rcv[0][0]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
                                                                &rcv[0][1]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
                                                                &rcv[1][0]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
                                                                &rcv[1][1]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
                                                                &rcv[2][0]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
                                                                &rcv[2][1]);
        for (i = 0 ; i < 3; i++) {
                debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
                        (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
                /*if the channel has 3 dimms*/
                if (pvt->channel[i].dimms > 2) {
                        new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
                        new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
                        new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
                } else {
                        /* 1-2 dimms: top+bottom of each reg is one dimm */
                        new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
                                        DIMM_BOT_COR_ERR(rcv[i][0]);
                        new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
                                        DIMM_BOT_COR_ERR(rcv[i][1]);
                        new2 = 0;
                }

                i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
        }
}
1619
1620 /* This function is based on the device 3 function 4 registers as described on:
1621  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1622  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1623  * also available at:
1624  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1625  */
1626 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1627 {
1628         struct i7core_pvt *pvt = mci->pvt_info;
1629         u32 rcv1, rcv0;
1630         int new0, new1, new2;
1631
1632         if (!pvt->pci_mcr[4]) {
1633                 debugf0("%s MCR registers not found\n", __func__);
1634                 return;
1635         }
1636
1637         /* Corrected test errors */
1638         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1639         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1640
1641         /* Store the new values */
1642         new2 = DIMM2_COR_ERR(rcv1);
1643         new1 = DIMM1_COR_ERR(rcv0);
1644         new0 = DIMM0_COR_ERR(rcv0);
1645
1646         /* Updates CE counters if it is not the first time here */
1647         if (pvt->ce_count_available) {
1648                 /* Updates CE counters */
1649                 int add0, add1, add2;
1650
1651                 add2 = new2 - pvt->udimm_last_ce_count[2];
1652                 add1 = new1 - pvt->udimm_last_ce_count[1];
1653                 add0 = new0 - pvt->udimm_last_ce_count[0];
1654
1655                 if (add2 < 0)
1656                         add2 += 0x7fff;
1657                 pvt->udimm_ce_count[2] += add2;
1658
1659                 if (add1 < 0)
1660                         add1 += 0x7fff;
1661                 pvt->udimm_ce_count[1] += add1;
1662
1663                 if (add0 < 0)
1664                         add0 += 0x7fff;
1665                 pvt->udimm_ce_count[0] += add0;
1666
1667                 if (add0 | add1 | add2)
1668                         i7core_printk(KERN_ERR, "New Corrected error(s): "
1669                                       "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1670                                       add0, add1, add2);
1671         } else
1672                 pvt->ce_count_available = 1;
1673
1674         /* Store the new values */
1675         pvt->udimm_last_ce_count[2] = new2;
1676         pvt->udimm_last_ce_count[1] = new1;
1677         pvt->udimm_last_ce_count[0] = new0;
1678 }
1679
1680 /*
 * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and
 * IA-32 Architectures Software Developer's Manual Volume 3B.
 * Nehalem is defined as family 0x06, model 0x1a.
1684  *
1685  * The MCA registers used here are the following ones:
1686  *     struct mce field MCA Register
1687  *     m->status        MSR_IA32_MC8_STATUS
1688  *     m->addr          MSR_IA32_MC8_ADDR
1689  *     m->misc          MSR_IA32_MC8_MISC
1690  * In the case of Nehalem, the error information is masked at .status and .misc
1691  * fields
1692  */
1693 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1694                                     const struct mce *m)
1695 {
1696         struct i7core_pvt *pvt = mci->pvt_info;
1697         char *type, *optype, *err, *msg;
1698         unsigned long error = m->status & 0x1ff0000l;
1699         u32 optypenum = (m->status >> 4) & 0x07;
1700         u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1701         u32 dimm = (m->misc >> 16) & 0x3;
1702         u32 channel = (m->misc >> 18) & 0x3;
1703         u32 syndrome = m->misc >> 32;
1704         u32 errnum = find_first_bit(&error, 32);
1705         int csrow;
1706
1707         if (m->mcgstatus & 1)
1708                 type = "FATAL";
1709         else
1710                 type = "NON_FATAL";
1711
1712         switch (optypenum) {
1713         case 0:
1714                 optype = "generic undef request";
1715                 break;
1716         case 1:
1717                 optype = "read error";
1718                 break;
1719         case 2:
1720                 optype = "write error";
1721                 break;
1722         case 3:
1723                 optype = "addr/cmd error";
1724                 break;
1725         case 4:
1726                 optype = "scrubbing error";
1727                 break;
1728         default:
1729                 optype = "reserved";
1730                 break;
1731         }
1732
1733         switch (errnum) {
1734         case 16:
1735                 err = "read ECC error";
1736                 break;
1737         case 17:
1738                 err = "RAS ECC error";
1739                 break;
1740         case 18:
1741                 err = "write parity error";
1742                 break;
1743         case 19:
1744                 err = "redundacy loss";
1745                 break;
1746         case 20:
1747                 err = "reserved";
1748                 break;
1749         case 21:
1750                 err = "memory range error";
1751                 break;
1752         case 22:
1753                 err = "RTID out of range";
1754                 break;
1755         case 23:
1756                 err = "address parity error";
1757                 break;
1758         case 24:
1759                 err = "byte enable parity error";
1760                 break;
1761         default:
1762                 err = "unknown";
1763         }
1764
1765         /* FIXME: should convert addr into bank and rank information */
1766         msg = kasprintf(GFP_ATOMIC,
1767                 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1768                 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1769                 type, (long long) m->addr, m->cpu, dimm, channel,
1770                 syndrome, core_err_cnt, (long long)m->status,
1771                 (long long)m->misc, optype, err);
1772
1773         debugf0("%s", msg);
1774
1775         csrow = pvt->csrow_map[channel][dimm];
1776
1777         /* Call the helper to output message */
1778         if (m->mcgstatus & 1)
1779                 edac_mc_handle_fbd_ue(mci, csrow, 0,
1780                                 0 /* FIXME: should be channel here */, msg);
1781         else if (!pvt->is_registered)
1782                 edac_mc_handle_fbd_ce(mci, csrow,
1783                                 0 /* FIXME: should be channel here */, msg);
1784
1785         kfree(msg);
1786 }
1787
1788 /*
1789  *      i7core_check_error      Retrieve and process errors reported by the
1790  *                              hardware. Called by the Core module.
1791  */
1792 static void i7core_check_error(struct mem_ctl_info *mci)
1793 {
1794         struct i7core_pvt *pvt = mci->pvt_info;
1795         int i;
1796         unsigned count = 0;
1797         struct mce *m;
1798
1799         /*
1800          * MCE first step: Copy all mce errors into a temporary buffer
1801          * We use a double buffering here, to reduce the risk of
1802          * loosing an error.
1803          */
1804         smp_rmb();
1805         count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1806                 % MCE_LOG_LEN;
1807         if (!count)
1808                 goto check_ce_error;
1809
1810         m = pvt->mce_outentry;
1811         if (pvt->mce_in + count > MCE_LOG_LEN) {
1812                 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1813
1814                 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1815                 smp_wmb();
1816                 pvt->mce_in = 0;
1817                 count -= l;
1818                 m += l;
1819         }
1820         memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1821         smp_wmb();
1822         pvt->mce_in += count;
1823
1824         smp_rmb();
1825         if (pvt->mce_overrun) {
1826                 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1827                               pvt->mce_overrun);
1828                 smp_wmb();
1829                 pvt->mce_overrun = 0;
1830         }
1831
1832         /*
1833          * MCE second step: parse errors and display
1834          */
1835         for (i = 0; i < count; i++)
1836                 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1837
1838         /*
1839          * Now, let's increment CE error counts
1840          */
1841 check_ce_error:
1842         if (!pvt->is_registered)
1843                 i7core_udimm_check_mc_ecc_err(mci);
1844         else
1845                 i7core_rdimm_check_mc_ecc_err(mci);
1846 }
1847
1848 /*
1849  * i7core_mce_check_error       Replicates mcelog routine to get errors
1850  *                              This routine simply queues mcelog errors, and
1851  *                              return. The error itself should be handled later
1852  *                              by i7core_check_error.
1853  * WARNING: As this routine should be called at NMI time, extra care should
1854  * be taken to avoid deadlocks, and to be as fast as possible.
1855  */
static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;

#ifdef CONFIG_SMP
	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
		return 0;
#endif

	/* Producer side of the lock-free ring shared with i7core_check_error() */
	smp_rmb();
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		/* Ring is full: account the lost event and drop it */
		smp_wmb();
		pvt->mce_overrun++;
		return 0;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advise mcelog that the error was handled */
	return 1;
}
1897
1898 static int i7core_register_mci(struct i7core_dev *i7core_dev,
1899                                const int num_channels, const int num_csrows)
1900 {
1901         struct mem_ctl_info *mci;
1902         struct i7core_pvt *pvt;
1903         int csrow = 0;
1904         int rc;
1905
1906         /* allocate a new MC control structure */
1907         mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
1908                             i7core_dev->socket);
1909         if (unlikely(!mci))
1910                 return -ENOMEM;
1911
1912         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
1913                 __func__, mci, &i7core_dev->pdev[0]->dev);
1914
1915         /* record ptr to the generic device */
1916         mci->dev = &i7core_dev->pdev[0]->dev;
1917
1918         pvt = mci->pvt_info;
1919         memset(pvt, 0, sizeof(*pvt));
1920
1921         /*
1922          * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1923          * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1924          * memory channels
1925          */
1926         mci->mtype_cap = MEM_FLAG_DDR3;
1927         mci->edac_ctl_cap = EDAC_FLAG_NONE;
1928         mci->edac_cap = EDAC_FLAG_NONE;
1929         mci->mod_name = "i7core_edac.c";
1930         mci->mod_ver = I7CORE_REVISION;
1931         mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1932                                   i7core_dev->socket);
1933         mci->dev_name = pci_name(i7core_dev->pdev[0]);
1934         mci->ctl_page_to_phys = NULL;
1935
1936         if (pvt->is_registered)
1937                 mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
1938         else
1939                 mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;
1940
1941         /* Set the function pointer to an actual operation function */
1942         mci->edac_check = i7core_check_error;
1943
1944         /* Store pci devices at mci for faster access */
1945         rc = mci_bind_devs(mci, i7core_dev);
1946         if (unlikely(rc < 0))
1947                 goto fail;
1948
1949         /* Get dimm basic config */
1950         get_dimm_config(mci, &csrow);
1951
1952         /* add this new MC control structure to EDAC's list of MCs */
1953         if (unlikely(edac_mc_add_mc(mci))) {
1954                 debugf0("MC: " __FILE__
1955                         ": %s(): failed edac_mc_add_mc()\n", __func__);
1956                 /* FIXME: perhaps some code should go here that disables error
1957                  * reporting if we just enabled it
1958                  */
1959
1960                 rc = -EINVAL;
1961                 goto fail;
1962         }
1963
1964         /* Default error mask is any memory */
1965         pvt->inject.channel = 0;
1966         pvt->inject.dimm = -1;
1967         pvt->inject.rank = -1;
1968         pvt->inject.bank = -1;
1969         pvt->inject.page = -1;
1970         pvt->inject.col = -1;
1971
1972         /* Registers on edac_mce in order to receive memory errors */
1973         pvt->edac_mce.priv = mci;
1974         pvt->edac_mce.check_error = i7core_mce_check_error;
1975
1976         /* allocating generic PCI control info */
1977         pvt->i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1978                                                  EDAC_MOD_STR);
1979         if (unlikely(!pvt->i7core_pci)) {
1980                 printk(KERN_WARNING
1981                         "%s(): Unable to create PCI control\n",
1982                         __func__);
1983                 printk(KERN_WARNING
1984                         "%s(): PCI error report via EDAC not setup\n",
1985                         __func__);
1986         }
1987
1988         rc = edac_mce_register(&pvt->edac_mce);
1989         if (unlikely(rc < 0)) {
1990                 debugf0("MC: " __FILE__
1991                         ": %s(): failed edac_mce_register()\n", __func__);
1992         }
1993
1994 fail:
1995         if (rc < 0)
1996                 edac_mc_free(mci);
1997         return rc;
1998 }
1999
2000 /*
2001  *      i7core_probe    Probe for ONE instance of device to see if it is
2002  *                      present.
2003  *      return:
2004  *              0 for FOUND a device
2005  *              < 0 for error code
2006  */
2007
2008 static int __devinit i7core_probe(struct pci_dev *pdev,
2009                                   const struct pci_device_id *id)
2010 {
2011         int rc;
2012         struct i7core_dev *i7core_dev;
2013
2014         /* get the pci devices we want to reserve for our use */
2015         mutex_lock(&i7core_edac_lock);
2016
2017         /*
2018          * All memory controllers are allocated at the first pass.
2019          */
2020         if (unlikely(probed >= 1)) {
2021                 mutex_unlock(&i7core_edac_lock);
2022                 return -EINVAL;
2023         }
2024         probed++;
2025
2026         rc = i7core_get_devices(pci_dev_table);
2027         if (unlikely(rc < 0))
2028                 goto fail0;
2029
2030         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2031                 int channels;
2032                 int csrows;
2033
2034                 /* Check the number of active and not disabled channels */
2035                 rc = i7core_get_active_channels(i7core_dev->socket,
2036                                                 &channels, &csrows);
2037                 if (unlikely(rc < 0))
2038                         goto fail1;
2039
2040                 rc = i7core_register_mci(i7core_dev, channels, csrows);
2041                 if (unlikely(rc < 0))
2042                         goto fail1;
2043         }
2044
2045         i7core_printk(KERN_INFO, "Driver loaded.\n");
2046
2047         mutex_unlock(&i7core_edac_lock);
2048         return 0;
2049
2050 fail1:
2051         i7core_put_all_devices();
2052 fail0:
2053         mutex_unlock(&i7core_edac_lock);
2054         return rc;
2055 }
2056
2057 /*
2058  *      i7core_remove   destructor for one instance of device
2059  *
2060  */
static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;
	struct i7core_dev *i7core_dev, *tmp;
	struct i7core_pvt *pvt;

	debugf0(__FILE__ ": %s()\n", __func__);

	/*
	 * we have a trouble here: pdev value for removal will be wrong, since
	 * it will point to the X58 register used to detect that the machine
	 * is a Nehalem or upper design. However, due to the way several PCI
	 * devices are grouped together to provide MC functionality, we need
	 * to use a different method for releasing the devices
	 */

	mutex_lock(&i7core_edac_lock);
	/*
	 * _safe variant: each iteration frees the current node; "tmp"
	 * caches the next one before it is touched, so re-assigning
	 * i7core_dev inside the body below is also safe.
	 */
	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		mci = find_mci_by_dev(&i7core_dev->pdev[0]->dev);
		/*
		 * NOTE(review): in this branch the i7core_dev is neither
		 * put nor freed, so its PCI refs leak — verify whether this
		 * can happen outside of a driver bug. (Message typo
		 * "hanler" left as-is: it is a runtime string.)
		 */
		if (unlikely(!mci || !mci->pvt_info)) {
			debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
				__func__, &i7core_dev->pdev[0]->dev);

				i7core_printk(KERN_ERR,
				      "Couldn't find mci hanler\n");
		} else {
			pvt = mci->pvt_info;
			i7core_dev = pvt->i7core_dev;

			debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
				__func__, mci, &i7core_dev->pdev[0]->dev);

			/* Disable MCE NMI handler */
			edac_mce_unregister(&pvt->edac_mce);

			/* Disable EDAC polling */
			if (likely(pvt->i7core_pci))
				edac_pci_release_generic_ctl(pvt->i7core_pci);
			else
				i7core_printk(KERN_ERR,
					      "Couldn't find mem_ctl_info for socket %d\n",
					      i7core_dev->socket);
			pvt->i7core_pci = NULL;

			/* Remove MC sysfs nodes */
			edac_mc_del_mc(&i7core_dev->pdev[0]->dev);

			/* ctl_name came from kasprintf(); edac_mc_free() does not free it */
			debugf1("%s: free mci struct\n", mci->ctl_name);
			kfree(mci->ctl_name);
			edac_mc_free(mci);

			/* Release PCI resources */
			i7core_put_devices(i7core_dev);
			free_i7core_dev(i7core_dev);
		}
	}
	/* Allow a future probe to run the full device scan again */
	probed--;

	mutex_unlock(&i7core_edac_lock);
}
2121
/* Export the PCI ID table so userspace tooling can autoload this module */
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 *	i7core_driver	pci_driver structure for this module
 *
 */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};
2134
2135 /*
2136  *      i7core_init             Module entry function
2137  *                      Try to initialize this module for its devices
2138  */
2139 static int __init i7core_init(void)
2140 {
2141         int pci_rc;
2142
2143         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2144
2145         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2146         opstate_init();
2147
2148         if (use_pci_fixup)
2149                 i7core_xeon_pci_fixup(pci_dev_table);
2150
2151         pci_rc = pci_register_driver(&i7core_driver);
2152
2153         if (pci_rc >= 0)
2154                 return 0;
2155
2156         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2157                       pci_rc);
2158
2159         return pci_rc;
2160 }
2161
2162 /*
2163  *      i7core_exit()   Module exit function
2164  *                      Unregister the driver
2165  */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	/* Undo the pci_register_driver() done at module init */
	pci_unregister_driver(&i7core_driver);
}
2171
module_init(i7core_init);
module_exit(i7core_exit);

/* Module metadata */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

/* Allow selecting the EDAC error-reporting mode on the kernel command line */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");