i7core_edac: use edac's own way to print errors
[pandora-kernel.git] / drivers / edac / i7core_edac.c
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *       Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *      http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/smp.h>
37 #include <asm/mce.h>
38 #include <asm/processor.h>
39
40 #include "edac_core.h"
41
42 /* Static vars */
43 static LIST_HEAD(i7core_edac_list);
44 static DEFINE_MUTEX(i7core_edac_lock);
45 static int probed;
46
47 static int use_pci_fixup;
48 module_param(use_pci_fixup, int, 0444);
49 MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255 and are not reported by the BIOS.
 * We currently probe systems with only 2 sockets. To support more QPI
 * (Quick Path Interconnect) sockets, just increment this number.
 */
56 #define MAX_SOCKET_BUSES        2
57
58
59 /*
60  * Alter this version for the module when modifications are made
61  */
62 #define I7CORE_REVISION    " Ver: 1.0.0"
63 #define EDAC_MOD_STR      "i7core_edac"
64
65 /*
66  * Debug macros
67  */
68 #define i7core_printk(level, fmt, arg...)                       \
69         edac_printk(level, "i7core", fmt, ##arg)
70
71 #define i7core_mc_printk(mci, level, fmt, arg...)               \
72         edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
73
74 /*
75  * i7core Memory Controller Registers
76  */
77
78         /* OFFSETS for Device 0 Function 0 */
79
80 #define MC_CFG_CONTROL  0x90
81   #define MC_CFG_UNLOCK         0x02
82   #define MC_CFG_LOCK           0x00
83
84         /* OFFSETS for Device 3 Function 0 */
85
86 #define MC_CONTROL      0x48
87 #define MC_STATUS       0x4c
88 #define MC_MAX_DOD      0x64
89
90 /*
 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
92  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
93  */
94
95 #define MC_TEST_ERR_RCV1        0x60
96   #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)
97
98 #define MC_TEST_ERR_RCV0        0x64
99   #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
100   #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)
101
/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
103 #define MC_SSRCONTROL           0x48
104   #define SSR_MODE_DISABLE      0x00
105   #define SSR_MODE_ENABLE       0x01
106   #define SSR_MODE_MASK         0x03
107
108 #define MC_SCRUB_CONTROL        0x4c
109   #define STARTSCRUB            (1 << 24)
110
111 #define MC_COR_ECC_CNT_0        0x80
112 #define MC_COR_ECC_CNT_1        0x84
113 #define MC_COR_ECC_CNT_2        0x88
114 #define MC_COR_ECC_CNT_3        0x8c
115 #define MC_COR_ECC_CNT_4        0x90
116 #define MC_COR_ECC_CNT_5        0x94
117
118 #define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
119 #define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)
120
121
122         /* OFFSETS for Devices 4,5 and 6 Function 0 */
123
124 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
125   #define THREE_DIMMS_PRESENT           (1 << 24)
126   #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
127   #define QUAD_RANK_PRESENT             (1 << 22)
128   #define REGISTERED_DIMM               (1 << 15)
129
130 #define MC_CHANNEL_MAPPER       0x60
131   #define RDLCH(r, ch)          ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
132   #define WRLCH(r, ch)          ((((r) >> (ch * 6)) & 0x07) - 1)
133
134 #define MC_CHANNEL_RANK_PRESENT 0x7c
135   #define RANK_PRESENT_MASK             0xffff
136
137 #define MC_CHANNEL_ADDR_MATCH   0xf0
138 #define MC_CHANNEL_ERROR_MASK   0xf8
139 #define MC_CHANNEL_ERROR_INJECT 0xfc
140   #define INJECT_ADDR_PARITY    0x10
141   #define INJECT_ECC            0x08
142   #define MASK_CACHELINE        0x06
143   #define MASK_FULL_CACHELINE   0x06
144   #define MASK_MSB32_CACHELINE  0x04
145   #define MASK_LSB32_CACHELINE  0x02
146   #define NO_MASK_CACHELINE     0x00
147   #define REPEAT_EN             0x01
148
149         /* OFFSETS for Devices 4,5 and 6 Function 1 */
150
151 #define MC_DOD_CH_DIMM0         0x48
152 #define MC_DOD_CH_DIMM1         0x4c
153 #define MC_DOD_CH_DIMM2         0x50
154   #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
155   #define RANKOFFSET(x)         ((x & RANKOFFSET_MASK) >> 10)
156   #define DIMM_PRESENT_MASK     (1 << 9)
157   #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
158   #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
159   #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
160   #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
161   #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
162   #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
163   #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
164   #define MC_DOD_NUMCOL_MASK            3
165   #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)
166
167 #define MC_RANK_PRESENT         0x7c
168
169 #define MC_SAG_CH_0     0x80
170 #define MC_SAG_CH_1     0x84
171 #define MC_SAG_CH_2     0x88
172 #define MC_SAG_CH_3     0x8c
173 #define MC_SAG_CH_4     0x90
174 #define MC_SAG_CH_5     0x94
175 #define MC_SAG_CH_6     0x98
176 #define MC_SAG_CH_7     0x9c
177
178 #define MC_RIR_LIMIT_CH_0       0x40
179 #define MC_RIR_LIMIT_CH_1       0x44
180 #define MC_RIR_LIMIT_CH_2       0x48
181 #define MC_RIR_LIMIT_CH_3       0x4C
182 #define MC_RIR_LIMIT_CH_4       0x50
183 #define MC_RIR_LIMIT_CH_5       0x54
184 #define MC_RIR_LIMIT_CH_6       0x58
185 #define MC_RIR_LIMIT_CH_7       0x5C
186 #define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)
187
188 #define MC_RIR_WAY_CH           0x80
189   #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
190   #define MC_RIR_WAY_RANK_MASK          0x7
191
192 /*
193  * i7core structs
194  */
195
196 #define NUM_CHANS 3
197 #define MAX_DIMMS 3             /* Max DIMMS per channel */
198 #define MAX_MCR_FUNC  4
199 #define MAX_CHAN_FUNC 3
200
/* Cached copies of the global MC registers (device 3, function 0) */
struct i7core_info {
	u32	mc_control;	/* MC_CONTROL: channel-active and ECCx8 bits */
	u32	mc_status;	/* MC_STATUS: ECC-enabled and channel-disabled bits */
	u32	max_dod;	/* MC_MAX_DOD: max DIMMs/ranks/banks/rows/cols */
	u32	ch_map;		/* MC_CHANNEL_MAPPER: logical<->physical channel map */
};
207
208
/* State of the sysfs-driven error-injection interface */
struct i7core_inject {
	int	enable;		/* non-zero while injection is armed */

	u32	section;	/* which 32-byte half cacheline(s) to hit (bits 0-1) */
	u32	type;		/* repeat/ECC/parity injection flags (bits 0-2) */
	u32	eccmask;	/* ECC bits to flip on injection */

	/* Error address mask */
	int channel, dimm, rank, bank, page, col;
};
219
/* Per-channel DIMM topology discovered at probe time */
struct i7core_channel {
	u32		ranks;	/* ranks per DIMM: 4 if QUAD_RANK_PRESENT, else 2 */
	u32		dimms;	/* number of DIMMs present on this channel */
};
224
/* Describes one PCI device the driver expects to find on each socket */
struct pci_id_descr {
	int			dev;		/* PCI device (slot) number */
	int			func;		/* PCI function number */
	int			dev_id;		/* PCI device ID to match */
	int			optional;	/* set for devices that may be absent
						 * (e.g. the RDIMM-only RAS function) */
};
231
/* One device list per supported processor family */
struct pci_id_table {
	const struct pci_id_descr	*descr;	/* array of device descriptors */
	int				n_devs;	/* number of entries in @descr */
};
236
/* Per-socket bookkeeping: every PCI device belonging to one memory controller */
struct i7core_dev {
	struct list_head	list;	/* node in the global i7core_edac_list */
	u8			socket;	/* socket number this MC belongs to */
	struct pci_dev		**pdev;	/* array of n_devs device pointers */
	int			n_devs;	/* length of the pdev array */
	struct mem_ctl_info	*mci;	/* EDAC MC instance for this socket */
};
244
/* Driver-private data hung off each mem_ctl_info (mci->pvt_info) */
struct i7core_pvt {
	struct pci_dev	*pci_noncore;			/* generic non-core registers */
	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];	/* device 3, per function */
	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1]; /* devices 4-6, per fn */

	struct i7core_dev *i7core_dev;			/* owning per-socket device */

	struct i7core_info	info;			/* cached global MC registers */
	struct i7core_inject	inject;			/* error-injection state */
	struct i7core_channel	channel[NUM_CHANS];	/* per-channel topology */

	int		ce_count_available;
	int		csrow_map[NUM_CHANS][MAX_DIMMS]; /* (channel, dimm) -> csrow */

			/* ECC corrected errors counts per udimm */
	unsigned long	udimm_ce_count[MAX_DIMMS];
	int		udimm_last_ce_count[MAX_DIMMS];
			/* ECC corrected errors counts per rdimm */
	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

	bool		is_registered, enable_scrub;

	/* Fifo double buffers */
	struct mce		mce_entry[MCE_LOG_LEN];
	struct mce		mce_outentry[MCE_LOG_LEN];

	/* Fifo in/out counters */
	unsigned		mce_in, mce_out;

	/* Count indicator to show errors not got */
	unsigned		mce_overrun;

	/* Struct to control EDAC polling */
	struct edac_pci_ctl_info *i7core_pci;
};
281
282 #define PCI_DESCR(device, function, device_id)  \
283         .dev = (device),                        \
284         .func = (function),                     \
285         .dev_id = (device_id)
286
/* Devices to look for on i7core/Nehalem sockets: MC functions on slot 3,
 * one slot (4-6) per channel, plus the generic non-core device on 0.0 */
static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
			/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

		/* Generic Non-core registers */
	/*
	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
	 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
	 * the probing code needs to test for the other address in case of
	 * failure of this one
	 */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },

};
323
/* Devices to look for on Lynnfield sockets: only two channels here */
static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
	{ PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
	{ PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
	{ PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },

	{ PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
	{ PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
	{ PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
	{ PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },

	{ PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
	{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
	{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
	{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },

	/*
	 * This PCI device has an alternate address on some
	 * processors like Core i7 860
	 */
	{ PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
};
345
/* Devices to look for on Westmere sockets: same layout as Nehalem,
 * but every function carries a REV2 device ID */
static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
			/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },

		/* Generic Non-core registers */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },

};
376
#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
/* All supported processor families, in probing order */
static const struct pci_id_table pci_dev_table[] = {
	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
	PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
	{0,}			/* 0 terminated list. */
};
384
385 /*
386  *      pci_device_id   table for which devices we are looking for
387  */
static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
	/* One probe anchor per supported family; the rest of the devices
	 * are discovered from the pci_dev_table lists above */
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
	{0,}			/* 0 terminated list. */
};
393
394 /****************************************************************************
			Ancillary status routines
396  ****************************************************************************/
397
398         /* MC_CONTROL bits */
399 #define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + ch)))
400 #define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))
401
402         /* MC_STATUS bits */
403 #define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
404 #define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << ch))
405
406         /* MC_MAX_DOD read functions */
407 static inline int numdimms(u32 dimms)
408 {
409         return (dimms & 0x3) + 1;
410 }
411
412 static inline int numrank(u32 rank)
413 {
414         static int ranks[4] = { 1, 2, 4, -EINVAL };
415
416         return ranks[rank & 0x3];
417 }
418
419 static inline int numbank(u32 bank)
420 {
421         static int banks[4] = { 4, 8, 16, -EINVAL };
422
423         return banks[bank & 0x3];
424 }
425
426 static inline int numrow(u32 row)
427 {
428         static int rows[8] = {
429                 1 << 12, 1 << 13, 1 << 14, 1 << 15,
430                 1 << 16, -EINVAL, -EINVAL, -EINVAL,
431         };
432
433         return rows[row & 0x7];
434 }
435
436 static inline int numcol(u32 col)
437 {
438         static int cols[8] = {
439                 1 << 10, 1 << 11, 1 << 12, -EINVAL,
440         };
441         return cols[col & 0x3];
442 }
443
444 static struct i7core_dev *get_i7core_dev(u8 socket)
445 {
446         struct i7core_dev *i7core_dev;
447
448         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
449                 if (i7core_dev->socket == socket)
450                         return i7core_dev;
451         }
452
453         return NULL;
454 }
455
456 static struct i7core_dev *alloc_i7core_dev(u8 socket,
457                                            const struct pci_id_table *table)
458 {
459         struct i7core_dev *i7core_dev;
460
461         i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
462         if (!i7core_dev)
463                 return NULL;
464
465         i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
466                                    GFP_KERNEL);
467         if (!i7core_dev->pdev) {
468                 kfree(i7core_dev);
469                 return NULL;
470         }
471
472         i7core_dev->socket = socket;
473         i7core_dev->n_devs = table->n_devs;
474         list_add_tail(&i7core_dev->list, &i7core_edac_list);
475
476         return i7core_dev;
477 }
478
/* Unlink a per-socket device from the global list and release its memory */
static void free_i7core_dev(struct i7core_dev *i7core_dev)
{
	list_del(&i7core_dev->list);
	kfree(i7core_dev->pdev);
	kfree(i7core_dev);
}
485
486 /****************************************************************************
487                         Memory check routines
488  ****************************************************************************/
489 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
490                                           unsigned func)
491 {
492         struct i7core_dev *i7core_dev = get_i7core_dev(socket);
493         int i;
494
495         if (!i7core_dev)
496                 return NULL;
497
498         for (i = 0; i < i7core_dev->n_devs; i++) {
499                 if (!i7core_dev->pdev[i])
500                         continue;
501
502                 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
503                     PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
504                         return i7core_dev->pdev[i];
505                 }
506         }
507
508         return NULL;
509 }
510
511 /**
512  * i7core_get_active_channels() - gets the number of channels and csrows
513  * @socket:     Quick Path Interconnect socket
514  * @channels:   Number of channels that will be returned
515  * @csrows:     Number of csrows found
516  *
517  * Since EDAC core needs to know in advance the number of available channels
518  * and csrows, in order to allocate memory for csrows/channels, it is needed
519  * to run two similar steps. At the first step, implemented on this function,
520  * it checks the number of csrows/channels present at one socket.
521  * this is used in order to properly allocate the size of mci components.
522  *
523  * It should be noticed that none of the current available datasheets explain
524  * or even mention how csrows are seen by the memory controller. So, we need
525  * to add a fake description for csrows.
526  * So, this driver is attributing one DIMM memory for one csrow.
527  */
static int i7core_get_active_channels(const u8 socket, unsigned *channels,
				      unsigned *csrows)
{
	struct pci_dev *pdev = NULL;
	int i, j;
	u32 status, control;

	*channels = 0;
	*csrows = 0;

	/* The global MC registers live on slot 3, function 0 */
	pdev = get_pdev_slot_func(socket, 3, 0);
	if (!pdev) {
		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
			      socket);
		return -ENODEV;
	}

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_STATUS, &status);
	pci_read_config_dword(pdev, MC_CONTROL, &control);

	for (i = 0; i < NUM_CHANS; i++) {
		u32 dimm_dod[3];
		/* Check if the channel is active */
		if (!(control & (1 << (8 + i))))
			continue;

		/* Check if the channel is disabled */
		if (status & (1 << i))
			continue;

		/* Channel i registers live on slot 4+i, function 1 */
		pdev = get_pdev_slot_func(socket, i + 4, 1);
		if (!pdev) {
			i7core_printk(KERN_ERR, "Couldn't find socket %d "
						"fn %d.%d!!!\n",
						socket, i + 4, 1);
			return -ENODEV;
		}
		/* Devices 4-6 function 1 */
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		(*channels)++;

		/* Each present DIMM is counted as one (fake) csrow */
		for (j = 0; j < 3; j++) {
			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;
			(*csrows)++;
		}
	}

	debugf0("Number of active channels on socket %d: %d\n",
		socket, *channels);

	return 0;
}
588
/*
 * get_dimm_config() - read the MC topology and fill in the EDAC csrows.
 *
 * Caches the device-3 function-0 registers into pvt->info, derives the
 * EDAC mode from the ECC configuration, then walks every active channel
 * and describes each present DIMM as one fake csrow (see the comment on
 * i7core_get_active_channels() above).
 *
 * Returns 0, or -ENODEV when the MC function-0 device is missing.
 */
static int get_dimm_config(const struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct csrow_info *csr;
	struct pci_dev *pdev;
	int i, j;
	int csrow = 0;
	unsigned long last_page = 0;
	enum edac_type mode;
	enum mem_type mtype;

	/* Get data from the MC register, function 0 */
	pdev = pvt->pci_mcr[0];
	if (!pdev)
		return -ENODEV;

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);

	debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
		pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
		pvt->info.max_dod, pvt->info.ch_map);

	/* Pick the EDAC mode from the MC's ECC configuration */
	if (ECC_ENABLED(pvt)) {
		debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
		if (ECCx8(pvt))
			mode = EDAC_S8ECD8ED;
		else
			mode = EDAC_S4ECD4ED;
	} else {
		debugf0("ECC disabled\n");
		mode = EDAC_NONE;
	}

	/* FIXME: need to handle the error codes */
	debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
		"x%x x 0x%x\n",
		numdimms(pvt->info.max_dod),
		numrank(pvt->info.max_dod >> 2),
		numbank(pvt->info.max_dod >> 4),
		numrow(pvt->info.max_dod >> 6),
		numcol(pvt->info.max_dod >> 9));

	for (i = 0; i < NUM_CHANS; i++) {
		u32 data, dimm_dod[3], value[8];

		/* Skip channels whose PCI devices were not found */
		if (!pvt->pci_ch[i][0])
			continue;

		if (!CH_ACTIVE(pvt, i)) {
			debugf0("Channel %i is not active\n", i);
			continue;
		}
		if (CH_DISABLED(pvt, i)) {
			debugf0("Channel %i is disabled\n", i);
			continue;
		}

		/* Devices 4-6 function 0 */
		pci_read_config_dword(pvt->pci_ch[i][0],
				MC_CHANNEL_DIMM_INIT_PARAMS, &data);

		pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
						4 : 2;

		if (data & REGISTERED_DIMM)
			mtype = MEM_RDDR3;
		else
			mtype = MEM_DDR3;
#if 0
		if (data & THREE_DIMMS_PRESENT)
			pvt->channel[i].dimms = 3;
		else if (data & SINGLE_QUAD_RANK_PRESENT)
			pvt->channel[i].dimms = 1;
		else
			pvt->channel[i].dimms = 2;
#endif

		/* Devices 4-6 function 1 */
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
			"%d ranks, %cDIMMs\n",
			i,
			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
			data,
			pvt->channel[i].ranks,
			(data & REGISTERED_DIMM) ? 'R' : 'U');

		/* Describe each present DIMM as one csrow */
		for (j = 0; j < 3; j++) {
			u32 banks, ranks, rows, cols;
			u32 size, npages;

			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;

			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));

			/* DDR3 has 8 I/O banks */
			size = (rows * cols * banks * ranks) >> (20 - 3);

			pvt->channel[i].dimms++;

			debugf0("\tdimm %d %d Mb offset: %x, "
				"bank: %d, rank: %d, row: %#x, col: %#x\n",
				j, size,
				RANKOFFSET(dimm_dod[j]),
				banks, ranks, rows, cols);

			npages = MiB_TO_PAGES(size);

			/* Fill the EDAC csrow description for this DIMM */
			csr = &mci->csrows[csrow];
			csr->first_page = last_page + 1;
			last_page += npages;
			csr->last_page = last_page;
			csr->nr_pages = npages;

			csr->page_mask = 0;
			csr->grain = 8;
			csr->csrow_idx = csrow;
			csr->nr_channels = 1;

			csr->channels[0].chan_idx = i;
			csr->channels[0].ce_count = 0;

			/* Remember the (channel, dimm) -> csrow mapping */
			pvt->csrow_map[i][j] = csrow;

			switch (banks) {
			case 4:
				csr->dtype = DEV_X4;
				break;
			case 8:
				csr->dtype = DEV_X8;
				break;
			case 16:
				csr->dtype = DEV_X16;
				break;
			default:
				csr->dtype = DEV_UNKNOWN;
			}

			csr->edac_mode = mode;
			csr->mtype = mtype;

			csrow++;
		}

		/* Dump the MC_SAG_CH_* mapping registers (debug output only) */
		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
		debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
		for (j = 0; j < 8; j++)
			debugf1("\t\t%#x\t%#x\t%#x\n",
				(value[j] >> 27) & 0x1,
				(value[j] >> 24) & 0x7,
				(value[j] & ((1 << 24) - 1)));
	}

	return 0;
}
765
766 /****************************************************************************
767                         Error insertion routines
768  ****************************************************************************/
769
770 /* The i7core has independent error injection features per channel.
771    However, to have a simpler code, we don't allow enabling error injection
772    on more than one channel.
773    Also, since a change at an inject parameter will be applied only at enable,
774    we're disabling error injection on all write calls to the sysfs nodes that
775    controls the error code injection.
776  */
777 static int disable_inject(const struct mem_ctl_info *mci)
778 {
779         struct i7core_pvt *pvt = mci->pvt_info;
780
781         pvt->inject.enable = 0;
782
783         if (!pvt->pci_ch[pvt->inject.channel][0])
784                 return -ENODEV;
785
786         pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
787                                 MC_CHANNEL_ERROR_INJECT, 0);
788
789         return 0;
790 }
791
792 /*
793  * i7core inject inject.section
794  *
795  *      accept and store error injection inject.section value
796  *      bit 0 - refers to the lower 32-byte half cacheline
797  *      bit 1 - refers to the upper 32-byte half cacheline
798  */
static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	/* Changing any inject parameter disarms a pending injection first */
	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if ((rc < 0) || (value > 3))	/* only bits 0-1 are meaningful */
		return -EIO;

	pvt->inject.section = (u32) value;
	return count;
}
816
817 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
818                                               char *data)
819 {
820         struct i7core_pvt *pvt = mci->pvt_info;
821         return sprintf(data, "0x%08x\n", pvt->inject.section);
822 }
823
824 /*
825  * i7core inject.type
826  *
827  *      accept and store error injection inject.section value
828  *      bit 0 - repeat enable - Enable error repetition
829  *      bit 1 - inject ECC error
830  *      bit 2 - inject parity error
831  */
832 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
833                                         const char *data, size_t count)
834 {
835         struct i7core_pvt *pvt = mci->pvt_info;
836         unsigned long value;
837         int rc;
838
839         if (pvt->inject.enable)
840                 disable_inject(mci);
841
842         rc = strict_strtoul(data, 10, &value);
843         if ((rc < 0) || (value > 7))
844                 return -EIO;
845
846         pvt->inject.type = (u32) value;
847         return count;
848 }
849
850 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
851                                               char *data)
852 {
853         struct i7core_pvt *pvt = mci->pvt_info;
854         return sprintf(data, "0x%08x\n", pvt->inject.type);
855 }
856
857 /*
858  * i7core_inject_inject.eccmask_store
859  *
860  * The type of error (UE/CE) will depend on the inject.eccmask value:
861  *   Any bits set to a 1 will flip the corresponding ECC bit
862  *   Correctable errors can be injected by flipping 1 bit or the bits within
863  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
864  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
865  *   uncorrectable error to be injected.
866  */
867 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
868                                         const char *data, size_t count)
869 {
870         struct i7core_pvt *pvt = mci->pvt_info;
871         unsigned long value;
872         int rc;
873
874         if (pvt->inject.enable)
875                 disable_inject(mci);
876
877         rc = strict_strtoul(data, 10, &value);
878         if (rc < 0)
879                 return -EIO;
880
881         pvt->inject.eccmask = (u32) value;
882         return count;
883 }
884
885 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
886                                               char *data)
887 {
888         struct i7core_pvt *pvt = mci->pvt_info;
889         return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
890 }
891
892 /*
893  * i7core_addrmatch
894  *
895  * The type of error (UE/CE) will depend on the inject.eccmask value:
896  *   Any bits set to a 1 will flip the corresponding ECC bit
897  *   Correctable errors can be injected by flipping 1 bit or the bits within
898  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
899  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
900  *   uncorrectable error to be injected.
901  */
902
/*
 * DECLARE_ADDR_MATCH(param, limit) generates the sysfs show/store pair for
 * one inject_addrmatch criterion, stored in pvt->inject.<param>.
 *
 * store: accepts "any" (mapped to -1, meaning "don't match on this field")
 * or a decimal value in [0, limit).  Any write first disables a pending
 * injection, since the parameters are only latched on enable.
 *
 * show: prints "any" for -1, otherwise the decimal value.
 *
 * NOTE(review): 'value' is declared as (signed) long but its address is
 * passed to strict_strtoul(), which takes an unsigned long * - confirm
 * this pointer-type mismatch is intentional/harmless here.
 */
#define DECLARE_ADDR_MATCH(param, limit)                        \
static ssize_t i7core_inject_store_##param(                     \
		struct mem_ctl_info *mci,                       \
		const char *data, size_t count)                 \
{                                                               \
	struct i7core_pvt *pvt;                                 \
	long value;                                             \
	int rc;                                                 \
								\
	debugf1("%s()\n", __func__);                            \
	pvt = mci->pvt_info;                                    \
								\
	if (pvt->inject.enable)                                 \
		disable_inject(mci);                            \
								\
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
		value = -1;                                     \
	else {                                                  \
		rc = strict_strtoul(data, 10, &value);          \
		if ((rc < 0) || (value >= limit))               \
			return -EIO;                            \
	}                                                       \
								\
	pvt->inject.param = value;                              \
								\
	return count;                                           \
}                                                               \
								\
static ssize_t i7core_inject_show_##param(                      \
		struct mem_ctl_info *mci,                       \
		char *data)                                     \
{                                                               \
	struct i7core_pvt *pvt;                                 \
								\
	pvt = mci->pvt_info;                                    \
	debugf1("%s() pvt=%p\n", __func__, pvt);                \
	if (pvt->inject.param < 0)                              \
		return sprintf(data, "any\n");                  \
	else                                                    \
		return sprintf(data, "%d\n", pvt->inject.param);\
}
944
/*
 * ATTR_ADDR_MATCH(param) builds one mcidev_sysfs_attribute table entry,
 * wiring in the show/store handlers generated by DECLARE_ADDR_MATCH(param).
 */
#define ATTR_ADDR_MATCH(param)                                  \
	{                                                       \
		.attr = {                                       \
			.name = #param,                         \
			.mode = (S_IRUGO | S_IWUSR)             \
		},                                              \
		.show  = i7core_inject_show_##param,            \
		.store = i7core_inject_store_##param,           \
	}

/* Generate the handlers for each address-match criterion; the second
 * argument is the exclusive upper bound accepted by the store method. */
DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);
961
962 static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
963 {
964         u32 read;
965         int count;
966
967         debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
968                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
969                 where, val);
970
971         for (count = 0; count < 10; count++) {
972                 if (count)
973                         msleep(100);
974                 pci_write_config_dword(dev, where, val);
975                 pci_read_config_dword(dev, where, &read);
976
977                 if (read == val)
978                         return 0;
979         }
980
981         i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
982                 "write=%08x. Read=%08x\n",
983                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
984                 where, val, read);
985
986         return -EINVAL;
987 }
988
989 /*
990  * This routine prepares the Memory Controller for error injection.
991  * The error will be injected when some process tries to write to the
992  * memory that matches the given criteria.
993  * The criteria can be set in terms of a mask where dimm, rank, bank, page
994  * and col can be specified.
995  * A -1 value for any of the mask items will make the MCU to ignore
996  * that matching criteria for error injection.
997  *
998  * It should be noticed that the error will only happen after a write operation
999  * on a memory that matches the condition. if REPEAT_EN is not enabled at
1000  * inject mask, then it will produce just one error. Otherwise, it will repeat
1001  * until the injectmask would be cleaned.
1002  *
1003  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
1004  *    is reliable enough to check if the MC is using the
1005  *    three channels. However, this is not clear at the datasheet.
1006  */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
				       const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;
	u64 mask = 0;
	int  rc;
	long enable;

	/* No config space device for the selected channel: nothing to do */
	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	/*
	 * NOTE(review): 'enable' is long but strict_strtoul() takes an
	 * unsigned long * - confirm the pointer-type mismatch is intended.
	 */
	rc = strict_strtoul(data, 10, &enable);
	if ((rc < 0))
		return 0;

	if (enable) {
		pvt->inject.enable = 1;
	} else {
		/* Writing 0 disarms the injection and we're done */
		disable_inject(mci);
		return count;
	}

	/* Sets pvt->inject.dimm mask; bit 41 means "ignore the dimm field" */
	if (pvt->inject.dimm < 0)
		mask |= 1LL << 41;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.dimm & 0x3LL) << 35;
		else
			mask |= (pvt->inject.dimm & 0x1LL) << 36;
	}

	/* Sets pvt->inject.rank mask; bit 40 means "ignore the rank field" */
	if (pvt->inject.rank < 0)
		mask |= 1LL << 40;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.rank & 0x1LL) << 34;
		else
			mask |= (pvt->inject.rank & 0x3LL) << 34;
	}

	/* Sets pvt->inject.bank mask; bit 39 means "ignore the bank field" */
	if (pvt->inject.bank < 0)
		mask |= 1LL << 39;
	else
		/*
		 * NOTE(review): 0x15 only keeps bits 0, 2 and 4; since bank
		 * accepts values up to 31 (5 bits) this looks like it was
		 * meant to be 0x1f - confirm against the MC_CHANNEL_ADDR_MATCH
		 * register layout in the datasheet.
		 */
		mask |= (pvt->inject.bank & 0x15LL) << 30;

	/* Sets pvt->inject.page mask; bit 38 means "ignore the page field" */
	if (pvt->inject.page < 0)
		mask |= 1LL << 38;
	else
		mask |= (pvt->inject.page & 0xffff) << 14;

	/* Sets pvt->inject.column mask; bit 37 means "ignore the col field" */
	if (pvt->inject.col < 0)
		mask |= 1LL << 37;
	else
		mask |= (pvt->inject.col & 0x3fff);

	/*
	 * bit    0: REPEAT_EN
	 * bits 1-2: MASK_HALF_CACHELINE
	 * bit    3: INJECT_ECC
	 * bit    4: INJECT_ADDR_PARITY
	 */

	injectmask = (pvt->inject.type & 1) |
		     (pvt->inject.section & 0x3) << 1 |
		     (pvt->inject.type & 0x6) << (3 - 1);

	/* Unlock writes to registers - this register is write only */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 0x2);

	/* Program the 64-bit address-match mask, low dword first */
	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH, mask);
	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, injectmask);

	/*
	 * This is something undocumented, based on my tests
	 * Without writing 8 to this register, errors aren't injected. Not sure
	 * why.
	 */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 8);

	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
		" inject 0x%08x\n",
		mask, pvt->inject.eccmask, injectmask);


	return count;
}
1109
1110 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1111                                         char *data)
1112 {
1113         struct i7core_pvt *pvt = mci->pvt_info;
1114         u32 injectmask;
1115
1116         if (!pvt->pci_ch[pvt->inject.channel][0])
1117                 return 0;
1118
1119         pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1120                                MC_CHANNEL_ERROR_INJECT, &injectmask);
1121
1122         debugf0("Inject error read: 0x%018x\n", injectmask);
1123
1124         if (injectmask & 0x0c)
1125                 pvt->inject.enable = 1;
1126
1127         return sprintf(data, "%d\n", pvt->inject.enable);
1128 }
1129
/*
 * DECLARE_COUNTER(param) generates the read-only sysfs handler showing the
 * per-channel UDIMM corrected-error counter pvt->udimm_ce_count[param].
 * The data is reported as unavailable until the first counter read, or on
 * registered-DIMM setups (where the RDIMM counters are used instead).
 */
#define DECLARE_COUNTER(param)                                  \
static ssize_t i7core_show_counter_##param(                     \
		struct mem_ctl_info *mci,                       \
		char *data)                                     \
{                                                               \
	struct i7core_pvt *pvt = mci->pvt_info;                 \
								\
	debugf1("%s() \n", __func__);                           \
	if (!pvt->ce_count_available || (pvt->is_registered))   \
		return sprintf(data, "data unavailable\n");     \
	return sprintf(data, "%lu\n",                           \
			pvt->udimm_ce_count[param]);            \
}
1143
/*
 * ATTR_COUNTER(param) builds the sysfs attribute entry "udimm<param>"
 * backed by the handler generated by DECLARE_COUNTER(param).
 */
#define ATTR_COUNTER(param)                                     \
	{                                                       \
		.attr = {                                       \
			.name = __stringify(udimm##param),      \
			.mode = (S_IRUGO | S_IWUSR)             \
		},                                              \
		.show  = i7core_show_counter_##param            \
	}

/* One counter per channel */
DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
1156
1157 /*
1158  * Sysfs struct
1159  */
1160
/* Attributes of the inject_addrmatch sysfs group: one node per criterion */
static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
	ATTR_ADDR_MATCH(channel),
	ATTR_ADDR_MATCH(dimm),
	ATTR_ADDR_MATCH(rank),
	ATTR_ADDR_MATCH(bank),
	ATTR_ADDR_MATCH(page),
	ATTR_ADDR_MATCH(col),
	{ } /* End of list */
};
1170
/* The inject_addrmatch sysfs subdirectory */
static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
	.name  = "inject_addrmatch",
	.mcidev_attr = i7core_addrmatch_attrs,
};
1175
1176 static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1177         ATTR_COUNTER(0),
1178         ATTR_COUNTER(1),
1179         ATTR_COUNTER(2),
1180         { .attr = { .name = NULL } }
1181 };
1182
/* The all_channel_counts sysfs subdirectory */
static const struct mcidev_sysfs_group i7core_udimm_counters = {
	.name  = "all_channel_counts",
	.mcidev_attr = i7core_udimm_counters_attrs,
};
1187
/*
 * Sysfs attribute set used for registered-DIMM configurations (judging by
 * the name; the selection between the rdimm/udimm sets happens outside
 * this chunk): injection controls plus the inject_addrmatch group, but no
 * udimm counter group.
 */
static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	},
	{ }	/* End of list */
};
1222
/*
 * Sysfs attribute set for unbuffered-DIMM configurations: identical to the
 * rdimm set plus the all_channel_counts group with the UDIMM CE counters.
 */
static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	}, {
		.grp = &i7core_udimm_counters,
	},
	{ }	/* End of list */
};
1259
1260 /****************************************************************************
1261         Device initialization routines: put/get, init/exit
1262  ****************************************************************************/
1263
1264 /*
1265  *      i7core_put_all_devices  'put' all the devices that we have
1266  *                              reserved via 'get'
1267  */
1268 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1269 {
1270         int i;
1271
1272         debugf0(__FILE__ ": %s()\n", __func__);
1273         for (i = 0; i < i7core_dev->n_devs; i++) {
1274                 struct pci_dev *pdev = i7core_dev->pdev[i];
1275                 if (!pdev)
1276                         continue;
1277                 debugf0("Removing dev %02x:%02x.%d\n",
1278                         pdev->bus->number,
1279                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1280                 pci_dev_put(pdev);
1281         }
1282 }
1283
/* Release the PCI references and free the i7core_dev of every socket. */
static void i7core_put_all_devices(void)
{
	struct i7core_dev *i7core_dev, *tmp;

	/* _safe variant: free_i7core_dev() removes the entry being walked */
	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		i7core_put_devices(i7core_dev);
		free_i7core_dev(i7core_dev);
	}
}
1293
1294 static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1295 {
1296         struct pci_dev *pdev = NULL;
1297         int i;
1298
1299         /*
1300          * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core pci buses
1301          * aren't announced by acpi. So, we need to use a legacy scan probing
1302          * to detect them
1303          */
1304         while (table && table->descr) {
1305                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1306                 if (unlikely(!pdev)) {
1307                         for (i = 0; i < MAX_SOCKET_BUSES; i++)
1308                                 pcibios_scan_specific_bus(255-i);
1309                 }
1310                 pci_dev_put(pdev);
1311                 table++;
1312         }
1313 }
1314
1315 static unsigned i7core_pci_lastbus(void)
1316 {
1317         int last_bus = 0, bus;
1318         struct pci_bus *b = NULL;
1319
1320         while ((b = pci_find_next_bus(b)) != NULL) {
1321                 bus = b->number;
1322                 debugf0("Found bus %d\n", bus);
1323                 if (bus > last_bus)
1324                         last_bus = bus;
1325         }
1326
1327         debugf0("Last bus %d\n", last_bus);
1328
1329         return last_bus;
1330 }
1331
1332 /*
1333  *      i7core_get_all_devices  Find and perform 'get' operation on the MCH's
1334  *                      device/functions we want to reference for this driver
1335  *
1336  *                      Need to 'get' device 16 func 1 and func 2
1337  */
1338 static int i7core_get_onedevice(struct pci_dev **prev,
1339                                 const struct pci_id_table *table,
1340                                 const unsigned devno,
1341                                 const unsigned last_bus)
1342 {
1343         struct i7core_dev *i7core_dev;
1344         const struct pci_id_descr *dev_descr = &table->descr[devno];
1345
1346         struct pci_dev *pdev = NULL;
1347         u8 bus = 0;
1348         u8 socket = 0;
1349
1350         pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1351                               dev_descr->dev_id, *prev);
1352
1353         /*
1354          * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
1355          * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1356          * to probe for the alternate address in case of failure
1357          */
1358         if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
1359                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1360                                       PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
1361
1362         if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
1363                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1364                                       PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1365                                       *prev);
1366
1367         if (!pdev) {
1368                 if (*prev) {
1369                         *prev = pdev;
1370                         return 0;
1371                 }
1372
1373                 if (dev_descr->optional)
1374                         return 0;
1375
1376                 if (devno == 0)
1377                         return -ENODEV;
1378
1379                 i7core_printk(KERN_INFO,
1380                         "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1381                         dev_descr->dev, dev_descr->func,
1382                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1383
1384                 /* End of list, leave */
1385                 return -ENODEV;
1386         }
1387         bus = pdev->bus->number;
1388
1389         socket = last_bus - bus;
1390
1391         i7core_dev = get_i7core_dev(socket);
1392         if (!i7core_dev) {
1393                 i7core_dev = alloc_i7core_dev(socket, table);
1394                 if (!i7core_dev) {
1395                         pci_dev_put(pdev);
1396                         return -ENOMEM;
1397                 }
1398         }
1399
1400         if (i7core_dev->pdev[devno]) {
1401                 i7core_printk(KERN_ERR,
1402                         "Duplicated device for "
1403                         "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1404                         bus, dev_descr->dev, dev_descr->func,
1405                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1406                 pci_dev_put(pdev);
1407                 return -ENODEV;
1408         }
1409
1410         i7core_dev->pdev[devno] = pdev;
1411
1412         /* Sanity check */
1413         if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1414                         PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1415                 i7core_printk(KERN_ERR,
1416                         "Device PCI ID %04x:%04x "
1417                         "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1418                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1419                         bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1420                         bus, dev_descr->dev, dev_descr->func);
1421                 return -ENODEV;
1422         }
1423
1424         /* Be sure that the device is enabled */
1425         if (unlikely(pci_enable_device(pdev) < 0)) {
1426                 i7core_printk(KERN_ERR,
1427                         "Couldn't enable "
1428                         "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1429                         bus, dev_descr->dev, dev_descr->func,
1430                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1431                 return -ENODEV;
1432         }
1433
1434         debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1435                 socket, bus, dev_descr->dev,
1436                 dev_descr->func,
1437                 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1438
1439         /*
1440          * As stated on drivers/pci/search.c, the reference count for
1441          * @from is always decremented if it is not %NULL. So, as we need
1442          * to get all devices up to null, we need to do a get for the device
1443          */
1444         pci_dev_get(pdev);
1445
1446         *prev = pdev;
1447
1448         return 0;
1449 }
1450
/*
 * 'get' every device of every table entry, across all sockets.
 * Returns 0 on success; on a hard failure all devices already taken are
 * put back and -ENODEV is returned.
 */
static int i7core_get_all_devices(void)
{
	int i, rc, last_bus;
	struct pci_dev *pdev = NULL;
	const struct pci_id_table *table = pci_dev_table;

	last_bus = i7core_pci_lastbus();

	while (table && table->descr) {
		for (i = 0; i < table->n_devs; i++) {
			pdev = NULL;
			/* Iterate the same device id over all sockets */
			do {
				rc = i7core_get_onedevice(&pdev, table, i,
							  last_bus);
				if (rc < 0) {
					/*
					 * First device of the table missing:
					 * force the for loop to terminate and
					 * fall through to the next table
					 * instead of failing.
					 */
					if (i == 0) {
						i = table->n_devs;
						break;
					}
					i7core_put_all_devices();
					return -ENODEV;
				}
			} while (pdev);
		}
		table++;
	}

	return 0;
}
1480
/*
 * Associate the previously 'get'ed PCI devices of one socket with the
 * per-MC private data, keyed by PCI slot/function:
 *
 *   slot 3                  -> pvt->pci_mcr[func]
 *   slots 4 .. 4+NUM_CHANS-1 -> pvt->pci_ch[slot - 4][func]
 *   slot 0, func 0          -> pvt->pci_noncore (also identifies the family)
 *
 * Returns 0 on success, -EINVAL if a device falls outside that layout.
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;
	char *family;

	pvt->is_registered = false;
	pvt->enable_scrub  = false;
	for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func) {
			pvt->pci_noncore = pdev;

			/* Detect the processor family */
			switch (pdev->device) {
			case PCI_DEVICE_ID_INTEL_I7_NONCORE:
				family = "Xeon 35xx/ i7core";
				pvt->enable_scrub = false;
				break;
			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
				family = "i7-800/i5-700";
				pvt->enable_scrub = false;
				break;
			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
				family = "Xeon 34xx";
				pvt->enable_scrub = false;
				break;
			case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
				family = "Xeon 55xx";
				pvt->enable_scrub = true;
				break;
			case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
				family = "Xeon 56xx / i7-900";
				pvt->enable_scrub = true;
				break;
			default:
				family = "unknown";
				pvt->enable_scrub = false;
			}
			debugf0("Detected a processor type %s\n", family);
		} else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		/*
		 * Function 3.2 holds the MC_COR_ECC_CNT registers used by
		 * the RDIMM counter code; its presence flags a registered-
		 * DIMM setup.
		 */
		if (PCI_SLOT(pdev->devfn) == 3 &&
			PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = true;
	}

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}
1556
1557 /****************************************************************************
1558                         Error check routines
1559  ****************************************************************************/
1560 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1561                                       const int chan,
1562                                       const int dimm,
1563                                       const int add)
1564 {
1565         char *msg;
1566         struct i7core_pvt *pvt = mci->pvt_info;
1567         int row = pvt->csrow_map[chan][dimm], i;
1568
1569         for (i = 0; i < add; i++) {
1570                 msg = kasprintf(GFP_KERNEL, "Corrected error "
1571                                 "(Socket=%d channel=%d dimm=%d)",
1572                                 pvt->i7core_dev->socket, chan, dimm);
1573
1574                 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1575                 kfree (msg);
1576         }
1577 }
1578
/*
 * i7core_rdimm_update_ce_count - fold fresh per-DIMM counter readings into
 *                                the driver's accumulated CE counts
 * @mci:  memory controller handle
 * @chan: channel the readings belong to
 * @new0: current hardware count for DIMM 0
 * @new1: current hardware count for DIMM 1
 * @new2: current hardware count for DIMM 2
 *
 * Computes the delta against the previously sampled values, compensates
 * for hardware counter wrap-around, and reports any new errors via
 * i7core_rdimm_update_csrow().  The first call only records a baseline.
 */
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
					 const int chan,
					 const int new0,
					 const int new1,
					 const int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */

		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

		/*
		 * A negative delta means the hardware counter wrapped;
		 * 0x7fff matches the counter width used elsewhere in this
		 * driver -- presumably a 15-bit counter, assumes at most
		 * one wrap between two polls (TODO confirm vs datasheet).
		 */
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->rdimm_ce_count[chan][2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->rdimm_ce_count[chan][1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->rdimm_ce_count[chan][0] += add0;
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;

	/* Report only the DIMMs that actually gained errors */
	if (add0 != 0)
		i7core_rdimm_update_csrow(mci, chan, 0, add0);
	if (add1 != 0)
		i7core_rdimm_update_csrow(mci, chan, 1, add1);
	if (add2 != 0)
		i7core_rdimm_update_csrow(mci, chan, 2, add2);

}
1623
/*
 * i7core_rdimm_check_mc_ecc_err - poll corrected-error counters for
 *                                 registered (RDIMM) memory
 * @mci: memory controller handle
 *
 * Reads the six MC_COR_ECC_CNT registers from dev 3, function 2 and
 * hands the per-channel counts to i7core_rdimm_update_ce_count().
 */
static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv[3][2];
	int i, new0, new1, new2;

	/* Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly */
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
								&rcv[0][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
								&rcv[0][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
								&rcv[1][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
								&rcv[1][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
								&rcv[2][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
								&rcv[2][1]);
	for (i = 0 ; i < 3; i++) {
		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
		/* If the channel has 3 dimms, each half-register tracks one DIMM */
		if (pvt->channel[i].dimms > 2) {
			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
		} else {
			/* 1 or 2 DIMMs: sum both halves per register */
			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
					DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
					DIMM_BOT_COR_ERR(rcv[i][1]);
			new2 = 0;
		}

		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
	}
}
1662
1663 /* This function is based on the device 3 function 4 registers as described on:
1664  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1665  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1666  * also available at:
1667  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1668  */
/*
 * i7core_udimm_check_mc_ecc_err - poll corrected-error counters for
 *                                 unregistered (UDIMM) memory
 * @mci: memory controller handle
 *
 * Reads MC_TEST_ERR_RCV0/RCV1 from dev 3, function 4, computes the delta
 * against the previous sample (with wrap compensation) and logs any new
 * corrected errors.  The first call only records a baseline.
 */
static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv1, rcv0;
	int new0, new1, new2;

	/* Function 4 may be hidden by BIOS; nothing to do without it */
	if (!pvt->pci_mcr[4]) {
		debugf0("%s MCR registers not found\n", __func__);
		return;
	}

	/* Corrected test errors */
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);

	/* Extract the per-DIMM counts from the raw registers */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);

	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		int add0, add1, add2;

		add2 = new2 - pvt->udimm_last_ce_count[2];
		add1 = new1 - pvt->udimm_last_ce_count[1];
		add0 = new0 - pvt->udimm_last_ce_count[0];

		/*
		 * Negative delta means the hardware counter wrapped;
		 * 0x7fff presumably matches a 15-bit counter width --
		 * TODO confirm against the datasheet.
		 */
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->udimm_ce_count[2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->udimm_ce_count[1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->udimm_ce_count[0] += add0;

		if (add0 | add1 | add2)
			i7core_printk(KERN_ERR, "New Corrected error(s): "
				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
				      add0, add1, add2);
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->udimm_last_ce_count[2] = new2;
	pvt->udimm_last_ce_count[1] = new1;
	pvt->udimm_last_ce_count[0] = new0;
}
1722
/*
 * According to tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
 * Architectures Software Developer's Manual Volume 3B,
 * Nehalem is defined as family 0x06, model 0x1a.
 *
 * The MCA registers used here are the following ones:
 *     struct mce field MCA Register
 *     m->status        MSR_IA32_MC8_STATUS
 *     m->addr          MSR_IA32_MC8_ADDR
 *     m->misc          MSR_IA32_MC8_MISC
 * In the case of Nehalem, the error information is masked at the .status
 * and .misc fields.
 */
1736 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1737                                     const struct mce *m)
1738 {
1739         struct i7core_pvt *pvt = mci->pvt_info;
1740         char *type, *optype, *err, *msg;
1741         unsigned long error = m->status & 0x1ff0000l;
1742         u32 optypenum = (m->status >> 4) & 0x07;
1743         u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1744         u32 dimm = (m->misc >> 16) & 0x3;
1745         u32 channel = (m->misc >> 18) & 0x3;
1746         u32 syndrome = m->misc >> 32;
1747         u32 errnum = find_first_bit(&error, 32);
1748         int csrow;
1749
1750         if (m->mcgstatus & 1)
1751                 type = "FATAL";
1752         else
1753                 type = "NON_FATAL";
1754
1755         switch (optypenum) {
1756         case 0:
1757                 optype = "generic undef request";
1758                 break;
1759         case 1:
1760                 optype = "read error";
1761                 break;
1762         case 2:
1763                 optype = "write error";
1764                 break;
1765         case 3:
1766                 optype = "addr/cmd error";
1767                 break;
1768         case 4:
1769                 optype = "scrubbing error";
1770                 break;
1771         default:
1772                 optype = "reserved";
1773                 break;
1774         }
1775
1776         switch (errnum) {
1777         case 16:
1778                 err = "read ECC error";
1779                 break;
1780         case 17:
1781                 err = "RAS ECC error";
1782                 break;
1783         case 18:
1784                 err = "write parity error";
1785                 break;
1786         case 19:
1787                 err = "redundacy loss";
1788                 break;
1789         case 20:
1790                 err = "reserved";
1791                 break;
1792         case 21:
1793                 err = "memory range error";
1794                 break;
1795         case 22:
1796                 err = "RTID out of range";
1797                 break;
1798         case 23:
1799                 err = "address parity error";
1800                 break;
1801         case 24:
1802                 err = "byte enable parity error";
1803                 break;
1804         default:
1805                 err = "unknown";
1806         }
1807
1808         /* FIXME: should convert addr into bank and rank information */
1809         msg = kasprintf(GFP_ATOMIC,
1810                 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1811                 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1812                 type, (long long) m->addr, m->cpu, dimm, channel,
1813                 syndrome, core_err_cnt, (long long)m->status,
1814                 (long long)m->misc, optype, err);
1815
1816         debugf0("%s", msg);
1817
1818         csrow = pvt->csrow_map[channel][dimm];
1819
1820         /* Call the helper to output message */
1821         if (m->mcgstatus & 1)
1822                 edac_mc_handle_fbd_ue(mci, csrow, 0,
1823                                 0 /* FIXME: should be channel here */, msg);
1824         else if (!pvt->is_registered)
1825                 edac_mc_handle_fbd_ce(mci, csrow,
1826                                 0 /* FIXME: should be channel here */, msg);
1827
1828         kfree(msg);
1829 }
1830
1831 /*
1832  *      i7core_check_error      Retrieve and process errors reported by the
1833  *                              hardware. Called by the Core module.
1834  */
/*
 * i7core_check_error - drain queued MCEs and refresh CE counters
 * @mci: memory controller handle
 *
 * Copies all pending entries from the producer ring (filled by the MCE
 * notifier) into a private buffer, decodes and reports them, then polls
 * the corrected-error counters appropriate for the DIMM type.
 */
static void i7core_check_error(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m;

	/*
	 * MCE first step: Copy all mce errors into a temporary buffer
	 * We use a double buffering here, to reduce the risk of
	 * losing an error.
	 */
	smp_rmb();
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
		% MCE_LOG_LEN;
	if (!count)
		goto check_ce_error;

	m = pvt->mce_outentry;
	/* The pending span may wrap the ring: copy the tail chunk first */
	if (pvt->mce_in + count > MCE_LOG_LEN) {
		unsigned l = MCE_LOG_LEN - pvt->mce_in;

		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		smp_wmb();
		pvt->mce_in = 0;
		count -= l;
		m += l;
	}
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
	smp_wmb();
	pvt->mce_in += count;

	smp_rmb();
	/* Report (and reset) any entries the producer had to drop */
	if (pvt->mce_overrun) {
		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
			      pvt->mce_overrun);
		smp_wmb();
		pvt->mce_overrun = 0;
	}

	/*
	 * MCE second step: parse errors and display
	 */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);

	/*
	 * Now, let's increment CE error counts
	 */
check_ce_error:
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
		i7core_rdimm_check_mc_ecc_err(mci);
}
1890
/*
 * i7core_mce_check_error       Replicates the mcelog routine to get errors.
 *                              This routine simply queues mcelog errors and
 *                              returns.  The error itself is handled later
 *                              by i7core_check_error.
 * WARNING: As this routine may be called at NMI time, extra care should
 * be taken to avoid deadlocks, and to be as fast as possible.
 */
/*
 * i7core_mce_check_error - MCE decoder-chain callback; queue memory errors
 * @nb:   notifier block (unused)
 * @val:  notifier value (unused)
 * @data: the struct mce being decoded
 *
 * Filters out events that do not belong to this memory controller and
 * queues the rest into the per-socket ring buffer for later processing
 * by i7core_check_error().  Fatal errors are processed immediately.
 */
static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
				  void *data)
{
	struct mce *mce = (struct mce *)data;
	struct i7core_dev *i7_dev;
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;

	/*
	 * NOTE(review): returning NOTIFY_BAD here halts the whole notifier
	 * chain for sockets this driver does not manage -- verify that is
	 * intended rather than NOTIFY_DONE.
	 */
	i7_dev = get_i7core_dev(mce->socketid);
	if (!i7_dev)
		return NOTIFY_BAD;

	mci = i7_dev->mci;
	pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return NOTIFY_DONE;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return NOTIFY_DONE;

#ifdef CONFIG_SMP
	/* Only handle if it is the right mc controller */
	if (mce->socketid != pvt->i7core_dev->socket)
		return NOTIFY_DONE;
#endif

	smp_rmb();
	/* Ring full: count the loss instead of overwriting an entry */
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return NOTIFY_DONE;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advise mcelog that the errors were handled */
	return NOTIFY_STOP;
}
1950
/* Hook into the x86 MCE decoder chain; see i7core_mce_check_error() */
static struct notifier_block i7_mce_dec = {
	.notifier_call	= i7core_mce_check_error,
};
1954
1955 /*
1956  * set_sdram_scrub_rate         This routine sets byte/sec bandwidth scrub rate
1957  *                              to hardware according to SCRUBINTERVAL formula
1958  *                              found in datasheet.
1959  */
/*
 * set_sdram_scrub_rate - program the patrol scrub bandwidth
 * @mci:    memory controller handle
 * @new_bw: requested scrub bandwidth in bytes/sec; 0 disables scrubbing
 *
 * Converts @new_bw into a SCRUBINTERVAL register value per the datasheet
 * formula and programs MC_SCRUB_CONTROL / MC_SSRCONTROL accordingly.
 * Returns @new_bw on success or -ENODEV if the MC function 2 device is
 * missing.
 */
static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	const u32 cache_line_size = 64;
	const u32 freq_dclk = 800*1000000;	/* assumes 800 MHz DCLK -- TODO confirm per SKU */
	u32 dw_scrub;
	u32 dw_ssr;

	/* Get data from the MC register, function 2 */
	pdev = pvt->pci_mcr[2];
	if (!pdev)
		return -ENODEV;

	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);

	if (new_bw == 0) {
		/* Prepare to disable patrol scrub */
		dw_scrub &= ~STARTSCRUB;
		/* Stop the patrol scrub engine */
		write_and_test(pdev, MC_SCRUB_CONTROL, dw_scrub & ~0x00ffffff);

		/* Get current status of scrub rate and set bit to disable */
		pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
		dw_ssr &= ~SSR_MODE_MASK;
		dw_ssr |= SSR_MODE_DISABLE;
	} else {
		/*
		 * Translate the desired scrub rate to a register value and
		 * program the corresponding register value.
		 */
		dw_scrub = 0x00ffffff & (cache_line_size * freq_dclk / new_bw);

		/* Start the patrol scrub engine */
		pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
				       STARTSCRUB | dw_scrub);

		/* Get current status of scrub rate and set bit to enable */
		pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
		dw_ssr &= ~SSR_MODE_MASK;
		dw_ssr |= SSR_MODE_ENABLE;
	}
	/* Disable or enable scrubbing */
	pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);

	return new_bw;
}
2007
/*
 * get_sdram_scrub_rate         This routine converts the current scrub rate
 *                              value into byte/sec bandwidth according to
 *                              the SCRUBINTERVAL formula found in the
 *                              datasheet.
 */
/*
 * get_sdram_scrub_rate - read back the configured patrol scrub bandwidth
 * @mci: memory controller handle
 *
 * Inverts the SCRUBINTERVAL formula to report bytes/sec.  Returns 0 when
 * scrubbing is disabled, or -ENODEV if the MC function 2 device is missing.
 */
static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	const u32 cache_line_size = 64;
	const u32 freq_dclk = 800*1000000;	/* assumes 800 MHz DCLK -- TODO confirm per SKU */
	u32 scrubval;

	/* Get data from the MC register, function 2 */
	pdev = pvt->pci_mcr[2];
	if (!pdev)
		return -ENODEV;

	/* Get current scrub control data */
	pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);

	/* Mask highest 8-bits to 0 */
	scrubval &=  0x00ffffff;
	if (!scrubval)
		return 0;

	/* Calculate scrub rate value into byte/sec bandwidth */
	return 0xffffffff & (cache_line_size * freq_dclk / (u64) scrubval);
}
2037
2038 static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
2039 {
2040         struct i7core_pvt *pvt = mci->pvt_info;
2041         u32 pci_lock;
2042
2043         /* Unlock writes to pci registers */
2044         pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2045         pci_lock &= ~0x3;
2046         pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2047                                pci_lock | MC_CFG_UNLOCK);
2048
2049         mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
2050         mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
2051 }
2052
2053 static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
2054 {
2055         struct i7core_pvt *pvt = mci->pvt_info;
2056         u32 pci_lock;
2057
2058         /* Lock writes to pci registers */
2059         pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2060         pci_lock &= ~0x3;
2061         pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2062                                pci_lock | MC_CFG_LOCK);
2063 }
2064
2065 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
2066 {
2067         pvt->i7core_pci = edac_pci_create_generic_ctl(
2068                                                 &pvt->i7core_dev->pdev[0]->dev,
2069                                                 EDAC_MOD_STR);
2070         if (unlikely(!pvt->i7core_pci))
2071                 i7core_printk(KERN_WARNING,
2072                               "Unable to setup PCI error report via EDAC\n");
2073 }
2074
2075 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
2076 {
2077         if (likely(pvt->i7core_pci))
2078                 edac_pci_release_generic_ctl(pvt->i7core_pci);
2079         else
2080                 i7core_printk(KERN_ERR,
2081                                 "Couldn't find mem_ctl_info for socket %d\n",
2082                                 pvt->i7core_dev->socket);
2083         pvt->i7core_pci = NULL;
2084 }
2085
/*
 * i7core_unregister_mci - tear down the EDAC MC for one socket
 * @i7core_dev: the set of PCI devices belonging to this socket
 *
 * Reverses i7core_register_mci(): disables scrub control, detaches the
 * MCE notifier, releases the PCI control, removes the sysfs nodes and
 * frees the mci structure.
 */
static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
{
	struct mem_ctl_info *mci = i7core_dev->mci;
	struct i7core_pvt *pvt;

	if (unlikely(!mci || !mci->pvt_info)) {
		debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
			__func__, &i7core_dev->pdev[0]->dev);

		i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
		return;
	}

	pvt = mci->pvt_info;

	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
		__func__, mci, &i7core_dev->pdev[0]->dev);

	/* Disable scrubrate setting */
	if (pvt->enable_scrub)
		disable_sdram_scrub_setting(mci);

	/* Stop receiving MCEs from the decoder chain */
	atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec);

	/* Disable EDAC polling */
	i7core_pci_ctl_release(pvt);

	/* Remove MC sysfs nodes */
	edac_mc_del_mc(mci->dev);

	debugf1("%s: free mci struct\n", mci->ctl_name);
	kfree(mci->ctl_name);
	edac_mc_free(mci);
	i7core_dev->mci = NULL;
}
2121
/*
 * i7core_register_mci - allocate and register one EDAC MC for a socket
 * @i7core_dev: the set of PCI devices belonging to this socket
 *
 * Returns 0 on success or a negative errno.  On failure all partially
 * allocated state is released and i7core_dev->mci is reset to NULL.
 */
static int i7core_register_mci(struct i7core_dev *i7core_dev)
{
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;
	int rc, channels, csrows;

	/* Check the number of active and not disabled channels */
	rc = i7core_get_active_channels(i7core_dev->socket, &channels, &csrows);
	if (unlikely(rc < 0))
		return rc;

	/* allocate a new MC control structure */
	mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, i7core_dev->socket);
	if (unlikely(!mci))
		return -ENOMEM;

	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
		__func__, mci, &i7core_dev->pdev[0]->dev);

	pvt = mci->pvt_info;
	memset(pvt, 0, sizeof(*pvt));

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	/*
	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
	 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
	 * memory channels
	 */
	mci->mtype_cap = MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "i7core_edac.c";
	mci->mod_ver = I7CORE_REVISION;
	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
				  i7core_dev->socket);
	mci->dev_name = pci_name(i7core_dev->pdev[0]);
	mci->ctl_page_to_phys = NULL;

	/* Store pci devices at mci for faster access */
	rc = mci_bind_devs(mci, i7core_dev);
	if (unlikely(rc < 0))
		goto fail0;

	/* RDIMM and UDIMM memory expose different sysfs counter sets */
	if (pvt->is_registered)
		mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
	else
		mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;

	/* Get dimm basic config */
	get_dimm_config(mci);
	/* record ptr to the generic device */
	mci->dev = &i7core_dev->pdev[0]->dev;
	/* Set the function pointer to an actual operation function */
	mci->edac_check = i7core_check_error;

	/* Enable scrubrate setting */
	if (pvt->enable_scrub)
		enable_sdram_scrub_setting(mci);

	/* add this new MC control structure to EDAC's list of MCs */
	if (unlikely(edac_mc_add_mc(mci))) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mc_add_mc()\n", __func__);
		/* FIXME: perhaps some code should go here that disables error
		 * reporting if we just enabled it
		 */

		rc = -EINVAL;
		goto fail0;
	}

	/* Default error mask is any memory */
	pvt->inject.channel = 0;
	pvt->inject.dimm = -1;
	pvt->inject.rank = -1;
	pvt->inject.bank = -1;
	pvt->inject.page = -1;
	pvt->inject.col = -1;

	/* allocating generic PCI control info */
	i7core_pci_ctl_create(pvt);

	/* Register with the MCE decoder so memory errors reach this driver */
	atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec);

	return 0;

fail0:
	kfree(mci->ctl_name);
	edac_mc_free(mci);
	i7core_dev->mci = NULL;
	return rc;
}
2217
2218 /*
2219  *      i7core_probe    Probe for ONE instance of device to see if it is
2220  *                      present.
2221  *      return:
2222  *              0 for FOUND a device
2223  *              < 0 for error code
2224  */
2225
/*
 * i7core_probe - reserve all MC devices and register one EDAC MC per socket
 * @pdev: the PCI device that matched the id table (used only as a trigger;
 *        the real devices are collected by i7core_get_all_devices())
 * @id:   matched entry of the id table
 *
 * All memory controllers are set up on the first successful call; later
 * calls return -ENODEV.  Returns 0 on success or a negative errno.
 */
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	int rc;
	struct i7core_dev *i7core_dev;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(probed >= 1)) {
		mutex_unlock(&i7core_edac_lock);
		return -ENODEV;
	}
	probed++;

	rc = i7core_get_all_devices();
	if (unlikely(rc < 0))
		goto fail0;

	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		rc = i7core_register_mci(i7core_dev);
		if (unlikely(rc < 0))
			goto fail1;
	}

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&i7core_edac_lock);
	return 0;

fail1:
	/* Unwind every socket registered so far, then drop the devices */
	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
		i7core_unregister_mci(i7core_dev);

	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}
2268
2269 /*
2270  *      i7core_remove   destructor for one instance of device
2271  *
2272  */
/*
 * i7core_remove - destructor for one instance of device
 * @pdev: PCI device being removed (see the comment below on why it is
 *        not used directly)
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct i7core_dev *i7core_dev;

	debugf0(__FILE__ ": %s()\n", __func__);

	/*
	 * we have a trouble here: pdev value for removal will be wrong, since
	 * it will point to the X58 register used to detect that the machine
	 * is a Nehalem or upper design. However, due to the way several PCI
	 * devices are grouped together to provide MC functionality, we need
	 * to use a different method for releasing the devices
	 */

	mutex_lock(&i7core_edac_lock);

	/* Nothing to do if probe never completed */
	if (unlikely(!probed)) {
		mutex_unlock(&i7core_edac_lock);
		return;
	}

	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
		i7core_unregister_mci(i7core_dev);

	/* Release PCI resources */
	i7core_put_all_devices();

	probed--;

	mutex_unlock(&i7core_edac_lock);
}
2304
/* Export the PCI id table so module autoloading can match these devices */
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2306
2307 /*
2308  *      i7core_driver   pci_driver structure for this module
2309  *
2310  */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,	/* sets up ALL sockets on first match */
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};
2317
2318 /*
2319  *      i7core_init             Module entry function
2320  *                      Try to initialize this module for its devices
2321  */
2322 static int __init i7core_init(void)
2323 {
2324         int pci_rc;
2325
2326         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2327
2328         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2329         opstate_init();
2330
2331         if (use_pci_fixup)
2332                 i7core_xeon_pci_fixup(pci_dev_table);
2333
2334         pci_rc = pci_register_driver(&i7core_driver);
2335
2336         if (pci_rc >= 0)
2337                 return 0;
2338
2339         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2340                       pci_rc);
2341
2342         return pci_rc;
2343 }
2344
2345 /*
2346  *      i7core_exit()   Module exit function
2347  *                      Unregister the driver
2348  */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	/* Removal of each MC instance happens via i7core_remove() */
	pci_unregister_driver(&i7core_driver);
}
2354
module_init(i7core_init);
module_exit(i7core_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

/* Select the error-detection mode at load time (read-only afterwards) */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");