EDAC i7core: Use mce socketid for better compatibility
drivers/edac/i7core_edac.c
/* Intel i7 core/Nehalem Memory Controller kernel module
 *
 * This driver supports the memory controllers found on the Intel
 * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
 * Xeon 55xx and Xeon 56xx, also known as Nehalem, Nehalem-EP, Lynnfield
 * and Westmere-EP.
 *
 * This file may be distributed under the terms of the
 * GNU General Public License version 2 only.
 *
 * Copyright (c) 2009-2010 by:
 *       Mauro Carvalho Chehab <mchehab@redhat.com>
 *
 * Red Hat Inc. http://www.redhat.com
 *
 * Forked and adapted from the i5400_edac driver
 *
 * Based on the following public Intel datasheets:
 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
 * Datasheet, Volume 2:
 *      http://download.intel.com/design/processor/datashts/320835.pdf
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
#include <linux/edac_mce.h>
#include <linux/smp.h>
#include <asm/processor.h>

#include "edac_core.h"

/* Static vars */
static LIST_HEAD(i7core_edac_list);
static DEFINE_MUTEX(i7core_edac_lock);
static int probed;

static int use_pci_fixup;
module_param(use_pci_fixup, int, 0444);
MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255, and are not reported by BIOS.
 * We currently support devices with only 2 sockets. To support more QPI
 * (Quick Path Interconnect) sockets, just increase this number.
 */
#define MAX_SOCKET_BUSES        2


/*
 * Alter this version for the module when modifications are made
 */
#define I7CORE_REVISION    " Ver: 1.0.0"
#define EDAC_MOD_STR      "i7core_edac"

/*
 * Debug macros
 */
#define i7core_printk(level, fmt, arg...)                       \
        edac_printk(level, "i7core", fmt, ##arg)

#define i7core_mc_printk(mci, level, fmt, arg...)               \
        edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)

/*
 * i7core Memory Controller Registers
 */

        /* OFFSETS for Device 0 Function 0 */

#define MC_CFG_CONTROL  0x90
  #define MC_CFG_UNLOCK         0x02
  #define MC_CFG_LOCK           0x00

        /* OFFSETS for Device 3 Function 0 */

#define MC_CONTROL      0x48
#define MC_STATUS       0x4c
#define MC_MAX_DOD      0x64

/*
 * OFFSETS for Device 3 Function 4, as indicated on the Xeon 5500 datasheet:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#define MC_TEST_ERR_RCV1        0x60
  #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)

#define MC_TEST_ERR_RCV0        0x64
  #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
  #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)

/* OFFSETS for Device 3 Function 2, as indicated on the Xeon 5500 datasheet */
#define MC_SSRCONTROL           0x48
  #define SSR_MODE_DISABLE      0x00
  #define SSR_MODE_ENABLE       0x01
  #define SSR_MODE_MASK         0x03

#define MC_SCRUB_CONTROL        0x4c
  #define STARTSCRUB            (1 << 24)

#define MC_COR_ECC_CNT_0        0x80
#define MC_COR_ECC_CNT_1        0x84
#define MC_COR_ECC_CNT_2        0x88
#define MC_COR_ECC_CNT_3        0x8c
#define MC_COR_ECC_CNT_4        0x90
#define MC_COR_ECC_CNT_5        0x94

#define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
#define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)


        /* OFFSETS for Devices 4,5 and 6 Function 0 */

#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
  #define THREE_DIMMS_PRESENT           (1 << 24)
  #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
  #define QUAD_RANK_PRESENT             (1 << 22)
  #define REGISTERED_DIMM               (1 << 15)

#define MC_CHANNEL_MAPPER       0x60
  #define RDLCH(r, ch)          ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)          ((((r) >> (ch * 6)) & 0x07) - 1)

#define MC_CHANNEL_RANK_PRESENT 0x7c
  #define RANK_PRESENT_MASK             0xffff

#define MC_CHANNEL_ADDR_MATCH   0xf0
#define MC_CHANNEL_ERROR_MASK   0xf8
#define MC_CHANNEL_ERROR_INJECT 0xfc
  #define INJECT_ADDR_PARITY    0x10
  #define INJECT_ECC            0x08
  #define MASK_CACHELINE        0x06
  #define MASK_FULL_CACHELINE   0x06
  #define MASK_MSB32_CACHELINE  0x04
  #define MASK_LSB32_CACHELINE  0x02
  #define NO_MASK_CACHELINE     0x00
  #define REPEAT_EN             0x01

        /* OFFSETS for Devices 4,5 and 6 Function 1 */

#define MC_DOD_CH_DIMM0         0x48
#define MC_DOD_CH_DIMM1         0x4c
#define MC_DOD_CH_DIMM2         0x50
  #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)         ((x & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK     (1 << 9)
  #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK            3
  #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)

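/*
 * Example (illustrative, derived from the field macros above, not from the
 * datasheet text): a DOD value of 0x244 decodes as DIMM_PRESENT = 1,
 * NUMBANK = 0 (4 banks), NUMRANK = 2 (4 ranks), NUMROW = 1 (2^13 rows)
 * and NUMCOL = 0 (2^10 columns).
 */
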
#define MC_RANK_PRESENT         0x7c

#define MC_SAG_CH_0     0x80
#define MC_SAG_CH_1     0x84
#define MC_SAG_CH_2     0x88
#define MC_SAG_CH_3     0x8c
#define MC_SAG_CH_4     0x90
#define MC_SAG_CH_5     0x94
#define MC_SAG_CH_6     0x98
#define MC_SAG_CH_7     0x9c

#define MC_RIR_LIMIT_CH_0       0x40
#define MC_RIR_LIMIT_CH_1       0x44
#define MC_RIR_LIMIT_CH_2       0x48
#define MC_RIR_LIMIT_CH_3       0x4C
#define MC_RIR_LIMIT_CH_4       0x50
#define MC_RIR_LIMIT_CH_5       0x54
#define MC_RIR_LIMIT_CH_6       0x58
#define MC_RIR_LIMIT_CH_7       0x5C
#define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)

#define MC_RIR_WAY_CH           0x80
  #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
  #define MC_RIR_WAY_RANK_MASK          0x7

/*
 * i7core structs
 */

#define NUM_CHANS 3
#define MAX_DIMMS 3             /* Max DIMMS per channel */
#define MAX_MCR_FUNC  4
#define MAX_CHAN_FUNC 3

struct i7core_info {
        u32     mc_control;
        u32     mc_status;
        u32     max_dod;
        u32     ch_map;
};


struct i7core_inject {
        int     enable;

        u32     section;
        u32     type;
        u32     eccmask;

        /* Error address mask */
        int channel, dimm, rank, bank, page, col;
};

struct i7core_channel {
        u32             ranks;
        u32             dimms;
};

struct pci_id_descr {
        int                     dev;
        int                     func;
        int                     dev_id;
        int                     optional;
};

struct pci_id_table {
        const struct pci_id_descr       *descr;
        int                             n_devs;
};

struct i7core_dev {
        struct list_head        list;
        u8                      socket;
        struct pci_dev          **pdev;
        int                     n_devs;
        struct mem_ctl_info     *mci;
};

struct i7core_pvt {
        struct pci_dev  *pci_noncore;
        struct pci_dev  *pci_mcr[MAX_MCR_FUNC + 1];
        struct pci_dev  *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];

        struct i7core_dev *i7core_dev;

        struct i7core_info      info;
        struct i7core_inject    inject;
        struct i7core_channel   channel[NUM_CHANS];

        int             ce_count_available;
        int             csrow_map[NUM_CHANS][MAX_DIMMS];

                        /* ECC corrected errors counts per udimm */
        unsigned long   udimm_ce_count[MAX_DIMMS];
        int             udimm_last_ce_count[MAX_DIMMS];
                        /* ECC corrected errors counts per rdimm */
        unsigned long   rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
        int             rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

        bool            is_registered, enable_scrub;

        /* mcelog glue */
        struct edac_mce         edac_mce;

        /* FIFO double buffers */
        struct mce              mce_entry[MCE_LOG_LEN];
        struct mce              mce_outentry[MCE_LOG_LEN];

        /* FIFO in/out counters */
        unsigned                mce_in, mce_out;

        /* Count of MCE events that couldn't be queued (FIFO overrun) */
        unsigned                mce_overrun;

        /* Struct to control EDAC polling */
        struct edac_pci_ctl_info *i7core_pci;
};
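
/*
 * Note on the MCE FIFO above: the edac_mce hook queues incoming machine
 * check events at mce_in, while the EDAC polling routine drains them at
 * mce_out into mce_outentry; mce_overrun counts events dropped while the
 * ring was full. Splitting producer and consumer buffers this way keeps
 * the MCE path from blocking on the polling path.
 */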

#define PCI_DESCR(device, function, device_id)  \
        .dev = (device),                        \
        .func = (function),                     \
        .dev_id = (device_id)

static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
                /* Memory controller */
        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
                        /* Exists only for RDIMM */
        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

                /* Channel 0 */
        { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
        { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
        { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
        { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

                /* Channel 1 */
        { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
        { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
        { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
        { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

                /* Channel 2 */
        { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

                /* Generic Non-core registers */
        /*
         * This is the PCI device found on i7core and Xeon 35xx (8086:2c41).
         * On Xeon 55xx, however, it has a different id (8086:2c40), so the
         * probing code needs to test for the other address in case this
         * one fails.
         */
        { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },

};

static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
        { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
        { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
        { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },

        { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
        { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
        { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
        { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },

        { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
        { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
        { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
        { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },

        /*
         * This PCI device has an alternate address on some
         * processors, like the Core i7 860.
         */
        { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
};

static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
                /* Memory controller */
        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
                        /* Exists only for RDIMM */
        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },

                /* Channel 0 */
        { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
        { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
        { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
        { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },

                /* Channel 1 */
        { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
        { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
        { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
        { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },

                /* Channel 2 */
        { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },

                /* Generic Non-core registers */
        { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },

};

#define PCI_ID_TABLE_ENTRY(A) { .descr = A, .n_devs = ARRAY_SIZE(A) }
static const struct pci_id_table pci_dev_table[] = {
        PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
        PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
        PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
        {0,}                    /* 0 terminated list. */
};

/*
 *      pci_device_id   table for which devices we are looking for
 */
static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
        {0,}                    /* 0 terminated list. */
};

/****************************************************************************
                        Ancillary status routines
 ****************************************************************************/

        /* MC_CONTROL bits */
#define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + (ch))))
#define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))

        /* MC_STATUS bits */
#define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
#define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << (ch)))

        /* MC_MAX_DOD read functions */
static inline int numdimms(u32 dimms)
{
        return (dimms & 0x3) + 1;
}

static inline int numrank(u32 rank)
{
        static int ranks[4] = { 1, 2, 4, -EINVAL };

        return ranks[rank & 0x3];
}

static inline int numbank(u32 bank)
{
        static int banks[4] = { 4, 8, 16, -EINVAL };

        return banks[bank & 0x3];
}

static inline int numrow(u32 row)
{
        static int rows[8] = {
                1 << 12, 1 << 13, 1 << 14, 1 << 15,
                1 << 16, -EINVAL, -EINVAL, -EINVAL,
        };

        return rows[row & 0x7];
}

static inline int numcol(u32 col)
{
        static int cols[4] = {
                1 << 10, 1 << 11, 1 << 12, -EINVAL,
        };
        return cols[col & 0x3];
}

static struct i7core_dev *get_i7core_dev(u8 socket)
{
        struct i7core_dev *i7core_dev;

        list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
                if (i7core_dev->socket == socket)
                        return i7core_dev;
        }

        return NULL;
}

static struct i7core_dev *alloc_i7core_dev(u8 socket,
                                           const struct pci_id_table *table)
{
        struct i7core_dev *i7core_dev;

        i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
        if (!i7core_dev)
                return NULL;

        i7core_dev->pdev = kcalloc(table->n_devs, sizeof(*i7core_dev->pdev),
                                   GFP_KERNEL);
        if (!i7core_dev->pdev) {
                kfree(i7core_dev);
                return NULL;
        }

        i7core_dev->socket = socket;
        i7core_dev->n_devs = table->n_devs;
        list_add_tail(&i7core_dev->list, &i7core_edac_list);

        return i7core_dev;
}

static void free_i7core_dev(struct i7core_dev *i7core_dev)
{
        list_del(&i7core_dev->list);
        kfree(i7core_dev->pdev);
        kfree(i7core_dev);
}

/****************************************************************************
                        Memory check routines
 ****************************************************************************/
static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
                                          unsigned func)
{
        struct i7core_dev *i7core_dev = get_i7core_dev(socket);
        int i;

        if (!i7core_dev)
                return NULL;

        for (i = 0; i < i7core_dev->n_devs; i++) {
                if (!i7core_dev->pdev[i])
                        continue;

                if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
                    PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
                        return i7core_dev->pdev[i];
                }
        }

        return NULL;
}

/**
 * i7core_get_active_channels() - gets the number of channels and csrows
 * @socket:     Quick Path Interconnect socket
 * @channels:   Number of channels that will be returned
 * @csrows:     Number of csrows found
 *
 * Since the EDAC core needs to know in advance the number of available
 * channels and csrows in order to allocate memory for them, probing is
 * done in two similar steps. The first step, implemented by this function,
 * counts the csrows/channels present on one socket, so that the mci
 * components can be sized properly.
 *
 * It should be noted that none of the currently available datasheets
 * explain, or even mention, how csrows are seen by the memory controller,
 * so this driver fakes a csrow description by mapping one DIMM to one
 * csrow.
 */
static int i7core_get_active_channels(const u8 socket, unsigned *channels,
                                      unsigned *csrows)
{
        struct pci_dev *pdev = NULL;
        int i, j;
        u32 status, control;

        *channels = 0;
        *csrows = 0;

        pdev = get_pdev_slot_func(socket, 3, 0);
        if (!pdev) {
                i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
                              socket);
                return -ENODEV;
        }

        /* Device 3 function 0 reads */
        pci_read_config_dword(pdev, MC_STATUS, &status);
        pci_read_config_dword(pdev, MC_CONTROL, &control);

        for (i = 0; i < NUM_CHANS; i++) {
                u32 dimm_dod[3];
                /* Check if the channel is active */
                if (!(control & (1 << (8 + i))))
                        continue;

                /* Check if the channel is disabled */
                if (status & (1 << i))
                        continue;

                pdev = get_pdev_slot_func(socket, i + 4, 1);
                if (!pdev) {
                        i7core_printk(KERN_ERR, "Couldn't find socket %d "
                                                "fn %d.%d!!!\n",
                                                socket, i + 4, 1);
                        return -ENODEV;
                }
                /* Devices 4-6 function 1 */
                pci_read_config_dword(pdev,
                                MC_DOD_CH_DIMM0, &dimm_dod[0]);
                pci_read_config_dword(pdev,
                                MC_DOD_CH_DIMM1, &dimm_dod[1]);
                pci_read_config_dword(pdev,
                                MC_DOD_CH_DIMM2, &dimm_dod[2]);

                (*channels)++;

                for (j = 0; j < 3; j++) {
                        if (!DIMM_PRESENT(dimm_dod[j]))
                                continue;
                        (*csrows)++;
                }
        }

        debugf0("Number of active channels on socket %d: %d\n",
                socket, *channels);

        return 0;
}

static int get_dimm_config(const struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        struct csrow_info *csr;
        struct pci_dev *pdev;
        int i, j;
        int csrow = 0;
        unsigned long last_page = 0;
        enum edac_type mode;
        enum mem_type mtype;

        /* Get data from the MC register, function 0 */
        pdev = pvt->pci_mcr[0];
        if (!pdev)
                return -ENODEV;

        /* Device 3 function 0 reads */
        pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
        pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
        pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
        pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);

        debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
                pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
                pvt->info.max_dod, pvt->info.ch_map);

        if (ECC_ENABLED(pvt)) {
                debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
                if (ECCx8(pvt))
                        mode = EDAC_S8ECD8ED;
                else
                        mode = EDAC_S4ECD4ED;
        } else {
                debugf0("ECC disabled\n");
                mode = EDAC_NONE;
        }

        /* FIXME: need to handle the error codes */
        debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
                "x%x x 0x%x\n",
                numdimms(pvt->info.max_dod),
                numrank(pvt->info.max_dod >> 2),
                numbank(pvt->info.max_dod >> 4),
                numrow(pvt->info.max_dod >> 6),
                numcol(pvt->info.max_dod >> 9));

        for (i = 0; i < NUM_CHANS; i++) {
                u32 data, dimm_dod[3], value[8];

                if (!pvt->pci_ch[i][0])
                        continue;

                if (!CH_ACTIVE(pvt, i)) {
                        debugf0("Channel %i is not active\n", i);
                        continue;
                }
                if (CH_DISABLED(pvt, i)) {
                        debugf0("Channel %i is disabled\n", i);
                        continue;
                }

                /* Devices 4-6 function 0 */
                pci_read_config_dword(pvt->pci_ch[i][0],
                                MC_CHANNEL_DIMM_INIT_PARAMS, &data);

                pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
                                                4 : 2;

                if (data & REGISTERED_DIMM)
                        mtype = MEM_RDDR3;
                else
                        mtype = MEM_DDR3;
#if 0
                if (data & THREE_DIMMS_PRESENT)
                        pvt->channel[i].dimms = 3;
                else if (data & SINGLE_QUAD_RANK_PRESENT)
                        pvt->channel[i].dimms = 1;
                else
                        pvt->channel[i].dimms = 2;
#endif

                /* Devices 4-6 function 1 */
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM0, &dimm_dod[0]);
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM1, &dimm_dod[1]);
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM2, &dimm_dod[2]);

                debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
                        "%d ranks, %cDIMMs\n",
                        i,
                        RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
                        data,
                        pvt->channel[i].ranks,
                        (data & REGISTERED_DIMM) ? 'R' : 'U');

                for (j = 0; j < 3; j++) {
                        u32 banks, ranks, rows, cols;
                        u32 size, npages;

                        if (!DIMM_PRESENT(dimm_dod[j]))
                                continue;

                        banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
                        ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
                        rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
                        cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));

                        /* DDR3 has 8 I/O banks */
                        size = (rows * cols * banks * ranks) >> (20 - 3);
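                        /*
                         * i.e. size[MiB] = rows * cols * banks * ranks * 8
                         * bytes / 2^20; the "- 3" in the shift folds in
                         * the factor of 8.
                         */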

                        pvt->channel[i].dimms++;

                        debugf0("\tdimm %d %d MiB offset: %x, "
                                "bank: %d, rank: %d, row: %#x, col: %#x\n",
                                j, size,
                                RANKOFFSET(dimm_dod[j]),
                                banks, ranks, rows, cols);

                        npages = MiB_TO_PAGES(size);

                        csr = &mci->csrows[csrow];
                        csr->first_page = last_page + 1;
                        last_page += npages;
                        csr->last_page = last_page;
                        csr->nr_pages = npages;

                        csr->page_mask = 0;
                        csr->grain = 8;
                        csr->csrow_idx = csrow;
                        csr->nr_channels = 1;

                        csr->channels[0].chan_idx = i;
                        csr->channels[0].ce_count = 0;

                        pvt->csrow_map[i][j] = csrow;

                        switch (banks) {
                        case 4:
                                csr->dtype = DEV_X4;
                                break;
                        case 8:
                                csr->dtype = DEV_X8;
                                break;
                        case 16:
                                csr->dtype = DEV_X16;
                                break;
                        default:
                                csr->dtype = DEV_UNKNOWN;
                        }

                        csr->edac_mode = mode;
                        csr->mtype = mtype;

                        csrow++;
                }

                pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
                pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
                pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
                pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
                pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
                pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
                pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
                pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
                debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
                for (j = 0; j < 8; j++)
                        debugf1("\t\t%#x\t%#x\t%#x\n",
                                (value[j] >> 27) & 0x1,
                                (value[j] >> 24) & 0x7,
                                (value[j] & ((1 << 24) - 1)));
        }

        return 0;
}

/****************************************************************************
                        Error insertion routines
 ****************************************************************************/

/* The i7core has independent error injection features per channel.
   However, to keep the code simpler, we don't allow enabling error
   injection on more than one channel at a time.
   Also, since a change to an inject parameter is only applied at enable
   time, we disable error injection on all writes to the sysfs nodes that
   control error injection.
 */
static int disable_inject(const struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;

        pvt->inject.enable = 0;

        if (!pvt->pci_ch[pvt->inject.channel][0])
                return -ENODEV;

        pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
                                MC_CHANNEL_ERROR_INJECT, 0);

        return 0;
}

/*
 * i7core inject inject.section
 *
 *      accept and store error injection inject.section value
 *      bit 0 - refers to the lower 32-byte half cacheline
 *      bit 1 - refers to the upper 32-byte half cacheline
 */
static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
                                           const char *data, size_t count)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        unsigned long value;
        int rc;

        if (pvt->inject.enable)
                disable_inject(mci);

        rc = strict_strtoul(data, 10, &value);
        if ((rc < 0) || (value > 3))
                return -EIO;

        pvt->inject.section = (u32) value;
        return count;
}

static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
                                              char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        return sprintf(data, "0x%08x\n", pvt->inject.section);
}

/*
 * i7core inject.type
 *
 *      accept and store error injection inject.type value
 *      bit 0 - repeat enable - Enable error repetition
 *      bit 1 - inject ECC error
 *      bit 2 - inject parity error
 */
static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
                                        const char *data, size_t count)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        unsigned long value;
        int rc;

        if (pvt->inject.enable)
                disable_inject(mci);

        rc = strict_strtoul(data, 10, &value);
        if ((rc < 0) || (value > 7))
                return -EIO;

        pvt->inject.type = (u32) value;
        return count;
}

static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
                                              char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        return sprintf(data, "0x%08x\n", pvt->inject.type);
}

/*
 * i7core_inject_eccmask_store
 *
 * The type of error (UE/CE) will depend on the inject.eccmask value:
 *   Any bits set to a 1 will flip the corresponding ECC bit
 *   Correctable errors can be injected by flipping 1 bit or the bits within
 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 *   uncorrectable error to be injected.
 */
static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
                                        const char *data, size_t count)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        unsigned long value;
        int rc;

        if (pvt->inject.enable)
                disable_inject(mci);

        rc = strict_strtoul(data, 10, &value);
        if (rc < 0)
                return -EIO;

        pvt->inject.eccmask = (u32) value;
        return count;
}

static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
                                              char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
}

/*
 * i7core_addrmatch
 *
 * The inject_addrmatch sysfs nodes select which address fields (channel,
 * dimm, rank, bank, page and col) a write must match for the error to be
 * injected. Writing "any" to a node makes the hardware ignore that field
 * when matching.
 */

#define DECLARE_ADDR_MATCH(param, limit)                        \
static ssize_t i7core_inject_store_##param(                     \
                struct mem_ctl_info *mci,                       \
                const char *data, size_t count)                 \
{                                                               \
        struct i7core_pvt *pvt;                                 \
        long value;                                             \
        int rc;                                                 \
                                                                \
        debugf1("%s()\n", __func__);                            \
        pvt = mci->pvt_info;                                    \
                                                                \
        if (pvt->inject.enable)                                 \
                disable_inject(mci);                            \
                                                                \
        if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
                value = -1;                                     \
        else {                                                  \
                rc = strict_strtol(data, 10, &value);           \
                if ((rc < 0) || (value >= limit))               \
                        return -EIO;                            \
        }                                                       \
                                                                \
        pvt->inject.param = value;                              \
                                                                \
        return count;                                           \
}                                                               \
                                                                \
static ssize_t i7core_inject_show_##param(                      \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt;                                 \
                                                                \
        pvt = mci->pvt_info;                                    \
        debugf1("%s() pvt=%p\n", __func__, pvt);                \
        if (pvt->inject.param < 0)                              \
                return sprintf(data, "any\n");                  \
        else                                                    \
                return sprintf(data, "%d\n", pvt->inject.param);\
}

#define ATTR_ADDR_MATCH(param)                                  \
        {                                                       \
                .attr = {                                       \
                        .name = #param,                         \
                        .mode = (S_IRUGO | S_IWUSR)             \
                },                                              \
                .show  = i7core_inject_show_##param,            \
                .store = i7core_inject_store_##param,           \
        }

DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);

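/*
 * Illustrative usage (assuming this controller is mc0; the exact sysfs
 * path depends on how the mci is registered):
 *   echo 2   > /sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
 *   echo any > /sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
 */
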
static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
{
        u32 read;
        int count;

        debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
                dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
                where, val);

        for (count = 0; count < 10; count++) {
                if (count)
                        msleep(100);
                pci_write_config_dword(dev, where, val);
                pci_read_config_dword(dev, where, &read);

                if (read == val)
                        return 0;
        }

        i7core_printk(KERN_ERR, "Error while setting pci %02x:%02x.%x reg=%02x "
                "write=%08x. Read=%08x\n",
                dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
                where, val, read);

        return -EINVAL;
}

/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items will make the MCU ignore
 * that matching criterion for error injection.
 *
 * It should be noted that the error will only happen after a write
 * operation on memory that matches the condition. If REPEAT_EN is not
 * enabled in the inject mask, then it will produce just one error.
 * Otherwise, it will repeat until the inject mask is cleared.
 *
 * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
 *    is reliable enough to check if the MC is using the
 *    three channels. However, this is not clear in the datasheet.
 */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
                                       const char *data, size_t count)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 injectmask;
        u64 mask = 0;
        int  rc;
        long enable;

        if (!pvt->pci_ch[pvt->inject.channel][0])
                return 0;

        rc = strict_strtol(data, 10, &enable);
        if (rc < 0)
                return 0;

        if (enable) {
                pvt->inject.enable = 1;
        } else {
                disable_inject(mci);
                return count;
        }

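        /*
         * MC_CHANNEL_ADDR_MATCH layout, as used below: bits 41..37 are
         * "ignore" flags for dimm, rank, bank, page and col respectively;
         * the corresponding value fields sit at bits 35/36, 34, 30, 14
         * and 0.
         */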
        /* Sets pvt->inject.dimm mask */
        if (pvt->inject.dimm < 0)
                mask |= 1LL << 41;
        else {
                if (pvt->channel[pvt->inject.channel].dimms > 2)
                        mask |= (pvt->inject.dimm & 0x3LL) << 35;
                else
                        mask |= (pvt->inject.dimm & 0x1LL) << 36;
        }

        /* Sets pvt->inject.rank mask */
        if (pvt->inject.rank < 0)
                mask |= 1LL << 40;
        else {
                if (pvt->channel[pvt->inject.channel].dimms > 2)
                        mask |= (pvt->inject.rank & 0x1LL) << 34;
                else
                        mask |= (pvt->inject.rank & 0x3LL) << 34;
        }

        /* Sets pvt->inject.bank mask */
        if (pvt->inject.bank < 0)
                mask |= 1LL << 39;
        else
                mask |= (pvt->inject.bank & 0x15LL) << 30;
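        /*
         * FIXME: the 0x15 above looks like a typo for 0x1f; the sysfs node
         * accepts bank values up to 31 (see DECLARE_ADDR_MATCH(bank, 32)),
         * but 0x15 drops bits 1 and 3. Left as-is pending datasheet
         * confirmation.
         */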

        /* Sets pvt->inject.page mask */
        if (pvt->inject.page < 0)
                mask |= 1LL << 38;
        else
                mask |= (pvt->inject.page & 0xffff) << 14;

        /* Sets pvt->inject.column mask */
        if (pvt->inject.col < 0)
                mask |= 1LL << 37;
        else
                mask |= (pvt->inject.col & 0x3fff);

        /*
         * bit    0: REPEAT_EN
         * bits 1-2: MASK_HALF_CACHELINE
         * bit    3: INJECT_ECC
         * bit    4: INJECT_ADDR_PARITY
         */

        injectmask = (pvt->inject.type & 1) |
                     (pvt->inject.section & 0x3) << 1 |
                     (pvt->inject.type & 0x6) << (3 - 1);

        /* Unlock writes to registers - this register is write only */
        pci_write_config_dword(pvt->pci_noncore,
                               MC_CFG_CONTROL, 0x2);

        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ADDR_MATCH, mask);
        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ERROR_INJECT, injectmask);

        /*
         * This is undocumented, based on my tests.
         * Without writing 8 to this register, errors aren't injected.
         * Not sure why.
         */
        pci_write_config_dword(pvt->pci_noncore,
                               MC_CFG_CONTROL, 8);

        debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
                " inject 0x%08x\n",
                mask, pvt->inject.eccmask, injectmask);


        return count;
}

static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
                                        char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 injectmask;

        if (!pvt->pci_ch[pvt->inject.channel][0])
                return 0;

        pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ERROR_INJECT, &injectmask);

        debugf0("Inject error read: 0x%08x\n", injectmask);

        if (injectmask & 0x0c)
                pvt->inject.enable = 1;

        return sprintf(data, "%d\n", pvt->inject.enable);
}

#define DECLARE_COUNTER(param)                                  \
static ssize_t i7core_show_counter_##param(                     \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt = mci->pvt_info;                 \
                                                                \
        debugf1("%s()\n", __func__);                            \
        if (!pvt->ce_count_available || (pvt->is_registered))   \
                return sprintf(data, "data unavailable\n");     \
        return sprintf(data, "%lu\n",                           \
                        pvt->udimm_ce_count[param]);            \
}

#define ATTR_COUNTER(param)                                     \
        {                                                       \
                .attr = {                                       \
                        .name = __stringify(udimm##param),      \
                        .mode = (S_IRUGO | S_IWUSR)             \
                },                                              \
                .show  = i7core_show_counter_##param            \
        }

DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
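
/*
 * These counters appear in sysfs as all_channel_counts/udimm0..udimm2
 * (see the i7core_udimm_counters group below); they are only meaningful
 * for unregistered DIMMs.
 */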

/*
 * Sysfs struct
 */

static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
        ATTR_ADDR_MATCH(channel),
        ATTR_ADDR_MATCH(dimm),
        ATTR_ADDR_MATCH(rank),
        ATTR_ADDR_MATCH(bank),
        ATTR_ADDR_MATCH(page),
        ATTR_ADDR_MATCH(col),
        { } /* End of list */
};

static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
        .name  = "inject_addrmatch",
        .mcidev_attr = i7core_addrmatch_attrs,
};

static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
        ATTR_COUNTER(0),
        ATTR_COUNTER(1),
        ATTR_COUNTER(2),
        { .attr = { .name = NULL } }
};

static const struct mcidev_sysfs_group i7core_udimm_counters = {
        .name  = "all_channel_counts",
        .mcidev_attr = i7core_udimm_counters_attrs,
};

static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
        {
                .attr = {
                        .name = "inject_section",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_section_show,
                .store = i7core_inject_section_store,
        }, {
                .attr = {
                        .name = "inject_type",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_type_show,
                .store = i7core_inject_type_store,
        }, {
                .attr = {
                        .name = "inject_eccmask",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_eccmask_show,
                .store = i7core_inject_eccmask_store,
        }, {
                .grp = &i7core_inject_addrmatch,
        }, {
                .attr = {
                        .name = "inject_enable",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_enable_show,
                .store = i7core_inject_enable_store,
        },
        { }     /* End of list */
};

static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
        {
                .attr = {
                        .name = "inject_section",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_section_show,
                .store = i7core_inject_section_store,
        }, {
                .attr = {
                        .name = "inject_type",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_type_show,
                .store = i7core_inject_type_store,
        }, {
                .attr = {
                        .name = "inject_eccmask",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_eccmask_show,
                .store = i7core_inject_eccmask_store,
        }, {
                .grp = &i7core_inject_addrmatch,
        }, {
                .attr = {
                        .name = "inject_enable",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_enable_show,
                .store = i7core_inject_enable_store,
        }, {
                .grp = &i7core_udimm_counters,
        },
        { }     /* End of list */
};

/****************************************************************************
        Device initialization routines: put/get, init/exit
 ****************************************************************************/

/*
 *      i7core_put_devices      'put' all the devices that we have
 *                              reserved via 'get'
 */
static void i7core_put_devices(struct i7core_dev *i7core_dev)
{
        int i;

        debugf0(__FILE__ ": %s()\n", __func__);
        for (i = 0; i < i7core_dev->n_devs; i++) {
                struct pci_dev *pdev = i7core_dev->pdev[i];
                if (!pdev)
                        continue;
                debugf0("Removing dev %02x:%02x.%d\n",
                        pdev->bus->number,
                        PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
                pci_dev_put(pdev);
        }
}

static void i7core_put_all_devices(void)
{
        struct i7core_dev *i7core_dev, *tmp;

        list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
                i7core_put_devices(i7core_dev);
                free_i7core_dev(i7core_dev);
        }
}

static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
{
        struct pci_dev *pdev = NULL;
        int i;

        /*
         * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core pci buses
         * aren't announced by ACPI, so we need a legacy PCI bus scan to
         * detect them.
         */
        while (table && table->descr) {
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
                if (unlikely(!pdev)) {
                        for (i = 0; i < MAX_SOCKET_BUSES; i++)
                                pcibios_scan_specific_bus(255-i);
                }
                pci_dev_put(pdev);
                table++;
        }
}

static unsigned i7core_pci_lastbus(void)
{
        int last_bus = 0, bus;
        struct pci_bus *b = NULL;

        while ((b = pci_find_next_bus(b)) != NULL) {
                bus = b->number;
                debugf0("Found bus %d\n", bus);
                if (bus > last_bus)
                        last_bus = bus;
        }

        debugf0("Last bus %d\n", last_bus);

        return last_bus;
}

/*
 *      i7core_get_onedevice()  Find and perform a 'get' operation on one
 *                      of the MCH's device/functions we want to reference
 *                      for this driver
 */
static int i7core_get_onedevice(struct pci_dev **prev,
                                const struct pci_id_table *table,
                                const unsigned devno,
                                const unsigned last_bus)
{
        struct i7core_dev *i7core_dev;
        const struct pci_id_descr *dev_descr = &table->descr[devno];

        struct pci_dev *pdev = NULL;
        u8 bus = 0;
        u8 socket = 0;

        pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                              dev_descr->dev_id, *prev);

        /*
         * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core regs
         * are at id 8086:2c40, instead of 8086:2c41, so we need
         * to probe for the alternate address in case of failure.
         */
        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
                                      *prev);

        if (!pdev) {
                if (*prev) {
                        *prev = pdev;
                        return 0;
                }

                if (dev_descr->optional)
                        return 0;

                if (devno == 0)
                        return -ENODEV;

                i7core_printk(KERN_INFO,
                        "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
                        dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

                /* End of list, leave */
                return -ENODEV;
        }
        bus = pdev->bus->number;

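        /*
         * The non-core buses are allocated top-down from bus 255 (see the
         * MAX_SOCKET_BUSES comment above), so the socket number follows
         * from the distance to the last PCI bus in the system.
         */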
1392         socket = last_bus - bus;
1393
1394         i7core_dev = get_i7core_dev(socket);
1395         if (!i7core_dev) {
1396                 i7core_dev = alloc_i7core_dev(socket, table);
1397                 if (!i7core_dev) {
1398                         pci_dev_put(pdev);
1399                         return -ENOMEM;
1400                 }
1401         }
1402
1403         if (i7core_dev->pdev[devno]) {
1404                 i7core_printk(KERN_ERR,
1405                         "Duplicated device for "
1406                         "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1407                         bus, dev_descr->dev, dev_descr->func,
1408                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1409                 pci_dev_put(pdev);
1410                 return -ENODEV;
1411         }
1412
1413         i7core_dev->pdev[devno] = pdev;
1414
1415         /* Sanity check */
1416         if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1417                         PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1418                 i7core_printk(KERN_ERR,
1419                         "Device PCI ID %04x:%04x "
1420                         "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1421                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1422                         bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1423                         bus, dev_descr->dev, dev_descr->func);
1424                 return -ENODEV;
1425         }
1426
1427         /* Be sure that the device is enabled */
1428         if (unlikely(pci_enable_device(pdev) < 0)) {
1429                 i7core_printk(KERN_ERR,
1430                         "Couldn't enable "
1431                         "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1432                         bus, dev_descr->dev, dev_descr->func,
1433                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1434                 return -ENODEV;
1435         }
1436
1437         debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1438                 socket, bus, dev_descr->dev,
1439                 dev_descr->func,
1440                 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1441
1442         /*
1443          * As stated in drivers/pci/search.c, the reference count for
1444          * @from is always decremented if it is not %NULL. Since we iterate
1445          * until pci_get_device() returns NULL, take an extra reference here.
1446          */
1447         pci_dev_get(pdev);
1448
1449         *prev = pdev;
1450
1451         return 0;
1452 }
1453
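/*
 * i7core_get_all_devices - walk every pci_id_table entry, calling
 * i7core_get_onedevice() repeatedly for each descriptor. The pdev cursor
 * advances through all matching devices, so every socket is picked up
 * before moving on to the next descriptor.
 */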
1454 static int i7core_get_all_devices(void)
1455 {
1456         int i, rc, last_bus;
1457         struct pci_dev *pdev = NULL;
1458         const struct pci_id_table *table = pci_dev_table;
1459
1460         last_bus = i7core_pci_lastbus();
1461
1462         while (table && table->descr) {
1463                 for (i = 0; i < table->n_devs; i++) {
1464                         pdev = NULL;
1465                         do {
1466                                 rc = i7core_get_onedevice(&pdev, table, i,
1467                                                           last_bus);
1468                                 if (rc < 0) {
1469                                         if (i == 0) {
1470                                                 i = table->n_devs;
1471                                                 break;
1472                                         }
1473                                         i7core_put_all_devices();
1474                                         return -ENODEV;
1475                                 }
1476                         } while (pdev);
1477                 }
1478                 table++;
1479         }
1480
1481         return 0;
1482 }
1483
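/*
 * mci_bind_devs - sort the devices collected for this socket into the
 * private structure: slot 3 holds the MCR functions, slots 4 and up the
 * per-channel functions, and device 0 function 0 is the non-core device
 * used below to detect the processor family. The presence of dev 3,
 * func 2 indicates registered (RDIMM) memory.
 */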
1484 static int mci_bind_devs(struct mem_ctl_info *mci,
1485                          struct i7core_dev *i7core_dev)
1486 {
1487         struct i7core_pvt *pvt = mci->pvt_info;
1488         struct pci_dev *pdev;
1489         int i, func, slot;
1490         char *family;
1491
1492         pvt->is_registered = false;
1493         pvt->enable_scrub  = false;
1494         for (i = 0; i < i7core_dev->n_devs; i++) {
1495                 pdev = i7core_dev->pdev[i];
1496                 if (!pdev)
1497                         continue;
1498
1499                 func = PCI_FUNC(pdev->devfn);
1500                 slot = PCI_SLOT(pdev->devfn);
1501                 if (slot == 3) {
1502                         if (unlikely(func > MAX_MCR_FUNC))
1503                                 goto error;
1504                         pvt->pci_mcr[func] = pdev;
1505                 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1506                         if (unlikely(func > MAX_CHAN_FUNC))
1507                                 goto error;
1508                         pvt->pci_ch[slot - 4][func] = pdev;
1509                 } else if (!slot && !func) {
1510                         pvt->pci_noncore = pdev;
1511
1512                         /* Detect the processor family */
1513                         switch (pdev->device) {
1514                         case PCI_DEVICE_ID_INTEL_I7_NONCORE:
1515                                 family = "Xeon 35xx/ i7core";
1516                                 pvt->enable_scrub = false;
1517                                 break;
1518                         case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
1519                                 family = "i7-800/i5-700";
1520                                 pvt->enable_scrub = false;
1521                                 break;
1522                         case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
1523                                 family = "Xeon 34xx";
1524                                 pvt->enable_scrub = false;
1525                                 break;
1526                         case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
1527                                 family = "Xeon 55xx";
1528                                 pvt->enable_scrub = true;
1529                                 break;
1530                         case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
1531                                 family = "Xeon 56xx / i7-900";
1532                                 pvt->enable_scrub = true;
1533                                 break;
1534                         default:
1535                                 family = "unknown";
1536                                 pvt->enable_scrub = false;
1537                         }
1538                         debugf0("Detected processor type %s\n", family);
1539                 } else
1540                         goto error;
1541
1542                 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1543                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1544                         pdev, i7core_dev->socket);
1545
1546                 if (PCI_SLOT(pdev->devfn) == 3 &&
1547                         PCI_FUNC(pdev->devfn) == 2)
1548                         pvt->is_registered = true;
1549         }
1550
1551         return 0;
1552
1553 error:
1554         i7core_printk(KERN_ERR, "Device %d, function %d "
1555                       "is out of the expected range\n",
1556                       slot, func);
1557         return -EINVAL;
1558 }
1559
1560 /****************************************************************************
1561                         Error check routines
1562  ****************************************************************************/
1563 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1564                                       const int chan,
1565                                       const int dimm,
1566                                       const int add)
1567 {
1568         char *msg;
1569         struct i7core_pvt *pvt = mci->pvt_info;
1570         int row = pvt->csrow_map[chan][dimm], i;
1571
1572         for (i = 0; i < add; i++) {
1573                 msg = kasprintf(GFP_KERNEL, "Corrected error "
1574                                 "(Socket=%d channel=%d dimm=%d)",
1575                                 pvt->i7core_dev->socket, chan, dimm);
1576
1577                 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1578                 kfree(msg);
1579         }
1580 }
1581
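/*
 * The per-DIMM CE counters are 15 bits wide, so a delta that goes
 * negative means the hardware counter wrapped since the last read;
 * the code below compensates by adding back 0x7fff, the largest value
 * the counter can hold.
 */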
1582 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1583                                          const int chan,
1584                                          const int new0,
1585                                          const int new1,
1586                                          const int new2)
1587 {
1588         struct i7core_pvt *pvt = mci->pvt_info;
1589         int add0 = 0, add1 = 0, add2 = 0;
1590         /* Update the CE counters, unless this is the first read */
1591         if (pvt->ce_count_available) {
1592                 /* Compute deltas against the last read */
1593
1594                 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1595                 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1596                 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1597
1598                 if (add2 < 0)
1599                         add2 += 0x7fff;
1600                 pvt->rdimm_ce_count[chan][2] += add2;
1601
1602                 if (add1 < 0)
1603                         add1 += 0x7fff;
1604                 pvt->rdimm_ce_count[chan][1] += add1;
1605
1606                 if (add0 < 0)
1607                         add0 += 0x7fff;
1608                 pvt->rdimm_ce_count[chan][0] += add0;
1609         } else
1610                 pvt->ce_count_available = 1;
1611
1612         /* Store the new values */
1613         pvt->rdimm_last_ce_count[chan][2] = new2;
1614         pvt->rdimm_last_ce_count[chan][1] = new1;
1615         pvt->rdimm_last_ce_count[chan][0] = new0;
1616
1617         /* Update the EDAC core counters */
1618         if (add0 != 0)
1619                 i7core_rdimm_update_csrow(mci, chan, 0, add0);
1620         if (add1 != 0)
1621                 i7core_rdimm_update_csrow(mci, chan, 1, add1);
1622         if (add2 != 0)
1623                 i7core_rdimm_update_csrow(mci, chan, 2, add2);
1624
1625 }
1626
1627 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1628 {
1629         struct i7core_pvt *pvt = mci->pvt_info;
1630         u32 rcv[3][2];
1631         int i, new0, new1, new2;
1632
1633         /* Read dev 3, func 2 MC_COR_ECC_CNT registers directly */
1634         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1635                                                                 &rcv[0][0]);
1636         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1637                                                                 &rcv[0][1]);
1638         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1639                                                                 &rcv[1][0]);
1640         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1641                                                                 &rcv[1][1]);
1642         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1643                                                                 &rcv[2][0]);
1644         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1645                                                                 &rcv[2][1]);
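        /*
         * Each 32-bit register read above packs two per-DIMM counters,
         * and each channel owns two consecutive MC_COR_ECC_CNT
         * registers, hence the rcv[channel][reg] layout.
         */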
1646         for (i = 0 ; i < 3; i++) {
1647                 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1648                         (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1649                 /* If the channel has 3 DIMMs */
1650                 if (pvt->channel[i].dimms > 2) {
1651                         new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1652                         new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1653                         new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1654                 } else {
1655                         new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1656                                         DIMM_BOT_COR_ERR(rcv[i][0]);
1657                         new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1658                                         DIMM_BOT_COR_ERR(rcv[i][1]);
1659                         new2 = 0;
1660                 }
1661
1662                 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1663         }
1664 }
1665
1666 /* This function is based on the device 3 function 4 registers as described on:
1667  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1668  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1669  * also available at:
1670  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1671  */
1672 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1673 {
1674         struct i7core_pvt *pvt = mci->pvt_info;
1675         u32 rcv1, rcv0;
1676         int new0, new1, new2;
1677
1678         if (!pvt->pci_mcr[4]) {
1679                 debugf0("%s MCR registers not found\n", __func__);
1680                 return;
1681         }
1682
1683         /* Corrected test errors */
1684         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1685         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1686
1687         /* Store the new values */
1688         new2 = DIMM2_COR_ERR(rcv1);
1689         new1 = DIMM1_COR_ERR(rcv0);
1690         new0 = DIMM0_COR_ERR(rcv0);
1691
1692         /* Update the CE counters, unless this is the first read */
1693         if (pvt->ce_count_available) {
1694                 /* Compute deltas against the last read */
1695                 int add0, add1, add2;
1696
1697                 add2 = new2 - pvt->udimm_last_ce_count[2];
1698                 add1 = new1 - pvt->udimm_last_ce_count[1];
1699                 add0 = new0 - pvt->udimm_last_ce_count[0];
1700
1701                 if (add2 < 0)
1702                         add2 += 0x7fff;
1703                 pvt->udimm_ce_count[2] += add2;
1704
1705                 if (add1 < 0)
1706                         add1 += 0x7fff;
1707                 pvt->udimm_ce_count[1] += add1;
1708
1709                 if (add0 < 0)
1710                         add0 += 0x7fff;
1711                 pvt->udimm_ce_count[0] += add0;
1712
1713                 if (add0 | add1 | add2)
1714                         i7core_printk(KERN_ERR, "New Corrected error(s): "
1715                                       "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1716                                       add0, add1, add2);
1717         } else
1718                 pvt->ce_count_available = 1;
1719
1720         /* Store the new values */
1721         pvt->udimm_last_ce_count[2] = new2;
1722         pvt->udimm_last_ce_count[1] = new1;
1723         pvt->udimm_last_ce_count[0] = new0;
1724 }
1725
1726 /*
1727  * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and
1728  * IA-32 Architectures Software Developer's Manual Volume 3B, Nehalem is
1729  * defined as family 0x06, model 0x1a.
1730  *
1731  * The MCA registers used here are the following ones:
1732  *     struct mce field MCA Register
1733  *     m->status        MSR_IA32_MC8_STATUS
1734  *     m->addr          MSR_IA32_MC8_ADDR
1735  *     m->misc          MSR_IA32_MC8_MISC
1736  * In the case of Nehalem, the error information is encoded in the .status
1737  * and .misc fields.
1738  */
1739 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1740                                     const struct mce *m)
1741 {
1742         struct i7core_pvt *pvt = mci->pvt_info;
1743         char *type, *optype, *err, *msg;
1744         unsigned long error = m->status & 0x1ff0000l;
1745         u32 optypenum = (m->status >> 4) & 0x07;
1746         u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1747         u32 dimm = (m->misc >> 16) & 0x3;
1748         u32 channel = (m->misc >> 18) & 0x3;
1749         u32 syndrome = m->misc >> 32;
1750         u32 errnum = find_first_bit(&error, 32);
1751         int csrow;
1752
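        /*
         * Summary of the MC8 bitfields decoded above:
         *   status[24:16]  error type (errnum, via find_first_bit)
         *   status[6:4]    operation type (optypenum)
         *   status[52:38]  corrected error count
         *   misc[17:16]    DIMM, misc[19:18] channel, misc[63:32] syndrome
         */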
1753         if (m->mcgstatus & 1)
1754                 type = "FATAL";
1755         else
1756                 type = "NON_FATAL";
1757
1758         switch (optypenum) {
1759         case 0:
1760                 optype = "generic undef request";
1761                 break;
1762         case 1:
1763                 optype = "read error";
1764                 break;
1765         case 2:
1766                 optype = "write error";
1767                 break;
1768         case 3:
1769                 optype = "addr/cmd error";
1770                 break;
1771         case 4:
1772                 optype = "scrubbing error";
1773                 break;
1774         default:
1775                 optype = "reserved";
1776                 break;
1777         }
1778
1779         switch (errnum) {
1780         case 16:
1781                 err = "read ECC error";
1782                 break;
1783         case 17:
1784                 err = "RAS ECC error";
1785                 break;
1786         case 18:
1787                 err = "write parity error";
1788                 break;
1789         case 19:
1790                 err = "redundancy loss";
1791                 break;
1792         case 20:
1793                 err = "reserved";
1794                 break;
1795         case 21:
1796                 err = "memory range error";
1797                 break;
1798         case 22:
1799                 err = "RTID out of range";
1800                 break;
1801         case 23:
1802                 err = "address parity error";
1803                 break;
1804         case 24:
1805                 err = "byte enable parity error";
1806                 break;
1807         default:
1808                 err = "unknown";
1809         }
1810
1811         /* FIXME: should convert addr into bank and rank information */
1812         msg = kasprintf(GFP_ATOMIC,
1813                 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1814                 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1815                 type, (long long) m->addr, m->cpu, dimm, channel,
1816                 syndrome, core_err_cnt, (long long)m->status,
1817                 (long long)m->misc, optype, err);
1818
1819         debugf0("%s", msg);
1820
1821         csrow = pvt->csrow_map[channel][dimm];
1822
1823         /* Call the helper to output message */
1824         if (m->mcgstatus & 1)
1825                 edac_mc_handle_fbd_ue(mci, csrow, 0,
1826                                 0 /* FIXME: should be channel here */, msg);
1827         else if (!pvt->is_registered)
1828                 edac_mc_handle_fbd_ce(mci, csrow,
1829                                 0 /* FIXME: should be channel here */, msg);
1830
1831         kfree(msg);
1832 }
1833
1834 /*
1835  *      i7core_check_error      Retrieve and process errors reported by the
1836  *                              hardware. Called by the Core module.
1837  */
1838 static void i7core_check_error(struct mem_ctl_info *mci)
1839 {
1840         struct i7core_pvt *pvt = mci->pvt_info;
1841         int i;
1842         unsigned count = 0;
1843         struct mce *m;
1844
1845         /*
1846          * MCE first step: Copy all mce errors into a temporary buffer
1847          * We use a double buffering here, to reduce the risk of
1848          * losing an error.
1849          */
1850         smp_rmb();
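        /*
         * mce_out is the producer index (advanced at NMI time by
         * i7core_mce_check_error()) and mce_in is the consumer index;
         * adding MCE_LOG_LEN keeps the fill-count subtraction
         * non-negative before the modulo.
         */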
1851         count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1852                 % MCE_LOG_LEN;
1853         if (!count)
1854                 goto check_ce_error;
1855
1856         m = pvt->mce_outentry;
1857         if (pvt->mce_in + count > MCE_LOG_LEN) {
1858                 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1859
1860                 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1861                 smp_wmb();
1862                 pvt->mce_in = 0;
1863                 count -= l;
1864                 m += l;
1865         }
1866         memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1867         smp_wmb();
1868         pvt->mce_in += count;
1869
1870         smp_rmb();
1871         if (pvt->mce_overrun) {
1872                 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1873                               pvt->mce_overrun);
1874                 smp_wmb();
1875                 pvt->mce_overrun = 0;
1876         }
1877
1878         /*
1879          * MCE second step: parse errors and display
1880          */
1881         for (i = 0; i < count; i++)
1882                 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1883
1884         /*
1885          * Now, let's increment CE error counts
1886          */
1887 check_ce_error:
1888         if (!pvt->is_registered)
1889                 i7core_udimm_check_mc_ecc_err(mci);
1890         else
1891                 i7core_rdimm_check_mc_ecc_err(mci);
1892 }
1893
1894 /*
1895  * i7core_mce_check_error       Replicates the mcelog routine to get errors.
1896  *                              This routine simply queues mcelog errors and
1897  *                              returns. The error itself is handled later
1898  *                              by i7core_check_error.
1899  * WARNING: as this routine may be called at NMI time, extra care must be
1900  * taken to avoid deadlocks and to be as fast as possible.
1901  */
1902 static int i7core_mce_check_error(void *priv, struct mce *mce)
1903 {
1904         struct mem_ctl_info *mci = priv;
1905         struct i7core_pvt *pvt = mci->pvt_info;
1906
1907         /*
1908          * Just let mcelog handle it if the error is outside the memory
1909          * controller: memory errors carry an MCACOD where bits 15:7 read 1.
1910          */
1911         if (((mce->status & 0xffff) >> 7) != 1)
1912                 return 0;
1913
1914         /* Bank 8 registers are the only ones that we know how to handle */
1915         if (mce->bank != 8)
1916                 return 0;
1917
1918 #ifdef CONFIG_SMP
1919         /* Only handle events that belong to the socket driven by this MC */
1920         if (mce->socketid != pvt->i7core_dev->socket)
1921                 return 0;
1922 #endif
1923
1924         smp_rmb();
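        /*
         * The ring is full when advancing mce_out would collide with
         * mce_in; in that case drop the event and account for it in
         * mce_overrun, which i7core_check_error() reports later.
         */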
1925         if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1926                 smp_wmb();
1927                 pvt->mce_overrun++;
1928                 return 0;
1929         }
1930
1931         /* Copy the memory error into the ring buffer */
1932         memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1933         smp_wmb();
1934         pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1935
1936         /* Handle fatal errors immediately */
1937         if (mce->mcgstatus & 1)
1938                 i7core_check_error(mci);
1939
1940         /* Advise mcelog that the errors were handled */
1941         return 1;
1942 }
1943
1944 /*
1945  * set_sdram_scrub_rate         This routine sets the byte/sec bandwidth scrub
1946  *                              rate in hardware according to the SCRUBINTERVAL
1947  *                              formula found in the datasheet.
1948  */
1949 static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
1950 {
1951         struct i7core_pvt *pvt = mci->pvt_info;
1952         struct pci_dev *pdev;
1953         const u32 cache_line_size = 64;
1954         const u32 freq_dclk = 800*1000000;
1955         u32 dw_scrub;
1956         u32 dw_ssr;
1957
1958         /* Get data from the MC register, function 2 */
1959         pdev = pvt->pci_mcr[2];
1960         if (!pdev)
1961                 return -ENODEV;
1962
1963         pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
1964
1965         if (new_bw == 0) {
1966                 /* Prepare to disable patrol scrub */
1967                 dw_scrub &= ~STARTSCRUB;
1968                 /* Stop the patrol scrub engine */
1969                 write_and_test(pdev, MC_SCRUB_CONTROL, dw_scrub & ~0x00ffffff);
1970
1971                 /* Get current status of scrub rate and set bit to disable */
1972                 pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
1973                 dw_ssr &= ~SSR_MODE_MASK;
1974                 dw_ssr |= SSR_MODE_DISABLE;
1975         } else {
1976                 /*
1977                  * Translate the desired scrub rate into a SCRUBINTERVAL
1978                  * register value and program it.
1979                  */
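                /*
                 * Worked example, assuming the 800 MHz DCLK and 64-byte
                 * cache lines declared above: a requested bandwidth of
                 * 5000000 bytes/sec gives
                 * SCRUBINTERVAL = 64 * 800000000 / 5000000 = 10240
                 * DCLK cycles between scrubbed cache lines.
                 */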
1980                 dw_scrub = 0x00ffffff & (cache_line_size * freq_dclk / new_bw);
1981
1982                 /* Start the patrol scrub engine */
1983                 pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
1984                                        STARTSCRUB | dw_scrub);
1985
1986                 /* Get current status of scrub rate and set bit to enable */
1987                 pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
1988                 dw_ssr &= ~SSR_MODE_MASK;
1989                 dw_ssr |= SSR_MODE_ENABLE;
1990         }
1991         /* Disable or enable scrubbing */
1992         pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
1993
1994         return new_bw;
1995 }
1996
1997 /*
1998  * get_sdram_scrub_rate         This routine converts the current scrub rate
1999  *                              value into byte/sec bandwidth according to the
2000  *                              SCRUBINTERVAL formula found in the datasheet.
2001  */
2002 static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
2003 {
2004         struct i7core_pvt *pvt = mci->pvt_info;
2005         struct pci_dev *pdev;
2006         const u32 cache_line_size = 64;
2007         const u32 freq_dclk = 800*1000000;
2008         u32 scrubval;
2009
2010         /* Get data from the MC register, function 2 */
2011         pdev = pvt->pci_mcr[2];
2012         if (!pdev)
2013                 return -ENODEV;
2014
2015         /* Get current scrub control data */
2016         pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
2017
2018         /* Mask out the highest 8 bits */
2019         scrubval &= 0x00ffffff;
2020         if (!scrubval)
2021                 return 0;
2022
2023         /* Convert the scrub rate value into byte/sec bandwidth */
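        /*
         * This is the inverse of the set_sdram_scrub_rate() formula:
         * e.g. a SCRUBINTERVAL of 10240 maps back to
         * 64 * 800000000 / 10240 = 5000000 bytes/sec.
         */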
2024         return 0xffffffff & (cache_line_size * freq_dclk / (u64) scrubval);
2025 }
2026
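/*
 * The two low bits of MC_CFG_CONTROL appear to gate write access to the
 * MC configuration registers: clear them first, then program the unlock
 * (or, on teardown, lock) pattern before touching the scrub settings.
 */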
2027 static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
2028 {
2029         struct i7core_pvt *pvt = mci->pvt_info;
2030         u32 pci_lock;
2031
2032         /* Unlock writes to pci registers */
2033         pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2034         pci_lock &= ~0x3;
2035         pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2036                                pci_lock | MC_CFG_UNLOCK);
2037
2038         mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
2039         mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
2040 }
2041
2042 static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
2043 {
2044         struct i7core_pvt *pvt = mci->pvt_info;
2045         u32 pci_lock;
2046
2047         /* Lock writes to pci registers */
2048         pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2049         pci_lock &= ~0x3;
2050         pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2051                                pci_lock | MC_CFG_LOCK);
2052 }
2053
2054 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
2055 {
2056         pvt->i7core_pci = edac_pci_create_generic_ctl(
2057                                                 &pvt->i7core_dev->pdev[0]->dev,
2058                                                 EDAC_MOD_STR);
2059         if (unlikely(!pvt->i7core_pci))
2060                 pr_warn("Unable to setup PCI error report via EDAC\n");
2061 }
2062
2063 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
2064 {
2065         if (likely(pvt->i7core_pci))
2066                 edac_pci_release_generic_ctl(pvt->i7core_pci);
2067         else
2068                 i7core_printk(KERN_ERR,
2069                                 "Couldn't find mem_ctl_info for socket %d\n",
2070                                 pvt->i7core_dev->socket);
2071         pvt->i7core_pci = NULL;
2072 }
2073
2074 static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
2075 {
2076         struct mem_ctl_info *mci = i7core_dev->mci;
2077         struct i7core_pvt *pvt;
2078
2079         if (unlikely(!mci || !mci->pvt_info)) {
2080                 debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
2081                         __func__, &i7core_dev->pdev[0]->dev);
2082
2083                 i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
2084                 return;
2085         }
2086
2087         pvt = mci->pvt_info;
2088
2089         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
2090                 __func__, mci, &i7core_dev->pdev[0]->dev);
2091
2092         /* Disable scrubrate setting */
2093         if (pvt->enable_scrub)
2094                 disable_sdram_scrub_setting(mci);
2095
2096         /* Disable MCE NMI handler */
2097         edac_mce_unregister(&pvt->edac_mce);
2098
2099         /* Disable EDAC polling */
2100         i7core_pci_ctl_release(pvt);
2101
2102         /* Remove MC sysfs nodes */
2103         edac_mc_del_mc(mci->dev);
2104
2105         debugf1("%s: free mci struct\n", mci->ctl_name);
2106         kfree(mci->ctl_name);
2107         edac_mc_free(mci);
2108         i7core_dev->mci = NULL;
2109 }
2110
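/*
 * i7core_register_mci - bring up one memory controller instance:
 * count the active channels, allocate and fill the mem_ctl_info,
 * bind the PCI devices, read the DIMM configuration, register with
 * the EDAC core and the generic PCI control, and finally hook into
 * edac_mce so that machine check events reach this driver.
 */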
2111 static int i7core_register_mci(struct i7core_dev *i7core_dev)
2112 {
2113         struct mem_ctl_info *mci;
2114         struct i7core_pvt *pvt;
2115         int rc, channels, csrows;
2116
2117         /* Check the number of active and not disabled channels */
2118         rc = i7core_get_active_channels(i7core_dev->socket, &channels, &csrows);
2119         if (unlikely(rc < 0))
2120                 return rc;
2121
2122         /* allocate a new MC control structure */
2123         mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, i7core_dev->socket);
2124         if (unlikely(!mci))
2125                 return -ENOMEM;
2126
2127         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
2128                 __func__, mci, &i7core_dev->pdev[0]->dev);
2129
2130         pvt = mci->pvt_info;
2131         memset(pvt, 0, sizeof(*pvt));
2132
2133         /* Associate i7core_dev and mci for future use */
2134         pvt->i7core_dev = i7core_dev;
2135         i7core_dev->mci = mci;
2136
2137         /*
2138          * FIXME: how to handle RDDR3 at MCI level? It is possible to have
2139          * mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
2140          * memory channels.
2141          */
2142         mci->mtype_cap = MEM_FLAG_DDR3;
2143         mci->edac_ctl_cap = EDAC_FLAG_NONE;
2144         mci->edac_cap = EDAC_FLAG_NONE;
2145         mci->mod_name = "i7core_edac.c";
2146         mci->mod_ver = I7CORE_REVISION;
2147         mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
2148                                   i7core_dev->socket);
2149         mci->dev_name = pci_name(i7core_dev->pdev[0]);
2150         mci->ctl_page_to_phys = NULL;
2151
2152         /* Store pci devices at mci for faster access */
2153         rc = mci_bind_devs(mci, i7core_dev);
2154         if (unlikely(rc < 0))
2155                 goto fail0;
2156
2157         if (pvt->is_registered)
2158                 mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
2159         else
2160                 mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;
2161
2162         /* Get dimm basic config */
2163         get_dimm_config(mci);
2164         /* record ptr to the generic device */
2165         mci->dev = &i7core_dev->pdev[0]->dev;
2166         /* Set the function pointer to an actual operation function */
2167         mci->edac_check = i7core_check_error;
2168
2169         /* Enable scrubrate setting */
2170         if (pvt->enable_scrub)
2171                 enable_sdram_scrub_setting(mci);
2172
2173         /* add this new MC control structure to EDAC's list of MCs */
2174         if (unlikely(edac_mc_add_mc(mci))) {
2175                 debugf0("MC: " __FILE__
2176                         ": %s(): failed edac_mc_add_mc()\n", __func__);
2177                 /* FIXME: perhaps some code should go here that disables error
2178                  * reporting if we just enabled it
2179                  */
2180
2181                 rc = -EINVAL;
2182                 goto fail0;
2183         }
2184
2185         /* Default error mask is any memory */
2186         pvt->inject.channel = 0;
2187         pvt->inject.dimm = -1;
2188         pvt->inject.rank = -1;
2189         pvt->inject.bank = -1;
2190         pvt->inject.page = -1;
2191         pvt->inject.col = -1;
2192
2193         /* Allocate the generic PCI control info */
2194         i7core_pci_ctl_create(pvt);
2195
2196         /* Register with edac_mce in order to receive memory errors */
2197         pvt->edac_mce.priv = mci;
2198         pvt->edac_mce.check_error = i7core_mce_check_error;
2199         rc = edac_mce_register(&pvt->edac_mce);
2200         if (unlikely(rc < 0)) {
2201                 debugf0("MC: " __FILE__
2202                         ": %s(): failed edac_mce_register()\n", __func__);
2203                 goto fail1;
2204         }
2205
2206         return 0;
2207
2208 fail1:
2209         i7core_pci_ctl_release(pvt);
2210         edac_mc_del_mc(mci->dev);
2211 fail0:
2212         kfree(mci->ctl_name);
2213         edac_mc_free(mci);
2214         i7core_dev->mci = NULL;
2215         return rc;
2216 }
2217
2218 /*
2219  *      i7core_probe    Probe for ONE instance of device to see if it is
2220  *                      present.
2221  *      return:
2222  *              0 if a device was found
2223  *              < 0 on error
2224  */
2225
2226 static int __devinit i7core_probe(struct pci_dev *pdev,
2227                                   const struct pci_device_id *id)
2228 {
2229         int rc;
2230         struct i7core_dev *i7core_dev;
2231
2232         /* get the pci devices we want to reserve for our use */
2233         mutex_lock(&i7core_edac_lock);
2234
2235         /*
2236          * All memory controllers are allocated on the first probe call.
2237          */
2238         if (unlikely(probed >= 1)) {
2239                 mutex_unlock(&i7core_edac_lock);
2240                 return -ENODEV;
2241         }
2242         probed++;
2243
2244         rc = i7core_get_all_devices();
2245         if (unlikely(rc < 0))
2246                 goto fail0;
2247
2248         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2249                 rc = i7core_register_mci(i7core_dev);
2250                 if (unlikely(rc < 0))
2251                         goto fail1;
2252         }
2253
2254         i7core_printk(KERN_INFO, "Driver loaded.\n");
2255
2256         mutex_unlock(&i7core_edac_lock);
2257         return 0;
2258
2259 fail1:
2260         list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2261                 i7core_unregister_mci(i7core_dev);
2262
2263         i7core_put_all_devices();
2264 fail0:
2265         mutex_unlock(&i7core_edac_lock);
2266         return rc;
2267 }
2268
2269 /*
2270  *      i7core_remove   destructor for one instance of device
2271  *
2272  */
2273 static void __devexit i7core_remove(struct pci_dev *pdev)
2274 {
2275         struct i7core_dev *i7core_dev;
2276
2277         debugf0(__FILE__ ": %s()\n", __func__);
2278
2279         /*
2280          * There is a problem here: the pdev value at removal time will be
2281          * wrong, since it points to the X58 device used to detect that the
2282          * machine is a Nehalem or newer design. However, due to the way
2283          * several PCI devices are grouped together to provide MC
2284          * functionality, we need a different method for releasing them.
2285          */
2286
2287         mutex_lock(&i7core_edac_lock);
2288
2289         if (unlikely(!probed)) {
2290                 mutex_unlock(&i7core_edac_lock);
2291                 return;
2292         }
2293
2294         list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2295                 i7core_unregister_mci(i7core_dev);
2296
2297         /* Release PCI resources */
2298         i7core_put_all_devices();
2299
2300         probed--;
2301
2302         mutex_unlock(&i7core_edac_lock);
2303 }
2304
2305 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2306
2307 /*
2308  *      i7core_driver   pci_driver structure for this module
2309  *
2310  */
2311 static struct pci_driver i7core_driver = {
2312         .name     = "i7core_edac",
2313         .probe    = i7core_probe,
2314         .remove   = __devexit_p(i7core_remove),
2315         .id_table = i7core_pci_tbl,
2316 };
2317
2318 /*
2319  *      i7core_init             Module entry function
2320  *                      Try to initialize this module for its devices
2321  */
2322 static int __init i7core_init(void)
2323 {
2324         int pci_rc;
2325
2326         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2327
2328         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2329         opstate_init();
2330
2331         if (use_pci_fixup)
2332                 i7core_xeon_pci_fixup(pci_dev_table);
2333
2334         pci_rc = pci_register_driver(&i7core_driver);
2335
2336         if (pci_rc >= 0)
2337                 return 0;
2338
2339         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2340                       pci_rc);
2341
2342         return pci_rc;
2343 }
2344
2345 /*
2346  *      i7core_exit()   Module exit function
2347  *                      Unregister the driver
2348  */
2349 static void __exit i7core_exit(void)
2350 {
2351         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2352         pci_unregister_driver(&i7core_driver);
2353 }
2354
2355 module_init(i7core_init);
2356 module_exit(i7core_exit);
2357
2358 MODULE_LICENSE("GPL");
2359 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2360 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2361 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2362                    I7CORE_REVISION);
2363
2364 module_param(edac_op_state, int, 0444);
2365 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");