X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?p=pandora-kernel.git;a=blobdiff_plain;f=drivers%2Fedac%2Fi7core_edac.c;h=6b8b7b41ec5f8bbad42647792417ce64f433d117;hp=772219fa10beac51d8d4d68c28d53572667f2768;hb=52707f918cca231f8461d45e78a60014795f20d9;hpb=5566cb7c91ba4ff4447278bb27896b4a2bb7d18a diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 772219fa10be..6b8b7b41ec5f 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1,9 +1,14 @@ -/* Intel 7 core Memory Controller kernel module (Nehalem) +/* Intel i7 core/Nehalem Memory Controller kernel module + * + * This driver supports yhe memory controllers found on the Intel + * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx, + * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield + * and Westmere-EP. * * This file may be distributed under the terms of the * GNU General Public License version 2 only. * - * Copyright (c) 2009 by: + * Copyright (c) 2009-2010 by: * Mauro Carvalho Chehab * * Red Hat Inc. http://www.redhat.com @@ -25,13 +30,23 @@ #include #include #include +#include #include #include +#include +#include +#include #include "edac_core.h" -/* To use the new pci_[read/write]_config_qword instead of two dword */ -#define USE_QWORD 1 +/* + * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core + * registers start at bus 255, and are not reported by BIOS. + * We currently find devices with only 2 sockets. In order to support more QPI + * Quick Path Interconnect, just increment this number. + */ +#define MAX_SOCKET_BUSES 2 + /* * Alter this version for the module when modifications are made @@ -39,10 +54,6 @@ #define I7CORE_REVISION " Ver: 1.0.0 " __DATE__ #define EDAC_MOD_STR "i7core_edac" -/* HACK: temporary, just to enable all logs, for now */ -#undef debugf0 -#define debugf0(fmt, arg...) edac_printk(KERN_INFO, "i7core", fmt, ##arg) - /* * Debug macros */ @@ -56,6 +67,10 @@ * i7core Memory Controller Registers */ + /* OFFSETS for Device 0 Function 0 */ + +#define MC_CFG_CONTROL 0x90 + /* OFFSETS for Device 3 Function 0 */ #define MC_CONTROL 0x48 @@ -74,6 +89,18 @@ #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff) #define DIMM0_COR_ERR(r) ((r) & 0x7fff) +/* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */ +#define MC_COR_ECC_CNT_0 0x80 +#define MC_COR_ECC_CNT_1 0x84 +#define MC_COR_ECC_CNT_2 0x88 +#define MC_COR_ECC_CNT_3 0x8c +#define MC_COR_ECC_CNT_4 0x90 +#define MC_COR_ECC_CNT_5 0x94 + +#define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff) +#define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff) + + /* OFFSETS for Devices 4,5 and 6 Function 0 */ #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58 @@ -102,6 +129,7 @@ #define REPEAT_EN 0x01 /* OFFSETS for Devices 4,5 and 6 Function 1 */ + #define MC_DOD_CH_DIMM0 0x48 #define MC_DOD_CH_DIMM1 0x4c #define MC_DOD_CH_DIMM2 0x50 @@ -113,7 +141,7 @@ #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7) #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5)) #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5) - #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3)| (1 << 2)) + #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2)) #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2) #define MC_DOD_NUMCOL_MASK 3 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK) @@ -177,42 +205,79 @@ struct i7core_channel { }; struct pci_id_descr { - int dev; - int func; - int dev_id; - struct pci_dev *pdev; + int dev; + int func; + int dev_id; + int optional; +}; + +struct pci_id_table { + struct pci_id_descr *descr; + int n_devs; +}; + +struct i7core_dev { + struct list_head list; + u8 socket; + struct pci_dev **pdev; + int n_devs; + struct mem_ctl_info *mci; }; struct i7core_pvt { - struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1]; - struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1]; + struct pci_dev *pci_noncore; + struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1]; + struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1]; + + struct i7core_dev *i7core_dev; + struct i7core_info info; struct i7core_inject inject; struct i7core_channel channel[NUM_CHANS]; - int channels; /* Number of active channels */ + + int channels; /* Number of active channels */ int ce_count_available; - unsigned long ce_count[MAX_DIMMS]; /* ECC corrected errors counts per dimm */ - int last_ce_count[MAX_DIMMS]; + int csrow_map[NUM_CHANS][MAX_DIMMS]; -}; + /* ECC corrected errors counts per udimm */ + unsigned long udimm_ce_count[MAX_DIMMS]; + int udimm_last_ce_count[MAX_DIMMS]; + /* ECC corrected errors counts per rdimm */ + unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS]; + int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS]; -/* Device name and register DID (Device ID) */ -struct i7core_dev_info { - const char *ctl_name; /* name for this device */ - u16 fsb_mapping_errors; /* DID for the branchmap,control */ + unsigned int is_registered; + + /* mcelog glue */ + struct edac_mce edac_mce; + + /* Fifo double buffers */ + struct mce mce_entry[MCE_LOG_LEN]; + struct mce mce_outentry[MCE_LOG_LEN]; + + /* Fifo in/out counters */ + unsigned mce_in, mce_out; + + /* Count indicator to show errors not got */ + unsigned mce_overrun; }; +/* Static vars */ +static LIST_HEAD(i7core_edac_list); +static DEFINE_MUTEX(i7core_edac_lock); + #define PCI_DESCR(device, function, device_id) \ .dev = (device), \ .func = (function), \ .dev_id = (device_id) -struct pci_id_descr pci_devs[] = { +struct pci_id_descr pci_dev_descr_i7core_nehalem[] = { /* Memory controller */ { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) }, { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) }, - { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS) }, /* if RDIMM is supported */ + /* Exists only for RDIMM */ + { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 }, { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) }, /* Channel 0 */ @@ -232,27 +297,87 @@ struct pci_id_descr pci_devs[] = { { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) }, { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) }, { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) }, + + /* Generic Non-core registers */ + /* + * This is the PCI device on i7core and on Xeon 35xx (8086:2c41) + * On Xeon 55xx, however, it has a different id (8086:2c40). So, + * the probing code needs to test for the other address in case of + * failure of this one + */ + { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) }, + +}; + +struct pci_id_descr pci_dev_descr_lynnfield[] = { + { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR) }, + { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD) }, + { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST) }, + + { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) }, + { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) }, + { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) }, + { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC) }, + + { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) }, + { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) }, + { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) }, + { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) }, + + /* + * This is the PCI device has an alternate address on some + * processors like Core i7 860 + */ + { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE) }, +}; + +struct pci_id_descr pci_dev_descr_i7core_westmere[] = { + /* Memory controller */ + { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2) }, + { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2) }, + /* Exists only for RDIMM */ + { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1 }, + { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) }, + + /* Channel 0 */ + { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) }, + { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) }, + { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) }, + { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2) }, + + /* Channel 1 */ + { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) }, + { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) }, + { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) }, + { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2) }, + + /* Channel 2 */ + { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) }, + { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) }, + { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) }, + { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) }, + + /* Generic Non-core registers */ + { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2) }, + +}; + +#define PCI_ID_TABLE_ENTRY(A) { A, ARRAY_SIZE(A) } +struct pci_id_table pci_dev_table[] = { + PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem), + PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield), + PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere), }; -#define N_DEVS ARRAY_SIZE(pci_devs) /* * pci_device_id table for which devices we are looking for - * This should match the first device at pci_devs table */ static const struct pci_device_id i7core_pci_tbl[] __devinitdata = { - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7_MCR)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)}, {0,} /* 0 terminated list. */ }; - -/* Table of devices attributes supported by this driver */ -static const struct i7core_dev_info i7core_devs[] = { - { - .ctl_name = "i7 Core", - .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7_MCR, - }, -}; - static struct edac_pci_ctl_info *i7core_pci; /**************************************************************************** @@ -264,7 +389,7 @@ static struct edac_pci_ctl_info *i7core_pci; #define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1)) /* MC_STATUS bits */ -#define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 3)) +#define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4)) #define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch)) /* MC_MAX_DOD read functions */ @@ -305,31 +430,74 @@ static inline int numcol(u32 col) return cols[col & 0x3]; } +static struct i7core_dev *get_i7core_dev(u8 socket) +{ + struct i7core_dev *i7core_dev; + + list_for_each_entry(i7core_dev, &i7core_edac_list, list) { + if (i7core_dev->socket == socket) + return i7core_dev; + } + + return NULL; +} /**************************************************************************** Memory check routines ****************************************************************************/ -static int i7core_get_active_channels(int *channels) +static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot, + unsigned func) { - struct pci_dev *pdev = NULL; + struct i7core_dev *i7core_dev = get_i7core_dev(socket); int i; - u32 status, control; - *channels = 0; + if (!i7core_dev) + return NULL; - for (i = 0; i < N_DEVS; i++) { - if (!pci_devs[i].pdev) + for (i = 0; i < i7core_dev->n_devs; i++) { + if (!i7core_dev->pdev[i]) continue; - if (PCI_SLOT(pci_devs[i].pdev->devfn) == 3 && - PCI_FUNC(pci_devs[i].pdev->devfn) == 0) { - pdev = pci_devs[i].pdev; - break; + if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot && + PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) { + return i7core_dev->pdev[i]; } } + return NULL; +} + +/** + * i7core_get_active_channels() - gets the number of channels and csrows + * @socket: Quick Path Interconnect socket + * @channels: Number of channels that will be returned + * @csrows: Number of csrows found + * + * Since EDAC core needs to know in advance the number of available channels + * and csrows, in order to allocate memory for csrows/channels, it is needed + * to run two similar steps. At the first step, implemented on this function, + * it checks the number of csrows/channels present at one socket. + * this is used in order to properly allocate the size of mci components. + * + * It should be noticed that none of the current available datasheets explain + * or even mention how csrows are seen by the memory controller. So, we need + * to add a fake description for csrows. + * So, this driver is attributing one DIMM memory for one csrow. + */ +static int i7core_get_active_channels(u8 socket, unsigned *channels, + unsigned *csrows) +{ + struct pci_dev *pdev = NULL; + int i, j; + u32 status, control; + + *channels = 0; + *csrows = 0; + + pdev = get_pdev_slot_func(socket, 3, 0); if (!pdev) { - i7core_printk(KERN_ERR, "Couldn't find fn 3.0!!!\n"); + i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n", + socket); return -ENODEV; } @@ -338,29 +506,51 @@ static int i7core_get_active_channels(int *channels) pci_read_config_dword(pdev, MC_CONTROL, &control); for (i = 0; i < NUM_CHANS; i++) { + u32 dimm_dod[3]; /* Check if the channel is active */ if (!(control & (1 << (8 + i)))) continue; /* Check if the channel is disabled */ - if (status & (1 << i)) { + if (status & (1 << i)) continue; + + pdev = get_pdev_slot_func(socket, i + 4, 1); + if (!pdev) { + i7core_printk(KERN_ERR, "Couldn't find socket %d " + "fn %d.%d!!!\n", + socket, i + 4, 1); + return -ENODEV; } + /* Devices 4-6 function 1 */ + pci_read_config_dword(pdev, + MC_DOD_CH_DIMM0, &dimm_dod[0]); + pci_read_config_dword(pdev, + MC_DOD_CH_DIMM1, &dimm_dod[1]); + pci_read_config_dword(pdev, + MC_DOD_CH_DIMM2, &dimm_dod[2]); (*channels)++; + + for (j = 0; j < 3; j++) { + if (!DIMM_PRESENT(dimm_dod[j])) + continue; + (*csrows)++; + } } - debugf0("Number of active channels: %d\n", *channels); + debugf0("Number of active channels on socket %d: %d\n", + socket, *channels); return 0; } -static int get_dimm_config(struct mem_ctl_info *mci) +static int get_dimm_config(struct mem_ctl_info *mci, int *csrow) { struct i7core_pvt *pvt = mci->pvt_info; struct csrow_info *csr; struct pci_dev *pdev; - int i, j, csrow = 0; + int i, j; unsigned long last_page = 0; enum edac_type mode; enum mem_type mtype; @@ -376,12 +566,12 @@ static int get_dimm_config(struct mem_ctl_info *mci) pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod); pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map); - debugf0("MC control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n", - pvt->info.mc_control, pvt->info.mc_status, + debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n", + pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status, pvt->info.max_dod, pvt->info.ch_map); if (ECC_ENABLED(pvt)) { - debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ?8:4); + debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4); if (ECCx8(pvt)) mode = EDAC_S8ECD8ED; else @@ -392,19 +582,20 @@ static int get_dimm_config(struct mem_ctl_info *mci) } /* FIXME: need to handle the error codes */ - debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked\n", + debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked " + "x%x x 0x%x\n", numdimms(pvt->info.max_dod), numrank(pvt->info.max_dod >> 2), - numbank(pvt->info.max_dod >> 4)); - debugf0("DOD Max rows x colums = 0x%x x 0x%x\n", + numbank(pvt->info.max_dod >> 4), numrow(pvt->info.max_dod >> 6), numcol(pvt->info.max_dod >> 9)); - debugf0("Memory channel configuration:\n"); - for (i = 0; i < NUM_CHANS; i++) { u32 data, dimm_dod[3], value[8]; + if (!pvt->pci_ch[i][0]) + continue; + if (!CH_ACTIVE(pvt, i)) { debugf0("Channel %i is not active\n", i); continue; @@ -418,7 +609,8 @@ static int get_dimm_config(struct mem_ctl_info *mci) pci_read_config_dword(pvt->pci_ch[i][0], MC_CHANNEL_DIMM_INIT_PARAMS, &data); - pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT)? 4 : 2; + pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ? + 4 : 2; if (data & REGISTERED_DIMM) mtype = MEM_RDDR3; @@ -447,7 +639,7 @@ static int get_dimm_config(struct mem_ctl_info *mci) RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i), data, pvt->channel[i].ranks, - (data & REGISTERED_DIMM)? 'R' : 'U'); + (data & REGISTERED_DIMM) ? 'R' : 'U'); for (j = 0; j < 3; j++) { u32 banks, ranks, rows, cols; @@ -466,24 +658,33 @@ static int get_dimm_config(struct mem_ctl_info *mci) pvt->channel[i].dimms++; - debugf0("\tdimm %d (0x%08x) %d Mb offset: %x, " - "numbank: %d,\n\t\t" - "numrank: %d, numrow: %#x, numcol: %#x\n", - j, dimm_dod[j], size, + debugf0("\tdimm %d %d Mb offset: %x, " + "bank: %d, rank: %d, row: %#x, col: %#x\n", + j, size, RANKOFFSET(dimm_dod[j]), banks, ranks, rows, cols); - npages = cols * rows; /* FIXME */ +#if PAGE_SHIFT > 20 + npages = size >> (PAGE_SHIFT - 20); +#else + npages = size << (20 - PAGE_SHIFT); +#endif - csr = &mci->csrows[csrow]; + csr = &mci->csrows[*csrow]; csr->first_page = last_page + 1; last_page += npages; csr->last_page = last_page; csr->nr_pages = npages; csr->page_mask = 0; - csr->grain = 0; - csr->csrow_idx = csrow; + csr->grain = 8; + csr->csrow_idx = *csrow; + csr->nr_channels = 1; + + csr->channels[0].chan_idx = i; + csr->channels[0].ce_count = 0; + + pvt->csrow_map[i][j] = *csrow; switch (banks) { case 4: @@ -502,7 +703,7 @@ static int get_dimm_config(struct mem_ctl_info *mci) csr->edac_mode = mode; csr->mtype = mtype; - csrow++; + (*csrow)++; } pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]); @@ -513,9 +714,9 @@ static int get_dimm_config(struct mem_ctl_info *mci) pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]); pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]); pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]); - printk("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i); + debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i); for (j = 0; j < 8; j++) - printk("\t\t%#x\t%#x\t%#x\n", + debugf1("\t\t%#x\t%#x\t%#x\n", (value[j] >> 27) & 0x1, (value[j] >> 24) & 0x7, (value[j] && ((1 << 24) - 1))); @@ -545,7 +746,7 @@ static int disable_inject(struct mem_ctl_info *mci) return -ENODEV; pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0], - MC_CHANNEL_ERROR_MASK, 0); + MC_CHANNEL_ERROR_INJECT, 0); return 0; } @@ -565,11 +766,11 @@ static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci, int rc; if (pvt->inject.enable) - disable_inject(mci); + disable_inject(mci); rc = strict_strtoul(data, 10, &value); if ((rc < 0) || (value > 3)) - return 0; + return -EIO; pvt->inject.section = (u32) value; return count; @@ -598,11 +799,11 @@ static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci, int rc; if (pvt->inject.enable) - disable_inject(mci); + disable_inject(mci); rc = strict_strtoul(data, 10, &value); if ((rc < 0) || (value > 7)) - return 0; + return -EIO; pvt->inject.type = (u32) value; return count; @@ -633,11 +834,11 @@ static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci, int rc; if (pvt->inject.enable) - disable_inject(mci); + disable_inject(mci); rc = strict_strtoul(data, 10, &value); if (rc < 0) - return 0; + return -EIO; pvt->inject.eccmask = (u32) value; return count; @@ -660,104 +861,91 @@ static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci, * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an * uncorrectable error to be injected. */ -static ssize_t i7core_inject_addrmatch_store(struct mem_ctl_info *mci, - const char *data, size_t count) -{ - struct i7core_pvt *pvt = mci->pvt_info; - char *cmd, *val; - long value; - int rc; - if (pvt->inject.enable) - disable_inject(mci); - - do { - cmd = strsep((char **) &data, ":"); - if (!cmd) - break; - val = strsep((char **) &data, " \n\t"); - if (!val) - return cmd - data; - - if (!strcasecmp(val,"any")) - value = -1; - else { - rc = strict_strtol(val, 10, &value); - if ((rc < 0) || (value < 0)) - return cmd - data; - } +#define DECLARE_ADDR_MATCH(param, limit) \ +static ssize_t i7core_inject_store_##param( \ + struct mem_ctl_info *mci, \ + const char *data, size_t count) \ +{ \ + struct i7core_pvt *pvt; \ + long value; \ + int rc; \ + \ + debugf1("%s()\n", __func__); \ + pvt = mci->pvt_info; \ + \ + if (pvt->inject.enable) \ + disable_inject(mci); \ + \ + if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\ + value = -1; \ + else { \ + rc = strict_strtoul(data, 10, &value); \ + if ((rc < 0) || (value >= limit)) \ + return -EIO; \ + } \ + \ + pvt->inject.param = value; \ + \ + return count; \ +} \ + \ +static ssize_t i7core_inject_show_##param( \ + struct mem_ctl_info *mci, \ + char *data) \ +{ \ + struct i7core_pvt *pvt; \ + \ + pvt = mci->pvt_info; \ + debugf1("%s() pvt=%p\n", __func__, pvt); \ + if (pvt->inject.param < 0) \ + return sprintf(data, "any\n"); \ + else \ + return sprintf(data, "%d\n", pvt->inject.param);\ +} - if (!strcasecmp(cmd,"channel")) { - if (value < 3) - pvt->inject.channel = value; - else - return cmd - data; - } else if (!strcasecmp(cmd,"dimm")) { - if (value < 4) - pvt->inject.dimm = value; - else - return cmd - data; - } else if (!strcasecmp(cmd,"rank")) { - if (value < 4) - pvt->inject.rank = value; - else - return cmd - data; - } else if (!strcasecmp(cmd,"bank")) { - if (value < 4) - pvt->inject.bank = value; - else - return cmd - data; - } else if (!strcasecmp(cmd,"page")) { - if (value <= 0xffff) - pvt->inject.page = value; - else - return cmd - data; - } else if (!strcasecmp(cmd,"col") || - !strcasecmp(cmd,"column")) { - if (value <= 0x3fff) - pvt->inject.col = value; - else - return cmd - data; - } - } while (1); +#define ATTR_ADDR_MATCH(param) \ + { \ + .attr = { \ + .name = #param, \ + .mode = (S_IRUGO | S_IWUSR) \ + }, \ + .show = i7core_inject_show_##param, \ + .store = i7core_inject_store_##param, \ + } - return count; -} +DECLARE_ADDR_MATCH(channel, 3); +DECLARE_ADDR_MATCH(dimm, 3); +DECLARE_ADDR_MATCH(rank, 4); +DECLARE_ADDR_MATCH(bank, 32); +DECLARE_ADDR_MATCH(page, 0x10000); +DECLARE_ADDR_MATCH(col, 0x4000); -static ssize_t i7core_inject_addrmatch_show(struct mem_ctl_info *mci, - char *data) +static int write_and_test(struct pci_dev *dev, int where, u32 val) { - struct i7core_pvt *pvt = mci->pvt_info; - char channel[4], dimm[4], bank[4], rank[4], page[7], col[7]; + u32 read; + int count; - if (pvt->inject.channel < 0) - sprintf(channel, "any"); - else - sprintf(channel, "%d", pvt->inject.channel); - if (pvt->inject.dimm < 0) - sprintf(dimm, "any"); - else - sprintf(dimm, "%d", pvt->inject.dimm); - if (pvt->inject.bank < 0) - sprintf(bank, "any"); - else - sprintf(bank, "%d", pvt->inject.bank); - if (pvt->inject.rank < 0) - sprintf(rank, "any"); - else - sprintf(rank, "%d", pvt->inject.rank); - if (pvt->inject.page < 0) - sprintf(page, "any"); - else - sprintf(page, "0x%04x", pvt->inject.page); - if (pvt->inject.col < 0) - sprintf(col, "any"); - else - sprintf(col, "0x%04x", pvt->inject.col); + debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n", + dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), + where, val); + + for (count = 0; count < 10; count++) { + if (count) + msleep(100); + pci_write_config_dword(dev, where, val); + pci_read_config_dword(dev, where, &read); + + if (read == val) + return 0; + } + + i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x " + "write=%08x. Read=%08x\n", + dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), + where, val, read); - return sprintf(data, "channel: %s\ndimm: %s\nbank: %s\n" - "rank: %s\npage: %s\ncolumn: %s\n", - channel, dimm, bank, rank, page, col); + return -EINVAL; } /* @@ -803,74 +991,41 @@ static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci, /* Sets pvt->inject.dimm mask */ if (pvt->inject.dimm < 0) - mask |= 1L << 41; + mask |= 1LL << 41; else { if (pvt->channel[pvt->inject.channel].dimms > 2) - mask |= (pvt->inject.dimm & 0x3L) << 35; + mask |= (pvt->inject.dimm & 0x3LL) << 35; else - mask |= (pvt->inject.dimm & 0x1L) << 36; + mask |= (pvt->inject.dimm & 0x1LL) << 36; } /* Sets pvt->inject.rank mask */ if (pvt->inject.rank < 0) - mask |= 1L << 40; + mask |= 1LL << 40; else { if (pvt->channel[pvt->inject.channel].dimms > 2) - mask |= (pvt->inject.rank & 0x1L) << 34; + mask |= (pvt->inject.rank & 0x1LL) << 34; else - mask |= (pvt->inject.rank & 0x3L) << 34; + mask |= (pvt->inject.rank & 0x3LL) << 34; } /* Sets pvt->inject.bank mask */ if (pvt->inject.bank < 0) - mask |= 1L << 39; + mask |= 1LL << 39; else - mask |= (pvt->inject.bank & 0x15L) << 30; + mask |= (pvt->inject.bank & 0x15LL) << 30; /* Sets pvt->inject.page mask */ if (pvt->inject.page < 0) - mask |= 1L << 38; + mask |= 1LL << 38; else - mask |= (pvt->inject.page & 0xffffL) << 14; + mask |= (pvt->inject.page & 0xffff) << 14; /* Sets pvt->inject.column mask */ if (pvt->inject.col < 0) - mask |= 1L << 37; + mask |= 1LL << 37; else - mask |= (pvt->inject.col & 0x3fffL); - -#if USE_QWORD - pci_write_config_qword(pvt->pci_ch[pvt->inject.channel][0], - MC_CHANNEL_ADDR_MATCH, mask); -#else - pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0], - MC_CHANNEL_ADDR_MATCH, mask); - pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0], - MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L); -#endif - -#if 1 -#if USE_QWORD - u64 rdmask; - pci_read_config_qword(pvt->pci_ch[pvt->inject.channel][0], - MC_CHANNEL_ADDR_MATCH, &rdmask); - debugf0("Inject addr match write 0x%016llx, read: 0x%016llx\n", - mask, rdmask); -#else - u32 rdmask1, rdmask2; - - pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0], - MC_CHANNEL_ADDR_MATCH, &rdmask1); - pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0], - MC_CHANNEL_ADDR_MATCH + 4, &rdmask2); - - debugf0("Inject addr match write 0x%016llx, read: 0x%08x%08x\n", - mask, rdmask1, rdmask2); -#endif -#endif - - pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0], - MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask); + mask |= (pvt->inject.col & 0x3fff); /* * bit 0: REPEAT_EN @@ -883,12 +1038,32 @@ static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci, (pvt->inject.section & 0x3) << 1 | (pvt->inject.type & 0x6) << (3 - 1); - pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0], - MC_CHANNEL_ERROR_MASK, injectmask); + /* Unlock writes to registers - this register is write only */ + pci_write_config_dword(pvt->pci_noncore, + MC_CFG_CONTROL, 0x2); - debugf0("Error inject addr match 0x%016llx, ecc 0x%08x, inject 0x%08x\n", - mask, pvt->inject.eccmask, injectmask); + write_and_test(pvt->pci_ch[pvt->inject.channel][0], + MC_CHANNEL_ADDR_MATCH, mask); + write_and_test(pvt->pci_ch[pvt->inject.channel][0], + MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L); + + write_and_test(pvt->pci_ch[pvt->inject.channel][0], + MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask); + write_and_test(pvt->pci_ch[pvt->inject.channel][0], + MC_CHANNEL_ERROR_INJECT, injectmask); + + /* + * This is something undocumented, based on my tests + * Without writing 8 to this register, errors aren't injected. Not sure + * why. + */ + pci_write_config_dword(pvt->pci_noncore, + MC_CFG_CONTROL, 8); + + debugf0("Error inject addr match 0x%016llx, ecc 0x%08x," + " inject 0x%08x\n", + mask, pvt->inject.eccmask, injectmask); return count; @@ -900,8 +1075,11 @@ static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci, struct i7core_pvt *pvt = mci->pvt_info; u32 injectmask; + if (!pvt->pci_ch[pvt->inject.channel][0]) + return 0; + pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0], - MC_CHANNEL_ERROR_MASK, &injectmask); + MC_CHANNEL_ERROR_INJECT, &injectmask); debugf0("Inject error read: 0x%018x\n", injectmask); @@ -911,24 +1089,65 @@ static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci, return sprintf(data, "%d\n", pvt->inject.enable); } -static ssize_t i7core_ce_regs_show(struct mem_ctl_info *mci, char *data) -{ - struct i7core_pvt *pvt = mci->pvt_info; +#define DECLARE_COUNTER(param) \ +static ssize_t i7core_show_counter_##param( \ + struct mem_ctl_info *mci, \ + char *data) \ +{ \ + struct i7core_pvt *pvt = mci->pvt_info; \ + \ + debugf1("%s() \n", __func__); \ + if (!pvt->ce_count_available || (pvt->is_registered)) \ + return sprintf(data, "data unavailable\n"); \ + return sprintf(data, "%lu\n", \ + pvt->udimm_ce_count[param]); \ +} - if (!pvt->ce_count_available) - return sprintf(data, "unavailable\n"); +#define ATTR_COUNTER(param) \ + { \ + .attr = { \ + .name = __stringify(udimm##param), \ + .mode = (S_IRUGO | S_IWUSR) \ + }, \ + .show = i7core_show_counter_##param \ + } - return sprintf(data, "dimm0: %lu\ndimm1: %lu\ndimm2: %lu\n", - pvt->ce_count[0], - pvt->ce_count[1], - pvt->ce_count[2]); -} +DECLARE_COUNTER(0); +DECLARE_COUNTER(1); +DECLARE_COUNTER(2); /* * Sysfs struct */ -static struct mcidev_sysfs_attribute i7core_inj_attrs[] = { + +static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = { + ATTR_ADDR_MATCH(channel), + ATTR_ADDR_MATCH(dimm), + ATTR_ADDR_MATCH(rank), + ATTR_ADDR_MATCH(bank), + ATTR_ADDR_MATCH(page), + ATTR_ADDR_MATCH(col), + { .attr = { .name = NULL } } +}; + +static struct mcidev_sysfs_group i7core_inject_addrmatch = { + .name = "inject_addrmatch", + .mcidev_attr = i7core_addrmatch_attrs, +}; + +static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = { + ATTR_COUNTER(0), + ATTR_COUNTER(1), + ATTR_COUNTER(2), +}; + +static struct mcidev_sysfs_group i7core_udimm_counters = { + .name = "all_channel_counts", + .mcidev_attr = i7core_udimm_counters_attrs, +}; + +static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = { { .attr = { .name = "inject_section", @@ -951,12 +1170,7 @@ static struct mcidev_sysfs_attribute i7core_inj_attrs[] = { .show = i7core_inject_eccmask_show, .store = i7core_inject_eccmask_store, }, { - .attr = { - .name = "inject_addrmatch", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = i7core_inject_addrmatch_show, - .store = i7core_inject_addrmatch_store, + .grp = &i7core_inject_addrmatch, }, { .attr = { .name = "inject_enable", @@ -964,14 +1178,9 @@ static struct mcidev_sysfs_attribute i7core_inj_attrs[] = { }, .show = i7core_inject_enable_show, .store = i7core_inject_enable_store, - }, { - .attr = { - .name = "corrected_error_counts", - .mode = (S_IRUGO | S_IWUSR) - }, - .show = i7core_ce_regs_show, - .store = NULL, }, + { .attr = { .name = NULL } }, /* Reserved for udimm counters */ + { .attr = { .name = NULL } } }; /**************************************************************************** @@ -982,12 +1191,50 @@ static struct mcidev_sysfs_attribute i7core_inj_attrs[] = { * i7core_put_devices 'put' all the devices that we have * reserved via 'get' */ -static void i7core_put_devices(void) +static void i7core_put_devices(struct i7core_dev *i7core_dev) { int i; - for (i = 0; i < N_DEVS; i++) - pci_dev_put(pci_devs[i].pdev); + debugf0(__FILE__ ": %s()\n", __func__); + for (i = 0; i < i7core_dev->n_devs; i++) { + struct pci_dev *pdev = i7core_dev->pdev[i]; + if (!pdev) + continue; + debugf0("Removing dev %02x:%02x.%d\n", + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + pci_dev_put(pdev); + } + kfree(i7core_dev->pdev); + list_del(&i7core_dev->list); + kfree(i7core_dev); +} + +static void i7core_put_all_devices(void) +{ + struct i7core_dev *i7core_dev, *tmp; + + list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) + i7core_put_devices(i7core_dev); +} + +static void __init i7core_xeon_pci_fixup(struct pci_id_table *table) +{ + struct pci_dev *pdev = NULL; + int i; + /* + * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses + * aren't announced by acpi. So, we need to use a legacy scan probing + * to detect them + */ + while (table && table->descr) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL); + if (unlikely(!pdev)) { + for (i = 0; i < MAX_SOCKET_BUSES; i++) + pcibios_scan_specific_bus(255-i); + } + table++; + } } /* @@ -996,77 +1243,163 @@ static void i7core_put_devices(void) * * Need to 'get' device 16 func 1 and func 2 */ -static int i7core_get_devices(void) +int i7core_get_onedevice(struct pci_dev **prev, int devno, + struct pci_id_descr *dev_descr, unsigned n_devs) { - int rc, i; + struct i7core_dev *i7core_dev; + struct pci_dev *pdev = NULL; + u8 bus = 0; + u8 socket = 0; + + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + dev_descr->dev_id, *prev); - for (i = 0; i < N_DEVS; i++) { + /* + * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs + * is at addr 8086:2c40, instead of 8086:2c41. So, we need + * to probe for the alternate address in case of failure + */ + if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) pdev = pci_get_device(PCI_VENDOR_ID_INTEL, - pci_devs[i].dev_id, NULL); - if (likely(pdev)) - pci_devs[i].pdev = pdev; - else { - i7core_printk(KERN_ERR, - "Device not found: PCI ID %04x:%04x " - "(dev %d, func %d)\n", - PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id, - pci_devs[i].dev,pci_devs[i].func); + PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev); - /* Dev 3 function 2 only exists on chips with RDIMMs */ - if ((pci_devs[i].dev == 3) && (pci_devs[i].func == 2)) - continue; + if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev) + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT, + *prev); - /* End of list, leave */ - rc = -ENODEV; - goto error; + if (!pdev) { + if (*prev) { + *prev = pdev; + return 0; } - /* Sanity check */ - if (unlikely(PCI_SLOT(pdev->devfn) != pci_devs[i].dev || - PCI_FUNC(pdev->devfn) != pci_devs[i].func)) { - i7core_printk(KERN_ERR, - "Device PCI ID %04x:%04x " - "has fn %d.%d instead of fn %d.%d\n", - PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id, - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), - pci_devs[i].dev, pci_devs[i].func); - rc = -EINVAL; - goto error; - } + if (dev_descr->optional) + return 0; - /* Be sure that the device is enabled */ - rc = pci_enable_device(pdev); - if (unlikely(rc < 0)) { - i7core_printk(KERN_ERR, - "Couldn't enable PCI ID %04x:%04x " - "fn %d.%d\n", - PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id, - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); - goto error; + if (devno == 0) + return -ENODEV; + + i7core_printk(KERN_ERR, + "Device not found: dev %02x.%d PCI ID %04x:%04x\n", + dev_descr->dev, dev_descr->func, + PCI_VENDOR_ID_INTEL, dev_descr->dev_id); + + /* End of list, leave */ + return -ENODEV; + } + bus = pdev->bus->number; + + if (bus == 0x3f) + socket = 0; + else + socket = 255 - bus; + + i7core_dev = get_i7core_dev(socket); + if (!i7core_dev) { + i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL); + if (!i7core_dev) + return -ENOMEM; + i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs, + GFP_KERNEL); + if (!i7core_dev->pdev) { + kfree(i7core_dev); + return -ENOMEM; } + i7core_dev->socket = socket; + i7core_dev->n_devs = n_devs; + list_add_tail(&i7core_dev->list, &i7core_edac_list); + } + + if (i7core_dev->pdev[devno]) { + i7core_printk(KERN_ERR, + "Duplicated device for " + "dev %02x:%02x.%d PCI ID %04x:%04x\n", + bus, dev_descr->dev, dev_descr->func, + PCI_VENDOR_ID_INTEL, dev_descr->dev_id); + pci_dev_put(pdev); + return -ENODEV; + } + + i7core_dev->pdev[devno] = pdev; + + /* Sanity check */ + if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev || + PCI_FUNC(pdev->devfn) != dev_descr->func)) { + i7core_printk(KERN_ERR, + "Device PCI ID %04x:%04x " + "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n", + PCI_VENDOR_ID_INTEL, dev_descr->dev_id, + bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + bus, dev_descr->dev, dev_descr->func); + return -ENODEV; + } - i7core_printk(KERN_INFO, - "Registered device %0x:%0x fn %d.%d\n", - PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id, - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + /* Be sure that the device is enabled */ + if (unlikely(pci_enable_device(pdev) < 0)) { + i7core_printk(KERN_ERR, + "Couldn't enable " + "dev %02x:%02x.%d PCI ID %04x:%04x\n", + bus, dev_descr->dev, dev_descr->func, + PCI_VENDOR_ID_INTEL, dev_descr->dev_id); + return -ENODEV; } + debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n", + socket, bus, dev_descr->dev, + dev_descr->func, + PCI_VENDOR_ID_INTEL, dev_descr->dev_id); + + *prev = pdev; + return 0; +} -error: - i7core_put_devices(); - return -EINVAL; +static int i7core_get_devices(struct pci_id_table *table) +{ + int i, rc; + struct pci_dev *pdev = NULL; + struct pci_id_descr *dev_descr; + + while (table && table->descr) { + dev_descr = table->descr; + for (i = 0; i < table->n_devs; i++) { + pdev = NULL; + do { + rc = i7core_get_onedevice(&pdev, i, &dev_descr[i], + table->n_devs); + if (rc < 0) { + if (i == 0) { + i = table->n_devs; + break; + } + i7core_put_all_devices(); + return -ENODEV; + } + } while (pdev); + } + table++; + } + + return 0; + return 0; } -static int mci_bind_devs(struct mem_ctl_info *mci) +static int mci_bind_devs(struct mem_ctl_info *mci, + struct i7core_dev *i7core_dev) { struct i7core_pvt *pvt = mci->pvt_info; struct pci_dev *pdev; int i, func, slot; - for (i = 0; i < N_DEVS; i++) { - pdev = pci_devs[i].pdev; + /* Associates i7core_dev and mci for future usage */ + pvt->i7core_dev = i7core_dev; + i7core_dev->mci = mci; + + pvt->is_registered = 0; + for (i = 0; i < i7core_dev->n_devs; i++) { + pdev = i7core_dev->pdev[i]; if (!pdev) continue; @@ -1080,12 +1413,29 @@ static int mci_bind_devs(struct mem_ctl_info *mci) if (unlikely(func > MAX_CHAN_FUNC)) goto error; pvt->pci_ch[slot - 4][func] = pdev; - } else + } else if (!slot && !func) + pvt->pci_noncore = pdev; + else goto error; - debugf0("Associated fn %d.%d, dev = %p\n", - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), pdev); + debugf0("Associated fn %d.%d, dev = %p, socket %d\n", + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + pdev, i7core_dev->socket); + + if (PCI_SLOT(pdev->devfn) == 3 && + PCI_FUNC(pdev->devfn) == 2) + pvt->is_registered = 1; } + + /* + * Add extra nodes to count errors on udimm + * For registered memory, this is not needed, since the counters + * are already displayed at the standard locations + */ + if (!pvt->is_registered) + i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp = + &i7core_udimm_counters; + return 0; error: @@ -1098,6 +1448,103 @@ error: /**************************************************************************** Error check routines ****************************************************************************/ +static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci, + int chan, int dimm, int add) +{ + char *msg; + struct i7core_pvt *pvt = mci->pvt_info; + int row = pvt->csrow_map[chan][dimm], i; + + for (i = 0; i < add; i++) { + msg = kasprintf(GFP_KERNEL, "Corrected error " + "(Socket=%d channel=%d dimm=%d)", + pvt->i7core_dev->socket, chan, dimm); + + edac_mc_handle_fbd_ce(mci, row, 0, msg); + kfree (msg); + } +} + +static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci, + int chan, int new0, int new1, int new2) +{ + struct i7core_pvt *pvt = mci->pvt_info; + int add0 = 0, add1 = 0, add2 = 0; + /* Updates CE counters if it is not the first time here */ + if (pvt->ce_count_available) { + /* Updates CE counters */ + + add2 = new2 - pvt->rdimm_last_ce_count[chan][2]; + add1 = new1 - pvt->rdimm_last_ce_count[chan][1]; + add0 = new0 - pvt->rdimm_last_ce_count[chan][0]; + + if (add2 < 0) + add2 += 0x7fff; + pvt->rdimm_ce_count[chan][2] += add2; + + if (add1 < 0) + add1 += 0x7fff; + pvt->rdimm_ce_count[chan][1] += add1; + + if (add0 < 0) + add0 += 0x7fff; + pvt->rdimm_ce_count[chan][0] += add0; + } else + pvt->ce_count_available = 1; + + /* Store the new values */ + pvt->rdimm_last_ce_count[chan][2] = new2; + pvt->rdimm_last_ce_count[chan][1] = new1; + pvt->rdimm_last_ce_count[chan][0] = new0; + + /*updated the edac core */ + if (add0 != 0) + i7core_rdimm_update_csrow(mci, chan, 0, add0); + if (add1 != 0) + i7core_rdimm_update_csrow(mci, chan, 1, add1); + if (add2 != 0) + i7core_rdimm_update_csrow(mci, chan, 2, add2); + +} + +static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci) +{ + struct i7core_pvt *pvt = mci->pvt_info; + u32 rcv[3][2]; + int i, new0, new1, new2; + + /*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/ + pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0, + &rcv[0][0]); + pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1, + &rcv[0][1]); + pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2, + &rcv[1][0]); + pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3, + &rcv[1][1]); + pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4, + &rcv[2][0]); + pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5, + &rcv[2][1]); + for (i = 0 ; i < 3; i++) { + debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n", + (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]); + /*if the channel has 3 dimms*/ + if (pvt->channel[i].dimms > 2) { + new0 = DIMM_BOT_COR_ERR(rcv[i][0]); + new1 = DIMM_TOP_COR_ERR(rcv[i][0]); + new2 = DIMM_BOT_COR_ERR(rcv[i][1]); + } else { + new0 = DIMM_TOP_COR_ERR(rcv[i][0]) + + DIMM_BOT_COR_ERR(rcv[i][0]); + new1 = DIMM_TOP_COR_ERR(rcv[i][1]) + + DIMM_BOT_COR_ERR(rcv[i][1]); + new2 = 0; + } + + i7core_rdimm_update_ce_count(mci, i, new0, new1, new2); + } +} /* This function is based on the device 3 function 4 registers as described on: * Intel Xeon Processor 5500 Series Datasheet Volume 2 @@ -1105,18 +1552,18 @@ error: * also available at: * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf */ -static void check_mc_test_err(struct mem_ctl_info *mci) +static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci) { struct i7core_pvt *pvt = mci->pvt_info; u32 rcv1, rcv0; int new0, new1, new2; if (!pvt->pci_mcr[4]) { - debugf0("%s MCR registers not found\n",__func__); + debugf0("%s MCR registers not found\n", __func__); return; } - /* Corrected error reads */ + /* Corrected test errors */ pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1); pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0); @@ -1125,37 +1572,146 @@ static void check_mc_test_err(struct mem_ctl_info *mci) new1 = DIMM1_COR_ERR(rcv0); new0 = DIMM0_COR_ERR(rcv0); - debugf2("%s CE rcv1=0x%08x rcv0=0x%08x, %d %d %d\n", - (pvt->ce_count_available ? "UPDATE" : "READ"), - rcv1, rcv0, new0, new1, new2); - /* Updates CE counters if it is not the first time here */ if (pvt->ce_count_available) { /* Updates CE counters */ int add0, add1, add2; - add2 = new2 - pvt->last_ce_count[2]; - add1 = new1 - pvt->last_ce_count[1]; - add0 = new0 - pvt->last_ce_count[0]; + add2 = new2 - pvt->udimm_last_ce_count[2]; + add1 = new1 - pvt->udimm_last_ce_count[1]; + add0 = new0 - pvt->udimm_last_ce_count[0]; if (add2 < 0) add2 += 0x7fff; - pvt->ce_count[2] += add2; + pvt->udimm_ce_count[2] += add2; if (add1 < 0) add1 += 0x7fff; - pvt->ce_count[1] += add1; + pvt->udimm_ce_count[1] += add1; if (add0 < 0) add0 += 0x7fff; - pvt->ce_count[0] += add0; + pvt->udimm_ce_count[0] += add0; + + if (add0 | add1 | add2) + i7core_printk(KERN_ERR, "New Corrected error(s): " + "dimm0: +%d, dimm1: +%d, dimm2 +%d\n", + add0, add1, add2); } else pvt->ce_count_available = 1; /* Store the new values */ - pvt->last_ce_count[2] = new2; - pvt->last_ce_count[1] = new1; - pvt->last_ce_count[0] = new0; + pvt->udimm_last_ce_count[2] = new2; + pvt->udimm_last_ce_count[1] = new1; + pvt->udimm_last_ce_count[0] = new0; +} + +/* + * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32 + * Architectures Software Developer’s Manual Volume 3B. + * Nehalem are defined as family 0x06, model 0x1a + * + * The MCA registers used here are the following ones: + * struct mce field MCA Register + * m->status MSR_IA32_MC8_STATUS + * m->addr MSR_IA32_MC8_ADDR + * m->misc MSR_IA32_MC8_MISC + * In the case of Nehalem, the error information is masked at .status and .misc + * fields + */ +static void i7core_mce_output_error(struct mem_ctl_info *mci, + struct mce *m) +{ + struct i7core_pvt *pvt = mci->pvt_info; + char *type, *optype, *err, *msg; + unsigned long error = m->status & 0x1ff0000l; + u32 optypenum = (m->status >> 4) & 0x07; + u32 core_err_cnt = (m->status >> 38) && 0x7fff; + u32 dimm = (m->misc >> 16) & 0x3; + u32 channel = (m->misc >> 18) & 0x3; + u32 syndrome = m->misc >> 32; + u32 errnum = find_first_bit(&error, 32); + int csrow; + + if (m->mcgstatus & 1) + type = "FATAL"; + else + type = "NON_FATAL"; + + switch (optypenum) { + case 0: + optype = "generic undef request"; + break; + case 1: + optype = "read error"; + break; + case 2: + optype = "write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "scrubbing error"; + break; + default: + optype = "reserved"; + break; + } + + switch (errnum) { + case 16: + err = "read ECC error"; + break; + case 17: + err = "RAS ECC error"; + break; + case 18: + err = "write parity error"; + break; + case 19: + err = "redundacy loss"; + break; + case 20: + err = "reserved"; + break; + case 21: + err = "memory range error"; + break; + case 22: + err = "RTID out of range"; + break; + case 23: + err = "address parity error"; + break; + case 24: + err = "byte enable parity error"; + break; + default: + err = "unknown"; + } + + /* FIXME: should convert addr into bank and rank information */ + msg = kasprintf(GFP_ATOMIC, + "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, " + "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n", + type, (long long) m->addr, m->cpu, dimm, channel, + syndrome, core_err_cnt, (long long)m->status, + (long long)m->misc, optype, err); + + debugf0("%s", msg); + + csrow = pvt->csrow_map[channel][dimm]; + + /* Call the helper to output message */ + if (m->mcgstatus & 1) + edac_mc_handle_fbd_ue(mci, csrow, 0, + 0 /* FIXME: should be channel here */, msg); + else if (!pvt->is_registered) + edac_mc_handle_fbd_ce(mci, csrow, + 0 /* FIXME: should be channel here */, msg); + + kfree(msg); } /* @@ -1164,76 +1720,157 @@ static void check_mc_test_err(struct mem_ctl_info *mci) */ static void i7core_check_error(struct mem_ctl_info *mci) { - check_mc_test_err(mci); + struct i7core_pvt *pvt = mci->pvt_info; + int i; + unsigned count = 0; + struct mce *m; + + /* + * MCE first step: Copy all mce errors into a temporary buffer + * We use a double buffering here, to reduce the risk of + * loosing an error. + */ + smp_rmb(); + count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in) + % MCE_LOG_LEN; + if (!count) + goto check_ce_error; + + m = pvt->mce_outentry; + if (pvt->mce_in + count > MCE_LOG_LEN) { + unsigned l = MCE_LOG_LEN - pvt->mce_in; + + memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l); + smp_wmb(); + pvt->mce_in = 0; + count -= l; + m += l; + } + memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count); + smp_wmb(); + pvt->mce_in += count; + + smp_rmb(); + if (pvt->mce_overrun) { + i7core_printk(KERN_ERR, "Lost %d memory errors\n", + pvt->mce_overrun); + smp_wmb(); + pvt->mce_overrun = 0; + } + + /* + * MCE second step: parse errors and display + */ + for (i = 0; i < count; i++) + i7core_mce_output_error(mci, &pvt->mce_outentry[i]); + + /* + * Now, let's increment CE error counts + */ +check_ce_error: + if (!pvt->is_registered) + i7core_udimm_check_mc_ecc_err(mci); + else + i7core_rdimm_check_mc_ecc_err(mci); } /* - * i7core_probe Probe for ONE instance of device to see if it is - * present. - * return: - * 0 for FOUND a device - * < 0 for error code + * i7core_mce_check_error Replicates mcelog routine to get errors + * This routine simply queues mcelog errors, and + * return. The error itself should be handled later + * by i7core_check_error. + * WARNING: As this routine should be called at NMI time, extra care should + * be taken to avoid deadlocks, and to be as fast as possible. */ -static int __devinit i7core_probe(struct pci_dev *pdev, - const struct pci_device_id *id) +static int i7core_mce_check_error(void *priv, struct mce *mce) { - struct mem_ctl_info *mci; - struct i7core_pvt *pvt; - int num_channels = 0; - int num_csrows; - int dev_idx = id->driver_data; - int rc; + struct mem_ctl_info *mci = priv; + struct i7core_pvt *pvt = mci->pvt_info; - if (unlikely(dev_idx >= ARRAY_SIZE(i7core_devs))) - return -EINVAL; + /* + * Just let mcelog handle it if the error is + * outside the memory controller + */ + if (((mce->status & 0xffff) >> 7) != 1) + return 0; - /* get the pci devices we want to reserve for our use */ - rc = i7core_get_devices(); - if (unlikely(rc < 0)) - return rc; + /* Bank 8 registers are the only ones that we know how to handle */ + if (mce->bank != 8) + return 0; - /* Check the number of active and not disabled channels */ - rc = i7core_get_active_channels(&num_channels); - if (unlikely (rc < 0)) - goto fail0; +#ifdef CONFIG_SMP + /* Only handle if it is the right mc controller */ + if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket) + return 0; +#endif + + smp_rmb(); + if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { + smp_wmb(); + pvt->mce_overrun++; + return 0; + } + + /* Copy memory error at the ringbuffer */ + memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce)); + smp_wmb(); + pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN; + + /* Handle fatal errors immediately */ + if (mce->mcgstatus & 1) + i7core_check_error(mci); - /* FIXME: we currently don't know the number of csrows */ - num_csrows = num_channels; + /* Advice mcelog that the error were handled */ + return 1; +} + +static int i7core_register_mci(struct i7core_dev *i7core_dev, + int num_channels, int num_csrows) +{ + struct mem_ctl_info *mci; + struct i7core_pvt *pvt; + int csrow = 0; + int rc; /* allocate a new MC control structure */ - mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0); - if (unlikely (!mci)) { - rc = -ENOMEM; - goto fail0; - } + mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, + i7core_dev->socket); + if (unlikely(!mci)) + return -ENOMEM; debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); - mci->dev = &pdev->dev; /* record ptr to the generic device */ + /* record ptr to the generic device */ + mci->dev = &i7core_dev->pdev[0]->dev; pvt = mci->pvt_info; memset(pvt, 0, sizeof(*pvt)); - mci->mc_idx = 0; - mci->mtype_cap = MEM_FLAG_DDR3; /* FIXME: how to handle RDDR3? */ + /* + * FIXME: how to handle RDDR3 at MCI level? It is possible to have + * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different + * memory channels + */ + mci->mtype_cap = MEM_FLAG_DDR3; mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE; mci->mod_name = "i7core_edac.c"; mci->mod_ver = I7CORE_REVISION; - mci->ctl_name = i7core_devs[dev_idx].ctl_name; - mci->dev_name = pci_name(pdev); + mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d", + i7core_dev->socket); + mci->dev_name = pci_name(i7core_dev->pdev[0]); mci->ctl_page_to_phys = NULL; - mci->mc_driver_sysfs_attributes = i7core_inj_attrs; + mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs; /* Set the function pointer to an actual operation function */ mci->edac_check = i7core_check_error; /* Store pci devices at mci for faster access */ - rc = mci_bind_devs(mci); - if (unlikely (rc < 0)) - goto fail1; + rc = mci_bind_devs(mci, i7core_dev); + if (unlikely(rc < 0)) + goto fail; /* Get dimm basic config */ - get_dimm_config(mci); + get_dimm_config(mci, &csrow); /* add this new MC control structure to EDAC's list of MCs */ if (unlikely(edac_mc_add_mc(mci))) { @@ -1244,12 +1881,13 @@ static int __devinit i7core_probe(struct pci_dev *pdev, */ rc = -EINVAL; - goto fail1; + goto fail; } /* allocating generic PCI control info */ - i7core_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); - if (unlikely (!i7core_pci)) { + i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev, + EDAC_MOD_STR); + if (unlikely(!i7core_pci)) { printk(KERN_WARNING "%s(): Unable to create PCI control\n", __func__); @@ -1266,15 +1904,73 @@ static int __devinit i7core_probe(struct pci_dev *pdev, pvt->inject.page = -1; pvt->inject.col = -1; + /* Registers on edac_mce in order to receive memory errors */ + pvt->edac_mce.priv = mci; + pvt->edac_mce.check_error = i7core_mce_check_error; + + rc = edac_mce_register(&pvt->edac_mce); + if (unlikely(rc < 0)) { + debugf0("MC: " __FILE__ + ": %s(): failed edac_mce_register()\n", __func__); + } + +fail: + if (rc < 0) + edac_mc_free(mci); + return rc; +} + +/* + * i7core_probe Probe for ONE instance of device to see if it is + * present. + * return: + * 0 for FOUND a device + * < 0 for error code + */ +static int __devinit i7core_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int dev_idx = id->driver_data; + int rc; + struct i7core_dev *i7core_dev; + + /* + * All memory controllers are allocated at the first pass. + */ + if (unlikely(dev_idx >= 1)) + return -EINVAL; + + /* get the pci devices we want to reserve for our use */ + mutex_lock(&i7core_edac_lock); + + rc = i7core_get_devices(pci_dev_table); + if (unlikely(rc < 0)) + goto fail0; + + list_for_each_entry(i7core_dev, &i7core_edac_list, list) { + int channels; + int csrows; + + /* Check the number of active and not disabled channels */ + rc = i7core_get_active_channels(i7core_dev->socket, + &channels, &csrows); + if (unlikely(rc < 0)) + goto fail1; + + rc = i7core_register_mci(i7core_dev, channels, csrows); + if (unlikely(rc < 0)) + goto fail1; + } + i7core_printk(KERN_INFO, "Driver loaded.\n"); + mutex_unlock(&i7core_edac_lock); return 0; fail1: - edac_mc_free(mci); - + i7core_put_all_devices(); fail0: - i7core_put_devices(); + mutex_unlock(&i7core_edac_lock); return rc; } @@ -1285,21 +1981,39 @@ fail0: static void __devexit i7core_remove(struct pci_dev *pdev) { struct mem_ctl_info *mci; + struct i7core_dev *i7core_dev, *tmp; debugf0(__FILE__ ": %s()\n", __func__); if (i7core_pci) edac_pci_release_generic_ctl(i7core_pci); - mci = edac_mc_del_mc(&pdev->dev); - - if (!mci) - return; - - /* retrieve references to resources, and free those resources */ - i7core_put_devices(); + /* + * we have a trouble here: pdev value for removal will be wrong, since + * it will point to the X58 register used to detect that the machine + * is a Nehalem or upper design. However, due to the way several PCI + * devices are grouped together to provide MC functionality, we need + * to use a different method for releasing the devices + */ - edac_mc_free(mci); + mutex_lock(&i7core_edac_lock); + list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) { + mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev); + if (mci) { + struct i7core_pvt *pvt = mci->pvt_info; + + i7core_dev = pvt->i7core_dev; + edac_mce_unregister(&pvt->edac_mce); + kfree(mci->ctl_name); + edac_mc_free(mci); + i7core_put_devices(i7core_dev); + } else { + i7core_printk(KERN_ERR, + "Couldn't find mci for socket %d\n", + i7core_dev->socket); + } + } + mutex_unlock(&i7core_edac_lock); } MODULE_DEVICE_TABLE(pci, i7core_pci_tbl); @@ -1328,9 +2042,17 @@ static int __init i7core_init(void) /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); + i7core_xeon_pci_fixup(pci_dev_table); + pci_rc = pci_register_driver(&i7core_driver); - return (pci_rc < 0) ? pci_rc : 0; + if (pci_rc >= 0) + return 0; + + i7core_printk(KERN_ERR, "Failed to register device with error %d.\n", + pci_rc); + + return pci_rc; } /*