#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
+#include <linux/dmi.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
-#include <linux/edac_mce.h>
#include <linux/smp.h>
+#include <asm/mce.h>
#include <asm/processor.h>
+#include <asm/div64.h>
#include "edac_core.h"
#define MC_SCRUB_CONTROL 0x4c
#define STARTSCRUB (1 << 24)
+ #define SCRUBINTERVAL_MASK 0xffffff
#define MC_COR_ECC_CNT_0 0x80
#define MC_COR_ECC_CNT_1 0x84
bool is_registered, enable_scrub;
- /* mcelog glue */
- struct edac_mce edac_mce;
-
/* Fifo double buffers */
struct mce mce_entry[MCE_LOG_LEN];
struct mce mce_outentry[MCE_LOG_LEN];
/* Count indicator to show errors not got */
unsigned mce_overrun;
+ /* DCLK Frequency used for computing scrub rate */
+ int dclk_freq;
+
/* Struct to control EDAC polling */
struct edac_pci_ctl_info *i7core_pci;
};
csr->edac_mode = mode;
csr->mtype = mtype;
+ snprintf(csr->channels[0].label,
+ sizeof(csr->channels[0].label),
+ "CPU#%uChannel#%u_DIMM#%u",
+ pvt->i7core_dev->socket, i, j);
csrow++;
}
* WARNING: As this routine should be called at NMI time, extra care should
* be taken to avoid deadlocks, and to be as fast as possible.
*/
-static int i7core_mce_check_error(void *priv, struct mce *mce)
+static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
+ void *data)
{
- struct mem_ctl_info *mci = priv;
- struct i7core_pvt *pvt = mci->pvt_info;
+ struct mce *mce = (struct mce *)data;
+ struct i7core_dev *i7_dev;
+ struct mem_ctl_info *mci;
+ struct i7core_pvt *pvt;
+
+ i7_dev = get_i7core_dev(mce->socketid);
+ if (!i7_dev)
+ return NOTIFY_BAD;
+
+ mci = i7_dev->mci;
+ pvt = mci->pvt_info;
/*
* Just let mcelog handle it if the error is
* outside the memory controller
*/
if (((mce->status & 0xffff) >> 7) != 1)
- return 0;
+ return NOTIFY_DONE;
/* Bank 8 registers are the only ones that we know how to handle */
if (mce->bank != 8)
- return 0;
+ return NOTIFY_DONE;
#ifdef CONFIG_SMP
/* Only handle if it is the right mc controller */
- if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
- return 0;
+ if (mce->socketid != pvt->i7core_dev->socket)
+ return NOTIFY_DONE;
#endif
smp_rmb();
if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
smp_wmb();
pvt->mce_overrun++;
- return 0;
+ return NOTIFY_DONE;
}
/* Copy memory error at the ringbuffer */
i7core_check_error(mci);
/* Advise mcelog that the errors were handled */
- return 1;
+ return NOTIFY_STOP;
+}
+
+/*
+ * Notifier block whose callback is i7core_mce_check_error(). The driver
+ * registers it on x86_mce_decoder_chain when a memory controller is set
+ * up and unregisters it from the same chain on teardown.
+ */
+static struct notifier_block i7_mce_dec = {
+ .notifier_call = i7core_mce_check_error,
+};
+
+/*
+ * Overlay used by decode_dclk() to read a DMI record whose header type is
+ * DMI_ENTRY_MEM_DEVICE.
+ *
+ * The structure is packed: decode_dclk() derives byte offsets from the
+ * addresses of its fields and compares them with @length to find out
+ * whether the trailing speed fields are present in a given record, so no
+ * padding may be introduced and fields must not be reordered.
+ *
+ * NOTE(review): the field order presumably follows the SMBIOS "Memory
+ * Device" structure - confirm against the SMBIOS specification before
+ * changing anything here.
+ */
+struct memdev_dmi_entry {
+ u8 type;
+ u8 length; /* record length; checked against field offsets */
+ u16 handle;
+ u16 phys_mem_array_handle;
+ u16 mem_err_info_handle;
+ u16 total_width;
+ u16 data_width;
+ u16 size; /* 0 is treated as "no DIMM present" by decode_dclk() */
+ u8 form;
+ u8 device_set;
+ u8 device_locator;
+ u8 bank_locator;
+ u8 memory_type;
+ u16 type_detail;
+ u16 speed; /* DIMM speed; fallback when the configured speed is absent */
+ u8 manufacturer;
+ u8 serial_number;
+ u8 asset_tag;
+ u8 part_number;
+ u8 attributes;
+ u32 extended_size;
+ u16 conf_mem_clk_speed; /* configured speed; preferred when present */
+} __attribute__((__packed__));
+
+
+/*
+ * decode_dclk() - dmi_walk() callback that decodes the DRAM Clock (DCLK)
+ * frequency from the memory device DMI records.
+ *
+ * @dh:         DMI record currently being visited.
+ * @_dclk_freq: pointer to an int accumulator owned by the caller:
+ *                 0  no populated DIMM has been seen yet,
+ *                >0  speed reported by every populated DIMM seen so far,
+ *                -1  speeds are missing or inconsistent (sticky).
+ *
+ * Be paranoid: make sure that all memory devices show the same speed, and
+ * if they don't then consider all speeds to be invalid.
+ */
+static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
+{
+	int *dclk_freq = _dclk_freq;
+	const struct memdev_dmi_entry *memdev;
+	unsigned long conf_mem_clk_speed_end;
+	unsigned long speed_end;
+	u16 dmi_mem_clk_speed;
+
+	/* Once the speed has been declared invalid it stays invalid */
+	if (*dclk_freq == -1)
+		return;
+
+	if (dh->type != DMI_ENTRY_MEM_DEVICE)
+		return;
+
+	memdev = (const struct memdev_dmi_entry *)dh;
+
+	/* Check that a DIMM is present */
+	if (memdev->size == 0)
+		return;
+
+	/*
+	 * Offset of the first byte past each speed field. Both fields are
+	 * 16 bits wide, so the record must extend up to this offset for the
+	 * whole field to be readable; comparing against the offset of the
+	 * field's first byte only would accept a truncated field.
+	 */
+	conf_mem_clk_speed_end =
+		(unsigned long)&memdev->conf_mem_clk_speed -
+		(unsigned long)&memdev->type +
+		sizeof(memdev->conf_mem_clk_speed);
+	speed_end =
+		(unsigned long)&memdev->speed -
+		(unsigned long)&memdev->type +
+		sizeof(memdev->speed);
+
+	/*
+	 * Pick the configured speed if it's available, otherwise
+	 * pick the DIMM speed, or we don't have a speed.
+	 */
+	if (memdev->length >= conf_mem_clk_speed_end) {
+		dmi_mem_clk_speed = memdev->conf_mem_clk_speed;
+	} else if (memdev->length >= speed_end) {
+		dmi_mem_clk_speed = memdev->speed;
+	} else {
+		*dclk_freq = -1;
+		return;
+	}
+
+	if (*dclk_freq == 0) {
+		/*
+		 * First populated DIMM: adopt its speed if it reports a
+		 * valid one, otherwise we don't have a valid speed.
+		 */
+		*dclk_freq = dmi_mem_clk_speed > 0 ? dmi_mem_clk_speed : -1;
+	} else if (*dclk_freq > 0 && *dclk_freq != dmi_mem_clk_speed) {
+		/*
+		 * If we have a speed, check that all DIMMS are the same
+		 * speed, otherwise set the speed as invalid.
+		 */
+		*dclk_freq = -1;
+	}
+}
+
+/*
+ * Fallback DCLK frequency, used when nothing reliable can be found in
+ * the DMI. The value is taken straight from the datasheet.
+ */
+#define DEFAULT_DCLK_FREQ 800
+
+/*
+ * Walk the DMI table with decode_dclk() and return the frequency it
+ * settled on. A result below 1 (no usable entry, or entries rejected as
+ * invalid) is replaced by DEFAULT_DCLK_FREQ.
+ */
+static int get_dclk_freq(void)
+{
+	int freq = 0;
+
+	dmi_walk(decode_dclk, &freq);
+
+	return (freq < 1) ? DEFAULT_DCLK_FREQ : freq;
}
/*
{
struct i7core_pvt *pvt = mci->pvt_info;
struct pci_dev *pdev;
- const u32 cache_line_size = 64;
- const u32 freq_dclk = 800*1000000;
u32 dw_scrub;
u32 dw_ssr;
/* Prepare to disable petrol scrub */
dw_scrub &= ~STARTSCRUB;
/* Stop the patrol scrub engine */
- write_and_test(pdev, MC_SCRUB_CONTROL, dw_scrub & ~0x00ffffff);
+ write_and_test(pdev, MC_SCRUB_CONTROL,
+ dw_scrub & ~SCRUBINTERVAL_MASK);
/* Get current status of scrub rate and set bit to disable */
pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
dw_ssr &= ~SSR_MODE_MASK;
dw_ssr |= SSR_MODE_DISABLE;
} else {
+ const int cache_line_size = 64;
+ const u32 freq_dclk_mhz = pvt->dclk_freq;
+ unsigned long long scrub_interval;
/*
* Translate the desired scrub rate to a register value and
- * program the cooresponding register value.
+ * program the corresponding register value.
*/
- dw_scrub = 0x00ffffff & (cache_line_size * freq_dclk / new_bw);
+ scrub_interval = (unsigned long long)freq_dclk_mhz *
+ cache_line_size * 1000000;
+ do_div(scrub_interval, new_bw);
+
+ if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
+ return -EINVAL;
+
+ dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;
/* Start the patrol scrub engine */
pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
struct i7core_pvt *pvt = mci->pvt_info;
struct pci_dev *pdev;
const u32 cache_line_size = 64;
- const u32 freq_dclk = 800*1000000;
+ const u32 freq_dclk_mhz = pvt->dclk_freq;
+ unsigned long long scrub_rate;
u32 scrubval;
/* Get data from the MC register, function 2 */
pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
/* Mask highest 8-bits to 0 */
- scrubval &= 0x00ffffff;
+ scrubval &= SCRUBINTERVAL_MASK;
if (!scrubval)
return 0;
/* Calculate scrub rate value into byte/sec bandwidth */
- return 0xffffffff & (cache_line_size * freq_dclk / (u64) scrubval);
+ scrub_rate = (unsigned long long)freq_dclk_mhz *
+ 1000000 * cache_line_size;
+ do_div(scrub_rate, scrubval);
+ return (int)scrub_rate;
}
static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
&pvt->i7core_dev->pdev[0]->dev,
EDAC_MOD_STR);
if (unlikely(!pvt->i7core_pci))
- pr_warn("Unable to setup PCI error report via EDAC\n");
+ i7core_printk(KERN_WARNING,
+ "Unable to setup PCI error report via EDAC\n");
}
static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
if (pvt->enable_scrub)
disable_sdram_scrub_setting(mci);
- /* Disable MCE NMI handler */
- edac_mce_unregister(&pvt->edac_mce);
+ atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec);
/* Disable EDAC polling */
i7core_pci_ctl_release(pvt);
/* allocating generic PCI control info */
i7core_pci_ctl_create(pvt);
- /* Registers on edac_mce in order to receive memory errors */
- pvt->edac_mce.priv = mci;
- pvt->edac_mce.check_error = i7core_mce_check_error;
- rc = edac_mce_register(&pvt->edac_mce);
- if (unlikely(rc < 0)) {
- debugf0("MC: " __FILE__
- ": %s(): failed edac_mce_register()\n", __func__);
- goto fail1;
- }
+ /* DCLK for scrub rate setting */
+ pvt->dclk_freq = get_dclk_freq();
+
+ atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec);
return 0;
-fail1:
- i7core_pci_ctl_release(pvt);
- edac_mc_del_mc(mci->dev);
fail0:
kfree(mci->ctl_name);
edac_mc_free(mci);
static int __devinit i7core_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
- int rc;
+ int rc, count = 0;
struct i7core_dev *i7core_dev;
/* get the pci devices we want to reserve for our use */
goto fail0;
list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
+ count++;
rc = i7core_register_mci(i7core_dev);
if (unlikely(rc < 0))
goto fail1;
}
- i7core_printk(KERN_INFO, "Driver loaded.\n");
+ /*
+ * Nehalem-EX uses a different memory controller. However, as the
+ * memory controller is not visible on some Nehalem/Nehalem-EP, we
+ * need to indirectly probe via an X58 PCI device. The same devices
+ * are found on (some) Nehalem-EX. So, on those machines, the
+ * probe routine needs to return -ENODEV, as the actual Memory
+ * Controller registers won't be detected.
+ */
+ if (!count) {
+ rc = -ENODEV;
+ goto fail1;
+ }
+
+ i7core_printk(KERN_INFO,
+ "Driver loaded, %d memory controller(s) found.\n",
+ count);
mutex_unlock(&i7core_edac_lock);
return 0;