From 41cda0e0cae2dc5e6e2e55c2bc03f0cda4291990 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 9 Aug 2012 18:41:07 +0200 Subject: [PATCH] amd64_edac: Improve error injection When injecting DRAM ECC errors over the F3xB[8,C] interface, the machine does this by injecting the error in the next non-cached access. This takes relatively long time on a normal system so that in order for us to expedite it, we disable the caches around the injection. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 10 +++++----- drivers/edac/amd64_edac.h | 23 +++++++++++++++++++---- drivers/edac/amd64_edac_inj.c | 18 +++++++++++++++++- 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 0e7e4388bdcc..b52d848def59 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -60,8 +60,8 @@ struct scrubrate { { 0x00, 0UL}, /* scrubbing off */ }; -static int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, - u32 *val, const char *func) +int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, + u32 *val, const char *func) { int err = 0; @@ -1983,11 +1983,11 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m) static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, struct mce *m) { - u16 ec = EC(m->status); - u8 xec = XEC(m->status, 0x1f); u8 ecc_type = (m->status >> 45) & 0x3; + u8 xec = XEC(m->status, 0x1f); + u16 ec = EC(m->status); - /* Bail early out if this was an 'observed' error */ + /* Bail out early if this was an 'observed' error */ if (PP(ec) == NBSL_PP_OBS) return; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 0b184a809f87..eacce615fe1c 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -273,9 +273,10 @@ #define SET_NB_ARRAY_ADDR(section) (((section) & 0x3) << 1) #define F10_NB_ARRAY_DATA 0xBC +#define F10_NB_ARR_ECC_WR_REQ BIT(17) #define SET_NB_DRAM_INJECTION_WRITE(inj) \ (BIT(((inj.word) & 0xF) + 20) | \ - BIT(17) | inj.bit_map) + F10_NB_ARR_ECC_WR_REQ | inj.bit_map) #define SET_NB_DRAM_INJECTION_READ(inj) \ (BIT(((inj.word) & 0xF) + 20) | \ BIT(16) | inj.bit_map) @@ -306,9 +307,9 @@ enum amd_families { /* Error injection control structure */ struct error_injection { - u32 section; - u32 word; - u32 bit_map; + u32 section; + u32 word; + u32 bit_map; }; /* low and high part of PCI config space regs */ @@ -460,6 +461,8 @@ struct amd64_family_type { struct low_ops ops; }; +int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, + u32 *val, const char *func); int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, u32 val, const char *func); @@ -476,3 +479,15 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, u64 *hole_offset, u64 *hole_size); #define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +/* Injection helpers */ +static inline void disable_caches(void *dummy) +{ + write_cr0(read_cr0() | X86_CR0_CD); + wbinvd(); +} + +static inline void enable_caches(void *dummy) +{ + write_cr0(read_cr0() & ~X86_CR0_CD); +} diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c index 8977e2fa61da..8c171fa1cb9b 100644 --- a/drivers/edac/amd64_edac_inj.c +++ b/drivers/edac/amd64_edac_inj.c @@ -153,8 +153,8 @@ static ssize_t amd64_inject_write_store(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; + u32 section, word_bits, tmp; unsigned long value; - u32 section, word_bits; int ret; ret = strict_strtoul(data, 10, &value); @@ -168,9 +168,25 @@ static ssize_t amd64_inject_write_store(struct device *dev, word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection); + pr_notice_once("Don't forget to decrease MCE polling interval in\n" + "/sys/bus/machinecheck/devices/machinecheck/check_interval\n" + "so that you can get the error report faster.\n"); + + on_each_cpu(disable_caches, NULL, 1); + /* Issue 'word' and 'bit' along with the READ request */ amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); + retry: + /* wait until injection happens */ + amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp); + if (tmp & F10_NB_ARR_ECC_WR_REQ) { + cpu_relax(); + goto retry; + } + + on_each_cpu(enable_caches, NULL, 1); + edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); return count; -- 2.39.5