2 * edac_mc kernel module
3 * (C) 2005 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 * http://www.anime.net/~goemon/linux-ecc/
11 * Modified by Dave Peterson and Doug Thompson
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/proc_fs.h>
19 #include <linux/kernel.h>
20 #include <linux/types.h>
21 #include <linux/smp.h>
22 #include <linux/init.h>
23 #include <linux/sysctl.h>
24 #include <linux/highmem.h>
25 #include <linux/timer.h>
26 #include <linux/slab.h>
27 #include <linux/jiffies.h>
28 #include <linux/spinlock.h>
29 #include <linux/list.h>
30 #include <linux/sysdev.h>
31 #include <linux/ctype.h>
32 #include <linux/kthread.h>
34 #include <asm/uaccess.h>
40 #define EDAC_MC_VERSION "Ver: 2.0.0 " __DATE__
42 /* For now, disable the EDAC sysfs code. The sysfs interface that EDAC
43 * presents to user space needs more thought, and is likely to change
46 #define DISABLE_EDAC_SYSFS
48 #ifdef CONFIG_EDAC_DEBUG
49 /* Values of 0 to 4 will generate output */
50 int edac_debug_level = 1;
51 EXPORT_SYMBOL(edac_debug_level);
54 /* EDAC controls, settable by module parameter and sysfs */
55 static int log_ue = 1;
56 static int log_ce = 1;
57 static int panic_on_ue;
58 static int poll_msec = 1000;
60 static int check_pci_parity = 0; /* default NO (0); NOTE(review): this comment used to say "default YES", which contradicts the initializer - confirm the intended default */
61 static int panic_on_pci_parity; /* default no panic on PCI Parity */
62 static atomic_t pci_parity_count = ATOMIC_INIT(0);
64 /* lock to memory controller's control array */
65 static DECLARE_MUTEX(mem_ctls_mutex);
66 static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices);
68 static struct task_struct *edac_thread;
70 /* Element type of the PCI whitelist and blacklist arrays */
71 struct edac_pci_device_list {
72 unsigned int vendor; /* Vendor ID */
73 unsigned int device; /* Device ID */
77 #define MAX_LISTED_PCI_DEVICES 32
79 /* List of PCI devices (vendor-id:device-id) that should be skipped */
80 static struct edac_pci_device_list pci_blacklist[MAX_LISTED_PCI_DEVICES];
81 static int pci_blacklist_count;
83 /* List of PCI devices (vendor-id:device-id) that should be scanned */
84 static struct edac_pci_device_list pci_whitelist[MAX_LISTED_PCI_DEVICES];
85 static int pci_whitelist_count ;
87 /* START sysfs data and methods */
89 #ifndef DISABLE_EDAC_SYSFS
91 static const char *mem_types[] = {
92 [MEM_EMPTY] = "Empty",
93 [MEM_RESERVED] = "Reserved",
94 [MEM_UNKNOWN] = "Unknown",
98 [MEM_SDR] = "Unbuffered-SDR",
99 [MEM_RDR] = "Registered-SDR",
100 [MEM_DDR] = "Unbuffered-DDR",
101 [MEM_RDDR] = "Registered-DDR",
105 static const char *dev_types[] = {
106 [DEV_UNKNOWN] = "Unknown",
116 static const char *edac_caps[] = {
117 [EDAC_UNKNOWN] = "Unknown",
118 [EDAC_NONE] = "None",
119 [EDAC_RESERVED] = "Reserved",
120 [EDAC_PARITY] = "PARITY",
122 [EDAC_SECDED] = "SECDED",
123 [EDAC_S2ECD2ED] = "S2ECD2ED",
124 [EDAC_S4ECD4ED] = "S4ECD4ED",
125 [EDAC_S8ECD8ED] = "S8ECD8ED",
126 [EDAC_S16ECD16ED] = "S16ECD16ED"
130 /* sysfs object: /sys/devices/system/edac */
131 static struct sysdev_class edac_class = {
132 set_kset_name("edac"),
136 * /sys/devices/system/edac/mc
137 * /sys/devices/system/edac/pci
139 static struct kobject edac_memctrl_kobj;
140 static struct kobject edac_pci_kobj;
142 /* We use these to wait for the reference counts on edac_memctrl_kobj and
143 * edac_pci_kobj to reach 0.
145 static struct completion edac_memctrl_kobj_complete;
146 static struct completion edac_pci_kobj_complete;
149 * /sys/devices/system/edac/mc;
150 * data structures and methods
153 static ssize_t memctrl_string_show(void *ptr, char *buffer)
155 char *value = (char*) ptr;
156 return sprintf(buffer, "%s\n", value);
160 static ssize_t memctrl_int_show(void *ptr, char *buffer)
162 int *value = (int*) ptr;
163 return sprintf(buffer, "%d\n", *value);
166 static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
168 int *value = (int*) ptr;
170 if (isdigit(*buffer))
171 *value = simple_strtoul(buffer, NULL, 0);
176 struct memctrl_dev_attribute {
177 struct attribute attr;
179 ssize_t (*show)(void *,char *);
180 ssize_t (*store)(void *, const char *, size_t);
183 /* Set of show/store abstract level functions for memory control object */
185 memctrl_dev_show(struct kobject *kobj, struct attribute *attr, char *buffer)
187 struct memctrl_dev_attribute *memctrl_dev;
188 memctrl_dev = (struct memctrl_dev_attribute*)attr;
190 if (memctrl_dev->show)
191 return memctrl_dev->show(memctrl_dev->value, buffer);
196 memctrl_dev_store(struct kobject *kobj, struct attribute *attr,
197 const char *buffer, size_t count)
199 struct memctrl_dev_attribute *memctrl_dev;
200 memctrl_dev = (struct memctrl_dev_attribute*)attr;
202 if (memctrl_dev->store)
203 return memctrl_dev->store(memctrl_dev->value, buffer, count);
207 static struct sysfs_ops memctrlfs_ops = {
208 .show = memctrl_dev_show,
209 .store = memctrl_dev_store
212 #define MEMCTRL_ATTR(_name,_mode,_show,_store) \
213 struct memctrl_dev_attribute attr_##_name = { \
214 .attr = {.name = __stringify(_name), .mode = _mode }, \
220 #define MEMCTRL_STRING_ATTR(_name,_data,_mode,_show,_store) \
221 struct memctrl_dev_attribute attr_##_name = { \
222 .attr = {.name = __stringify(_name), .mode = _mode }, \
228 /* edac/mc attribute files */
230 MEMCTRL_STRING_ATTR(mc_version,EDAC_MC_VERSION,S_IRUGO,memctrl_string_show,NULL);
233 /* edac/mc control files */
234 MEMCTRL_ATTR(panic_on_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
235 MEMCTRL_ATTR(log_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
236 MEMCTRL_ATTR(log_ce,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
237 MEMCTRL_ATTR(poll_msec,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
240 /* Base Attributes of the memory ECC object */
241 static struct memctrl_dev_attribute *memctrl_attr[] = {
249 /* Main MC kobject release() function */
250 static void edac_memctrl_master_release(struct kobject *kobj)
252 debugf1("%s()\n", __func__);
253 complete(&edac_memctrl_kobj_complete);
256 static struct kobj_type ktype_memctrl = {
257 .release = edac_memctrl_master_release,
258 .sysfs_ops = &memctrlfs_ops,
259 .default_attrs = (struct attribute **) memctrl_attr,
262 #endif /* DISABLE_EDAC_SYSFS */
264 /* Initialize the main sysfs entries for edac:
265 * /sys/devices/system/edac
272 static int edac_sysfs_memctrl_setup(void)
273 #ifdef DISABLE_EDAC_SYSFS
281 debugf1("%s()\n", __func__);
283 /* create the /sys/devices/system/edac directory */
284 err = sysdev_class_register(&edac_class);
286 /* Init the MC's kobject */
287 memset(&edac_memctrl_kobj, 0, sizeof (edac_memctrl_kobj));
288 edac_memctrl_kobj.parent = &edac_class.kset.kobj;
289 edac_memctrl_kobj.ktype = &ktype_memctrl;
291 /* generate sysfs "..../edac/mc" */
292 err = kobject_set_name(&edac_memctrl_kobj,"mc");
294 /* FIXME: maybe new sysdev_create_subdir() */
295 err = kobject_register(&edac_memctrl_kobj);
297 debugf1("Failed to register '.../edac/mc'\n");
299 debugf1("Registered '.../edac/mc' kobject\n");
303 debugf1("%s() error=%d\n", __func__, err);
308 #endif /* DISABLE_EDAC_SYSFS */
312 * the '..../edac/mc' kobject followed by '..../edac' itself
314 static void edac_sysfs_memctrl_teardown(void)
316 #ifndef DISABLE_EDAC_SYSFS
317 debugf0("MC: " __FILE__ ": %s()\n", __func__);
319 /* Unregister the MC's kobject and wait for reference count to reach
322 init_completion(&edac_memctrl_kobj_complete);
323 kobject_unregister(&edac_memctrl_kobj);
324 wait_for_completion(&edac_memctrl_kobj_complete);
326 /* Unregister the 'edac' object */
327 sysdev_class_unregister(&edac_class);
328 #endif /* DISABLE_EDAC_SYSFS */
331 #ifndef DISABLE_EDAC_SYSFS
334 * /sys/devices/system/edac/pci;
335 * data structures and methods
338 struct list_control {
339 struct edac_pci_device_list *list;
345 /* Output the list as: vendor_id:device_id<,vendor_id:device_id> */
346 static ssize_t edac_pci_list_string_show(void *ptr, char *buffer)
348 struct list_control *listctl;
349 struct edac_pci_device_list *list;
355 list = listctl->list;
357 for (i = 0; i < *(listctl->count); i++, list++ ) {
359 len += snprintf(p + len, (PAGE_SIZE-len), ",");
361 len += snprintf(p + len,
364 list->vendor,list->device);
367 len += snprintf(p + len,(PAGE_SIZE-len), "\n");
369 return (ssize_t) len;
374 * Scan string from **s to **e looking for one 'vendor:device' tuple
375 * where each field is a hex value
377 * return 0 if an entry is NOT found
378 * return 1 if an entry is found
379 * fill in *vendor_id and *device_id with values found
381 * In both cases, make sure *s has been moved forward toward *e
383 static int parse_one_device(const char **s,const char **e,
384 unsigned int *vendor_id, unsigned int *device_id)
386 const char *runner, *p;
388 /* if null byte, we are done */
390 (*s)++; /* keep *s moving */
394 /* skip over newlines & whitespace */
395 if ((**s == '\n') || isspace(**s)) {
400 if (!isxdigit(**s)) {
405 /* parse vendor_id */
407 while (runner < *e) {
408 /* scan for vendor:device delimiter */
409 if (*runner == ':') {
410 *vendor_id = simple_strtol((char*) *s, (char**) &p, 16);
417 if (!isxdigit(*runner)) {
422 /* parse device_id */
424 *device_id = simple_strtol((char*)runner, (char**)&p, 16);
433 static ssize_t edac_pci_list_string_store(void *ptr, const char *buffer,
436 struct list_control *listctl;
437 struct edac_pci_device_list *list;
438 unsigned int vendor_id, device_id;
446 list = listctl->list;
447 index = listctl->count;
450 while (*index < MAX_LISTED_PCI_DEVICES) {
452 if (parse_one_device(&s,&e,&vendor_id,&device_id)) {
453 list[ *index ].vendor = vendor_id;
454 list[ *index ].device = device_id;
458 /* check for all data consume */
467 static ssize_t edac_pci_int_show(void *ptr, char *buffer)
470 return sprintf(buffer,"%d\n",*value);
473 static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count)
477 if (isdigit(*buffer))
478 *value = simple_strtoul(buffer,NULL,0);
483 struct edac_pci_dev_attribute {
484 struct attribute attr;
486 ssize_t (*show)(void *,char *);
487 ssize_t (*store)(void *, const char *,size_t);
490 /* Set of show/store abstract level functions for PCI Parity object */
491 static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr,
494 struct edac_pci_dev_attribute *edac_pci_dev;
495 edac_pci_dev= (struct edac_pci_dev_attribute*)attr;
497 if (edac_pci_dev->show)
498 return edac_pci_dev->show(edac_pci_dev->value, buffer);
502 static ssize_t edac_pci_dev_store(struct kobject *kobj, struct attribute *attr,
503 const char *buffer, size_t count)
505 struct edac_pci_dev_attribute *edac_pci_dev;
506 edac_pci_dev= (struct edac_pci_dev_attribute*)attr;
508 if (edac_pci_dev->show)
509 return edac_pci_dev->store(edac_pci_dev->value, buffer, count);
513 static struct sysfs_ops edac_pci_sysfs_ops = {
514 .show = edac_pci_dev_show,
515 .store = edac_pci_dev_store
519 #define EDAC_PCI_ATTR(_name,_mode,_show,_store) \
520 struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
521 .attr = {.name = __stringify(_name), .mode = _mode }, \
527 #define EDAC_PCI_STRING_ATTR(_name,_data,_mode,_show,_store) \
528 struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
529 .attr = {.name = __stringify(_name), .mode = _mode }, \
536 static struct list_control pci_whitelist_control = {
537 .list = pci_whitelist,
538 .count = &pci_whitelist_count
541 static struct list_control pci_blacklist_control = {
542 .list = pci_blacklist,
543 .count = &pci_blacklist_count
546 /* whitelist attribute */
547 EDAC_PCI_STRING_ATTR(pci_parity_whitelist,
548 &pci_whitelist_control,
550 edac_pci_list_string_show,
551 edac_pci_list_string_store);
553 EDAC_PCI_STRING_ATTR(pci_parity_blacklist,
554 &pci_blacklist_control,
556 edac_pci_list_string_show,
557 edac_pci_list_string_store);
560 /* PCI Parity control files */
561 EDAC_PCI_ATTR(check_pci_parity,S_IRUGO|S_IWUSR,edac_pci_int_show,edac_pci_int_store);
562 EDAC_PCI_ATTR(panic_on_pci_parity,S_IRUGO|S_IWUSR,edac_pci_int_show,edac_pci_int_store);
563 EDAC_PCI_ATTR(pci_parity_count,S_IRUGO,edac_pci_int_show,NULL);
565 /* Base attributes of the EDAC PCI parity object */
566 static struct edac_pci_dev_attribute *edac_pci_attr[] = {
567 &edac_pci_attr_check_pci_parity,
568 &edac_pci_attr_panic_on_pci_parity,
569 &edac_pci_attr_pci_parity_count,
573 /* No memory to release */
574 static void edac_pci_release(struct kobject *kobj)
576 debugf1("%s()\n", __func__);
577 complete(&edac_pci_kobj_complete);
580 static struct kobj_type ktype_edac_pci = {
581 .release = edac_pci_release,
582 .sysfs_ops = &edac_pci_sysfs_ops,
583 .default_attrs = (struct attribute **) edac_pci_attr,
586 #endif /* DISABLE_EDAC_SYSFS */
589 * edac_sysfs_pci_setup()
/* Set up the '.../edac/pci' sysfs kobject: zero edac_pci_kobj, parent it
 * under the edac sysdev class kset, attach ktype_edac_pci, name it "pci" and
 * register it.  The DISABLE_EDAC_SYSFS branch and the error checks between
 * the calls are not visible in this listing.
 */
592 static int edac_sysfs_pci_setup(void)
593 #ifdef DISABLE_EDAC_SYSFS
601 debugf1("%s()\n", __func__);
603 memset(&edac_pci_kobj, 0, sizeof(edac_pci_kobj));
604 edac_pci_kobj.parent = &edac_class.kset.kobj;
605 edac_pci_kobj.ktype = &ktype_edac_pci;
607 err = kobject_set_name(&edac_pci_kobj, "pci");
609 /* Instantiate the pci object */
610 /* FIXME: maybe new sysdev_create_subdir() */
611 err = kobject_register(&edac_pci_kobj);
613 debugf1("Failed to register '.../edac/pci'\n");
615 debugf1("Registered '.../edac/pci' kobject\n");
619 #endif /* DISABLE_EDAC_SYSFS */
621 static void edac_sysfs_pci_teardown(void)
623 #ifndef DISABLE_EDAC_SYSFS
624 debugf0("%s()\n", __func__);
625 init_completion(&edac_pci_kobj_complete);
626 kobject_unregister(&edac_pci_kobj);
627 wait_for_completion(&edac_pci_kobj_complete);
631 #ifndef DISABLE_EDAC_SYSFS
633 /* EDAC sysfs CSROW data structures and methods */
635 /* Set of more detailed csrow<id> attribute show/store functions */
636 static ssize_t csrow_ch0_dimm_label_show(struct csrow_info *csrow, char *data)
640 if (csrow->nr_channels > 0) {
641 size = snprintf(data, EDAC_MC_LABEL_LEN,"%s\n",
642 csrow->channels[0].label);
647 static ssize_t csrow_ch1_dimm_label_show(struct csrow_info *csrow, char *data)
651 if (csrow->nr_channels > 0) {
652 size = snprintf(data, EDAC_MC_LABEL_LEN, "%s\n",
653 csrow->channels[1].label);
658 static ssize_t csrow_ch0_dimm_label_store(struct csrow_info *csrow,
659 const char *data, size_t size)
661 ssize_t max_size = 0;
663 if (csrow->nr_channels > 0) {
664 max_size = min((ssize_t)size,(ssize_t)EDAC_MC_LABEL_LEN-1);
665 strncpy(csrow->channels[0].label, data, max_size);
666 csrow->channels[0].label[max_size] = '\0';
671 static ssize_t csrow_ch1_dimm_label_store(struct csrow_info *csrow,
672 const char *data, size_t size)
674 ssize_t max_size = 0;
676 if (csrow->nr_channels > 1) {
677 max_size = min((ssize_t)size,(ssize_t)EDAC_MC_LABEL_LEN-1);
678 strncpy(csrow->channels[1].label, data, max_size);
679 csrow->channels[1].label[max_size] = '\0';
684 static ssize_t csrow_ue_count_show(struct csrow_info *csrow, char *data)
686 return sprintf(data,"%u\n", csrow->ue_count);
689 static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data)
691 return sprintf(data,"%u\n", csrow->ce_count);
694 static ssize_t csrow_ch0_ce_count_show(struct csrow_info *csrow, char *data)
698 if (csrow->nr_channels > 0) {
699 size = sprintf(data,"%u\n", csrow->channels[0].ce_count);
704 static ssize_t csrow_ch1_ce_count_show(struct csrow_info *csrow, char *data)
708 if (csrow->nr_channels > 1) {
709 size = sprintf(data,"%u\n", csrow->channels[1].ce_count);
714 static ssize_t csrow_size_show(struct csrow_info *csrow, char *data)
716 return sprintf(data,"%u\n", PAGES_TO_MiB(csrow->nr_pages));
719 static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data)
721 return sprintf(data,"%s\n", mem_types[csrow->mtype]);
724 static ssize_t csrow_dev_type_show(struct csrow_info *csrow, char *data)
726 return sprintf(data,"%s\n", dev_types[csrow->dtype]);
729 static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data)
731 return sprintf(data,"%s\n", edac_caps[csrow->edac_mode]);
734 struct csrowdev_attribute {
735 struct attribute attr;
736 ssize_t (*show)(struct csrow_info *,char *);
737 ssize_t (*store)(struct csrow_info *, const char *,size_t);
740 #define to_csrow(k) container_of(k, struct csrow_info, kobj)
741 #define to_csrowdev_attr(a) container_of(a, struct csrowdev_attribute, attr)
743 /* Set of show/store higher level functions for csrow objects */
744 static ssize_t csrowdev_show(struct kobject *kobj, struct attribute *attr,
747 struct csrow_info *csrow = to_csrow(kobj);
748 struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr);
750 if (csrowdev_attr->show)
751 return csrowdev_attr->show(csrow, buffer);
755 static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr,
756 const char *buffer, size_t count)
758 struct csrow_info *csrow = to_csrow(kobj);
759 struct csrowdev_attribute * csrowdev_attr = to_csrowdev_attr(attr);
761 if (csrowdev_attr->store)
762 return csrowdev_attr->store(csrow, buffer, count);
766 static struct sysfs_ops csrowfs_ops = {
767 .show = csrowdev_show,
768 .store = csrowdev_store
771 #define CSROWDEV_ATTR(_name,_mode,_show,_store) \
772 struct csrowdev_attribute attr_##_name = { \
773 .attr = {.name = __stringify(_name), .mode = _mode }, \
778 /* csrow<id>/attribute files */
779 CSROWDEV_ATTR(size_mb,S_IRUGO,csrow_size_show,NULL);
780 CSROWDEV_ATTR(dev_type,S_IRUGO,csrow_dev_type_show,NULL);
781 CSROWDEV_ATTR(mem_type,S_IRUGO,csrow_mem_type_show,NULL);
782 CSROWDEV_ATTR(edac_mode,S_IRUGO,csrow_edac_mode_show,NULL);
783 CSROWDEV_ATTR(ue_count,S_IRUGO,csrow_ue_count_show,NULL);
784 CSROWDEV_ATTR(ce_count,S_IRUGO,csrow_ce_count_show,NULL);
785 CSROWDEV_ATTR(ch0_ce_count,S_IRUGO,csrow_ch0_ce_count_show,NULL);
786 CSROWDEV_ATTR(ch1_ce_count,S_IRUGO,csrow_ch1_ce_count_show,NULL);
788 /* control/attribute files */
789 CSROWDEV_ATTR(ch0_dimm_label,S_IRUGO|S_IWUSR,
790 csrow_ch0_dimm_label_show,
791 csrow_ch0_dimm_label_store);
792 CSROWDEV_ATTR(ch1_dimm_label,S_IRUGO|S_IWUSR,
793 csrow_ch1_dimm_label_show,
794 csrow_ch1_dimm_label_store);
797 /* Attributes of the CSROW<id> object */
798 static struct csrowdev_attribute *csrow_attr[] = {
807 &attr_ch0_dimm_label,
808 &attr_ch1_dimm_label,
813 /* No memory to release */
814 static void edac_csrow_instance_release(struct kobject *kobj)
816 struct csrow_info *cs;
818 debugf1("%s()\n", __func__);
819 cs = container_of(kobj, struct csrow_info, kobj);
820 complete(&cs->kobj_complete);
823 static struct kobj_type ktype_csrow = {
824 .release = edac_csrow_instance_release,
825 .sysfs_ops = &csrowfs_ops,
826 .default_attrs = (struct attribute **) csrow_attr,
829 /* Create a CSROW object under the specified edac_mc_device */
830 static int edac_create_csrow_object(struct kobject *edac_mci_kobj,
831 struct csrow_info *csrow, int index )
835 debugf0("%s()\n", __func__);
837 memset(&csrow->kobj, 0, sizeof(csrow->kobj));
839 /* generate ..../edac/mc/mc<id>/csrow<index> */
841 csrow->kobj.parent = edac_mci_kobj;
842 csrow->kobj.ktype = &ktype_csrow;
844 /* name this instance "csrow<index>" */
845 err = kobject_set_name(&csrow->kobj,"csrow%d",index);
847 /* Instantiate the csrow object */
848 err = kobject_register(&csrow->kobj);
850 debugf0("Failed to register CSROW%d\n",index);
852 debugf0("Registered CSROW%d\n",index);
858 /* sysfs data structures and methods for the MCI kobjects */
860 static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci,
861 const char *data, size_t count )
865 mci->ue_noinfo_count = 0;
866 mci->ce_noinfo_count = 0;
869 for (row = 0; row < mci->nr_csrows; row++) {
870 struct csrow_info *ri = &mci->csrows[row];
874 for (chan = 0; chan < ri->nr_channels; chan++)
875 ri->channels[chan].ce_count = 0;
877 mci->start_time = jiffies;
882 static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data)
884 return sprintf(data,"%d\n", mci->ue_count);
887 static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data)
889 return sprintf(data,"%d\n", mci->ce_count);
892 static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data)
894 return sprintf(data,"%d\n", mci->ce_noinfo_count);
897 static ssize_t mci_ue_noinfo_show(struct mem_ctl_info *mci, char *data)
899 return sprintf(data,"%d\n", mci->ue_noinfo_count);
902 static ssize_t mci_seconds_show(struct mem_ctl_info *mci, char *data)
904 return sprintf(data,"%ld\n", (jiffies - mci->start_time) / HZ);
907 static ssize_t mci_mod_name_show(struct mem_ctl_info *mci, char *data)
909 return sprintf(data,"%s %s\n", mci->mod_name, mci->mod_ver);
912 static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data)
914 return sprintf(data,"%s\n", mci->ctl_name);
917 static int mci_output_edac_cap(char *buf, unsigned long edac_cap)
922 for (bit_idx = 0; bit_idx < 8 * sizeof(edac_cap); bit_idx++) {
923 if ((edac_cap >> bit_idx) & 0x1)
924 p += sprintf(p, "%s ", edac_caps[bit_idx]);
930 static ssize_t mci_edac_capability_show(struct mem_ctl_info *mci, char *data)
934 p += mci_output_edac_cap(p,mci->edac_ctl_cap);
935 p += sprintf(p, "\n");
940 static ssize_t mci_edac_current_capability_show(struct mem_ctl_info *mci,
945 p += mci_output_edac_cap(p,mci->edac_cap);
946 p += sprintf(p, "\n");
951 static int mci_output_mtype_cap(char *buf, unsigned long mtype_cap)
956 for (bit_idx = 0; bit_idx < 8 * sizeof(mtype_cap); bit_idx++) {
957 if ((mtype_cap >> bit_idx) & 0x1)
958 p += sprintf(p, "%s ", mem_types[bit_idx]);
964 static ssize_t mci_supported_mem_type_show(struct mem_ctl_info *mci, char *data)
968 p += mci_output_mtype_cap(p,mci->mtype_cap);
969 p += sprintf(p, "\n");
974 static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data)
976 int total_pages, csrow_idx;
978 for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows;
980 struct csrow_info *csrow = &mci->csrows[csrow_idx];
982 if (!csrow->nr_pages)
984 total_pages += csrow->nr_pages;
987 return sprintf(data,"%u\n", PAGES_TO_MiB(total_pages));
990 struct mcidev_attribute {
991 struct attribute attr;
992 ssize_t (*show)(struct mem_ctl_info *,char *);
993 ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
996 #define to_mci(k) container_of(k, struct mem_ctl_info, edac_mci_kobj)
997 #define to_mcidev_attr(a) container_of(a, struct mcidev_attribute, attr)
999 static ssize_t mcidev_show(struct kobject *kobj, struct attribute *attr,
1002 struct mem_ctl_info *mem_ctl_info = to_mci(kobj);
1003 struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr);
1005 if (mcidev_attr->show)
1006 return mcidev_attr->show(mem_ctl_info, buffer);
1010 static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr,
1011 const char *buffer, size_t count)
1013 struct mem_ctl_info *mem_ctl_info = to_mci(kobj);
1014 struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr);
1016 if (mcidev_attr->store)
1017 return mcidev_attr->store(mem_ctl_info, buffer, count);
1021 static struct sysfs_ops mci_ops = {
1022 .show = mcidev_show,
1023 .store = mcidev_store
1026 #define MCIDEV_ATTR(_name,_mode,_show,_store) \
1027 struct mcidev_attribute mci_attr_##_name = { \
1028 .attr = {.name = __stringify(_name), .mode = _mode }, \
1034 MCIDEV_ATTR(reset_counters,S_IWUSR,NULL,mci_reset_counters_store);
1036 /* Attribute files */
1037 MCIDEV_ATTR(mc_name,S_IRUGO,mci_ctl_name_show,NULL);
1038 MCIDEV_ATTR(module_name,S_IRUGO,mci_mod_name_show,NULL);
1039 MCIDEV_ATTR(edac_capability,S_IRUGO,mci_edac_capability_show,NULL);
1040 MCIDEV_ATTR(size_mb,S_IRUGO,mci_size_mb_show,NULL);
1041 MCIDEV_ATTR(seconds_since_reset,S_IRUGO,mci_seconds_show,NULL);
1042 MCIDEV_ATTR(ue_noinfo_count,S_IRUGO,mci_ue_noinfo_show,NULL);
1043 MCIDEV_ATTR(ce_noinfo_count,S_IRUGO,mci_ce_noinfo_show,NULL);
1044 MCIDEV_ATTR(ue_count,S_IRUGO,mci_ue_count_show,NULL);
1045 MCIDEV_ATTR(ce_count,S_IRUGO,mci_ce_count_show,NULL);
1046 MCIDEV_ATTR(edac_current_capability,S_IRUGO,
1047 mci_edac_current_capability_show,NULL);
1048 MCIDEV_ATTR(supported_mem_type,S_IRUGO,
1049 mci_supported_mem_type_show,NULL);
1052 static struct mcidev_attribute *mci_attr[] = {
1053 &mci_attr_reset_counters,
1054 &mci_attr_module_name,
1056 &mci_attr_edac_capability,
1057 &mci_attr_edac_current_capability,
1058 &mci_attr_supported_mem_type,
1060 &mci_attr_seconds_since_reset,
1061 &mci_attr_ue_noinfo_count,
1062 &mci_attr_ce_noinfo_count,
1070 * Release of a MC controlling instance
1072 static void edac_mci_instance_release(struct kobject *kobj)
1074 struct mem_ctl_info *mci;
1077 debugf0("%s() idx=%d\n", __func__, mci->mc_idx);
1078 complete(&mci->kobj_complete);
1081 static struct kobj_type ktype_mci = {
1082 .release = edac_mci_instance_release,
1083 .sysfs_ops = &mci_ops,
1084 .default_attrs = (struct attribute **) mci_attr,
1087 #endif /* DISABLE_EDAC_SYSFS */
1089 #define EDAC_DEVICE_SYMLINK "device"
1092 * Create a new Memory Controller kobject instance,
1093 * mc<id> under the 'mc' directory
/* Register the mc<id> kobject for 'mci' under edac_memctrl_kobj, link it to
 * the underlying device via the EDAC_DEVICE_SYMLINK symlink, and create one
 * csrow object for every csrow with nr_pages > 0.  The error checks, labels
 * and return statements are not visible in this listing.
 */
1099 static int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
1100 #ifdef DISABLE_EDAC_SYSFS
1108 struct csrow_info *csrow;
1109 struct kobject *edac_mci_kobj=&mci->edac_mci_kobj;
1111 debugf0("%s() idx=%d\n", __func__, mci->mc_idx);
1113 memset(edac_mci_kobj, 0, sizeof(*edac_mci_kobj));
1115 /* set the name of the mc<id> object */
1116 err = kobject_set_name(edac_mci_kobj,"mc%d",mci->mc_idx);
1120 /* link to our parent the '..../edac/mc' object */
1121 edac_mci_kobj->parent = &edac_memctrl_kobj;
1122 edac_mci_kobj->ktype = &ktype_mci;
1124 /* register the mc<id> kobject */
1125 err = kobject_register(edac_mci_kobj);
1129 /* create a symlink for the device */
1130 err = sysfs_create_link(edac_mci_kobj, &mci->pdev->dev.kobj,
1131 EDAC_DEVICE_SYMLINK);
1135 /* Make directories for each CSROW object
1136 * under the mc<id> kobject
1138 for (i = 0; i < mci->nr_csrows; i++) {
1140 csrow = &mci->csrows[i];
1142 /* Only expose populated CSROWs */
1143 if (csrow->nr_pages > 0) {
1144 err = edac_create_csrow_object(edac_mci_kobj,csrow,i);
1153 /* CSROW error: back out what has already been registered */
/* NOTE(review): the loop below unregisters mci->csrows[i].kobj, but the
 * nr_pages test and the completion it initializes and waits on go through
 * 'csrow', which in the visible code is only assigned in the creation loop
 * above and so does not follow 'i' here.  This looks like it should use
 * mci->csrows[i] throughout, as edac_remove_sysfs_mci_device() does -
 * confirm and fix.
 */
1155 for ( i--; i >= 0; i--) {
1156 if (csrow->nr_pages > 0) {
1157 init_completion(&csrow->kobj_complete);
1158 kobject_unregister(&mci->csrows[i].kobj);
1159 wait_for_completion(&csrow->kobj_complete);
1164 init_completion(&mci->kobj_complete);
1165 kobject_unregister(edac_mci_kobj);
1166 wait_for_completion(&mci->kobj_complete);
1170 #endif /* DISABLE_EDAC_SYSFS */
1173 * remove a Memory Controller instance
1175 static void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
1177 #ifndef DISABLE_EDAC_SYSFS
1180 debugf0("%s()\n", __func__);
1182 /* remove all csrow kobjects */
1183 for (i = 0; i < mci->nr_csrows; i++) {
1184 if (mci->csrows[i].nr_pages > 0) {
1185 init_completion(&mci->csrows[i].kobj_complete);
1186 kobject_unregister(&mci->csrows[i].kobj);
1187 wait_for_completion(&mci->csrows[i].kobj_complete);
1191 sysfs_remove_link(&mci->edac_mci_kobj, EDAC_DEVICE_SYMLINK);
1192 init_completion(&mci->kobj_complete);
1193 kobject_unregister(&mci->edac_mci_kobj);
1194 wait_for_completion(&mci->kobj_complete);
1195 #endif /* DISABLE_EDAC_SYSFS */
1198 /* END OF sysfs data and methods */
1200 #ifdef CONFIG_EDAC_DEBUG
1202 EXPORT_SYMBOL(edac_mc_dump_channel);
1204 void edac_mc_dump_channel(struct channel_info *chan)
1206 debugf4("\tchannel = %p\n", chan);
1207 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
1208 debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
1209 debugf4("\tchannel->label = '%s'\n", chan->label);
1210 debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
1214 EXPORT_SYMBOL(edac_mc_dump_csrow);
1216 void edac_mc_dump_csrow(struct csrow_info *csrow)
1218 debugf4("\tcsrow = %p\n", csrow);
1219 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
1220 debugf4("\tcsrow->first_page = 0x%lx\n",
1222 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
1223 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
1224 debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
1225 debugf4("\tcsrow->nr_channels = %d\n",
1226 csrow->nr_channels);
1227 debugf4("\tcsrow->channels = %p\n", csrow->channels);
1228 debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
1232 EXPORT_SYMBOL(edac_mc_dump_mci);
1234 void edac_mc_dump_mci(struct mem_ctl_info *mci)
1236 debugf3("\tmci = %p\n", mci);
1237 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
1238 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
1239 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
1240 debugf4("\tmci->edac_check = %p\n", mci->edac_check);
1241 debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
1242 mci->nr_csrows, mci->csrows);
1243 debugf3("\tpdev = %p\n", mci->pdev);
1244 debugf3("\tmod_name:ctl_name = %s:%s\n",
1245 mci->mod_name, mci->ctl_name);
1246 debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
1250 #endif /* CONFIG_EDAC_DEBUG */
1252 /* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
1253 * Adjust 'ptr' so that its alignment is at least as stringent as what the
1254 * compiler would provide for X and return the aligned result.
1256 * If 'size' is a constant, the compiler will optimize this whole function
1257 * down to either a no-op or the addition of a constant to the value of 'ptr'.
1259 static inline char * align_ptr (void *ptr, unsigned size)
1263 /* Here we assume that the alignment of a "long long" is the most
1264 * stringent alignment that the compiler will ever provide by default.
1265 * As far as I know, this is a reasonable assumption.
1267 if (size > sizeof(long))
1268 align = sizeof(long long);
1269 else if (size > sizeof(int))
1270 align = sizeof(long);
1271 else if (size > sizeof(short))
1272 align = sizeof(int);
1273 else if (size > sizeof(char))
1274 align = sizeof(short);
1276 return (char *) ptr;
1281 return (char *) ptr;
1283 return (char *) (((unsigned long) ptr) + align - r);
1287 EXPORT_SYMBOL(edac_mc_alloc);
1290 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
1291 * @sz_pvt: size of private storage needed
1292 * @nr_csrows: Number of CSROWS needed for this MC
1293 * @nr_chans: Number of channels for the MC
1295 * Everything is kmalloc'ed as one big chunk - more efficient.
1296 * Only can be used if all structures have the same lifetime - otherwise
1297 * you have to allocate and initialize your own structures.
1299 * Use edac_mc_free() to free mc structures allocated by this function.
1302 * NULL allocation failed
1303 * struct mem_ctl_info pointer
1305 struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
1308 struct mem_ctl_info *mci;
1309 struct csrow_info *csi, *csrow;
1310 struct channel_info *chi, *chp, *chan;
1315 /* Figure out the offsets of the various items from the start of an mc
1316 * structure. We want the alignment of each item to be at least as
1317 * stringent as what the compiler would provide if we could simply
1318 * hardcode everything into a single struct.
1320 mci = (struct mem_ctl_info *) 0;
1321 csi = (struct csrow_info *)align_ptr(&mci[1], sizeof(*csi));
1322 chi = (struct channel_info *)
1323 align_ptr(&csi[nr_csrows], sizeof(*chi));
1324 pvt = align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
1325 size = ((unsigned long) pvt) + sz_pvt;
1327 if ((mci = kmalloc(size, GFP_KERNEL)) == NULL)
1330 /* Adjust pointers so they point within the memory we just allocated
1331 * rather than an imaginary chunk of memory located at address 0.
1333 csi = (struct csrow_info *) (((char *) mci) + ((unsigned long) csi));
1334 chi = (struct channel_info *) (((char *) mci) + ((unsigned long) chi));
1335 pvt = sz_pvt ? (((char *) mci) + ((unsigned long) pvt)) : NULL;
1337 memset(mci, 0, size); /* clear all fields */
1340 mci->pvt_info = pvt;
1341 mci->nr_csrows = nr_csrows;
1343 for (row = 0; row < nr_csrows; row++) {
1345 csrow->csrow_idx = row;
1347 csrow->nr_channels = nr_chans;
1348 chp = &chi[row * nr_chans];
1349 csrow->channels = chp;
1351 for (chn = 0; chn < nr_chans; chn++) {
1353 chan->chan_idx = chn;
1354 chan->csrow = csrow;
1362 EXPORT_SYMBOL(edac_mc_free);
1365 * edac_mc_free: Free a previously allocated 'mci' structure
1366 * @mci: pointer to a struct mem_ctl_info structure
1368 void edac_mc_free(struct mem_ctl_info *mci)
1375 EXPORT_SYMBOL(edac_mc_find_mci_by_pdev);
1377 struct mem_ctl_info *edac_mc_find_mci_by_pdev(struct pci_dev *pdev)
1379 struct mem_ctl_info *mci;
1380 struct list_head *item;
1382 debugf3("%s()\n", __func__);
1384 list_for_each(item, &mc_devices) {
1385 mci = list_entry(item, struct mem_ctl_info, link);
1387 if (mci->pdev == pdev)
1394 static int add_mc_to_global_list (struct mem_ctl_info *mci)
1396 struct list_head *item, *insert_before;
1397 struct mem_ctl_info *p;
1400 if (list_empty(&mc_devices)) {
1402 insert_before = &mc_devices;
1404 if (edac_mc_find_mci_by_pdev(mci->pdev)) {
1405 edac_printk(KERN_WARNING, EDAC_MC,
1406 "%s (%s) %s %s already assigned %d\n",
1407 mci->pdev->dev.bus_id,
1408 pci_name(mci->pdev), mci->mod_name,
1409 mci->ctl_name, mci->mc_idx);
1413 insert_before = NULL;
1416 list_for_each(item, &mc_devices) {
1417 p = list_entry(item, struct mem_ctl_info, link);
1419 if (p->mc_idx != i) {
1420 insert_before = item;
1429 if (insert_before == NULL)
1430 insert_before = &mc_devices;
1433 list_add_tail_rcu(&mci->link, insert_before);
1438 static void complete_mc_list_del (struct rcu_head *head)
1440 struct mem_ctl_info *mci;
1442 mci = container_of(head, struct mem_ctl_info, rcu);
1443 INIT_LIST_HEAD(&mci->link);
1444 complete(&mci->complete);
1448 static void del_mc_from_global_list (struct mem_ctl_info *mci)
1450 list_del_rcu(&mci->link);
1451 init_completion(&mci->complete);
1452 call_rcu(&mci->rcu, complete_mc_list_del);
1453 wait_for_completion(&mci->complete);
1457 EXPORT_SYMBOL(edac_mc_add_mc);
1460 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
1461 * create sysfs entries associated with mci structure
1462 * @mci: pointer to the mci structure to be added to the list
1469 /* FIXME - should a warning be printed if no error detection? correction? */
1470 int edac_mc_add_mc(struct mem_ctl_info *mci)
1472 debugf0("%s()\n", __func__);
1473 #ifdef CONFIG_EDAC_DEBUG
1474 if (edac_debug_level >= 3)
1475 edac_mc_dump_mci(mci);
1476 if (edac_debug_level >= 4) {
1479 for (i = 0; i < mci->nr_csrows; i++) {
1481 edac_mc_dump_csrow(&mci->csrows[i]);
1482 for (j = 0; j < mci->csrows[i].nr_channels; j++)
1483 edac_mc_dump_channel(&mci->csrows[i].
1488 down(&mem_ctls_mutex);
1490 if (add_mc_to_global_list(mci))
1493 /* set load time so that error rate can be tracked */
1494 mci->start_time = jiffies;
1496 if (edac_create_sysfs_mci_device(mci)) {
1497 edac_mc_printk(mci, KERN_WARNING,
1498 "failed to create sysfs device\n");
1502 /* Report action taken */
1503 edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: PCI %s\n",
1504 mci->mod_name, mci->ctl_name, pci_name(mci->pdev));
1506 up(&mem_ctls_mutex);
1510 del_mc_from_global_list(mci);
1513 up(&mem_ctls_mutex);
1518 EXPORT_SYMBOL(edac_mc_del_mc);
1521 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
1522 * remove mci structure from global list
1523 * @mci: Pointer to struct mem_ctl_info structure
1529 int edac_mc_del_mc(struct mem_ctl_info *mci)
1533 debugf0("MC%d: %s()\n", mci->mc_idx, __func__);
1534 edac_remove_sysfs_mci_device(mci);
1535 down(&mem_ctls_mutex);
1536 del_mc_from_global_list(mci);
1537 edac_printk(KERN_INFO, EDAC_MC,
1538 "Removed device %d for %s %s: PCI %s\n", mci->mc_idx,
1539 mci->mod_name, mci->ctl_name, pci_name(mci->pdev));
1541 up(&mem_ctls_mutex);
1547 EXPORT_SYMBOL(edac_mc_scrub_block);
1549 void edac_mc_scrub_block(unsigned long page, unsigned long offset,
1554 unsigned long flags = 0;
1556 debugf3("%s()\n", __func__);
1558 /* ECC error page was not in our memory. Ignore it. */
1559 if(!pfn_valid(page))
1562 /* Find the actual page structure then map it and fix */
1563 pg = pfn_to_page(page);
1565 if (PageHighMem(pg))
1566 local_irq_save(flags);
1568 virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);
1570 /* Perform architecture specific atomic scrub operation */
1571 atomic_scrub(virt_addr + offset, size);
1573 /* Unmap and complete */
1574 kunmap_atomic(virt_addr, KM_BOUNCE_READ);
1576 if (PageHighMem(pg))
1577 local_irq_restore(flags);
1581 /* FIXME - should return -1 */
1582 EXPORT_SYMBOL(edac_mc_find_csrow_by_page);
1584 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
1587 struct csrow_info *csrows = mci->csrows;
1590 debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
1593 for (i = 0; i < mci->nr_csrows; i++) {
1594 struct csrow_info *csrow = &csrows[i];
1596 if (csrow->nr_pages == 0)
1599 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
1600 "mask(0x%lx)\n", mci->mc_idx, __func__,
1601 csrow->first_page, page, csrow->last_page,
1604 if ((page >= csrow->first_page) &&
1605 (page <= csrow->last_page) &&
1606 ((page & csrow->page_mask) ==
1607 (csrow->first_page & csrow->page_mask))) {
1614 edac_mc_printk(mci, KERN_ERR,
1615 "could not look up page error address %lx\n",
1616 (unsigned long) page);
1622 EXPORT_SYMBOL(edac_mc_handle_ce);
1624 /* FIXME - settable log (warning/emerg) levels */
1625 /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
1626 void edac_mc_handle_ce(struct mem_ctl_info *mci,
1627 unsigned long page_frame_number,
1628 unsigned long offset_in_page,
1629 unsigned long syndrome, int row, int channel,
1632 unsigned long remapped_page;
1634 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
1636 /* FIXME - maybe make panic on INTERNAL ERROR an option */
1637 if (row >= mci->nr_csrows || row < 0) {
1638 /* something is wrong */
1639 edac_mc_printk(mci, KERN_ERR,
1640 "INTERNAL ERROR: row out of range "
1641 "(%d >= %d)\n", row, mci->nr_csrows);
1642 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
1645 if (channel >= mci->csrows[row].nr_channels || channel < 0) {
1646 /* something is wrong */
1647 edac_mc_printk(mci, KERN_ERR,
1648 "INTERNAL ERROR: channel out of range "
1649 "(%d >= %d)\n", channel,
1650 mci->csrows[row].nr_channels);
1651 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
1656 /* FIXME - put in DIMM location */
1657 edac_mc_printk(mci, KERN_WARNING,
1658 "CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
1659 "0x%lx, row %d, channel %d, label \"%s\": %s\n",
1660 page_frame_number, offset_in_page,
1661 mci->csrows[row].grain, syndrome, row, channel,
1662 mci->csrows[row].channels[channel].label, msg);
1665 mci->csrows[row].ce_count++;
1666 mci->csrows[row].channels[channel].ce_count++;
1668 if (mci->scrub_mode & SCRUB_SW_SRC) {
1670 * Some MC's can remap memory so that it is still available
1671 * at a different address when PCI devices map into memory.
1672 * MC's that can't do this lose the memory where PCI devices
1673 * are mapped. This mapping is MC dependent and so we call
1674 * back into the MC driver for it to map the MC page to
1675 * a physical (CPU) page which can then be mapped to a virtual
1676 * page - which can then be scrubbed.
1678 remapped_page = mci->ctl_page_to_phys ?
1679 mci->ctl_page_to_phys(mci, page_frame_number) :
1682 edac_mc_scrub_block(remapped_page, offset_in_page,
1683 mci->csrows[row].grain);
1688 EXPORT_SYMBOL(edac_mc_handle_ce_no_info);
1690 void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci,
1694 edac_mc_printk(mci, KERN_WARNING,
1695 "CE - no information available: %s\n", msg);
1696 mci->ce_noinfo_count++;
1701 EXPORT_SYMBOL(edac_mc_handle_ue);
1703 void edac_mc_handle_ue(struct mem_ctl_info *mci,
1704 unsigned long page_frame_number,
1705 unsigned long offset_in_page, int row,
1708 int len = EDAC_MC_LABEL_LEN * 4;
1709 char labels[len + 1];
1714 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
1716 /* FIXME - maybe make panic on INTERNAL ERROR an option */
1717 if (row >= mci->nr_csrows || row < 0) {
1718 /* something is wrong */
1719 edac_mc_printk(mci, KERN_ERR,
1720 "INTERNAL ERROR: row out of range "
1721 "(%d >= %d)\n", row, mci->nr_csrows);
1722 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
1726 chars = snprintf(pos, len + 1, "%s",
1727 mci->csrows[row].channels[0].label);
1730 for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
1732 chars = snprintf(pos, len + 1, ":%s",
1733 mci->csrows[row].channels[chan].label);
1739 edac_mc_printk(mci, KERN_EMERG,
1740 "UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
1741 "labels \"%s\": %s\n", page_frame_number,
1742 offset_in_page, mci->csrows[row].grain, row, labels,
1747 ("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, row %d,"
1748 " labels \"%s\": %s\n", mci->mc_idx,
1749 page_frame_number, offset_in_page,
1750 mci->csrows[row].grain, row, labels, msg);
1753 mci->csrows[row].ue_count++;
1757 EXPORT_SYMBOL(edac_mc_handle_ue_no_info);
1759 void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci,
1763 panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
1766 edac_mc_printk(mci, KERN_WARNING,
1767 "UE - no information available: %s\n", msg);
1768 mci->ue_noinfo_count++;
1775 static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
1780 where = secondary ? PCI_SEC_STATUS : PCI_STATUS;
1781 pci_read_config_word(dev, where, &status);
1783 /* If we get back 0xFFFF then we must suspect that the card has been pulled but
1784 the Linux PCI layer has not yet finished cleaning up. We don't want to report
1787 if (status == 0xFFFF) {
1789 pci_read_config_dword(dev, 0, &sanity);
1790 if (sanity == 0xFFFFFFFF)
1793 status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR |
1797 /* reset only the bits we are interested in */
1798 pci_write_config_word(dev, where, status);
1803 typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
1805 /* Clear any PCI parity errors logged by this device. */
1806 static void edac_pci_dev_parity_clear( struct pci_dev *dev )
1810 get_pci_parity_status(dev, 0);
1812 /* read the device TYPE, looking for bridges */
1813 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1815 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE)
1816 get_pci_parity_status(dev, 1);
1820 * PCI Parity polling
1823 static void edac_pci_dev_parity_test(struct pci_dev *dev)
1828 /* read the STATUS register on this device
1830 status = get_pci_parity_status(dev, 0);
1832 debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id );
1834 /* check the status reg for errors */
1836 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1837 edac_printk(KERN_CRIT, EDAC_PCI,
1838 "Signaled System Error on %s\n",
1841 if (status & (PCI_STATUS_PARITY)) {
1842 edac_printk(KERN_CRIT, EDAC_PCI,
1843 "Master Data Parity Error on %s\n",
1846 atomic_inc(&pci_parity_count);
1849 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1850 edac_printk(KERN_CRIT, EDAC_PCI,
1851 "Detected Parity Error on %s\n",
1854 atomic_inc(&pci_parity_count);
1858 /* read the device TYPE, looking for bridges */
1859 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1861 debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id );
1863 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
1864 /* On bridges, need to examine secondary status register */
1865 status = get_pci_parity_status(dev, 1);
1867 debugf2("PCI SEC_STATUS= 0x%04x %s\n",
1868 status, dev->dev.bus_id );
1870 /* check the secondary status reg for errors */
1872 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1873 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1874 "Signaled System Error on %s\n",
1877 if (status & (PCI_STATUS_PARITY)) {
1878 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1879 "Master Data Parity Error on "
1880 "%s\n", pci_name(dev));
1882 atomic_inc(&pci_parity_count);
1885 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1886 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1887 "Detected Parity Error on %s\n",
1890 atomic_inc(&pci_parity_count);
1897 * check_dev_on_list: Scan for a PCI device on a white/black list
1898 * @list: an EDAC &edac_pci_device_list white/black list pointer
1899 * @free_index: index of next free entry on the list
1900 * @pci_dev: PCI Device pointer
1902 * see if list contains the device.
1904 * Returns: 0 not found
1907 static int check_dev_on_list(struct edac_pci_device_list *list, int free_index,
1908 struct pci_dev *dev)
1911 int rc = 0; /* Assume not found */
1912 unsigned short vendor=dev->vendor;
1913 unsigned short device=dev->device;
1915 /* Scan the list, looking for a vendor/device match
1917 for (i = 0; i < free_index; i++, list++ ) {
1918 if ( (list->vendor == vendor ) &&
1919 (list->device == device )) {
1929 * pci_dev parity list iterator
1930 * Scan the PCI device list for one iteration, looking for SERRORs
1931 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
1933 static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
1935 struct pci_dev *dev=NULL;
1937 /* request for kernel access to the next PCI device, if any,
1938 * and while we are looking at it have its reference count
1939 * bumped until we are done with it
1941 while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1943 /* if whitelist exists then it has priority, so only scan those
1944 * devices on the whitelist
1946 if (pci_whitelist_count > 0 ) {
1947 if (check_dev_on_list(pci_whitelist,
1948 pci_whitelist_count, dev))
1952 * if no whitelist, then check if this device is
1955 if (!check_dev_on_list(pci_blacklist,
1956 pci_blacklist_count, dev))
1962 static void do_pci_parity_check(void)
1964 unsigned long flags;
1967 debugf3("%s()\n", __func__);
1969 if (!check_pci_parity)
1972 before_count = atomic_read(&pci_parity_count);
1974 /* scan all PCI devices looking for a Parity Error on devices and
1977 local_irq_save(flags);
1978 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
1979 local_irq_restore(flags);
1981 /* Only if operator has selected panic on PCI Error */
1982 if (panic_on_pci_parity) {
1983 /* If the count is different 'after' from 'before' */
1984 if (before_count != atomic_read(&pci_parity_count))
1985 panic("EDAC: PCI Parity Error");
1990 static inline void clear_pci_parity_errors(void)
1992 /* Clear any PCI bus parity errors that devices initially have logged
1993 * in their registers.
1995 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
1999 #else /* CONFIG_PCI */
2002 static inline void do_pci_parity_check(void)
2008 static inline void clear_pci_parity_errors(void)
2014 #endif /* CONFIG_PCI */
2017 * Iterate over all MC instances and check for ECC, et al, errors
2019 static inline void check_mc_devices (void)
2021 unsigned long flags;
2022 struct list_head *item;
2023 struct mem_ctl_info *mci;
2025 debugf3("%s()\n", __func__);
2027 /* during poll, have interrupts off */
2028 local_irq_save(flags);
2030 list_for_each(item, &mc_devices) {
2031 mci = list_entry(item, struct mem_ctl_info, link);
2033 if (mci->edac_check != NULL)
2034 mci->edac_check(mci);
2037 local_irq_restore(flags);
2042 * Check MC status every poll_msec.
2043 * Check PCI status every poll_msec as well.
2045 * This is where the work gets done for edac.
2047 * SMP safe, doesn't use NMI, and auto-rate-limits.
2049 static void do_edac_check(void)
2051 debugf3("%s()\n", __func__);
2053 do_pci_parity_check();
2056 static int edac_kernel_thread(void *arg)
2058 while (!kthread_should_stop()) {
2061 /* go to sleep for the interval */
2062 schedule_timeout_interruptible((HZ * poll_msec) / 1000);
2071 * module initialization entry point
2073 static int __init edac_mc_init(void)
2075 edac_printk(KERN_INFO, EDAC_MC, EDAC_MC_VERSION "\n");
2078 * Harvest and clear any boot/initialization PCI parity errors
2080 * FIXME: This only clears errors logged by devices present at time of
2081 * module initialization. We should also do an initial clear
2082 * of each newly hotplugged device.
2084 clear_pci_parity_errors();
2086 /* Create the MC sysfs entries */
2087 if (edac_sysfs_memctrl_setup()) {
2088 edac_printk(KERN_ERR, EDAC_MC,
2089 "Error initializing sysfs code\n");
2093 /* Create the PCI parity sysfs entries */
2094 if (edac_sysfs_pci_setup()) {
2095 edac_sysfs_memctrl_teardown();
2096 edac_printk(KERN_ERR, EDAC_MC,
2097 "EDAC PCI: Error initializing sysfs code\n");
2101 /* create our kernel thread */
2102 edac_thread = kthread_run(edac_kernel_thread, NULL, "kedac");
2103 if (IS_ERR(edac_thread)) {
2104 /* remove the sysfs entries */
2105 edac_sysfs_memctrl_teardown();
2106 edac_sysfs_pci_teardown();
2107 return PTR_ERR(edac_thread);
2116 * module exit/termination function
2118 static void __exit edac_mc_exit(void)
2120 debugf0("%s()\n", __func__);
2122 kthread_stop(edac_thread);
2124 /* tear down the sysfs device */
2125 edac_sysfs_memctrl_teardown();
2126 edac_sysfs_pci_teardown();
2132 module_init(edac_mc_init);
2133 module_exit(edac_mc_exit);
2135 MODULE_LICENSE("GPL");
2136 MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n"
2137 "Based on.work by Dan Hollis et al");
2138 MODULE_DESCRIPTION("Core library routines for MC reporting");
2140 module_param(panic_on_ue, int, 0644);
2141 MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on");
2142 module_param(check_pci_parity, int, 0644);
2143 MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on");
2144 module_param(panic_on_pci_parity, int, 0644);
2145 MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on");
2146 module_param(log_ue, int, 0644);
2147 MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on");
2148 module_param(log_ce, int, 0644);
2149 MODULE_PARM_DESC(log_ce, "Log correctable error to console: 0=off 1=on");
2150 module_param(poll_msec, int, 0644);
2151 MODULE_PARM_DESC(poll_msec, "Polling period in milliseconds");
2152 #ifdef CONFIG_EDAC_DEBUG
2153 module_param(edac_debug_level, int, 0644);
2154 MODULE_PARM_DESC(edac_debug_level, "Debug level");