]> git.karo-electronics.de Git - karo-tx-linux.git/blob - drivers/edac/i7core_edac.c
i7core_edac: Fix oops when trying to inject errors
[karo-tx-linux.git] / drivers / edac / i7core_edac.c
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *       Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *      http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/edac_mce.h>
37 #include <linux/smp.h>
38 #include <asm/processor.h>
39
40 #include "edac_core.h"
41
42 /* Static vars */
43 static LIST_HEAD(i7core_edac_list);
44 static DEFINE_MUTEX(i7core_edac_lock);
45 static int probed;
46
47 static int use_pci_fixup;
48 module_param(use_pci_fixup, int, 0444);
49 MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
50 /*
51  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
52  * registers start at bus 255, and are not reported by BIOS.
53  * We currently find devices with only 2 sockets. In order to support more QPI
54  * Quick Path Interconnect, just increment this number.
55  */
56 #define MAX_SOCKET_BUSES        2
57
58
59 /*
60  * Alter this version for the module when modifications are made
61  */
62 #define I7CORE_REVISION    " Ver: 1.0.0"
63 #define EDAC_MOD_STR      "i7core_edac"
64
65 /*
66  * Debug macros
67  */
68 #define i7core_printk(level, fmt, arg...)                       \
69         edac_printk(level, "i7core", fmt, ##arg)
70
71 #define i7core_mc_printk(mci, level, fmt, arg...)               \
72         edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
73
74 /*
75  * i7core Memory Controller Registers
76  */
77
78         /* OFFSETS for Device 0 Function 0 */
79
80 #define MC_CFG_CONTROL  0x90
81
82         /* OFFSETS for Device 3 Function 0 */
83
84 #define MC_CONTROL      0x48
85 #define MC_STATUS       0x4c
86 #define MC_MAX_DOD      0x64
87
88 /*
89  * OFFSETS for Device 3 Function 4, as inicated on Xeon 5500 datasheet:
90  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
91  */
92
93 #define MC_TEST_ERR_RCV1        0x60
94   #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)
95
96 #define MC_TEST_ERR_RCV0        0x64
97   #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
98   #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)
99
100 /* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */
101 #define MC_COR_ECC_CNT_0        0x80
102 #define MC_COR_ECC_CNT_1        0x84
103 #define MC_COR_ECC_CNT_2        0x88
104 #define MC_COR_ECC_CNT_3        0x8c
105 #define MC_COR_ECC_CNT_4        0x90
106 #define MC_COR_ECC_CNT_5        0x94
107
108 #define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
109 #define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)
110
111
112         /* OFFSETS for Devices 4,5 and 6 Function 0 */
113
114 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
115   #define THREE_DIMMS_PRESENT           (1 << 24)
116   #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
117   #define QUAD_RANK_PRESENT             (1 << 22)
118   #define REGISTERED_DIMM               (1 << 15)
119
120 #define MC_CHANNEL_MAPPER       0x60
121   #define RDLCH(r, ch)          ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
122   #define WRLCH(r, ch)          ((((r) >> (ch * 6)) & 0x07) - 1)
123
124 #define MC_CHANNEL_RANK_PRESENT 0x7c
125   #define RANK_PRESENT_MASK             0xffff
126
127 #define MC_CHANNEL_ADDR_MATCH   0xf0
128 #define MC_CHANNEL_ERROR_MASK   0xf8
129 #define MC_CHANNEL_ERROR_INJECT 0xfc
130   #define INJECT_ADDR_PARITY    0x10
131   #define INJECT_ECC            0x08
132   #define MASK_CACHELINE        0x06
133   #define MASK_FULL_CACHELINE   0x06
134   #define MASK_MSB32_CACHELINE  0x04
135   #define MASK_LSB32_CACHELINE  0x02
136   #define NO_MASK_CACHELINE     0x00
137   #define REPEAT_EN             0x01
138
139         /* OFFSETS for Devices 4,5 and 6 Function 1 */
140
141 #define MC_DOD_CH_DIMM0         0x48
142 #define MC_DOD_CH_DIMM1         0x4c
143 #define MC_DOD_CH_DIMM2         0x50
144   #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
145   #define RANKOFFSET(x)         ((x & RANKOFFSET_MASK) >> 10)
146   #define DIMM_PRESENT_MASK     (1 << 9)
147   #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
148   #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
149   #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
150   #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
151   #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
152   #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
153   #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
154   #define MC_DOD_NUMCOL_MASK            3
155   #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)
156
157 #define MC_RANK_PRESENT         0x7c
158
159 #define MC_SAG_CH_0     0x80
160 #define MC_SAG_CH_1     0x84
161 #define MC_SAG_CH_2     0x88
162 #define MC_SAG_CH_3     0x8c
163 #define MC_SAG_CH_4     0x90
164 #define MC_SAG_CH_5     0x94
165 #define MC_SAG_CH_6     0x98
166 #define MC_SAG_CH_7     0x9c
167
168 #define MC_RIR_LIMIT_CH_0       0x40
169 #define MC_RIR_LIMIT_CH_1       0x44
170 #define MC_RIR_LIMIT_CH_2       0x48
171 #define MC_RIR_LIMIT_CH_3       0x4C
172 #define MC_RIR_LIMIT_CH_4       0x50
173 #define MC_RIR_LIMIT_CH_5       0x54
174 #define MC_RIR_LIMIT_CH_6       0x58
175 #define MC_RIR_LIMIT_CH_7       0x5C
176 #define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)
177
178 #define MC_RIR_WAY_CH           0x80
179   #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
180   #define MC_RIR_WAY_RANK_MASK          0x7
181
182 /*
183  * i7core structs
184  */
185
186 #define NUM_CHANS 3
187 #define MAX_DIMMS 3             /* Max DIMMS per channel */
188 #define MAX_MCR_FUNC  4
189 #define MAX_CHAN_FUNC 3
190
/* Cached snapshot of the global MC registers (device 3 function 0),
 * filled by get_dimm_config(). */
struct i7core_info {
	u32	mc_control;	/* MC_CONTROL: channel-active bits, ECCx8 mode */
	u32	mc_status;	/* MC_STATUS: ECC enable, channel-disabled bits */
	u32	max_dod;	/* MC_MAX_DOD: maximum DIMM organization limits */
	u32	ch_map;		/* MC_CHANNEL_MAPPER: logical/physical channel map */
};
197
198
/* Error-injection parameters configured through the sysfs nodes.
 * Writes to any parameter disable injection; values are applied on enable. */
struct i7core_inject {
	int	enable;		/* non-zero while injection is armed */

	u32	section;	/* cacheline section bits (0: lower, 1: upper half) */
	u32	type;		/* bit 0: repeat, bit 1: ECC, bit 2: parity */
	u32	eccmask;	/* ECC bits to flip on injection */

	/* Error address mask */
	int channel, dimm, rank, bank, page, col;
};
209
/* Per-channel topology discovered from MC_CHANNEL_DIMM_INIT_PARAMS/DOD. */
struct i7core_channel {
	u32		ranks;	/* ranks per DIMM: 4 if QUAD_RANK_PRESENT, else 2 */
	u32		dimms;	/* count of DIMMs found present on this channel */
};
214
/* One PCI device (slot/function/id) the driver must locate per socket. */
struct pci_id_descr {
	int			dev;		/* PCI slot (device) number */
	int			func;		/* PCI function number */
	int			dev_id;		/* expected PCI device id */
	int			optional;	/* probing may succeed without it */
};
221
/* Device list for one supported processor family. */
struct pci_id_table {
	const struct pci_id_descr	*descr;
	int				n_devs;	/* number of entries in descr[] */
};
226
/* All PCI devices belonging to one CPU socket's memory controller. */
struct i7core_dev {
	struct list_head	list;	/* node in the global i7core_edac_list */
	u8			socket;	/* socket this entry describes */
	struct pci_dev		**pdev;	/* array of n_devs device pointers */
	int			n_devs;
	struct mem_ctl_info	*mci;	/* EDAC controller bound to this socket */
};
234
/* Per-memory-controller private driver state, hung off mci->pvt_info. */
struct i7core_pvt {
	struct pci_dev	*pci_noncore;			/* generic non-core registers device */
	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];	/* device 3, functions 0..4 */
	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1]; /* devices 4-6, fns 0..3 */

	struct i7core_dev *i7core_dev;	/* back-pointer to the per-socket devices */

	struct i7core_info	info;	/* cached MC register snapshot */
	struct i7core_inject	inject;	/* sysfs-driven error injection state */
	struct i7core_channel	channel[NUM_CHANS];

	/* NOTE(review): set outside this chunk - presumably flags that a CE
	 * baseline has been read; confirm against the count update code. */
	int		ce_count_available;
	int		csrow_map[NUM_CHANS][MAX_DIMMS]; /* (channel,dimm) -> csrow index */

			/* ECC corrected errors counts per udimm */
	unsigned long	udimm_ce_count[MAX_DIMMS];
	int		udimm_last_ce_count[MAX_DIMMS];
			/* ECC corrected errors counts per rdimm */
	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

	/* NOTE(review): presumably non-zero when registered DIMMs are in use -
	 * set outside this chunk; verify. */
	unsigned int	is_registered;

	/* mcelog glue: callback registration for machine-check events */
	struct edac_mce		edac_mce;

	/* Fifo double buffers for MCE events */
	struct mce		mce_entry[MCE_LOG_LEN];
	struct mce		mce_outentry[MCE_LOG_LEN];

	/* Fifo in/out counters */
	unsigned		mce_in, mce_out;

	/* Count indicator to show errors not got (fifo overruns) */
	unsigned		mce_overrun;

	/* Struct to control EDAC polling */
	struct edac_pci_ctl_info *i7core_pci;
};
274
275 #define PCI_DESCR(device, function, device_id)  \
276         .dev = (device),                        \
277         .func = (function),                     \
278         .dev_id = (device_id)
279
/* Devices to grab on an i7core/Nehalem socket: 3 channels x 4 functions,
 * the memory controller on device 3, and the non-core registers device. */
static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
			/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

		/* Generic Non-core registers */
	/*
	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
	 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
	 * the probing code needs to test for the other address in case of
	 * failure of this one
	 */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },

};
316
/* Devices to grab on a Lynnfield socket. Note: only two channels
 * (devices 4 and 5) exist here, and there is no RAS function. */
static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
	{ PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
	{ PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
	{ PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },

	{ PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
	{ PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
	{ PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
	{ PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },

	{ PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
	{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
	{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
	{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },

	/*
	 * This is the PCI device has an alternate address on some
	 * processors like Core i7 860
	 */
	{ PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
};
338
/* Devices to grab on a Westmere-EP socket: same layout as Nehalem,
 * but with the REV2 PCI device ids. */
static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
			/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },

		/* Generic Non-core registers */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },

};
369
#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
/* All supported processor families; probing tries them in order. */
static const struct pci_id_table pci_dev_table[] = {
	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
	PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
	PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
	{0,}			/* 0 terminated list. */
};
377
378 /*
379  *      pci_device_id   table for which devices we are looking for
380  */
/* PCI ids whose presence triggers driver probing. */
static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
	{0,}			/* 0 terminated list. */
};
386
387 /****************************************************************************
388                         Anciliary status routines
389  ****************************************************************************/
390
391         /* MC_CONTROL bits */
392 #define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + ch)))
393 #define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))
394
395         /* MC_STATUS bits */
396 #define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
397 #define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << ch))
398
399         /* MC_MAX_DOD read functions */
400 static inline int numdimms(u32 dimms)
401 {
402         return (dimms & 0x3) + 1;
403 }
404
405 static inline int numrank(u32 rank)
406 {
407         static int ranks[4] = { 1, 2, 4, -EINVAL };
408
409         return ranks[rank & 0x3];
410 }
411
412 static inline int numbank(u32 bank)
413 {
414         static int banks[4] = { 4, 8, 16, -EINVAL };
415
416         return banks[bank & 0x3];
417 }
418
419 static inline int numrow(u32 row)
420 {
421         static int rows[8] = {
422                 1 << 12, 1 << 13, 1 << 14, 1 << 15,
423                 1 << 16, -EINVAL, -EINVAL, -EINVAL,
424         };
425
426         return rows[row & 0x7];
427 }
428
429 static inline int numcol(u32 col)
430 {
431         static int cols[8] = {
432                 1 << 10, 1 << 11, 1 << 12, -EINVAL,
433         };
434         return cols[col & 0x3];
435 }
436
437 static struct i7core_dev *get_i7core_dev(u8 socket)
438 {
439         struct i7core_dev *i7core_dev;
440
441         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
442                 if (i7core_dev->socket == socket)
443                         return i7core_dev;
444         }
445
446         return NULL;
447 }
448
449 static struct i7core_dev *alloc_i7core_dev(u8 socket,
450                                            const struct pci_id_table *table)
451 {
452         struct i7core_dev *i7core_dev;
453
454         i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
455         if (!i7core_dev)
456                 return NULL;
457
458         i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
459                                    GFP_KERNEL);
460         if (!i7core_dev->pdev) {
461                 kfree(i7core_dev);
462                 return NULL;
463         }
464
465         i7core_dev->socket = socket;
466         i7core_dev->n_devs = table->n_devs;
467         list_add_tail(&i7core_dev->list, &i7core_edac_list);
468
469         return i7core_dev;
470 }
471
/* Unlink a per-socket descriptor from the global list and release both the
 * pdev pointer array and the descriptor itself. */
static void free_i7core_dev(struct i7core_dev *i7core_dev)
{
	list_del(&i7core_dev->list);
	kfree(i7core_dev->pdev);
	kfree(i7core_dev);
}
478
479 /****************************************************************************
480                         Memory check routines
481  ****************************************************************************/
482 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
483                                           unsigned func)
484 {
485         struct i7core_dev *i7core_dev = get_i7core_dev(socket);
486         int i;
487
488         if (!i7core_dev)
489                 return NULL;
490
491         for (i = 0; i < i7core_dev->n_devs; i++) {
492                 if (!i7core_dev->pdev[i])
493                         continue;
494
495                 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
496                     PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
497                         return i7core_dev->pdev[i];
498                 }
499         }
500
501         return NULL;
502 }
503
504 /**
505  * i7core_get_active_channels() - gets the number of channels and csrows
506  * @socket:     Quick Path Interconnect socket
507  * @channels:   Number of channels that will be returned
508  * @csrows:     Number of csrows found
509  *
510  * Since EDAC core needs to know in advance the number of available channels
511  * and csrows, in order to allocate memory for csrows/channels, it is needed
512  * to run two similar steps. At the first step, implemented on this function,
513  * it checks the number of csrows/channels present at one socket.
514  * this is used in order to properly allocate the size of mci components.
515  *
516  * It should be noticed that none of the current available datasheets explain
517  * or even mention how csrows are seen by the memory controller. So, we need
518  * to add a fake description for csrows.
519  * So, this driver is attributing one DIMM memory for one csrow.
520  */
static int i7core_get_active_channels(const u8 socket, unsigned *channels,
				      unsigned *csrows)
{
	struct pci_dev *pdev = NULL;
	int i, j;
	u32 status, control;

	*channels = 0;
	*csrows = 0;

	/* The global MC registers live on device 3 function 0 */
	pdev = get_pdev_slot_func(socket, 3, 0);
	if (!pdev) {
		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
			      socket);
		return -ENODEV;
	}

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_STATUS, &status);
	pci_read_config_dword(pdev, MC_CONTROL, &control);

	for (i = 0; i < NUM_CHANS; i++) {
		u32 dimm_dod[3];
		/* Check if the channel is active (MC_CONTROL bit 8+i) */
		if (!(control & (1 << (8 + i))))
			continue;

		/* Check if the channel is disabled (MC_STATUS bit i) */
		if (status & (1 << i))
			continue;

		/* DIMM organization registers are on devices 4-6 function 1 */
		pdev = get_pdev_slot_func(socket, i + 4, 1);
		if (!pdev) {
			i7core_printk(KERN_ERR, "Couldn't find socket %d "
						"fn %d.%d!!!\n",
						socket, i + 4, 1);
			return -ENODEV;
		}
		/* Devices 4-6 function 1 */
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		(*channels)++;

		/* One fake csrow is accounted per present DIMM */
		for (j = 0; j < 3; j++) {
			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;
			(*csrows)++;
		}
	}

	debugf0("Number of active channels on socket %d: %d\n",
		socket, *channels);

	return 0;
}
581
/*
 * get_dimm_config() - read DIMM topology and populate the EDAC csrows.
 *
 * Snapshots the global MC registers into pvt->info, determines the ECC
 * mode, then walks every active/enabled channel reading the per-DIMM
 * organization registers. One fake csrow is created per present DIMM
 * (see i7core_get_active_channels() for the rationale) and the
 * (channel, dimm) -> csrow mapping is recorded for later error decoding.
 *
 * Returns 0 on success or -ENODEV if the MCR device is missing.
 */
static int get_dimm_config(const struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct csrow_info *csr;
	struct pci_dev *pdev;
	int i, j;
	int csrow = 0;
	unsigned long last_page = 0;
	enum edac_type mode;
	enum mem_type mtype;

	/* Get data from the MC register, function 0 */
	pdev = pvt->pci_mcr[0];
	if (!pdev)
		return -ENODEV;

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);

	debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
		pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
		pvt->info.max_dod, pvt->info.ch_map);

	/* Pick the EDAC mode from the ECC enable and x4/x8 SDCC bits */
	if (ECC_ENABLED(pvt)) {
		debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
		if (ECCx8(pvt))
			mode = EDAC_S8ECD8ED;
		else
			mode = EDAC_S4ECD4ED;
	} else {
		debugf0("ECC disabled\n");
		mode = EDAC_NONE;
	}

	/* FIXME: need to handle the error codes */
	debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
		"x%x x 0x%x\n",
		numdimms(pvt->info.max_dod),
		numrank(pvt->info.max_dod >> 2),
		numbank(pvt->info.max_dod >> 4),
		numrow(pvt->info.max_dod >> 6),
		numcol(pvt->info.max_dod >> 9));

	for (i = 0; i < NUM_CHANS; i++) {
		u32 data, dimm_dod[3], value[8];

		/* Skip channels whose fn 0 device was not found */
		if (!pvt->pci_ch[i][0])
			continue;

		if (!CH_ACTIVE(pvt, i)) {
			debugf0("Channel %i is not active\n", i);
			continue;
		}
		if (CH_DISABLED(pvt, i)) {
			debugf0("Channel %i is disabled\n", i);
			continue;
		}

		/* Devices 4-6 function 0 */
		pci_read_config_dword(pvt->pci_ch[i][0],
				MC_CHANNEL_DIMM_INIT_PARAMS, &data);

		pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
						4 : 2;

		if (data & REGISTERED_DIMM)
			mtype = MEM_RDDR3;
		else
			mtype = MEM_DDR3;
#if 0
		if (data & THREE_DIMMS_PRESENT)
			pvt->channel[i].dimms = 3;
		else if (data & SINGLE_QUAD_RANK_PRESENT)
			pvt->channel[i].dimms = 1;
		else
			pvt->channel[i].dimms = 2;
#endif

		/* Devices 4-6 function 1 */
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
			"%d ranks, %cDIMMs\n",
			i,
			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
			data,
			pvt->channel[i].ranks,
			(data & REGISTERED_DIMM) ? 'R' : 'U');

		for (j = 0; j < 3; j++) {
			u32 banks, ranks, rows, cols;
			u32 size, npages;

			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;

			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));

			/* DDR3 has 8 I/O banks */
			size = (rows * cols * banks * ranks) >> (20 - 3);

			pvt->channel[i].dimms++;

			debugf0("\tdimm %d %d Mb offset: %x, "
				"bank: %d, rank: %d, row: %#x, col: %#x\n",
				j, size,
				RANKOFFSET(dimm_dod[j]),
				banks, ranks, rows, cols);

			npages = MiB_TO_PAGES(size);

			/* Lay out this DIMM's pages right after the previous
			 * csrow's last page */
			csr = &mci->csrows[csrow];
			csr->first_page = last_page + 1;
			last_page += npages;
			csr->last_page = last_page;
			csr->nr_pages = npages;

			csr->page_mask = 0;
			csr->grain = 8;
			csr->csrow_idx = csrow;
			csr->nr_channels = 1;

			csr->channels[0].chan_idx = i;
			csr->channels[0].ce_count = 0;

			/* Remember (channel, dimm) -> csrow for error decode */
			pvt->csrow_map[i][j] = csrow;

			switch (banks) {
			case 4:
				csr->dtype = DEV_X4;
				break;
			case 8:
				csr->dtype = DEV_X8;
				break;
			case 16:
				csr->dtype = DEV_X16;
				break;
			default:
				csr->dtype = DEV_UNKNOWN;
			}

			csr->edac_mode = mode;
			csr->mtype = mtype;

			csrow++;
		}

		/*
		 * NOTE(review): the MC_SAG_CH_* offsets are listed above under
		 * "Devices 4,5 and 6 Function 1", but they are read here from
		 * pdev, which is still device 3 function 0 - confirm the
		 * intended device for these debug-only reads.
		 */
		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
		debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
		for (j = 0; j < 8; j++)
			debugf1("\t\t%#x\t%#x\t%#x\n",
				(value[j] >> 27) & 0x1,
				(value[j] >> 24) & 0x7,
				(value[j] & ((1 << 24) - 1)));
	}

	return 0;
}
758
759 /****************************************************************************
760                         Error insertion routines
761  ****************************************************************************/
762
763 /* The i7core has independent error injection features per channel.
764    However, to have a simpler code, we don't allow enabling error injection
765    on more than one channel.
766    Also, since a change at an inject parameter will be applied only at enable,
767    we're disabling error injection on all write calls to the sysfs nodes that
768    controls the error code injection.
769  */
770 static int disable_inject(const struct mem_ctl_info *mci)
771 {
772         struct i7core_pvt *pvt = mci->pvt_info;
773
774         pvt->inject.enable = 0;
775
776         if (!pvt->pci_ch[pvt->inject.channel][0])
777                 return -ENODEV;
778
779         pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
780                                 MC_CHANNEL_ERROR_INJECT, 0);
781
782         return 0;
783 }
784
785 /*
786  * i7core inject inject.section
787  *
788  *      accept and store error injection inject.section value
789  *      bit 0 - refers to the lower 32-byte half cacheline
790  *      bit 1 - refers to the upper 32-byte half cacheline
791  */
792 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
793                                            const char *data, size_t count)
794 {
795         struct i7core_pvt *pvt = mci->pvt_info;
796         unsigned long value;
797         int rc;
798
799         if (pvt->inject.enable)
800                 disable_inject(mci);
801
802         rc = strict_strtoul(data, 10, &value);
803         if ((rc < 0) || (value > 3))
804                 return -EIO;
805
806         pvt->inject.section = (u32) value;
807         return count;
808 }
809
810 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
811                                               char *data)
812 {
813         struct i7core_pvt *pvt = mci->pvt_info;
814         return sprintf(data, "0x%08x\n", pvt->inject.section);
815 }
816
/*
 * i7core_inject_type_store()
 *
 *      accept and store error injection inject.type value
 *      bit 0 - repeat enable - Enable error repetition
 *      bit 1 - inject ECC error
 *      bit 2 - inject parity error
 */
825 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
826                                         const char *data, size_t count)
827 {
828         struct i7core_pvt *pvt = mci->pvt_info;
829         unsigned long value;
830         int rc;
831
832         if (pvt->inject.enable)
833                 disable_inject(mci);
834
835         rc = strict_strtoul(data, 10, &value);
836         if ((rc < 0) || (value > 7))
837                 return -EIO;
838
839         pvt->inject.type = (u32) value;
840         return count;
841 }
842
843 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
844                                               char *data)
845 {
846         struct i7core_pvt *pvt = mci->pvt_info;
847         return sprintf(data, "0x%08x\n", pvt->inject.type);
848 }
849
/*
 * i7core_inject_eccmask_store()
 *
 * The type of error (UE/CE) will depend on the inject.eccmask value:
 *   Any bits set to a 1 will flip the corresponding ECC bit
 *   Correctable errors can be injected by flipping 1 bit or the bits within
 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 *   uncorrectable error to be injected.
 */
860 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
861                                         const char *data, size_t count)
862 {
863         struct i7core_pvt *pvt = mci->pvt_info;
864         unsigned long value;
865         int rc;
866
867         if (pvt->inject.enable)
868                 disable_inject(mci);
869
870         rc = strict_strtoul(data, 10, &value);
871         if (rc < 0)
872                 return -EIO;
873
874         pvt->inject.eccmask = (u32) value;
875         return count;
876 }
877
878 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
879                                               char *data)
880 {
881         struct i7core_pvt *pvt = mci->pvt_info;
882         return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
883 }
884
/*
 * i7core_addrmatch
 *
 * Sysfs store/show helpers for the error-injection address match
 * criteria (channel, dimm, rank, bank, page, col). Each store accepts
 * either a decimal value below the per-parameter limit or the string
 * "any", which disables matching on that field (stored as -1).
 */
895
/*
 * DECLARE_ADDR_MATCH(param, limit) generates a sysfs store/show pair
 * for one address-match field of the injection unit.
 *
 * Store: "any" (with or without trailing newline) selects the wildcard,
 * stored as -1; otherwise a decimal value in [0, limit) is accepted.
 * Writing any value disables a currently enabled injection, since
 * parameters only take effect on the next enable.
 *
 * Fix: strict_strtoul() takes an unsigned long *, but the previous code
 * passed a plain long — a pointer-type mismatch. Parse into an unsigned
 * long and assign the -1 wildcard to the (int) field directly.
 */
#define DECLARE_ADDR_MATCH(param, limit)                        \
static ssize_t i7core_inject_store_##param(                     \
                struct mem_ctl_info *mci,                       \
                const char *data, size_t count)                 \
{                                                               \
        struct i7core_pvt *pvt;                                 \
        unsigned long value;                                    \
        int rc;                                                 \
                                                                \
        debugf1("%s()\n", __func__);                            \
        pvt = mci->pvt_info;                                    \
                                                                \
        if (pvt->inject.enable)                                 \
                disable_inject(mci);                            \
                                                                \
        if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
                pvt->inject.param = -1;                         \
        else {                                                  \
                rc = strict_strtoul(data, 10, &value);          \
                if ((rc < 0) || (value >= limit))               \
                        return -EIO;                            \
                pvt->inject.param = value;                      \
        }                                                       \
                                                                \
        return count;                                           \
}                                                               \
                                                                \
static ssize_t i7core_inject_show_##param(                      \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt;                                 \
                                                                \
        pvt = mci->pvt_info;                                    \
        debugf1("%s() pvt=%p\n", __func__, pvt);                \
        if (pvt->inject.param < 0)                              \
                return sprintf(data, "any\n");                  \
        else                                                    \
                return sprintf(data, "%d\n", pvt->inject.param);\
}
937
/*
 * ATTR_ADDR_MATCH(param) expands to one mcidev_sysfs_attribute entry
 * wiring the show/store helpers generated by DECLARE_ADDR_MATCH() for
 * the same parameter; the sysfs node is named after the parameter.
 */
#define ATTR_ADDR_MATCH(param)                                  \
        {                                                       \
                .attr = {                                       \
                        .name = #param,                         \
                        .mode = (S_IRUGO | S_IWUSR)             \
                },                                              \
                .show  = i7core_inject_show_##param,            \
                .store = i7core_inject_store_##param,           \
        }
947
/* Address-match parameters with their exclusive upper limits */
DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);
954
955 static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
956 {
957         u32 read;
958         int count;
959
960         debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
961                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
962                 where, val);
963
964         for (count = 0; count < 10; count++) {
965                 if (count)
966                         msleep(100);
967                 pci_write_config_dword(dev, where, val);
968                 pci_read_config_dword(dev, where, &read);
969
970                 if (read == val)
971                         return 0;
972         }
973
974         i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
975                 "write=%08x. Read=%08x\n",
976                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
977                 where, val, read);
978
979         return -EINVAL;
980 }
981
982 /*
983  * This routine prepares the Memory Controller for error injection.
984  * The error will be injected when some process tries to write to the
985  * memory that matches the given criteria.
986  * The criteria can be set in terms of a mask where dimm, rank, bank, page
987  * and col can be specified.
988  * A -1 value for any of the mask items will make the MCU to ignore
989  * that matching criteria for error injection.
990  *
991  * It should be noticed that the error will only happen after a write operation
992  * on a memory that matches the condition. if REPEAT_EN is not enabled at
993  * inject mask, then it will produce just one error. Otherwise, it will repeat
994  * until the injectmask would be cleaned.
995  *
996  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
997  *    is reliable enough to check if the MC is using the
998  *    three channels. However, this is not clear at the datasheet.
999  */
1000 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
1001                                        const char *data, size_t count)
1002 {
1003         struct i7core_pvt *pvt = mci->pvt_info;
1004         u32 injectmask;
1005         u64 mask = 0;
1006         int  rc;
1007         long enable;
1008
1009         if (!pvt->pci_ch[pvt->inject.channel][0])
1010                 return 0;
1011
1012         rc = strict_strtoul(data, 10, &enable);
1013         if ((rc < 0))
1014                 return 0;
1015
1016         if (enable) {
1017                 pvt->inject.enable = 1;
1018         } else {
1019                 disable_inject(mci);
1020                 return count;
1021         }
1022
1023         /* Sets pvt->inject.dimm mask */
1024         if (pvt->inject.dimm < 0)
1025                 mask |= 1LL << 41;
1026         else {
1027                 if (pvt->channel[pvt->inject.channel].dimms > 2)
1028                         mask |= (pvt->inject.dimm & 0x3LL) << 35;
1029                 else
1030                         mask |= (pvt->inject.dimm & 0x1LL) << 36;
1031         }
1032
1033         /* Sets pvt->inject.rank mask */
1034         if (pvt->inject.rank < 0)
1035                 mask |= 1LL << 40;
1036         else {
1037                 if (pvt->channel[pvt->inject.channel].dimms > 2)
1038                         mask |= (pvt->inject.rank & 0x1LL) << 34;
1039                 else
1040                         mask |= (pvt->inject.rank & 0x3LL) << 34;
1041         }
1042
1043         /* Sets pvt->inject.bank mask */
1044         if (pvt->inject.bank < 0)
1045                 mask |= 1LL << 39;
1046         else
1047                 mask |= (pvt->inject.bank & 0x15LL) << 30;
1048
1049         /* Sets pvt->inject.page mask */
1050         if (pvt->inject.page < 0)
1051                 mask |= 1LL << 38;
1052         else
1053                 mask |= (pvt->inject.page & 0xffff) << 14;
1054
1055         /* Sets pvt->inject.column mask */
1056         if (pvt->inject.col < 0)
1057                 mask |= 1LL << 37;
1058         else
1059                 mask |= (pvt->inject.col & 0x3fff);
1060
1061         /*
1062          * bit    0: REPEAT_EN
1063          * bits 1-2: MASK_HALF_CACHELINE
1064          * bit    3: INJECT_ECC
1065          * bit    4: INJECT_ADDR_PARITY
1066          */
1067
1068         injectmask = (pvt->inject.type & 1) |
1069                      (pvt->inject.section & 0x3) << 1 |
1070                      (pvt->inject.type & 0x6) << (3 - 1);
1071
1072         /* Unlock writes to registers - this register is write only */
1073         pci_write_config_dword(pvt->pci_noncore,
1074                                MC_CFG_CONTROL, 0x2);
1075
1076         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1077                                MC_CHANNEL_ADDR_MATCH, mask);
1078         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1079                                MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1080
1081         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1082                                MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1083
1084         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1085                                MC_CHANNEL_ERROR_INJECT, injectmask);
1086
1087         /*
1088          * This is something undocumented, based on my tests
1089          * Without writing 8 to this register, errors aren't injected. Not sure
1090          * why.
1091          */
1092         pci_write_config_dword(pvt->pci_noncore,
1093                                MC_CFG_CONTROL, 8);
1094
1095         debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1096                 " inject 0x%08x\n",
1097                 mask, pvt->inject.eccmask, injectmask);
1098
1099
1100         return count;
1101 }
1102
1103 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1104                                         char *data)
1105 {
1106         struct i7core_pvt *pvt = mci->pvt_info;
1107         u32 injectmask;
1108
1109         if (!pvt->pci_ch[pvt->inject.channel][0])
1110                 return 0;
1111
1112         pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1113                                MC_CHANNEL_ERROR_INJECT, &injectmask);
1114
1115         debugf0("Inject error read: 0x%018x\n", injectmask);
1116
1117         if (injectmask & 0x0c)
1118                 pvt->inject.enable = 1;
1119
1120         return sprintf(data, "%d\n", pvt->inject.enable);
1121 }
1122
/*
 * DECLARE_COUNTER(param) generates a read-only sysfs show helper for
 * the all-channel udimm CE counter with index @param. The counters are
 * only meaningful for unregistered dimms once at least one CE count
 * has been collected; otherwise "data unavailable" is reported.
 */
#define DECLARE_COUNTER(param)                                  \
static ssize_t i7core_show_counter_##param(                     \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt = mci->pvt_info;                 \
                                                                \
        debugf1("%s() \n", __func__);                           \
        if (!pvt->ce_count_available || (pvt->is_registered))   \
                return sprintf(data, "data unavailable\n");     \
        return sprintf(data, "%lu\n",                           \
                        pvt->udimm_ce_count[param]);            \
}
1136
/*
 * ATTR_COUNTER(param) expands to the sysfs attribute entry ("udimmN")
 * for the counter generated by DECLARE_COUNTER().
 * NOTE(review): S_IWUSR is set although no .store handler exists — the
 * write bit appears unnecessary; confirm before removing.
 */
#define ATTR_COUNTER(param)                                     \
        {                                                       \
                .attr = {                                       \
                        .name = __stringify(udimm##param),      \
                        .mode = (S_IRUGO | S_IWUSR)             \
                },                                              \
                .show  = i7core_show_counter_##param            \
        }
1145
/* One CE counter per dimm slot (0-2) across all channels */
DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
1149
1150 /*
1151  * Sysfs struct
1152  */
1153
/* Attributes exposed under the inject_addrmatch sysfs group */
static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
        ATTR_ADDR_MATCH(channel),
        ATTR_ADDR_MATCH(dimm),
        ATTR_ADDR_MATCH(rank),
        ATTR_ADDR_MATCH(bank),
        ATTR_ADDR_MATCH(page),
        ATTR_ADDR_MATCH(col),
        { } /* End of list */
};
1163
/* sysfs subdirectory grouping the address-match attributes */
static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
        .name  = "inject_addrmatch",
        .mcidev_attr = i7core_addrmatch_attrs,
};
1168
1169 static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1170         ATTR_COUNTER(0),
1171         ATTR_COUNTER(1),
1172         ATTR_COUNTER(2),
1173         { .attr = { .name = NULL } }
1174 };
1175
/* sysfs subdirectory grouping the udimm CE counter attributes */
static const struct mcidev_sysfs_group i7core_udimm_counters = {
        .name  = "all_channel_counts",
        .mcidev_attr = i7core_udimm_counters_attrs,
};
1180
/*
 * sysfs layout used for registered dimms: the injection controls plus
 * the address-match group, but no udimm counter group (the counters
 * are unavailable when dimms are registered — see DECLARE_COUNTER).
 */
static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
        {
                .attr = {
                        .name = "inject_section",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_section_show,
                .store = i7core_inject_section_store,
        }, {
                .attr = {
                        .name = "inject_type",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_type_show,
                .store = i7core_inject_type_store,
        }, {
                .attr = {
                        .name = "inject_eccmask",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_eccmask_show,
                .store = i7core_inject_eccmask_store,
        }, {
                .grp = &i7core_inject_addrmatch,
        }, {
                .attr = {
                        .name = "inject_enable",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_enable_show,
                .store = i7core_inject_enable_store,
        },
        { }     /* End of list */
};
1215
/*
 * sysfs layout used for unregistered dimms: same injection controls as
 * the rdimm table, plus the all_channel_counts group of CE counters.
 */
static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
        {
                .attr = {
                        .name = "inject_section",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_section_show,
                .store = i7core_inject_section_store,
        }, {
                .attr = {
                        .name = "inject_type",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_type_show,
                .store = i7core_inject_type_store,
        }, {
                .attr = {
                        .name = "inject_eccmask",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_eccmask_show,
                .store = i7core_inject_eccmask_store,
        }, {
                .grp = &i7core_inject_addrmatch,
        }, {
                .attr = {
                        .name = "inject_enable",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_enable_show,
                .store = i7core_inject_enable_store,
        }, {
                .grp = &i7core_udimm_counters,
        },
        { }     /* End of list */
};
1252
1253 /****************************************************************************
1254         Device initialization routines: put/get, init/exit
1255  ****************************************************************************/
1256
1257 /*
1258  *      i7core_put_all_devices  'put' all the devices that we have
1259  *                              reserved via 'get'
1260  */
1261 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1262 {
1263         int i;
1264
1265         debugf0(__FILE__ ": %s()\n", __func__);
1266         for (i = 0; i < i7core_dev->n_devs; i++) {
1267                 struct pci_dev *pdev = i7core_dev->pdev[i];
1268                 if (!pdev)
1269                         continue;
1270                 debugf0("Removing dev %02x:%02x.%d\n",
1271                         pdev->bus->number,
1272                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1273                 pci_dev_put(pdev);
1274         }
1275 }
1276
1277 static void i7core_put_all_devices(void)
1278 {
1279         struct i7core_dev *i7core_dev, *tmp;
1280
1281         list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1282                 i7core_put_devices(i7core_dev);
1283                 free_i7core_dev(i7core_dev);
1284         }
1285 }
1286
1287 static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1288 {
1289         struct pci_dev *pdev = NULL;
1290         int i;
1291
1292         /*
1293          * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core pci buses
1294          * aren't announced by acpi. So, we need to use a legacy scan probing
1295          * to detect them
1296          */
1297         while (table && table->descr) {
1298                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1299                 if (unlikely(!pdev)) {
1300                         for (i = 0; i < MAX_SOCKET_BUSES; i++)
1301                                 pcibios_scan_specific_bus(255-i);
1302                 }
1303                 pci_dev_put(pdev);
1304                 table++;
1305         }
1306 }
1307
1308 static unsigned i7core_pci_lastbus(void)
1309 {
1310         int last_bus = 0, bus;
1311         struct pci_bus *b = NULL;
1312
1313         while ((b = pci_find_next_bus(b)) != NULL) {
1314                 bus = b->number;
1315                 debugf0("Found bus %d\n", bus);
1316                 if (bus > last_bus)
1317                         last_bus = bus;
1318         }
1319
1320         debugf0("Last bus %d\n", last_bus);
1321
1322         return last_bus;
1323 }
1324
1325 /*
1326  *      i7core_get_all_devices  Find and perform 'get' operation on the MCH's
1327  *                      device/functions we want to reference for this driver
1328  *
1329  *                      Need to 'get' device 16 func 1 and func 2
1330  */
/*
 * i7core_get_onedevice - find and 'get' one PCI device of the memory
 *                        controller described by table->descr[devno]
 * @prev:     iteration cursor; *prev is the previously found device (or
 *            NULL) and is updated to the device found by this call
 * @table:    device id table for the processor family
 * @devno:    index of the device descriptor inside @table
 * @last_bus: highest PCI bus number, used to derive the socket number
 *
 * Returns 0 on success or when the device is legitimately absent
 * (optional device, or end of the *prev iteration), negative errno on
 * allocation failure or an unexpected PCI topology.
 */
static int i7core_get_onedevice(struct pci_dev **prev,
                                const struct pci_id_table *table,
                                const unsigned devno,
                                const unsigned last_bus)
{
        struct i7core_dev *i7core_dev;
        const struct pci_id_descr *dev_descr = &table->descr[devno];

        struct pci_dev *pdev = NULL;
        u8 bus = 0;
        u8 socket = 0;

        pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                              dev_descr->dev_id, *prev);

        /*
         * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
         * is at addr 8086:2c40, instead of 8086:2c41. So, we need
         * to probe for the alternate address in case of failure
         */
        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
                                      *prev);

        if (!pdev) {
                /* End of the multi-socket iteration: not an error */
                if (*prev) {
                        *prev = pdev;
                        return 0;
                }

                if (dev_descr->optional)
                        return 0;

                /* First descriptor missing: silently reject the table */
                if (devno == 0)
                        return -ENODEV;

                i7core_printk(KERN_INFO,
                        "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
                        dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

                /* End of list, leave */
                return -ENODEV;
        }
        bus = pdev->bus->number;

        /* Non-core buses count down from last_bus, one per socket */
        socket = last_bus - bus;

        i7core_dev = get_i7core_dev(socket);
        if (!i7core_dev) {
                i7core_dev = alloc_i7core_dev(socket, table);
                if (!i7core_dev) {
                        pci_dev_put(pdev);
                        return -ENOMEM;
                }
        }

        if (i7core_dev->pdev[devno]) {
                i7core_printk(KERN_ERR,
                        "Duplicated device for "
                        "dev %02x:%02x.%d PCI ID %04x:%04x\n",
                        bus, dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
                pci_dev_put(pdev);
                return -ENODEV;
        }

        i7core_dev->pdev[devno] = pdev;

        /*
         * Sanity check. From here on pdev is reachable via
         * i7core_dev->pdev[], so error paths rely on the caller's
         * i7core_put_all_devices() to drop the reference.
         */
        if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
                        PCI_FUNC(pdev->devfn) != dev_descr->func)) {
                i7core_printk(KERN_ERR,
                        "Device PCI ID %04x:%04x "
                        "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
                        bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                        bus, dev_descr->dev, dev_descr->func);
                return -ENODEV;
        }

        /* Be sure that the device is enabled */
        if (unlikely(pci_enable_device(pdev) < 0)) {
                i7core_printk(KERN_ERR,
                        "Couldn't enable "
                        "dev %02x:%02x.%d PCI ID %04x:%04x\n",
                        bus, dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
                return -ENODEV;
        }

        debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
                socket, bus, dev_descr->dev,
                dev_descr->func,
                PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

        /*
         * As stated on drivers/pci/search.c, the reference count for
         * @from is always decremented if it is not %NULL. So, as we need
         * to get all devices up to null, we need to do a get for the device
         */
        pci_dev_get(pdev);

        *prev = pdev;

        return 0;
}
1443
/*
 * Walk every known device table and 'get' all matching PCI devices for
 * every socket. Returns 0 on success (including tables that simply do
 * not match this CPU), -ENODEV if a table partially matched and then a
 * required device turned out to be missing.
 */
static int i7core_get_all_devices(void)
{
        int i, rc, last_bus;
        struct pci_dev *pdev = NULL;
        const struct pci_id_table *table = pci_dev_table;

        last_bus = i7core_pci_lastbus();

        while (table && table->descr) {
                for (i = 0; i < table->n_devs; i++) {
                        pdev = NULL;
                        /* Inner loop iterates over all sockets for dev i */
                        do {
                                rc = i7core_get_onedevice(&pdev, table, i,
                                                          last_bus);
                                if (rc < 0) {
                                        /*
                                         * First device absent: this CPU
                                         * doesn't match the table; skip
                                         * it entirely (not an error).
                                         */
                                        if (i == 0) {
                                                i = table->n_devs;
                                                break;
                                        }
                                        i7core_put_all_devices();
                                        return -ENODEV;
                                }
                        } while (pdev);
                }
                table++;
        }

        return 0;
}
1473
/*
 * mci_bind_devs - sort one socket's PCI devices into the private struct
 *
 * Layout on these controllers: slot 0 func 0 is the non-core device,
 * slot 3 holds the MCR functions, slots 4..4+NUM_CHANS-1 hold the
 * per-channel functions. Presence of device 3 function 2 indicates
 * registered dimms. Returns 0, or -EINVAL for an unexpected slot/func.
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
                         struct i7core_dev *i7core_dev)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        struct pci_dev *pdev;
        int i, func, slot;

        pvt->is_registered = 0;
        for (i = 0; i < i7core_dev->n_devs; i++) {
                pdev = i7core_dev->pdev[i];
                if (!pdev)
                        continue;

                func = PCI_FUNC(pdev->devfn);
                slot = PCI_SLOT(pdev->devfn);
                if (slot == 3) {
                        /* Memory controller registers */
                        if (unlikely(func > MAX_MCR_FUNC))
                                goto error;
                        pvt->pci_mcr[func] = pdev;
                } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
                        /* Per-channel registers */
                        if (unlikely(func > MAX_CHAN_FUNC))
                                goto error;
                        pvt->pci_ch[slot - 4][func] = pdev;
                } else if (!slot && !func)
                        pvt->pci_noncore = pdev;
                else
                        goto error;

                debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
                        PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                        pdev, i7core_dev->socket);

                /* Dev 3 fn 2 only exists when dimms are registered */
                if (PCI_SLOT(pdev->devfn) == 3 &&
                        PCI_FUNC(pdev->devfn) == 2)
                        pvt->is_registered = 1;
        }

        return 0;

error:
        i7core_printk(KERN_ERR, "Device %d, function %d "
                      "is out of the expected range\n",
                      slot, func);
        return -EINVAL;
}
1519
1520 /****************************************************************************
1521                         Error check routines
1522  ****************************************************************************/
1523 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1524                                       const int chan,
1525                                       const int dimm,
1526                                       const int add)
1527 {
1528         char *msg;
1529         struct i7core_pvt *pvt = mci->pvt_info;
1530         int row = pvt->csrow_map[chan][dimm], i;
1531
1532         for (i = 0; i < add; i++) {
1533                 msg = kasprintf(GFP_KERNEL, "Corrected error "
1534                                 "(Socket=%d channel=%d dimm=%d)",
1535                                 pvt->i7core_dev->socket, chan, dimm);
1536
1537                 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1538                 kfree (msg);
1539         }
1540 }
1541
/*
 * Compute the per-dimm CE deltas for channel @chan from the new raw
 * counter readings (@new0..@new2), accumulate them, and forward any
 * increase to the EDAC core. On the very first call only the baseline
 * is stored.
 */
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
                                         const int chan,
                                         const int new0,
                                         const int new1,
                                         const int new2)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        int add0 = 0, add1 = 0, add2 = 0;
        /* Updates CE counters if it is not the first time here */
        if (pvt->ce_count_available) {
                /* Updates CE counters */

                add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
                add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
                add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

                /*
                 * Compensate for hardware counter wraparound.
                 * NOTE(review): 0x7fff is added for a negative delta,
                 * which matches a counter wrapping at 0x7fff; if the
                 * 15-bit counter actually wraps at 0x8000 this is off
                 * by one — confirm against the register definition.
                 */
                if (add2 < 0)
                        add2 += 0x7fff;
                pvt->rdimm_ce_count[chan][2] += add2;

                if (add1 < 0)
                        add1 += 0x7fff;
                pvt->rdimm_ce_count[chan][1] += add1;

                if (add0 < 0)
                        add0 += 0x7fff;
                pvt->rdimm_ce_count[chan][0] += add0;
        } else
                pvt->ce_count_available = 1;

        /* Store the new values */
        pvt->rdimm_last_ce_count[chan][2] = new2;
        pvt->rdimm_last_ce_count[chan][1] = new1;
        pvt->rdimm_last_ce_count[chan][0] = new0;

        /*updated the edac core */
        if (add0 != 0)
                i7core_rdimm_update_csrow(mci, chan, 0, add0);
        if (add1 != 0)
                i7core_rdimm_update_csrow(mci, chan, 1, add1);
        if (add2 != 0)
                i7core_rdimm_update_csrow(mci, chan, 2, add2);

}
1586
/*
 * Poll the six MC_COR_ECC_CNT registers (device 3 function 2), decode
 * the per-dimm corrected-error counts for each of the three channels,
 * and feed the deltas to i7core_rdimm_update_ce_count(). Each register
 * packs two dimm counters (TOP/BOT); the decoding depends on whether
 * the channel carries 3 dimms or at most 2.
 */
static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 rcv[3][2];
        int i, new0, new1, new2;

        /*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
                                                                &rcv[0][0]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
                                                                &rcv[0][1]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
                                                                &rcv[1][0]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
                                                                &rcv[1][1]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
                                                                &rcv[2][0]);
        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
                                                                &rcv[2][1]);
        for (i = 0 ; i < 3; i++) {
                debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
                        (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
                /*if the channel has 3 dimms*/
                if (pvt->channel[i].dimms > 2) {
                        new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
                        new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
                        new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
                } else {
                        /* <= 2 dimms: each dimm owns a whole register */
                        new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
                                        DIMM_BOT_COR_ERR(rcv[i][0]);
                        new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
                                        DIMM_BOT_COR_ERR(rcv[i][1]);
                        new2 = 0;
                }

                i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
        }
}
1625
/* This function is based on the device 3 function 4 registers as described in:
1627  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1628  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1629  * also available at:
1630  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1631  */
1632 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1633 {
1634         struct i7core_pvt *pvt = mci->pvt_info;
1635         u32 rcv1, rcv0;
1636         int new0, new1, new2;
1637
1638         if (!pvt->pci_mcr[4]) {
1639                 debugf0("%s MCR registers not found\n", __func__);
1640                 return;
1641         }
1642
1643         /* Corrected test errors */
1644         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1645         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1646
1647         /* Store the new values */
1648         new2 = DIMM2_COR_ERR(rcv1);
1649         new1 = DIMM1_COR_ERR(rcv0);
1650         new0 = DIMM0_COR_ERR(rcv0);
1651
1652         /* Updates CE counters if it is not the first time here */
1653         if (pvt->ce_count_available) {
1654                 /* Updates CE counters */
1655                 int add0, add1, add2;
1656
1657                 add2 = new2 - pvt->udimm_last_ce_count[2];
1658                 add1 = new1 - pvt->udimm_last_ce_count[1];
1659                 add0 = new0 - pvt->udimm_last_ce_count[0];
1660
1661                 if (add2 < 0)
1662                         add2 += 0x7fff;
1663                 pvt->udimm_ce_count[2] += add2;
1664
1665                 if (add1 < 0)
1666                         add1 += 0x7fff;
1667                 pvt->udimm_ce_count[1] += add1;
1668
1669                 if (add0 < 0)
1670                         add0 += 0x7fff;
1671                 pvt->udimm_ce_count[0] += add0;
1672
1673                 if (add0 | add1 | add2)
1674                         i7core_printk(KERN_ERR, "New Corrected error(s): "
1675                                       "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1676                                       add0, add1, add2);
1677         } else
1678                 pvt->ce_count_available = 1;
1679
1680         /* Store the new values */
1681         pvt->udimm_last_ce_count[2] = new2;
1682         pvt->udimm_last_ce_count[1] = new1;
1683         pvt->udimm_last_ce_count[0] = new0;
1684 }
1685
1686 /*
 * According to tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1688  * Architectures Software Developer’s Manual Volume 3B.
1689  * Nehalem are defined as family 0x06, model 0x1a
1690  *
1691  * The MCA registers used here are the following ones:
1692  *     struct mce field MCA Register
1693  *     m->status        MSR_IA32_MC8_STATUS
1694  *     m->addr          MSR_IA32_MC8_ADDR
1695  *     m->misc          MSR_IA32_MC8_MISC
1696  * In the case of Nehalem, the error information is masked at .status and .misc
1697  * fields
1698  */
1699 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1700                                     const struct mce *m)
1701 {
1702         struct i7core_pvt *pvt = mci->pvt_info;
1703         char *type, *optype, *err, *msg;
1704         unsigned long error = m->status & 0x1ff0000l;
1705         u32 optypenum = (m->status >> 4) & 0x07;
1706         u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1707         u32 dimm = (m->misc >> 16) & 0x3;
1708         u32 channel = (m->misc >> 18) & 0x3;
1709         u32 syndrome = m->misc >> 32;
1710         u32 errnum = find_first_bit(&error, 32);
1711         int csrow;
1712
1713         if (m->mcgstatus & 1)
1714                 type = "FATAL";
1715         else
1716                 type = "NON_FATAL";
1717
1718         switch (optypenum) {
1719         case 0:
1720                 optype = "generic undef request";
1721                 break;
1722         case 1:
1723                 optype = "read error";
1724                 break;
1725         case 2:
1726                 optype = "write error";
1727                 break;
1728         case 3:
1729                 optype = "addr/cmd error";
1730                 break;
1731         case 4:
1732                 optype = "scrubbing error";
1733                 break;
1734         default:
1735                 optype = "reserved";
1736                 break;
1737         }
1738
1739         switch (errnum) {
1740         case 16:
1741                 err = "read ECC error";
1742                 break;
1743         case 17:
1744                 err = "RAS ECC error";
1745                 break;
1746         case 18:
1747                 err = "write parity error";
1748                 break;
1749         case 19:
1750                 err = "redundacy loss";
1751                 break;
1752         case 20:
1753                 err = "reserved";
1754                 break;
1755         case 21:
1756                 err = "memory range error";
1757                 break;
1758         case 22:
1759                 err = "RTID out of range";
1760                 break;
1761         case 23:
1762                 err = "address parity error";
1763                 break;
1764         case 24:
1765                 err = "byte enable parity error";
1766                 break;
1767         default:
1768                 err = "unknown";
1769         }
1770
1771         /* FIXME: should convert addr into bank and rank information */
1772         msg = kasprintf(GFP_ATOMIC,
1773                 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1774                 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1775                 type, (long long) m->addr, m->cpu, dimm, channel,
1776                 syndrome, core_err_cnt, (long long)m->status,
1777                 (long long)m->misc, optype, err);
1778
1779         debugf0("%s", msg);
1780
1781         csrow = pvt->csrow_map[channel][dimm];
1782
1783         /* Call the helper to output message */
1784         if (m->mcgstatus & 1)
1785                 edac_mc_handle_fbd_ue(mci, csrow, 0,
1786                                 0 /* FIXME: should be channel here */, msg);
1787         else if (!pvt->is_registered)
1788                 edac_mc_handle_fbd_ce(mci, csrow,
1789                                 0 /* FIXME: should be channel here */, msg);
1790
1791         kfree(msg);
1792 }
1793
1794 /*
1795  *      i7core_check_error      Retrieve and process errors reported by the
1796  *                              hardware. Called by the Core module.
1797  */
1798 static void i7core_check_error(struct mem_ctl_info *mci)
1799 {
1800         struct i7core_pvt *pvt = mci->pvt_info;
1801         int i;
1802         unsigned count = 0;
1803         struct mce *m;
1804
1805         /*
1806          * MCE first step: Copy all mce errors into a temporary buffer
1807          * We use a double buffering here, to reduce the risk of
1808          * losing an error.
1809          */
1810         smp_rmb();
1811         count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1812                 % MCE_LOG_LEN;
1813         if (!count)
1814                 goto check_ce_error;
1815
1816         m = pvt->mce_outentry;
1817         if (pvt->mce_in + count > MCE_LOG_LEN) {
1818                 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1819
1820                 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1821                 smp_wmb();
1822                 pvt->mce_in = 0;
1823                 count -= l;
1824                 m += l;
1825         }
1826         memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1827         smp_wmb();
1828         pvt->mce_in += count;
1829
1830         smp_rmb();
1831         if (pvt->mce_overrun) {
1832                 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1833                               pvt->mce_overrun);
1834                 smp_wmb();
1835                 pvt->mce_overrun = 0;
1836         }
1837
1838         /*
1839          * MCE second step: parse errors and display
1840          */
1841         for (i = 0; i < count; i++)
1842                 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1843
1844         /*
1845          * Now, let's increment CE error counts
1846          */
1847 check_ce_error:
1848         if (!pvt->is_registered)
1849                 i7core_udimm_check_mc_ecc_err(mci);
1850         else
1851                 i7core_rdimm_check_mc_ecc_err(mci);
1852 }
1853
/*
 * i7core_mce_check_error	Replicates mcelog routine to get errors
 *				This routine simply queues mcelog errors and
 *				returns. The error itself should be handled
 *				later by i7core_check_error.
 * WARNING: As this routine should be called at NMI time, extra care should
 * be taken to avoid deadlocks, and to be as fast as possible.
 *
 * Returns 1 when the event was queued for this MC (mcelog need not log
 * it), 0 when it is not ours or the ring buffer is full.
 */
static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;

#ifdef CONFIG_SMP
	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
		return 0;
#endif

	smp_rmb();
	/* Ring full: drop the event and count the overrun for later report */
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return 0;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	smp_wmb();	/* entry must be visible before the producer index moves */
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advise mcelog that the errors were handled */
	return 1;
}
1903
1904 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
1905 {
1906         pvt->i7core_pci = edac_pci_create_generic_ctl(
1907                                                 &pvt->i7core_dev->pdev[0]->dev,
1908                                                 EDAC_MOD_STR);
1909         if (unlikely(!pvt->i7core_pci))
1910                 pr_warn("Unable to setup PCI error report via EDAC\n");
1911 }
1912
1913 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
1914 {
1915         if (likely(pvt->i7core_pci))
1916                 edac_pci_release_generic_ctl(pvt->i7core_pci);
1917         else
1918                 i7core_printk(KERN_ERR,
1919                                 "Couldn't find mem_ctl_info for socket %d\n",
1920                                 pvt->i7core_dev->socket);
1921         pvt->i7core_pci = NULL;
1922 }
1923
/*
 * Tear down the EDAC memory controller of one socket, in the reverse
 * order of i7core_register_mci(): MCE hook, PCI ctl, sysfs nodes, then
 * the mci structure itself.
 */
static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
{
	struct mem_ctl_info *mci = i7core_dev->mci;
	struct i7core_pvt *pvt;

	/* Nothing to do if this socket never completed registration */
	if (unlikely(!mci || !mci->pvt_info)) {
		debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
			__func__, &i7core_dev->pdev[0]->dev);

		i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
		return;
	}

	pvt = mci->pvt_info;

	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
		__func__, mci, &i7core_dev->pdev[0]->dev);

	/* Disable MCE NMI handler */
	edac_mce_unregister(&pvt->edac_mce);

	/* Disable EDAC polling */
	i7core_pci_ctl_release(pvt);

	/* Remove MC sysfs nodes */
	edac_mc_del_mc(mci->dev);

	/* ctl_name was kasprintf()'d at registration time */
	debugf1("%s: free mci struct\n", mci->ctl_name);
	kfree(mci->ctl_name);
	edac_mc_free(mci);
	i7core_dev->mci = NULL;
}
1956
/*
 * Allocate and register one EDAC memory controller for the given
 * socket: bind its PCI devices, read the DIMM configuration, register
 * with the EDAC core, and hook into edac_mce to receive machine-check
 * events.  Returns 0 on success or a negative errno; on failure all
 * partially-acquired resources are unwound via the fail labels.
 */
static int i7core_register_mci(struct i7core_dev *i7core_dev)
{
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;
	int rc, channels, csrows;

	/* Check the number of active and not disabled channels */
	rc = i7core_get_active_channels(i7core_dev->socket, &channels, &csrows);
	if (unlikely(rc < 0))
		return rc;

	/* allocate a new MC control structure */
	mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, i7core_dev->socket);
	if (unlikely(!mci))
		return -ENOMEM;

	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
		__func__, mci, &i7core_dev->pdev[0]->dev);

	pvt = mci->pvt_info;
	memset(pvt, 0, sizeof(*pvt));

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	/*
	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
	 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
	 * memory channels
	 */
	mci->mtype_cap = MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "i7core_edac.c";
	mci->mod_ver = I7CORE_REVISION;
	/* ctl_name is freed in i7core_unregister_mci()/fail0 below */
	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
				  i7core_dev->socket);
	mci->dev_name = pci_name(i7core_dev->pdev[0]);
	mci->ctl_page_to_phys = NULL;

	/* Store pci devices at mci for faster access */
	rc = mci_bind_devs(mci, i7core_dev);
	if (unlikely(rc < 0))
		goto fail0;

	/* mci_bind_devs() determined whether DIMMs are registered */
	if (pvt->is_registered)
		mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
	else
		mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;

	/* Get dimm basic config */
	get_dimm_config(mci);
	/* record ptr to the generic device */
	mci->dev = &i7core_dev->pdev[0]->dev;
	/* Set the function pointer to an actual operation function */
	mci->edac_check = i7core_check_error;

	/* add this new MC control structure to EDAC's list of MCs */
	if (unlikely(edac_mc_add_mc(mci))) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mc_add_mc()\n", __func__);
		/* FIXME: perhaps some code should go here that disables error
		 * reporting if we just enabled it
		 */

		rc = -EINVAL;
		goto fail0;
	}

	/* Default error mask is any memory */
	pvt->inject.channel = 0;
	pvt->inject.dimm = -1;
	pvt->inject.rank = -1;
	pvt->inject.bank = -1;
	pvt->inject.page = -1;
	pvt->inject.col = -1;

	/* allocating generic PCI control info */
	i7core_pci_ctl_create(pvt);

	/* Registers on edac_mce in order to receive memory errors */
	pvt->edac_mce.priv = mci;
	pvt->edac_mce.check_error = i7core_mce_check_error;
	rc = edac_mce_register(&pvt->edac_mce);
	if (unlikely(rc < 0)) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mce_register()\n", __func__);
		goto fail1;
	}

	return 0;

fail1:
	/* Undo i7core_pci_ctl_create() and edac_mc_add_mc() */
	i7core_pci_ctl_release(pvt);
	edac_mc_del_mc(mci->dev);
fail0:
	kfree(mci->ctl_name);
	edac_mc_free(mci);
	i7core_dev->mci = NULL;
	return rc;
}
2059
/*
 *	i7core_probe	Probe for ONE instance of device to see if it is
 *			present.
 *	return:
 *		0 for FOUND a device
 *		< 0 for error code
 */

static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	int rc;
	struct i7core_dev *i7core_dev;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);

	/*
	 * All memory controllers are allocated at the first pass.
	 * Later probe calls (one per matching PCI id) are no-ops.
	 */
	if (unlikely(probed >= 1)) {
		mutex_unlock(&i7core_edac_lock);
		return -ENODEV;
	}
	probed++;

	rc = i7core_get_all_devices();
	if (unlikely(rc < 0))
		goto fail0;

	/* Register one EDAC memory controller per detected socket */
	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		rc = i7core_register_mci(i7core_dev);
		if (unlikely(rc < 0))
			goto fail1;
	}

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&i7core_edac_lock);
	return 0;

fail1:
	/*
	 * Unwind every socket; i7core_unregister_mci() tolerates sockets
	 * that never completed registration.
	 */
	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
		i7core_unregister_mci(i7core_dev);

	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}
2110
/*
 *	i7core_remove	destructor for one instance of device
 *
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct i7core_dev *i7core_dev;

	debugf0(__FILE__ ": %s()\n", __func__);

	/*
	 * we have a trouble here: pdev value for removal will be wrong, since
	 * it will point to the X58 register used to detect that the machine
	 * is a Nehalem or upper design. However, due to the way several PCI
	 * devices are grouped together to provide MC functionality, we need
	 * to use a different method for releasing the devices
	 */

	mutex_lock(&i7core_edac_lock);

	/* Nothing to tear down if probe never completed its first pass */
	if (unlikely(!probed)) {
		mutex_unlock(&i7core_edac_lock);
		return;
	}

	/* Unregister every socket registered by i7core_probe() */
	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
		i7core_unregister_mci(i7core_dev);

	/* Release PCI resources */
	i7core_put_all_devices();

	/* Allow a subsequent probe to run its first pass again */
	probed--;

	mutex_unlock(&i7core_edac_lock);
}
2146
/* Export the PCI id table so module loaders can match the hardware */
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 *	i7core_driver	pci_driver structure for this module
 *
 */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};
2159
2160 /*
2161  *      i7core_init             Module entry function
2162  *                      Try to initialize this module for its devices
2163  */
2164 static int __init i7core_init(void)
2165 {
2166         int pci_rc;
2167
2168         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2169
2170         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2171         opstate_init();
2172
2173         if (use_pci_fixup)
2174                 i7core_xeon_pci_fixup(pci_dev_table);
2175
2176         pci_rc = pci_register_driver(&i7core_driver);
2177
2178         if (pci_rc >= 0)
2179                 return 0;
2180
2181         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2182                       pci_rc);
2183
2184         return pci_rc;
2185 }
2186
/*
 *	i7core_exit()	Module exit function
 *			Unregister the driver
 */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	/* This triggers i7core_remove() for the bound device */
	pci_unregister_driver(&i7core_driver);
}
2196
module_init(i7core_init);
module_exit(i7core_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
                   I7CORE_REVISION);

/* Error-reporting mode, selectable at module load time (read-only in sysfs) */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");