/* Intel i7 core/Nehalem Memory Controller kernel module
 *
 * This driver supports the memory controllers found on the Intel
 * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
 * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
 * and Westmere-EP.
 *
 * This file may be distributed under the terms of the
 * GNU General Public License version 2 only.
 *
 * Copyright (c) 2009-2010 by:
 *       Mauro Carvalho Chehab <mchehab@redhat.com>
 *
 * Red Hat Inc. http://www.redhat.com
 *
 * Forked and adapted from the i5400_edac driver
 *
 * Based on the following public Intel datasheets:
 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
 * Datasheet, Volume 2:
 *      http://download.intel.com/design/processor/datashts/320835.pdf
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/dmi.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
#include <linux/smp.h>
#include <asm/mce.h>
#include <asm/processor.h>
#include <asm/div64.h>

#include "edac_core.h"

/* Static vars */
static LIST_HEAD(i7core_edac_list);
static DEFINE_MUTEX(i7core_edac_lock);
static int probed;

static int use_pci_fixup;
module_param(use_pci_fixup, int, 0444);
MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255 and are not reported by the BIOS.
 * We currently only handle devices with up to 2 sockets. In order to
 * support more QPI (Quick Path Interconnect) sockets, just increment
 * this number.
 */
#define MAX_SOCKET_BUSES        2


/*
 * Alter this version for the module when modifications are made
 */
#define I7CORE_REVISION    " Ver: 1.0.0"
#define EDAC_MOD_STR      "i7core_edac"

/*
 * Debug macros
 */
#define i7core_printk(level, fmt, arg...)                       \
        edac_printk(level, "i7core", fmt, ##arg)

#define i7core_mc_printk(mci, level, fmt, arg...)               \
        edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)

/*
 * i7core Memory Controller Registers
 */

        /* OFFSETS for Device 0 Function 0 */

#define MC_CFG_CONTROL  0x90
  #define MC_CFG_UNLOCK         0x02
  #define MC_CFG_LOCK           0x00

        /* OFFSETS for Device 3 Function 0 */

#define MC_CONTROL      0x48
#define MC_STATUS       0x4c
#define MC_MAX_DOD      0x64

/*
 * OFFSETS for Device 3 Function 4, as indicated on the Xeon 5500 datasheet:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#define MC_TEST_ERR_RCV1        0x60
  #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)

#define MC_TEST_ERR_RCV0        0x64
  #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
  #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)

/* OFFSETS for Device 3 Function 2, as indicated on the Xeon 5500 datasheet */
#define MC_SSRCONTROL           0x48
  #define SSR_MODE_DISABLE      0x00
  #define SSR_MODE_ENABLE       0x01
  #define SSR_MODE_MASK         0x03

#define MC_SCRUB_CONTROL        0x4c
  #define STARTSCRUB            (1 << 24)
  #define SCRUBINTERVAL_MASK    0xffffff

#define MC_COR_ECC_CNT_0        0x80
#define MC_COR_ECC_CNT_1        0x84
#define MC_COR_ECC_CNT_2        0x88
#define MC_COR_ECC_CNT_3        0x8c
#define MC_COR_ECC_CNT_4        0x90
#define MC_COR_ECC_CNT_5        0x94

#define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
#define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)
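
/*
 * Illustrative decode (hypothetical register value): each MC_COR_ECC_CNT_*
 * register packs two 15-bit corrected-error counters. If MC_COR_ECC_CNT_0
 * read 0x00050003, DIMM_TOP_COR_ERR() would yield 5 and
 * DIMM_BOT_COR_ERR() would yield 3.
 */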


        /* OFFSETS for Devices 4,5 and 6 Function 0 */

#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
  #define THREE_DIMMS_PRESENT           (1 << 24)
  #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
  #define QUAD_RANK_PRESENT             (1 << 22)
  #define REGISTERED_DIMM               (1 << 15)

#define MC_CHANNEL_MAPPER       0x60
  #define RDLCH(r, ch)          ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)          ((((r) >> (ch * 6)) & 0x07) - 1)
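
/*
 * Illustrative decode (hypothetical value): MC_CHANNEL_MAPPER packs a
 * 3-bit read and a 3-bit write logical channel number per channel (the
 * macros subtract 1 from the raw field). E.g. with r = 0x0a, channel 0
 * decodes as RDLCH(r, 0) = ((0x0a >> 3) & 0x07) - 1 = 0 and
 * WRLCH(r, 0) = ((0x0a) & 0x07) - 1 = 1.
 */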

#define MC_CHANNEL_RANK_PRESENT 0x7c
  #define RANK_PRESENT_MASK             0xffff

#define MC_CHANNEL_ADDR_MATCH   0xf0
#define MC_CHANNEL_ERROR_MASK   0xf8
#define MC_CHANNEL_ERROR_INJECT 0xfc
  #define INJECT_ADDR_PARITY    0x10
  #define INJECT_ECC            0x08
  #define MASK_CACHELINE        0x06
  #define MASK_FULL_CACHELINE   0x06
  #define MASK_MSB32_CACHELINE  0x04
  #define MASK_LSB32_CACHELINE  0x02
  #define NO_MASK_CACHELINE     0x00
  #define REPEAT_EN             0x01

        /* OFFSETS for Devices 4,5 and 6 Function 1 */

#define MC_DOD_CH_DIMM0         0x48
#define MC_DOD_CH_DIMM1         0x4c
#define MC_DOD_CH_DIMM2         0x50
  #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)         ((x & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK     (1 << 9)
  #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK            3
  #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)
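
/*
 * Illustrative decode of a hypothetical MC_DOD_CH_DIMMx value x = 0x2a5:
 * DIMM_PRESENT(x) = 1, MC_DOD_NUMBANK(x) = 1 (8 banks per numbank()),
 * MC_DOD_NUMRANK(x) = 1 (2 ranks), MC_DOD_NUMROW(x) = 1 (2^13 rows) and
 * MC_DOD_NUMCOL(x) = 1 (2^11 columns), per the helpers further below.
 */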

#define MC_RANK_PRESENT         0x7c

#define MC_SAG_CH_0     0x80
#define MC_SAG_CH_1     0x84
#define MC_SAG_CH_2     0x88
#define MC_SAG_CH_3     0x8c
#define MC_SAG_CH_4     0x90
#define MC_SAG_CH_5     0x94
#define MC_SAG_CH_6     0x98
#define MC_SAG_CH_7     0x9c

#define MC_RIR_LIMIT_CH_0       0x40
#define MC_RIR_LIMIT_CH_1       0x44
#define MC_RIR_LIMIT_CH_2       0x48
#define MC_RIR_LIMIT_CH_3       0x4C
#define MC_RIR_LIMIT_CH_4       0x50
#define MC_RIR_LIMIT_CH_5       0x54
#define MC_RIR_LIMIT_CH_6       0x58
#define MC_RIR_LIMIT_CH_7       0x5C
#define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)

#define MC_RIR_WAY_CH           0x80
  #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
  #define MC_RIR_WAY_RANK_MASK          0x7

/*
 * i7core structs
 */

#define NUM_CHANS 3
#define MAX_DIMMS 3             /* Max DIMMS per channel */
#define MAX_MCR_FUNC  4
#define MAX_CHAN_FUNC 3

struct i7core_info {
        u32     mc_control;
        u32     mc_status;
        u32     max_dod;
        u32     ch_map;
};


struct i7core_inject {
        int     enable;

        u32     section;
        u32     type;
        u32     eccmask;

        /* Error address mask */
        int channel, dimm, rank, bank, page, col;
};

struct i7core_channel {
        u32             ranks;
        u32             dimms;
};

struct pci_id_descr {
        int                     dev;
        int                     func;
        int                     dev_id;
        int                     optional;
};

struct pci_id_table {
        const struct pci_id_descr       *descr;
        int                             n_devs;
};

struct i7core_dev {
        struct list_head        list;
        u8                      socket;
        struct pci_dev          **pdev;
        int                     n_devs;
        struct mem_ctl_info     *mci;
};

struct i7core_pvt {
        struct pci_dev  *pci_noncore;
        struct pci_dev  *pci_mcr[MAX_MCR_FUNC + 1];
        struct pci_dev  *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];

        struct i7core_dev *i7core_dev;

        struct i7core_info      info;
        struct i7core_inject    inject;
        struct i7core_channel   channel[NUM_CHANS];

        int             ce_count_available;

                        /* ECC corrected errors counts per udimm */
        unsigned long   udimm_ce_count[MAX_DIMMS];
        int             udimm_last_ce_count[MAX_DIMMS];
                        /* ECC corrected errors counts per rdimm */
        unsigned long   rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
        int             rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

        bool            is_registered, enable_scrub;

        /* Fifo double buffers */
        struct mce              mce_entry[MCE_LOG_LEN];
        struct mce              mce_outentry[MCE_LOG_LEN];

        /* Fifo in/out counters */
        unsigned                mce_in, mce_out;

        /* Count of errors that were lost due to FIFO overrun */
        unsigned                mce_overrun;

        /* DCLK Frequency used for computing scrub rate */
        int                     dclk_freq;

        /* Struct to control EDAC polling */
        struct edac_pci_ctl_info *i7core_pci;
};

#define PCI_DESCR(device, function, device_id)  \
        .dev = (device),                        \
        .func = (function),                     \
        .dev_id = (device_id)
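
/*
 * For instance, PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) expands to
 * ".dev = (3), .func = (0), .dev_id = (PCI_DEVICE_ID_INTEL_I7_MCR)".
 */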

static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
                /* Memory controller */
        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
                        /* Exists only for RDIMM */
        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

                /* Channel 0 */
        { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
        { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
        { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
        { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

                /* Channel 1 */
        { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
        { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
        { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
        { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

                /* Channel 2 */
        { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

                /* Generic Non-core registers */
        /*
         * This is the PCI device on i7core and on Xeon 35xx (8086:2c41).
         * On Xeon 55xx, however, it has a different id (8086:2c40), so
         * the probing code needs to test for the other address in case
         * this one fails.
         */
        { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },

};

static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
        { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
        { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
        { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },

        { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
        { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
        { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
        { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },

        { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
        { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
        { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
        { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },

        /*
         * This PCI device has an alternate address on some
         * processors, such as the Core i7 860.
         */
        { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
};

static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
                /* Memory controller */
        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
                        /* Exists only for RDIMM */
        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },

                /* Channel 0 */
        { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
        { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
        { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
        { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },

                /* Channel 1 */
        { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
        { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
        { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
        { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },

                /* Channel 2 */
        { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },

                /* Generic Non-core registers */
        { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },

};

#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
static const struct pci_id_table pci_dev_table[] = {
        PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
        PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
        PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
        {0,}                    /* 0 terminated list. */
};

/*
 *      pci_device_id   table of the devices we are looking for
 */
static DEFINE_PCI_DEVICE_TABLE(i7core_pci_tbl) = {
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
        {0,}                    /* 0 terminated list. */
};

/****************************************************************************
                        Ancillary status routines
 ****************************************************************************/

        /* MC_CONTROL bits */
#define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + ch)))
#define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))

        /* MC_STATUS bits */
#define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
#define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << ch))

        /* MC_MAX_DOD read functions */
static inline int numdimms(u32 dimms)
{
        return (dimms & 0x3) + 1;
}

static inline int numrank(u32 rank)
{
        static int ranks[4] = { 1, 2, 4, -EINVAL };

        return ranks[rank & 0x3];
}

static inline int numbank(u32 bank)
{
        static int banks[4] = { 4, 8, 16, -EINVAL };

        return banks[bank & 0x3];
}

static inline int numrow(u32 row)
{
        static int rows[8] = {
                1 << 12, 1 << 13, 1 << 14, 1 << 15,
                1 << 16, -EINVAL, -EINVAL, -EINVAL,
        };

        return rows[row & 0x7];
}

static inline int numcol(u32 col)
{
        static int cols[4] = {
                1 << 10, 1 << 11, 1 << 12, -EINVAL,
        };
        return cols[col & 0x3];
}

static struct i7core_dev *get_i7core_dev(u8 socket)
{
        struct i7core_dev *i7core_dev;

        list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
                if (i7core_dev->socket == socket)
                        return i7core_dev;
        }

        return NULL;
}

static struct i7core_dev *alloc_i7core_dev(u8 socket,
                                           const struct pci_id_table *table)
{
        struct i7core_dev *i7core_dev;

        i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
        if (!i7core_dev)
                return NULL;

        i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
                                   GFP_KERNEL);
        if (!i7core_dev->pdev) {
                kfree(i7core_dev);
                return NULL;
        }

        i7core_dev->socket = socket;
        i7core_dev->n_devs = table->n_devs;
        list_add_tail(&i7core_dev->list, &i7core_edac_list);

        return i7core_dev;
}

static void free_i7core_dev(struct i7core_dev *i7core_dev)
{
        list_del(&i7core_dev->list);
        kfree(i7core_dev->pdev);
        kfree(i7core_dev);
}

/****************************************************************************
                        Memory check routines
 ****************************************************************************/

static int get_dimm_config(struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        struct pci_dev *pdev;
        int i, j;
        enum edac_type mode;
        enum mem_type mtype;
        struct dimm_info *dimm;

        /* Get data from the MC register, function 0 */
        pdev = pvt->pci_mcr[0];
        if (!pdev)
                return -ENODEV;

        /* Device 3 function 0 reads */
        pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
        pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
        pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
        pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);

        debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
                pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
                pvt->info.max_dod, pvt->info.ch_map);

        if (ECC_ENABLED(pvt)) {
                debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
                if (ECCx8(pvt))
                        mode = EDAC_S8ECD8ED;
                else
                        mode = EDAC_S4ECD4ED;
        } else {
                debugf0("ECC disabled\n");
                mode = EDAC_NONE;
        }

        /* FIXME: need to handle the error codes */
        debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
                "x%x x 0x%x\n",
                numdimms(pvt->info.max_dod),
                numrank(pvt->info.max_dod >> 2),
                numbank(pvt->info.max_dod >> 4),
                numrow(pvt->info.max_dod >> 6),
                numcol(pvt->info.max_dod >> 9));

        for (i = 0; i < NUM_CHANS; i++) {
                u32 data, dimm_dod[3], value[8];

                if (!pvt->pci_ch[i][0])
                        continue;

                if (!CH_ACTIVE(pvt, i)) {
                        debugf0("Channel %i is not active\n", i);
                        continue;
                }
                if (CH_DISABLED(pvt, i)) {
                        debugf0("Channel %i is disabled\n", i);
                        continue;
                }

                /* Devices 4-6 function 0 */
                pci_read_config_dword(pvt->pci_ch[i][0],
                                MC_CHANNEL_DIMM_INIT_PARAMS, &data);

                pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
                                                4 : 2;

                if (data & REGISTERED_DIMM)
                        mtype = MEM_RDDR3;
                else
                        mtype = MEM_DDR3;
#if 0
                if (data & THREE_DIMMS_PRESENT)
                        pvt->channel[i].dimms = 3;
                else if (data & SINGLE_QUAD_RANK_PRESENT)
                        pvt->channel[i].dimms = 1;
                else
                        pvt->channel[i].dimms = 2;
#endif

                /* Devices 4-6 function 1 */
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM0, &dimm_dod[0]);
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM1, &dimm_dod[1]);
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM2, &dimm_dod[2]);

                debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
                        "%d ranks, %cDIMMs\n",
                        i,
                        RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
                        data,
                        pvt->channel[i].ranks,
                        (data & REGISTERED_DIMM) ? 'R' : 'U');

                for (j = 0; j < 3; j++) {
                        u32 banks, ranks, rows, cols;
                        u32 size, npages;

                        if (!DIMM_PRESENT(dimm_dod[j]))
                                continue;

                        dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
                                       i, j, 0);
                        banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
                        ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
                        rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
                        cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));

                        /*
                         * Size in MiB: each (rank, bank, row, col) address
                         * holds 8 bytes (64-bit data bus), hence the extra
                         * shift by 3 on top of the >> 20 bytes-to-MiB
                         * conversion.
                         */
                        size = (rows * cols * banks * ranks) >> (20 - 3);

                        debugf0("\tdimm %d %d MiB offset: %x, "
                                "bank: %d, rank: %d, row: %#x, col: %#x\n",
                                j, size,
                                RANKOFFSET(dimm_dod[j]),
                                banks, ranks, rows, cols);

                        npages = MiB_TO_PAGES(size);

                        dimm->nr_pages = npages;

                        switch (banks) {
                        case 4:
                                dimm->dtype = DEV_X4;
                                break;
                        case 8:
                                dimm->dtype = DEV_X8;
                                break;
                        case 16:
                                dimm->dtype = DEV_X16;
                                break;
                        default:
                                dimm->dtype = DEV_UNKNOWN;
                        }

                        snprintf(dimm->label, sizeof(dimm->label),
                                 "CPU#%uChannel#%u_DIMM#%u",
                                 pvt->i7core_dev->socket, i, j);
                        dimm->grain = 8;
                        dimm->edac_mode = mode;
                        dimm->mtype = mtype;
                }

                pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
                pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
                pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
                pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
                pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
                pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
                pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
                pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
                debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
                for (j = 0; j < 8; j++)
                        debugf1("\t\t%#x\t%#x\t%#x\n",
                                (value[j] >> 27) & 0x1,
                                (value[j] >> 24) & 0x7,
                                (value[j] & ((1 << 24) - 1)));
        }

        return 0;
}

/****************************************************************************
                        Error insertion routines
 ****************************************************************************/

/* The i7core has independent error injection features per channel.
   However, to keep the code simpler, we don't allow enabling error injection
   on more than one channel.
   Also, since a change to an inject parameter is only applied at enable time,
   we disable error injection on every write to the sysfs nodes that
   control error injection.
 */
static int disable_inject(const struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;

        pvt->inject.enable = 0;

        if (!pvt->pci_ch[pvt->inject.channel][0])
                return -ENODEV;

        pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
                                MC_CHANNEL_ERROR_INJECT, 0);

        return 0;
}

/*
 * i7core inject.section
 *
 *      accept and store error injection inject.section value
 *      bit 0 - refers to the lower 32-byte half cacheline
 *      bit 1 - refers to the upper 32-byte half cacheline
 */
static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
                                           const char *data, size_t count)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        unsigned long value;
        int rc;

        if (pvt->inject.enable)
                disable_inject(mci);

        rc = strict_strtoul(data, 10, &value);
        if ((rc < 0) || (value > 3))
                return -EIO;

        pvt->inject.section = (u32) value;
        return count;
}

static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
                                              char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        return sprintf(data, "0x%08x\n", pvt->inject.section);
}

/*
 * i7core inject.type
 *
 *      accept and store error injection inject.type value
 *      bit 0 - repeat enable - Enable error repetition
 *      bit 1 - inject ECC error
 *      bit 2 - inject parity error
 */
static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
                                        const char *data, size_t count)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        unsigned long value;
        int rc;

        if (pvt->inject.enable)
                disable_inject(mci);

        rc = strict_strtoul(data, 10, &value);
        if ((rc < 0) || (value > 7))
                return -EIO;

        pvt->inject.type = (u32) value;
        return count;
}

static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
                                              char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        return sprintf(data, "0x%08x\n", pvt->inject.type);
}

/*
 * i7core_inject_eccmask_store
 *
 * The type of error (UE/CE) will depend on the inject.eccmask value:
 *   Any bit set to 1 will flip the corresponding ECC bit.
 *   Correctable errors can be injected by flipping 1 bit or the bits within
 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 *   uncorrectable error to be injected.
 */
static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
                                        const char *data, size_t count)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        unsigned long value;
        int rc;

        if (pvt->inject.enable)
                disable_inject(mci);

        rc = strict_strtoul(data, 10, &value);
        if (rc < 0)
                return -EIO;

        pvt->inject.eccmask = (u32) value;
        return count;
}

static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
                                              char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
}

/*
 * i7core_addrmatch
 *
 * Sysfs attributes that set the address-match criteria for error
 * injection: channel, dimm, rank, bank, page and col. Each accepts
 * either a value below its limit or "any" to wildcard that field.
 */

#define DECLARE_ADDR_MATCH(param, limit)                        \
static ssize_t i7core_inject_store_##param(                     \
                struct mem_ctl_info *mci,                       \
                const char *data, size_t count)                 \
{                                                               \
        struct i7core_pvt *pvt;                                 \
        long value;                                             \
        int rc;                                                 \
                                                                \
        debugf1("%s()\n", __func__);                            \
        pvt = mci->pvt_info;                                    \
                                                                \
        if (pvt->inject.enable)                                 \
                disable_inject(mci);                            \
                                                                \
        if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
                value = -1;                                     \
        else {                                                  \
                rc = strict_strtoul(data, 10, &value);          \
                if ((rc < 0) || (value >= limit))               \
                        return -EIO;                            \
        }                                                       \
                                                                \
        pvt->inject.param = value;                              \
                                                                \
        return count;                                           \
}                                                               \
                                                                \
static ssize_t i7core_inject_show_##param(                      \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt;                                 \
                                                                \
        pvt = mci->pvt_info;                                    \
        debugf1("%s() pvt=%p\n", __func__, pvt);                \
        if (pvt->inject.param < 0)                              \
                return sprintf(data, "any\n");                  \
        else                                                    \
                return sprintf(data, "%d\n", pvt->inject.param);\
}

#define ATTR_ADDR_MATCH(param)                                  \
        {                                                       \
                .attr = {                                       \
                        .name = #param,                         \
                        .mode = (S_IRUGO | S_IWUSR)             \
                },                                              \
                .show  = i7core_inject_show_##param,            \
                .store = i7core_inject_store_##param,           \
        }

DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);
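
/*
 * Each DECLARE_ADDR_MATCH(param, limit) above generates an
 * i7core_inject_show_##param()/i7core_inject_store_##param() pair backing
 * one sysfs file: writes accept either a decimal value below "limit" or
 * the string "any" (stored internally as -1, i.e. wildcard).
 */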

static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
{
        u32 read;
        int count;

        debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
                dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
                where, val);

        for (count = 0; count < 10; count++) {
                if (count)
                        msleep(100);
                pci_write_config_dword(dev, where, val);
                pci_read_config_dword(dev, where, &read);

                if (read == val)
                        return 0;
        }

        i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
                "write=%08x. Read=%08x\n",
                dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
                where, val, read);

        return -EINVAL;
}

/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items will make the MCU ignore
 * that matching criterion for error injection.
 *
 * Note that the error will only happen after a write operation to
 * memory that matches the condition. If REPEAT_EN is not enabled in the
 * inject mask, it will produce just one error. Otherwise, it will repeat
 * until the inject mask is cleared.
 *
 * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
 *    is reliable enough to check if the MC is using the
 *    three channels. However, this is not clear in the datasheet.
 */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
                                       const char *data, size_t count)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 injectmask;
        u64 mask = 0;
        int  rc;
        long enable;

        if (!pvt->pci_ch[pvt->inject.channel][0])
                return 0;

        rc = strict_strtoul(data, 10, &enable);
        if (rc < 0)
                return 0;

        if (enable) {
                pvt->inject.enable = 1;
        } else {
                disable_inject(mci);
                return count;
        }

        /* Sets pvt->inject.dimm mask */
        if (pvt->inject.dimm < 0)
                mask |= 1LL << 41;
        else {
                if (pvt->channel[pvt->inject.channel].dimms > 2)
                        mask |= (pvt->inject.dimm & 0x3LL) << 35;
                else
                        mask |= (pvt->inject.dimm & 0x1LL) << 36;
        }

        /* Sets pvt->inject.rank mask */
        if (pvt->inject.rank < 0)
                mask |= 1LL << 40;
        else {
                if (pvt->channel[pvt->inject.channel].dimms > 2)
                        mask |= (pvt->inject.rank & 0x1LL) << 34;
                else
                        mask |= (pvt->inject.rank & 0x3LL) << 34;
        }

        /* Sets pvt->inject.bank mask (5-bit field; the limit is 32) */
        if (pvt->inject.bank < 0)
                mask |= 1LL << 39;
        else
                mask |= (pvt->inject.bank & 0x1fLL) << 30;

        /* Sets pvt->inject.page mask */
        if (pvt->inject.page < 0)
                mask |= 1LL << 38;
        else
                mask |= (pvt->inject.page & 0xffff) << 14;

        /* Sets pvt->inject.column mask */
        if (pvt->inject.col < 0)
                mask |= 1LL << 37;
        else
                mask |= (pvt->inject.col & 0x3fff);

        /*
         * bit    0: REPEAT_EN
         * bits 1-2: MASK_HALF_CACHELINE
         * bit    3: INJECT_ECC
         * bit    4: INJECT_ADDR_PARITY
         */

        injectmask = (pvt->inject.type & 1) |
                     (pvt->inject.section & 0x3) << 1 |
                     (pvt->inject.type & 0x6) << (3 - 1);
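
        /*
         * Worked example (hypothetical settings): inject.type = 3
         * (repeat + ECC) and inject.section = 1 (lower half cacheline)
         * encode as injectmask = 1 | (1 << 1) | (2 << 2) = 0x0b.
         */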

        /* Unlock writes to registers - this register is write only */
        pci_write_config_dword(pvt->pci_noncore,
                               MC_CFG_CONTROL, 0x2);

        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ADDR_MATCH, mask);
        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ERROR_INJECT, injectmask);

        /*
         * This is undocumented, based on my tests.
         * Without writing 8 to this register, errors aren't injected. Not sure
         * why.
         */
        pci_write_config_dword(pvt->pci_noncore,
                               MC_CFG_CONTROL, 8);

        debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
                " inject 0x%08x\n",
                mask, pvt->inject.eccmask, injectmask);


        return count;
}

static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
                                        char *data)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 injectmask;

        if (!pvt->pci_ch[pvt->inject.channel][0])
                return 0;

        pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ERROR_INJECT, &injectmask);

        debugf0("Inject error read: 0x%018x\n", injectmask);

        if (injectmask & 0x0c)
                pvt->inject.enable = 1;

        return sprintf(data, "%d\n", pvt->inject.enable);
}

#define DECLARE_COUNTER(param)                                  \
static ssize_t i7core_show_counter_##param(                     \
                struct mem_ctl_info *mci,                       \
                char *data)                                     \
{                                                               \
        struct i7core_pvt *pvt = mci->pvt_info;                 \
                                                                \
        debugf1("%s()\n", __func__);                            \
        if (!pvt->ce_count_available || (pvt->is_registered))   \
                return sprintf(data, "data unavailable\n");     \
        return sprintf(data, "%lu\n",                           \
                        pvt->udimm_ce_count[param]);            \
}

#define ATTR_COUNTER(param)                                     \
        {                                                       \
                .attr = {                                       \
                        .name = __stringify(udimm##param),      \
                        .mode = (S_IRUGO | S_IWUSR)             \
                },                                              \
                .show  = i7core_show_counter_##param            \
        }

DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
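
/*
 * DECLARE_COUNTER(0)..DECLARE_COUNTER(2) generate the show handlers for
 * the "udimm0".."udimm2" files in the all_channel_counts group below;
 * each reports the matching udimm_ce_count[] entry, and "data unavailable"
 * on registered-DIMM setups, where the per-channel rdimm counters are
 * used instead.
 */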

/*
 * Sysfs struct
 */

static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
        ATTR_ADDR_MATCH(channel),
        ATTR_ADDR_MATCH(dimm),
        ATTR_ADDR_MATCH(rank),
        ATTR_ADDR_MATCH(bank),
        ATTR_ADDR_MATCH(page),
        ATTR_ADDR_MATCH(col),
        { } /* End of list */
};

static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
        .name  = "inject_addrmatch",
        .mcidev_attr = i7core_addrmatch_attrs,
};

static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
        ATTR_COUNTER(0),
        ATTR_COUNTER(1),
        ATTR_COUNTER(2),
        { .attr = { .name = NULL } }
};

static const struct mcidev_sysfs_group i7core_udimm_counters = {
        .name  = "all_channel_counts",
        .mcidev_attr = i7core_udimm_counters_attrs,
};

static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
        {
                .attr = {
                        .name = "inject_section",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_section_show,
                .store = i7core_inject_section_store,
        }, {
                .attr = {
                        .name = "inject_type",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_type_show,
                .store = i7core_inject_type_store,
        }, {
                .attr = {
                        .name = "inject_eccmask",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_eccmask_show,
                .store = i7core_inject_eccmask_store,
        }, {
                .grp = &i7core_inject_addrmatch,
        }, {
                .attr = {
                        .name = "inject_enable",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_enable_show,
                .store = i7core_inject_enable_store,
        },
        { }     /* End of list */
};

static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
        {
                .attr = {
                        .name = "inject_section",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_section_show,
                .store = i7core_inject_section_store,
        }, {
                .attr = {
                        .name = "inject_type",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_type_show,
                .store = i7core_inject_type_store,
        }, {
                .attr = {
                        .name = "inject_eccmask",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_eccmask_show,
                .store = i7core_inject_eccmask_store,
        }, {
                .grp = &i7core_inject_addrmatch,
        }, {
                .attr = {
                        .name = "inject_enable",
                        .mode = (S_IRUGO | S_IWUSR)
                },
                .show  = i7core_inject_enable_show,
                .store = i7core_inject_enable_store,
        }, {
                .grp = &i7core_udimm_counters,
        },
        { }     /* End of list */
};
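
/*
 * Userspace usage sketch (assuming the standard EDAC sysfs layout; the
 * mc instance number may differ). Injecting one correctable ECC error
 * on any address of channel 0 could look like:
 *
 *   echo 0   > /sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
 *   echo any > /sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
 *   echo 1   > /sys/devices/system/edac/mc/mc0/inject_eccmask
 *   echo 2   > /sys/devices/system/edac/mc/mc0/inject_type
 *   echo 1   > /sys/devices/system/edac/mc/mc0/inject_enable
 */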

/****************************************************************************
        Device initialization routines: put/get, init/exit
 ****************************************************************************/

/*
 *      i7core_put_devices      'put' all the devices that we have
 *                              reserved via 'get'
 */
static void i7core_put_devices(struct i7core_dev *i7core_dev)
{
        int i;

        debugf0(__FILE__ ": %s()\n", __func__);
        for (i = 0; i < i7core_dev->n_devs; i++) {
                struct pci_dev *pdev = i7core_dev->pdev[i];
                if (!pdev)
                        continue;
                debugf0("Removing dev %02x:%02x.%d\n",
                        pdev->bus->number,
                        PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
                pci_dev_put(pdev);
        }
}

static void i7core_put_all_devices(void)
{
        struct i7core_dev *i7core_dev, *tmp;

        list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
                i7core_put_devices(i7core_dev);
                free_i7core_dev(i7core_dev);
        }
}

static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
{
        struct pci_dev *pdev = NULL;
        int i;

        /*
         * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core pci buses
         * aren't announced by ACPI, so we need to use a legacy scan probe
         * to detect them.
         */
        while (table && table->descr) {
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
                if (unlikely(!pdev)) {
                        for (i = 0; i < MAX_SOCKET_BUSES; i++)
                                pcibios_scan_specific_bus(255-i);
                }
                pci_dev_put(pdev);
                table++;
        }
}

static unsigned i7core_pci_lastbus(void)
{
        int last_bus = 0, bus;
        struct pci_bus *b = NULL;

        while ((b = pci_find_next_bus(b)) != NULL) {
                bus = b->number;
                debugf0("Found bus %d\n", bus);
                if (bus > last_bus)
                        last_bus = bus;
        }

        debugf0("Last bus %d\n", last_bus);

        return last_bus;
}

/*
 *      i7core_get_onedevice    Find and perform a 'get' operation on one
 *                      of the MCH's device/functions that this driver
 *                      wants to reference
 */
static int i7core_get_onedevice(struct pci_dev **prev,
                                const struct pci_id_table *table,
                                const unsigned devno,
                                const unsigned last_bus)
{
        struct i7core_dev *i7core_dev;
        const struct pci_id_descr *dev_descr = &table->descr[devno];

        struct pci_dev *pdev = NULL;
        u8 bus = 0;
        u8 socket = 0;

        pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                              dev_descr->dev_id, *prev);

        /*
         * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
         * are at address 8086:2c40, instead of 8086:2c41. So, we need
         * to probe for the alternate address in case of failure.
         */
        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);

        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
                                      *prev);

        if (!pdev) {
                if (*prev) {
                        *prev = pdev;
                        return 0;
                }

                if (dev_descr->optional)
                        return 0;

                if (devno == 0)
                        return -ENODEV;

                i7core_printk(KERN_INFO,
                        "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
                        dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

                /* End of list, leave */
                return -ENODEV;
        }
        bus = pdev->bus->number;

        socket = last_bus - bus;

        i7core_dev = get_i7core_dev(socket);
        if (!i7core_dev) {
                i7core_dev = alloc_i7core_dev(socket, table);
                if (!i7core_dev) {
                        pci_dev_put(pdev);
                        return -ENOMEM;
                }
        }

        if (i7core_dev->pdev[devno]) {
                i7core_printk(KERN_ERR,
                        "Duplicated device for "
                        "dev %02x:%02x.%d PCI ID %04x:%04x\n",
                        bus, dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
                pci_dev_put(pdev);
                return -ENODEV;
        }

        i7core_dev->pdev[devno] = pdev;

        /* Sanity check */
        if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
                        PCI_FUNC(pdev->devfn) != dev_descr->func)) {
                i7core_printk(KERN_ERR,
                        "Device PCI ID %04x:%04x "
                        "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
                        bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                        bus, dev_descr->dev, dev_descr->func);
                return -ENODEV;
        }

        /* Be sure that the device is enabled */
        if (unlikely(pci_enable_device(pdev) < 0)) {
                i7core_printk(KERN_ERR,
                        "Couldn't enable "
                        "dev %02x:%02x.%d PCI ID %04x:%04x\n",
                        bus, dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
                return -ENODEV;
        }

        debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
                socket, bus, dev_descr->dev,
                dev_descr->func,
                PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

        /*
         * As stated in drivers/pci/search.c, the reference count for
         * @from is always decremented if it is not %NULL. As we keep
         * references to all devices, we need to do an extra 'get' on
         * each one here.
         */
1336         pci_dev_get(pdev);
1337
1338         *prev = pdev;
1339
1340         return 0;
1341 }
1342
1343 static int i7core_get_all_devices(void)
1344 {
1345         int i, rc, last_bus;
1346         struct pci_dev *pdev = NULL;
1347         const struct pci_id_table *table = pci_dev_table;
1348
1349         last_bus = i7core_pci_lastbus();
1350
1351         while (table && table->descr) {
1352                 for (i = 0; i < table->n_devs; i++) {
1353                         pdev = NULL;
1354                         do {
1355                                 rc = i7core_get_onedevice(&pdev, table, i,
1356                                                           last_bus);
1357                                 if (rc < 0) {
1358                                         if (i == 0) {
1359                                                 i = table->n_devs;
1360                                                 break;
1361                                         }
1362                                         i7core_put_all_devices();
1363                                         return -ENODEV;
1364                                 }
1365                         } while (pdev);
1366                 }
1367                 table++;
1368         }
1369
1370         return 0;
1371 }
1372
1373 static int mci_bind_devs(struct mem_ctl_info *mci,
1374                          struct i7core_dev *i7core_dev)
1375 {
1376         struct i7core_pvt *pvt = mci->pvt_info;
1377         struct pci_dev *pdev;
1378         int i, func, slot;
1379         char *family;
1380
1381         pvt->is_registered = false;
1382         pvt->enable_scrub  = false;
1383         for (i = 0; i < i7core_dev->n_devs; i++) {
1384                 pdev = i7core_dev->pdev[i];
1385                 if (!pdev)
1386                         continue;
1387
1388                 func = PCI_FUNC(pdev->devfn);
1389                 slot = PCI_SLOT(pdev->devfn);
1390                 if (slot == 3) {
1391                         if (unlikely(func > MAX_MCR_FUNC))
1392                                 goto error;
1393                         pvt->pci_mcr[func] = pdev;
1394                 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1395                         if (unlikely(func > MAX_CHAN_FUNC))
1396                                 goto error;
1397                         pvt->pci_ch[slot - 4][func] = pdev;
1398                 } else if (!slot && !func) {
1399                         pvt->pci_noncore = pdev;
1400
1401                         /* Detect the processor family */
1402                         switch (pdev->device) {
1403                         case PCI_DEVICE_ID_INTEL_I7_NONCORE:
1404                                 family = "Xeon 35xx/ i7core";
1405                                 pvt->enable_scrub = false;
1406                                 break;
1407                         case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
1408                                 family = "i7-800/i5-700";
1409                                 pvt->enable_scrub = false;
1410                                 break;
1411                         case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
1412                                 family = "Xeon 34xx";
1413                                 pvt->enable_scrub = false;
1414                                 break;
1415                         case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
1416                                 family = "Xeon 55xx";
1417                                 pvt->enable_scrub = true;
1418                                 break;
1419                         case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
1420                                 family = "Xeon 56xx / i7-900";
1421                                 pvt->enable_scrub = true;
1422                                 break;
1423                         default:
1424                                 family = "unknown";
1425                                 pvt->enable_scrub = false;
1426                         }
1427                         debugf0("Detected processor type %s\n", family);
1428                 } else
1429                         goto error;
1430
1431                 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1432                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1433                         pdev, i7core_dev->socket);
1434
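                /*
                 * Device 3 function 2 exposes the per-DIMM
                 * MC_COR_ECC_CNT registers used by the RDIMM error
                 * path; its presence distinguishes registered from
                 * unbuffered DIMM setups.
                 */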
1435                 if (PCI_SLOT(pdev->devfn) == 3 &&
1436                         PCI_FUNC(pdev->devfn) == 2)
1437                         pvt->is_registered = true;
1438         }
1439
1440         return 0;
1441
1442 error:
1443         i7core_printk(KERN_ERR, "Device %d, function %d "
1444                       "is out of the expected range\n",
1445                       slot, func);
1446         return -EINVAL;
1447 }
1448
1449 /****************************************************************************
1450                         Error check routines
1451  ****************************************************************************/
1452 static void i7core_rdimm_update_errcount(struct mem_ctl_info *mci,
1453                                       const int chan,
1454                                       const int dimm,
1455                                       const int add)
1456 {
1457         int i;
1458
1459         for (i = 0; i < add; i++) {
1460                 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 0, 0, 0,
1461                                      chan, dimm, -1, "error", "", NULL);
1462         }
1463 }
1464
1465 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1466                                          const int chan,
1467                                          const int new0,
1468                                          const int new1,
1469                                          const int new2)
1470 {
1471         struct i7core_pvt *pvt = mci->pvt_info;
1472         int add0 = 0, add1 = 0, add2 = 0;
1473         /* Update the CE counters, unless this is the first time here */
1474         if (pvt->ce_count_available) {
1475                 /* Updates CE counters */
1476
1477                 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1478                 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1479                 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1480
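                /*
                 * The hardware counters are 15 bits wide, so a negative
                 * delta means the counter wrapped around since the last
                 * read; compensate for the wraparound.
                 */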
1481                 if (add2 < 0)
1482                         add2 += 0x7fff;
1483                 pvt->rdimm_ce_count[chan][2] += add2;
1484
1485                 if (add1 < 0)
1486                         add1 += 0x7fff;
1487                 pvt->rdimm_ce_count[chan][1] += add1;
1488
1489                 if (add0 < 0)
1490                         add0 += 0x7fff;
1491                 pvt->rdimm_ce_count[chan][0] += add0;
1492         } else
1493                 pvt->ce_count_available = 1;
1494
1495         /* Store the new values */
1496         pvt->rdimm_last_ce_count[chan][2] = new2;
1497         pvt->rdimm_last_ce_count[chan][1] = new1;
1498         pvt->rdimm_last_ce_count[chan][0] = new0;
1499
1500         /* Update the EDAC core */
1501         if (add0 != 0)
1502                 i7core_rdimm_update_errcount(mci, chan, 0, add0);
1503         if (add1 != 0)
1504                 i7core_rdimm_update_errcount(mci, chan, 1, add1);
1505         if (add2 != 0)
1506                 i7core_rdimm_update_errcount(mci, chan, 2, add2);
1507
1508 }
1509
1510 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1511 {
1512         struct i7core_pvt *pvt = mci->pvt_info;
1513         u32 rcv[3][2];
1514         int i, new0, new1, new2;
1515
1516         /* Read DEV 3: FUNC 2: MC_COR_ECC_CNT registers directly */
1517         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1518                                                                 &rcv[0][0]);
1519         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1520                                                                 &rcv[0][1]);
1521         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1522                                                                 &rcv[1][0]);
1523         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1524                                                                 &rcv[1][1]);
1525         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1526                                                                 &rcv[2][0]);
1527         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1528                                                                 &rcv[2][1]);
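
        /*
         * Each channel has two 32-bit count registers, and each register
         * packs two 15-bit correctable-error counters, one per DIMM,
         * extracted below via the DIMM_{TOP,BOT}_COR_ERR macros.
         */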
1529         for (i = 0; i < 3; i++) {
1530                 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1531                         (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1532                 /* If the channel has 3 DIMMs */
1533                 if (pvt->channel[i].dimms > 2) {
1534                         new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1535                         new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1536                         new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1537                 } else {
1538                         new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1539                                         DIMM_BOT_COR_ERR(rcv[i][0]);
1540                         new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1541                                         DIMM_BOT_COR_ERR(rcv[i][1]);
1542                         new2 = 0;
1543                 }
1544
1545                 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1546         }
1547 }
1548
1549 /* This function is based on the device 3 function 4 registers as described in:
1550  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1551  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1552  * also available at:
1553  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1554  */
1555 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1556 {
1557         struct i7core_pvt *pvt = mci->pvt_info;
1558         u32 rcv1, rcv0;
1559         int new0, new1, new2;
1560
1561         if (!pvt->pci_mcr[4]) {
1562                 debugf0("%s MCR registers not found\n", __func__);
1563                 return;
1564         }
1565
1566         /* Corrected test errors */
1567         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1568         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1569
1570         /* Store the new values */
1571         new2 = DIMM2_COR_ERR(rcv1);
1572         new1 = DIMM1_COR_ERR(rcv0);
1573         new0 = DIMM0_COR_ERR(rcv0);
1574
1575         /* Update the CE counters, unless this is the first time here */
1576         if (pvt->ce_count_available) {
1577                 /* Updates CE counters */
1578                 int add0, add1, add2;
1579
1580                 add2 = new2 - pvt->udimm_last_ce_count[2];
1581                 add1 = new1 - pvt->udimm_last_ce_count[1];
1582                 add0 = new0 - pvt->udimm_last_ce_count[0];
1583
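                /* Compensate for 15-bit counter wraparound */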
1584                 if (add2 < 0)
1585                         add2 += 0x7fff;
1586                 pvt->udimm_ce_count[2] += add2;
1587
1588                 if (add1 < 0)
1589                         add1 += 0x7fff;
1590                 pvt->udimm_ce_count[1] += add1;
1591
1592                 if (add0 < 0)
1593                         add0 += 0x7fff;
1594                 pvt->udimm_ce_count[0] += add0;
1595
1596                 if (add0 | add1 | add2)
1597                         i7core_printk(KERN_ERR, "New Corrected error(s): "
1598                                       "dimm0: +%d, dimm1: +%d, dimm2: +%d\n",
1599                                       add0, add1, add2);
1600         } else
1601                 pvt->ce_count_available = 1;
1602
1603         /* Store the new values */
1604         pvt->udimm_last_ce_count[2] = new2;
1605         pvt->udimm_last_ce_count[1] = new1;
1606         pvt->udimm_last_ce_count[0] = new0;
1607 }
1608
1609 /*
1610  * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and
1611  * IA-32 Architectures Software Developer's Manual Volume 3B, Nehalem is
1612  * defined as family 0x06, model 0x1a.
1613  *
1614  * The MCA registers used here are the following ones:
1615  *     struct mce field MCA Register
1616  *     m->status        MSR_IA32_MC8_STATUS
1617  *     m->addr          MSR_IA32_MC8_ADDR
1618  *     m->misc          MSR_IA32_MC8_MISC
1619  * In the case of Nehalem, the error information is encoded in the .status
1620  * and .misc fields.
1621  */
1622 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1623                                     const struct mce *m)
1624 {
1625         struct i7core_pvt *pvt = mci->pvt_info;
1626         char *type, *optype, *err, *msg;
1627         enum hw_event_mc_err_type tp_event;
1628         unsigned long error = m->status & 0x1ff0000l;
1629         bool uncorrected_error = m->mcgstatus & 1ll << 61;
1630         bool ripv = m->mcgstatus & 1;
1631         u32 optypenum = (m->status >> 4) & 0x07;
1632         u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1633         u32 dimm = (m->misc >> 16) & 0x3;
1634         u32 channel = (m->misc >> 18) & 0x3;
1635         u32 syndrome = m->misc >> 32;
1636         u32 errnum = find_first_bit(&error, 32);
1637
1638         if (uncorrected_error) {
1639                 if (ripv) {
1640                         type = "FATAL";
1641                         tp_event = HW_EVENT_ERR_FATAL;
1642                 } else {
1643                         type = "NON_FATAL";
1644                         tp_event = HW_EVENT_ERR_UNCORRECTED;
1645                 }
1646         } else {
1647                 type = "CORRECTED";
1648                 tp_event = HW_EVENT_ERR_CORRECTED;
1649         }
1650
1651         switch (optypenum) {
1652         case 0:
1653                 optype = "generic undef request";
1654                 break;
1655         case 1:
1656                 optype = "read error";
1657                 break;
1658         case 2:
1659                 optype = "write error";
1660                 break;
1661         case 3:
1662                 optype = "addr/cmd error";
1663                 break;
1664         case 4:
1665                 optype = "scrubbing error";
1666                 break;
1667         default:
1668                 optype = "reserved";
1669                 break;
1670         }
1671
1672         switch (errnum) {
1673         case 16:
1674                 err = "read ECC error";
1675                 break;
1676         case 17:
1677                 err = "RAS ECC error";
1678                 break;
1679         case 18:
1680                 err = "write parity error";
1681                 break;
1682         case 19:
1683                 err = "redundancy loss";
1684                 break;
1685         case 20:
1686                 err = "reserved";
1687                 break;
1688         case 21:
1689                 err = "memory range error";
1690                 break;
1691         case 22:
1692                 err = "RTID out of range";
1693                 break;
1694         case 23:
1695                 err = "address parity error";
1696                 break;
1697         case 24:
1698                 err = "byte enable parity error";
1699                 break;
1700         default:
1701                 err = "unknown";
1702         }
1703
1704         msg = kasprintf(GFP_ATOMIC,
1705                 "addr=0x%08llx cpu=%d count=%d Err=%08llx:%08llx (%s: %s)\n",
1706                 (long long) m->addr, m->cpu, core_err_cnt,
1707                 (long long)m->status, (long long)m->misc, optype, err);
1708
1709         /*
1710          * Call the helper to output the message.
1711          * FIXME: what to do if core_err_cnt > 1? Currently, it generates
1712          * only one event.
1713          */
1714         if (uncorrected_error || !pvt->is_registered)
1715                 edac_mc_handle_error(tp_event, mci,
1716                                      m->addr >> PAGE_SHIFT,
1717                                      m->addr & ~PAGE_MASK,
1718                                      syndrome,
1719                                      channel, dimm, -1,
1720                                      err, msg, m);
1721
1722         kfree(msg);
1723 }
1724
1725 /*
1726  *      i7core_check_error      Retrieve and process errors reported by the
1727  *                              hardware. Called by the Core module.
1728  */
1729 static void i7core_check_error(struct mem_ctl_info *mci)
1730 {
1731         struct i7core_pvt *pvt = mci->pvt_info;
1732         int i;
1733         unsigned count = 0;
1734         struct mce *m;
1735
1736         /*
1737          * MCE first step: Copy all mce errors into a temporary buffer
1738          * We use double buffering here, to reduce the risk of
1739          * losing an error.
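          *
          * mce_out is the producer index, advanced at NMI time by
          * i7core_mce_check_error(), and mce_in is the consumer index;
          * the ring thus holds (mce_out - mce_in) mod MCE_LOG_LEN
          * pending entries.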
1740          */
1741         smp_rmb();
1742         count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1743                 % MCE_LOG_LEN;
1744         if (!count)
1745                 goto check_ce_error;
1746
1747         m = pvt->mce_outentry;
1748         if (pvt->mce_in + count > MCE_LOG_LEN) {
1749                 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1750
1751                 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1752                 smp_wmb();
1753                 pvt->mce_in = 0;
1754                 count -= l;
1755                 m += l;
1756         }
1757         memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1758         smp_wmb();
1759         pvt->mce_in += count;
1760
1761         smp_rmb();
1762         if (pvt->mce_overrun) {
1763                 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1764                               pvt->mce_overrun);
1765                 smp_wmb();
1766                 pvt->mce_overrun = 0;
1767         }
1768
1769         /*
1770          * MCE second step: parse errors and display
1771          */
1772         for (i = 0; i < count; i++)
1773                 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1774
1775         /*
1776          * Now, let's increment CE error counts
1777          */
1778 check_ce_error:
1779         if (!pvt->is_registered)
1780                 i7core_udimm_check_mc_ecc_err(mci);
1781         else
1782                 i7core_rdimm_check_mc_ecc_err(mci);
1783 }
1784
1785 /*
1786  * i7core_mce_check_error       Replicates mcelog routine to get errors
1787  *                              This routine simply queues mcelog errors, and
1788  *                              returns. The error itself is handled later
1789  *                              by i7core_check_error.
1790  * WARNING: As this routine should be called at NMI time, extra care should
1791  * be taken to avoid deadlocks, and to be as fast as possible.
1792  */
1793 static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
1794                                   void *data)
1795 {
1796         struct mce *mce = (struct mce *)data;
1797         struct i7core_dev *i7_dev;
1798         struct mem_ctl_info *mci;
1799         struct i7core_pvt *pvt;
1800
1801         i7_dev = get_i7core_dev(mce->socketid);
1802         if (!i7_dev)
1803                 return NOTIFY_BAD;
1804
1805         mci = i7_dev->mci;
1806         pvt = mci->pvt_info;
1807
1808         /*
1809          * Just let mcelog handle it if the error is
1810          * outside the memory controller
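          * (memory controller errors use the compound MCA error code
          * 0000 0000 1MMM CCCC, i.e. bits 15:7 of MCi_STATUS read as
          * 000000001b; see the Intel SDM, Volume 3B)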
1811          */
1812         if (((mce->status & 0xffff) >> 7) != 1)
1813                 return NOTIFY_DONE;
1814
1815         /* Bank 8 registers are the only ones that we know how to handle */
1816         if (mce->bank != 8)
1817                 return NOTIFY_DONE;
1818
1819 #ifdef CONFIG_SMP
1820         /* Only handle the event if it belongs to this memory controller */
1821         if (mce->socketid != pvt->i7core_dev->socket)
1822                 return NOTIFY_DONE;
1823 #endif
1824
1825         smp_rmb();
1826         if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1827                 smp_wmb();
1828                 pvt->mce_overrun++;
1829                 return NOTIFY_DONE;
1830         }
1831
1832         /* Copy the memory error into the ring buffer */
1833         memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1834         smp_wmb();
1835         pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1836
1837         /* Handle fatal errors immediately */
1838         if (mce->mcgstatus & 1)
1839                 i7core_check_error(mci);
1840
1841         /* Advise mcelog that the errors were handled */
1842         return NOTIFY_STOP;
1843 }
1844
1845 static struct notifier_block i7_mce_dec = {
1846         .notifier_call  = i7core_mce_check_error,
1847 };
1848
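/* Layout of an SMBIOS type 17 (Memory Device) table entry */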
1849 struct memdev_dmi_entry {
1850         u8 type;
1851         u8 length;
1852         u16 handle;
1853         u16 phys_mem_array_handle;
1854         u16 mem_err_info_handle;
1855         u16 total_width;
1856         u16 data_width;
1857         u16 size;
1858         u8 form;
1859         u8 device_set;
1860         u8 device_locator;
1861         u8 bank_locator;
1862         u8 memory_type;
1863         u16 type_detail;
1864         u16 speed;
1865         u8 manufacturer;
1866         u8 serial_number;
1867         u8 asset_tag;
1868         u8 part_number;
1869         u8 attributes;
1870         u32 extended_size;
1871         u16 conf_mem_clk_speed;
1872 } __attribute__((__packed__));
1873
1874
1875 /*
1876  * Decode the DRAM clock frequency. Be paranoid: make sure that all
1877  * memory devices report the same speed, and if they don't, consider
1878  * all speeds to be invalid.
1879  */
1880 static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
1881 {
1882         int *dclk_freq = _dclk_freq;
1883         u16 dmi_mem_clk_speed;
1884
1885         if (*dclk_freq == -1)
1886                 return;
1887
1888         if (dh->type == DMI_ENTRY_MEM_DEVICE) {
1889                 struct memdev_dmi_entry *memdev_dmi_entry =
1890                         (struct memdev_dmi_entry *)dh;
1891                 unsigned long conf_mem_clk_speed_offset =
1892                         (unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
1893                         (unsigned long)&memdev_dmi_entry->type;
1894                 unsigned long speed_offset =
1895                         (unsigned long)&memdev_dmi_entry->speed -
1896                         (unsigned long)&memdev_dmi_entry->type;
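
                /*
                 * SMBIOS type 17 entries grew across spec revisions;
                 * dh->length says how much of the structure the BIOS
                 * actually filled in, so check each member's offset
                 * against it before reading that member.
                 */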
1897
1898                 /* Check that a DIMM is present */
1899                 if (memdev_dmi_entry->size == 0)
1900                         return;
1901
1902                 /*
1903                  * Pick the configured speed if it's available; otherwise fall
1904                  * back to the DIMM speed, or give up if neither is present.
1905                  */
1906                 if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
1907                         dmi_mem_clk_speed =
1908                                 memdev_dmi_entry->conf_mem_clk_speed;
1909                 } else if (memdev_dmi_entry->length > speed_offset) {
1910                         dmi_mem_clk_speed = memdev_dmi_entry->speed;
1911                 } else {
1912                         *dclk_freq = -1;
1913                         return;
1914                 }
1915
1916                 if (*dclk_freq == 0) {
1917                         /* First pass, speed was 0 */
1918                         if (dmi_mem_clk_speed > 0) {
1919                                 /* Set speed if a valid speed is read */
1920                                 *dclk_freq = dmi_mem_clk_speed;
1921                         } else {
1922                                 /* Otherwise we don't have a valid speed */
1923                                 *dclk_freq = -1;
1924                         }
1925                 } else if (*dclk_freq > 0 &&
1926                            *dclk_freq != dmi_mem_clk_speed) {
1927                         /*
1928                          * If we have a speed, check that all DIMMs report the
1929                          * same speed; otherwise mark the speed as invalid.
1930                          */
1931                         *dclk_freq = -1;
1932                 }
1933         }
1934 }
1935
1936 /*
1937  * The default DCLK frequency is used as a fallback if we
1938  * fail to find anything reliable in the DMI. The value
1939  * is taken straight from the datasheet.
1940  */
1941 #define DEFAULT_DCLK_FREQ 800
1942
1943 static int get_dclk_freq(void)
1944 {
1945         int dclk_freq = 0;
1946
1947         dmi_walk(decode_dclk, (void *)&dclk_freq);
1948
1949         if (dclk_freq < 1)
1950                 return DEFAULT_DCLK_FREQ;
1951
1952         return dclk_freq;
1953 }
1954
1955 /*
1956  * set_sdram_scrub_rate         This routine sets the byte/sec bandwidth scrub
1957  *                              rate in hardware according to the SCRUBINTERVAL
1958  *                              formula found in the datasheet.
1959  */
1960 static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
1961 {
1962         struct i7core_pvt *pvt = mci->pvt_info;
1963         struct pci_dev *pdev;
1964         u32 dw_scrub;
1965         u32 dw_ssr;
1966
1967         /* Get data from the MC register, function 2 */
1968         pdev = pvt->pci_mcr[2];
1969         if (!pdev)
1970                 return -ENODEV;
1971
1972         pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
1973
1974         if (new_bw == 0) {
1975                 /* Prepare to disable patrol scrub */
1976                 dw_scrub &= ~STARTSCRUB;
1977                 /* Stop the patrol scrub engine */
1978                 write_and_test(pdev, MC_SCRUB_CONTROL,
1979                                dw_scrub & ~SCRUBINTERVAL_MASK);
1980
1981                 /* Get current status of scrub rate and set bit to disable */
1982                 pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
1983                 dw_ssr &= ~SSR_MODE_MASK;
1984                 dw_ssr |= SSR_MODE_DISABLE;
1985         } else {
1986                 const int cache_line_size = 64;
1987                 const u32 freq_dclk_mhz = pvt->dclk_freq;
1988                 unsigned long long scrub_interval;
1989                 /*
1990                  * Translate the desired scrub rate to a register value and
1991                  * program the corresponding register value.
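                 *
                 * SCRUBINTERVAL is the number of DCLK cycles between two
                 * consecutive cache-line scrubs, so:
                 *
                 *   bandwidth = cache_line_size * dclk_hz / SCRUBINTERVAL
                 *   SCRUBINTERVAL = cache_line_size * freq_dclk_mhz *
                 *                   10^6 / new_bw
                 *
                 * e.g. freq_dclk_mhz = 800 and new_bw = 5120000 bytes/s
                 * give SCRUBINTERVAL = 64 * 800 * 10^6 / 5120000 = 10000.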
1992                  */
1993                 scrub_interval = (unsigned long long)freq_dclk_mhz *
1994                         cache_line_size * 1000000;
1995                 do_div(scrub_interval, new_bw);
1996
1997                 if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
1998                         return -EINVAL;
1999
2000                 dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;
2001
2002                 /* Start the patrol scrub engine */
2003                 pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
2004                                        STARTSCRUB | dw_scrub);
2005
2006                 /* Get current status of scrub rate and set bit to enable */
2007                 pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2008                 dw_ssr &= ~SSR_MODE_MASK;
2009                 dw_ssr |= SSR_MODE_ENABLE;
2010         }
2011         /* Disable or enable scrubbing */
2012         pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
2013
2014         return new_bw;
2015 }
2016
2017 /*
2018  * get_sdram_scrub_rate         This routine converts the current scrub rate
2019  *                              value into byte/sec bandwidth according to the
2020  *                              SCRUBINTERVAL formula found in the datasheet.
2021  */
2022 static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
2023 {
2024         struct i7core_pvt *pvt = mci->pvt_info;
2025         struct pci_dev *pdev;
2026         const u32 cache_line_size = 64;
2027         const u32 freq_dclk_mhz = pvt->dclk_freq;
2028         unsigned long long scrub_rate;
2029         u32 scrubval;
2030
2031         /* Get data from the MC register, function 2 */
2032         pdev = pvt->pci_mcr[2];
2033         if (!pdev)
2034                 return -ENODEV;
2035
2036         /* Get current scrub control data */
2037         pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
2038
2039         /* Mask the highest 8 bits to 0 */
2040         scrubval &= SCRUBINTERVAL_MASK;
2041         if (!scrubval)
2042                 return 0;
2043
2044         /* Convert the scrub rate register value into byte/sec bandwidth */
2045         scrub_rate = (unsigned long long)freq_dclk_mhz *
2046                 1000000 * cache_line_size;
2047         do_div(scrub_rate, scrubval);
2048         return (int)scrub_rate;
2049 }
2050
2051 static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
2052 {
2053         struct i7core_pvt *pvt = mci->pvt_info;
2054         u32 pci_lock;
2055
2056         /* Unlock writes to pci registers */
2057         pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2058         pci_lock &= ~0x3;
2059         pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2060                                pci_lock | MC_CFG_UNLOCK);
2061
2062         mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
2063         mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
2064 }
2065
2066 static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
2067 {
2068         struct i7core_pvt *pvt = mci->pvt_info;
2069         u32 pci_lock;
2070
2071         /* Lock writes to pci registers */
2072         pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2073         pci_lock &= ~0x3;
2074         pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2075                                pci_lock | MC_CFG_LOCK);
2076 }
2077
2078 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
2079 {
2080         pvt->i7core_pci = edac_pci_create_generic_ctl(
2081                                                 &pvt->i7core_dev->pdev[0]->dev,
2082                                                 EDAC_MOD_STR);
2083         if (unlikely(!pvt->i7core_pci))
2084                 i7core_printk(KERN_WARNING,
2085                               "Unable to set up PCI error reporting via EDAC\n");
2086 }
2087
2088 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
2089 {
2090         if (likely(pvt->i7core_pci))
2091                 edac_pci_release_generic_ctl(pvt->i7core_pci);
2092         else
2093                 i7core_printk(KERN_ERR,
2094                                 "Couldn't find mem_ctl_info for socket %d\n",
2095                                 pvt->i7core_dev->socket);
2096         pvt->i7core_pci = NULL;
2097 }
2098
2099 static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
2100 {
2101         struct mem_ctl_info *mci = i7core_dev->mci;
2102         struct i7core_pvt *pvt;
2103
2104         if (unlikely(!mci || !mci->pvt_info)) {
2105                 debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
2106                         __func__, &i7core_dev->pdev[0]->dev);
2107
2108                 i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
2109                 return;
2110         }
2111
2112         pvt = mci->pvt_info;
2113
2114         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
2115                 __func__, mci, &i7core_dev->pdev[0]->dev);
2116
2117         /* Disable scrubrate setting */
2118         if (pvt->enable_scrub)
2119                 disable_sdram_scrub_setting(mci);
2120
2121         mce_unregister_decode_chain(&i7_mce_dec);
2122
2123         /* Disable EDAC polling */
2124         i7core_pci_ctl_release(pvt);
2125
2126         /* Remove MC sysfs nodes */
2127         edac_mc_del_mc(mci->dev);
2128
2129         debugf1("%s: free mci struct\n", mci->ctl_name);
2130         kfree(mci->ctl_name);
2131         edac_mc_free(mci);
2132         i7core_dev->mci = NULL;
2133 }
2134
2135 static int i7core_register_mci(struct i7core_dev *i7core_dev)
2136 {
2137         struct mem_ctl_info *mci;
2138         struct i7core_pvt *pvt;
2139         int rc;
2140         struct edac_mc_layer layers[2];
2141
2142         /* allocate a new MC control structure */
2143
2144         layers[0].type = EDAC_MC_LAYER_CHANNEL;
2145         layers[0].size = NUM_CHANS;
2146         layers[0].is_virt_csrow = false;
2147         layers[1].type = EDAC_MC_LAYER_SLOT;
2148         layers[1].size = MAX_DIMMS;
2149         layers[1].is_virt_csrow = true;
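
        /*
         * Describe the controller to EDAC as NUM_CHANS channels times
         * MAX_DIMMS slots, with the slot layer standing in for the
         * legacy virtual csrows.
         */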
2150         mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
2151                             sizeof(*pvt));
2152         if (unlikely(!mci))
2153                 return -ENOMEM;
2154
2155         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
2156                 __func__, mci, &i7core_dev->pdev[0]->dev);
2157
2158         pvt = mci->pvt_info;
2159         memset(pvt, 0, sizeof(*pvt));
2160
2161         /* Associate i7core_dev and mci for future use */
2162         pvt->i7core_dev = i7core_dev;
2163         i7core_dev->mci = mci;
2164
2165         /*
2166          * FIXME: how to handle RDDR3 at MCI level? It is possible to have
2167          * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
2168          * memory channels
2169          */
2170         mci->mtype_cap = MEM_FLAG_DDR3;
2171         mci->edac_ctl_cap = EDAC_FLAG_NONE;
2172         mci->edac_cap = EDAC_FLAG_NONE;
2173         mci->mod_name = "i7core_edac.c";
2174         mci->mod_ver = I7CORE_REVISION;
2175         mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
2176                                   i7core_dev->socket);
2177         mci->dev_name = pci_name(i7core_dev->pdev[0]);
2178         mci->ctl_page_to_phys = NULL;
2179
2180         /* Store pci devices at mci for faster access */
2181         rc = mci_bind_devs(mci, i7core_dev);
2182         if (unlikely(rc < 0))
2183                 goto fail0;
2184
2185         if (pvt->is_registered)
2186                 mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
2187         else
2188                 mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;
2189
2190         /* Get dimm basic config */
2191         get_dimm_config(mci);
2192         /* record ptr to the generic device */
2193         mci->dev = &i7core_dev->pdev[0]->dev;
2194         /* Set the function pointer to an actual operation function */
2195         mci->edac_check = i7core_check_error;
2196
2197         /* Enable scrubrate setting */
2198         if (pvt->enable_scrub)
2199                 enable_sdram_scrub_setting(mci);
2200
2201         /* add this new MC control structure to EDAC's list of MCs */
2202         if (unlikely(edac_mc_add_mc(mci))) {
2203                 debugf0("MC: " __FILE__
2204                         ": %s(): failed edac_mc_add_mc()\n", __func__);
2205                 /* FIXME: perhaps some code should go here that disables error
2206                  * reporting if we just enabled it
2207                  */
2208
2209                 rc = -EINVAL;
2210                 goto fail0;
2211         }
2212
2213         /* Default error mask is any memory */
2214         pvt->inject.channel = 0;
2215         pvt->inject.dimm = -1;
2216         pvt->inject.rank = -1;
2217         pvt->inject.bank = -1;
2218         pvt->inject.page = -1;
2219         pvt->inject.col = -1;
2220
2221         /* allocating generic PCI control info */
2222         i7core_pci_ctl_create(pvt);
2223
2224         /* DCLK for scrub rate setting */
2225         pvt->dclk_freq = get_dclk_freq();
2226
2227         mce_register_decode_chain(&i7_mce_dec);
2228
2229         return 0;
2230
2231 fail0:
2232         kfree(mci->ctl_name);
2233         edac_mc_free(mci);
2234         i7core_dev->mci = NULL;
2235         return rc;
2236 }
2237
2238 /*
2239  *      i7core_probe    Probe for ONE instance of device to see if it is
2240  *                      present.
2241  *      return:
2242  *              0 when a device is found
2243  *              < 0 for error code
2244  */
2245
2246 static int __devinit i7core_probe(struct pci_dev *pdev,
2247                                   const struct pci_device_id *id)
2248 {
2249         int rc, count = 0;
2250         struct i7core_dev *i7core_dev;
2251
2252         /* get the pci devices we want to reserve for our use */
2253         mutex_lock(&i7core_edac_lock);
2254
2255         /*
2256          * All memory controllers are allocated at the first pass.
2257          */
2258         if (unlikely(probed >= 1)) {
2259                 mutex_unlock(&i7core_edac_lock);
2260                 return -ENODEV;
2261         }
2262         probed++;
2263
2264         rc = i7core_get_all_devices();
2265         if (unlikely(rc < 0))
2266                 goto fail0;
2267
2268         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2269                 count++;
2270                 rc = i7core_register_mci(i7core_dev);
2271                 if (unlikely(rc < 0))
2272                         goto fail1;
2273         }
2274
2275         /*
2276          * Nehalem-EX uses a different memory controller. However, as the
2277          * memory controller is not visible on some Nehalem/Nehalem-EP, we
2278          * need to probe indirectly via an X58 PCI device. The same devices
2279          * are found on (some) Nehalem-EX. So, on those machines, the
2280          * probe routine needs to return -ENODEV, as the actual Memory
2281          * Controller registers won't be detected.
2282          */
2283         if (!count) {
2284                 rc = -ENODEV;
2285                 goto fail1;
2286         }
2287
2288         i7core_printk(KERN_INFO,
2289                       "Driver loaded, %d memory controller(s) found.\n",
2290                       count);
2291
2292         mutex_unlock(&i7core_edac_lock);
2293         return 0;
2294
2295 fail1:
2296         list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2297                 i7core_unregister_mci(i7core_dev);
2298
2299         i7core_put_all_devices();
2300 fail0:
2301         mutex_unlock(&i7core_edac_lock);
2302         return rc;
2303 }
2304
2305 /*
2306  *      i7core_remove   destructor for one instance of device
2307  *
2308  */
2309 static void __devexit i7core_remove(struct pci_dev *pdev)
2310 {
2311         struct i7core_dev *i7core_dev;
2312
2313         debugf0(__FILE__ ": %s()\n", __func__);
2314
2315          * There is a problem here: the pdev value at removal time will be
2316          * wrong, since it points to the X58 register used to detect that
2317          * the machine is a Nehalem or later design. However, due to the way
2318          * several PCI devices are grouped together to provide MC
2319          * functionality, we need to use a different method to release them.
2320          * to use a different method for releasing the devices
2321          */
2322
2323         mutex_lock(&i7core_edac_lock);
2324
2325         if (unlikely(!probed)) {
2326                 mutex_unlock(&i7core_edac_lock);
2327                 return;
2328         }
2329
2330         list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2331                 i7core_unregister_mci(i7core_dev);
2332
2333         /* Release PCI resources */
2334         i7core_put_all_devices();
2335
2336         probed--;
2337
2338         mutex_unlock(&i7core_edac_lock);
2339 }
2340
2341 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2342
2343 /*
2344  *      i7core_driver   pci_driver structure for this module
2345  *
2346  */
2347 static struct pci_driver i7core_driver = {
2348         .name     = "i7core_edac",
2349         .probe    = i7core_probe,
2350         .remove   = __devexit_p(i7core_remove),
2351         .id_table = i7core_pci_tbl,
2352 };
2353
2354 /*
2355  *      i7core_init             Module entry function
2356  *                      Try to initialize this module for its devices
2357  */
2358 static int __init i7core_init(void)
2359 {
2360         int pci_rc;
2361
2362         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2363
2364         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2365         opstate_init();
2366
2367         if (use_pci_fixup)
2368                 i7core_xeon_pci_fixup(pci_dev_table);
2369
2370         pci_rc = pci_register_driver(&i7core_driver);
2371
2372         if (pci_rc >= 0)
2373                 return 0;
2374
2375         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2376                       pci_rc);
2377
2378         return pci_rc;
2379 }
2380
2381 /*
2382  *      i7core_exit()   Module exit function
2383  *                      Unregister the driver
2384  */
2385 static void __exit i7core_exit(void)
2386 {
2387         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2388         pci_unregister_driver(&i7core_driver);
2389 }
2390
2391 module_init(i7core_init);
2392 module_exit(i7core_exit);
2393
2394 MODULE_LICENSE("GPL");
2395 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2396 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2397 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2398                    I7CORE_REVISION);
2399
2400 module_param(edac_op_state, int, 0444);
2401 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");