i7core_edac: Add scrubbing support
drivers/edac/i7core_edac.c
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  *       Mauro Carvalho Chehab <mchehab@redhat.com>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  *      http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/edac_mce.h>
37 #include <linux/smp.h>
38 #include <asm/processor.h>
39
40 #include "edac_core.h"
41
42 /* Static vars */
43 static LIST_HEAD(i7core_edac_list);
44 static DEFINE_MUTEX(i7core_edac_lock);
45 static int probed;
46
47 static int use_pci_fixup;
48 module_param(use_pci_fixup, int, 0444);
49 MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
50 /*
51  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
52  * registers start at bus 255, and are not reported by BIOS.
53  * Currently, only devices with up to 2 sockets are found. In order to
54  * support more QPI (Quick Path Interconnect) sockets, increase this number.
55  */
56 #define MAX_SOCKET_BUSES        2
57
58
59 /*
60  * Alter this version for the module when modifications are made
61  */
62 #define I7CORE_REVISION    " Ver: 1.0.0"
63 #define EDAC_MOD_STR      "i7core_edac"
64
65 /*
66  * Debug macros
67  */
68 #define i7core_printk(level, fmt, arg...)                       \
69         edac_printk(level, "i7core", fmt, ##arg)
70
71 #define i7core_mc_printk(mci, level, fmt, arg...)               \
72         edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
73
74 /*
75  * i7core Memory Controller Registers
76  */
77
78         /* OFFSETS for Device 0 Function 0 */
79
80 #define MC_CFG_CONTROL  0x90
81   #define MC_CFG_UNLOCK         0x02
82   #define MC_CFG_LOCK           0x00
83
84         /* OFFSETS for Device 3 Function 0 */
85
86 #define MC_CONTROL      0x48
87 #define MC_STATUS       0x4c
88 #define MC_MAX_DOD      0x64
89
90 /*
91  * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
92  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
93  */
94
95 #define MC_TEST_ERR_RCV1        0x60
96   #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)
97
98 #define MC_TEST_ERR_RCV0        0x64
99   #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
100   #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)
101
102 /* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
103 #define MC_SSRCONTROL           0x48
104   #define SSR_MODE_DISABLE      0x00
105   #define SSR_MODE_ENABLE       0x01
106   #define SSR_MODE_MASK         0x03
107
108 #define MC_SCRUB_CONTROL        0x4c
109   #define STARTSCRUB            (1 << 24)
110
111 #define MC_COR_ECC_CNT_0        0x80
112 #define MC_COR_ECC_CNT_1        0x84
113 #define MC_COR_ECC_CNT_2        0x88
114 #define MC_COR_ECC_CNT_3        0x8c
115 #define MC_COR_ECC_CNT_4        0x90
116 #define MC_COR_ECC_CNT_5        0x94
117
118 #define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
119 #define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)
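/*
 * Each MC_COR_ECC_CNT register packs two 15-bit correctable-error counters,
 * one for the "top" and one for the "bottom" DIMM of the pair. Decoding
 * example (hypothetical value): a read of 0x00050003 gives
 * DIMM_TOP_COR_ERR = 5 and DIMM_BOT_COR_ERR = 3.
 */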
120
121
122         /* OFFSETS for Devices 4,5 and 6 Function 0 */
123
124 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
125   #define THREE_DIMMS_PRESENT           (1 << 24)
126   #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
127   #define QUAD_RANK_PRESENT             (1 << 22)
128   #define REGISTERED_DIMM               (1 << 15)
129
130 #define MC_CHANNEL_MAPPER       0x60
131   #define RDLCH(r, ch)          ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
132   #define WRLCH(r, ch)          ((((r) >> (ch * 6)) & 0x07) - 1)
133
134 #define MC_CHANNEL_RANK_PRESENT 0x7c
135   #define RANK_PRESENT_MASK             0xffff
136
137 #define MC_CHANNEL_ADDR_MATCH   0xf0
138 #define MC_CHANNEL_ERROR_MASK   0xf8
139 #define MC_CHANNEL_ERROR_INJECT 0xfc
140   #define INJECT_ADDR_PARITY    0x10
141   #define INJECT_ECC            0x08
142   #define MASK_CACHELINE        0x06
143   #define MASK_FULL_CACHELINE   0x06
144   #define MASK_MSB32_CACHELINE  0x04
145   #define MASK_LSB32_CACHELINE  0x02
146   #define NO_MASK_CACHELINE     0x00
147   #define REPEAT_EN             0x01
148
149         /* OFFSETS for Devices 4,5 and 6 Function 1 */
150
151 #define MC_DOD_CH_DIMM0         0x48
152 #define MC_DOD_CH_DIMM1         0x4c
153 #define MC_DOD_CH_DIMM2         0x50
154   #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
155   #define RANKOFFSET(x)         (((x) & RANKOFFSET_MASK) >> 10)
156   #define DIMM_PRESENT_MASK     (1 << 9)
157   #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
158   #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
159   #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
160   #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
161   #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
162   #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
163   #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
164   #define MC_DOD_NUMCOL_MASK            3
165   #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)
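/*
 * Decoding example (hypothetical register value): dimm_dod = 0x2a9 gives
 * DIMM_PRESENT = 1, MC_DOD_NUMBANK = 1 (8 banks), MC_DOD_NUMRANK = 1
 * (2 ranks), MC_DOD_NUMROW = 2 (1 << 14 rows), MC_DOD_NUMCOL = 1
 * (1 << 11 cols) and RANKOFFSET = 0.
 */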
166
167 #define MC_RANK_PRESENT         0x7c
168
169 #define MC_SAG_CH_0     0x80
170 #define MC_SAG_CH_1     0x84
171 #define MC_SAG_CH_2     0x88
172 #define MC_SAG_CH_3     0x8c
173 #define MC_SAG_CH_4     0x90
174 #define MC_SAG_CH_5     0x94
175 #define MC_SAG_CH_6     0x98
176 #define MC_SAG_CH_7     0x9c
177
178 #define MC_RIR_LIMIT_CH_0       0x40
179 #define MC_RIR_LIMIT_CH_1       0x44
180 #define MC_RIR_LIMIT_CH_2       0x48
181 #define MC_RIR_LIMIT_CH_3       0x4C
182 #define MC_RIR_LIMIT_CH_4       0x50
183 #define MC_RIR_LIMIT_CH_5       0x54
184 #define MC_RIR_LIMIT_CH_6       0x58
185 #define MC_RIR_LIMIT_CH_7       0x5C
186 #define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)
187
188 #define MC_RIR_WAY_CH           0x80
189   #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
190   #define MC_RIR_WAY_RANK_MASK          0x7
191
192 /*
193  * i7core structs
194  */
195
196 #define NUM_CHANS 3
197 #define MAX_DIMMS 3             /* Max DIMMS per channel */
198 #define MAX_MCR_FUNC  4
199 #define MAX_CHAN_FUNC 3
200
201 struct i7core_info {
202         u32     mc_control;
203         u32     mc_status;
204         u32     max_dod;
205         u32     ch_map;
206 };
207
208
209 struct i7core_inject {
210         int     enable;
211
212         u32     section;
213         u32     type;
214         u32     eccmask;
215
216         /* Error address mask */
217         int channel, dimm, rank, bank, page, col;
218 };
219
220 struct i7core_channel {
221         u32             ranks;
222         u32             dimms;
223 };
224
225 struct pci_id_descr {
226         int                     dev;
227         int                     func;
228         int                     dev_id;
229         int                     optional;
230 };
231
232 struct pci_id_table {
233         const struct pci_id_descr       *descr;
234         int                             n_devs;
235 };
236
237 struct i7core_dev {
238         struct list_head        list;
239         u8                      socket;
240         struct pci_dev          **pdev;
241         int                     n_devs;
242         struct mem_ctl_info     *mci;
243 };
244
245 struct i7core_pvt {
246         struct pci_dev  *pci_noncore;
247         struct pci_dev  *pci_mcr[MAX_MCR_FUNC + 1];
248         struct pci_dev  *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
249
250         struct i7core_dev *i7core_dev;
251
252         struct i7core_info      info;
253         struct i7core_inject    inject;
254         struct i7core_channel   channel[NUM_CHANS];
255
256         int             ce_count_available;
257         int             csrow_map[NUM_CHANS][MAX_DIMMS];
258
259                         /* ECC corrected errors counts per udimm */
260         unsigned long   udimm_ce_count[MAX_DIMMS];
261         int             udimm_last_ce_count[MAX_DIMMS];
262                         /* ECC corrected errors counts per rdimm */
263         unsigned long   rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
264         int             rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
265
266         unsigned int    is_registered;
267
268         /* mcelog glue */
269         struct edac_mce         edac_mce;
270
271         /* Fifo double buffers */
272         struct mce              mce_entry[MCE_LOG_LEN];
273         struct mce              mce_outentry[MCE_LOG_LEN];
274
275         /* Fifo in/out counters */
276         unsigned                mce_in, mce_out;
277
278         /* Counter of MCE events that could not be collected (FIFO overrun) */
279         unsigned                mce_overrun;
280
281         /* Struct to control EDAC polling */
282         struct edac_pci_ctl_info *i7core_pci;
283 };
284
285 #define PCI_DESCR(device, function, device_id)  \
286         .dev = (device),                        \
287         .func = (function),                     \
288         .dev_id = (device_id)
289
290 static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
291                 /* Memory controller */
292         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
293         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
294                         /* Exists only for RDIMM */
295         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
296         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
297
298                 /* Channel 0 */
299         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
300         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
301         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
302         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },
303
304                 /* Channel 1 */
305         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
306         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
307         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
308         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },
309
310                 /* Channel 2 */
311         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
312         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
313         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
314         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
315
316                 /* Generic Non-core registers */
317         /*
318          * This is the PCI device on i7core and on Xeon 35xx (8086:2c41).
319          * On Xeon 55xx, however, it has a different id (8086:2c40), so the
320          * probing code needs to test for the other address in case this
321          * one fails.
322          */
323         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
324
325 };
326
327 static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
328         { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
329         { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
330         { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },
331
332         { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
333         { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
334         { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
335         { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },
336
337         { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
338         { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
339         { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
340         { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
341
342         /*
343          * This PCI device has an alternate address on some
344          * processors, such as the Core i7 860.
345          */
346         { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
347 };
348
349 static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
350                 /* Memory controller */
351         { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
352         { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
353                         /* Exists only for RDIMM */
354         { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
355         { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
356
357                 /* Channel 0 */
358         { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
359         { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
360         { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
361         { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },
362
363                 /* Channel 1 */
364         { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
365         { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
366         { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
367         { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },
368
369                 /* Channel 2 */
370         { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
371         { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
372         { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
373         { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
374
375                 /* Generic Non-core registers */
376         { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },
377
378 };
379
380 #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
381 static const struct pci_id_table pci_dev_table[] = {
382         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
383         PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
384         PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
385         {0,}                    /* 0 terminated list. */
386 };
387
388 /*
389  *      pci_device_id   table for which devices we are looking for
390  */
391 static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
392         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
393         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
394         {0,}                    /* 0 terminated list. */
395 };
396
397 /****************************************************************************
398                         Ancillary status routines
399  ****************************************************************************/
400
401         /* MC_CONTROL bits */
402 #define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + ch)))
403 #define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))
404
405         /* MC_STATUS bits */
406 #define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
407 #define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << ch))
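/*
 * Example (hypothetical register contents): mc_control = 0x700 has bits
 * 8-10 set, so CH_ACTIVE() reports channels 0-2 as active; mc_status = 0x10
 * has bit 4 set, so ECC_ENABLED() is true and CH_DISABLED() is false for
 * every channel.
 */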
408
409         /* MC_MAX_DOD read functions */
410 static inline int numdimms(u32 dimms)
411 {
412         return (dimms & 0x3) + 1;
413 }
414
415 static inline int numrank(u32 rank)
416 {
417         static int ranks[4] = { 1, 2, 4, -EINVAL };
418
419         return ranks[rank & 0x3];
420 }
421
422 static inline int numbank(u32 bank)
423 {
424         static int banks[4] = { 4, 8, 16, -EINVAL };
425
426         return banks[bank & 0x3];
427 }
428
429 static inline int numrow(u32 row)
430 {
431         static int rows[8] = {
432                 1 << 12, 1 << 13, 1 << 14, 1 << 15,
433                 1 << 16, -EINVAL, -EINVAL, -EINVAL,
434         };
435
436         return rows[row & 0x7];
437 }
438
439 static inline int numcol(u32 col)
440 {
441         static int cols[4] = {
442                 1 << 10, 1 << 11, 1 << 12, -EINVAL,
443         };
444         return cols[col & 0x3];
445 }
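/*
 * Decoding example for MC_MAX_DOD (hypothetical value): max_dod = 0x295
 * gives numdimms(max_dod) = 2, numrank(max_dod >> 2) = 2,
 * numbank(max_dod >> 4) = 8, numrow(max_dod >> 6) = 1 << 14 and
 * numcol(max_dod >> 9) = 1 << 11, matching the field layout used by
 * get_dimm_config() below.
 */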
446
447 static struct i7core_dev *get_i7core_dev(u8 socket)
448 {
449         struct i7core_dev *i7core_dev;
450
451         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
452                 if (i7core_dev->socket == socket)
453                         return i7core_dev;
454         }
455
456         return NULL;
457 }
458
459 static struct i7core_dev *alloc_i7core_dev(u8 socket,
460                                            const struct pci_id_table *table)
461 {
462         struct i7core_dev *i7core_dev;
463
464         i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
465         if (!i7core_dev)
466                 return NULL;
467
468         i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
469                                    GFP_KERNEL);
470         if (!i7core_dev->pdev) {
471                 kfree(i7core_dev);
472                 return NULL;
473         }
474
475         i7core_dev->socket = socket;
476         i7core_dev->n_devs = table->n_devs;
477         list_add_tail(&i7core_dev->list, &i7core_edac_list);
478
479         return i7core_dev;
480 }
481
482 static void free_i7core_dev(struct i7core_dev *i7core_dev)
483 {
484         list_del(&i7core_dev->list);
485         kfree(i7core_dev->pdev);
486         kfree(i7core_dev);
487 }
488
489 /****************************************************************************
490                         Memory check routines
491  ****************************************************************************/
492 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
493                                           unsigned func)
494 {
495         struct i7core_dev *i7core_dev = get_i7core_dev(socket);
496         int i;
497
498         if (!i7core_dev)
499                 return NULL;
500
501         for (i = 0; i < i7core_dev->n_devs; i++) {
502                 if (!i7core_dev->pdev[i])
503                         continue;
504
505                 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
506                     PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
507                         return i7core_dev->pdev[i];
508                 }
509         }
510
511         return NULL;
512 }
513
514 /**
515  * i7core_get_active_channels() - gets the number of channels and csrows
516  * @socket:     Quick Path Interconnect socket
517  * @channels:   Number of channels that will be returned
518  * @csrows:     Number of csrows found
519  *
520  * Since the EDAC core needs to know in advance the number of available
521  * channels and csrows in order to allocate memory for them, two similar
522  * steps are needed. The first step, implemented by this function, counts
523  * the csrows/channels present on one socket; the result is used to size
524  * the mci components properly.
525  *
526  * It should be noted that none of the currently available datasheets explain
527  * or even mention how csrows are seen by the memory controller, so a fake
528  * description for csrows has to be provided. This driver therefore maps
529  * each DIMM to one csrow.
530  */
531 static int i7core_get_active_channels(const u8 socket, unsigned *channels,
532                                       unsigned *csrows)
533 {
534         struct pci_dev *pdev = NULL;
535         int i, j;
536         u32 status, control;
537
538         *channels = 0;
539         *csrows = 0;
540
541         pdev = get_pdev_slot_func(socket, 3, 0);
542         if (!pdev) {
543                 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
544                               socket);
545                 return -ENODEV;
546         }
547
548         /* Device 3 function 0 reads */
549         pci_read_config_dword(pdev, MC_STATUS, &status);
550         pci_read_config_dword(pdev, MC_CONTROL, &control);
551
552         for (i = 0; i < NUM_CHANS; i++) {
553                 u32 dimm_dod[3];
554                 /* Check if the channel is active */
555                 if (!(control & (1 << (8 + i))))
556                         continue;
557
558                 /* Check if the channel is disabled */
559                 if (status & (1 << i))
560                         continue;
561
562                 pdev = get_pdev_slot_func(socket, i + 4, 1);
563                 if (!pdev) {
564                         i7core_printk(KERN_ERR, "Couldn't find socket %d "
565                                                 "fn %d.%d!!!\n",
566                                                 socket, i + 4, 1);
567                         return -ENODEV;
568                 }
569                 /* Devices 4-6 function 1 */
570                 pci_read_config_dword(pdev,
571                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
572                 pci_read_config_dword(pdev,
573                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
574                 pci_read_config_dword(pdev,
575                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
576
577                 (*channels)++;
578
579                 for (j = 0; j < 3; j++) {
580                         if (!DIMM_PRESENT(dimm_dod[j]))
581                                 continue;
582                         (*csrows)++;
583                 }
584         }
585
586         debugf0("Number of active channels on socket %d: %d\n",
587                 socket, *channels);
588
589         return 0;
590 }
591
592 static int get_dimm_config(const struct mem_ctl_info *mci)
593 {
594         struct i7core_pvt *pvt = mci->pvt_info;
595         struct csrow_info *csr;
596         struct pci_dev *pdev;
597         int i, j;
598         int csrow = 0;
599         unsigned long last_page = 0;
600         enum edac_type mode;
601         enum mem_type mtype;
602
603         /* Get data from the MC register, function 0 */
604         pdev = pvt->pci_mcr[0];
605         if (!pdev)
606                 return -ENODEV;
607
608         /* Device 3 function 0 reads */
609         pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
610         pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
611         pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
612         pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
613
614         debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
615                 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
616                 pvt->info.max_dod, pvt->info.ch_map);
617
618         if (ECC_ENABLED(pvt)) {
619                 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
620                 if (ECCx8(pvt))
621                         mode = EDAC_S8ECD8ED;
622                 else
623                         mode = EDAC_S4ECD4ED;
624         } else {
625                 debugf0("ECC disabled\n");
626                 mode = EDAC_NONE;
627         }
628
629         /* FIXME: need to handle the error codes */
630         debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
631                 "x%x x 0x%x\n",
632                 numdimms(pvt->info.max_dod),
633                 numrank(pvt->info.max_dod >> 2),
634                 numbank(pvt->info.max_dod >> 4),
635                 numrow(pvt->info.max_dod >> 6),
636                 numcol(pvt->info.max_dod >> 9));
637
638         for (i = 0; i < NUM_CHANS; i++) {
639                 u32 data, dimm_dod[3], value[8];
640
641                 if (!pvt->pci_ch[i][0])
642                         continue;
643
644                 if (!CH_ACTIVE(pvt, i)) {
645                         debugf0("Channel %i is not active\n", i);
646                         continue;
647                 }
648                 if (CH_DISABLED(pvt, i)) {
649                         debugf0("Channel %i is disabled\n", i);
650                         continue;
651                 }
652
653                 /* Devices 4-6 function 0 */
654                 pci_read_config_dword(pvt->pci_ch[i][0],
655                                 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
656
657                 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
658                                                 4 : 2;
659
660                 if (data & REGISTERED_DIMM)
661                         mtype = MEM_RDDR3;
662                 else
663                         mtype = MEM_DDR3;
664 #if 0
665                 if (data & THREE_DIMMS_PRESENT)
666                         pvt->channel[i].dimms = 3;
667                 else if (data & SINGLE_QUAD_RANK_PRESENT)
668                         pvt->channel[i].dimms = 1;
669                 else
670                         pvt->channel[i].dimms = 2;
671 #endif
672
673                 /* Devices 4-6 function 1 */
674                 pci_read_config_dword(pvt->pci_ch[i][1],
675                                 MC_DOD_CH_DIMM0, &dimm_dod[0]);
676                 pci_read_config_dword(pvt->pci_ch[i][1],
677                                 MC_DOD_CH_DIMM1, &dimm_dod[1]);
678                 pci_read_config_dword(pvt->pci_ch[i][1],
679                                 MC_DOD_CH_DIMM2, &dimm_dod[2]);
680
681                 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
682                         "%d ranks, %cDIMMs\n",
683                         i,
684                         RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
685                         data,
686                         pvt->channel[i].ranks,
687                         (data & REGISTERED_DIMM) ? 'R' : 'U');
688
689                 for (j = 0; j < 3; j++) {
690                         u32 banks, ranks, rows, cols;
691                         u32 size, npages;
692
693                         if (!DIMM_PRESENT(dimm_dod[j]))
694                                 continue;
695
696                         banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
697                         ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
698                         rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
699                         cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
700
701                         /* DDR3 has 8 I/O banks */
702                         size = (rows * cols * banks * ranks) >> (20 - 3);
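                        /*
                         * Worked example (hypothetical DIMM): rows = 1 << 14,
                         * cols = 1 << 11, banks = 8 and ranks = 2 give
                         * 2^14 * 2^11 * 8 * 2 = 2^29; shifting right by
                         * 20 - 3 = 17 yields 4096, i.e. a 4 GiB DIMM. The
                         * extra factor of 8 folded into the shift appears to
                         * account for the 8 bytes per column location on the
                         * 64-bit DDR3 bus.
                         */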
703
704                         pvt->channel[i].dimms++;
705
706                         debugf0("\tdimm %d %d MiB offset: %x, "
707                                 "bank: %d, rank: %d, row: %#x, col: %#x\n",
708                                 j, size,
709                                 RANKOFFSET(dimm_dod[j]),
710                                 banks, ranks, rows, cols);
711
712                         npages = MiB_TO_PAGES(size);
713
714                         csr = &mci->csrows[csrow];
715                         csr->first_page = last_page + 1;
716                         last_page += npages;
717                         csr->last_page = last_page;
718                         csr->nr_pages = npages;
719
720                         csr->page_mask = 0;
721                         csr->grain = 8;
722                         csr->csrow_idx = csrow;
723                         csr->nr_channels = 1;
724
725                         csr->channels[0].chan_idx = i;
726                         csr->channels[0].ce_count = 0;
727
728                         pvt->csrow_map[i][j] = csrow;
729
730                         switch (banks) {
731                         case 4:
732                                 csr->dtype = DEV_X4;
733                                 break;
734                         case 8:
735                                 csr->dtype = DEV_X8;
736                                 break;
737                         case 16:
738                                 csr->dtype = DEV_X16;
739                                 break;
740                         default:
741                                 csr->dtype = DEV_UNKNOWN;
742                         }
743
744                         csr->edac_mode = mode;
745                         csr->mtype = mtype;
746
747                         csrow++;
748                 }
749
750                 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
751                 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
752                 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
753                 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
754                 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
755                 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
756                 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
757                 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
758                 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
759                 for (j = 0; j < 8; j++)
760                         debugf1("\t\t%#x\t%#x\t%#x\n",
761                                 (value[j] >> 27) & 0x1,
762                                 (value[j] >> 24) & 0x7,
763                                 (value[j] & ((1 << 24) - 1)));
764         }
765
766         return 0;
767 }
768
769 /****************************************************************************
770                         Error insertion routines
771  ****************************************************************************/
772
773 /* The i7core has independent error injection features per channel.
774    However, to keep the code simpler, we don't allow enabling error injection
775    on more than one channel at a time.
776    Also, since a change to an inject parameter is only applied at enable time,
777    we disable error injection on every write to the sysfs nodes that control
778    the error injection parameters.
779  */
780 static int disable_inject(const struct mem_ctl_info *mci)
781 {
782         struct i7core_pvt *pvt = mci->pvt_info;
783
784         pvt->inject.enable = 0;
785
786         if (!pvt->pci_ch[pvt->inject.channel][0])
787                 return -ENODEV;
788
789         pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
790                                 MC_CHANNEL_ERROR_INJECT, 0);
791
792         return 0;
793 }
794
795 /*
796  * i7core inject.section
797  *
798  *      accept and store error injection inject.section value
799  *      bit 0 - refers to the lower 32-byte half cacheline
800  *      bit 1 - refers to the upper 32-byte half cacheline
801  */
802 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
803                                            const char *data, size_t count)
804 {
805         struct i7core_pvt *pvt = mci->pvt_info;
806         unsigned long value;
807         int rc;
808
809         if (pvt->inject.enable)
810                 disable_inject(mci);
811
812         rc = strict_strtoul(data, 10, &value);
813         if ((rc < 0) || (value > 3))
814                 return -EIO;
815
816         pvt->inject.section = (u32) value;
817         return count;
818 }
819
820 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
821                                               char *data)
822 {
823         struct i7core_pvt *pvt = mci->pvt_info;
824         return sprintf(data, "0x%08x\n", pvt->inject.section);
825 }
826
827 /*
828  * i7core inject.type
829  *
830  *      accept and store error injection inject.type value
831  *      bit 0 - repeat enable - Enable error repetition
832  *      bit 1 - inject ECC error
833  *      bit 2 - inject parity error
834  */
835 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
836                                         const char *data, size_t count)
837 {
838         struct i7core_pvt *pvt = mci->pvt_info;
839         unsigned long value;
840         int rc;
841
842         if (pvt->inject.enable)
843                 disable_inject(mci);
844
845         rc = strict_strtoul(data, 10, &value);
846         if ((rc < 0) || (value > 7))
847                 return -EIO;
848
849         pvt->inject.type = (u32) value;
850         return count;
851 }
852
853 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
854                                               char *data)
855 {
856         struct i7core_pvt *pvt = mci->pvt_info;
857         return sprintf(data, "0x%08x\n", pvt->inject.type);
858 }
859
860 /*
861  * i7core_inject_eccmask_store
862  *
863  * The type of error (UE/CE) will depend on the inject.eccmask value:
864  *   Any bits set to a 1 will flip the corresponding ECC bit
865  *   Correctable errors can be injected by flipping 1 bit or the bits within
866  *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
867  *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
868  *   uncorrectable error to be injected.
869  */
870 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
871                                         const char *data, size_t count)
872 {
873         struct i7core_pvt *pvt = mci->pvt_info;
874         unsigned long value;
875         int rc;
876
877         if (pvt->inject.enable)
878                 disable_inject(mci);
879
880         rc = strict_strtoul(data, 10, &value);
881         if (rc < 0)
882                 return -EIO;
883
884         pvt->inject.eccmask = (u32) value;
885         return count;
886 }
887
888 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
889                                               char *data)
890 {
891         struct i7core_pvt *pvt = mci->pvt_info;
892         return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
893 }
894
895 /*
896  * i7core_addrmatch
897  *
898  * Address-matching attributes for error injection: channel, dimm, rank,
899  * bank, page and col restrict the injection to memory cycles that match
900  * the value stored in the corresponding attribute. Writing "any" (stored
901  * internally as -1) makes that field be ignored when matching.
902  * The attributes themselves are generated by the DECLARE_ADDR_MATCH()
903  * macro below.
904  */
905
906 #define DECLARE_ADDR_MATCH(param, limit)                        \
907 static ssize_t i7core_inject_store_##param(                     \
908                 struct mem_ctl_info *mci,                       \
909                 const char *data, size_t count)                 \
910 {                                                               \
911         struct i7core_pvt *pvt;                                 \
912         long value;                                             \
913         int rc;                                                 \
914                                                                 \
915         debugf1("%s()\n", __func__);                            \
916         pvt = mci->pvt_info;                                    \
917                                                                 \
918         if (pvt->inject.enable)                                 \
919                 disable_inject(mci);                            \
920                                                                 \
921         if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
922                 value = -1;                                     \
923         else {                                                  \
924                 rc = strict_strtol(data, 10, &value);           \
925                 if ((rc < 0) || (value >= limit))               \
926                         return -EIO;                            \
927         }                                                       \
928                                                                 \
929         pvt->inject.param = value;                              \
930                                                                 \
931         return count;                                           \
932 }                                                               \
933                                                                 \
934 static ssize_t i7core_inject_show_##param(                      \
935                 struct mem_ctl_info *mci,                       \
936                 char *data)                                     \
937 {                                                               \
938         struct i7core_pvt *pvt;                                 \
939                                                                 \
940         pvt = mci->pvt_info;                                    \
941         debugf1("%s() pvt=%p\n", __func__, pvt);                \
942         if (pvt->inject.param < 0)                              \
943                 return sprintf(data, "any\n");                  \
944         else                                                    \
945                 return sprintf(data, "%d\n", pvt->inject.param);\
946 }
947
948 #define ATTR_ADDR_MATCH(param)                                  \
949         {                                                       \
950                 .attr = {                                       \
951                         .name = #param,                         \
952                         .mode = (S_IRUGO | S_IWUSR)             \
953                 },                                              \
954                 .show  = i7core_inject_show_##param,            \
955                 .store = i7core_inject_store_##param,           \
956         }
957
958 DECLARE_ADDR_MATCH(channel, 3);
959 DECLARE_ADDR_MATCH(dimm, 3);
960 DECLARE_ADDR_MATCH(rank, 4);
961 DECLARE_ADDR_MATCH(bank, 32);
962 DECLARE_ADDR_MATCH(page, 0x10000);
963 DECLARE_ADDR_MATCH(col, 0x4000);
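/*
 * Each DECLARE_ADDR_MATCH(param, limit) above expands to a store/show pair;
 * e.g. DECLARE_ADDR_MATCH(channel, 3) creates i7core_inject_store_channel()
 * and i7core_inject_show_channel(), accepting 0-2 or the string "any"
 * (stored as -1) for the channel to match.
 */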
964
965 static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
966 {
967         u32 read;
968         int count;
969
970         debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
971                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
972                 where, val);
973
974         for (count = 0; count < 10; count++) {
975                 if (count)
976                         msleep(100);
977                 pci_write_config_dword(dev, where, val);
978                 pci_read_config_dword(dev, where, &read);
979
980                 if (read == val)
981                         return 0;
982         }
983
984         i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
985                 "write=%08x. Read=%08x\n",
986                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
987                 where, val, read);
988
989         return -EINVAL;
990 }
991
992 /*
993  * This routine prepares the Memory Controller for error injection.
994  * The error will be injected when some process tries to write to memory
995  * that matches the given criteria.
996  * The criteria can be set as a mask where dimm, rank, bank, page and col
997  * can be specified.
998  * A -1 value for any of the mask items makes the MCU ignore that matching
999  * criterion for error injection.
1000  *
1001  * It should be noted that the error will only happen after a write operation
1002  * to memory that matches the condition. If REPEAT_EN is not enabled in the
1003  * inject mask, just one error is produced. Otherwise, errors are injected
1004  * repeatedly until the inject mask is cleared.
1005  *
1006  * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
1007  *    is reliable enough to check whether the MC is using all
1008  *    three channels. However, this is not clear in the datasheet.
1009  */
1010 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
1011                                        const char *data, size_t count)
1012 {
1013         struct i7core_pvt *pvt = mci->pvt_info;
1014         u32 injectmask;
1015         u64 mask = 0;
1016         int  rc;
1017         long enable;
1018
1019         if (!pvt->pci_ch[pvt->inject.channel][0])
1020                 return 0;
1021
1022         rc = strict_strtol(data, 10, &enable);
1023         if (rc < 0)
1024                 return 0;
1025
1026         if (enable) {
1027                 pvt->inject.enable = 1;
1028         } else {
1029                 disable_inject(mci);
1030                 return count;
1031         }
1032
1033         /* Sets pvt->inject.dimm mask */
1034         if (pvt->inject.dimm < 0)
1035                 mask |= 1LL << 41;
1036         else {
1037                 if (pvt->channel[pvt->inject.channel].dimms > 2)
1038                         mask |= (pvt->inject.dimm & 0x3LL) << 35;
1039                 else
1040                         mask |= (pvt->inject.dimm & 0x1LL) << 36;
1041         }
1042
1043         /* Sets pvt->inject.rank mask */
1044         if (pvt->inject.rank < 0)
1045                 mask |= 1LL << 40;
1046         else {
1047                 if (pvt->channel[pvt->inject.channel].dimms > 2)
1048                         mask |= (pvt->inject.rank & 0x1LL) << 34;
1049                 else
1050                         mask |= (pvt->inject.rank & 0x3LL) << 34;
1051         }
1052
1053         /* Sets pvt->inject.bank mask */
1054         if (pvt->inject.bank < 0)
1055                 mask |= 1LL << 39;
1056         else
1057                 mask |= (pvt->inject.bank & 0x1fLL) << 30;
1058
1059         /* Sets pvt->inject.page mask */
1060         if (pvt->inject.page < 0)
1061                 mask |= 1LL << 38;
1062         else
1063                 mask |= (pvt->inject.page & 0xffff) << 14;
1064
1065         /* Sets pvt->inject.column mask */
1066         if (pvt->inject.col < 0)
1067                 mask |= 1LL << 37;
1068         else
1069                 mask |= (pvt->inject.col & 0x3fff);
1070
1071         /*
1072          * bit    0: REPEAT_EN
1073          * bits 1-2: MASK_HALF_CACHELINE
1074          * bit    3: INJECT_ECC
1075          * bit    4: INJECT_ADDR_PARITY
1076          */
1077
1078         injectmask = (pvt->inject.type & 1) |
1079                      (pvt->inject.section & 0x3) << 1 |
1080                      (pvt->inject.type & 0x6) << (3 - 1);
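        /*
         * Worked example: inject.type = 3 (repeat + ECC) and
         * inject.section = 0 yield injectmask = 1 | (0 << 1) | (2 << 2)
         * = 0x09, i.e. REPEAT_EN | INJECT_ECC.
         */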
1081
1082         /* Unlock writes to registers - this register is write only */
1083         pci_write_config_dword(pvt->pci_noncore,
1084                                MC_CFG_CONTROL, 0x2);
1085
1086         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1087                                MC_CHANNEL_ADDR_MATCH, mask);
1088         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1089                                MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1090
1091         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1092                                MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1093
1094         write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1095                                MC_CHANNEL_ERROR_INJECT, injectmask);
1096
1097         /*
1098          * This is something undocumented, based on my tests
1099          * Without writing 8 to this register, errors aren't injected. Not sure
1100          * why.
1101          */
1102         pci_write_config_dword(pvt->pci_noncore,
1103                                MC_CFG_CONTROL, 8);
1104
1105         debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1106                 " inject 0x%08x\n",
1107                 mask, pvt->inject.eccmask, injectmask);
1108
1109
1110         return count;
1111 }
1112
1113 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1114                                         char *data)
1115 {
1116         struct i7core_pvt *pvt = mci->pvt_info;
1117         u32 injectmask;
1118
1119         if (!pvt->pci_ch[pvt->inject.channel][0])
1120                 return 0;
1121
1122         pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1123                                MC_CHANNEL_ERROR_INJECT, &injectmask);
1124
1125         debugf0("Inject error read: 0x%08x\n", injectmask);
1126
1127         if (injectmask & 0x0c)
1128                 pvt->inject.enable = 1;
1129
1130         return sprintf(data, "%d\n", pvt->inject.enable);
1131 }
1132
1133 #define DECLARE_COUNTER(param)                                  \
1134 static ssize_t i7core_show_counter_##param(                     \
1135                 struct mem_ctl_info *mci,                       \
1136                 char *data)                                     \
1137 {                                                               \
1138         struct i7core_pvt *pvt = mci->pvt_info;                 \
1139                                                                 \
1140         debugf1("%s() \n", __func__);                           \
1141         if (!pvt->ce_count_available || (pvt->is_registered))   \
1142                 return sprintf(data, "data unavailable\n");     \
1143         return sprintf(data, "%lu\n",                           \
1144                         pvt->udimm_ce_count[param]);            \
1145 }
1146
1147 #define ATTR_COUNTER(param)                                     \
1148         {                                                       \
1149                 .attr = {                                       \
1150                         .name = __stringify(udimm##param),      \
1151                         .mode = (S_IRUGO | S_IWUSR)             \
1152                 },                                              \
1153                 .show  = i7core_show_counter_##param            \
1154         }
1155
1156 DECLARE_COUNTER(0);
1157 DECLARE_COUNTER(1);
1158 DECLARE_COUNTER(2);
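/*
 * Likewise, each DECLARE_COUNTER(param) above expands to a show-only
 * handler; e.g. DECLARE_COUNTER(0) creates i7core_show_counter_0(), which
 * prints udimm_ce_count[0] whenever the counters are valid for
 * unregistered DIMMs.
 */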
1159
1160 /*
1161  * Sysfs struct
1162  */
1163
1164 static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
1165         ATTR_ADDR_MATCH(channel),
1166         ATTR_ADDR_MATCH(dimm),
1167         ATTR_ADDR_MATCH(rank),
1168         ATTR_ADDR_MATCH(bank),
1169         ATTR_ADDR_MATCH(page),
1170         ATTR_ADDR_MATCH(col),
1171         { } /* End of list */
1172 };
1173
1174 static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
1175         .name  = "inject_addrmatch",
1176         .mcidev_attr = i7core_addrmatch_attrs,
1177 };
1178
1179 static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
1180         ATTR_COUNTER(0),
1181         ATTR_COUNTER(1),
1182         ATTR_COUNTER(2),
1183         { .attr = { .name = NULL } }
1184 };
1185
1186 static const struct mcidev_sysfs_group i7core_udimm_counters = {
1187         .name  = "all_channel_counts",
1188         .mcidev_attr = i7core_udimm_counters_attrs,
1189 };
1190
1191 static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
1192         {
1193                 .attr = {
1194                         .name = "inject_section",
1195                         .mode = (S_IRUGO | S_IWUSR)
1196                 },
1197                 .show  = i7core_inject_section_show,
1198                 .store = i7core_inject_section_store,
1199         }, {
1200                 .attr = {
1201                         .name = "inject_type",
1202                         .mode = (S_IRUGO | S_IWUSR)
1203                 },
1204                 .show  = i7core_inject_type_show,
1205                 .store = i7core_inject_type_store,
1206         }, {
1207                 .attr = {
1208                         .name = "inject_eccmask",
1209                         .mode = (S_IRUGO | S_IWUSR)
1210                 },
1211                 .show  = i7core_inject_eccmask_show,
1212                 .store = i7core_inject_eccmask_store,
1213         }, {
1214                 .grp = &i7core_inject_addrmatch,
1215         }, {
1216                 .attr = {
1217                         .name = "inject_enable",
1218                         .mode = (S_IRUGO | S_IWUSR)
1219                 },
1220                 .show  = i7core_inject_enable_show,
1221                 .store = i7core_inject_enable_store,
1222         },
1223         { }     /* End of list */
1224 };
1225
1226 static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
1227         {
1228                 .attr = {
1229                         .name = "inject_section",
1230                         .mode = (S_IRUGO | S_IWUSR)
1231                 },
1232                 .show  = i7core_inject_section_show,
1233                 .store = i7core_inject_section_store,
1234         }, {
1235                 .attr = {
1236                         .name = "inject_type",
1237                         .mode = (S_IRUGO | S_IWUSR)
1238                 },
1239                 .show  = i7core_inject_type_show,
1240                 .store = i7core_inject_type_store,
1241         }, {
1242                 .attr = {
1243                         .name = "inject_eccmask",
1244                         .mode = (S_IRUGO | S_IWUSR)
1245                 },
1246                 .show  = i7core_inject_eccmask_show,
1247                 .store = i7core_inject_eccmask_store,
1248         }, {
1249                 .grp = &i7core_inject_addrmatch,
1250         }, {
1251                 .attr = {
1252                         .name = "inject_enable",
1253                         .mode = (S_IRUGO | S_IWUSR)
1254                 },
1255                 .show  = i7core_inject_enable_show,
1256                 .store = i7core_inject_enable_store,
1257         }, {
1258                 .grp = &i7core_udimm_counters,
1259         },
1260         { }     /* End of list */
1261 };
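/*
 * A typical error injection session through the sysfs nodes declared above
 * could look like this (hypothetical shell example, assuming the memory
 * controller was registered as mc0):
 *
 *   echo 3 > /sys/devices/system/edac/mc/mc0/inject_section
 *   echo 2 > /sys/devices/system/edac/mc/mc0/inject_type
 *   echo 1 > /sys/devices/system/edac/mc/mc0/inject_eccmask
 *   echo any > /sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
 *   echo 1 > /sys/devices/system/edac/mc/mc0/inject_enable
 *
 * This requests a single (non-repeated) ECC error, flipping ECC bit 0, to
 * be injected on the next write that matches, on any channel.
 */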
1262
1263 /****************************************************************************
1264         Device initialization routines: put/get, init/exit
1265  ****************************************************************************/
1266
1267 /*
1268  *      i7core_put_devices      'put' all the devices of one socket that we
1269  *                              reserved via 'get'
1270  */
1271 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1272 {
1273         int i;
1274
1275         debugf0(__FILE__ ": %s()\n", __func__);
1276         for (i = 0; i < i7core_dev->n_devs; i++) {
1277                 struct pci_dev *pdev = i7core_dev->pdev[i];
1278                 if (!pdev)
1279                         continue;
1280                 debugf0("Removing dev %02x:%02x.%d\n",
1281                         pdev->bus->number,
1282                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1283                 pci_dev_put(pdev);
1284         }
1285 }
1286
1287 static void i7core_put_all_devices(void)
1288 {
1289         struct i7core_dev *i7core_dev, *tmp;
1290
1291         list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1292                 i7core_put_devices(i7core_dev);
1293                 free_i7core_dev(i7core_dev);
1294         }
1295 }
1296
1297 static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1298 {
1299         struct pci_dev *pdev = NULL;
1300         int i;
1301
1302         /*
1303          * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core PCI buses
1304          * aren't announced by ACPI, so a legacy bus scan is needed to
1305          * detect them.
1306          */
1307         while (table && table->descr) {
1308                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1309                 if (unlikely(!pdev)) {
1310                         for (i = 0; i < MAX_SOCKET_BUSES; i++)
1311                                 pcibios_scan_specific_bus(255-i);
1312                 }
1313                 pci_dev_put(pdev);
1314                 table++;
1315         }
1316 }
1317
1318 static unsigned i7core_pci_lastbus(void)
1319 {
1320         int last_bus = 0, bus;
1321         struct pci_bus *b = NULL;
1322
1323         while ((b = pci_find_next_bus(b)) != NULL) {
1324                 bus = b->number;
1325                 debugf0("Found bus %d\n", bus);
1326                 if (bus > last_bus)
1327                         last_bus = bus;
1328         }
1329
1330         debugf0("Last bus %d\n", last_bus);
1331
1332         return last_bus;
1333 }
1334
1335 /*
1336  *      i7core_get_onedevice()  Find and perform a 'get' operation on one
1337  *                      MCH device/function that we want to reference for
1338  *                      this driver, probing the alternate PCI IDs when
1339  *                      the default device ID is not found
1340  */
1341 static int i7core_get_onedevice(struct pci_dev **prev,
1342                                 const struct pci_id_table *table,
1343                                 const unsigned devno,
1344                                 const unsigned last_bus)
1345 {
1346         struct i7core_dev *i7core_dev;
1347         const struct pci_id_descr *dev_descr = &table->descr[devno];
1348
1349         struct pci_dev *pdev = NULL;
1350         u8 bus = 0;
1351         u8 socket = 0;
1352
1353         pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1354                               dev_descr->dev_id, *prev);
1355
1356         /*
1357          * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
1358          * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1359          * to probe for the alternate address in case of failure
1360          */
1361         if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
1362                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1363                                       PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
1364
1365         if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
1366                 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1367                                       PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1368                                       *prev);
1369
1370         if (!pdev) {
1371                 if (*prev) {
1372                         *prev = pdev;
1373                         return 0;
1374                 }
1375
1376                 if (dev_descr->optional)
1377                         return 0;
1378
1379                 if (devno == 0)
1380                         return -ENODEV;
1381
1382                 i7core_printk(KERN_INFO,
1383                         "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1384                         dev_descr->dev, dev_descr->func,
1385                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1386
1387                 /* End of list, leave */
1388                 return -ENODEV;
1389         }
1390         bus = pdev->bus->number;
1391
1392         socket = last_bus - bus;
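        /*
         * For example, with last_bus = 255, the socket 0 devices found on
         * bus 255 yield socket = 0 and the socket 1 devices on bus 254
         * yield socket = 1; MAX_SOCKET_BUSES bounds how many such buses
         * the fixup above scans.
         */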
1393
1394         i7core_dev = get_i7core_dev(socket);
1395         if (!i7core_dev) {
1396                 i7core_dev = alloc_i7core_dev(socket, table);
1397                 if (!i7core_dev) {
1398                         pci_dev_put(pdev);
1399                         return -ENOMEM;
1400                 }
1401         }
1402
1403         if (i7core_dev->pdev[devno]) {
1404                 i7core_printk(KERN_ERR,
1405                         "Duplicated device for "
1406                         "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1407                         bus, dev_descr->dev, dev_descr->func,
1408                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1409                 pci_dev_put(pdev);
1410                 return -ENODEV;
1411         }
1412
1413         i7core_dev->pdev[devno] = pdev;
1414
1415         /* Sanity check */
1416         if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1417                         PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1418                 i7core_printk(KERN_ERR,
1419                         "Device PCI ID %04x:%04x "
1420                         "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1421                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1422                         bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1423                         bus, dev_descr->dev, dev_descr->func);
1424                 return -ENODEV;
1425         }
1426
1427         /* Be sure that the device is enabled */
1428         if (unlikely(pci_enable_device(pdev) < 0)) {
1429                 i7core_printk(KERN_ERR,
1430                         "Couldn't enable "
1431                         "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1432                         bus, dev_descr->dev, dev_descr->func,
1433                         PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1434                 return -ENODEV;
1435         }
1436
1437         debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1438                 socket, bus, dev_descr->dev,
1439                 dev_descr->func,
1440                 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1441
1442         /*
1443          * As stated in drivers/pci/search.c, the reference count for
1444          * @from is always decremented if it is not %NULL. As we iterate
1445          * until pci_get_device() returns NULL, take an extra reference here.
1446          */
1447         pci_dev_get(pdev);
1448
1449         *prev = pdev;
1450
1451         return 0;
1452 }
1453
1454 static int i7core_get_all_devices(void)
1455 {
1456         int i, rc, last_bus;
1457         struct pci_dev *pdev = NULL;
1458         const struct pci_id_table *table = pci_dev_table;
1459
1460         last_bus = i7core_pci_lastbus();
1461
1462         while (table && table->descr) {
1463                 for (i = 0; i < table->n_devs; i++) {
1464                         pdev = NULL;
1465                         do {
1466                                 rc = i7core_get_onedevice(&pdev, table, i,
1467                                                           last_bus);
1468                                 if (rc < 0) {
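                                             /*
                                              * If the first device of a
                                              * table is missing, this CPU
                                              * doesn't match the table:
                                              * skip to the next table
                                              * instead of failing the
                                              * whole probe.
                                              */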
1469                                         if (i == 0) {
1470                                                 i = table->n_devs;
1471                                                 break;
1472                                         }
1473                                         i7core_put_all_devices();
1474                                         return -ENODEV;
1475                                 }
1476                         } while (pdev);
1477                 }
1478                 table++;
1479         }
1480
1481         return 0;
1482 }
1483
1484 static int mci_bind_devs(struct mem_ctl_info *mci,
1485                          struct i7core_dev *i7core_dev)
1486 {
1487         struct i7core_pvt *pvt = mci->pvt_info;
1488         struct pci_dev *pdev;
1489         int i, func, slot;
1490
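     /*
      * Expected layout on each socket's bus (as handled below): slot 3
      * carries the MC register functions, slots 4..4+NUM_CHANS-1 carry
      * the per-channel functions, and slot 0 function 0 is the non-core
      * device. Device 3 function 2 is only present on registered (RDIMM)
      * configurations, which is what sets pvt->is_registered.
      */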
1491         pvt->is_registered = 0;
1492         for (i = 0; i < i7core_dev->n_devs; i++) {
1493                 pdev = i7core_dev->pdev[i];
1494                 if (!pdev)
1495                         continue;
1496
1497                 func = PCI_FUNC(pdev->devfn);
1498                 slot = PCI_SLOT(pdev->devfn);
1499                 if (slot == 3) {
1500                         if (unlikely(func > MAX_MCR_FUNC))
1501                                 goto error;
1502                         pvt->pci_mcr[func] = pdev;
1503                 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1504                         if (unlikely(func > MAX_CHAN_FUNC))
1505                                 goto error;
1506                         pvt->pci_ch[slot - 4][func] = pdev;
1507                 } else if (!slot && !func)
1508                         pvt->pci_noncore = pdev;
1509                 else
1510                         goto error;
1511
1512                 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1513                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1514                         pdev, i7core_dev->socket);
1515
1516                 if (PCI_SLOT(pdev->devfn) == 3 &&
1517                         PCI_FUNC(pdev->devfn) == 2)
1518                         pvt->is_registered = 1;
1519         }
1520
1521         return 0;
1522
1523 error:
1524         i7core_printk(KERN_ERR, "Device %d, function %d "
1525                       "is out of the expected range\n",
1526                       slot, func);
1527         return -EINVAL;
1528 }
1529
1530 /****************************************************************************
1531                         Error check routines
1532  ****************************************************************************/
1533 static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1534                                       const int chan,
1535                                       const int dimm,
1536                                       const int add)
1537 {
1538         char *msg;
1539         struct i7core_pvt *pvt = mci->pvt_info;
1540         int row = pvt->csrow_map[chan][dimm], i;
1541
1542         for (i = 0; i < add; i++) {
1543                 msg = kasprintf(GFP_KERNEL, "Corrected error "
1544                                 "(Socket=%d channel=%d dimm=%d)",
1545                                 pvt->i7core_dev->socket, chan, dimm);
1546
1547                 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1548                 kfree(msg);
1549         }
1550 }
1551
1552 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1553                                          const int chan,
1554                                          const int new0,
1555                                          const int new1,
1556                                          const int new2)
1557 {
1558         struct i7core_pvt *pvt = mci->pvt_info;
1559         int add0 = 0, add1 = 0, add2 = 0;
1560         /* Update the CE counters unless this is the first time here */
1561         if (pvt->ce_count_available) {
1562                 /* Updates CE counters */
1563
1564                 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1565                 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1566                 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1567
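                     /*
                      * The hardware CE counters are only 15 bits wide (note
                      * the 0x7fff masks used to extract them), so a negative
                      * delta means the counter wrapped around since the last
                      * read; compensate by adding back the counter range.
                      */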
1568                 if (add2 < 0)
1569                         add2 += 0x7fff;
1570                 pvt->rdimm_ce_count[chan][2] += add2;
1571
1572                 if (add1 < 0)
1573                         add1 += 0x7fff;
1574                 pvt->rdimm_ce_count[chan][1] += add1;
1575
1576                 if (add0 < 0)
1577                         add0 += 0x7fff;
1578                 pvt->rdimm_ce_count[chan][0] += add0;
1579         } else
1580                 pvt->ce_count_available = 1;
1581
1582         /* Store the new values */
1583         pvt->rdimm_last_ce_count[chan][2] = new2;
1584         pvt->rdimm_last_ce_count[chan][1] = new1;
1585         pvt->rdimm_last_ce_count[chan][0] = new0;
1586
1587         /* Update the EDAC core */
1588         if (add0 != 0)
1589                 i7core_rdimm_update_csrow(mci, chan, 0, add0);
1590         if (add1 != 0)
1591                 i7core_rdimm_update_csrow(mci, chan, 1, add1);
1592         if (add2 != 0)
1593                 i7core_rdimm_update_csrow(mci, chan, 2, add2);
1594
1595 }
1596
1597 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1598 {
1599         struct i7core_pvt *pvt = mci->pvt_info;
1600         u32 rcv[3][2];
1601         int i, new0, new1, new2;
1602
1603         /* Read the Dev 3 Fn 2 MC_COR_ECC_CNT registers directly */
1604         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1605                                                                 &rcv[0][0]);
1606         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1607                                                                 &rcv[0][1]);
1608         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1609                                                                 &rcv[1][0]);
1610         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1611                                                                 &rcv[1][1]);
1612         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1613                                                                 &rcv[2][0]);
1614         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1615                                                                 &rcv[2][1]);
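     /*
      * Each channel has two MC_COR_ECC_CNT registers, and each register
      * carries two 15-bit counters. With 3 DIMMs on a channel, the three
      * counters map to the individual DIMMs; with 1 or 2 DIMMs, both
      * halves of each register are summed per DIMM instead, as below.
      */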
1616         for (i = 0; i < 3; i++) {
1617                 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1618                         (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1619                 /* if the channel has 3 DIMMs */
1620                 if (pvt->channel[i].dimms > 2) {
1621                         new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1622                         new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1623                         new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1624                 } else {
1625                         new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1626                                         DIMM_BOT_COR_ERR(rcv[i][0]);
1627                         new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1628                                         DIMM_BOT_COR_ERR(rcv[i][1]);
1629                         new2 = 0;
1630                 }
1631
1632                 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1633         }
1634 }
1635
1636 /* This function is based on the device 3 function 4 registers as described on:
1637  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1638  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1639  * also available at:
1640  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1641  */
1642 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1643 {
1644         struct i7core_pvt *pvt = mci->pvt_info;
1645         u32 rcv1, rcv0;
1646         int new0, new1, new2;
1647
1648         if (!pvt->pci_mcr[4]) {
1649                 debugf0("%s MCR registers not found\n", __func__);
1650                 return;
1651         }
1652
1653         /* Corrected test errors */
1654         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1655         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1656
1657         /* Store the new values */
1658         new2 = DIMM2_COR_ERR(rcv1);
1659         new1 = DIMM1_COR_ERR(rcv0);
1660         new0 = DIMM0_COR_ERR(rcv0);
1661
1662         /* Update the CE counters unless this is the first time here */
1663         if (pvt->ce_count_available) {
1664                 /* Updates CE counters */
1665                 int add0, add1, add2;
1666
1667                 add2 = new2 - pvt->udimm_last_ce_count[2];
1668                 add1 = new1 - pvt->udimm_last_ce_count[1];
1669                 add0 = new0 - pvt->udimm_last_ce_count[0];
1670
1671                 if (add2 < 0)
1672                         add2 += 0x7fff;
1673                 pvt->udimm_ce_count[2] += add2;
1674
1675                 if (add1 < 0)
1676                         add1 += 0x7fff;
1677                 pvt->udimm_ce_count[1] += add1;
1678
1679                 if (add0 < 0)
1680                         add0 += 0x7fff;
1681                 pvt->udimm_ce_count[0] += add0;
1682
1683                 if (add0 | add1 | add2)
1684                         i7core_printk(KERN_ERR, "New Corrected error(s): "
1685                                       "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1686                                       add0, add1, add2);
1687         } else
1688                 pvt->ce_count_available = 1;
1689
1690         /* Store the new values */
1691         pvt->udimm_last_ce_count[2] = new2;
1692         pvt->udimm_last_ce_count[1] = new1;
1693         pvt->udimm_last_ce_count[0] = new0;
1694 }
1695
1696 /*
1697  * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and
1698  * IA-32 Architectures Software Developer's Manual Volume 3B,
1699  * Nehalem is defined as family 0x06, model 0x1a.
1700  *
1701  * The MCA registers used here are the following ones:
1702  *     struct mce field MCA Register
1703  *     m->status        MSR_IA32_MC8_STATUS
1704  *     m->addr          MSR_IA32_MC8_ADDR
1705  *     m->misc          MSR_IA32_MC8_MISC
1706  * In the case of Nehalem, the error information is encoded in the .status
1707  * and .misc fields.
1708  */
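     /*
      * Field layout decoded below (as used by this driver):
      *     m->status[16..24]   error type bitmap (errnum)
      *     m->status[4..6]     operation type (optypenum)
      *     m->status[38..52]   corrected error count
      *     m->misc[16..17]     DIMM
      *     m->misc[18..19]     channel
      *     m->misc[32..63]     syndrome
      */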
1709 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1710                                     const struct mce *m)
1711 {
1712         struct i7core_pvt *pvt = mci->pvt_info;
1713         char *type, *optype, *err, *msg;
1714         unsigned long error = m->status & 0x1ff0000l;
1715         u32 optypenum = (m->status >> 4) & 0x07;
1716         u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1717         u32 dimm = (m->misc >> 16) & 0x3;
1718         u32 channel = (m->misc >> 18) & 0x3;
1719         u32 syndrome = m->misc >> 32;
1720         u32 errnum = find_first_bit(&error, 32);
1721         int csrow;
1722
1723         if (m->mcgstatus & 1)
1724                 type = "FATAL";
1725         else
1726                 type = "NON_FATAL";
1727
1728         switch (optypenum) {
1729         case 0:
1730                 optype = "generic undef request";
1731                 break;
1732         case 1:
1733                 optype = "read error";
1734                 break;
1735         case 2:
1736                 optype = "write error";
1737                 break;
1738         case 3:
1739                 optype = "addr/cmd error";
1740                 break;
1741         case 4:
1742                 optype = "scrubbing error";
1743                 break;
1744         default:
1745                 optype = "reserved";
1746                 break;
1747         }
1748
1749         switch (errnum) {
1750         case 16:
1751                 err = "read ECC error";
1752                 break;
1753         case 17:
1754                 err = "RAS ECC error";
1755                 break;
1756         case 18:
1757                 err = "write parity error";
1758                 break;
1759         case 19:
1760                 err = "redundancy loss";
1761                 break;
1762         case 20:
1763                 err = "reserved";
1764                 break;
1765         case 21:
1766                 err = "memory range error";
1767                 break;
1768         case 22:
1769                 err = "RTID out of range";
1770                 break;
1771         case 23:
1772                 err = "address parity error";
1773                 break;
1774         case 24:
1775                 err = "byte enable parity error";
1776                 break;
1777         default:
1778                 err = "unknown";
1779         }
1780
1781         /* FIXME: should convert addr into bank and rank information */
1782         msg = kasprintf(GFP_ATOMIC,
1783                 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1784                 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1785                 type, (long long) m->addr, m->cpu, dimm, channel,
1786                 syndrome, core_err_cnt, (long long)m->status,
1787                 (long long)m->misc, optype, err);
1788
1789         debugf0("%s", msg);
1790
1791         csrow = pvt->csrow_map[channel][dimm];
1792
1793         /* Call the helper to output message */
1794         if (m->mcgstatus & 1)
1795                 edac_mc_handle_fbd_ue(mci, csrow, 0,
1796                                 0 /* FIXME: should be channel here */, msg);
1797         else if (!pvt->is_registered)
1798                 edac_mc_handle_fbd_ce(mci, csrow,
1799                                 0 /* FIXME: should be channel here */, msg);
1800
1801         kfree(msg);
1802 }
1803
1804 /*
1805  *      i7core_check_error      Retrieve and process errors reported by the
1806  *                              hardware. Called by the Core module.
1807  */
1808 static void i7core_check_error(struct mem_ctl_info *mci)
1809 {
1810         struct i7core_pvt *pvt = mci->pvt_info;
1811         int i;
1812         unsigned count = 0;
1813         struct mce *m;
1814
1815         /*
1816          * MCE first step: Copy all mce errors into a temporary buffer
1817          * We use a double buffering here, to reduce the risk of
1818          * losing an error.
1819          */
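             /*
              * The pending-entry count below is standard ring-buffer
              * arithmetic. For example (illustrative numbers), with a ring
              * of 32 entries, mce_in = 30 and mce_out = 2, the modular
              * difference (2 + 32 - 30) % 32 = 4 entries are waiting to be
              * copied out.
              */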
1820         smp_rmb();
1821         count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1822                 % MCE_LOG_LEN;
1823         if (!count)
1824                 goto check_ce_error;
1825
1826         m = pvt->mce_outentry;
1827         if (pvt->mce_in + count > MCE_LOG_LEN) {
1828                 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1829
1830                 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1831                 smp_wmb();
1832                 pvt->mce_in = 0;
1833                 count -= l;
1834                 m += l;
1835         }
1836         memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1837         smp_wmb();
1838         pvt->mce_in += count;
1839
1840         smp_rmb();
1841         if (pvt->mce_overrun) {
1842                 i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1843                               pvt->mce_overrun);
1844                 smp_wmb();
1845                 pvt->mce_overrun = 0;
1846         }
1847
1848         /*
1849          * MCE second step: parse errors and display
1850          */
1851         for (i = 0; i < count; i++)
1852                 i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1853
1854         /*
1855          * Now, let's increment CE error counts
1856          */
1857 check_ce_error:
1858         if (!pvt->is_registered)
1859                 i7core_udimm_check_mc_ecc_err(mci);
1860         else
1861                 i7core_rdimm_check_mc_ecc_err(mci);
1862 }
1863
1864 /*
1865  * i7core_mce_check_error       Replicates mcelog routine to get errors
1866  *                              This routine simply queues mcelog errors and
1867  *                              returns. The error itself is handled later
1868  *                              by i7core_check_error.
1869  * WARNING: As this routine is called at NMI time, extra care must be
1870  * taken to avoid deadlocks and to be as fast as possible.
1871  */
1872 static int i7core_mce_check_error(void *priv, struct mce *mce)
1873 {
1874         struct mem_ctl_info *mci = priv;
1875         struct i7core_pvt *pvt = mci->pvt_info;
1876
1877         /*
1878          * Just let mcelog handle it if the error is
1879          * outside the memory controller
1880          */
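             /*
              * Memory controller errors use the compound MCACOD format
              * 0000 0000 1MMM CCCC (per the SDM error-code tables), so
              * (MCACOD >> 7) == 1 identifies them.
              */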
1881         if (((mce->status & 0xffff) >> 7) != 1)
1882                 return 0;
1883
1884         /* Bank 8 registers are the only ones that we know how to handle */
1885         if (mce->bank != 8)
1886                 return 0;
1887
1888 #ifdef CONFIG_SMP
1889         /* Only handle if it is the right mc controller */
1890         if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
1891                 return 0;
1892 #endif
1893
1894         smp_rmb();
1895         if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1896                 smp_wmb();
1897                 pvt->mce_overrun++;
1898                 return 0;
1899         }
1900
1901         /* Copy the memory error into the ring buffer */
1902         memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1903         smp_wmb();
1904         pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1905
1906         /* Handle fatal errors immediately */
1907         if (mce->mcgstatus & 1)
1908                 i7core_check_error(mci);
1909
1910         /* Advise mcelog that the errors were handled */
1911         return 1;
1912 }
1913
1914 /*
1915  * set_sdram_scrub_rate         This routine sets the byte/sec bandwidth scrub
1916  *                              rate in hardware, according to the SCRUBINTERVAL
1917  *                              formula found in the datasheet.
1918  */
1919 static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
1920 {
1921         struct i7core_pvt *pvt = mci->pvt_info;
1922         struct pci_dev *pdev;
1923         const u32 cache_line_size = 64;
1924         const u32 freq_dclk = 800*1000000;
1925         u32 dw_scrub;
1926         u32 dw_ssr;
1927
1928         /* Get data from the MC register, function 2 */
1929         pdev = pvt->pci_mcr[2];
1930         if (!pdev)
1931                 return -ENODEV;
1932
1933         pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
1934
1935         if (new_bw == 0) {
1936                 /* Prepare to disable patrol scrubbing */
1937                 dw_scrub &= ~STARTSCRUB;
1938                 /* Stop the patrol scrub engine */
1939                 write_and_test(pdev, MC_SCRUB_CONTROL, dw_scrub & ~0x00ffffff);
1940
1941                 /* Get current status of scrub rate and set bit to disable */
1942                 pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
1943                 dw_ssr &= ~SSR_MODE_MASK;
1944                 dw_ssr |= SSR_MODE_DISABLE;
1945         } else {
1946                 /*
1947                  * Translate the desired scrub rate into a SCRUBINTERVAL
1948                  * register value and program it.
1949                  */
1950                 dw_scrub = 0x00ffffff & (cache_line_size * freq_dclk / new_bw);
1951
1952                 /* Start the patrol scrub engine */
1953                 pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
1954                                        STARTSCRUB | dw_scrub);
1955
1956                 /* Get current status of scrub rate and set bit to enable */
1957                 pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
1958                 dw_ssr &= ~SSR_MODE_MASK;
1959                 dw_ssr |= SSR_MODE_ENABLE;
1960         }
1961         /* Disable or enable scrubbing */
1962         pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
1963
1964         return new_bw;
1965 }
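     /*
      * A worked example of the formula above (illustrative numbers only):
      * with cache_line_size = 64 bytes and freq_dclk = 800 MHz, requesting
      * new_bw = 1,000,000 bytes/sec gives
      *     SCRUBINTERVAL = 64 * 800,000,000 / 1,000,000 = 51,200 (0xc800)
      * dclk cycles between scrubbed cache lines, which fits comfortably in
      * the 24-bit SCRUBINTERVAL field.
      */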
1966
1967 /*
1968  * get_sdram_scrub_rate         This routine converts the current scrub rate
1969  *                              value into byte/sec bandwidth according to the
1970  *                              SCRUBINTERVAL formula found in the datasheet.
1971  */
1972 static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
1973 {
1974         struct i7core_pvt *pvt = mci->pvt_info;
1975         struct pci_dev *pdev;
1976         const u32 cache_line_size = 64;
1977         const u32 freq_dclk = 800*1000000;
1978         u32 scrubval;
1979
1980         /* Get data from the MC register, function 2 */
1981         pdev = pvt->pci_mcr[2];
1982         if (!pdev)
1983                 return -ENODEV;
1984
1985         /* Get current scrub control data */
1986         pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
1987
1988         /* Mask the highest 8 bits to 0 */
1989         scrubval &=  0x00ffffff;
1990         if (!scrubval)
1991                 return 0;
1992
1993         /* Convert the scrub rate value into byte/sec bandwidth */
1994         return 0xffffffff & (cache_line_size * freq_dclk / (u64) scrubval);
1995 }
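     /*
      * This is the inverse of the computation in set_sdram_scrub_rate(): a
      * SCRUBINTERVAL of 51,200 reads back as 64 * 800,000,000 / 51,200 =
      * 1,000,000 bytes/sec (same illustrative numbers as above).
      */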
1996
1997 static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
1998 {
1999         struct i7core_pvt *pvt = mci->pvt_info;
2000         u32 pci_lock;
2001
2002         /* Unlock writes to pci registers */
2003         pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2004         pci_lock &= ~0x3;
2005         pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2006                                pci_lock | MC_CFG_UNLOCK);
2007
2008         mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
2009         mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
2010 }
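     /*
      * With these callbacks in place, the EDAC core exposes the scrub rate
      * via sysfs; e.g. (illustrative usage, path per the EDAC core layout):
      *     echo 1000000 > /sys/devices/system/edac/mc/mc0/sdram_scrub_rate
      * requests roughly 1 Mbyte/sec of patrol scrubbing.
      */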
2011
2012 static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
2013 {
2014         struct i7core_pvt *pvt = mci->pvt_info;
2015         u32 pci_lock;
2016
2017         /* Lock writes to pci registers */
2018         pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2019         pci_lock &= ~0x3;
2020         pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2021                                pci_lock | MC_CFG_LOCK);
2022 }
2023
2024 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
2025 {
2026         pvt->i7core_pci = edac_pci_create_generic_ctl(
2027                                                 &pvt->i7core_dev->pdev[0]->dev,
2028                                                 EDAC_MOD_STR);
2029         if (unlikely(!pvt->i7core_pci))
2030                 pr_warn("Unable to setup PCI error report via EDAC\n");
2031 }
2032
2033 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
2034 {
2035         if (likely(pvt->i7core_pci))
2036                 edac_pci_release_generic_ctl(pvt->i7core_pci);
2037         else
2038                 i7core_printk(KERN_ERR,
2039                                 "Couldn't find mem_ctl_info for socket %d\n",
2040                                 pvt->i7core_dev->socket);
2041         pvt->i7core_pci = NULL;
2042 }
2043
2044 static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
2045 {
2046         struct mem_ctl_info *mci = i7core_dev->mci;
2047         struct i7core_pvt *pvt;
2048
2049         if (unlikely(!mci || !mci->pvt_info)) {
2050                 debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
2051                         __func__, &i7core_dev->pdev[0]->dev);
2052
2053                 i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
2054                 return;
2055         }
2056
2057         pvt = mci->pvt_info;
2058
2059         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
2060                 __func__, mci, &i7core_dev->pdev[0]->dev);
2061
2062         /* Disable scrubrate setting */
2063         disable_sdram_scrub_setting(mci);
2064
2065         /* Disable MCE NMI handler */
2066         edac_mce_unregister(&pvt->edac_mce);
2067
2068         /* Disable EDAC polling */
2069         i7core_pci_ctl_release(pvt);
2070
2071         /* Remove MC sysfs nodes */
2072         edac_mc_del_mc(mci->dev);
2073
2074         debugf1("%s: free mci struct\n", mci->ctl_name);
2075         kfree(mci->ctl_name);
2076         edac_mc_free(mci);
2077         i7core_dev->mci = NULL;
2078 }
2079
2080 static int i7core_register_mci(struct i7core_dev *i7core_dev)
2081 {
2082         struct mem_ctl_info *mci;
2083         struct i7core_pvt *pvt;
2084         int rc, channels, csrows;
2085
2086         /* Check the number of active and not disabled channels */
2087         rc = i7core_get_active_channels(i7core_dev->socket, &channels, &csrows);
2088         if (unlikely(rc < 0))
2089                 return rc;
2090
2091         /* allocate a new MC control structure */
2092         mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, i7core_dev->socket);
2093         if (unlikely(!mci))
2094                 return -ENOMEM;
2095
2096         debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
2097                 __func__, mci, &i7core_dev->pdev[0]->dev);
2098
2099         pvt = mci->pvt_info;
2100         memset(pvt, 0, sizeof(*pvt));
2101
2102         /* Associates i7core_dev and mci for future usage */
2103         pvt->i7core_dev = i7core_dev;
2104         i7core_dev->mci = mci;
2105
2106         /*
2107          * FIXME: how to handle RDDR3 at MCI level? It is possible to have
2108          * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
2109          * memory channels
2110          */
2111         mci->mtype_cap = MEM_FLAG_DDR3;
2112         mci->edac_ctl_cap = EDAC_FLAG_NONE;
2113         mci->edac_cap = EDAC_FLAG_NONE;
2114         mci->mod_name = "i7core_edac.c";
2115         mci->mod_ver = I7CORE_REVISION;
2116         mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
2117                                   i7core_dev->socket);
2118         mci->dev_name = pci_name(i7core_dev->pdev[0]);
2119         mci->ctl_page_to_phys = NULL;
2120
2121         /* Store pci devices at mci for faster access */
2122         rc = mci_bind_devs(mci, i7core_dev);
2123         if (unlikely(rc < 0))
2124                 goto fail0;
2125
2126         if (pvt->is_registered)
2127                 mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
2128         else
2129                 mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;
2130
2131         /* Get dimm basic config */
2132         get_dimm_config(mci);
2133         /* record ptr to the generic device */
2134         mci->dev = &i7core_dev->pdev[0]->dev;
2135         /* Set the function pointer to an actual operation function */
2136         mci->edac_check = i7core_check_error;
2137
2138         /* Enable scrubrate setting */
2139         enable_sdram_scrub_setting(mci);
2140
2141         /* add this new MC control structure to EDAC's list of MCs */
2142         if (unlikely(edac_mc_add_mc(mci))) {
2143                 debugf0("MC: " __FILE__
2144                         ": %s(): failed edac_mc_add_mc()\n", __func__);
2145                 /* FIXME: perhaps some code should go here that disables error
2146                  * reporting if we just enabled it
2147                  */
2148
2149                 rc = -EINVAL;
2150                 goto fail0;
2151         }
2152
2153         /* Default error mask is any memory */
2154         pvt->inject.channel = 0;
2155         pvt->inject.dimm = -1;
2156         pvt->inject.rank = -1;
2157         pvt->inject.bank = -1;
2158         pvt->inject.page = -1;
2159         pvt->inject.col = -1;
2160
2161         /* allocating generic PCI control info */
2162         i7core_pci_ctl_create(pvt);
2163
2164         /* Register with edac_mce in order to receive memory errors */
2165         pvt->edac_mce.priv = mci;
2166         pvt->edac_mce.check_error = i7core_mce_check_error;
2167         rc = edac_mce_register(&pvt->edac_mce);
2168         if (unlikely(rc < 0)) {
2169                 debugf0("MC: " __FILE__
2170                         ": %s(): failed edac_mce_register()\n", __func__);
2171                 goto fail1;
2172         }
2173
2174         return 0;
2175
2176 fail1:
2177         i7core_pci_ctl_release(pvt);
2178         edac_mc_del_mc(mci->dev);
2179 fail0:
2180         kfree(mci->ctl_name);
2181         edac_mc_free(mci);
2182         i7core_dev->mci = NULL;
2183         return rc;
2184 }
2185
2186 /*
2187  *      i7core_probe    Probe for ONE instance of device to see if it is
2188  *                      present.
2189  *      return:
2190  *              0 for FOUND a device
2191  *              < 0 for error code
2192  */
2193
2194 static int __devinit i7core_probe(struct pci_dev *pdev,
2195                                   const struct pci_device_id *id)
2196 {
2197         int rc;
2198         struct i7core_dev *i7core_dev;
2199
2200         /* get the pci devices we want to reserve for our use */
2201         mutex_lock(&i7core_edac_lock);
2202
2203         /*
2204          * All memory controllers are allocated at the first pass.
2205          */
2206         if (unlikely(probed >= 1)) {
2207                 mutex_unlock(&i7core_edac_lock);
2208                 return -ENODEV;
2209         }
2210         probed++;
2211
2212         rc = i7core_get_all_devices();
2213         if (unlikely(rc < 0))
2214                 goto fail0;
2215
2216         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2217                 rc = i7core_register_mci(i7core_dev);
2218                 if (unlikely(rc < 0))
2219                         goto fail1;
2220         }
2221
2222         i7core_printk(KERN_INFO, "Driver loaded.\n");
2223
2224         mutex_unlock(&i7core_edac_lock);
2225         return 0;
2226
2227 fail1:
2228         list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2229                 i7core_unregister_mci(i7core_dev);
2230
2231         i7core_put_all_devices();
2232 fail0:
2233         mutex_unlock(&i7core_edac_lock);
2234         return rc;
2235 }
2236
2237 /*
2238  *      i7core_remove   destructor for one instance of device
2239  *
2240  */
2241 static void __devexit i7core_remove(struct pci_dev *pdev)
2242 {
2243         struct i7core_dev *i7core_dev;
2244
2245         debugf0(__FILE__ ": %s()\n", __func__);
2246
2247         /*
2248          * We have a problem here: the pdev value for removal will be wrong,
2249          * since it points to the X58 register used to detect that the machine
2250          * is a Nehalem or newer design. However, due to the way several PCI
2251          * devices are grouped together to provide MC functionality, we need
2252          * to use a different method for releasing the devices.
2253          */
2254
2255         mutex_lock(&i7core_edac_lock);
2256
2257         if (unlikely(!probed)) {
2258                 mutex_unlock(&i7core_edac_lock);
2259                 return;
2260         }
2261
2262         list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2263                 i7core_unregister_mci(i7core_dev);
2264
2265         /* Release PCI resources */
2266         i7core_put_all_devices();
2267
2268         probed--;
2269
2270         mutex_unlock(&i7core_edac_lock);
2271 }
2272
2273 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2274
2275 /*
2276  *      i7core_driver   pci_driver structure for this module
2277  *
2278  */
2279 static struct pci_driver i7core_driver = {
2280         .name     = "i7core_edac",
2281         .probe    = i7core_probe,
2282         .remove   = __devexit_p(i7core_remove),
2283         .id_table = i7core_pci_tbl,
2284 };
2285
2286 /*
2287  *      i7core_init             Module entry function
2288  *                      Try to initialize this module for its devices
2289  */
2290 static int __init i7core_init(void)
2291 {
2292         int pci_rc;
2293
2294         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2295
2296         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2297         opstate_init();
2298
2299         if (use_pci_fixup)
2300                 i7core_xeon_pci_fixup(pci_dev_table);
2301
2302         pci_rc = pci_register_driver(&i7core_driver);
2303
2304         if (pci_rc >= 0)
2305                 return 0;
2306
2307         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2308                       pci_rc);
2309
2310         return pci_rc;
2311 }
2312
2313 /*
2314  *      i7core_exit()   Module exit function
2315  *                      Unregister the driver
2316  */
2317 static void __exit i7core_exit(void)
2318 {
2319         debugf2("MC: " __FILE__ ": %s()\n", __func__);
2320         pci_unregister_driver(&i7core_driver);
2321 }
2322
2323 module_init(i7core_init);
2324 module_exit(i7core_exit);
2325
2326 MODULE_LICENSE("GPL");
2327 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2328 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2329 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2330                    I7CORE_REVISION);
2331
2332 module_param(edac_op_state, int, 0444);
2333 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");