/*
 *  (c) 2005-2015 Advanced Micro Devices, Inc.
 *  Your use of this code is subject to the terms and conditions of the
 *  GNU general public license version 2. See "COPYING" or
 *  http://www.gnu.org/licenses/gpl.html
 *
 *  Written by Jacob Shin - AMD, Inc.
 *  Maintained by: Borislav Petkov <bp@alien8.de>
 *
 *  All MC4_MISCi registers are shared between cores on a node.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

#define NR_BLOCKS         5
#define THRESHOLD_MAX     0xFFF
#define INT_TYPE_APIC     0x00020000
#define MASK_VALID_HI     0x80000000
#define MASK_CNTP_HI      0x40000000
#define MASK_LOCKED_HI    0x20000000
#define MASK_LVTOFF_HI    0x00F00000
#define MASK_COUNT_EN_HI  0x00080000
#define MASK_INT_TYPE_HI  0x00060000
#define MASK_OVERFLOW_HI  0x00010000
#define MASK_ERR_COUNT_HI 0x00000FFF
#define MASK_BLKPTR_LO    0xFF000000
#define MCG_XBLK_ADDR     0xC0000400
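/*
 * The MASK_*_HI/_LO values above pick apart the two halves of an
 * MCi_MISC(j) thresholding register as read with rdmsr(): the high word
 * carries the valid, counter-present and locked flags, the LVT offset,
 * the counter-enable and interrupt-type fields, the overflow flag and
 * the 12-bit error counter, while BLKPTR in the low word of block 0
 * points to the additional blocks starting at MCG_XBLK_ADDR.
 */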

/* Deferred error settings */
#define MSR_CU_DEF_ERR          0xC0000410
#define MASK_DEF_LVTOFF         0x000000F0
#define MASK_DEF_INT_TYPE       0x00000006
#define DEF_LVT_OFF             0x2
#define DEF_INT_TYPE_APIC       0x2

/* Scalable MCA: */

/* Threshold LVT offset is at MSR 0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF        0xF000

/*
 * OS is required to set the MCAX bit to acknowledge that it is now using the
 * new MSR ranges and new registers under each bank. It also means that the OS
 * will configure deferred errors in the new MCx_CONFIG register. If the bit is
 * not set, uncorrectable errors will cause a system panic.
 */
#define SMCA_MCAX_EN_OFF        0x1

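/*
 * Sysfs directory names for the threshold banks, indexed by bank number.
 * The empty slot keeps the array aligned with the bank numbering for the
 * one bank that gets no threshold directory of its own here; individual
 * blocks under the shared northbridge bank are named via bank4_names()
 * below instead.
 */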
static const char * const th_names[] = {
        "load_store",
        "insn_fetch",
        "combined_unit",
        "",
        "northbridge",
        "execution_unit",
};

static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */

static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);

static void default_deferred_error_interrupt(void)
{
        pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;

/*
 * CPU Initialization
 */

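/*
 * Argument package for threshold_restart_bank(): it is filled in on the
 * requesting CPU and carried to the CPU that owns the bank, either via
 * smp_call_function_single() from the sysfs handlers or by a direct call
 * during CPU init when we are already running on the right CPU.
 */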
struct thresh_restart {
        struct threshold_block  *b;
        int                     reset;
        int                     set_lvt_off;
        int                     lvt_off;
        u16                     old_limit;
};

static inline bool is_shared_bank(int bank)
{
        /*
         * Scalable MCA provides for only one core to have access to the MSRs of
         * a shared bank.
         */
        if (mce_flags.smca)
                return false;

        /* Bank 4 is for northbridge reporting and is thus shared */
        return (bank == 4);
}

static const char *bank4_names(const struct threshold_block *b)
{
        switch (b->address) {
        /* MSR4_MISC0 */
        case 0x00000413:
                return "dram";

        case 0xc0000408:
                return "ht_links";

        case 0xc0000409:
                return "l3_cache";

        default:
                WARN(1, "Funny MSR: 0x%08x\n", b->address);
                return "";
        }
};


static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
{
        /*
         * bank 4 supports APIC LVT interrupts implicitly since forever.
         */
        if (bank == 4)
                return true;

        /*
         * IntP: interrupt present; if this bit is set, the thresholding
         * bank can generate APIC LVT interrupts
         */
        return msr_high_bits & BIT(28);
}

static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
        int msr = (hi & MASK_LVTOFF_HI) >> 20;

        if (apic < 0) {
                pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
                       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
                       b->bank, b->block, b->address, hi, lo);
                return 0;
        }

        if (apic != msr) {
                /*
                 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
                 * the BIOS provides the value. The original field where LVT offset
                 * was set is reserved. Return early here:
                 */
                if (mce_flags.smca)
                        return 0;

                pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
                       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
                       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
                return 0;
        }

        return 1;
};

/*
 * Called via smp_call_function_single(), must be called with correct
 * cpu affinity.
 */
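/*
 * The error counter in the MISC register counts up toward THRESHOLD_MAX,
 * so a reset seeds the count field with THRESHOLD_MAX - threshold_limit
 * and threshold_limit further errors exhaust the remaining headroom and
 * raise the overflow/threshold interrupt. show_error_count() below
 * subtracts the same bias when reporting the number of errors seen.
 */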
static void threshold_restart_bank(void *_tr)
{
        struct thresh_restart *tr = _tr;
        u32 hi, lo;

        rdmsr(tr->b->address, lo, hi);

        if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
                tr->reset = 1;  /* limit cannot be lower than err count */

        if (tr->reset) {                /* reset err count and overflow bit */
                hi =
                    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
                    (THRESHOLD_MAX - tr->b->threshold_limit);
        } else if (tr->old_limit) {     /* change limit w/o reset */
                int new_count = (hi & THRESHOLD_MAX) +
                    (tr->old_limit - tr->b->threshold_limit);

                hi = (hi & ~MASK_ERR_COUNT_HI) |
                    (new_count & THRESHOLD_MAX);
        }

        /* clear IntType */
        hi &= ~MASK_INT_TYPE_HI;

        if (!tr->b->interrupt_capable)
                goto done;

        if (tr->set_lvt_off) {
                if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
                        /* set new lvt offset */
                        hi &= ~MASK_LVTOFF_HI;
                        hi |= tr->lvt_off << 20;
                }
        }

        if (tr->b->interrupt_enable)
                hi |= INT_TYPE_APIC;

 done:

        hi |= MASK_COUNT_EN_HI;
        wrmsr(tr->b->address, lo, hi);
}

static void mce_threshold_block_init(struct threshold_block *b, int offset)
{
        struct thresh_restart tr = {
                .b                      = b,
                .set_lvt_off            = 1,
                .lvt_off                = offset,
        };

        b->threshold_limit              = THRESHOLD_MAX;
        threshold_restart_bank(&tr);
};

static int setup_APIC_mce_threshold(int reserved, int new)
{
        if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
                                              APIC_EILVT_MSG_FIX, 0))
                return new;

        return reserved;
}

static int setup_APIC_deferred_error(int reserved, int new)
{
        if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
                                              APIC_EILVT_MSG_FIX, 0))
                return new;

        return reserved;
}

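/*
 * Enable the deferred error interrupt: read the BIOS-provided LVT offset
 * from MSR_CU_DEF_ERR (falling back to DEF_LVT_OFF if the BIOS left it
 * zero), program the corresponding extended APIC LVT entry, point the
 * deferred error vector at the AMD handler and select APIC delivery in
 * the MSR.
 */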
static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
{
        u32 low = 0, high = 0;
        int def_offset = -1, def_new;

        if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
                return;

        def_new = (low & MASK_DEF_LVTOFF) >> 4;
        if (!(low & MASK_DEF_LVTOFF)) {
                pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
                def_new = DEF_LVT_OFF;
                low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
        }

        def_offset = setup_APIC_deferred_error(def_offset, def_new);
        if ((def_offset == def_new) &&
            (deferred_error_int_vector != amd_deferred_error_interrupt))
                deferred_error_int_vector = amd_deferred_error_interrupt;

        low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
        wrmsr(MSR_CU_DEF_ERR, low, high);
}

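/*
 * Set up one thresholding block: note the bank in this CPU's bank_map
 * when its first block is seen, determine whether the block can raise an
 * LVT interrupt and, if so, pick up the LVT offset (from
 * MSR_CU_DEF_ERR[15:12] on SMCA parts, after enabling MCAX in MCx_CONFIG,
 * or from the MISC register's high word otherwise), reserve the APIC
 * EILVT entry and finally program the block via mce_threshold_block_init().
 */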
static int
prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
                        int offset, u32 misc_high)
{
        unsigned int cpu = smp_processor_id();
        struct threshold_block b;
        int new;

        if (!block)
                per_cpu(bank_map, cpu) |= (1 << bank);

        memset(&b, 0, sizeof(b));
        b.cpu                   = cpu;
        b.bank                  = bank;
        b.block                 = block;
        b.address               = addr;
        b.interrupt_capable     = lvt_interrupt_supported(bank, misc_high);

        if (!b.interrupt_capable)
                goto done;

        b.interrupt_enable = 1;

        if (mce_flags.smca) {
                u32 smca_low, smca_high;
                u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);

                if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
                        smca_high |= SMCA_MCAX_EN_OFF;
                        wrmsr(smca_addr, smca_low, smca_high);
                }

                /* Gather LVT offset for thresholding: */
                if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
                        goto out;

                new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
        } else {
                new = (misc_high & MASK_LVTOFF_HI) >> 20;
        }

        offset = setup_APIC_mce_threshold(offset, new);

        if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
                mce_threshold_vector = amd_threshold_interrupt;

done:
        mce_threshold_block_init(&b, offset);

out:
        return offset;
}

/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
        u32 low = 0, high = 0, address = 0;
        unsigned int bank, block;
        int offset = -1;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                for (block = 0; block < NR_BLOCKS; ++block) {
                        if (block == 0)
                                address = MSR_IA32_MCx_MISC(bank);
                        else if (block == 1) {
                                address = (low & MASK_BLKPTR_LO) >> 21;
                                if (!address)
                                        break;

                                address += MCG_XBLK_ADDR;
                        } else
                                ++address;

                        if (rdmsr_safe(address, &low, &high))
                                break;

                        if (!(high & MASK_VALID_HI))
                                continue;

                        if (!(high & MASK_CNTP_HI)  ||
                             (high & MASK_LOCKED_HI))
                                continue;

                        offset = prepare_threshold_block(bank, block, address, offset, high);
                }
        }

        if (mce_flags.succor)
                deferred_error_interrupt_enable(c);
}

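/*
 * Read MCx_STATUS (and MCx_ADDR when valid) for the given bank, log the
 * resulting struct mce and clear the status register. @misc is only
 * stored for threshold events, where it carries the thresholding block's
 * MISC register contents.
 */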
static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
{
        struct mce m;
        u64 status;

        rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
        if (!(status & MCI_STATUS_VAL))
                return;

        mce_setup(&m);

        m.status = status;
        m.bank = bank;

        if (threshold_err)
                m.misc = misc;

        if (m.status & MCI_STATUS_ADDRV)
                rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);

        mce_log(&m);
        wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
}

static inline void __smp_deferred_error_interrupt(void)
{
        inc_irq_stat(irq_deferred_error_count);
        deferred_error_int_vector();
}

asmlinkage __visible void smp_deferred_error_interrupt(void)
{
        entering_irq();
        __smp_deferred_error_interrupt();
        exiting_ack_irq();
}

asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
{
        entering_irq();
        trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
        __smp_deferred_error_interrupt();
        trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
        exiting_ack_irq();
}

/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
        u64 status;
        unsigned int bank;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                rdmsrl(MSR_IA32_MCx_STATUS(bank), status);

                if (!(status & MCI_STATUS_VAL) ||
                    !(status & MCI_STATUS_DEFERRED))
                        continue;

                __log_error(bank, false, 0);
                break;
        }
}

/*
 * APIC Interrupt Handler
 */

/*
 * The threshold interrupt handler services THRESHOLD_APIC_VECTOR.
 * The interrupt fires when error_count reaches threshold_limit.
 * The handler simply logs the event to mcelog with the software-defined
 * bank number.
 */
static void amd_threshold_interrupt(void)
{
        u32 low = 0, high = 0, address = 0;
        int cpu = smp_processor_id();
        unsigned int bank, block;

        /* assume first bank caused it */
        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                        continue;
                for (block = 0; block < NR_BLOCKS; ++block) {
                        if (block == 0) {
                                address = MSR_IA32_MCx_MISC(bank);
                        } else if (block == 1) {
                                address = (low & MASK_BLKPTR_LO) >> 21;
                                if (!address)
                                        break;
                                address += MCG_XBLK_ADDR;
                        } else {
                                ++address;
                        }

                        if (rdmsr_safe(address, &low, &high))
                                break;

                        if (!(high & MASK_VALID_HI)) {
                                if (block)
                                        continue;
                                else
                                        break;
                        }

                        if (!(high & MASK_CNTP_HI)  ||
                             (high & MASK_LOCKED_HI))
                                continue;

                        /*
                         * Log the machine check that caused the threshold
                         * event.
                         */
                        if (high & MASK_OVERFLOW_HI)
                                goto log;
                }
        }
        return;

log:
        __log_error(bank, true, ((u64)high << 32) | low);
}

/*
 * Sysfs Interface
 */

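/*
 * One threshold_attr backs each sysfs file under a threshold block; the
 * generic show()/store() wrappers below dispatch to these handlers with
 * the owning threshold_block already resolved from the kobject.
 */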
struct threshold_attr {
        struct attribute attr;
        ssize_t (*show) (struct threshold_block *, char *);
        ssize_t (*store) (struct threshold_block *, const char *, size_t count);
};

#define SHOW_FIELDS(name)                                               \
static ssize_t show_ ## name(struct threshold_block *b, char *buf)      \
{                                                                       \
        return sprintf(buf, "%lu\n", (unsigned long) b->name);          \
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)

static ssize_t
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
{
        struct thresh_restart tr;
        unsigned long new;

        if (!b->interrupt_capable)
                return -EINVAL;

        if (kstrtoul(buf, 0, &new) < 0)
                return -EINVAL;

        b->interrupt_enable = !!new;

        memset(&tr, 0, sizeof(tr));
        tr.b            = b;

        smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

        return size;
}

static ssize_t
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
{
        struct thresh_restart tr;
        unsigned long new;

        if (kstrtoul(buf, 0, &new) < 0)
                return -EINVAL;

        if (new > THRESHOLD_MAX)
                new = THRESHOLD_MAX;
        if (new < 1)
                new = 1;

        memset(&tr, 0, sizeof(tr));
        tr.old_limit = b->threshold_limit;
        b->threshold_limit = new;
        tr.b = b;

        smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

        return size;
}

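/*
 * Report the number of errors seen since the last counter reset: the raw
 * count read from the MSR still contains the THRESHOLD_MAX -
 * threshold_limit bias written by threshold_restart_bank(), so subtract
 * it back out.
 */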
static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
        u32 lo, hi;

        rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);

        return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
                                     (THRESHOLD_MAX - b->threshold_limit)));
}

static struct threshold_attr error_count = {
        .attr = {.name = __stringify(error_count), .mode = 0444 },
        .show = show_error_count,
};

#define RW_ATTR(val)                                                    \
static struct threshold_attr val = {                                    \
        .attr   = {.name = __stringify(val), .mode = 0644 },            \
        .show   = show_## val,                                          \
        .store  = store_## val,                                         \
};

RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);

static struct attribute *default_attrs[] = {
        &threshold_limit.attr,
        &error_count.attr,
        NULL,   /* possibly interrupt_enable if supported, see below */
        NULL,
};

#define to_block(k)     container_of(k, struct threshold_block, kobj)
#define to_attr(a)      container_of(a, struct threshold_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
        struct threshold_block *b = to_block(kobj);
        struct threshold_attr *a = to_attr(attr);
        ssize_t ret;

        ret = a->show ? a->show(b, buf) : -EIO;

        return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
                     const char *buf, size_t count)
{
        struct threshold_block *b = to_block(kobj);
        struct threshold_attr *a = to_attr(attr);
        ssize_t ret;

        ret = a->store ? a->store(b, buf, count) : -EIO;

        return ret;
}

static const struct sysfs_ops threshold_ops = {
        .show                   = show,
        .store                  = store,
};

static struct kobj_type threshold_ktype = {
        .sysfs_ops              = &threshold_ops,
        .default_attrs          = default_attrs,
};

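/*
 * Allocate and register a threshold_block for @address and then recurse
 * to the next block of the bank: block 0's BLKPTR field points at the
 * extended block MSRs starting at MCG_XBLK_ADDR, subsequent blocks are
 * simply at the next MSR address. All blocks of a bank are linked on the
 * first block's miscj list.
 */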
static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
                                     unsigned int block, u32 address)
{
        struct threshold_block *b = NULL;
        u32 low, high;
        int err;

        if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
                return 0;

        if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
                return 0;

        if (!(high & MASK_VALID_HI)) {
                if (block)
                        goto recurse;
                else
                        return 0;
        }

        if (!(high & MASK_CNTP_HI)  ||
             (high & MASK_LOCKED_HI))
                goto recurse;

        b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
        if (!b)
                return -ENOMEM;

        b->block                = block;
        b->bank                 = bank;
        b->cpu                  = cpu;
        b->address              = address;
        b->interrupt_enable     = 0;
        b->interrupt_capable    = lvt_interrupt_supported(bank, high);
        b->threshold_limit      = THRESHOLD_MAX;

        if (b->interrupt_capable) {
                threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
                b->interrupt_enable = 1;
        } else {
                threshold_ktype.default_attrs[2] = NULL;
        }

        INIT_LIST_HEAD(&b->miscj);

        if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
                list_add(&b->miscj,
                         &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
        } else {
                per_cpu(threshold_banks, cpu)[bank]->blocks = b;
        }

        err = kobject_init_and_add(&b->kobj, &threshold_ktype,
                                   per_cpu(threshold_banks, cpu)[bank]->kobj,
                                   (bank == 4 ? bank4_names(b) : th_names[bank]));
        if (err)
                goto out_free;
recurse:
        if (!block) {
                address = (low & MASK_BLKPTR_LO) >> 21;
                if (!address)
                        return 0;
                address += MCG_XBLK_ADDR;
        } else {
                ++address;
        }

        err = allocate_threshold_blocks(cpu, bank, ++block, address);
        if (err)
                goto out_free;

        if (b)
                kobject_uevent(&b->kobj, KOBJ_ADD);

        return err;

out_free:
        if (b) {
                kobject_put(&b->kobj);
                list_del(&b->miscj);
                kfree(b);
        }
        return err;
}

static int __threshold_add_blocks(struct threshold_bank *b)
{
        struct list_head *head = &b->blocks->miscj;
        struct threshold_block *pos = NULL;
        struct threshold_block *tmp = NULL;
        int err = 0;

        err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
        if (err)
                return err;

        list_for_each_entry_safe(pos, tmp, head, miscj) {

                err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
                if (err) {
                        list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
                                kobject_del(&pos->kobj);

                        return err;
                }
        }
        return err;
}

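/*
 * Create the sysfs hierarchy for one bank on one CPU. For the shared
 * northbridge bank, the first CPU on a node allocates the descriptor and
 * stores it in the node's amd_northbridge; later CPUs on the node just
 * link the existing kobjects into their own sysfs directory and bump the
 * reference count.
 */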
static int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
        struct device *dev = per_cpu(mce_device, cpu);
        struct amd_northbridge *nb = NULL;
        struct threshold_bank *b = NULL;
        const char *name = th_names[bank];
        int err = 0;

        if (is_shared_bank(bank)) {
                nb = node_to_amd_nb(amd_get_nb_id(cpu));

                /* threshold descriptor already initialized on this node? */
                if (nb && nb->bank4) {
                        /* yes, use it */
                        b = nb->bank4;
                        err = kobject_add(b->kobj, &dev->kobj, name);
                        if (err)
                                goto out;

                        per_cpu(threshold_banks, cpu)[bank] = b;
                        atomic_inc(&b->cpus);

                        err = __threshold_add_blocks(b);

                        goto out;
                }
        }

        b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
        if (!b) {
                err = -ENOMEM;
                goto out;
        }

        b->kobj = kobject_create_and_add(name, &dev->kobj);
        if (!b->kobj) {
                err = -EINVAL;
                goto out_free;
        }

        per_cpu(threshold_banks, cpu)[bank] = b;

        if (is_shared_bank(bank)) {
                atomic_set(&b->cpus, 1);

                /* nb is already initialized, see above */
                if (nb) {
                        WARN_ON(nb->bank4);
                        nb->bank4 = b;
                }
        }

        err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank));
        if (!err)
                goto out;

 out_free:
        kfree(b);

 out:
        return err;
}

/* create dir/files for all valid threshold banks */
static int threshold_create_device(unsigned int cpu)
{
        unsigned int bank;
        struct threshold_bank **bp;
        int err = 0;

        bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
                     GFP_KERNEL);
        if (!bp)
                return -ENOMEM;

        per_cpu(threshold_banks, cpu) = bp;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                        continue;
                err = threshold_create_bank(cpu, bank);
                if (err)
                        return err;
        }

        return err;
}

static void deallocate_threshold_block(unsigned int cpu,
                                                 unsigned int bank)
{
        struct threshold_block *pos = NULL;
        struct threshold_block *tmp = NULL;
        struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];

        if (!head)
                return;

        list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
                kobject_put(&pos->kobj);
                list_del(&pos->miscj);
                kfree(pos);
        }

        kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
        per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
}

static void __threshold_remove_blocks(struct threshold_bank *b)
{
        struct threshold_block *pos = NULL;
        struct threshold_block *tmp = NULL;

        kobject_del(b->kobj);

        list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
                kobject_del(&pos->kobj);
}

static void threshold_remove_bank(unsigned int cpu, int bank)
{
        struct amd_northbridge *nb;
        struct threshold_bank *b;

        b = per_cpu(threshold_banks, cpu)[bank];
        if (!b)
                return;

        if (!b->blocks)
                goto free_out;

        if (is_shared_bank(bank)) {
                if (!atomic_dec_and_test(&b->cpus)) {
                        __threshold_remove_blocks(b);
                        per_cpu(threshold_banks, cpu)[bank] = NULL;
                        return;
                } else {
                        /*
                         * the last CPU on this node using the shared bank is
                         * going away, remove that bank now.
                         */
                        nb = node_to_amd_nb(amd_get_nb_id(cpu));
                        nb->bank4 = NULL;
                }
        }

        deallocate_threshold_block(cpu, bank);

free_out:
        kobject_del(b->kobj);
        kobject_put(b->kobj);
        kfree(b);
        per_cpu(threshold_banks, cpu)[bank] = NULL;
}

static void threshold_remove_device(unsigned int cpu)
{
        unsigned int bank;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                        continue;
                threshold_remove_bank(cpu, bank);
        }
        kfree(per_cpu(threshold_banks, cpu));
}

/* get notified when a cpu comes on/off */
static void
amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
{
        switch (action) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                threshold_create_device(cpu);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                threshold_remove_device(cpu);
                break;
        default:
                break;
        }
}

static __init int threshold_init_device(void)
{
        unsigned lcpu = 0;

        /* to hit CPUs online before the notifier is up */
        for_each_online_cpu(lcpu) {
                int err = threshold_create_device(lcpu);

                if (err)
                        return err;
        }
        threshold_cpu_callback = amd_64_threshold_cpu_callback;

        return 0;
}
/*
 * There are three functions which need to be run as initcalls in a fixed
 * order:
 * 1. xen_late_init_mcelog
 * 2. mcheck_init_device
 * 3. threshold_init_device
 *
 * xen_late_init_mcelog must register xen_mce_chrdev_device before the
 * native mce_chrdev_device registration when running under Xen;
 *
 * mcheck_init_device must run before threshold_init_device so that
 * mce_device is initialized, otherwise a NULL pointer dereference will
 * cause a panic.
 *
 * So we use the following initcalls:
 * 1. device_initcall(xen_late_init_mcelog);
 * 2. device_initcall_sync(mcheck_init_device);
 * 3. late_initcall(threshold_init_device);
 *
 * When running under Xen, the initcall order is 1, 2, 3;
 * on bare metal, 1 is skipped and only 2 and 3 run.
 */
late_initcall(threshold_init_device);