git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge tag 'x86-mce-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 May 2012 23:14:12 +0000 (16:14 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 May 2012 23:14:12 +0000 (16:14 -0700)
Pull x86/mce merge window patches from Tony Luck:
 "Including two that make error_context() checks less sucky"

* tag 'x86-mce-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  x86/mce: Add instruction recovery signatures to mce-severity table
  x86/mce: Fix check for processor context when machine check was taken.
  MCE: Fix vm86 handling for 32bit mce handler
  x86/mce Add validation check before GHES error is recorded
  x86/mce: Avoid reading every machine check bank register twice.

1  2 
arch/x86/kernel/cpu/mcheck/mce.c

index 2afcbd253e1da1768a10eeb6bdb9a74d286048b7,5f793e6c854bb095e65890c6968fcbd10aeb9fc3..b772dd6ad45016e5e943ec37cafdef5fac03a152
@@@ -437,6 -437,14 +437,14 @@@ static inline void mce_gather_info(stru
                if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
                        m->ip = regs->ip;
                        m->cs = regs->cs;
+                       /*
+                        * When in VM86 mode make the cs look like ring 3
+                        * always. This is a lie, but it's better than passing
+                        * the additional vm86 bit around everywhere.
+                        */
+                       if (v8086_mode(regs))
+                               m->cs |= 3;
                }
                /* Use accurate RIP reporting if available. */
                if (rip_msr)
@@@ -583,7 -591,7 +591,7 @@@ void machine_check_poll(enum mcp_flags 
        struct mce m;
        int i;
  
 -      percpu_inc(mce_poll_count);
 +      this_cpu_inc(mce_poll_count);
  
        mce_gather_info(&m, NULL);
  
@@@ -641,16 -649,18 +649,18 @@@ EXPORT_SYMBOL_GPL(machine_check_poll)
   * Do a quick check if any of the events requires a panic.
   * This decides if we keep the events around or clear them.
   */
- static int mce_no_way_out(struct mce *m, char **msg)
+ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp)
  {
-       int i;
+       int i, ret = 0;
  
        for (i = 0; i < banks; i++) {
                m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+               if (m->status & MCI_STATUS_VAL)
+                       __set_bit(i, validp);
                if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
-                       return 1;
+                       ret = 1;
        }
-       return 0;
+       return ret;
  }
  
  /*
@@@ -945,10 -955,9 +955,10 @@@ struct mce_info 
        atomic_t                inuse;
        struct task_struct      *t;
        __u64                   paddr;
 +      int                     restartable;
  } mce_info[MCE_INFO_MAX];
  
 -static void mce_save_info(__u64 addr)
 +static void mce_save_info(__u64 addr, int c)
  {
        struct mce_info *mi;
  
                if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
                        mi->t = current;
                        mi->paddr = addr;
 +                      mi->restartable = c;
                        return;
                }
        }
@@@ -1013,11 -1021,12 +1023,12 @@@ void do_machine_check(struct pt_regs *r
         */
        int kill_it = 0;
        DECLARE_BITMAP(toclear, MAX_NR_BANKS);
+       DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
        char *msg = "Unknown";
  
        atomic_inc(&mce_entry);
  
 -      percpu_inc(mce_exception_count);
 +      this_cpu_inc(mce_exception_count);
  
        if (!banks)
                goto out;
        final = &__get_cpu_var(mces_seen);
        *final = m;
  
-       no_way_out = mce_no_way_out(&m, &msg);
+       memset(valid_banks, 0, sizeof(valid_banks));
+       no_way_out = mce_no_way_out(&m, &msg, valid_banks);
  
        barrier();
  
        order = mce_start(&no_way_out);
        for (i = 0; i < banks; i++) {
                __clear_bit(i, toclear);
+               if (!test_bit(i, valid_banks))
+                       continue;
                if (!mce_banks[i].ctl)
                        continue;
  
                        mce_panic("Fatal machine check on current CPU", &m, msg);
                if (worst == MCE_AR_SEVERITY) {
                        /* schedule action before return to userland */
 -                      mce_save_info(m.addr);
 +                      mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV);
                        set_thread_flag(TIF_MCE_NOTIFY);
                } else if (kill_it) {
                        force_sig(SIGBUS, current);
@@@ -1181,13 -1193,7 +1195,13 @@@ void mce_notify_process(void
  
        pr_err("Uncorrected hardware memory error in user-access at %llx",
                 mi->paddr);
 -      if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) {
 +      /*
 +       * We must call memory_failure() here even if the current process is
 +       * doomed. We still need to mark the page as poisoned and alert any
 +       * other users of the page.
 +       */
 +      if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 ||
 +                         mi->restartable == 0) {
                pr_err("Memory error not recovered");
                force_sig(SIGBUS, current);
        }
@@@ -1431,43 -1437,6 +1445,43 @@@ static int __cpuinit __mcheck_cpu_apply
                 */
                 if (c->x86 == 6 && banks > 0)
                        mce_banks[0].ctl = 0;
 +
 +               /*
 +                * Turn off MC4_MISC thresholding banks on those models since
 +                * they're not supported there.
 +                */
 +               if (c->x86 == 0x15 &&
 +                   (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
 +                       int i;
 +                       u64 val, hwcr;
 +                       bool need_toggle;
 +                       u32 msrs[] = {
 +                              0x00000413, /* MC4_MISC0 */
 +                              0xc0000408, /* MC4_MISC1 */
 +                       };
 +
 +                       rdmsrl(MSR_K7_HWCR, hwcr);
 +
 +                       /* McStatusWrEn has to be set */
 +                       need_toggle = !(hwcr & BIT(18));
 +
 +                       if (need_toggle)
 +                               wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
 +
 +                       for (i = 0; i < ARRAY_SIZE(msrs); i++) {
 +                               rdmsrl(msrs[i], val);
 +
 +                               /* CntP bit set? */
 +                               if (val & BIT(62)) {
 +                                       val &= ~BIT(62);
 +                                       wrmsrl(msrs[i], val);
 +                               }
 +                       }
 +
 +                       /* restore old settings */
 +                       if (need_toggle)
 +                               wrmsrl(MSR_K7_HWCR, hwcr);
 +               }
        }
  
        if (c->x86_vendor == X86_VENDOR_INTEL) {