/*
 * Machine check exception handling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */
23 #define pr_fmt(fmt) "mce: " fmt
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/mce.h>
32 static DEFINE_PER_CPU(int, mce_nest_count);
33 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
35 /* Queue for delayed MCE events. */
36 static DEFINE_PER_CPU(int, mce_queue_count);
37 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
39 static void machine_check_process_queued_event(struct irq_work *work);
40 static struct irq_work mce_event_process_work = {
41 .func = machine_check_process_queued_event,
44 static void mce_set_error_info(struct machine_check_event *mce,
45 struct mce_error_info *mce_err)
47 mce->error_type = mce_err->error_type;
48 switch (mce_err->error_type) {
49 case MCE_ERROR_TYPE_UE:
50 mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
52 case MCE_ERROR_TYPE_SLB:
53 mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
55 case MCE_ERROR_TYPE_ERAT:
56 mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
58 case MCE_ERROR_TYPE_TLB:
59 mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
61 case MCE_ERROR_TYPE_USER:
62 mce->u.user_error.user_error_type = mce_err->u.user_error_type;
64 case MCE_ERROR_TYPE_RA:
65 mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
67 case MCE_ERROR_TYPE_LINK:
68 mce->u.link_error.link_error_type = mce_err->u.link_error_type;
70 case MCE_ERROR_TYPE_UNKNOWN:
77 * Decode and save high level MCE information into per cpu buffer which
78 * is an array of machine_check_event structure.
80 void save_mce_event(struct pt_regs *regs, long handled,
81 struct mce_error_info *mce_err,
82 uint64_t nip, uint64_t addr)
84 int index = __this_cpu_inc_return(mce_nest_count) - 1;
85 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
88 * Return if we don't have enough space to log mce event.
89 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
90 * the check below will stop buffer overrun.
92 if (index >= MAX_MC_EVT)
95 /* Populate generic machine check info */
96 mce->version = MCE_V1;
98 mce->srr1 = regs->msr;
99 mce->gpr3 = regs->gpr[3];
102 /* Mark it recovered if we have handled it and MSR(RI=1). */
103 if (handled && (regs->msr & MSR_RI))
104 mce->disposition = MCE_DISPOSITION_RECOVERED;
106 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
108 mce->initiator = mce_err->initiator;
109 mce->severity = mce_err->severity;
112 * Populate the mce error_type and type-specific error_type.
114 mce_set_error_info(mce, mce_err);
119 if (mce->error_type == MCE_ERROR_TYPE_TLB) {
120 mce->u.tlb_error.effective_address_provided = true;
121 mce->u.tlb_error.effective_address = addr;
122 } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
123 mce->u.slb_error.effective_address_provided = true;
124 mce->u.slb_error.effective_address = addr;
125 } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
126 mce->u.erat_error.effective_address_provided = true;
127 mce->u.erat_error.effective_address = addr;
128 } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
129 mce->u.user_error.effective_address_provided = true;
130 mce->u.user_error.effective_address = addr;
131 } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
132 mce->u.ra_error.effective_address_provided = true;
133 mce->u.ra_error.effective_address = addr;
134 } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
135 mce->u.link_error.effective_address_provided = true;
136 mce->u.link_error.effective_address = addr;
137 } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
138 mce->u.ue_error.effective_address_provided = true;
139 mce->u.ue_error.effective_address = addr;
146 * mce Pointer to machine_check_event structure to be filled.
147 * release Flag to indicate whether to free the event slot or not.
148 * 0 <= do not release the mce event. Caller will invoke
149 * release_mce_event() once event has been consumed.
150 * 1 <= release the slot.
155 * get_mce_event() will be called by platform specific machine check
156 * handle routine and in KVM.
157 * When we call get_mce_event(), we are still in interrupt context and
158 * preemption will not be scheduled until ret_from_expect() routine
161 int get_mce_event(struct machine_check_event *mce, bool release)
163 int index = __this_cpu_read(mce_nest_count) - 1;
164 struct machine_check_event *mc_evt;
171 /* Check if we have MCE info to process. */
172 if (index < MAX_MC_EVT) {
173 mc_evt = this_cpu_ptr(&mce_event[index]);
174 /* Copy the event structure and release the original */
181 /* Decrement the count to free the slot. */
183 __this_cpu_dec(mce_nest_count);
188 void release_mce_event(void)
190 get_mce_event(NULL, true);
194 * Queue up the MCE event which then can be handled later.
196 void machine_check_queue_event(void)
199 struct machine_check_event evt;
201 if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
204 index = __this_cpu_inc_return(mce_queue_count) - 1;
205 /* If queue is full, just return for now. */
206 if (index >= MAX_MC_EVT) {
207 __this_cpu_dec(mce_queue_count);
210 memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
212 /* Queue irq work to process this event later. */
213 irq_work_queue(&mce_event_process_work);
217 * process pending MCE event from the mce event queue. This function will be
218 * called during syscall exit.
220 static void machine_check_process_queued_event(struct irq_work *work)
225 * For now just print it to console.
226 * TODO: log this error event to FSP or nvram.
228 while (__this_cpu_read(mce_queue_count) > 0) {
229 index = __this_cpu_read(mce_queue_count) - 1;
230 machine_check_print_event_info(
231 this_cpu_ptr(&mce_event_queue[index]));
232 __this_cpu_dec(mce_queue_count);
236 void machine_check_print_event_info(struct machine_check_event *evt)
238 const char *level, *sevstr, *subtype;
239 static const char *mc_ue_types[] = {
242 "Page table walk ifetch",
244 "Page table walk Load/Store",
246 static const char *mc_slb_types[] = {
251 static const char *mc_erat_types[] = {
256 static const char *mc_tlb_types[] = {
261 static const char *mc_user_types[] = {
265 static const char *mc_ra_types[] = {
267 "Instruction fetch (bad)",
268 "Page table walk ifetch (bad)",
269 "Page table walk ifetch (foreign)",
272 "Page table walk Load/Store (bad)",
273 "Page table walk Load/Store (foreign)",
274 "Load/Store (foreign)",
276 static const char *mc_link_types[] = {
278 "Instruction fetch (timeout)",
279 "Page table walk ifetch (timeout)",
282 "Page table walk Load/Store (timeout)",
285 /* Print things out */
286 if (evt->version != MCE_V1) {
287 pr_err("Machine Check Exception, Unknown event version %d !\n",
291 switch (evt->severity) {
292 case MCE_SEV_NO_ERROR:
296 case MCE_SEV_WARNING:
297 level = KERN_WARNING;
300 case MCE_SEV_ERROR_SYNC:
311 printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
312 evt->disposition == MCE_DISPOSITION_RECOVERED ?
313 "Recovered" : "[Not recovered");
314 printk("%s Initiator: %s\n", level,
315 evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
316 switch (evt->error_type) {
317 case MCE_ERROR_TYPE_UE:
318 subtype = evt->u.ue_error.ue_error_type <
319 ARRAY_SIZE(mc_ue_types) ?
320 mc_ue_types[evt->u.ue_error.ue_error_type]
322 printk("%s Error type: UE [%s]\n", level, subtype);
323 if (evt->u.ue_error.effective_address_provided)
324 printk("%s Effective address: %016llx\n",
325 level, evt->u.ue_error.effective_address);
326 if (evt->u.ue_error.physical_address_provided)
327 printk("%s Physical address: %016llx\n",
328 level, evt->u.ue_error.physical_address);
330 case MCE_ERROR_TYPE_SLB:
331 subtype = evt->u.slb_error.slb_error_type <
332 ARRAY_SIZE(mc_slb_types) ?
333 mc_slb_types[evt->u.slb_error.slb_error_type]
335 printk("%s Error type: SLB [%s]\n", level, subtype);
336 if (evt->u.slb_error.effective_address_provided)
337 printk("%s Effective address: %016llx\n",
338 level, evt->u.slb_error.effective_address);
340 case MCE_ERROR_TYPE_ERAT:
341 subtype = evt->u.erat_error.erat_error_type <
342 ARRAY_SIZE(mc_erat_types) ?
343 mc_erat_types[evt->u.erat_error.erat_error_type]
345 printk("%s Error type: ERAT [%s]\n", level, subtype);
346 if (evt->u.erat_error.effective_address_provided)
347 printk("%s Effective address: %016llx\n",
348 level, evt->u.erat_error.effective_address);
350 case MCE_ERROR_TYPE_TLB:
351 subtype = evt->u.tlb_error.tlb_error_type <
352 ARRAY_SIZE(mc_tlb_types) ?
353 mc_tlb_types[evt->u.tlb_error.tlb_error_type]
355 printk("%s Error type: TLB [%s]\n", level, subtype);
356 if (evt->u.tlb_error.effective_address_provided)
357 printk("%s Effective address: %016llx\n",
358 level, evt->u.tlb_error.effective_address);
360 case MCE_ERROR_TYPE_USER:
361 subtype = evt->u.user_error.user_error_type <
362 ARRAY_SIZE(mc_user_types) ?
363 mc_user_types[evt->u.user_error.user_error_type]
365 printk("%s Error type: User [%s]\n", level, subtype);
366 if (evt->u.user_error.effective_address_provided)
367 printk("%s Effective address: %016llx\n",
368 level, evt->u.user_error.effective_address);
370 case MCE_ERROR_TYPE_RA:
371 subtype = evt->u.ra_error.ra_error_type <
372 ARRAY_SIZE(mc_ra_types) ?
373 mc_ra_types[evt->u.ra_error.ra_error_type]
375 printk("%s Error type: Real address [%s]\n", level, subtype);
376 if (evt->u.ra_error.effective_address_provided)
377 printk("%s Effective address: %016llx\n",
378 level, evt->u.ra_error.effective_address);
380 case MCE_ERROR_TYPE_LINK:
381 subtype = evt->u.link_error.link_error_type <
382 ARRAY_SIZE(mc_link_types) ?
383 mc_link_types[evt->u.link_error.link_error_type]
385 printk("%s Error type: Link [%s]\n", level, subtype);
386 if (evt->u.link_error.effective_address_provided)
387 printk("%s Effective address: %016llx\n",
388 level, evt->u.link_error.effective_address);
391 case MCE_ERROR_TYPE_UNKNOWN:
392 printk("%s Error type: Unknown\n", level);
397 uint64_t get_mce_fault_addr(struct machine_check_event *evt)
399 switch (evt->error_type) {
400 case MCE_ERROR_TYPE_UE:
401 if (evt->u.ue_error.effective_address_provided)
402 return evt->u.ue_error.effective_address;
404 case MCE_ERROR_TYPE_SLB:
405 if (evt->u.slb_error.effective_address_provided)
406 return evt->u.slb_error.effective_address;
408 case MCE_ERROR_TYPE_ERAT:
409 if (evt->u.erat_error.effective_address_provided)
410 return evt->u.erat_error.effective_address;
412 case MCE_ERROR_TYPE_TLB:
413 if (evt->u.tlb_error.effective_address_provided)
414 return evt->u.tlb_error.effective_address;
416 case MCE_ERROR_TYPE_USER:
417 if (evt->u.user_error.effective_address_provided)
418 return evt->u.user_error.effective_address;
420 case MCE_ERROR_TYPE_RA:
421 if (evt->u.ra_error.effective_address_provided)
422 return evt->u.ra_error.effective_address;
424 case MCE_ERROR_TYPE_LINK:
425 if (evt->u.link_error.effective_address_provided)
426 return evt->u.link_error.effective_address;
429 case MCE_ERROR_TYPE_UNKNOWN:
434 EXPORT_SYMBOL(get_mce_fault_addr);